Delete SYCL support

See discussion here:
https://github.com/tensorflow/tensorflow/issues/41809#issuecomment-688021592

Fixes #41809.

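Note: after this change the "SYCL" device type is no longer registered and the
--config=sycl* build configs are gone. A quick way to confirm which device
types a given build still registers (a sketch, assuming a TF 2.x build where
the tf.config API is available):

    import tensorflow as tf

    # Prints the registered physical device types, e.g. ['CPU', 'GPU'];
    # 'SYCL' will no longer appear in builds that include this change.
    print(sorted({d.device_type for d in tf.config.list_physical_devices()}))
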
PiperOrigin-RevId: 331808169
Change-Id: Ib0861cf250c92c20f0e8a22adce89a4dc4d3548a
diff --git a/.bazelrc b/.bazelrc
index 774f614..3be904f 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -46,10 +46,6 @@
 #     using_cuda:   CUDA is available to build system.
 #     cuda:         Build with full cuda support.
 #     rocm:         Build with AMD GPU support (rocm).
-#     sycl:         Build with SYCL support.
-#     sycl_nodouble:
-#     sycl_asan:
-#     sycl_trisycl:
 #     mkl:          Enable full mkl support.
 #     tensorrt:     Enable Tensorrt support.
 #     ngraph:       Enable ngraph support.
@@ -214,19 +210,6 @@
 build:rocm --define=using_rocm=true --define=using_rocm_hipcc=true
 build:rocm --action_env TF_NEED_ROCM=1
 
-build:sycl --crosstool_top=@local_config_sycl//crosstool:toolchain
-build:sycl --define=using_sycl=true
-build:sycl --action_env TF_NEED_OPENCL_SYCL=1
-
-build:sycl_nodouble --config=sycl
-build:sycl_nodouble --cxxopt -DTENSORFLOW_SYCL_NO_DOUBLE
-
-build:sycl_nodouble --config=sycl
-build:sycl_asan --copt -fno-omit-frame-pointer --copt -fsanitize-coverage=3 --copt -DGPR_NO_DIRECT_SYSCALLS --linkopt -fPIC --linkopt -fsanitize=address
-
-build:sycl_nodouble --config=sycl
-build:sycl_trisycl --define=using_trisycl=true
-
 # Options extracted from configure script
 build:ngraph --define=with_ngraph_support=true
 build:numa --define=with_numa_support=true
diff --git a/configure.py b/configure.py
index 96cc70a..5b9fd55 100644
--- a/configure.py
+++ b/configure.py
@@ -38,9 +38,6 @@
 _DEFAULT_TENSORRT_VERSION = '6'
 _DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,7.0'
 
-_TF_OPENCL_VERSION = '1.2'
-_DEFAULT_COMPUTECPP_TOOLKIT_PATH = '/usr/local/computecpp'
-_DEFAULT_TRISYCL_INCLUDE_DIR = '/usr/local/triSYCL/include'
 _SUPPORTED_ANDROID_NDK_VERSIONS = [10, 11, 12, 13, 14, 15, 16, 17, 18]
 
 _DEFAULT_PROMPT_ASK_ATTEMPTS = 10
@@ -1114,62 +1111,6 @@
   write_action_env_to_bazelrc('HOST_C_COMPILER', host_c_compiler)
 
 
-def set_computecpp_toolkit_path(environ_cp):
-  """Set COMPUTECPP_TOOLKIT_PATH."""
-
-  def toolkit_exists(toolkit_path):
-    """Check if a computecpp toolkit path is valid."""
-    if is_linux():
-      sycl_rt_lib_path = 'lib/libComputeCpp.so'
-    else:
-      sycl_rt_lib_path = ''
-
-    sycl_rt_lib_path_full = os.path.join(toolkit_path, sycl_rt_lib_path)
-    exists = os.path.exists(sycl_rt_lib_path_full)
-    if not exists:
-      print('Invalid SYCL %s library path. %s cannot be found' %
-            (_TF_OPENCL_VERSION, sycl_rt_lib_path_full))
-    return exists
-
-  computecpp_toolkit_path = prompt_loop_or_load_from_env(
-      environ_cp,
-      var_name='COMPUTECPP_TOOLKIT_PATH',
-      var_default=_DEFAULT_COMPUTECPP_TOOLKIT_PATH,
-      ask_for_var=(
-          'Please specify the location where ComputeCpp for SYCL %s is '
-          'installed.' % _TF_OPENCL_VERSION),
-      check_success=toolkit_exists,
-      error_msg='Invalid SYCL compiler path. %s cannot be found.',
-      suppress_default_error=True)
-
-  write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH',
-                              computecpp_toolkit_path)
-
-
-def set_trisycl_include_dir(environ_cp):
-  """Set TRISYCL_INCLUDE_DIR."""
-
-  ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
-                             'include directory. (Use --config=sycl_trisycl '
-                             'when building with Bazel) '
-                             '[Default is %s]: ') % (
-                                 _DEFAULT_TRISYCL_INCLUDE_DIR)
-
-  while True:
-    trisycl_include_dir = get_from_env_or_user_or_default(
-        environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
-        _DEFAULT_TRISYCL_INCLUDE_DIR)
-    if os.path.exists(trisycl_include_dir):
-      break
-
-    print('Invalid triSYCL include directory, %s cannot be found' %
-          (trisycl_include_dir))
-
-  # Set TRISYCL_INCLUDE_DIR
-  environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir
-  write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir)
-
-
 def system_specific_test_config(environ_cp):
   """Add default build and test flags required for TF tests to bazelrc."""
   write_to_bazelrc('test --flaky_test_attempts=3')
@@ -1397,8 +1338,6 @@
   setup_python(environ_cp)
 
   if is_windows():
-    environ_cp['TF_NEED_OPENCL_SYCL'] = '0'
-    environ_cp['TF_NEED_COMPUTECPP'] = '0'
     environ_cp['TF_NEED_OPENCL'] = '0'
     environ_cp['TF_CUDA_CLANG'] = '0'
     environ_cp['TF_NEED_TENSORRT'] = '0'
@@ -1416,21 +1355,6 @@
     write_to_bazelrc('build --config=xla')
 
   set_action_env_var(
-      environ_cp,
-      'TF_NEED_OPENCL_SYCL',
-      'OpenCL SYCL',
-      False,
-      bazel_config_name='sycl')
-  if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
-    set_host_cxx_compiler(environ_cp)
-    set_host_c_compiler(environ_cp)
-    set_action_env_var(environ_cp, 'TF_NEED_COMPUTECPP', 'ComputeCPP', True)
-    if environ_cp.get('TF_NEED_COMPUTECPP') == '1':
-      set_computecpp_toolkit_path(environ_cp)
-    else:
-      set_trisycl_include_dir(environ_cp)
-
-  set_action_env_var(
       environ_cp, 'TF_NEED_ROCM', 'ROCm', False, bazel_config_name='rocm')
   if (environ_cp.get('TF_NEED_ROCM') == '1' and
       'LD_LIBRARY_PATH' in environ_cp and
@@ -1528,17 +1452,15 @@
     # use it for the CPU build.
     set_tf_download_clang(environ_cp)
 
-  # SYCL / ROCm / CUDA are mutually exclusive.
+  # ROCm / CUDA are mutually exclusive.
   # At most 1 GPU platform can be configured.
   gpu_platform_count = 0
-  if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
-    gpu_platform_count += 1
   if environ_cp.get('TF_NEED_ROCM') == '1':
     gpu_platform_count += 1
   if environ_cp.get('TF_NEED_CUDA') == '1':
     gpu_platform_count += 1
   if gpu_platform_count >= 2:
-    raise UserInputError('SYCL / CUDA / ROCm are mututally exclusive. '
+    raise UserInputError('CUDA / ROCm are mutually exclusive. '
                          'At most 1 GPU platform can be configured.')
 
   set_cc_opt_flags(environ_cp)
diff --git a/tensorflow/core/common_runtime/BUILD b/tensorflow/core/common_runtime/BUILD
index 9c8f14c..684ec7c 100644
--- a/tensorflow/core/common_runtime/BUILD
+++ b/tensorflow/core/common_runtime/BUILD
@@ -88,7 +88,6 @@
     deps = [
         ":core_cpu",
         "//tensorflow/core/common_runtime/gpu:gpu_runtime",
-        "//tensorflow/core/common_runtime/sycl:sycl_runtime",
     ] + if_tpu(["//tensorflow/core/tpu:tpu_runtime"]),
 )
 
diff --git a/tensorflow/core/common_runtime/device_factory.h b/tensorflow/core/common_runtime/device_factory.h
index 9d911c2..f10a718 100644
--- a/tensorflow/core/common_runtime/device_factory.h
+++ b/tensorflow/core/common_runtime/device_factory.h
@@ -123,7 +123,6 @@
   //
   // The default priority values for built-in devices is:
   // GPU: 210
-  // SYCL: 200
   // GPUCompatibleCPU: 70
   // ThreadPoolDevice: 60
   // Default: 50
diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc
index eab5086..613449f 100644
--- a/tensorflow/core/common_runtime/direct_session_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_test.cc
@@ -1965,7 +1965,6 @@
       ->set_constant_folding(RewriterConfig::OFF);
   (*options.config.mutable_device_count())["CPU"] = 2;
   (*options.config.mutable_device_count())["GPU"] = 0;
-  (*options.config.mutable_device_count())["SYCL"] = 0;
 
   auto* p = options.config.add_session_inter_op_thread_pool();
   if (use_global_pools) p->set_global_name("large pool");
diff --git a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
index ee4cc2d..c314d29 100644
--- a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc
@@ -175,16 +175,10 @@
   test::FillValues<float>(&x_tensor, {1, 1});
   Node* x = test::graph::Constant(&graph, x_tensor);
   x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
-#ifdef TENSORFLOW_USE_SYCL
-  x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:SYCL:0");
-#endif  // TENSORFLOW_USE_SYCL
 
   // y = A * x
   Node* y = test::graph::Matmul(&graph, a, x, false, false);
   y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
-#ifdef TENSORFLOW_USE_SYCL
-  y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:SYCL:0");
-#endif  // TENSORFLOW_USE_SYCL
 
   Node* y_neg = test::graph::Unary(&graph, "Neg", y);
   y_neg->set_assigned_device_name("/job:localhost/replica:0/task:0/cpu:0");
@@ -195,9 +189,6 @@
   SessionOptions options;
   (*options.config.mutable_device_count())["CPU"] = 1;
   (*options.config.mutable_device_count())["GPU"] = 1;
-#ifdef TENSORFLOW_USE_SYCL
-  (*options.config.mutable_device_count())["SYCL"] = 1;
-#endif  // TENSORFLOW_USE_SYCL
   options.config.set_allow_soft_placement(true);
   options.config.mutable_graph_options()->set_build_cost_model(1);
   std::unique_ptr<Session> session(NewSession(options));
diff --git a/tensorflow/core/common_runtime/memory_types.cc b/tensorflow/core/common_runtime/memory_types.cc
index b37e65a..71fe7df 100644
--- a/tensorflow/core/common_runtime/memory_types.cc
+++ b/tensorflow/core/common_runtime/memory_types.cc
@@ -48,13 +48,12 @@
 static Status ProcessMemoryTypes(
     const DeviceType& device_type, const Graph* g,
     const std::function<Status(const Edge*, MemoryType, MemoryType)>& fn) {
-  if (device_type != DEVICE_GPU && device_type != DEVICE_SYCL) {
-    // On non-GPU and non-SYCL devices, HOST_MEMORY and DEVICE_MEMORY are always
-    // compatible.
+  if (device_type != DEVICE_GPU) {
+    // On non-GPU devices, HOST_MEMORY and DEVICE_MEMORY are always compatible.
     return Status::OK();
   }
-  // For GPU and SYCL device, HOST_MEMORY and DEVICE_MEMORY is not
-  // compatible. I.e., a conversion/transfer must be done.
+  // For GPU, HOST_MEMORY and DEVICE_MEMORY are not compatible, i.e., a
+  // conversion/transfer must be done.
   //
   // {node id, slot id} -> memory type.
   typedef std::unordered_map<Endpoint, MemoryType, EndpointHash, EndpointEq>
diff --git a/tensorflow/core/common_runtime/memory_types_test.cc b/tensorflow/core/common_runtime/memory_types_test.cc
index e2ed7aa..45e0a8b 100644
--- a/tensorflow/core/common_runtime/memory_types_test.cc
+++ b/tensorflow/core/common_runtime/memory_types_test.cc
@@ -34,9 +34,6 @@
   // There is a kernel for adding two int32s on host memory.
   TF_EXPECT_OK(ValidateMemoryTypes(DEVICE_GPU, g));
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-  TF_EXPECT_OK(ValidateMemoryTypes(DEVICE_SYCL, g));
-#endif  // TENSORFLOW_USE_SYCL
   delete g;
 }
 
@@ -56,15 +53,6 @@
   TF_EXPECT_OK(EnsureMemoryTypes(DEVICE_GPU, "/device:GPU:0", g));
   TF_EXPECT_OK(ValidateMemoryTypes(DEVICE_GPU, g));
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-  // There is no kernel for casting int32/host memory to float/device
-  // memory.
-  EXPECT_TRUE(errors::IsInternal(ValidateMemoryTypes(DEVICE_SYCL, g)));
-
-  // But we can insert _HostSend/_HostRecv to ensure the invariant.
-  TF_EXPECT_OK(EnsureMemoryTypes(DEVICE_SYCL, "/device:SYCL:0", g));
-  TF_EXPECT_OK(ValidateMemoryTypes(DEVICE_SYCL, g));
-#endif  // TENSORFLOW_USE_SYCL
   delete g;
 }
 
@@ -86,12 +74,6 @@
   // int Switch's output on GPU has HOST_MEMORY constraint.
   EXPECT_EQ(memory_type, HOST_MEMORY);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-  auto si = test::graph::Switch(g, test::graph::Constant(g, vi), pred);
-  TF_EXPECT_OK(MemoryTypeForOutput(DEVICE_SYCL, g, si, 0, &memory_type));
-  // int Switch's output on GPU has HOST_MEMORY constraint.
-  EXPECT_EQ(memory_type, HOST_MEMORY);
-#endif  // TENSORFLOW_USE_SYCL
   delete g;
 }
 
diff --git a/tensorflow/core/common_runtime/renamed_device.h b/tensorflow/core/common_runtime/renamed_device.h
index cbec750..9a7c730 100644
--- a/tensorflow/core/common_runtime/renamed_device.h
+++ b/tensorflow/core/common_runtime/renamed_device.h
@@ -91,11 +91,6 @@
     return underlying_device_->has_eigen_cpu_device();
   }
 
-#ifdef TENSORFLOW_USE_SYCL
-  const Eigen::SyclDevice* eigen_sycl_device() const override {
-    return underlying_device_->eigen_sycl_device();
-  }
-#endif
 
   PerOpGpuDevice* MakeGpuDevice() override {
     return underlying_device_->MakeGpuDevice();
diff --git a/tensorflow/core/common_runtime/sycl/BUILD b/tensorflow/core/common_runtime/sycl/BUILD
deleted file mode 100644
index 4269031..0000000
--- a/tensorflow/core/common_runtime/sycl/BUILD
+++ /dev/null
@@ -1,46 +0,0 @@
-load(
-    "//tensorflow:tensorflow.bzl",
-    "if_not_windows",
-    "tf_copts",
-)
-load(
-    "//tensorflow/core/platform:rules_cc.bzl",
-    "cc_library",
-)
-
-package(
-    default_visibility = [
-        "//tensorflow:internal",
-    ],
-    features = ["-parse_headers"],
-    licenses = ["notice"],  # Apache 2.0
-)
-
-cc_library(
-    name = "sycl_runtime",
-    srcs = if_not_windows([
-        "sycl_allocator.cc",
-        "sycl_device.cc",
-        "sycl_device_context.cc",
-        "sycl_device_factory.cc",
-    ]),
-    hdrs = if_not_windows([
-        "sycl_allocator.h",
-        "sycl_device.h",
-        "sycl_util.h",
-        "sycl_device_context.h",
-    ]),
-    copts = tf_copts(),
-    linkstatic = 0,
-    deps = [
-        "//tensorflow/core:framework",
-        "//tensorflow/core:framework_internal",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:lib_internal",
-        "//tensorflow/core/common_runtime:core_cpu",
-        "//tensorflow/core/common_runtime:core_cpu_internal",
-        "//third_party/eigen3",
-        "@local_config_sycl//sycl",
-    ],
-    alwayslink = 0,
-)
diff --git a/tensorflow/core/common_runtime/sycl/sycl_allocator.cc b/tensorflow/core/common_runtime/sycl/sycl_allocator.cc
deleted file mode 100644
index 6a784ef..0000000
--- a/tensorflow/core/common_runtime/sycl/sycl_allocator.cc
+++ /dev/null
@@ -1,92 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifdef TENSORFLOW_USE_SYCL
-
-#include "tensorflow/core/common_runtime/sycl/sycl_allocator.h"
-
-namespace tensorflow {
-
-SYCLAllocator::SYCLAllocator(Eigen::QueueInterface* queue)
-    : sycl_device_(new Eigen::SyclDevice(queue)) {
-  cl::sycl::queue& sycl_queue = sycl_device_->sycl_queue();
-  const cl::sycl::device& device = sycl_queue.get_device();
-  stats_.bytes_limit =
-      device.get_info<cl::sycl::info::device::max_mem_alloc_size>();
-}
-
-SYCLAllocator::~SYCLAllocator() {
-  if (sycl_device_) {
-    delete sycl_device_;
-  }
-}
-
-string SYCLAllocator::Name() { return "device:SYCL"; }
-
-void* SYCLAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
-  mutex_lock lock(mu_);
-  assert(sycl_device_);
-  if (num_bytes == 0) {
-    // Cannot allocate no bytes in SYCL, so instead allocate a single byte
-    num_bytes = 1;
-  }
-  auto p = sycl_device_->allocate(num_bytes);
-  const auto& allocated_buffer = sycl_device_->get_sycl_buffer(p);
-  const std::size_t bytes_allocated = allocated_buffer.get_range().size();
-
-  ++stats_.num_allocs;
-  stats_.bytes_in_use += bytes_allocated;
-  stats_.max_bytes_in_use =
-      std::max<int64>(stats_.max_bytes_in_use, stats_.bytes_in_use);
-  stats_.max_alloc_size =
-      std::max<int64>(stats_.max_alloc_size, bytes_allocated);
-
-  return p;
-}
-
-void SYCLAllocator::DeallocateRaw(void* ptr) {
-  mutex_lock lock(mu_);
-  if (sycl_device_) {
-    const auto& buffer_to_delete = sycl_device_->get_sycl_buffer(ptr);
-    const std::size_t dealloc_size = buffer_to_delete.get_range().size();
-    stats_.bytes_in_use -= dealloc_size;
-    sycl_device_->deallocate(ptr);
-  }
-}
-
-void SYCLAllocator::GetStats(AllocatorStats* stats) {
-  mutex_lock lock(mu_);
-  *stats = stats_;
-}
-
-void SYCLAllocator::ClearStats() override {
-  mutex_lock l(mu_);
-  stats_.num_allocs = 0;
-  stats_.max_bytes_in_use = stats_.bytes_in_use;
-  stats_.max_alloc_size = 0;
-}
-
-size_t SYCLAllocator::RequestedSize(const void* ptr) const {
-  mutex_lock lock(mu_);
-  if (!sycl_device_) {
-    return 0;
-  }
-  const auto& buffer = sycl_device_->get_sycl_buffer(ptr);
-  return buffer.get_size();
-}
-
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/common_runtime/sycl/sycl_allocator.h b/tensorflow/core/common_runtime/sycl/sycl_allocator.h
deleted file mode 100644
index a702911..0000000
--- a/tensorflow/core/common_runtime/sycl/sycl_allocator.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if !TENSORFLOW_USE_SYCL
-#error This file must only be included when building TensorFlow with SYCL support
-#endif
-
-#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
-#define TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
-
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/framework/allocator.h"
-#include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/platform/types.h"
-
-namespace tensorflow {
-
-class SYCLAllocator : public Allocator {
- public:
-  SYCLAllocator(Eigen::QueueInterface* queue);
-  ~SYCLAllocator() override;
-  string Name() override;
-  void* AllocateRaw(size_t alignment, size_t num_bytes) override;
-  void DeallocateRaw(void* ptr) override;
-
-  bool ShouldAllocateEmptyTensors() const final { return true; }
-  void Synchronize() {
-    mutex_lock lock(mu_);
-    if (sycl_device_) {
-      sycl_device_->synchronize();
-    }
-  }
-  bool Ok() const { return sycl_device_ && sycl_device_->ok(); }
-  void GetStats(AllocatorStats* stats) override;
-  void ClearStats() override;
-
-  // The SYCL buffers keep track of their size, so we already have tracking.
-  bool TracksAllocationSizes() const override { return true; }
-  // Get the size of the corresponding SYCL buffer.
-  // Implementing this also provides an implementation of
-  // AllocatedSize(void* ptr) by default.
-  size_t RequestedSize(const void* ptr) const override;
-  Eigen::SyclDevice* getSyclDevice() { return sycl_device_; }
-  // Clear the SYCL device used by the Allocator
-  void ClearSYCLDevice() {
-    mutex_lock lock(mu_);
-    if (sycl_device_) {
-      delete sycl_device_;
-      sycl_device_ = nullptr;
-    }
-  }
-
- private:
-  mutable mutex mu_;
-  Eigen::SyclDevice* sycl_device_ TF_GUARDED_BY(mu_);  // owned
-  AllocatorStats stats_ TF_GUARDED_BY(mu_);
-
-  TF_DISALLOW_COPY_AND_ASSIGN(SYCLAllocator);
-};
-
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_ALLOCATOR_H_
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.cc b/tensorflow/core/common_runtime/sycl/sycl_device.cc
deleted file mode 100644
index 8293e6d..0000000
--- a/tensorflow/core/common_runtime/sycl/sycl_device.cc
+++ /dev/null
@@ -1,94 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if TENSORFLOW_USE_SYCL
-
-#include "tensorflow/core/common_runtime/sycl/sycl_device.h"
-
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/framework/tensor.pb.h"
-#include "tensorflow/core/platform/tracing.h"
-
-namespace tensorflow {
-
-SYCLDevice::~SYCLDevice() {}
-
-void SYCLDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
-  assert(context);
-  // When ThreadScape profiling is off (which is the default), constructing the
-  // following code is simple enough that its overhead is negligible.
-  tracing::ScopedRegion region(tracing::EventCategory::kCompute,
-                               op_kernel->name());
-
-  op_kernel->Compute(context);
-}
-
-Allocator* SYCLDevice::GetAllocator(AllocatorAttributes attr) {
-  if (attr.on_host())
-    return cpu_allocator_;
-  else
-    return sycl_allocator_;
-}
-
-Status SYCLDevice::MakeTensorFromProto(const TensorProto& tensor_proto,
-                                       const AllocatorAttributes alloc_attrs,
-                                       Tensor* tensor) {
-  AllocatorAttributes attr;
-  attr.set_on_host(true);
-  Allocator* host_alloc = GetAllocator(attr);
-
-  Tensor parsed(tensor_proto.dtype());
-  if (!parsed.FromProto(host_alloc, tensor_proto)) {
-    return errors::InvalidArgument("Cannot parse tensor from proto: ",
-                                   tensor_proto.DebugString());
-  }
-  Status status;
-  if (alloc_attrs.on_host()) {
-    *tensor = parsed;
-  } else {
-    Tensor copy(GetAllocator(alloc_attrs), parsed.dtype(), parsed.shape());
-
-    // If the tensor is not initialized, we likely ran out of memory.
-    if (!copy.IsInitialized()) {
-      return errors::ResourceExhausted(
-          "OOM when allocating tensor of shape ", parsed.shape().DebugString(),
-          " and type ", DataTypeString(parsed.dtype()));
-    }
-
-    device_context_->CopyCPUTensorToDevice(
-        &parsed, this, &copy, [&status](const Status& s) { status = s; });
-    *tensor = copy;
-  }
-  return status;
-}
-
-Status SYCLDevice::TryGetDeviceContext(DeviceContext** out_context) {
-  device_context_->Ref();
-  *out_context = device_context_;
-  return Status::OK();
-}
-
-Status SYCLDevice::Sync() {
-  sycl_allocator_->Synchronize();
-  if (sycl_allocator_->Ok()) {
-    return Status::OK();
-  } else {
-    return errors::Internal("Unknown error detected on device ", name());
-  }
-}
-
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.h b/tensorflow/core/common_runtime/sycl/sycl_device.h
deleted file mode 100644
index 08b5b39..0000000
--- a/tensorflow/core/common_runtime/sycl/sycl_device.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if !TENSORFLOW_USE_SYCL
-#error This file must only be included when building TensorFlow with SYCL support
-#endif
-
-#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
-#define TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
-
-#include "tensorflow/core/common_runtime/local_device.h"
-#include "tensorflow/core/common_runtime/sycl/sycl_allocator.h"
-#include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
-#include "tensorflow/core/public/session_options.h"
-
-namespace tensorflow {
-
-class GSYCLInterface {
-  std::vector<Eigen::QueueInterface*> m_queue_interface_;  // owned
-  std::vector<Allocator*> m_cpu_allocator_;                // not owned
-  std::vector<SYCLAllocator*> m_sycl_allocator_;           // owned
-  std::vector<SYCLDeviceContext*> m_sycl_context_;         // ref counted
-  GSYCLInterface() {
-    bool found_device = false;
-    auto device_list = Eigen::get_sycl_supported_devices();
-    // Obtain list of supported devices from Eigen
-    for (const auto& device : device_list) {
-      if (device.is_gpu()) {
-        // returns first found GPU
-        AddDevice(device);
-        found_device = true;
-      }
-    }
-
-    if (!found_device) {
-      // Currently Intel GPU is not supported
-      LOG(WARNING) << "No OpenCL GPU found that is supported by "
-                   << "ComputeCpp/triSYCL, trying OpenCL CPU";
-    }
-
-    for (const auto& device : device_list) {
-      if (device.is_cpu()) {
-        // returns first found CPU
-        AddDevice(device);
-        found_device = true;
-      }
-    }
-
-    if (!found_device) {
-      LOG(WARNING) << "No OpenCL CPU found that is supported by "
-                   << "ComputeCpp/triSYCL, checking for host sycl device";
-    }
-
-    for (const auto& device : device_list) {
-      // triSYCL only supports the host device for now
-      if (device.is_host()) {
-        LOG(WARNING) << "Found SYCL host device";
-        AddDevice(device);
-        found_device = true;
-      }
-    }
-
-    if (!found_device) {
-      // Currently Intel GPU is not supported
-      LOG(FATAL) << "No SYCL host and no OpenCL GPU nor CPU"
-                 << " supported by ComputeCPP/triSYCL was found";
-    } else {
-      LOG(INFO) << "Found following OpenCL devices:";
-      for (int i = 0; i < device_list.size(); i++) {
-        LOG(INFO) << GetShortDeviceDescription(i);
-      }
-    }
-  }
-
-  ~GSYCLInterface() {
-    m_cpu_allocator_.clear();
-
-    for (auto p : m_sycl_allocator_) {
-      p->Synchronize();
-      p->ClearSYCLDevice();
-      // Cannot delete the Allocator instances, as the Allocator lifetime
-      // needs to exceed any Tensor created by it. There is no way of
-      // knowing when all Tensors have been deallocated, as they are
-      // RefCounted and wait until all instances of a Tensor have been
-      // destroyed before calling Allocator.Deallocate. This could happen at
-      // program exit, which can set up a race condition between destroying
-      // Tensors and Allocators when the program is cleaning up.
-    }
-    m_sycl_allocator_.clear();
-
-    for (auto p : m_sycl_context_) {
-      p->Unref();
-    }
-    m_sycl_context_.clear();
-
-    for (auto p : m_queue_interface_) {
-      p->deallocate_all();
-      delete p;
-    }
-    m_queue_interface_.clear();
-  }
-
-  void AddDevice(const cl::sycl::device& d) {
-    m_queue_interface_.push_back(new Eigen::QueueInterface(d));
-    m_cpu_allocator_.push_back(cpu_allocator());
-    m_sycl_allocator_.push_back(new SYCLAllocator(m_queue_interface_.back()));
-    m_sycl_context_.push_back(new SYCLDeviceContext());
-  }
-
- public:
-  static const GSYCLInterface* instance() {
-    // c++11 guarantees that this will be constructed in a thread safe way
-    static const GSYCLInterface instance;
-    return &instance;
-  }
-
-  Eigen::QueueInterface* GetQueueInterface(size_t i = 0) const {
-    if (!m_queue_interface_.empty()) {
-      return m_queue_interface_[i];
-    } else {
-      std::cerr << "No cl::sycl::device has been added" << std::endl;
-      return nullptr;
-    }
-  }
-
-  SYCLAllocator* GetSYCLAllocator(size_t i = 0) const {
-    if (!m_sycl_allocator_.empty()) {
-      return m_sycl_allocator_[i];
-    } else {
-      std::cerr << "No cl::sycl::device has been added" << std::endl;
-      return nullptr;
-    }
-  }
-
-  Allocator* GetCPUAllocator(size_t i = 0) const {
-    if (!m_cpu_allocator_.empty()) {
-      return m_cpu_allocator_[i];
-    } else {
-      std::cerr << "No cl::sycl::device has been added" << std::endl;
-      return nullptr;
-    }
-  }
-
-  SYCLDeviceContext* GetSYCLContext(size_t i = 0) const {
-    if (!m_sycl_context_.empty()) {
-      return m_sycl_context_[i];
-    } else {
-      std::cerr << "No cl::sycl::device has been added" << std::endl;
-      return nullptr;
-    }
-  }
-
-  string GetShortDeviceDescription(int device_id = 0) const {
-    Eigen::QueueInterface* queue_ptr = GetQueueInterface(device_id);
-    if (!queue_ptr) {
-      LOG(ERROR)
-          << "Device name cannot be given after Eigen QueueInterface destroyed";
-      return "";
-    }
-    auto device = queue_ptr->sycl_queue().get_device();
-    auto name = device.get_info<cl::sycl::info::device::name>();
-    auto vendor = device.get_info<cl::sycl::info::device::vendor>();
-    auto profile = device.get_info<cl::sycl::info::device::profile>();
-
-    std::string type;
-    if (device.is_host()) {
-      type = "Host";
-    } else if (device.is_cpu()) {
-      type = "CPU";
-    } else if (device.is_gpu()) {
-      type = "GPU";
-    } else if (device.is_accelerator()) {
-      type = "Accelerator";
-    } else {
-      type = "Unknown";
-    }
-
-    return strings::StrCat(
-        "id: ", device_id, ", type: ", type, ", name: ", name.c_str(),
-        ", vendor: ", vendor.c_str(), ", profile: ", profile.c_str());
-  }
-};
-
-class SYCLDevice : public LocalDevice {
- public:
-  SYCLDevice(const SessionOptions& options, const string& name,
-             Bytes memory_limit, const DeviceLocality& locality,
-             const string& physical_device_desc, SYCLAllocator* sycl_allocator,
-             Allocator* cpu_allocator, SYCLDeviceContext* ctx)
-      : LocalDevice(options, Device::BuildDeviceAttributes(
-                                 name, DEVICE_SYCL, memory_limit, locality,
-                                 physical_device_desc)),
-        cpu_allocator_(cpu_allocator),
-        sycl_allocator_(sycl_allocator),
-        device_context_(ctx) {
-    set_eigen_sycl_device(sycl_allocator->getSyclDevice());
-  }
-
-  ~SYCLDevice() override;
-
-  void Compute(OpKernel* op_kernel, OpKernelContext* context) override;
-  Allocator* GetAllocator(AllocatorAttributes attr) override;
-  Status MakeTensorFromProto(const TensorProto& tensor_proto,
-                             const AllocatorAttributes alloc_attrs,
-                             Tensor* tensor) override;
-
-  Status TryGetDeviceContext(DeviceContext** out_context) override;
-
-  Status Sync() override;
-
- private:
-  Allocator* cpu_allocator_;           // not owned
-  SYCLAllocator* sycl_allocator_;      // not owned
-  SYCLDeviceContext* device_context_;  // not owned
-};
-
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_context.cc b/tensorflow/core/common_runtime/sycl/sycl_device_context.cc
deleted file mode 100644
index 1c868f5..0000000
--- a/tensorflow/core/common_runtime/sycl/sycl_device_context.cc
+++ /dev/null
@@ -1,181 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if TENSORFLOW_USE_SYCL
-
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-
-#include "tensorflow/core/common_runtime/dma_helper.h"
-#include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
-
-namespace tensorflow {
-
-void SYCLDeviceContext::CopyCPUTensorToDevice(const Tensor *cpu_tensor,
-                                              Device *device,
-                                              Tensor *device_tensor,
-                                              StatusCallback done) const {
-  const int64 total_bytes = cpu_tensor->TotalBytes();
-  if (total_bytes > 0) {
-    const void *src_ptr = DMAHelper::base(cpu_tensor);
-    void *dst_ptr = DMAHelper::base(device_tensor);
-    switch (cpu_tensor->dtype()) {
-      case DT_FLOAT:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<float *>(dst_ptr), static_cast<const float *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_DOUBLE:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<double *>(dst_ptr),
-            static_cast<const double *>(src_ptr), total_bytes);
-        break;
-      case DT_INT32:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<int32 *>(dst_ptr), static_cast<const int32 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_INT64:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<int64 *>(dst_ptr), static_cast<const int64 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_HALF:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<Eigen::half *>(dst_ptr),
-            static_cast<const Eigen::half *>(src_ptr), total_bytes);
-        break;
-      case DT_COMPLEX64:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<std::complex<float> *>(dst_ptr),
-            static_cast<const std::complex<float> *>(src_ptr), total_bytes);
-        break;
-      case DT_COMPLEX128:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<std::complex<double> *>(dst_ptr),
-            static_cast<const std::complex<double> *>(src_ptr), total_bytes);
-        break;
-      case DT_INT8:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<int8 *>(dst_ptr), static_cast<const int8 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_INT16:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<int16 *>(dst_ptr), static_cast<const int16 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_UINT8:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<uint8 *>(dst_ptr), static_cast<const uint8 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_UINT16:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<uint16 *>(dst_ptr),
-            static_cast<const uint16 *>(src_ptr), total_bytes);
-        break;
-      case DT_BOOL:
-        device->eigen_sycl_device()->memcpyHostToDevice(
-            static_cast<bool *>(dst_ptr), static_cast<const bool *>(src_ptr),
-            total_bytes);
-        break;
-      default:
-        assert(false && "unsupported type");
-    }
-  }
-  device->eigen_sycl_device()->synchronize();
-  done(Status::OK());
-}
-
-void SYCLDeviceContext::CopyDeviceTensorToCPU(const Tensor *device_tensor,
-                                              StringPiece edge_name,
-                                              Device *device,
-                                              Tensor *cpu_tensor,
-                                              StatusCallback done) {
-  const int64 total_bytes = device_tensor->TotalBytes();
-  if (total_bytes > 0) {
-    const void *src_ptr = DMAHelper::base(device_tensor);
-    void *dst_ptr = DMAHelper::base(cpu_tensor);
-    switch (device_tensor->dtype()) {
-      case DT_FLOAT:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<float *>(dst_ptr), static_cast<const float *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_DOUBLE:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<double *>(dst_ptr),
-            static_cast<const double *>(src_ptr), total_bytes);
-        break;
-      case DT_INT32:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<int32 *>(dst_ptr), static_cast<const int32 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_INT64:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<int64 *>(dst_ptr), static_cast<const int64 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_HALF:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<Eigen::half *>(dst_ptr),
-            static_cast<const Eigen::half *>(src_ptr), total_bytes);
-        break;
-      case DT_COMPLEX64:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<std::complex<float> *>(dst_ptr),
-            static_cast<const std::complex<float> *>(src_ptr), total_bytes);
-        break;
-      case DT_COMPLEX128:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<std::complex<double> *>(dst_ptr),
-            static_cast<const std::complex<double> *>(src_ptr), total_bytes);
-        break;
-      case DT_INT8:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<int8 *>(dst_ptr), static_cast<const int8 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_INT16:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<int16 *>(dst_ptr), static_cast<const int16 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_UINT8:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<uint8 *>(dst_ptr), static_cast<const uint8 *>(src_ptr),
-            total_bytes);
-        break;
-      case DT_UINT16:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<uint16 *>(dst_ptr),
-            static_cast<const uint16 *>(src_ptr), total_bytes);
-        break;
-      case DT_BOOL:
-        device->eigen_sycl_device()->memcpyDeviceToHost(
-            static_cast<bool *>(dst_ptr), static_cast<const bool *>(src_ptr),
-            total_bytes);
-        break;
-      default:
-        assert(false && "unsupported type");
-    }
-  }
-  device->eigen_sycl_device()->synchronize();
-  done(Status::OK());
-}
-
-}  // namespace tensorflow
-#endif  // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_context.h b/tensorflow/core/common_runtime/sycl/sycl_device_context.h
deleted file mode 100644
index 0f8f17b..0000000
--- a/tensorflow/core/common_runtime/sycl/sycl_device_context.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if !TENSORFLOW_USE_SYCL
-#error This file must only be included when building TensorFlow with SYCL support
-#endif
-
-#ifndef TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
-#define TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
-
-#include "tensorflow/core/common_runtime/device.h"
-#include "tensorflow/core/framework/device_base.h"
-
-namespace tensorflow {
-
-class SYCLDeviceContext : public DeviceContext {
- public:
-  SYCLDeviceContext() {}
-
-  ~SYCLDeviceContext() override {}
-
-  void CopyCPUTensorToDevice(const Tensor *cpu_tensor, Device *device,
-                             Tensor *device_tensor,
-                             StatusCallback done) const override;
-
-  void CopyDeviceTensorToCPU(const Tensor *device_tensor, StringPiece edge_name,
-                             Device *device, Tensor *cpu_tensor,
-                             StatusCallback done) override;
-};
-
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_COMMON_RUNTIME_SYCL_SYCL_DEVICE_CONTEXT_H_
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc b/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc
deleted file mode 100644
index ca57545..0000000
--- a/tensorflow/core/common_runtime/sycl/sycl_device_factory.cc
+++ /dev/null
@@ -1,57 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if TENSORFLOW_USE_SYCL
-
-#include "tensorflow/core/common_runtime/device_factory.h"
-#include "tensorflow/core/common_runtime/sycl/sycl_device.h"
-
-#include "tensorflow/core/common_runtime/sycl/sycl_util.h"
-
-namespace tensorflow {
-
-class SYCLDeviceFactory : public DeviceFactory {
- public:
-  Status ListPhysicalDevices(std::vector<string>* devices) override {
-    return tensorflow::Status::OK();
-  }
-
-  Status CreateDevices(const SessionOptions& options, const string& name_prefix,
-                       std::vector<std::unique_ptr<Device>>* devices) override {
-    auto syclInterface = GSYCLInterface::instance();
-
-    size_t n = 1;
-    auto iter = options.config.device_count().find("SYCL");
-    if (iter != options.config.device_count().end()) {
-      n = iter->second;
-    }
-
-    for (int i = 0; i < n; i++) {
-      string name = strings::StrCat(name_prefix, "/device:SYCL:", i);
-      devices->push_back(new SYCLDevice(
-          options, name, Bytes(256 << 20), DeviceLocality(),
-          syclInterface->GetShortDeviceDescription(i),
-          syclInterface->GetSYCLAllocator(i), syclInterface->GetCPUAllocator(i),
-          syclInterface->GetSYCLContext(i)));
-    }
-
-    return Status::OK();
-  }
-};
-
-REGISTER_LOCAL_DEVICE_FACTORY("SYCL", SYCLDeviceFactory, 200);
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/common_runtime/sycl/sycl_util.h b/tensorflow/core/common_runtime/sycl/sycl_util.h
deleted file mode 100644
index 3124ed2..0000000
--- a/tensorflow/core/common_runtime/sycl/sycl_util.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if !TENSORFLOW_USE_SYCL
-#error This file must only be included when building TensorFlow with SYCL support
-#endif
-
-#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_UTIL_H_
-#define TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_UTIL_H_
-
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/common_runtime/device.h"
-// For DMA helper
-#include "tensorflow/core/common_runtime/dma_helper.h"
-#include "tensorflow/core/framework/tensor.h"
-
-namespace tensorflow {
-inline void const* GetBase(const Tensor* src) { return DMAHelper::base(src); }
-inline void* GetBase(Tensor* dst) { return DMAHelper::base(dst); }
-
-inline void SYCLmemcpy(Eigen::SyclDevice const& device,
-                       Tensor const& src_tensor, Tensor* dst_tensor) {
-  const size_t size = src_tensor.TotalBytes();
-  void* dst_ptr = GetBase(dst_tensor);
-  void const* src_ptr = GetBase(&src_tensor);
-
-#define COPY_WITH_TYPE(T) \
-  device.memcpy(dst_ptr, static_cast<T const*>(src_ptr), size);
-  switch (src_tensor.dtype()) {
-    case DT_COMPLEX128:
-      COPY_WITH_TYPE(cl::sycl::cl_ulong2);
-      break;
-    case DT_DOUBLE:
-    case DT_COMPLEX64:
-    case DT_INT64:
-      COPY_WITH_TYPE(cl::sycl::cl_ulong);
-      break;
-    case DT_FLOAT:
-    case DT_INT32:
-    case DT_QINT32:
-      COPY_WITH_TYPE(cl::sycl::cl_uint);
-      break;
-    case DT_INT16:
-    case DT_UINT16:
-    case DT_BFLOAT16:
-    case DT_QINT16:
-    case DT_QUINT16:
-    case DT_HALF:
-      COPY_WITH_TYPE(cl::sycl::cl_ushort);
-      break;
-    case DT_BOOL:
-      COPY_WITH_TYPE(bool);
-      break;
-    case DT_UINT8:
-    case DT_INT8:
-    case DT_QINT8:
-    case DT_QUINT8:
-      COPY_WITH_TYPE(cl::sycl::cl_uchar);
-      break;
-    default:
-      LOG(FATAL) << "Unknown data type " << src_tensor.dtype();
-      break;
-  }
-#undef COPY_WITH_TYPE
-}
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_UTIL_H_
diff --git a/tensorflow/core/debug/grpc_session_debug_test.cc b/tensorflow/core/debug/grpc_session_debug_test.cc
index 65ec1ef..cb722c6 100644
--- a/tensorflow/core/debug/grpc_session_debug_test.cc
+++ b/tensorflow/core/debug/grpc_session_debug_test.cc
@@ -283,12 +283,10 @@
 
         DeleteDumpDir();
       } else {
-        // CUDA and SYCL devices do not have an Identity op for strings
+        // The CUDA device does not have an Identity op for strings
         LOG(ERROR) << "Error: " << s;
         ASSERT_TRUE((a_dev.device_type() == DEVICE_GPU) ||
-                    (a_dev.device_type() == DEVICE_SYCL) ||
-                    (b_dev.device_type() == DEVICE_GPU) ||
-                    (b_dev.device_type() == DEVICE_SYCL));
+                    (b_dev.device_type() == DEVICE_GPU));
         ASSERT_FALSE(s.ok());
       }
     }
diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h
index fabb0b2..c39cf43 100644
--- a/tensorflow/core/framework/device_base.h
+++ b/tensorflow/core/framework/device_base.h
@@ -32,9 +32,6 @@
 
 namespace Eigen {
 struct ThreadPoolDevice;
-#ifdef TENSORFLOW_USE_SYCL
-struct SyclDevice;
-#endif
 }  // end namespace Eigen
 
 namespace stream_executor {
@@ -176,9 +173,6 @@
   // Does not take ownership.
   void set_eigen_cpu_device(Eigen::ThreadPoolDevice* d);
 
-#ifdef TENSORFLOW_USE_SYCL
-  void set_eigen_sycl_device(Eigen::SyclDevice* d) { eigen_sycl_device_ = d; }
-#endif
 
   // Return the Allocator implementation to use based on the allocator
   // attributes requested.  See allocator.h for more details.
@@ -210,12 +204,6 @@
 
   virtual const Eigen::ThreadPoolDevice* eigen_cpu_device();
 
-#ifdef TENSORFLOW_USE_SYCL
-  virtual const Eigen::SyclDevice* eigen_sycl_device() const {
-    CHECK(eigen_sycl_device_ != nullptr);
-    return eigen_sycl_device_;
-  }
-#endif
 
   // Caller owns the return value. The OpKernelContext calls this even
   // for devices that do not implement an eigen_gpu_device. Overridden
@@ -290,9 +278,6 @@
   GpuDeviceInfo* gpu_device_info_ = nullptr;
   thread::ThreadPool* device_thread_pool_ = nullptr;
   std::vector<Eigen::ThreadPoolDevice*> eigen_cpu_devices_;
-#ifdef TENSORFLOW_USE_SYCL
-  Eigen::SyclDevice* eigen_sycl_device_ = nullptr;
-#endif
 };
 
 // Methods to create and check for Symbolic execution devices.
diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc
index 32bb220..b4ed7f9 100644
--- a/tensorflow/core/framework/op_kernel.cc
+++ b/tensorflow/core/framework/op_kernel.cc
@@ -114,10 +114,9 @@
   OP_REQUIRES_OK(context, CheckOpDeprecation(*props_->op_def,
                                              context->graph_def_version()));
 
-  // Kernels executing on GPU/SYCL tie very few resources on the CPU where the
+  // Kernels executing on GPU tie up very few resources on the CPU where the
   // scheduler runs: we consider them as inexpensive.
-  expensive_ = context->device_type() != DeviceType(DEVICE_GPU) &&
-               context->device_type() != DeviceType(DEVICE_SYCL);
+  expensive_ = context->device_type() != DeviceType(DEVICE_GPU);
 }
 
 OpKernel::OpKernel(OpKernelConstruction* context, NodeDef&& custom_def,
@@ -141,10 +140,9 @@
   OP_REQUIRES_OK(context, CheckOpDeprecation(*props_->op_def,
                                              context->graph_def_version()));
 
-  // Kernels executing on GPU/SYCL tie very few resources on the CPU where the
+  // Kernels executing on GPU tie up very few resources on the CPU where the
   // scheduler runs: we consider them as inexpensive.
-  expensive_ = context->device_type() != DeviceType(DEVICE_GPU) &&
-               context->device_type() != DeviceType(DEVICE_SYCL);
+  expensive_ = context->device_type() != DeviceType(DEVICE_GPU);
 }
 
 OpKernel::~OpKernel() {}
@@ -1722,12 +1720,6 @@
   return eigen_gpu_device();
 }
 
-#ifdef TENSORFLOW_USE_SYCL
-template <>
-const Eigen::SyclDevice& OpKernelContext::eigen_device() const {
-  return eigen_sycl_device();
-}
-#endif
 
 void OpKernelConstruction::CtxFailure(const Status& s) {
   VLOG(1) << s;
diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h
index 0116a1f..82cab5b 100644
--- a/tensorflow/core/framework/op_kernel.h
+++ b/tensorflow/core/framework/op_kernel.h
@@ -58,7 +58,6 @@
 namespace Eigen {
 struct ThreadPoolDevice;
 struct GpuDevice;
-struct SyclDevice;
 }  // end namespace Eigen
 
 namespace tensorflow {
@@ -1149,11 +1148,6 @@
   const Eigen::GpuDevice& eigen_gpu_device() const {
     return params_->eigen_gpu_device->device();
   }
-#ifdef TENSORFLOW_USE_SYCL
-  const Eigen::SyclDevice& eigen_sycl_device() const {
-    return *device()->eigen_sycl_device();
-  }
-#endif
   template <typename EigenDeviceType>
   const EigenDeviceType& eigen_device() const;
 
@@ -1336,10 +1330,6 @@
 template <>
 const Eigen::GpuDevice& OpKernelContext::eigen_device() const;
 
-#ifdef TENSORFLOW_USE_SYCL
-template <>
-const Eigen::SyclDevice& OpKernelContext::eigen_device() const;
-#endif
 
 // Register your OpKernel by specifying the Op's name, the device the
 // kernel runs on, any type attr constraints for this kernel, any
diff --git a/tensorflow/core/framework/register_types.h b/tensorflow/core/framework/register_types.h
index 0cf6536..d34b580 100644
--- a/tensorflow/core/framework/register_types.h
+++ b/tensorflow/core/framework/register_types.h
@@ -211,16 +211,4 @@
   TF_CALL_COMPLEX_TYPES(m)                 \
   TF_CALL_QUANTIZED_TYPES(m) TF_CALL_bool(m) TF_CALL_tstring(m)
 
-#ifdef TENSORFLOW_SYCL_NO_DOUBLE
-#define TF_CALL_SYCL_double(m)
-#else  // TENSORFLOW_SYCL_NO_DOUBLE
-#define TF_CALL_SYCL_double(m) TF_CALL_double(m)
-#endif  // TENSORFLOW_SYCL_NO_DOUBLE
-
-#ifdef __ANDROID_TYPES_SLIM__
-#define TF_CALL_SYCL_NUMBER_TYPES(m) TF_CALL_float(m)
-#else  // __ANDROID_TYPES_SLIM__
-#define TF_CALL_SYCL_NUMBER_TYPES(m) TF_CALL_float(m) TF_CALL_SYCL_double(m)
-#endif  // __ANDROID_TYPES_SLIM__
-
 #endif  // TENSORFLOW_CORE_FRAMEWORK_REGISTER_TYPES_H_
diff --git a/tensorflow/core/framework/register_types_traits.h b/tensorflow/core/framework/register_types_traits.h
index 6600217..ff6c9fb 100644
--- a/tensorflow/core/framework/register_types_traits.h
+++ b/tensorflow/core/framework/register_types_traits.h
@@ -21,9 +21,6 @@
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 #include "tensorflow/core/framework/numeric_types.h"
 #include "tensorflow/core/platform/types.h"
@@ -74,16 +71,6 @@
   typedef ::tensorflow::int8 type;
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-template <>
-struct proxy_type_pod<SYCLDevice, 8> {
-  typedef double type;
-};
-template <>
-struct proxy_type_pod<SYCLDevice, 4> {
-  typedef float type;
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 /// If POD we use proxy_type_pod, otherwise this maps to identity.
 template <typename Device, typename T>
@@ -101,10 +88,6 @@
 #define TF_CALL_GPU_PROXY_TYPES(m)                                    \
   TF_CALL_double(m) TF_CALL_float(m) TF_CALL_half(m) TF_CALL_int32(m) \
       TF_CALL_int8(m)
-#ifdef TENSORFLOW_USE_SYCL
-#define TF_CALL_SYCL_PROXY_TYPES(m) \
-  TF_CALL_double(m) TF_CALL_float(m) TF_CALL_int32(m)
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_FRAMEWORK_REGISTER_TYPES_TRAITS_H_
diff --git a/tensorflow/core/framework/types.cc b/tensorflow/core/framework/types.cc
index 294f7a2..457ba63 100644
--- a/tensorflow/core/framework/types.cc
+++ b/tensorflow/core/framework/types.cc
@@ -38,7 +38,6 @@
 const char* const DEVICE_DEFAULT = "DEFAULT";
 const char* const DEVICE_CPU = "CPU";
 const char* const DEVICE_GPU = "GPU";
-const char* const DEVICE_SYCL = "SYCL";
 const char* const DEVICE_TPU_SYSTEM = "TPU_SYSTEM";
 
 const std::string DeviceName<Eigen::ThreadPoolDevice>::value = DEVICE_CPU;
@@ -46,9 +45,6 @@
     (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 const std::string DeviceName<Eigen::GpuDevice>::value = DEVICE_GPU;
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-const std::string DeviceName<Eigen::SyclDevice>::value = DEVICE_SYCL;
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace {
 string DataTypeStringInternal(DataType dtype) {
diff --git a/tensorflow/core/framework/types.h b/tensorflow/core/framework/types.h
index 2b5f41b..2ba2594 100644
--- a/tensorflow/core/framework/types.h
+++ b/tensorflow/core/framework/types.h
@@ -74,7 +74,6 @@
 TF_EXPORT extern const char* const DEVICE_DEFAULT;     // "DEFAULT"
 TF_EXPORT extern const char* const DEVICE_CPU;         // "CPU"
 TF_EXPORT extern const char* const DEVICE_GPU;         // "GPU"
-TF_EXPORT extern const char* const DEVICE_SYCL;        // "SYCL"
 TF_EXPORT extern const char* const DEVICE_TPU_SYSTEM;  // "TPU_SYSTEM"
 
 template <typename Device>
@@ -93,12 +92,6 @@
 };
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-template <>
-struct DeviceName<Eigen::SyclDevice> {
-  static const std::string value;
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 typedef gtl::InlinedVector<MemoryType, 4> MemoryTypeVector;
 typedef gtl::ArraySlice<MemoryType> MemoryTypeSlice;
diff --git a/tensorflow/core/framework/types_test.cc b/tensorflow/core/framework/types_test.cc
index 63fb3508..060e86e 100644
--- a/tensorflow/core/framework/types_test.cc
+++ b/tensorflow/core/framework/types_test.cc
@@ -26,7 +26,6 @@
 TEST(TypesTest, DeviceTypeName) {
   EXPECT_EQ("CPU", DeviceTypeString(DeviceType(DEVICE_CPU)));
   EXPECT_EQ("GPU", DeviceTypeString(DeviceType(DEVICE_GPU)));
-  EXPECT_EQ("SYCL", DeviceTypeString(DeviceType(DEVICE_SYCL)));
 }
 
 TEST(TypesTest, kDataTypeRefOffset) {
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 3047e00..9917b8e 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -15,7 +15,6 @@
     "tf_kernel_library",
     "tf_opts_nortti_if_lite_protos",
 )
-load("@local_config_sycl//sycl:build_defs.bzl", "if_sycl")
 load("//tensorflow/core/kernels/mlir_generated:build_defs.bzl", "if_mlir_generated_gpu_kernels_enabled")
 
 # buildifier: disable=same-origin-load
@@ -922,7 +921,7 @@
     "//tensorflow/core:lib_internal",
     "//tensorflow/core:protos_all_cc",
     "//third_party/eigen3",
-] + if_sycl(["//tensorflow/core/common_runtime/sycl:sycl_runtime"])
+]
 
 tf_kernel_library(
     name = "immutable_constant_op",
@@ -1240,7 +1239,6 @@
         "tile_functor_cpu_uint64.cc",
         "tile_functor_cpu_uint8.cc",
         "tile_functor_cpu_variant.cc",
-        "tile_functor_sycl.cc",
     ],
     hdrs = ["tile_functor.h"],
     gpu_srcs = [
@@ -4206,7 +4204,7 @@
         "maxpooling_op.h",
         "pooling_ops_3d.h",
         "pooling_ops_common.h",
-    ] + if_sycl(["pooling_ops_3d_sycl.h"]),
+    ],
     gpu_srcs = [
         "avgpooling_op.h",
         "avgpooling_op_gpu.cu.cc",
@@ -4872,7 +4870,7 @@
     "//tensorflow/core:framework",
     "//tensorflow/core:lib",
     "//tensorflow/core:lib_internal",
-] + if_sycl(["//tensorflow/core/common_runtime/sycl:sycl_runtime"])
+]
 
 tf_kernel_library(
     name = "count_up_to_op",
@@ -6393,7 +6391,6 @@
             "unicode_script_op.cc",
             # Ops that are inherently incompatible with Android (e.g. tied to x86 platform).
             "xsmm_*",
-            "cwise_ops_sycl_common.h",
             "nextafter_op.cc",
         ] + ANDROID_TEXTUAL_HDRS,
     ) + [
diff --git a/tensorflow/core/kernels/aggregate_ops.cc b/tensorflow/core/kernels/aggregate_ops.cc
index 79062ae..3b6f89a 100644
--- a/tensorflow/core/kernels/aggregate_ops.cc
+++ b/tensorflow/core/kernels/aggregate_ops.cc
@@ -28,9 +28,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 #define REGISTER_ADDN(type, dev)                                   \
   REGISTER_KERNEL_BUILDER(                                         \
@@ -67,21 +64,6 @@
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_ADDN(float, SYCL);
-REGISTER_ADDN(double, SYCL);
-
-// A special GPU kernel for int32.
-// TODO(b/25387198): Also enable int32 in device memory. This kernel
-// registration requires all int32 inputs and outputs to be in host memory.
-REGISTER_KERNEL_BUILDER(
-    Name("AddN")
-        .Device(DEVICE_SYCL)
-        .TypeConstraint<int32>("T")
-        .HostMemory("inputs")
-        .HostMemory("sum"),
-    AddNOp<CPUDevice, int32, OpKernel, OpKernelConstruction, OpKernelContext>);
-#endif  // TENSORFLOW_USE_SYCL
 
 #undef REGISTER_ADDN
 
diff --git a/tensorflow/core/kernels/aggregate_ops_cpu.h b/tensorflow/core/kernels/aggregate_ops_cpu.h
index 3e87917..d64d306 100644
--- a/tensorflow/core/kernels/aggregate_ops_cpu.h
+++ b/tensorflow/core/kernels/aggregate_ops_cpu.h
@@ -23,9 +23,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace tensorflow {
 
@@ -137,114 +134,6 @@
   }
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-// Partial specializations for a SYCLDevice, that uses the Eigen implementation
-// from AddNEigenImpl.
-template <typename T>
-struct Add2Functor<SYCLDevice, T> {
-  void operator()(const SYCLDevice& d, typename TTypes<T>::Flat out,
-                  typename TTypes<T>::ConstFlat in1,
-                  typename TTypes<T>::ConstFlat in2) {
-    Add2EigenImpl<SYCLDevice, T>::Compute(d, out, in1, in2);
-  }
-};
-template <typename T>
-struct Add3Functor<SYCLDevice, T> {
-  void operator()(const SYCLDevice& d, typename TTypes<T>::Flat out,
-                  typename TTypes<T>::ConstFlat in1,
-                  typename TTypes<T>::ConstFlat in2,
-                  typename TTypes<T>::ConstFlat in3) {
-    Add3EigenImpl<SYCLDevice, T>::Compute(d, out, in1, in2, in3);
-  }
-};
-template <typename T>
-struct Add4Functor<SYCLDevice, T> {
-  void operator()(const SYCLDevice& d, typename TTypes<T>::Flat out,
-                  typename TTypes<T>::ConstFlat in1,
-                  typename TTypes<T>::ConstFlat in2,
-                  typename TTypes<T>::ConstFlat in3,
-                  typename TTypes<T>::ConstFlat in4) {
-    Add4EigenImpl<SYCLDevice, T>::Compute(d, out, in1, in2, in3, in4);
-  }
-};
-template <typename T>
-struct Add5Functor<SYCLDevice, T> {
-  void operator()(const SYCLDevice& d, typename TTypes<T>::Flat out,
-                  typename TTypes<T>::ConstFlat in1,
-                  typename TTypes<T>::ConstFlat in2,
-                  typename TTypes<T>::ConstFlat in3,
-                  typename TTypes<T>::ConstFlat in4,
-                  typename TTypes<T>::ConstFlat in5) {
-    Add5EigenImpl<SYCLDevice, T>::Compute(d, out, in1, in2, in3, in4, in5);
-  }
-};
-template <typename T>
-struct Add6Functor<SYCLDevice, T> {
-  void operator()(const SYCLDevice& d, typename TTypes<T>::Flat out,
-                  typename TTypes<T>::ConstFlat in1,
-                  typename TTypes<T>::ConstFlat in2,
-                  typename TTypes<T>::ConstFlat in3,
-                  typename TTypes<T>::ConstFlat in4,
-                  typename TTypes<T>::ConstFlat in5,
-                  typename TTypes<T>::ConstFlat in6) {
-    Add6EigenImpl<SYCLDevice, T>::Compute(d, out, in1, in2, in3, in4, in5, in6);
-  }
-};
-template <typename T>
-struct Add7Functor<SYCLDevice, T> {
-  void operator()(const SYCLDevice& d, typename TTypes<T>::Flat out,
-                  typename TTypes<T>::ConstFlat in1,
-                  typename TTypes<T>::ConstFlat in2,
-                  typename TTypes<T>::ConstFlat in3,
-                  typename TTypes<T>::ConstFlat in4,
-                  typename TTypes<T>::ConstFlat in5,
-                  typename TTypes<T>::ConstFlat in6,
-                  typename TTypes<T>::ConstFlat in7) {
-    Add7EigenImpl<SYCLDevice, T>::Compute(d, out, in1, in2, in3, in4, in5, in6,
-                                          in7);
-  }
-};
-
-template <typename T>
-struct Add8Functor<SYCLDevice, T> {
-  void operator()(
-      const SYCLDevice& d, typename TTypes<T>::Flat out,
-      typename TTypes<T>::ConstFlat in1, typename TTypes<T>::ConstFlat in2,
-      typename TTypes<T>::ConstFlat in3, typename TTypes<T>::ConstFlat in4,
-      typename TTypes<T>::ConstFlat in5, typename TTypes<T>::ConstFlat in6,
-      typename TTypes<T>::ConstFlat in7, typename TTypes<T>::ConstFlat in8) {
-    Add8EigenImpl<SYCLDevice, T>::Compute(d, out, in1, in2, in3, in4, in5, in6,
-                                          in7, in8);
-  }
-};
-
-template <typename T>
-struct Add8pFunctor<SYCLDevice, T> {
-  void operator()(
-      const SYCLDevice& d, typename TTypes<T>::Flat out,
-      typename TTypes<T>::ConstFlat in1, typename TTypes<T>::ConstFlat in2,
-      typename TTypes<T>::ConstFlat in3, typename TTypes<T>::ConstFlat in4,
-      typename TTypes<T>::ConstFlat in5, typename TTypes<T>::ConstFlat in6,
-      typename TTypes<T>::ConstFlat in7, typename TTypes<T>::ConstFlat in8) {
-    Add8pEigenImpl<SYCLDevice, T>::Compute(d, out, in1, in2, in3, in4, in5, in6,
-                                           in7, in8);
-  }
-};
-
-template <typename T>
-struct Add9Functor<SYCLDevice, T> {
-  void operator()(
-      const SYCLDevice& d, typename TTypes<T>::Flat out,
-      typename TTypes<T>::ConstFlat in1, typename TTypes<T>::ConstFlat in2,
-      typename TTypes<T>::ConstFlat in3, typename TTypes<T>::ConstFlat in4,
-      typename TTypes<T>::ConstFlat in5, typename TTypes<T>::ConstFlat in6,
-      typename TTypes<T>::ConstFlat in7, typename TTypes<T>::ConstFlat in8,
-      typename TTypes<T>::ConstFlat in9) {
-    Add9EigenImpl<SYCLDevice, T>::Compute(d, out, in1, in2, in3, in4, in5, in6,
-                                          in7, in8, in9);
-  }
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace functor
 
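All of the AddN functors deleted here are instances of one pattern: a thin, Device-parameterized functor that forwards to a shared Eigen expression, so adding or dropping a device never touches the arithmetic. A minimal sketch of that shape (signatures simplified; CPUDevice is an illustrative tag, not Eigen's class):

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

struct CPUDevice {};  // illustrative tag; TF keys these on Eigen's devices

// Shared Eigen expression: identical for every device specialization.
template <typename Device, typename T>
struct Add2EigenImpl {
  static void Compute(Eigen::Tensor<T, 1>& out,
                      const Eigen::Tensor<T, 1>& in1,
                      const Eigen::Tensor<T, 1>& in2) {
    out = in1 + in2;
  }
};

// Thin per-device functor: the removed SYCL specializations differed from
// the CPU/GPU ones only in the Device parameter they forwarded.
template <typename Device, typename T>
struct Add2Functor {
  void operator()(Eigen::Tensor<T, 1>& out,
                  const Eigen::Tensor<T, 1>& in1,
                  const Eigen::Tensor<T, 1>& in2) {
    Add2EigenImpl<Device, T>::Compute(out, in1, in2);
  }
};

int main() {
  Eigen::Tensor<float, 1> a(3), b(3), c(3);
  a.setConstant(1.f);
  b.setConstant(2.f);
  Add2Functor<CPUDevice, float>()(c, a, b);
  std::cout << c(0) << "\n";  // prints 3
  return 0;
}
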
diff --git a/tensorflow/core/kernels/batch_matmul_op_impl.h b/tensorflow/core/kernels/batch_matmul_op_impl.h
index 89c438b..d6cc980 100644
--- a/tensorflow/core/kernels/batch_matmul_op_impl.h
+++ b/tensorflow/core/kernels/batch_matmul_op_impl.h
@@ -50,9 +50,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace {
 
@@ -632,48 +629,6 @@
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename Scalar>
-struct ParallelMatMulKernelSYCL {
-  static void Run(const OpKernelContext* context, const Tensor& in_x,
-                  const Tensor& in_y, bool adj_x, bool adj_y, bool trans_x,
-                  bool trans_y, const MatMulBCast& bcast, Tensor* out,
-                  int start, int limit) {
-    auto Tx = in_x.tensor<Scalar, 3>();
-    auto Ty = in_y.tensor<Scalar, 3>();
-    auto Tz = out->tensor<Scalar, 3>();
-    Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> contract_pairs;
-    contract_pairs[0] = ContractionDims(adj_x || trans_x, adj_y || trans_y);
-    auto d = context->eigen_sycl_device();
-
-    const bool should_bcast = bcast.IsBroadcastingRequired();
-    const auto& x_batch_indices = bcast.x_batch_indices();
-    const auto& y_batch_indices = bcast.y_batch_indices();
-    for (int64 i = start; i < limit; ++i) {
-      const int64 x_batch_index = should_bcast ? x_batch_indices[i] : i;
-      const int64 y_batch_index = should_bcast ? y_batch_indices[i] : i;
-
-      auto x = Tx.template chip<0>(x_batch_index);
-      auto y = Ty.template chip<0>(y_batch_index);
-      auto z = Tz.template chip<0>(i);
-      z.device(d) = x.contract(y, contract_pairs);
-    }
-  }
-};
-
-template <typename Scalar>
-struct LaunchBatchMatMul<SYCLDevice, Scalar> {
-  static void Launch(OpKernelContext* context, const Tensor& in_x,
-                     const Tensor& in_y, bool adj_x, bool adj_y, bool trans_x,
-                     bool trans_y, const MatMulBCast& bcast, Tensor* out) {
-    // Number of matrix multiplies i.e. size of the batch.
-    const int64 batch_size = bcast.output_batch_size();
-    ParallelMatMulKernelSYCL<Scalar>::Run(context, in_x, in_y, adj_x, adj_y,
-                                          trans_x, trans_y, bcast, out, 0,
-                                          batch_size);
-  }
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename Device, typename Scalar>
 class BaseBatchMatMulOp : public OpKernel {
@@ -826,15 +781,6 @@
       Name("BatchMatMulV2").Device(DEVICE_GPU).TypeConstraint<TYPE>("T"), \
       BatchMatMulV2Op<GPUDevice, TYPE>)
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_BATCH_MATMUL_SYCL(TYPE)                                   \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("BatchMatMul").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"),   \
-      BatchMatMulOp<SYCLDevice, TYPE>);                                    \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("BatchMatMulV2").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      BatchMatMulV2Op<SYCLDevice, TYPE>)
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_KERNELS_BATCH_MATMUL_OP_IMPL_H_
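
The removed ParallelMatMulKernelSYCL is a straight loop over Eigen's chip/contract API: peel batch slice i off each operand, contract the inner dimensions, and write the corresponding slice of the output. The same loop runs standalone on Eigen's default (CPU) device; shapes and names below are illustrative, and the original's transpose/adjoint handling and explicit .device(d) evaluation are omitted:

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  const int batch = 2, m = 3, k = 4, n = 5;
  Eigen::Tensor<float, 3> x(batch, m, k);
  Eigen::Tensor<float, 3> y(batch, k, n);
  Eigen::Tensor<float, 3> z(batch, m, n);
  x.setRandom();
  y.setRandom();

  // Contract the last dim of each x slice with the first dim of each y
  // slice -- a plain matmul per batch entry.
  Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> pairs = {
      Eigen::IndexPair<Eigen::DenseIndex>(1, 0)};

  for (int i = 0; i < batch; ++i) {
    // chip<0>(i) peels off batch slice i, as the removed kernel did.
    z.chip<0>(i) = x.chip<0>(i).contract(y.chip<0>(i), pairs);
  }
  std::cout << z(0, 0, 0) << "\n";
  return 0;
}
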
diff --git a/tensorflow/core/kernels/batch_matmul_op_real.cc b/tensorflow/core/kernels/batch_matmul_op_real.cc
index 075666c..30ec13e 100644
--- a/tensorflow/core/kernels/batch_matmul_op_real.cc
+++ b/tensorflow/core/kernels/batch_matmul_op_real.cc
@@ -34,8 +34,4 @@
 TF_CALL_half(REGISTER_BATCH_MATMUL_GPU);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-TF_CALL_float(REGISTER_BATCH_MATMUL_SYCL);
-TF_CALL_double(REGISTER_BATCH_MATMUL_SYCL);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/batch_norm_op.cc b/tensorflow/core/kernels/batch_norm_op.cc
index 4a03abb..f9783b5 100644
--- a/tensorflow/core/kernels/batch_norm_op.cc
+++ b/tensorflow/core/kernels/batch_norm_op.cc
@@ -28,9 +28,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename Device, typename T>
 class BatchNormOp : public OpKernel {
@@ -208,17 +205,6 @@
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_KERNEL(T)                                         \
-  REGISTER_KERNEL_BUILDER(Name("BatchNormWithGlobalNormalization") \
-                              .Device(DEVICE_SYCL)                 \
-                              .TypeConstraint<T>("T"),             \
-                          BatchNormOp<SYCLDevice, T>);
-
-TF_CALL_float(REGISTER_KERNEL);
-TF_CALL_double(REGISTER_KERNEL);
-#undef REGISTER_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 #define REGISTER_KERNEL(T)                                             \
   REGISTER_KERNEL_BUILDER(Name("BatchNormWithGlobalNormalizationGrad") \
@@ -267,17 +253,5 @@
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_KERNEL(T)                                             \
-  REGISTER_KERNEL_BUILDER(Name("BatchNormWithGlobalNormalizationGrad") \
-                              .Device(DEVICE_SYCL)                     \
-                              .TypeConstraint<T>("T"),                 \
-                          BatchNormGradOp<SYCLDevice, T>);
-
-TF_CALL_float(REGISTER_KERNEL);
-TF_CALL_double(REGISTER_KERNEL);
-#undef REGISTER_KERNEL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
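
The REGISTER_KERNEL blocks deleted above (and throughout this change) are the usual X-macro registration idiom: define REGISTER_KERNEL(T), expand it once per supported dtype via the TF_CALL_* helpers, then #undef it. A self-contained sketch with a toy registry standing in for TensorFlow's kernel registry:

#include <iostream>
#include <map>
#include <string>

// Toy registry; TF's real one maps (op, device, constraints) to factories.
std::map<std::string, std::string>& Registry() {
  static std::map<std::string, std::string> r;
  return r;
}

// Static-initialization-time registrar, mirroring REGISTER_KERNEL_BUILDER.
struct Registrar {
  Registrar(const std::string& op, const std::string& type) {
    Registry()[op + "/" + type] = "registered";
  }
};

#define REGISTER_KERNEL(T) \
  static Registrar registrar_##T("BatchNorm", #T);

// TF would expand these via TF_CALL_float(REGISTER_KERNEL) etc.
REGISTER_KERNEL(float)
REGISTER_KERNEL(double)
#undef REGISTER_KERNEL

int main() {
  for (const auto& kv : Registry()) std::cout << kv.first << "\n";
  return 0;  // prints BatchNorm/double, BatchNorm/float
}
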
diff --git a/tensorflow/core/kernels/batching_util/concat_split_util.h b/tensorflow/core/kernels/batching_util/concat_split_util.h
index fcd3b6e..77c4463 100644
--- a/tensorflow/core/kernels/batching_util/concat_split_util.h
+++ b/tensorflow/core/kernels/batching_util/concat_split_util.h
@@ -29,9 +29,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 // Concatenates 'inputs' into a single tensor along the zeroth dimension.
 // Requires that all elements of 'inputs' have element type T. Writes to
diff --git a/tensorflow/core/kernels/bcast_ops.cc b/tensorflow/core/kernels/bcast_ops.cc
index fe185bd..f8a640b 100644
--- a/tensorflow/core/kernels/bcast_ops.cc
+++ b/tensorflow/core/kernels/bcast_ops.cc
@@ -145,22 +145,6 @@
                             .HostMemory("r0"),
                         BCastArgsOp<int64>);
 
-#if TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("BroadcastArgs")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .HostMemory("s0")
-                            .HostMemory("s1")
-                            .HostMemory("r0"),
-                        BCastArgsOp<int32>);
-REGISTER_KERNEL_BUILDER(Name("BroadcastArgs")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int64>("T")
-                            .HostMemory("s0")
-                            .HostMemory("s1")
-                            .HostMemory("r0"),
-                        BCastArgsOp<int32>);
-#endif
 
 REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
                             .Device(DEVICE_CPU)
@@ -195,22 +179,4 @@
                             .HostMemory("r1"),
                         BCastGradArgsOp<int64>);
 
-#if TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .HostMemory("s0")
-                            .HostMemory("s1")
-                            .HostMemory("r0")
-                            .HostMemory("r1"),
-                        BCastGradArgsOp<int32>);
-REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int64>("T")
-                            .HostMemory("s0")
-                            .HostMemory("s1")
-                            .HostMemory("r0")
-                            .HostMemory("r1"),
-                        BCastGradArgsOp<int64>);
-#endif
 }  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/bias_op.cc b/tensorflow/core/kernels/bias_op.cc
index bf001cc..e3dd9ac 100644
--- a/tensorflow/core/kernels/bias_op.cc
+++ b/tensorflow/core/kernels/bias_op.cc
@@ -39,9 +39,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace {
 
@@ -216,20 +213,6 @@
 TF_CALL_NUMBER_TYPES(REGISTER_KERNEL);
 #undef REGISTER_KERNEL
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_KERNEL(type)                                          \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("BiasAdd").Device(DEVICE_SYCL).TypeConstraint<type>("T"),   \
-      BiasOp<SYCLDevice, type>);                                       \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("BiasAddV1").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      BiasOp<SYCLDevice, type>);
-
-TF_CALL_INTEGRAL_TYPES(REGISTER_KERNEL);
-REGISTER_KERNEL(float);
-REGISTER_KERNEL(double);
-#undef REGISTER_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename Device, typename T>
 class BiasGradOp : public OpKernel {
@@ -308,17 +291,6 @@
 TF_CALL_NUMBER_TYPES(REGISTER_KERNEL);
 #undef REGISTER_KERNEL
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_KERNEL(type)                                            \
-  REGISTER_KERNEL_BUILDER(                                               \
-      Name("BiasAddGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      BiasGradOp<SYCLDevice, type>);
-
-TF_CALL_INTEGRAL_TYPES(REGISTER_KERNEL);
-REGISTER_KERNEL(float);
-REGISTER_KERNEL(double);
-#undef REGISTER_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 template <typename T>
diff --git a/tensorflow/core/kernels/cast_op.cc b/tensorflow/core/kernels/cast_op.cc
index e8c428a..5f32291 100644
--- a/tensorflow/core/kernels/cast_op.cc
+++ b/tensorflow/core/kernels/cast_op.cc
@@ -34,9 +34,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 #define CURRY_TYPES2(FN, arg0)   \
   FN(arg0, bool);                \
@@ -253,50 +250,6 @@
 #undef REGISTER_CAST_GPU
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-class SyclCastOp : public CastOpBase {
- public:
-  explicit SyclCastOp(OpKernelConstruction* ctx) : CastOpBase(ctx) {
-    OP_REQUIRES_OK(ctx, Prepare());
-  }
-
- private:
-  Status Prepare() {
-    if (external_src_dtype_ == external_dst_dtype_) {
-      work_ = nullptr;  // Identity
-      return Status::OK();
-    }
-    if (src_dtype_ == DT_BOOL) {
-      work_ = GetSyclCastFromBool(dst_dtype_);
-    } else if (src_dtype_ == DT_INT32) {
-      work_ = GetSyclCastFromInt32(dst_dtype_);
-    } else if (src_dtype_ == DT_INT64) {
-      work_ = GetSyclCastFromInt64(dst_dtype_);
-    } else if (src_dtype_ == DT_FLOAT) {
-      work_ = GetSyclCastFromFloat(dst_dtype_);
-    } else if (src_dtype_ == DT_DOUBLE) {
-      work_ = GetSyclCastFromDouble(dst_dtype_);
-    }
-
-    return work_ == nullptr ? Unimplemented() : Status::OK();
-  }
-};
-
-#define REGISTER_CAST_SYCL(srctype, dsttype)                   \
-  REGISTER_KERNEL_BUILDER(Name("Cast")                         \
-                              .TypeConstraint<srctype>("SrcT") \
-                              .TypeConstraint<dsttype>("DstT") \
-                              .Device(DEVICE_SYCL),            \
-                          SyclCastOp)
-CURRY_TYPES2(REGISTER_CAST_SYCL, bool);
-CURRY_TYPES2(REGISTER_CAST_SYCL, int32);
-CURRY_TYPES2(REGISTER_CAST_SYCL, int64);
-CURRY_TYPES2(REGISTER_CAST_SYCL, float);
-CURRY_TYPES2(REGISTER_CAST_SYCL, double);
-
-#undef REGISTER_CAST_SYCL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 #undef CURRY_TYPES2
 
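SyclCastOp::Prepare above is a runtime dtype dispatch: switch on the source dtype, fetch a type-erased cast functor, and treat a null functor as Unimplemented. A sketch of that dispatch with illustrative enum and functor types (not TensorFlow's):

#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

// Illustrative dtype tag and type-erased cast signature.
enum class DType { kBool, kInt32, kFloat };

using CastFn = std::function<void(const void* in, void* out, std::int64_t n)>;

// Build an element-wise cast for a concrete (Src, Dst) pair.
template <typename Src, typename Dst>
CastFn MakeCast() {
  return [](const void* in, void* out, std::int64_t n) {
    const Src* s = static_cast<const Src*>(in);
    Dst* d = static_cast<Dst*>(out);
    for (std::int64_t i = 0; i < n; ++i) d[i] = static_cast<Dst>(s[i]);
  };
}

// Runtime dispatch on the source dtype, as in the removed Prepare().
CastFn GetCastToFloat(DType src) {
  switch (src) {
    case DType::kBool:  return MakeCast<bool, float>();
    case DType::kInt32: return MakeCast<std::int32_t, float>();
    default:            return nullptr;  // caller reports Unimplemented
  }
}

int main() {
  std::vector<std::int32_t> in = {1, 2, 3};
  std::vector<float> out(in.size());
  CastFn fn = GetCastToFloat(DType::kInt32);
  if (fn) fn(in.data(), out.data(), in.size());
  std::cout << out[2] << "\n";  // prints 3
  return 0;
}
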
diff --git a/tensorflow/core/kernels/cast_op_impl.h b/tensorflow/core/kernels/cast_op_impl.h
index 266e2ce..536afb4 100644
--- a/tensorflow/core/kernels/cast_op_impl.h
+++ b/tensorflow/core/kernels/cast_op_impl.h
@@ -27,9 +27,6 @@
 
 CAST_FUNCTORS(Eigen::ThreadPoolDevice);
 
-#ifdef TENSORFLOW_USE_SYCL
-CAST_FUNCTORS(Eigen::SyclDevice);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace functor
 
@@ -134,27 +131,6 @@
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-CastFunctorType GetSyclCastFromBool(DataType dst_dtype);
-
-CastFunctorType GetSyclCastFromUint8(DataType dst_dtype);
-
-CastFunctorType GetSyclCastFromUint16(DataType dst_dtype);
-
-CastFunctorType GetSyclCastFromUint32(DataType dst_dtype);
-
-CastFunctorType GetSyclCastFromUint64(DataType dst_dtype);
-
-CastFunctorType GetSyclCastFromInt16(DataType dst_dtype);
-
-CastFunctorType GetSyclCastFromInt32(DataType dst_dtype);
-
-CastFunctorType GetSyclCastFromInt64(DataType dst_dtype);
-
-CastFunctorType GetSyclCastFromFloat(DataType dst_dtype);
-
-CastFunctorType GetSyclCastFromDouble(DataType dst_dtype);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/cast_op_impl_bool.cc b/tensorflow/core/kernels/cast_op_impl_bool.cc
index d08a45a..bbe3347 100644
--- a/tensorflow/core/kernels/cast_op_impl_bool.cc
+++ b/tensorflow/core/kernels/cast_op_impl_bool.cc
@@ -33,12 +33,5 @@
 }
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-CastFunctorType GetSyclCastFromBool(DataType dst_dtype) {
-  CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, bool);
-  return nullptr;
-}
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cast_op_impl_double.cc b/tensorflow/core/kernels/cast_op_impl_double.cc
index 8637f3d..ad89766 100644
--- a/tensorflow/core/kernels/cast_op_impl_double.cc
+++ b/tensorflow/core/kernels/cast_op_impl_double.cc
@@ -33,12 +33,5 @@
 }
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-CastFunctorType GetSyclCastFromDouble(DataType dst_dtype) {
-  CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, double);
-  return nullptr;
-}
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cast_op_impl_float.cc b/tensorflow/core/kernels/cast_op_impl_float.cc
index c2418e9..6989230 100644
--- a/tensorflow/core/kernels/cast_op_impl_float.cc
+++ b/tensorflow/core/kernels/cast_op_impl_float.cc
@@ -35,12 +35,5 @@
 }
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-CastFunctorType GetSyclCastFromFloat(DataType dst_dtype) {
-  CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, float);
-  return nullptr;
-}
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cast_op_impl_int16.cc b/tensorflow/core/kernels/cast_op_impl_int16.cc
index b322006..04c9952 100644
--- a/tensorflow/core/kernels/cast_op_impl_int16.cc
+++ b/tensorflow/core/kernels/cast_op_impl_int16.cc
@@ -33,12 +33,5 @@
 }
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-CastFunctorType GetSyclCastFromInt16(DataType dst_dtype) {
-  CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, int16);
-  return nullptr;
-}
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cast_op_impl_int32.cc b/tensorflow/core/kernels/cast_op_impl_int32.cc
index 154fd14..5c8b716 100644
--- a/tensorflow/core/kernels/cast_op_impl_int32.cc
+++ b/tensorflow/core/kernels/cast_op_impl_int32.cc
@@ -33,12 +33,5 @@
 }
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-CastFunctorType GetSyclCastFromInt32(DataType dst_dtype) {
-  CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, int32);
-  return nullptr;
-}
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cast_op_impl_int64.cc b/tensorflow/core/kernels/cast_op_impl_int64.cc
index 1f4ebc9..0175231 100644
--- a/tensorflow/core/kernels/cast_op_impl_int64.cc
+++ b/tensorflow/core/kernels/cast_op_impl_int64.cc
@@ -33,12 +33,5 @@
 }
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-CastFunctorType GetSyclCastFromInt64(DataType dst_dtype) {
-  CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, int64);
-  return nullptr;
-}
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cast_op_impl_int8.cc b/tensorflow/core/kernels/cast_op_impl_int8.cc
index 00a72ab..2aaac7a 100644
--- a/tensorflow/core/kernels/cast_op_impl_int8.cc
+++ b/tensorflow/core/kernels/cast_op_impl_int8.cc
@@ -33,12 +33,5 @@
 }
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-CastFunctorType GetSyclCastFromInt8(DataType dst_dtype) {
-  CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, int8);
-  return nullptr;
-}
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cast_op_impl_uint16.cc b/tensorflow/core/kernels/cast_op_impl_uint16.cc
index 2981fe9..aca3c87 100644
--- a/tensorflow/core/kernels/cast_op_impl_uint16.cc
+++ b/tensorflow/core/kernels/cast_op_impl_uint16.cc
@@ -33,12 +33,5 @@
 }
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-CastFunctorType GetSyclCastFromUint16(DataType dst_dtype) {
-  CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, uint16);
-  return nullptr;
-}
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cast_op_impl_uint32.cc b/tensorflow/core/kernels/cast_op_impl_uint32.cc
index b94540d..d41ac9d 100644
--- a/tensorflow/core/kernels/cast_op_impl_uint32.cc
+++ b/tensorflow/core/kernels/cast_op_impl_uint32.cc
@@ -33,12 +33,5 @@
 }
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-CastFunctorType GetSyclCastFromUint32(DataType dst_dtype) {
-  CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, uint32);
-  return nullptr;
-}
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cast_op_impl_uint64.cc b/tensorflow/core/kernels/cast_op_impl_uint64.cc
index e04c0a2..d941f1d 100644
--- a/tensorflow/core/kernels/cast_op_impl_uint64.cc
+++ b/tensorflow/core/kernels/cast_op_impl_uint64.cc
@@ -33,12 +33,5 @@
 }
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-CastFunctorType GetSyclCastFromUint64(DataType dst_dtype) {
-  CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, uint64);
-  return nullptr;
-}
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cast_op_impl_uint8.cc b/tensorflow/core/kernels/cast_op_impl_uint8.cc
index 20c5729..fbffeb5 100644
--- a/tensorflow/core/kernels/cast_op_impl_uint8.cc
+++ b/tensorflow/core/kernels/cast_op_impl_uint8.cc
@@ -33,12 +33,5 @@
 }
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-CastFunctorType GetSyclCastFromUint8(DataType dst_dtype) {
-  CURRY_TYPES3_NO_HALF(CAST_CASE, SYCLDevice, uint8);
-  return nullptr;
-}
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cast_op_test.cc b/tensorflow/core/kernels/cast_op_test.cc
index c8da7c5..11550be 100644
--- a/tensorflow/core/kernels/cast_op_test.cc
+++ b/tensorflow/core/kernels/cast_op_test.cc
@@ -138,9 +138,6 @@
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
   test::Benchmark("gpu", Cast<float, int64>(num)).Run(iters);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-  test::Benchmark("sycl", Cast<float, int64>(num)).Run(iters);
-#endif  // TENSORFLOW_USE_SYCL
 }
 BENCHMARK(BM_gpu_float_int64)->Arg(64 << 10)->Arg(32 << 20);
 
@@ -161,9 +158,6 @@
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
   test::Benchmark("gpu", Cast<bool, float>(num)).Run(iters);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-  test::Benchmark("sycl", Cast<bool, float>(num)).Run(iters);
-#endif  // TENSORFLOW_USE_SYCL
 }
 BENCHMARK(BM_gpu_bool_float)->Arg(64 << 10)->Arg(32 << 20);
 
diff --git a/tensorflow/core/kernels/concat_lib.h b/tensorflow/core/kernels/concat_lib.h
index 35da7af..2f813ae 100644
--- a/tensorflow/core/kernels/concat_lib.h
+++ b/tensorflow/core/kernels/concat_lib.h
@@ -73,14 +73,6 @@
 #undef REGISTER
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-void ConcatSYCL(
-    const Eigen::SyclDevice& d,
-    const std::vector<std::unique_ptr<typename TTypes<T, 2>::ConstMatrix>>&
-        inputs,
-    typename TTypes<T, 2>::Matrix* output);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_KERNELS_CONCAT_LIB_H_
diff --git a/tensorflow/core/kernels/concat_lib_cpu.cc b/tensorflow/core/kernels/concat_lib_cpu.cc
index 1dec589..d1748e0 100644
--- a/tensorflow/core/kernels/concat_lib_cpu.cc
+++ b/tensorflow/core/kernels/concat_lib_cpu.cc
@@ -127,24 +127,4 @@
         // !defined(SUPPORT_SELECTIVE_REGISTRATION) &&
         // !defined(__ANDROID_TYPES_FULL__)
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-void ConcatSYCL(
-    const Eigen::SyclDevice& d,
-    const std::vector<std::unique_ptr<typename TTypes<T, 2>::ConstMatrix>>&
-        inputs,
-    typename TTypes<T, 2>::Matrix* output) {
-  ConcatSYCLImpl<T>(d, inputs, sizeof(T) /* cost_per_unit */, MemCpyCopier<T>(),
-                    output);
-}
-#define REGISTER_SYCL(T)                                                       \
-  template void ConcatSYCL<T>(                                                 \
-      const Eigen::SyclDevice&,                                                \
-      const std::vector<std::unique_ptr<typename TTypes<T, 2>::ConstMatrix>>&, \
-      typename TTypes<T, 2>::Matrix* output);
-
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL)
-
-#undef REGISTER_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/concat_lib_cpu.h b/tensorflow/core/kernels/concat_lib_cpu.h
index 6ee717a..34f9929 100644
--- a/tensorflow/core/kernels/concat_lib_cpu.h
+++ b/tensorflow/core/kernels/concat_lib_cpu.h
@@ -130,41 +130,6 @@
         cost_per_unit, work);
 }
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T, typename ElementCopier>
-void ConcatSYCLImpl(
-    const Eigen::SyclDevice& d,
-    const std::vector<std::unique_ptr<typename TTypes<T, 2>::ConstMatrix>>&
-        inputs,
-    int64 cost_per_unit, ElementCopier copier,
-    typename TTypes<T, 2>::Matrix* output) {
-  size_t num_inputs = inputs.size();
-
-  std::vector<ptrdiff_t> sizes;
-  sizes.reserve(num_inputs);
-  int64 row_size = 0;
-  for (const auto& input : inputs) {
-    sizes.push_back(input->dimension(1));
-    row_size += sizes.back();
-  }
-
-  T* out = &(*output)(0, 0);
-  std::vector<const T*> inp;
-  inp.reserve(num_inputs);
-  for (const auto& input : inputs) {
-    inp.push_back(&(*input)(0, 0));
-  }
-  const int64 dim0 = output->dimension(0);
-  for (int64 i = 0; i < dim0; ++i) {
-    for (int64 j = 0; j < num_inputs; ++j) {
-      auto size = sizes[j];
-      d.memcpy(out, inp[j], size * sizeof(T));
-      out += size;
-      inp[j] += size;
-    }
-  }
-}
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_KERNELS_CONCAT_LIB_CPU_H_
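
ConcatSYCLImpl, removed above, concatenates row-major matrices along dim 1 by issuing one memcpy per input per row and advancing a read cursor into each input. The same loop with std::memcpy in place of the device memcpy (function name and fixed float element type are assumptions of this sketch):

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

// Concatenate row-major matrices sharing dim0 along dim1, one memcpy per
// input per row, exactly like the removed ConcatSYCLImpl loop.
void ConcatDim1(const std::vector<const float*>& inputs,
                const std::vector<std::int64_t>& col_sizes,
                std::int64_t rows, float* out) {
  std::vector<const float*> inp = inputs;  // per-input read cursors
  for (std::int64_t i = 0; i < rows; ++i) {
    for (std::size_t j = 0; j < inp.size(); ++j) {
      std::memcpy(out, inp[j], col_sizes[j] * sizeof(float));
      out += col_sizes[j];
      inp[j] += col_sizes[j];
    }
  }
}

int main() {
  const float a[4] = {1, 2, 3, 4};  // 2x2
  const float b[2] = {9, 9};        // 2x1
  float out[6];
  ConcatDim1({a, b}, {2, 1}, 2, out);
  for (float v : out) std::cout << v << ' ';  // 1 2 9 3 4 9
  std::cout << '\n';
  return 0;
}
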
diff --git a/tensorflow/core/kernels/concat_op.cc b/tensorflow/core/kernels/concat_op.cc
index d3f3a04..88ffe28 100644
--- a/tensorflow/core/kernels/concat_op.cc
+++ b/tensorflow/core/kernels/concat_op.cc
@@ -35,9 +35,6 @@
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 typedef Eigen::GpuDevice GPUDevice;
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 enum AxisArgumentName { NAME_IS_AXIS, NAME_IS_CONCAT_DIM };
 
@@ -168,12 +165,6 @@
         return;
       }
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-      if (std::is_same<Device, SYCLDevice>::value) {
-        ConcatSYCL<T>(c->eigen_sycl_device(), inputs_flat, &output_flat);
-        return;
-      }
-#endif  // TENSORFLOW_USE_SYCL
       ConcatCPU<T>(c->device(), inputs_flat, &output_flat);
     }
   }
@@ -251,38 +242,6 @@
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL(type)                              \
-  REGISTER_KERNEL_BUILDER(Name("Concat")                 \
-                              .Device(DEVICE_SYCL)       \
-                              .TypeConstraint<type>("T") \
-                              .HostMemory("concat_dim"), \
-                          ConcatOp<SYCLDevice, type>)    \
-  REGISTER_KERNEL_BUILDER(Name("ConcatV2")               \
-                              .Device(DEVICE_SYCL)       \
-                              .TypeConstraint<type>("T") \
-                              .HostMemory("axis"),       \
-                          ConcatV2Op<SYCLDevice, type>)
-
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL);
-
-REGISTER_KERNEL_BUILDER(Name("Concat")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .HostMemory("concat_dim")
-                            .HostMemory("values")
-                            .HostMemory("output"),
-                        ConcatOp<CPUDevice, int32>);
-REGISTER_KERNEL_BUILDER(Name("ConcatV2")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .HostMemory("values")
-                            .HostMemory("axis")
-                            .HostMemory("output"),
-                        ConcatV2Op<CPUDevice, int32>);
-
-#undef REGISTER_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 
 class ConcatOffsetOp : public OpKernel {
  public:
@@ -370,12 +329,4 @@
                             .HostMemory("offset"),
                         ConcatOffsetOp);
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("ConcatOffset")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("concat_dim")
-                            .HostMemory("shape")
-                            .HostMemory("offset"),
-                        ConcatOffsetOp);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc
index 682da43..f9b382c 100644
--- a/tensorflow/core/kernels/constant_op.cc
+++ b/tensorflow/core/kernels/constant_op.cc
@@ -39,9 +39,6 @@
 #include "tensorflow/core/kernels/fill_functor.h"
 #include "tensorflow/core/platform/macros.h"
 
-#ifdef TENSORFLOW_USE_SYCL
-#include "tensorflow/core/common_runtime/sycl/sycl_util.h"
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace tensorflow {
 
@@ -127,33 +124,9 @@
 #undef REGISTER_KERNEL
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(D, TYPE)                                 \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Const").Device(DEVICE_##D).TypeConstraint<TYPE>("dtype"), \
-      ConstantOp);
-REGISTER_SYCL_KERNEL(SYCL, float);
-REGISTER_SYCL_KERNEL(SYCL, double);
-REGISTER_SYCL_KERNEL(SYCL, uint8);
-REGISTER_SYCL_KERNEL(SYCL, int8);
-REGISTER_SYCL_KERNEL(SYCL, qint8);
-REGISTER_SYCL_KERNEL(SYCL, uint16);
-REGISTER_SYCL_KERNEL(SYCL, int16);
-REGISTER_SYCL_KERNEL(SYCL, qint16);
-REGISTER_SYCL_KERNEL(SYCL, quint16);
-REGISTER_SYCL_KERNEL(SYCL, uint32);
-REGISTER_SYCL_KERNEL(SYCL, qint32);
-REGISTER_SYCL_KERNEL(SYCL, int64);
-REGISTER_SYCL_KERNEL(SYCL, uint64);
-REGISTER_SYCL_KERNEL(SYCL, bool);
-#undef REGISTER_SYCL_KERNEL
-#endif
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename Device, typename T, typename Index>
 class FillOp : public OpKernel {
@@ -216,25 +189,6 @@
 REGISTER_KERNEL(CPU, qint16);
 #undef REGISTER_CPU_KERNEL
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL(SYCL, float);
-REGISTER_KERNEL(SYCL, double);
-REGISTER_KERNEL(SYCL, uint8);
-REGISTER_KERNEL(SYCL, int8);
-REGISTER_KERNEL(SYCL, uint16);
-REGISTER_KERNEL(SYCL, int16);
-REGISTER_KERNEL(SYCL, int64);
-
-REGISTER_KERNEL_BUILDER(Name("Fill")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int32>("index_type")
-                            .HostMemory("dims")
-                            .HostMemory("value")
-                            .HostMemory("output"),
-                        FillOp<CPUDevice, int32, int32>);
-#undef REGISTER_KERNEL_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 
 #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
     (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
@@ -309,17 +263,6 @@
 REGISTER_CPU(Variant);
 #undef REGISTER_CPU
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL(bool, SYCL);
-REGISTER_KERNEL(float, SYCL);
-REGISTER_KERNEL(double, SYCL);
-REGISTER_KERNEL(int64, SYCL);
-REGISTER_KERNEL_BUILDER(Name("ZerosLike")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .HostMemory("y"),
-                        ZerosLikeOp<CPUDevice, int32>);
-#endif  // TENSORFLOW_USE_SYCL
 
 #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
     (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
@@ -365,15 +308,6 @@
 TF_CALL_POD_TYPES(REGISTER_CPU);
 #undef REGISTER_CPU
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL(float, SYCL);
-REGISTER_KERNEL(bool, SYCL);
-REGISTER_KERNEL_BUILDER(Name("OnesLike")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .HostMemory("y"),
-                        OnesLikeOp<CPUDevice, int32>);
-#endif  // TENSORFLOW_USE_SYCL
 
 #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
     (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
diff --git a/tensorflow/core/kernels/control_flow_ops.cc b/tensorflow/core/kernels/control_flow_ops.cc
index f886235..64b1390 100644
--- a/tensorflow/core/kernels/control_flow_ops.cc
+++ b/tensorflow/core/kernels/control_flow_ops.cc
@@ -156,57 +156,6 @@
 #undef REGISTER_GPU_HOST_KERNEL
 #undef REGISTER_GPU_HOST_REF_KERNEL
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_SWITCH(type)                        \
-  REGISTER_KERNEL_BUILDER(Name("Switch")                  \
-                              .Device(DEVICE_SYCL)        \
-                              .HostMemory("pred")         \
-                              .TypeConstraint<type>("T"), \
-                          SwitchOp)
-TF_CALL_REAL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_SWITCH);
-
-#define REGISTER_SYCL_REF_SWITCH(type)                    \
-  REGISTER_KERNEL_BUILDER(Name("RefSwitch")               \
-                              .Device(DEVICE_SYCL)        \
-                              .HostMemory("pred")         \
-                              .TypeConstraint<type>("T"), \
-                          SwitchOp)
-TF_CALL_REAL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_REF_SWITCH);
-
-#undef REGISTER_SYCL_SWITCH
-#undef REGISTER_SYCL_REF_SWITCH
-
-#define REGISTER_SYCL_HOST_KERNEL(type)                   \
-  REGISTER_KERNEL_BUILDER(Name("Switch")                  \
-                              .Device(DEVICE_SYCL)        \
-                              .HostMemory("data")         \
-                              .HostMemory("pred")         \
-                              .HostMemory("output_false") \
-                              .HostMemory("output_true")  \
-                              .TypeConstraint<type>("T"), \
-                          SwitchOp)
-
-REGISTER_SYCL_HOST_KERNEL(bool);
-REGISTER_SYCL_HOST_KERNEL(tstring);
-REGISTER_SYCL_HOST_KERNEL(int32);
-
-#define REGISTER_SYCL_HOST_REF_KERNEL(type)               \
-  REGISTER_KERNEL_BUILDER(Name("RefSwitch")               \
-                              .Device(DEVICE_SYCL)        \
-                              .HostMemory("data")         \
-                              .HostMemory("pred")         \
-                              .HostMemory("output_false") \
-                              .HostMemory("output_true")  \
-                              .TypeConstraint<type>("T"), \
-                          SwitchOp)
-
-REGISTER_SYCL_HOST_REF_KERNEL(int32);
-REGISTER_SYCL_HOST_REF_KERNEL(bool);
-REGISTER_SYCL_HOST_REF_KERNEL(tstring);
-
-#undef REGISTER_SYCL_HOST_KERNEL
-#undef REGISTER_SYCL_HOST_REF_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 class RefSelectOp : public OpKernel {
  public:
@@ -316,28 +265,6 @@
 #undef REGISTER_GPU_KERNEL
 #undef REGISTER_GPU_REF_KERNEL
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                        \
-  REGISTER_KERNEL_BUILDER(Name("Merge")                   \
-                              .Device(DEVICE_SYCL)        \
-                              .TypeConstraint<type>("T")  \
-                              .HostMemory("value_index"), \
-                          MergeOp);
-REGISTER_SYCL_KERNEL(bool);
-TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
-
-#define REGISTER_SYCL_REF_KERNEL(type)                    \
-  REGISTER_KERNEL_BUILDER(Name("RefMerge")                \
-                              .Device(DEVICE_SYCL)        \
-                              .TypeConstraint<type>("T")  \
-                              .HostMemory("value_index"), \
-                          MergeOp);
-REGISTER_SYCL_REF_KERNEL(bool);
-TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_REF_KERNEL);
-
-#undef REGISTER_SYCL_KERNEL
-#undef REGISTER_SYCL_REF_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 // Special GPU kernels for int32 and string.
 // TODO(b/25387198): Also enable int32 in device memory. This kernel
@@ -364,29 +291,6 @@
 
 #undef REGISTER_GPU_HOST_KERNEL
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_HOST_KERNEL(type)                   \
-  REGISTER_KERNEL_BUILDER(Name("Merge")                   \
-                              .Device(DEVICE_SYCL)        \
-                              .HostMemory("inputs")       \
-                              .HostMemory("output")       \
-                              .HostMemory("value_index")  \
-                              .TypeConstraint<type>("T"), \
-                          MergeOp);                       \
-  REGISTER_KERNEL_BUILDER(Name("RefMerge")                \
-                              .Device(DEVICE_SYCL)        \
-                              .HostMemory("inputs")       \
-                              .HostMemory("output")       \
-                              .HostMemory("value_index")  \
-                              .TypeConstraint<type>("T"), \
-                          MergeOp)
-
-REGISTER_SYCL_HOST_KERNEL(int32);
-REGISTER_SYCL_HOST_KERNEL(tstring);
-REGISTER_SYCL_HOST_KERNEL(ResourceHandle);
-
-#undef REGISTER_SYCL_HOST_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 void EnterOp::Compute(OpKernelContext* context) {
   if (IsRefType(context->input_dtype(0))) {
@@ -416,46 +320,6 @@
 #undef REGISTER_GPU_KERNEL
 #undef REGISTER_GPU_REF_KERNEL
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type) \
-  REGISTER_KERNEL_BUILDER(         \
-      Name("Enter").Device(DEVICE_SYCL).TypeConstraint<type>("T"), EnterOp)
-REGISTER_SYCL_KERNEL(bool);
-TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
-
-#define REGISTER_SYCL_REF_KERNEL(type) \
-  REGISTER_KERNEL_BUILDER(             \
-      Name("RefEnter").Device(DEVICE_SYCL).TypeConstraint<type>("T"), EnterOp)
-REGISTER_SYCL_REF_KERNEL(bool);
-TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_REF_KERNEL);
-
-#undef REGISTER_SYCL_KERNEL
-#undef REGISTER_SYCL_REF_KERNEL
-#define REGISTER_SYCL_HOST_KERNEL(type)                   \
-  REGISTER_KERNEL_BUILDER(Name("Enter")                   \
-                              .Device(DEVICE_SYCL)        \
-                              .HostMemory("data")         \
-                              .HostMemory("output")       \
-                              .TypeConstraint<type>("T"), \
-                          EnterOp)
-
-#define REGISTER_SYCL_HOST_REF_KERNEL(type)               \
-  REGISTER_KERNEL_BUILDER(Name("RefEnter")                \
-                              .Device(DEVICE_SYCL)        \
-                              .HostMemory("data")         \
-                              .HostMemory("output")       \
-                              .TypeConstraint<type>("T"), \
-                          EnterOp)
-
-REGISTER_SYCL_HOST_KERNEL(int32);
-REGISTER_SYCL_HOST_REF_KERNEL(int32);
-REGISTER_SYCL_HOST_KERNEL(tstring);
-REGISTER_SYCL_HOST_REF_KERNEL(tstring);
-REGISTER_SYCL_HOST_KERNEL(ResourceHandle);
-
-#undef REGISTER_SYCL_HOST_KERNEL
-#undef REGISTER_SYCL_HOST_REF_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 // Special GPU kernels for int32 and string.
 // TODO(b/25387198): Also enable int32 in device memory. This kernel
@@ -513,36 +377,6 @@
 #undef REGISTER_GPU_KERNEL
 #undef REGISTER_GPU_REF_KERNEL
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                                         \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("Exit").Device(DEVICE_SYCL).TypeConstraint<type>("T"), ExitOp); \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("RefExit").Device(DEVICE_SYCL).TypeConstraint<type>("T"), ExitOp);
-REGISTER_SYCL_KERNEL(bool);
-TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
-
-#undef REGISTER_SYCL_KERNEL
-#undef REGISTER_SYCL_REF_KERNEL
-
-#define REGISTER_SYCL_HOST_KERNEL(type)                   \
-  REGISTER_KERNEL_BUILDER(Name("Exit")                    \
-                              .Device(DEVICE_SYCL)        \
-                              .HostMemory("data")         \
-                              .HostMemory("output")       \
-                              .TypeConstraint<type>("T"), \
-                          ExitOp);                        \
-  REGISTER_KERNEL_BUILDER(Name("RefExit")                 \
-                              .Device(DEVICE_SYCL)        \
-                              .HostMemory("data")         \
-                              .HostMemory("output")       \
-                              .TypeConstraint<type>("T"), \
-                          ExitOp)
-
-REGISTER_SYCL_HOST_KERNEL(int32);
-REGISTER_SYCL_HOST_KERNEL(tstring);
-#undef REGISTER_SYCL_HOST_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 // Special GPU kernels for int32 and string.
 // TODO(b/25387198): Also enable int32 in device memory. This kernel
@@ -619,37 +453,6 @@
 
 #undef REGISTER_GPU_HOST_KERNEL
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                                            \
-  REGISTER_KERNEL_BUILDER(                                                    \
-      Name("NextIteration").Device(DEVICE_SYCL).TypeConstraint<type>("T"),    \
-      NextIterationOp);                                                       \
-  REGISTER_KERNEL_BUILDER(                                                    \
-      Name("RefNextIteration").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      NextIterationOp)
-REGISTER_SYCL_KERNEL(bool);
-TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
-
-#undef REGISTER_SYCL_KERNEL
-
-#define REGISTER_SYCL_HOST_KERNEL(type)                   \
-  REGISTER_KERNEL_BUILDER(Name("NextIteration")           \
-                              .Device(DEVICE_SYCL)        \
-                              .HostMemory("data")         \
-                              .HostMemory("output")       \
-                              .TypeConstraint<type>("T"), \
-                          NextIterationOp);               \
-  REGISTER_KERNEL_BUILDER(Name("RefNextIteration")        \
-                              .Device(DEVICE_SYCL)        \
-                              .HostMemory("data")         \
-                              .HostMemory("output")       \
-                              .TypeConstraint<type>("T"), \
-                          NextIterationOp)
-
-REGISTER_SYCL_HOST_KERNEL(int32);
-REGISTER_SYCL_HOST_KERNEL(tstring);
-#undef REGISTER_SYCL_HOST_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 LoopCondOp::LoopCondOp(OpKernelConstruction* context) : OpKernel(context) {}
 LoopCondOp::~LoopCondOp() = default;
diff --git a/tensorflow/core/kernels/cwise_op_abs.cc b/tensorflow/core/kernels/cwise_op_abs.cc
index d3b09f7..20befa1 100644
--- a/tensorflow/core/kernels/cwise_op_abs.cc
+++ b/tensorflow/core/kernels/cwise_op_abs.cc
@@ -39,13 +39,4 @@
 #endif
 #endif
 
-#if TENSORFLOW_USE_SYCL
-REGISTER3(UnaryOp, SYCL, "Abs", functor::abs, float, double, int64);
-REGISTER_KERNEL_BUILDER(Name("Abs")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .TypeConstraint<int32>("T"),
-                        UnaryOp<CPUDevice, functor::abs<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_acos.cc b/tensorflow/core/kernels/cwise_op_acos.cc
index 8449f06..7cd01cf 100644
--- a/tensorflow/core/kernels/cwise_op_acos.cc
+++ b/tensorflow/core/kernels/cwise_op_acos.cc
@@ -22,7 +22,4 @@
 REGISTER2(UnaryOp, GPU, "Acos", functor::acos, float, double);
 #endif
 
-#if TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Acos", functor::acos, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_acosh.cc b/tensorflow/core/kernels/cwise_op_acosh.cc
index 06aee86..05acf66 100644
--- a/tensorflow/core/kernels/cwise_op_acosh.cc
+++ b/tensorflow/core/kernels/cwise_op_acosh.cc
@@ -20,9 +20,6 @@
 REGISTER4(UnaryOp, CPU, "Acosh", functor::acosh, float, double, complex64,
           complex128);
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Acosh", functor::acosh, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(UnaryOp, GPU, "Acosh", functor::acosh, float, double);
diff --git a/tensorflow/core/kernels/cwise_op_add_1.cc b/tensorflow/core/kernels/cwise_op_add_1.cc
index 608fe3f..0af4154 100644
--- a/tensorflow/core/kernels/cwise_op_add_1.cc
+++ b/tensorflow/core/kernels/cwise_op_add_1.cc
@@ -44,26 +44,4 @@
                         BinaryOp<CPUDevice, functor::add<int32>>);
 #endif
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_KERNEL(type)                          \
-  REGISTER(BinaryOp, SYCL, "Add", functor::add, type); \
-  REGISTER(BinaryOp, SYCL, "AddV2", functor::add, type);
-
-TF_CALL_SYCL_NUMBER_TYPES(REGISTER_KERNEL);
-
-REGISTER_KERNEL_BUILDER(Name("Add")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::add<int32>>);
-REGISTER_KERNEL_BUILDER(Name("AddV2")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::add<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_asin.cc b/tensorflow/core/kernels/cwise_op_asin.cc
index 9089dfc..2471f8d 100644
--- a/tensorflow/core/kernels/cwise_op_asin.cc
+++ b/tensorflow/core/kernels/cwise_op_asin.cc
@@ -22,7 +22,4 @@
 REGISTER2(UnaryOp, GPU, "Asin", functor::asin, float, double);
 #endif
 
-#if TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Asin", functor::asin, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc
index 9801b31..d096deb 100644
--- a/tensorflow/core/kernels/cwise_op_asinh.cc
+++ b/tensorflow/core/kernels/cwise_op_asinh.cc
@@ -20,9 +20,6 @@
 REGISTER4(UnaryOp, CPU, "Asinh", functor::asinh, float, double, complex64,
           complex128);
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Asinh", functor::asinh, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(UnaryOp, GPU, "Asinh", functor::asinh, float, double);
diff --git a/tensorflow/core/kernels/cwise_op_atan.cc b/tensorflow/core/kernels/cwise_op_atan.cc
index d8f84f0..07b0305 100644
--- a/tensorflow/core/kernels/cwise_op_atan.cc
+++ b/tensorflow/core/kernels/cwise_op_atan.cc
@@ -22,7 +22,4 @@
 REGISTER2(UnaryOp, GPU, "Atan", functor::atan, float, double);
 #endif
 
-#if TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Atan", functor::atan, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_atanh.cc b/tensorflow/core/kernels/cwise_op_atanh.cc
index e58adb5..2404cd1 100644
--- a/tensorflow/core/kernels/cwise_op_atanh.cc
+++ b/tensorflow/core/kernels/cwise_op_atanh.cc
@@ -20,9 +20,6 @@
 REGISTER4(UnaryOp, CPU, "Atanh", functor::atanh, float, double, complex64,
           complex128);
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Atanh", functor::atanh, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(UnaryOp, GPU, "Atanh", functor::atanh, float, double);
diff --git a/tensorflow/core/kernels/cwise_op_bitwise_and.cc b/tensorflow/core/kernels/cwise_op_bitwise_and.cc
index 49d5044..5e557e7 100644
--- a/tensorflow/core/kernels/cwise_op_bitwise_and.cc
+++ b/tensorflow/core/kernels/cwise_op_bitwise_and.cc
@@ -19,22 +19,6 @@
 REGISTER8(BinaryOp, CPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32,
           int64, uint8, uint16, uint32, uint64);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                      \
-  REGISTER_KERNEL_BUILDER(                                              \
-      Name("BitwiseAnd").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      BinaryOp<SYCLDevice, functor::bitwise_and<TYPE>>);
-REGISTER_SYCL_KERNEL(int8);
-REGISTER_SYCL_KERNEL(int16);
-REGISTER_SYCL_KERNEL(int32);
-REGISTER_SYCL_KERNEL(int64);
-REGISTER_SYCL_KERNEL(uint8);
-REGISTER_SYCL_KERNEL(uint16);
-REGISTER_SYCL_KERNEL(uint32);
-REGISTER_SYCL_KERNEL(uint64);
-#undef REGISTER_SYCL_KERNEL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER8(BinaryOp, GPU, "BitwiseAnd", functor::bitwise_and, int8, int16, int32,
diff --git a/tensorflow/core/kernels/cwise_op_bitwise_or.cc b/tensorflow/core/kernels/cwise_op_bitwise_or.cc
index f448968..3b371f9 100644
--- a/tensorflow/core/kernels/cwise_op_bitwise_or.cc
+++ b/tensorflow/core/kernels/cwise_op_bitwise_or.cc
@@ -19,22 +19,6 @@
 REGISTER8(BinaryOp, CPU, "BitwiseOr", functor::bitwise_or, int8, int16, int32,
           int64, uint8, uint16, uint32, uint64);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                     \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("BitwiseOr").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      BinaryOp<SYCLDevice, functor::bitwise_or<TYPE>>);
-REGISTER_SYCL_KERNEL(int8);
-REGISTER_SYCL_KERNEL(int16);
-REGISTER_SYCL_KERNEL(int32);
-REGISTER_SYCL_KERNEL(int64);
-REGISTER_SYCL_KERNEL(uint8);
-REGISTER_SYCL_KERNEL(uint16);
-REGISTER_SYCL_KERNEL(uint32);
-REGISTER_SYCL_KERNEL(uint64);
-#undef REGISTER_SYCL_KERNEL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER8(BinaryOp, GPU, "BitwiseOr", functor::bitwise_or, int8, int16, int32,
diff --git a/tensorflow/core/kernels/cwise_op_bitwise_xor.cc b/tensorflow/core/kernels/cwise_op_bitwise_xor.cc
index b4387c2..bb3c727 100644
--- a/tensorflow/core/kernels/cwise_op_bitwise_xor.cc
+++ b/tensorflow/core/kernels/cwise_op_bitwise_xor.cc
@@ -19,22 +19,6 @@
 REGISTER8(BinaryOp, CPU, "BitwiseXor", functor::bitwise_xor, int8, int16, int32,
           int64, uint8, uint16, uint32, uint64);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                      \
-  REGISTER_KERNEL_BUILDER(                                              \
-      Name("BitwiseXor").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      BinaryOp<SYCLDevice, functor::bitwise_xor<TYPE>>);
-REGISTER_SYCL_KERNEL(int8);
-REGISTER_SYCL_KERNEL(int16);
-REGISTER_SYCL_KERNEL(int32);
-REGISTER_SYCL_KERNEL(int64);
-REGISTER_SYCL_KERNEL(uint8);
-REGISTER_SYCL_KERNEL(uint16);
-REGISTER_SYCL_KERNEL(uint32);
-REGISTER_SYCL_KERNEL(uint64);
-#undef REGISTER_SYCL_KERNEL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER8(BinaryOp, GPU, "BitwiseXor", functor::bitwise_xor, int8, int16, int32,
diff --git a/tensorflow/core/kernels/cwise_op_ceil.cc b/tensorflow/core/kernels/cwise_op_ceil.cc
index f8907ff..765e5b9 100644
--- a/tensorflow/core/kernels/cwise_op_ceil.cc
+++ b/tensorflow/core/kernels/cwise_op_ceil.cc
@@ -23,7 +23,4 @@
 REGISTER3(UnaryOp, GPU, "Ceil", functor::ceil, float, Eigen::half, double);
 #endif
 
-#if TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Ceil", functor::ceil, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_cos.cc b/tensorflow/core/kernels/cwise_op_cos.cc
index 3d406fe..64e9fab 100644
--- a/tensorflow/core/kernels/cwise_op_cos.cc
+++ b/tensorflow/core/kernels/cwise_op_cos.cc
@@ -23,7 +23,4 @@
 REGISTER3(UnaryOp, GPU, "Cos", functor::cos, float, Eigen::half, double);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Cos", functor::cos, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_cosh.cc b/tensorflow/core/kernels/cwise_op_cosh.cc
index e6dff0e..6e1c536 100644
--- a/tensorflow/core/kernels/cwise_op_cosh.cc
+++ b/tensorflow/core/kernels/cwise_op_cosh.cc
@@ -19,15 +19,6 @@
 REGISTER5(UnaryOp, CPU, "Cosh", functor::cosh, float, double, bfloat16,
           complex64, complex128);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                \
-  REGISTER_KERNEL_BUILDER(                                        \
-      Name("Cosh").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      UnaryOp<SYCLDevice, functor::cosh<TYPE>>);
-REGISTER_SYCL_KERNEL(float);
-REGISTER_SYCL_KERNEL(double);
-#undef REGISTER_SYCL_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(UnaryOp, GPU, "Cosh", functor::cosh, float, double);
diff --git a/tensorflow/core/kernels/cwise_op_div.cc b/tensorflow/core/kernels/cwise_op_div.cc
index 733f388..6e43f45 100644
--- a/tensorflow/core/kernels/cwise_op_div.cc
+++ b/tensorflow/core/kernels/cwise_op_div.cc
@@ -50,15 +50,4 @@
                         BinaryOp<CPUDevice, functor::safe_div<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(BinaryOp, SYCL, "Div", functor::div, float, double);
-REGISTER2(BinaryOp, SYCL, "RealDiv", functor::div, float, double);
-REGISTER_KERNEL_BUILDER(Name("Div")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::safe_div<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_equal_to_1.cc b/tensorflow/core/kernels/cwise_op_equal_to_1.cc
index 64cd784..41eadd6 100644
--- a/tensorflow/core/kernels/cwise_op_equal_to_1.cc
+++ b/tensorflow/core/kernels/cwise_op_equal_to_1.cc
@@ -47,16 +47,5 @@
                         BinaryOp<CPUDevice, functor::equal_to<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER5(BinaryOp, SYCL, "Equal", functor::equal_to, float, double, uint8,
-          int8, int16);
-REGISTER_KERNEL_BUILDER(Name("Equal")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::equal_to<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_exp.cc b/tensorflow/core/kernels/cwise_op_exp.cc
index d937dd0..28ace80 100644
--- a/tensorflow/core/kernels/cwise_op_exp.cc
+++ b/tensorflow/core/kernels/cwise_op_exp.cc
@@ -24,7 +24,4 @@
           complex64, complex128);
 #endif
 
-#if TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Exp", functor::exp, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_expm1.cc b/tensorflow/core/kernels/cwise_op_expm1.cc
index 0b145d8..62a26eb 100644
--- a/tensorflow/core/kernels/cwise_op_expm1.cc
+++ b/tensorflow/core/kernels/cwise_op_expm1.cc
@@ -21,7 +21,4 @@
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Expm1", functor::expm1, float, Eigen::half, double);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Expm1", functor::expm1, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_floor.cc b/tensorflow/core/kernels/cwise_op_floor.cc
index 1dbd9bf..da5619b 100644
--- a/tensorflow/core/kernels/cwise_op_floor.cc
+++ b/tensorflow/core/kernels/cwise_op_floor.cc
@@ -22,7 +22,4 @@
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Floor", functor::floor, float, Eigen::half, double);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Floor", functor::floor, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_floor_div.cc b/tensorflow/core/kernels/cwise_op_floor_div.cc
index d1f6d4c..a98eecd 100644
--- a/tensorflow/core/kernels/cwise_op_floor_div.cc
+++ b/tensorflow/core/kernels/cwise_op_floor_div.cc
@@ -41,13 +41,4 @@
                         BinaryOp<CPUDevice, functor::safe_floor_div<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("FloorDiv")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::safe_floor_div<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_floor_mod.cc b/tensorflow/core/kernels/cwise_op_floor_mod.cc
index 599ed1a..6d8a12a 100644
--- a/tensorflow/core/kernels/cwise_op_floor_mod.cc
+++ b/tensorflow/core/kernels/cwise_op_floor_mod.cc
@@ -34,13 +34,4 @@
                         BinaryOp<CPUDevice, functor::safe_floor_mod<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("FloorMod")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::safe_floor_mod<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_greater.cc b/tensorflow/core/kernels/cwise_op_greater.cc
index d70233d..f9a2b8c 100644
--- a/tensorflow/core/kernels/cwise_op_greater.cc
+++ b/tensorflow/core/kernels/cwise_op_greater.cc
@@ -33,15 +33,4 @@
                             .TypeConstraint<int32>("T"),
                         BinaryOp<CPUDevice, functor::greater<int32>>);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(BinaryOp, SYCL, "Greater", functor::greater, float, double);
-
-REGISTER_KERNEL_BUILDER(Name("Greater")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::greater<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_greater_equal.cc b/tensorflow/core/kernels/cwise_op_greater_equal.cc
index 7f6b788..d33adc2 100644
--- a/tensorflow/core/kernels/cwise_op_greater_equal.cc
+++ b/tensorflow/core/kernels/cwise_op_greater_equal.cc
@@ -34,16 +34,4 @@
                         BinaryOp<CPUDevice, functor::greater_equal<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(BinaryOp, SYCL, "GreaterEqual", functor::greater_equal, float,
-          double);
-
-REGISTER_KERNEL_BUILDER(Name("GreaterEqual")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::greater_equal<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_invert.cc b/tensorflow/core/kernels/cwise_op_invert.cc
index 7bdc3d0..455e773 100644
--- a/tensorflow/core/kernels/cwise_op_invert.cc
+++ b/tensorflow/core/kernels/cwise_op_invert.cc
@@ -19,10 +19,6 @@
 REGISTER8(UnaryOp, CPU, "Invert", functor::invert, int8, int16, int32, int64,
           uint8, uint16, uint32, uint64);
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER6(UnaryOp, SYCL, "Invert", functor::invert, int8, int16, int32, int64,
-          uint8, uint16, uint32, uint64);
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER8(UnaryOp, GPU, "Invert", functor::invert, int8, int16, int32, int64,
diff --git a/tensorflow/core/kernels/cwise_op_isfinite.cc b/tensorflow/core/kernels/cwise_op_isfinite.cc
index 42c7cbd..0246d89 100644
--- a/tensorflow/core/kernels/cwise_op_isfinite.cc
+++ b/tensorflow/core/kernels/cwise_op_isfinite.cc
@@ -24,7 +24,4 @@
           double);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "IsFinite", functor::isfinite, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_isinf.cc b/tensorflow/core/kernels/cwise_op_isinf.cc
index 68141f4..d4da9fc 100644
--- a/tensorflow/core/kernels/cwise_op_isinf.cc
+++ b/tensorflow/core/kernels/cwise_op_isinf.cc
@@ -23,7 +23,4 @@
 REGISTER3(UnaryOp, GPU, "IsInf", functor::isinf, float, Eigen::half, double);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "IsInf", functor::isinf, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_isnan.cc b/tensorflow/core/kernels/cwise_op_isnan.cc
index 2867b16..b168b1c 100644
--- a/tensorflow/core/kernels/cwise_op_isnan.cc
+++ b/tensorflow/core/kernels/cwise_op_isnan.cc
@@ -23,7 +23,4 @@
 REGISTER3(UnaryOp, GPU, "IsNan", functor::isnan, float, Eigen::half, double);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "IsNan", functor::isnan, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_left_shift.cc b/tensorflow/core/kernels/cwise_op_left_shift.cc
index 38109a1..ed65bea 100644
--- a/tensorflow/core/kernels/cwise_op_left_shift.cc
+++ b/tensorflow/core/kernels/cwise_op_left_shift.cc
@@ -19,22 +19,6 @@
 REGISTER8(BinaryOp, CPU, "LeftShift", functor::left_shift, int8, int16, int32,
           int64, uint8, uint16, uint32, uint64);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                     \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("LeftShift").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      BinaryOp<SYCLDevice, functor::left_shift<TYPE>>);
-REGISTER_SYCL_KERNEL(int8);
-REGISTER_SYCL_KERNEL(int16);
-REGISTER_SYCL_KERNEL(int32);
-REGISTER_SYCL_KERNEL(int64);
-REGISTER_SYCL_KERNEL(uint8);
-REGISTER_SYCL_KERNEL(uint16);
-REGISTER_SYCL_KERNEL(uint32);
-REGISTER_SYCL_KERNEL(uint64);
-#undef REGISTER_SYCL_KERNEL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER8(BinaryOp, GPU, "LeftShift", functor::left_shift, int8, int16, int32,
diff --git a/tensorflow/core/kernels/cwise_op_less.cc b/tensorflow/core/kernels/cwise_op_less.cc
index 062a029..817f07a 100644
--- a/tensorflow/core/kernels/cwise_op_less.cc
+++ b/tensorflow/core/kernels/cwise_op_less.cc
@@ -35,14 +35,4 @@
                             .TypeConstraint<int32>("T"),
                         BinaryOp<CPUDevice, functor::less<int32>>);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER3(BinaryOp, SYCL, "Less", functor::less, float, double, int64);
-REGISTER_KERNEL_BUILDER(Name("Less")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::less<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_less_equal.cc b/tensorflow/core/kernels/cwise_op_less_equal.cc
index 43af038..17b9915 100644
--- a/tensorflow/core/kernels/cwise_op_less_equal.cc
+++ b/tensorflow/core/kernels/cwise_op_less_equal.cc
@@ -37,15 +37,4 @@
                         BinaryOp<CPUDevice, functor::less_equal<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER6(BinaryOp, SYCL, "LessEqual", functor::less_equal, float, double,
-          int64, uint8, int8, int16);
-REGISTER_KERNEL_BUILDER(Name("LessEqual")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::less_equal<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_log.cc b/tensorflow/core/kernels/cwise_op_log.cc
index e4ff380..236f95d 100644
--- a/tensorflow/core/kernels/cwise_op_log.cc
+++ b/tensorflow/core/kernels/cwise_op_log.cc
@@ -23,7 +23,4 @@
 REGISTER3(UnaryOp, GPU, "Log", functor::log, float, Eigen::half, double);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Log", functor::log, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_log1p.cc b/tensorflow/core/kernels/cwise_op_log1p.cc
index 88ddfd6..392067f 100644
--- a/tensorflow/core/kernels/cwise_op_log1p.cc
+++ b/tensorflow/core/kernels/cwise_op_log1p.cc
@@ -23,7 +23,4 @@
 REGISTER3(UnaryOp, GPU, "Log1p", functor::log1p, float, Eigen::half, double);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Log1p", functor::log1p, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_maximum.cc b/tensorflow/core/kernels/cwise_op_maximum.cc
index 5ebfa74..2b70cdb 100644
--- a/tensorflow/core/kernels/cwise_op_maximum.cc
+++ b/tensorflow/core/kernels/cwise_op_maximum.cc
@@ -34,14 +34,4 @@
                         BinaryOp<CPUDevice, functor::maximum<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER3(BinaryOp, SYCL, "Maximum", functor::maximum, float, double, int64);
-REGISTER_KERNEL_BUILDER(Name("Maximum")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::maximum<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_minimum.cc b/tensorflow/core/kernels/cwise_op_minimum.cc
index 8b301e8..f8ba071 100644
--- a/tensorflow/core/kernels/cwise_op_minimum.cc
+++ b/tensorflow/core/kernels/cwise_op_minimum.cc
@@ -34,15 +34,5 @@
                         BinaryOp<CPUDevice, functor::minimum<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER3(BinaryOp, SYCL, "Minimum", functor::minimum, float, double, int64);
-REGISTER_KERNEL_BUILDER(Name("Minimum")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::minimum<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_mul_1.cc b/tensorflow/core/kernels/cwise_op_mul_1.cc
index 4e2aa6b..5660f43 100644
--- a/tensorflow/core/kernels/cwise_op_mul_1.cc
+++ b/tensorflow/core/kernels/cwise_op_mul_1.cc
@@ -49,14 +49,4 @@
           double, complex64, complex128);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER3(BinaryOp, SYCL, "Mul", functor::mul, float, double, uint8);
-REGISTER_KERNEL_BUILDER(Name("Mul")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::mul<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_neg_1.cc b/tensorflow/core/kernels/cwise_op_neg_1.cc
index 18a7c61..fde5fae 100644
--- a/tensorflow/core/kernels/cwise_op_neg_1.cc
+++ b/tensorflow/core/kernels/cwise_op_neg_1.cc
@@ -18,15 +18,6 @@
 namespace tensorflow {
 REGISTER4(UnaryOp, CPU, "Neg", functor::neg, int8, int16, int32, int64);
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER3(UnaryOp, SYCL, "Neg", functor::neg, float, double, int64);
-REGISTER_KERNEL_BUILDER(Name("Neg")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .TypeConstraint<int32>("T"),
-                        UnaryOp<CPUDevice, functor::neg<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Neg", functor::neg, int8, int16, int64);
diff --git a/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc b/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc
index 4de69ed..f0dbac1 100644
--- a/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc
+++ b/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc
@@ -35,16 +35,5 @@
                         BinaryOp<CPUDevice, functor::not_equal_to<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(BinaryOp, SYCL, "NotEqual", functor::not_equal_to, float, double);
-
-REGISTER_KERNEL_BUILDER(Name("NotEqual")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::not_equal_to<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_pow.cc b/tensorflow/core/kernels/cwise_op_pow.cc
index 214d083..e969c39 100644
--- a/tensorflow/core/kernels/cwise_op_pow.cc
+++ b/tensorflow/core/kernels/cwise_op_pow.cc
@@ -24,7 +24,4 @@
 REGISTER4(BinaryOp, GPU, "Pow", functor::pow, float, Eigen::half, double,
           int64);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(BinaryOp, SYCL, "Pow", functor::pow, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_reciprocal.cc b/tensorflow/core/kernels/cwise_op_reciprocal.cc
index 4fe201e..76480e1 100644
--- a/tensorflow/core/kernels/cwise_op_reciprocal.cc
+++ b/tensorflow/core/kernels/cwise_op_reciprocal.cc
@@ -36,9 +36,6 @@
 REGISTER4(UnaryOp, GPU, "Reciprocal", functor::inverse, float, Eigen::half,
           double, int64);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER(UnaryOp, SYCL, "Reciprocal", functor::inverse, float);
-#endif  // TENSORFLOW_USE_SYCL
 
 REGISTER6(SimpleBinaryOp, CPU, "ReciprocalGrad", functor::inverse_grad, float,
           Eigen::half, bfloat16, double, complex64, complex128);
@@ -46,7 +43,4 @@
 REGISTER3(SimpleBinaryOp, GPU, "ReciprocalGrad", functor::inverse_grad, float,
           Eigen::half, double);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER(SimpleBinaryOp, SYCL, "ReciprocalGrad", functor::inverse_grad, float);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_right_shift.cc b/tensorflow/core/kernels/cwise_op_right_shift.cc
index 8165662..2bf819c 100644
--- a/tensorflow/core/kernels/cwise_op_right_shift.cc
+++ b/tensorflow/core/kernels/cwise_op_right_shift.cc
@@ -19,22 +19,6 @@
 REGISTER8(BinaryOp, CPU, "RightShift", functor::right_shift, int8, int16, int32,
           int64, uint8, uint16, uint32, uint64);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                      \
-  REGISTER_KERNEL_BUILDER(                                              \
-      Name("RightShift").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      BinaryOp<SYCLDevice, functor::right_shift<TYPE>>);
-REGISTER_SYCL_KERNEL(int8);
-REGISTER_SYCL_KERNEL(int16);
-REGISTER_SYCL_KERNEL(int32);
-REGISTER_SYCL_KERNEL(int64);
-REGISTER_SYCL_KERNEL(uint8);
-REGISTER_SYCL_KERNEL(uint16);
-REGISTER_SYCL_KERNEL(uint32);
-REGISTER_SYCL_KERNEL(uint64);
-#undef REGISTER_SYCL_KERNEL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER8(BinaryOp, GPU, "RightShift", functor::right_shift, int8, int16, int32,
diff --git a/tensorflow/core/kernels/cwise_op_round.cc b/tensorflow/core/kernels/cwise_op_round.cc
index 86e709b..73a1d9e 100644
--- a/tensorflow/core/kernels/cwise_op_round.cc
+++ b/tensorflow/core/kernels/cwise_op_round.cc
@@ -19,9 +19,6 @@
 REGISTER5(UnaryOp, CPU, "Round", functor::round, Eigen::half, float, double,
           int32, int64);
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Round", functor::round, float, double);
-#endif
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER5(UnaryOp, GPU, "Round", functor::round, Eigen::half, float, double,
diff --git a/tensorflow/core/kernels/cwise_op_rsqrt.cc b/tensorflow/core/kernels/cwise_op_rsqrt.cc
index b219daf..e051e4d 100644
--- a/tensorflow/core/kernels/cwise_op_rsqrt.cc
+++ b/tensorflow/core/kernels/cwise_op_rsqrt.cc
@@ -22,9 +22,6 @@
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER3(UnaryOp, GPU, "Rsqrt", functor::rsqrt, float, Eigen::half, double);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Rsqrt", functor::rsqrt, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 
 REGISTER6(SimpleBinaryOp, CPU, "RsqrtGrad", functor::rsqrt_grad, float,
           Eigen::half, bfloat16, double, complex64, complex128);
@@ -32,8 +29,4 @@
 REGISTER3(SimpleBinaryOp, GPU, "RsqrtGrad", functor::rsqrt_grad, float,
           Eigen::half, double);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(SimpleBinaryOp, SYCL, "RsqrtGrad", functor::rsqrt_grad, float,
-          double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_select.cc b/tensorflow/core/kernels/cwise_op_select.cc
index af00308..02a8289 100644
--- a/tensorflow/core/kernels/cwise_op_select.cc
+++ b/tensorflow/core/kernels/cwise_op_select.cc
@@ -29,9 +29,6 @@
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace functor {
 template <typename Device, typename T>
@@ -294,22 +291,6 @@
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-// Registration of the SYCL implementations.
-#define REGISTER_SELECT_SYCL(type)                                    \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Select").Device(DEVICE_SYCL).TypeConstraint<type>("T"),   \
-      SelectOp<SYCLDevice, type>);                                    \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("SelectV2").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      SelectOp<SYCLDevice, type>);
-
-REGISTER_SELECT_SYCL(float);
-REGISTER_SELECT_SYCL(double);
-REGISTER_SELECT_SYCL(int32);
-REGISTER_SELECT_SYCL(int64);
-#undef REGISTER_SELECT_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace functor {
 
@@ -326,10 +307,6 @@
 
 template <typename T>
 struct SelectFunctor<CPUDevice, T> : SelectFunctorBase<CPUDevice, T> {};
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-struct SelectFunctor<SYCLDevice, T> : SelectFunctorBase<SYCLDevice, T> {};
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename Device, typename T>
 struct SelectScalarHandler {
@@ -364,21 +341,6 @@
   }
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename Device, typename T>
-struct SelectScalarFunctorBase {
-  void operator()(const Device& d, typename TTypes<T>::Flat out,
-                  TTypes<bool>::ConstScalar cond,
-                  typename TTypes<T>::ConstFlat then_flat,
-                  typename TTypes<T>::ConstFlat else_flat) {
-    out.device(d) = cond() ? then_flat : else_flat;
-  }
-};
-
-template <typename T>
-struct SelectScalarFunctor<SYCLDevice, T>
-    : SelectScalarFunctorBase<SYCLDevice, T> {};
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename Device, typename T>
 struct BatchSelectFunctorBase {
@@ -469,16 +431,6 @@
 struct BCastSelectFunctor<CPUDevice, T, NDIMS>
     : BCastSelectFunctorBase<CPUDevice, T, NDIMS> {};
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-struct BatchSelectFunctor<SYCLDevice, T>
-    : BatchSelectFunctorBase<SYCLDevice, T> {};
-
-template <typename T, int NDIMS>
-struct BCastSelectFunctor<SYCLDevice, T, NDIMS>
-    : BCastSelectFunctorBase<SYCLDevice, T, NDIMS> {};
-
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace functor
 
diff --git a/tensorflow/core/kernels/cwise_op_sigmoid.cc b/tensorflow/core/kernels/cwise_op_sigmoid.cc
index 175cba3..22ec20d 100644
--- a/tensorflow/core/kernels/cwise_op_sigmoid.cc
+++ b/tensorflow/core/kernels/cwise_op_sigmoid.cc
@@ -23,9 +23,6 @@
 REGISTER3(UnaryOp, GPU, "Sigmoid", functor::sigmoid, float, Eigen::half,
           double);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER(UnaryOp, SYCL, "Sigmoid", functor::sigmoid, float);
-#endif  // TENSORFLOW_USE_SYCL
 
 REGISTER6(SimpleBinaryOp, CPU, "SigmoidGrad", functor::sigmoid_grad, bfloat16,
           float, Eigen::half, double, complex64, complex128);
@@ -33,8 +30,5 @@
 REGISTER3(SimpleBinaryOp, GPU, "SigmoidGrad", functor::sigmoid_grad, float,
           Eigen::half, double);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER(SimpleBinaryOp, SYCL, "SigmoidGrad", functor::sigmoid_grad, float);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_sign.cc b/tensorflow/core/kernels/cwise_op_sign.cc
index 200a56e..b150155 100644
--- a/tensorflow/core/kernels/cwise_op_sign.cc
+++ b/tensorflow/core/kernels/cwise_op_sign.cc
@@ -33,14 +33,5 @@
                         UnaryOp<CPUDevice, functor::sign<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER3(UnaryOp, SYCL, "Sign", functor::sign, float, double, int64);
-REGISTER_KERNEL_BUILDER(Name("Sign")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .TypeConstraint<int32>("T"),
-                        UnaryOp<CPUDevice, functor::sign<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_sin.cc b/tensorflow/core/kernels/cwise_op_sin.cc
index f0fc2af..d3e8f3b 100644
--- a/tensorflow/core/kernels/cwise_op_sin.cc
+++ b/tensorflow/core/kernels/cwise_op_sin.cc
@@ -23,7 +23,4 @@
 REGISTER3(UnaryOp, GPU, "Sin", functor::sin, float, Eigen::half, double);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Sin", functor::sin, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_sinh.cc b/tensorflow/core/kernels/cwise_op_sinh.cc
index 4448d2f..24b3a66 100644
--- a/tensorflow/core/kernels/cwise_op_sinh.cc
+++ b/tensorflow/core/kernels/cwise_op_sinh.cc
@@ -19,15 +19,6 @@
 REGISTER5(UnaryOp, CPU, "Sinh", functor::sinh, float, double, bfloat16,
           complex64, complex128);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                \
-  REGISTER_KERNEL_BUILDER(                                        \
-      Name("Sinh").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      UnaryOp<SYCLDevice, functor::sinh<TYPE>>);
-REGISTER_SYCL_KERNEL(float);
-REGISTER_SYCL_KERNEL(double);
-#undef REGISTER_SYCL_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(UnaryOp, GPU, "Sinh", functor::sinh, float, double);
diff --git a/tensorflow/core/kernels/cwise_op_sqrt.cc b/tensorflow/core/kernels/cwise_op_sqrt.cc
index 976f8b0..2e33297 100644
--- a/tensorflow/core/kernels/cwise_op_sqrt.cc
+++ b/tensorflow/core/kernels/cwise_op_sqrt.cc
@@ -23,9 +23,6 @@
 REGISTER3(UnaryOp, GPU, "Sqrt", functor::sqrt, float, Eigen::half, double);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Sqrt", functor::sqrt, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 
 REGISTER6(SimpleBinaryOp, CPU, "SqrtGrad", functor::sqrt_grad, float,
           Eigen::half, bfloat16, double, complex64, complex128);
@@ -34,7 +31,4 @@
           Eigen::half, double);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(SimpleBinaryOp, SYCL, "SqrtGrad", functor::sqrt_grad, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_square.cc b/tensorflow/core/kernels/cwise_op_square.cc
index 40dea5a..3811839 100644
--- a/tensorflow/core/kernels/cwise_op_square.cc
+++ b/tensorflow/core/kernels/cwise_op_square.cc
@@ -34,13 +34,4 @@
                         UnaryOp<CPUDevice, functor::square<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER3(UnaryOp, SYCL, "Square", functor::square, float, double, int64);
-REGISTER_KERNEL_BUILDER(Name("Square")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .TypeConstraint<int32>("T"),
-                        UnaryOp<CPUDevice, functor::square<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_squared_difference.cc b/tensorflow/core/kernels/cwise_op_squared_difference.cc
index 12520b7..9bd457f 100644
--- a/tensorflow/core/kernels/cwise_op_squared_difference.cc
+++ b/tensorflow/core/kernels/cwise_op_squared_difference.cc
@@ -36,17 +36,5 @@
         .TypeConstraint<int32>("T"),
     BinaryOp<CPUDevice, functor::squared_difference<int32>>);
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER3(BinaryOp, SYCL, "SquaredDifference", functor::squared_difference,
-          float, double, int64);
-REGISTER_KERNEL_BUILDER(
-    Name("SquaredDifference")
-        .Device(DEVICE_SYCL)
-        .HostMemory("x")
-        .HostMemory("y")
-        .HostMemory("z")
-        .TypeConstraint<int32>("T"),
-    BinaryOp<CPUDevice, functor::squared_difference<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_sub.cc b/tensorflow/core/kernels/cwise_op_sub.cc
index 17e690b..6164b2a 100644
--- a/tensorflow/core/kernels/cwise_op_sub.cc
+++ b/tensorflow/core/kernels/cwise_op_sub.cc
@@ -45,14 +45,4 @@
                         BinaryOp<CPUDevice, functor::sub<int32>>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER3(BinaryOp, SYCL, "Sub", functor::sub, float, double, int64);
-REGISTER_KERNEL_BUILDER(Name("Sub")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("z")
-                            .TypeConstraint<int32>("T"),
-                        BinaryOp<CPUDevice, functor::sub<int32>>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_tan.cc b/tensorflow/core/kernels/cwise_op_tan.cc
index 1155312..a9ccc58 100644
--- a/tensorflow/core/kernels/cwise_op_tan.cc
+++ b/tensorflow/core/kernels/cwise_op_tan.cc
@@ -23,7 +23,4 @@
 REGISTER3(UnaryOp, GPU, "Tan", functor::tan, Eigen::half, float, double);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Tan", functor::tan, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_tanh.cc b/tensorflow/core/kernels/cwise_op_tanh.cc
index de56a5e..2dbd77d 100644
--- a/tensorflow/core/kernels/cwise_op_tanh.cc
+++ b/tensorflow/core/kernels/cwise_op_tanh.cc
@@ -26,9 +26,6 @@
 #endif
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER2(UnaryOp, SYCL, "Tanh", functor::tanh, float, double);
-#endif  // TENSORFLOW_USE_SYCL
 
 REGISTER6(SimpleBinaryOp, CPU, "TanhGrad", functor::tanh_grad, float,
           Eigen::half, bfloat16, double, complex64, complex128);
diff --git a/tensorflow/core/kernels/cwise_op_xdivy.cc b/tensorflow/core/kernels/cwise_op_xdivy.cc
index dbd0a69..2baf788 100644
--- a/tensorflow/core/kernels/cwise_op_xdivy.cc
+++ b/tensorflow/core/kernels/cwise_op_xdivy.cc
@@ -19,16 +19,6 @@
 REGISTER5(BinaryOp, CPU, "Xdivy", functor::xdivy, float, Eigen::half, double,
           complex64, complex128);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                 \
-  REGISTER_KERNEL_BUILDER(                                         \
-      Name("Xdivy").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      BinaryOp<SYCLDevice, functor::xdivy<TYPE>>);
-REGISTER_SYCL_KERNEL(float);
-REGISTER_SYCL_KERNEL(double);
-#undef REGISTER_SYCL_KERNEL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER5(BinaryOp, GPU, "Xdivy", functor::xdivy, float, Eigen::half, double,
diff --git a/tensorflow/core/kernels/cwise_op_xlog1py.cc b/tensorflow/core/kernels/cwise_op_xlog1py.cc
index f00d73e..493ee91 100644
--- a/tensorflow/core/kernels/cwise_op_xlog1py.cc
+++ b/tensorflow/core/kernels/cwise_op_xlog1py.cc
@@ -19,19 +19,6 @@
 REGISTER5(BinaryOp, CPU, "Xlog1py", functor::xlog1py, float, Eigen::half,
           double, complex64, complex128);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                   \
-  REGISTER_KERNEL_BUILDER(                                           \
-      Name("Xlog1py").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      BinaryOp<SYCLDevice, functor::xlog1py<TYPE>>);
-REGISTER_SYCL_KERNEL(Eigen::half);
-REGISTER_SYCL_KERNEL(float);
-REGISTER_SYCL_KERNEL(double);
-REGISTER_SYCL_KERNEL(complex64);
-REGISTER_SYCL_KERNEL(complex128);
-#undef REGISTER_SYCL_KERNEL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
 REGISTER5(BinaryOp, GPU, "Xlog1py", functor::xlog1py, float, Eigen::half,
diff --git a/tensorflow/core/kernels/cwise_op_xlogy.cc b/tensorflow/core/kernels/cwise_op_xlogy.cc
index a7eefa5..a48a786 100644
--- a/tensorflow/core/kernels/cwise_op_xlogy.cc
+++ b/tensorflow/core/kernels/cwise_op_xlogy.cc
@@ -19,19 +19,6 @@
 REGISTER5(BinaryOp, CPU, "Xlogy", functor::xlogy, float, Eigen::half, double,
           complex64, complex128);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                 \
-  REGISTER_KERNEL_BUILDER(                                         \
-      Name("Xlogy").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      BinaryOp<SYCLDevice, functor::xlogy<TYPE>>);
-REGISTER_SYCL_KERNEL(Eigen::half);
-REGISTER_SYCL_KERNEL(float);
-REGISTER_SYCL_KERNEL(double);
-REGISTER_SYCL_KERNEL(complex64);
-REGISTER_SYCL_KERNEL(complex128);
-#undef REGISTER_SYCL_KERNEL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
 REGISTER5(BinaryOp, GPU, "Xlogy", functor::xlogy, float, Eigen::half, double,
diff --git a/tensorflow/core/kernels/cwise_ops_common.h b/tensorflow/core/kernels/cwise_ops_common.h
index 9920da3..9adc628 100644
--- a/tensorflow/core/kernels/cwise_ops_common.h
+++ b/tensorflow/core/kernels/cwise_ops_common.h
@@ -24,9 +24,6 @@
 
 #include "tensorflow/core/platform/bfloat16.h"
 
-#ifdef TENSORFLOW_USE_SYCL
-#include "tensorflow/core/kernels/cwise_ops_sycl_common.h"
-#endif
 
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -42,9 +39,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif
 
 class BinaryOpShared : public OpKernel {
  public:
diff --git a/tensorflow/core/kernels/cwise_ops_gradients.h b/tensorflow/core/kernels/cwise_ops_gradients.h
index ab91973..78f77ca 100644
--- a/tensorflow/core/kernels/cwise_ops_gradients.h
+++ b/tensorflow/core/kernels/cwise_ops_gradients.h
@@ -188,19 +188,6 @@
   }
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-// Partial specialization of SimpleBinaryFunctor for SYCL devices
-typedef Eigen::SyclDevice SYCLDevice;
-template <typename Functor>
-struct SimpleBinaryFunctor<SYCLDevice, Functor> {
-  void operator()(const SYCLDevice& d, typename Functor::tout_type out,
-                  typename Functor::tin_type in0,
-                  typename Functor::tin_type in1) {
-    out.device(d) = in0.binaryExpr(in1, typename Functor::func());
-  }
-};
-
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename T>
 struct tanh_grad : base<T, Eigen::internal::scalar_tanh_gradient_op<T>> {};
diff --git a/tensorflow/core/kernels/cwise_ops_sycl_common.h b/tensorflow/core/kernels/cwise_ops_sycl_common.h
deleted file mode 100644
index 3e107ce..0000000
--- a/tensorflow/core/kernels/cwise_ops_sycl_common.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if !TENSORFLOW_USE_SYCL
-#error This file must only be included when building TensorFlow with SYCL support
-#endif
-
-#ifndef TENSORFLOW_CORE_KERNELS_CWISE_OPS_SYCL_COMMON_H_
-#define TENSORFLOW_CORE_KERNELS_CWISE_OPS_SYCL_COMMON_H_
-
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-
-#include "tensorflow/core/framework/register_types.h"
-#include "tensorflow/core/kernels/cwise_ops.h"
-#include "tensorflow/core/platform/types.h"
-
-namespace tensorflow {
-namespace functor {
-
-typedef Eigen::SyclDevice SYCLDevice;
-
-template <typename OUT, typename RHS>
-void Assign(const SYCLDevice& d, OUT out, RHS rhs) {
-  out.device(d) = rhs;
-}
-
-// Partial specialization of UnaryFunctor<Device=SYCLDevice, Functor>.
-template <typename Functor>
-struct UnaryFunctor<SYCLDevice, Functor> {
-  void operator()(const SYCLDevice& d, typename Functor::tout_type out,
-                  typename Functor::tin_type in) {
-    To32Bit(out).device(d) = To32Bit(in).unaryExpr(typename Functor::func());
-  }
-};
-
-// Partial specialization of BinaryFunctor<Device=SYCLDevice, Functor>.
-template <typename Functor, int NDIMS, bool has_errors>
-struct BinaryFunctor<SYCLDevice, Functor, NDIMS, has_errors> {
-  void operator()(const SYCLDevice& d, typename Functor::tout_type out,
-                  typename Functor::tin_type in0,
-                  typename Functor::tin_type in1, bool* error) {
-    To32Bit(out).device(d) =
-        To32Bit(in0).binaryExpr(To32Bit(in1), typename Functor::func());
-  }
-
-  void Left(const SYCLDevice& d, typename Functor::tout_type out,
-            typename Functor::tscalar_type scalar,
-            typename Functor::tin_type in, bool* error) {
-    typedef typename Functor::func Binary;
-    constexpr int NumDims = Functor::tin_type::NumDimensions;
-    static_assert(NumDims == 1, "Unexpected size");
-    Eigen::Sizes<1> scalar_dim;
-    out.device(d) = scalar.reshape(scalar_dim)
-                        .broadcast(in.dimensions())
-                        .binaryExpr(in, Binary());
-  }
-
-  void Right(const SYCLDevice& d, typename Functor::tout_type out,
-             typename Functor::tin_type in,
-             typename Functor::tscalar_type scalar, bool* error) {
-    typedef typename Functor::func Binary;
-    constexpr int NumDims = Functor::tin_type::NumDimensions;
-    static_assert(NumDims == 1, "Unexpected size");
-    Eigen::Sizes<1> scalar_dim;
-    out.device(d) = in.binaryExpr(
-        scalar.reshape(scalar_dim).broadcast(in.dimensions()), Binary());
-  }
-
-  void BCast(const SYCLDevice& d,
-             typename TTypes<typename Functor::out_type, NDIMS>::Tensor out,
-             typename TTypes<typename Functor::in_type, NDIMS>::ConstTensor in0,
-             typename Eigen::array<Eigen::DenseIndex, NDIMS> bcast0,
-             typename TTypes<typename Functor::in_type, NDIMS>::ConstTensor in1,
-             typename Eigen::array<Eigen::DenseIndex, NDIMS> bcast1,
-             bool* error) {
-    typedef typename Functor::in_type T;
-    typename Functor::func func;
-    if ((NDIMS == 2) && Functor::use_bcast_optimization &&
-        use_bcast_optimization<T>::value) {
-      const bool bcast0_all_one = AllOne<NDIMS>(bcast0);
-      const bool bcast1_all_one = AllOne<NDIMS>(bcast1);
-      if (bcast0_all_one && !bcast1_all_one) {
-        To32Bit(out).device(d) =
-            To32Bit(in0).binaryExpr(To32Bit(in1).broadcast(bcast1), func);
-        return;
-      }
-      if (!bcast0_all_one && bcast1_all_one) {
-        To32Bit(out).device(d) =
-            To32Bit(in0).broadcast(bcast0).binaryExpr(To32Bit(in1), func);
-        return;
-      }
-    }
-    To32Bit(out).device(d) = To32Bit(in0).broadcast(bcast0).binaryExpr(
-        To32Bit(in1).broadcast(bcast1), func);
-  }
-};
-
-// Macros to explicitly instantiate kernels on SYCL devices for multiple types
-// (T0, T1, etc.) for UnaryFunctor (e.g., functor::sqrt).
-#define DEFINE_UNARY1(F, T) template struct UnaryFunctor<SYCLDevice, F<T> >
-#define DEFINE_UNARY2(F, T0, T1) \
-  DEFINE_UNARY1(F, T0);          \
-  DEFINE_UNARY1(F, T1)
-#define DEFINE_UNARY3(F, T0, T1, T2) \
-  DEFINE_UNARY2(F, T0, T1);          \
-  DEFINE_UNARY1(F, T2)
-#define DEFINE_UNARY4(F, T0, T1, T2, T3) \
-  DEFINE_UNARY2(F, T0, T1);              \
-  DEFINE_UNARY2(F, T2, T3)
-#define DEFINE_UNARY5(F, T0, T1, T2, T3, T4) \
-  DEFINE_UNARY2(F, T0, T1);                  \
-  DEFINE_UNARY3(F, T2, T3, T4)
-
-// Macros to explicitly instantiate kernels on SYCL devices for multiple types
-// (T0, T1, etc.) for BinaryFunctor.
-#define DEFINE_BINARY1(F, T)                          \
-  template struct BinaryFunctor<SYCLDevice, F<T>, 1>; \
-  template struct BinaryFunctor<SYCLDevice, F<T>, 2>; \
-  template struct BinaryFunctor<SYCLDevice, F<T>, 3>
-#define DEFINE_BINARY2(F, T0, T1) \
-  DEFINE_BINARY1(F, T0);          \
-  DEFINE_BINARY1(F, T1)
-#define DEFINE_BINARY3(F, T0, T1, T2) \
-  DEFINE_BINARY2(F, T0, T1);          \
-  DEFINE_BINARY1(F, T2)
-#define DEFINE_BINARY4(F, T0, T1, T2, T3) \
-  DEFINE_BINARY2(F, T0, T1);              \
-  DEFINE_BINARY2(F, T2, T3)
-#define DEFINE_BINARY5(F, T0, T1, T2, T3, T4) \
-  DEFINE_BINARY2(F, T0, T1);                  \
-  DEFINE_BINARY3(F, T2, T3, T4)
-#define DEFINE_BINARY6(F, T0, T1, T2, T3, T4, T5) \
-  DEFINE_BINARY3(F, T0, T1, T2);                  \
-  DEFINE_BINARY3(F, T3, T4, T5)
-#define DEFINE_BINARY7(F, T0, T1, T2, T3, T4, T5, T6) \
-  DEFINE_BINARY3(F, T0, T1, T2);                      \
-  DEFINE_BINARY4(F, T3, T4, T5, T6)
-#define DEFINE_BINARY8(F, T0, T1, T2, T3, T4, T5, T6, T7) \
-  DEFINE_BINARY4(F, T0, T1, T2, T3);                      \
-  DEFINE_BINARY4(F, T4, T5, T6, T7)
-#define DEFINE_BINARY9(F, T0, T1, T2, T3, T4, T5, T6, T7, T8) \
-  DEFINE_BINARY4(F, T0, T1, T2, T3);                          \
-  DEFINE_BINARY5(F, T4, T5, T6, T7, T8)
-#define DEFINE_BINARY10(F, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9) \
-  DEFINE_BINARY5(F, T0, T1, T2, T3, T4);                           \
-  DEFINE_BINARY5(F, T5, T6, T7, T8, T9)
-
-}  // end namespace functor
-}  // end namespace tensorflow
-
-#endif  // TENSORFLOW_CORE_KERNELS_CWISE_OPS_SYCL_COMMON_H_
diff --git a/tensorflow/core/kernels/cwise_ops_test.cc b/tensorflow/core/kernels/cwise_ops_test.cc
index bc77a11..61f4b89 100644
--- a/tensorflow/core/kernels/cwise_ops_test.cc
+++ b/tensorflow/core/kernels/cwise_ops_test.cc
@@ -56,17 +56,11 @@
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 BM_UNARY(gpu, Floor, float, DT_FLOAT);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-BM_UNARY(sycl, Floor, float, DT_FLOAT);
-#endif  // TENSORFLOW_USE_SYCL
 
 BM_UNARY(cpu, Floor, double, DT_DOUBLE);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 BM_UNARY(gpu, Floor, double, DT_DOUBLE);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-BM_UNARY(sycl, Floor, double, DT_DOUBLE);
-#endif  // TENSORFLOW_USE_SYCL
 
 BM_UNARY(cpu, Conj, std::complex<float>, DT_COMPLEX64);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
@@ -134,25 +128,16 @@
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 BM_BINARY_SCALAR(gpu, Less);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-BM_BINARY_SCALAR(sycl, Less);
-#endif  // TENSORFLOW_USE_SYCL
 
 BM_BINARY_SCALAR(cpu, Add);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 BM_BINARY_SCALAR(gpu, Add);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-BM_BINARY_SCALAR(sycl, Add);
-#endif  // TENSORFLOW_USE_SYCL
 
 BM_BINARY_SCALAR(cpu, DivNoNan);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 BM_BINARY_SCALAR(gpu, DivNoNan);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-BM_BINARY_SCALAR(sycl, DivNoNan);
-#endif  // TENSORFLOW_USE_SYCL
 
 #undef BM_BINARY_SCALAR
 
@@ -209,11 +194,6 @@
 BM_CUBE(gpu, CubeWithTwoMuls);
 BM_CUBE(gpu, CubeWithMulSquare);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-BM_CUBE(sycl, CubeWithPow3);
-BM_CUBE(sycl, CubeWithTwoMuls);
-BM_CUBE(sycl, CubeWithMulSquare);
-#endif  // TENSORFLOW_USE_SYCL
 
 #undef BM_CUBE
 
@@ -367,9 +347,6 @@
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 BM_BCAST_ADD_ROW_ALL(gpu);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-BM_BCAST_ADD_ROW_ALL(sycl);
-#endif  // TENSORFLOW_USE_SYCL
 #undef BM_BCAST_ADD_ROW_ALL
 #undef BM_BCAST_ADD_ROW
 
@@ -394,9 +371,6 @@
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 BM_BCAST_ADD_COL_ALL(gpu);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-BM_BCAST_ADD_COL_ALL(sycl);
-#endif  // TENSORFLOW_USE_SYCL
 #undef BM_BCAST_ADD_COL_ALL
 #undef BM_BCAST_ADD_COL
 
@@ -422,9 +396,6 @@
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 BM_BCAST_ADD_CROSS_RC_ALL(gpu);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-BM_BCAST_ADD_CROSS_RC_ALL(sycl);
-#endif  // TENSORFLOW_USE_SYCL
 #undef BM_BCAST_ADD_CROSS_RC_ALL
 #undef BM_BCAST_ADD_CROSS_RC
 
@@ -450,9 +421,6 @@
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 BM_BCAST_ADD_CROSS_CR_ALL(gpu);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-BM_BCAST_ADD_CROSS_CR_ALL(sycl);
-#endif  // TENSORFLOW_USE_SYCL
 #undef BM_BCAST_ADD_CROSS_CR_ALL
 #undef BM_BCAST_ADD_CROSS_CR
 
diff --git a/tensorflow/core/kernels/debug_ops.cc b/tensorflow/core/kernels/debug_ops.cc
index db42b9f..92abc7a 100644
--- a/tensorflow/core/kernels/debug_ops.cc
+++ b/tensorflow/core/kernels/debug_ops.cc
@@ -38,15 +38,6 @@
                         CopyOp);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("Copy").Device(DEVICE_SYCL), CopyOp);
-
-REGISTER_KERNEL_BUILDER(Name("CopyHost")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("input")
-                            .HostMemory("output"),
-                        CopyOp);
-#endif  // TENSORFLOW_USE_SYCL
 
 // Register debug identity (non-ref and ref) ops.
 REGISTER_KERNEL_BUILDER(Name("DebugIdentity").Device(DEVICE_CPU),
@@ -60,13 +51,6 @@
                         DebugIdentityOp);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("DebugIdentity")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("input")
-                            .HostMemory("output"),
-                        DebugIdentityOp);
-#endif  // TENSORFLOW_USE_SYCL
 
 // Register debug NaN-counter (non-ref and ref) ops.
 #define REGISTER_DEBUG_NAN_COUNT(type)                                    \
@@ -88,17 +72,6 @@
 REGISTER_GPU_DEBUG_NAN_COUNT(double);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_GPU_DEBUG_NAN_COUNT(type)                \
-  REGISTER_KERNEL_BUILDER(Name("DebugNanCount")           \
-                              .Device(DEVICE_SYCL)        \
-                              .HostMemory("input")        \
-                              .HostMemory("output")       \
-                              .TypeConstraint<type>("T"), \
-                          DebugNanCountOp<type>);
-REGISTER_GPU_DEBUG_NAN_COUNT(float);
-REGISTER_GPU_DEBUG_NAN_COUNT(double);
-#endif  // TENSORFLOW_USE_SYCL
 
 // Register debug numeric summary ops.
 #define REGISTER_DEBUG_NUMERIC_SUMMARY_COUNT(type)        \
@@ -125,19 +98,6 @@
 TF_CALL_double(REGISTER_GPU_DEBUG_NUMERIC_SUMMARY_COUNT);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_DEBUG_NUMERIC_SUMMARY_COUNT(type)   \
-  REGISTER_KERNEL_BUILDER(Name("DebugNumericSummary")     \
-                              .Device(DEVICE_SYCL)        \
-                              .HostMemory("input")        \
-                              .HostMemory("output")       \
-                              .TypeConstraint<type>("T"), \
-                          DebugNumericSummaryOp<type>);
-TF_CALL_bool(REGISTER_SYCL_DEBUG_NUMERIC_SUMMARY_COUNT);
-TF_CALL_INTEGRAL_TYPES(REGISTER_SYCL_DEBUG_NUMERIC_SUMMARY_COUNT);
-TF_CALL_float(REGISTER_SYCL_DEBUG_NUMERIC_SUMMARY_COUNT);
-TF_CALL_double(REGISTER_SYCL_DEBUG_NUMERIC_SUMMARY_COUNT);
-#endif  // TENSORFLOW_USE_SYCL
 
 REGISTER_KERNEL_BUILDER(Name("DebugIdentityV2").Device(DEVICE_CPU),
                         DebugIdentityV2Op);
diff --git a/tensorflow/core/kernels/debug_ops.h b/tensorflow/core/kernels/debug_ops.h
index 0b256a0..b7cb7eb 100644
--- a/tensorflow/core/kernels/debug_ops.h
+++ b/tensorflow/core/kernels/debug_ops.h
@@ -31,9 +31,6 @@
 #include "tensorflow/core/platform/rocm.h"
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-#include "tensorflow/core/common_runtime/sycl/sycl_util.h"
-#endif  // TENSORFLOW_USE_SYCL
 #include "tensorflow/core/debug/debug_io_utils.h"
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -100,17 +97,6 @@
         // The input tensor is on the host (CPU): deep-copy from CPU to CPU.
         *copied_tensor = tensor::DeepCopy(src_tensor);
       }
-#elif defined(TENSORFLOW_USE_SYCL)
-      Device* device = static_cast<Device*>(context->device());
-      // Determine if the input tensor is not on CPU (e.g., on GPU).
-      const bool off_host_input = device->device_type() == DEVICE_SYCL &&
-                                  !context->input_alloc_attr(0).on_host();
-
-      if (off_host_input) {
-        SYCLmemcpy(context->eigen_sycl_device(), src_tensor, copied_tensor);
-      } else {
-        *copied_tensor = tensor::DeepCopy(src_tensor);
-      }
 #else
       *copied_tensor = tensor::DeepCopy(src_tensor);
 #endif
diff --git a/tensorflow/core/kernels/dense_update_functor.h b/tensorflow/core/kernels/dense_update_functor.h
index 61b5731..791d4b3 100644
--- a/tensorflow/core/kernels/dense_update_functor.h
+++ b/tensorflow/core/kernels/dense_update_functor.h
@@ -27,9 +27,6 @@
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 enum DenseUpdateType { ADD, SUB, ASSIGN };
 
@@ -65,31 +62,6 @@
   }
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-struct DenseUpdate<SYCLDevice, T, ADD> {
-  void operator()(const SYCLDevice& d, typename TTypes<T>::Flat params,
-                  typename TTypes<T>::ConstFlat update) {
-    params.device(d) += update;
-  }
-};
-
-template <typename T>
-struct DenseUpdate<SYCLDevice, T, SUB> {
-  void operator()(const SYCLDevice& d, typename TTypes<T>::Flat params,
-                  typename TTypes<T>::ConstFlat update) {
-    params.device(d) -= update;
-  }
-};
-
-template <typename T>
-struct DenseUpdate<SYCLDevice, T, ASSIGN> {
-  void operator()(const SYCLDevice& d, typename TTypes<T>::Flat params,
-                  typename TTypes<T>::ConstFlat update) {
-    params.device(d) = update;
-  }
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // end namespace functor
 
diff --git a/tensorflow/core/kernels/dense_update_ops.cc b/tensorflow/core/kernels/dense_update_ops.cc
index 71235fc..f27eab8 100644
--- a/tensorflow/core/kernels/dense_update_ops.cc
+++ b/tensorflow/core/kernels/dense_update_ops.cc
@@ -87,9 +87,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 #define REGISTER_KERNELS(type)                                     \
   REGISTER_KERNEL_BUILDER(                                         \
@@ -117,15 +114,6 @@
 #undef REGISTER_GPU_KERNELS
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNELS(type)                                 \
-  REGISTER_KERNEL_BUILDER(                                          \
-      Name("Assign").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      AssignOpT<SYCLDevice, type>);
-
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNELS);
-#undef REGISTER_SYCL_KERNELS
-#endif  // TENSORFLOW_USE_SYCL
 
 #define REGISTER_KERNELS(type)                                        \
   REGISTER_KERNEL_BUILDER(                                            \
@@ -151,16 +139,4 @@
 #undef REGISTER_GPU_KERNELS
 #endif  // end GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNELS(type)                                    \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("AssignAdd").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      DenseUpdateOp<SYCLDevice, type, DenseUpdateType::ADD>);          \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("AssignSub").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      DenseUpdateOp<SYCLDevice, type, DenseUpdateType::SUB>);
-
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNELS);
-#undef REGISTER_SYCL_KERNELS
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/dynamic_stitch_op.cc b/tensorflow/core/kernels/dynamic_stitch_op.cc
index 5f6b035..cad691a 100644
--- a/tensorflow/core/kernels/dynamic_stitch_op.cc
+++ b/tensorflow/core/kernels/dynamic_stitch_op.cc
@@ -365,24 +365,4 @@
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_DYNAMIC_STITCH_SYCL(type)               \
-  REGISTER_KERNEL_BUILDER(Name("DynamicStitch")          \
-                              .Device(DEVICE_SYCL)       \
-                              .TypeConstraint<type>("T") \
-                              .HostMemory("indices")     \
-                              .HostMemory("data")        \
-                              .HostMemory("merged"),     \
-                          DynamicStitchOpCPU<type>)      \
-  REGISTER_KERNEL_BUILDER(Name("ParallelDynamicStitch")  \
-                              .Device(DEVICE_SYCL)       \
-                              .TypeConstraint<type>("T") \
-                              .HostMemory("indices")     \
-                              .HostMemory("data")        \
-                              .HostMemory("merged"),     \
-                          ParallelDynamicStitchOpCPU<type>)
-
-TF_CALL_POD_STRING_TYPES(REGISTER_DYNAMIC_STITCH_SYCL);
-#undef REGISTER_DYNAMIC_STITCH_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
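
The registrations deleted here had no SYCL kernel behind them: every argument is pinned to host memory so the unmodified CPU implementation runs while the op is nominally placed on DEVICE_SYCL. A sketch of that pattern, mirroring the int32 GPU registration style used elsewhere in this file (illustrative only, not code added by this change):

    // Sketch: reuse a CPU kernel on an accelerator device by pinning all
    // arguments to host memory; only the device label changes.
    REGISTER_KERNEL_BUILDER(Name("DynamicStitch")
                                .Device(DEVICE_GPU)
                                .TypeConstraint<int32>("T")
                                .HostMemory("indices")
                                .HostMemory("data")
                                .HostMemory("merged"),
                            DynamicStitchOpCPU<int32>);
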
diff --git a/tensorflow/core/kernels/fill_functor.cc b/tensorflow/core/kernels/fill_functor.cc
index 0619fac..140497b 100644
--- a/tensorflow/core/kernels/fill_functor.cc
+++ b/tensorflow/core/kernels/fill_functor.cc
@@ -63,26 +63,6 @@
 DEFINE_SETZERO_CPU(Variant);
 #undef DEFINE_SETZERO_CPU
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-void SetZeroFunctor<Eigen::SyclDevice, T>::operator()(
-    const Eigen::SyclDevice& d, typename TTypes<T>::Flat out) {
-  To32Bit(out).device(d) = To32Bit(out).constant(T(0));
-}
-
-#define DEFINE_SETZERO_SYCL(T) \
-  template struct SetZeroFunctor<Eigen::SyclDevice, T>;
-DEFINE_SETZERO_SYCL(bool);
-DEFINE_SETZERO_SYCL(float);
-DEFINE_SETZERO_SYCL(double);
-DEFINE_SETZERO_SYCL(uint8);
-DEFINE_SETZERO_SYCL(int8);
-DEFINE_SETZERO_SYCL(uint16);
-DEFINE_SETZERO_SYCL(int16);
-DEFINE_SETZERO_SYCL(int32);
-DEFINE_SETZERO_SYCL(int64);
-#undef DEFINE_SETZERO_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename T>
 void SetOneFunctor<Eigen::ThreadPoolDevice, T>::operator()(
@@ -110,20 +90,6 @@
 DEFINE_SETONE_CPU(complex128);
 #undef DEFINE_SETONE_CPU
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-void SetOneFunctor<Eigen::SyclDevice, T>::operator()(
-    const Eigen::SyclDevice& d, typename TTypes<T>::Flat out) {
-  out.device(d) = out.constant(T(1));
-}
-
-#define DEFINE_SETONE_SYCL(T) \
-  template struct SetOneFunctor<Eigen::SyclDevice, T>;
-DEFINE_SETONE_SYCL(float);
-DEFINE_SETONE_SYCL(bool);
-DEFINE_SETONE_SYCL(double);
-#undef DEFINE_SETONE_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename T>
 struct FillFunctor<Eigen::ThreadPoolDevice, T> {
@@ -145,29 +111,6 @@
 DEFINE_FILL_CPU(qint16);
 #undef DEFINE_FILL_CPU
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-struct FillFunctor<Eigen::SyclDevice, T> {
-  void operator()(const Eigen::SyclDevice& d, typename TTypes<T>::Flat out,
-                  typename TTypes<T>::ConstScalar in) {
-#if !defined(EIGEN_HAS_INDEX_LIST)
-    Eigen::array<int, 1> rank1{1};
-#else
-    Eigen::IndexList<Eigen::type2index<1> > rank1;
-#endif
-    const int size = out.dimension(0);
-    Eigen::array<int, 1> broadcast_dims{size};
-
-    To32Bit(out).device(d) = in.reshape(rank1).broadcast(broadcast_dims);
-  }
-};
-
-#define DEFINE_FILL_SYCL(T) template struct FillFunctor<Eigen::SyclDevice, T>;
-DEFINE_FILL_SYCL(float);
-DEFINE_FILL_SYCL(double);
-TF_CALL_INTEGRAL_TYPES(DEFINE_FILL_SYCL)
-#undef DEFINE_FILL_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace functor
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/fill_functor.h b/tensorflow/core/kernels/fill_functor.h
index a9a47c6..7e2d558 100644
--- a/tensorflow/core/kernels/fill_functor.h
+++ b/tensorflow/core/kernels/fill_functor.h
@@ -45,13 +45,6 @@
                   typename TTypes<T>::Flat out);
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-// Partial specialization of SetZeroFunctor<Device=Eigen::SyclDevice, T>.
-template <typename T>
-struct SetZeroFunctor<Eigen::SyclDevice, T> {
-  void operator()(const Eigen::SyclDevice& d, typename TTypes<T>::Flat out);
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 template <>
 struct SetZeroFunctor<Eigen::ThreadPoolDevice, tstring> {
@@ -72,13 +65,6 @@
                   typename TTypes<T>::Flat out);
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-// Partial specialization of SetOneFunctor<Device=Eigen::SyclDevice, T>.
-template <typename T>
-struct SetOneFunctor<Eigen::SyclDevice, T> {
-  void operator()(const Eigen::SyclDevice& d, typename TTypes<T>::Flat out);
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 template <>
 struct SetOneFunctor<Eigen::ThreadPoolDevice, tstring> {
diff --git a/tensorflow/core/kernels/function_ops.cc b/tensorflow/core/kernels/function_ops.cc
index 0af095f..82b1aa8 100644
--- a/tensorflow/core/kernels/function_ops.cc
+++ b/tensorflow/core/kernels/function_ops.cc
@@ -94,28 +94,6 @@
 // is turned on.
 REGISTER_KERNEL_BUILDER(Name(kRetOp).Device(DEVICE_TPU_SYSTEM), RetvalOp);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER(type)     \
-  REGISTER_KERNEL_BUILDER( \
-      Name(kArgOp).Device(DEVICE_SYCL).TypeConstraint<type>("T"), ArgOp);
-TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER)
-TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name(kArgOp)
-                                                   .Device(DEVICE_SYCL)
-                                                   .HostMemory("output")
-                                                   .TypeConstraint<int32>("T"),
-                                               ArgOp);
-#undef REGISTER
-#define REGISTER(type)     \
-  REGISTER_KERNEL_BUILDER( \
-      Name(kRetOp).Device(DEVICE_SYCL).TypeConstraint<type>("T"), RetvalOp);
-TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER)
-TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name(kRetOp)
-                                                   .Device(DEVICE_SYCL)
-                                                   .HostMemory("input")
-                                                   .TypeConstraint<int32>("T"),
-                                               RetvalOp);
-#undef REGISTER
-#endif
 
 #define REGISTER(type)     \
   REGISTER_KERNEL_BUILDER( \
@@ -225,33 +203,6 @@
                             .TypeConstraint<int32>("T"),
                         PassOn);
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNELS(type)                                       \
-  REGISTER_KERNEL_BUILDER(                                                \
-      Name("_ListToArray").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      PassOn);                                                            \
-  REGISTER_KERNEL_BUILDER(                                                \
-      Name("_ArrayToList").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      PassOn);
-
-REGISTER_SYCL_KERNELS(float);
-REGISTER_SYCL_KERNELS(double);
-
-#undef REGISTER_SYCL_KERNELS
-
-REGISTER_KERNEL_BUILDER(Name("_ListToArray")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("input")
-                            .HostMemory("output")
-                            .TypeConstraint<int32>("T"),
-                        PassOn);
-REGISTER_KERNEL_BUILDER(Name("_ArrayToList")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("input")
-                            .HostMemory("output")
-                            .TypeConstraint<int32>("T"),
-                        PassOn);
-#endif  // TENSORFLOW_USE_SYCL
 
 class SymbolicGradientOp : public AsyncOpKernel {
  public:
@@ -309,11 +260,6 @@
                         SymbolicGradientOp);
 REGISTER_KERNEL_BUILDER(Name(kGradientOp).Device(DEVICE_GPU),
                         SymbolicGradientOp);
-#if TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name(kGradientOp).Device(DEVICE_SYCL),
-                        SymbolicGradientOp);
-
-#endif  // TENSORFLOW_USE_SYCL
 
 RemoteCallOp::RemoteCallOp(OpKernelConstruction* ctx) : AsyncOpKernel(ctx) {
   OP_REQUIRES_OK(ctx,
@@ -449,9 +395,4 @@
     Name("RemoteCall").Device(DEVICE_CPU).HostMemory("target"), RemoteCallOp);
 REGISTER_KERNEL_BUILDER(
     Name("RemoteCall").Device(DEVICE_GPU).HostMemory("target"), RemoteCallOp);
-#if TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(
-    Name("RemoteCall").Device(DEVICE_SYCL).HostMemory("target"), RemoteCallOp);
-
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
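
Two conventions recur in every block deleted from this file. Kernel registration is done with an X-macro expanded over a TF_CALL_* type list, and the int32 variants are pinned to host memory because TensorFlow treats int32 tensors as shape metadata that stays on the host even when the op is placed on an accelerator. A reduced sketch of the registration idiom, using a hypothetical MyOp:

    // Hypothetical op used only to illustrate the X-macro idiom.
    #define REGISTER(type)                                           \
      REGISTER_KERNEL_BUILDER(                                       \
          Name("MyOp").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
          MyOp<type>);
    TF_CALL_float(REGISTER)
    TF_CALL_double(REGISTER)
    #undef REGISTER
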
diff --git a/tensorflow/core/kernels/host_constant_op.cc b/tensorflow/core/kernels/host_constant_op.cc
index cb1afdb1..dbba1fe 100644
--- a/tensorflow/core/kernels/host_constant_op.cc
+++ b/tensorflow/core/kernels/host_constant_op.cc
@@ -54,13 +54,6 @@
                         _HostConstantOp);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("Const")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("output")
-                            .TypeConstraint<int32>("dtype"),
-                        _HostConstantOp);
-#endif  // TENSORFLOW_USE_SYCL
 
 // HostConst: forced to generate output on the host.
 REGISTER_KERNEL_BUILDER(Name("HostConst").Device(DEVICE_CPU), _HostConstantOp);
diff --git a/tensorflow/core/kernels/identity_op.cc b/tensorflow/core/kernels/identity_op.cc
index aee7b54..b5a17d8 100644
--- a/tensorflow/core/kernels/identity_op.cc
+++ b/tensorflow/core/kernels/identity_op.cc
@@ -60,45 +60,6 @@
                             .HostMemory("output"),
                         IdentityOp);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                                           \
-  REGISTER_KERNEL_BUILDER(                                                   \
-      Name("Identity").Device(DEVICE_SYCL).TypeConstraint<type>("T"),        \
-      IdentityOp);                                                           \
-  REGISTER_KERNEL_BUILDER(                                                   \
-      Name("PreventGradient").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      IdentityOp);                                                           \
-  REGISTER_KERNEL_BUILDER(                                                   \
-      Name("RefIdentity").Device(DEVICE_SYCL).TypeConstraint<type>("T"),     \
-      IdentityOp);                                                           \
-  REGISTER_KERNEL_BUILDER(                                                   \
-      Name("StopGradient").Device(DEVICE_SYCL).TypeConstraint<type>("T"),    \
-      IdentityOp)
-
-TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
-
-#undef REGISTER_SYCL_KERNEL
-
-#define REGISTER_SYCL_HOST_KERNEL(type)                   \
-  REGISTER_KERNEL_BUILDER(Name("Identity")                \
-                              .Device(DEVICE_SYCL)        \
-                              .HostMemory("input")        \
-                              .HostMemory("output")       \
-                              .TypeConstraint<type>("T"), \
-                          IdentityOp);                    \
-  REGISTER_KERNEL_BUILDER(Name("RefIdentity")             \
-                              .Device(DEVICE_SYCL)        \
-                              .HostMemory("input")        \
-                              .HostMemory("output")       \
-                              .TypeConstraint<type>("T"), \
-                          IdentityOp)
-
-REGISTER_SYCL_HOST_KERNEL(int32);
-REGISTER_SYCL_HOST_KERNEL(bool);
-
-#undef REGISTER_SYCL_HOST_KERNEL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 #define REGISTER_GPU_KERNEL(type)                                           \
   REGISTER_KERNEL_BUILDER(                                                  \
diff --git a/tensorflow/core/kernels/image/adjust_contrast_op.cc b/tensorflow/core/kernels/image/adjust_contrast_op.cc
index 6853465..b43964a 100644
--- a/tensorflow/core/kernels/image/adjust_contrast_op.cc
+++ b/tensorflow/core/kernels/image/adjust_contrast_op.cc
@@ -33,9 +33,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif
 
 // AdjustContrastOp is deprecated as of GraphDef version >= 2
 
@@ -434,26 +431,5 @@
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-template <>
-class AdjustContrastOpv2<SYCLDevice, float> : public AdjustContrastOpV2Base {
- public:
-  explicit AdjustContrastOpv2(OpKernelConstruction* context)
-      : AdjustContrastOpV2Base(context) {}
-
-  void DoCompute(OpKernelContext* context,
-                 const ComputeOptions& options) override {
-    const int64 shape[4] = {options.batch, options.height, options.width,
-                            options.channels};
-    functor::AdjustContrastv2<SYCLDevice>()(
-        context->eigen_device<SYCLDevice>(),
-        options.input->shaped<float, 4>(shape), options.factor->scalar<float>(),
-        options.output->shaped<float, 4>(shape));
-  }
-};
-REGISTER_KERNEL_BUILDER(
-    Name("AdjustContrastv2").Device(DEVICE_SYCL).TypeConstraint<float>("T"),
-    AdjustContrastOpv2<SYCLDevice, float>);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/image/adjust_contrast_op_benchmark_test.cc b/tensorflow/core/kernels/image/adjust_contrast_op_benchmark_test.cc
index 0b9142c..bcbbc24 100644
--- a/tensorflow/core/kernels/image/adjust_contrast_op_benchmark_test.cc
+++ b/tensorflow/core/kernels/image/adjust_contrast_op_benchmark_test.cc
@@ -60,8 +60,5 @@
     (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 BM_AdjustContrastDev(gpu, 32, 299, 299);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-BM_AdjustContrastDev(sycl, 32, 299, 299);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/image/colorspace_op.cc b/tensorflow/core/kernels/image/colorspace_op.cc
index a3164bb..8e81038 100644
--- a/tensorflow/core/kernels/image/colorspace_op.cc
+++ b/tensorflow/core/kernels/image/colorspace_op.cc
@@ -36,9 +36,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif
 
 template <typename Device, typename T>
 class RGBToHSVOp : public OpKernel {
@@ -150,16 +147,5 @@
 TF_CALL_double(REGISTER_GPU);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL(T)                                           \
-  REGISTER_KERNEL_BUILDER(                                         \
-      Name("RGBToHSV").Device(DEVICE_SYCL).TypeConstraint<T>("T"), \
-      RGBToHSVOp<SYCLDevice, T>);                                  \
-  REGISTER_KERNEL_BUILDER(                                         \
-      Name("HSVToRGB").Device(DEVICE_SYCL).TypeConstraint<T>("T"), \
-      HSVToRGBOp<SYCLDevice, T>);
-TF_CALL_float(REGISTER_SYCL);
-TF_CALL_double(REGISTER_SYCL);
-#endif
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/inplace_ops.cc b/tensorflow/core/kernels/inplace_ops.cc
index b5191b9..1849cb4 100644
--- a/tensorflow/core/kernels/inplace_ops.cc
+++ b/tensorflow/core/kernels/inplace_ops.cc
@@ -25,9 +25,6 @@
 
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SyclDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace functor {
 
@@ -60,23 +57,6 @@
   }
 }
 
-#ifdef TENSORFLOW_USE_SYCL
-template <>
-Status DoParallelConcat(const SyclDevice& d, const Tensor& value, int32 loc,
-                        Tensor* output) {
-  CHECK_EQ(value.dtype(), output->dtype());
-  switch (value.dtype()) {
-#define CASE(type)                  \
-  case DataTypeToEnum<type>::value: \
-    return DoParallelConcatUpdate<SyclDevice, type>(d, value, loc, output);
-    TF_CALL_GPU_NUMBER_TYPES_NO_HALF(CASE);
-#undef CASE
-    default:
-      return errors::InvalidArgument("Unsupported data type: ",
-                                     DataTypeString(value.dtype()));
-  }
-}
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // end namespace functor
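
The deleted SyclDevice overload of DoParallelConcat duplicated the runtime-dtype dispatch used by the other devices: a switch over value.dtype() whose cases are generated by a macro expanded over a type list. A reduced sketch of that dispatch shape (DoUpdate is a hypothetical stand-in for the per-type worker):

    // Runtime DataType -> compile-time type dispatch via an X-macro.
    template <typename Device>
    Status DispatchByDType(const Device& d, const Tensor& value, int32 loc,
                           Tensor* output) {
      switch (value.dtype()) {
    #define CASE(type)                  \
      case DataTypeToEnum<type>::value: \
        return DoUpdate<Device, type>(d, value, loc, output);
        TF_CALL_GPU_NUMBER_TYPES_NO_HALF(CASE);
    #undef CASE
        default:
          return errors::InvalidArgument("Unsupported data type: ",
                                         DataTypeString(value.dtype()));
      }
    }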
 
@@ -175,41 +155,6 @@
 TF_CALL_POD_STRING_TYPES(REGISTER_PARALLEL_CONCAT);
 #undef REGISTER_PARALLEL_CONCAT
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_EMPTY(type)                                  \
-  REGISTER_KERNEL_BUILDER(Name("_ParallelConcatStart")        \
-                              .Device(DEVICE_SYCL)            \
-                              .TypeConstraint<type>("dtype"), \
-                          ParallelConcatStart<SyclDevice, type>);
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_EMPTY)
-#undef REGISTER_EMPTY
-
-#define REGISTER_PARALLEL_CONCAT(type)                                      \
-  REGISTER_KERNEL_BUILDER(                                                  \
-      Name("ParallelConcat").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      FailureKernel);
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_PARALLEL_CONCAT);
-#undef REGISTER_PARALLEL_CONCAT
-
-#define REGISTER(type)                                    \
-  REGISTER_KERNEL_BUILDER(Name("_ParallelConcatUpdate")   \
-                              .Device(DEVICE_SYCL)        \
-                              .TypeConstraint<type>("T"), \
-                          ParallelConcatUpdate<SyclDevice>);
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER)
-#undef REGISTER
-
-// Register versions that operate on int32 data on the CPU even though the op
-// has been placed on the SYCL
-
-REGISTER_KERNEL_BUILDER(Name("_ParallelConcatUpdate")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("value")
-                            .HostMemory("update")
-                            .HostMemory("output")
-                            .TypeConstraint<int32>("T"),
-                        ParallelConcatUpdate<CPUDevice>);
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
diff --git a/tensorflow/core/kernels/map_stage_op.cc b/tensorflow/core/kernels/map_stage_op.cc
index 6c01e42..89b760e 100644
--- a/tensorflow/core/kernels/map_stage_op.cc
+++ b/tensorflow/core/kernels/map_stage_op.cc
@@ -556,18 +556,6 @@
                         MapStageOp<true>);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("MapStage")
-                            .HostMemory("key")
-                            .HostMemory("indices")
-                            .Device(DEVICE_SYCL),
-                        MapStageOp<false>);
-REGISTER_KERNEL_BUILDER(Name("OrderedMapStage")
-                            .HostMemory("key")
-                            .HostMemory("indices")
-                            .Device(DEVICE_SYCL),
-                        MapStageOp<true>);
-#endif  // TENSORFLOW_USE_SYCL
 
 template <bool Ordered>
 class MapUnstageOp : public OpKernel {
@@ -617,18 +605,6 @@
                             .Device(DEVICE_GPU),
                         MapUnstageOp<true>);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("MapUnstage")
-                            .HostMemory("key")
-                            .HostMemory("indices")
-                            .Device(DEVICE_SYCL),
-                        MapUnstageOp<false>);
-REGISTER_KERNEL_BUILDER(Name("OrderedMapUnstage")
-                            .HostMemory("key")
-                            .HostMemory("indices")
-                            .Device(DEVICE_SYCL),
-                        MapUnstageOp<true>);
-#endif  // TENSORFLOW_USE_SYCL
 
 template <bool Ordered>
 class MapPeekOp : public OpKernel {
@@ -676,16 +652,6 @@
                         MapPeekOp<true>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(
-    Name("MapPeek").HostMemory("key").HostMemory("indices").Device(DEVICE_SYCL),
-    MapPeekOp<false>);
-REGISTER_KERNEL_BUILDER(Name("OrderedMapPeek")
-                            .HostMemory("key")
-                            .HostMemory("indices")
-                            .Device(DEVICE_SYCL),
-                        MapPeekOp<true>);
-#endif  // TENSORFLOW_USE_SYCL
 
 template <bool Ordered>
 class MapUnstageNoKeyOp : public OpKernel {
@@ -741,18 +707,6 @@
                         MapUnstageNoKeyOp<true>);
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("MapUnstageNoKey")
-                            .HostMemory("key")
-                            .HostMemory("indices")
-                            .Device(DEVICE_SYCL),
-                        MapUnstageNoKeyOp<false>);
-REGISTER_KERNEL_BUILDER(Name("OrderedMapUnstageNoKey")
-                            .HostMemory("key")
-                            .HostMemory("indices")
-                            .Device(DEVICE_SYCL),
-                        MapUnstageNoKeyOp<true>);
-#endif  // TENSORFLOW_USE_SYCL
 
 template <bool Ordered>
 class MapSizeOp : public OpKernel {
@@ -784,13 +738,6 @@
     Name("OrderedMapSize").Device(DEVICE_GPU).HostMemory("size"),
     MapSizeOp<true>);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("MapSize").Device(DEVICE_SYCL).HostMemory("size"),
-                        MapSizeOp<false>);
-REGISTER_KERNEL_BUILDER(
-    Name("OrderedMapSize").Device(DEVICE_SYCL).HostMemory("size"),
-    MapSizeOp<true>);
-#endif  // TENSORFLOW_USE_SYCL
 
 template <bool Ordered>
 class MapIncompleteSizeOp : public OpKernel {
@@ -824,14 +771,6 @@
     Name("OrderedMapIncompleteSize").Device(DEVICE_GPU).HostMemory("size"),
     MapIncompleteSizeOp<true>);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(
-    Name("MapIncompleteSize").Device(DEVICE_SYCL).HostMemory("size"),
-    MapIncompleteSizeOp<false>);
-REGISTER_KERNEL_BUILDER(
-    Name("OrderedMapIncompleteSize").Device(DEVICE_SYCL).HostMemory("size"),
-    MapIncompleteSizeOp<true>);
-#endif  // TENSORFLOW_USE_SYCL
 
 template <bool Ordered>
 class MapClearOp : public OpKernel {
@@ -856,12 +795,6 @@
 REGISTER_KERNEL_BUILDER(Name("OrderedMapClear").Device(DEVICE_GPU),
                         MapClearOp<true>);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("MapClear").Device(DEVICE_SYCL),
-                        MapClearOp<false>);
-REGISTER_KERNEL_BUILDER(Name("OrderedMapClear").Device(DEVICE_SYCL),
-                        MapClearOp<true>);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/matmul_op.cc b/tensorflow/core/kernels/matmul_op.cc
index 2e3c120..3b57f09 100644
--- a/tensorflow/core/kernels/matmul_op.cc
+++ b/tensorflow/core/kernels/matmul_op.cc
@@ -36,9 +36,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename Device, typename T, bool USE_CUBLAS>
 struct LaunchMatMul;
@@ -123,18 +120,14 @@
       OpKernelContext* ctx, const Tensor& a, const Tensor& b,
       const Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1>& dim_pair,
       std::vector<AlgorithmType>* algorithms, bool use_autotune, Tensor* out) {
-#ifndef TENSORFLOW_USE_SYCL
     // An explicit vector-matrix multiply is much better optimized than an
     // implicit one and this is a bottleneck during non-batched inference.
     bool was_vector = ExplicitVectorMatrixOptimization<T>(a, b, dim_pair, out);
     if (!was_vector) {
-#endif  // TENSORFLOW_USE_SYCL
       functor::MatMulFunctor<Device, T>()(ctx->eigen_device<Device>(),
                                           out->matrix<T>(), a.matrix<T>(),
                                           b.matrix<T>(), dim_pair);
-#ifndef TENSORFLOW_USE_SYCL
     }
-#endif  // TENSORFLOW_USE_SYCL
   }
 
   static void GetBlasGemmAlgorithm(OpKernelConstruction* ctx,
@@ -148,13 +141,6 @@
 template <typename T, bool USE_CUBLAS>
 struct LaunchMatMul<CPUDevice, T, USE_CUBLAS> : public LaunchMatMulCPU<T> {};
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-struct LaunchMatMulSYCL : LaunchMatMulBase<SYCLDevice, T> {};
-
-template <typename T, bool USE_CUBLAS>
-struct LaunchMatMul<SYCLDevice, T, USE_CUBLAS> : public LaunchMatMulSYCL<T> {};
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
@@ -544,19 +530,6 @@
   }
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-// Partial specialization MatMulFunctor<Device=SYCLDevice, T>.
-template <typename T>
-struct MatMulFunctor<SYCLDevice, T> {
-  void operator()(
-      const SYCLDevice& d, typename MatMulTypes<T>::out_type out,
-      typename MatMulTypes<T>::in_type in0,
-      typename MatMulTypes<T>::in_type in1,
-      const Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1>& dim_pair) {
-    MatMul<SYCLDevice>(d, out, in0, in1, dim_pair);
-  }
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // end namespace functor
 
@@ -591,18 +564,4 @@
 TF_CALL_COMPLEX_TYPES(REGISTER_GPU);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL(T)                                         \
-  REGISTER_KERNEL_BUILDER(                                       \
-      Name("MatMul").Device(DEVICE_SYCL).TypeConstraint<T>("T"), \
-      MatMulOp<SYCLDevice, T, false /* xxblas */>);              \
-  REGISTER_KERNEL_BUILDER(Name("MatMul")                         \
-                              .Device(DEVICE_SYCL)               \
-                              .TypeConstraint<T>("T")            \
-                              .Label("eigen"),                   \
-                          MatMulOp<SYCLDevice, T, false /* xxblas */>)
-TF_CALL_float(REGISTER_SYCL);
-TF_CALL_double(REGISTER_SYCL);
-
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
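
Besides dropping the SYCL launcher and functor, this file loses the #ifndef TENSORFLOW_USE_SYCL guards around the explicit vector-matrix fast path, which now runs unconditionally in LaunchMatMulBase. That fast path amounts to dispatching a rank-1 by rank-2 Eigen contraction instead of the general matrix product; a self-contained sketch of that kind of contraction (assumed shapes, not the TensorFlow implementation):

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      // Vector (3) times matrix (3x4) as an explicit contraction over the
      // shared dimension; the special case the vector-matrix path exploits.
      Eigen::Tensor<float, 1> a(3);
      Eigen::Tensor<float, 2> b(3, 4);
      a.setRandom();
      b.setRandom();
      Eigen::array<Eigen::IndexPair<int>, 1> dims = {Eigen::IndexPair<int>(0, 0)};
      Eigen::Tensor<float, 1> out = a.contract(b, dims);  // shape {4}
      std::cout << out << "\n";
    }
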
diff --git a/tensorflow/core/kernels/nextafter_op.cc b/tensorflow/core/kernels/nextafter_op.cc
index d97b737..923fc23 100644
--- a/tensorflow/core/kernels/nextafter_op.cc
+++ b/tensorflow/core/kernels/nextafter_op.cc
@@ -22,15 +22,6 @@
 
 REGISTER2(BinaryOp, CPU, "NextAfter", functor::nextafter, float, double);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(TYPE)                                     \
-  REGISTER_KERNEL_BUILDER(                                             \
-      Name("NextAfter").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      BinaryOp<SYCLDevice, functor::nextafter<TYPE>>);
-REGISTER_SYCL_KERNEL(float);
-REGISTER_SYCL_KERNEL(double);
-#undef REGISTER_SYCL_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER2(BinaryOp, GPU, "NextAfter", functor::nextafter, float, double);
diff --git a/tensorflow/core/kernels/pack_op.cc b/tensorflow/core/kernels/pack_op.cc
index 04b5c72..1418159 100644
--- a/tensorflow/core/kernels/pack_op.cc
+++ b/tensorflow/core/kernels/pack_op.cc
@@ -34,9 +34,6 @@
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 typedef Eigen::GpuDevice GPUDevice;
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 // --------------------------------------------------------------------------
 template <typename Device, typename T>
@@ -115,12 +112,6 @@
         return;
       }
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#ifdef TENSORFLOW_USE_SYCL
-      if (std::is_same<Device, SYCLDevice>::value) {
-        ConcatSYCL<T>(c->eigen_sycl_device(), inputs_flat, &output_flat);
-        return;
-      }
-#endif  // TENSORFLOW_USE_SYCL
       ConcatCPU<T>(c->device(), inputs_flat, &output_flat);
     }
   }
@@ -170,19 +161,4 @@
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL(type)                                       \
-  REGISTER_KERNEL_BUILDER(                                        \
-      Name("Pack").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      PackOp<SYCLDevice, type>)
-
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL);
-REGISTER_KERNEL_BUILDER(Name("Pack")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("values")
-                            .HostMemory("output")
-                            .TypeConstraint<int32>("T"),
-                        PackOp<CPUDevice, int32>);
-#undef REGISTER_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/pad_op.cc b/tensorflow/core/kernels/pad_op.cc
index 0b40423..4a1d0cf 100644
--- a/tensorflow/core/kernels/pad_op.cc
+++ b/tensorflow/core/kernels/pad_op.cc
@@ -38,9 +38,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename Device, typename T, typename Tpadding>
 class PadOp : public OpKernel {
@@ -392,72 +389,5 @@
                         PadOp<CPUDevice, int32, int64>);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-// Registration of the GPU implementations.
-#define REGISTER_SYCL_KERNEL(T)                                   \
-  REGISTER_KERNEL_BUILDER(Name("Pad")                             \
-                              .Device(DEVICE_SYCL)                \
-                              .TypeConstraint<T>("T")             \
-                              .TypeConstraint<int32>("Tpaddings") \
-                              .HostMemory("paddings"),            \
-                          PadOp<SYCLDevice, T, int32>);           \
-  REGISTER_KERNEL_BUILDER(Name("Pad")                             \
-                              .Device(DEVICE_SYCL)                \
-                              .TypeConstraint<T>("T")             \
-                              .TypeConstraint<int64>("Tpaddings") \
-                              .HostMemory("paddings"),            \
-                          PadOp<SYCLDevice, T, int64>);           \
-  REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
-                              .Device(DEVICE_SYCL)                \
-                              .TypeConstraint<T>("T")             \
-                              .TypeConstraint<int32>("Tpaddings") \
-                              .HostMemory("paddings")             \
-                              .HostMemory("constant_values"),     \
-                          PadOp<SYCLDevice, T, int32>)            \
-  REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
-                              .Device(DEVICE_SYCL)                \
-                              .TypeConstraint<T>("T")             \
-                              .TypeConstraint<int64>("Tpaddings") \
-                              .HostMemory("paddings")             \
-                              .HostMemory("constant_values"),     \
-                          PadOp<SYCLDevice, T, int64>)
-
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNEL);
-REGISTER_KERNEL_BUILDER(Name("Pad")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int32>("Tpaddings")
-                            .HostMemory("input")
-                            .HostMemory("paddings")
-                            .HostMemory("output"),
-                        PadOp<CPUDevice, int32, int32>);
-REGISTER_KERNEL_BUILDER(Name("Pad")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int64>("Tpaddings")
-                            .HostMemory("input")
-                            .HostMemory("paddings")
-                            .HostMemory("output"),
-                        PadOp<CPUDevice, int32, int64>);
-REGISTER_KERNEL_BUILDER(Name("PadV2")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int32>("Tpaddings")
-                            .HostMemory("input")
-                            .HostMemory("paddings")
-                            .HostMemory("constant_values")
-                            .HostMemory("output"),
-                        PadOp<CPUDevice, int32, int32>);
-REGISTER_KERNEL_BUILDER(Name("PadV2")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int64>("Tpaddings")
-                            .HostMemory("input")
-                            .HostMemory("paddings")
-                            .HostMemory("constant_values")
-                            .HostMemory("output"),
-                        PadOp<CPUDevice, int32, int64>);
-#undef REGISTER_SYCL_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc
index a6b8954..6a1e2d5 100644
--- a/tensorflow/core/kernels/partitioned_function_ops.cc
+++ b/tensorflow/core/kernels/partitioned_function_ops.cc
@@ -289,11 +289,5 @@
 REGISTER_INPUT_COLOCATION_EXEMPTION("PartitionedCall");
 REGISTER_INPUT_COLOCATION_EXEMPTION("StatefulPartitionedCall");
 
-#if TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("PartitionedCall").Device(DEVICE_SYCL),
-                        PartitionedCallOp);
-REGISTER_KERNEL_BUILDER(Name("StatefulPartitionedCall").Device(DEVICE_SYCL),
-                        PartitionedCallOp);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/pooling_ops_3d.cc b/tensorflow/core/kernels/pooling_ops_3d.cc
index 532d861..1114e69 100644
--- a/tensorflow/core/kernels/pooling_ops_3d.cc
+++ b/tensorflow/core/kernels/pooling_ops_3d.cc
@@ -39,17 +39,11 @@
 #include "tensorflow/core/kernels/pooling_ops_3d_gpu.h"
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-#include "tensorflow/core/kernels/pooling_ops_3d_sycl.h"
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 Pool3dParameters::Pool3dParameters(OpKernelContext* context,
                                    const std::vector<int32>& ksize,
@@ -830,11 +824,6 @@
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNELS(T) REGISTER_KERNELS(SYCL, T)
-    TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNELS)
-#undef REGISTER_SYCL_KERNELS
-#endif  // TENSORFLOW_USE_SYCL
 
 #undef REGISTER_KERNELS
 
diff --git a/tensorflow/core/kernels/pooling_ops_3d_sycl.h b/tensorflow/core/kernels/pooling_ops_3d_sycl.h
deleted file mode 100644
index b4bead2..0000000
--- a/tensorflow/core/kernels/pooling_ops_3d_sycl.h
+++ /dev/null
@@ -1,758 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#if !TENSORFLOW_USE_SYCL
-#error This file must only be included when building with SYCL support
-#endif
-
-#ifndef TENSORFLOW_CORE_KERNELS_POOLING_OP_3D_SYCL_H_
-#define TENSORFLOW_CORE_KERNELS_POOLING_OP_3D_SYCL_H_
-
-#include "tensorflow/core/kernels/pooling_ops_3d.h"
-
-namespace tensorflow {
-
-typedef Eigen::SyclDevice SYCLDevice;
-
-// Helper struct to contain the various pool parameters used in the SYCL
-// pooling kernels. Similar to the Pool3dParameters, but with a number of
-// convenient constructors.
-struct SYCL3DPoolParams {
-  SYCL3DPoolParams(const int depth, const int batch, const int in_planes,
-                   const int in_rows, const int in_cols, const int out_planes,
-                   const int out_rows, const int out_cols,
-                   const std::array<int64, 3>& window,
-                   const std::array<int64, 3>& stride,
-                   const std::array<int64, 3>& padding)
-      : depth_(depth),
-        batch_(batch),
-        in_planes_(in_planes),
-        in_rows_(in_rows),
-        in_cols_(in_cols),
-        window_planes_(window[2]),
-        window_rows_(window[1]),
-        window_cols_(window[0]),
-        stride_planes_(stride[2]),
-        stride_rows_(stride[1]),
-        stride_cols_(stride[0]),
-        out_planes_(out_planes),
-        out_rows_(out_rows),
-        out_cols_(out_cols),
-        pad_planes_(padding[2]),
-        pad_rows_(padding[1]),
-        pad_cols_(padding[0]) {}
-
-  SYCL3DPoolParams(const int depth, const int batch, const int in_planes,
-                   const int in_rows, const int in_cols,
-                   const std::array<int64, 3>& out_shape,
-                   const std::array<int64, 3>& window,
-                   const std::array<int64, 3>& stride,
-                   const std::array<int64, 3>& padding)
-      : SYCL3DPoolParams(depth, batch, in_planes, in_rows, in_cols,
-                         out_shape[2], out_shape[1], out_shape[0], window,
-                         stride, padding) {}
-
-  SYCL3DPoolParams(const Pool3dParameters& params)
-      : depth_(params.depth),
-        batch_(params.tensor_in_batch),
-        in_planes_(params.tensor_in_planes),
-        in_rows_(params.tensor_in_rows),
-        in_cols_(params.tensor_in_cols),
-        window_planes_(params.window_planes),
-        window_rows_(params.window_rows),
-        window_cols_(params.window_cols),
-        stride_planes_(params.plane_stride),
-        stride_rows_(params.row_stride),
-        stride_cols_(params.col_stride),
-        out_planes_(params.out_plane),
-        out_rows_(params.out_height),
-        out_cols_(params.out_width),
-        pad_planes_(params.pad_planes),
-        pad_rows_(params.pad_rows),
-        pad_cols_(params.pad_cols) {}
-
-  const int depth_;
-  const int batch_;
-  const int in_planes_;
-  const int in_rows_;
-  const int in_cols_;
-
-  const int window_planes_;
-  const int window_rows_;
-  const int window_cols_;
-
-  const int stride_planes_;
-  const int stride_rows_;
-  const int stride_cols_;
-
-  const int out_planes_;
-  const int out_rows_;
-  const int out_cols_;
-
-  const int pad_planes_;
-  const int pad_rows_;
-  const int pad_cols_;
-};
-// MaxPool3d SYCL kernel. Expects the number of threads to be equal to the
-// number of elements in the output tensor.
-//
-// For each output element, find the corresponding input window and run over
-// all values in the window to find the maximum value. This value is then
-// copied into that output element.
-template <typename T>
-class MaxPool3DSYCL {
-  using write_accessor =
-      cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::write,
-                         cl::sycl::access::target::global_buffer>;
-  using read_accessor =
-      cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::read,
-                         cl::sycl::access::target::global_buffer>;
-
- public:
-  MaxPool3DSYCL(const int depth, const int batch, const int in_planes,
-                const int in_rows, const int in_cols, const int out_planes,
-                const int out_rows, const int out_cols,
-                const std::array<int64, 3>& window,
-                const std::array<int64, 3>& stride,
-                const std::array<int64, 3>& padding,
-                const read_accessor input_accessor,
-                write_accessor output_accessor)
-      : p_(depth, batch, in_planes, in_rows, in_cols, out_planes, out_rows,
-           out_cols, window, stride, padding),
-        input_accessor_(input_accessor),
-        output_accessor_(output_accessor) {}
-  void operator()(cl::sycl::item<1> item) {
-    T* input_data = ConvertToActualTypeSycl(T, input_accessor_);
-    T* output_data = ConvertToActualTypeSycl(T, output_accessor_);
-
-    int index = item.get_linear_id();
-    int n = index;
-    int d = n % p_.depth_;
-    n /= p_.depth_;
-    int cstart = (n % p_.out_cols_) * p_.stride_cols_ - p_.pad_cols_;
-    int cend = std::min(cstart + p_.window_cols_, p_.in_cols_);
-    cstart = std::max(cstart, 0);
-    n /= p_.out_cols_;
-    int rstart = (n % p_.out_rows_) * p_.stride_rows_ - p_.pad_rows_;
-    int rend = std::min(rstart + p_.window_rows_, p_.in_rows_);
-    rstart = std::max(rstart, 0);
-    n /= p_.out_rows_;
-    int pstart = (n % p_.out_planes_) * p_.stride_planes_ - p_.pad_planes_;
-    int pend = std::min(pstart + p_.window_planes_, p_.in_planes_);
-    pstart = std::max(pstart, 0);
-    n /= p_.out_planes_;
-    T maxval = Eigen::NumTraits<T>::lowest();
-    const T* input_data_n =
-        input_data + n * p_.in_planes_ * p_.in_cols_ * p_.in_rows_ * p_.depth_;
-    for (int p = pstart; p < pend; ++p) {
-      for (int r = rstart; r < rend; ++r) {
-        for (int c = cstart; c < cend; ++c) {
-          int idx = ((p * p_.in_rows_ + r) * p_.in_cols_ + c) * p_.depth_ + d;
-          if (input_data_n[idx] > maxval) {
-            maxval = input_data_n[idx];
-          }
-        }
-      }
-    }
-    output_data[index] = maxval;
-  }
-
- private:
-  const SYCL3DPoolParams p_;
-  const read_accessor input_accessor_;
-  write_accessor output_accessor_;
-};
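
    // Sketch (annotation, not part of the deleted file): the mod/div chain
    // in operator() above recovers (batch, plane, row, col, channel) from
    // the flat item id. For dims {batch, P planes, R rows, C cols, D depth}
    // flattened with the channel dimension fastest-varying,
    //   index == (((n * P + p) * R + r) * C + c) * D + d
    // and the inverse, in isolation, is:
    inline void DecomposeNDHWC(int index, int P, int R, int C, int D,
                               int* n, int* p, int* r, int* c, int* d) {
      int i = index;
      *d = i % D; i /= D;  // channel
      *c = i % C; i /= C;  // column
      *r = i % R; i /= R;  // row
      *p = i % P; i /= P;  // plane
      *n = i;              // what remains is the batch index
    }
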
-template <typename T>
-struct LaunchPoolingOp<SYCLDevice, T, MAX> {
-  static void launch(OpKernelContext* context, const Tensor& tensor_in,
-                     const std::array<int64, 3>& window,
-                     const std::array<int64, 3>& stride,
-                     const std::array<int64, 3>& padding,
-                     TensorFormat data_format, Padding padding_type,
-                     Tensor* output) {
-    const SYCLDevice& device = context->eigen_device<SYCLDevice>();
-    const int out_planes = GetTensorDim(*output, data_format, '0');
-    const int out_rows = GetTensorDim(*output, data_format, '1');
-    const int out_cols = GetTensorDim(*output, data_format, '2');
-    const int batch = GetTensorDim(tensor_in, data_format, 'N');
-    const int in_planes = GetTensorDim(tensor_in, data_format, '0');
-    const int in_rows = GetTensorDim(tensor_in, data_format, '1');
-    const int in_cols = GetTensorDim(tensor_in, data_format, '2');
-    const int depth = GetTensorDim(tensor_in, data_format, 'C');
-
-    const int num_threads = output->NumElements();
-
-    auto input_buffer =
-        device.get_sycl_buffer(tensor_in.template flat<T>().data());
-    auto output_buffer =
-        device.get_sycl_buffer(output->template flat<T>().data());
-
-    device.sycl_queue().submit([&](cl::sycl::handler& cgh) {
-      auto input_access =
-          input_buffer.template get_access<cl::sycl::access::mode::read>(cgh);
-      auto output_access =
-          output_buffer.template get_access<cl::sycl::access::mode::write>(cgh);
-      MaxPool3DSYCL<T> max_pool(depth, batch, in_planes, in_rows, in_cols,
-                                out_planes, out_rows, out_cols, window, stride,
-                                padding, input_access, output_access);
-
-      cgh.parallel_for(cl::sycl::range<1>(num_threads), max_pool);
-    });
-  }
-};
-// MaxPool3DGrad SYCL kernel. Expects the number of threads to be equal to the
-// number of elements in the output backprop tensor (i.e. the number of elements
-// in the input data tensor).
-//
-// For each output backprop element we compute the possible window of values in
-// the input backprop tensor which might contribute to this element. Then for
-// each error in this window, compute the corresponding input window which was
-// pooled into that element in the output. Walk through this input window to
-// determine whether the input value is the first maximum value, and so the
-// error should be propagated back to the corresponding backprop element.
-template <typename T>
-class MaxPool3DGradSYCL {
-  using write_accessor =
-      cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::write,
-                         cl::sycl::access::target::global_buffer>;
-  using read_accessor =
-      cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::read,
-                         cl::sycl::access::target::global_buffer>;
-
- public:
-  MaxPool3DGradSYCL(const int depth, const int batch, const int in_planes,
-                    const int in_rows, const int in_cols,
-                    const std::array<int64, 3>& output_shape,
-                    const std::array<int64, 3>& window,
-                    const std::array<int64, 3>& stride,
-                    const std::array<int64, 3>& padding,
-                    const read_accessor input_data_accessor,
-                    const read_accessor output_data_accessor,
-                    const read_accessor input_backprop_accessor,
-                    write_accessor output_backprop_accessor)
-      : p_(depth, batch, in_planes, in_rows, in_cols, output_shape, window,
-           stride, padding),
-        input_data_accessor_(input_data_accessor),
-        output_data_accessor_(output_data_accessor),
-        input_backprop_accessor_(input_backprop_accessor),
-        output_backprop_accessor_(output_backprop_accessor) {}
-  void operator()(cl::sycl::item<1> item) {
-    T* input_data = ConvertToActualTypeSycl(T, input_data_accessor_);
-    T* output_data = ConvertToActualTypeSycl(T, output_data_accessor_);
-    T* input_backprop = ConvertToActualTypeSycl(T, input_backprop_accessor_);
-    T* output_backprop = ConvertToActualTypeSycl(T, output_backprop_accessor_);
-
-    const int index = item.get_linear_id();
-    T output_value = 0;
-    int n = index;
-    const int d = n % p_.depth_;
-    n /= p_.depth_;
-    const int c = (n % p_.in_cols_) + p_.pad_cols_;
-    const int poolcstart =
-        (c < p_.window_cols_) ? 0 : (c - p_.window_cols_) / p_.stride_cols_ + 1;
-    const int poolcend = std::min(c / p_.stride_cols_ + 1, p_.out_cols_);
-    n /= p_.in_cols_;
-    const int r = (n % p_.in_rows_) + p_.pad_rows_;
-    const int poolrstart =
-        (r < p_.window_rows_) ? 0 : (r - p_.window_rows_) / p_.stride_rows_ + 1;
-    const int poolrend = std::min(r / p_.stride_rows_ + 1, p_.out_rows_);
-    n /= p_.in_rows_;
-    const int p = (n % p_.in_planes_) + p_.pad_planes_;
-    const int poolpstart =
-        (p < p_.window_planes_)
-            ? 0
-            : (p - p_.window_planes_) / p_.stride_planes_ + 1;
-    const int poolpend = std::min(p / p_.stride_planes_ + 1, p_.out_planes_);
-    n /= p_.in_planes_;
-    const int index_no_n =
-        index - n * p_.in_planes_ * p_.in_cols_ * p_.in_rows_ * p_.depth_;
-
-    const T* input_data_n =
-        input_data + n * p_.in_planes_ * p_.in_cols_ * p_.in_rows_ * p_.depth_;
-    const T* output_data_n = output_data + n * p_.out_planes_ * p_.out_cols_ *
-                                               p_.out_rows_ * p_.depth_;
-    const T* input_backprop_n = input_backprop + n * p_.out_planes_ *
-                                                     p_.out_cols_ *
-                                                     p_.out_rows_ * p_.depth_;
-    for (int poolp = poolpstart; poolp < poolpend; ++poolp) {
-      int pstart = poolp * p_.stride_planes_ - p_.pad_planes_;
-      const int pend = std::min(pstart + p_.window_planes_, p_.in_planes_);
-      pstart = std::max(pstart, 0);
-
-      for (int poolr = poolrstart; poolr < poolrend; ++poolr) {
-        int rstart = poolr * p_.stride_rows_ - p_.pad_rows_;
-        const int rend = std::min(rstart + p_.window_rows_, p_.in_rows_);
-        rstart = std::max(rstart, 0);
-
-        for (int poolc = poolcstart; poolc < poolcend; ++poolc) {
-          int cstart = poolc * p_.stride_cols_ - p_.pad_cols_;
-          const int cend = std::min(cstart + p_.window_cols_, p_.in_cols_);
-          cstart = std::max(cstart, 0);
-
-          const int output_data_idx =
-              ((poolp * p_.out_rows_ + poolr) * p_.out_cols_ + poolc) *
-                  p_.depth_ +
-              d;
-          bool should_continue = true;
-          bool is_max = (input_data[index] == output_data_n[output_data_idx]);
-          for (int win_p = pstart; win_p < pend && should_continue; ++win_p) {
-            for (int win_r = rstart; win_r < rend && should_continue; ++win_r) {
-              for (int win_c = cstart; win_c < cend && should_continue;
-                   ++win_c) {
-                const int input_data_idx =
-                    ((win_p * p_.in_rows_ + win_r) * p_.in_cols_ + win_c) *
-                        p_.depth_ +
-                    d;
-                if (input_data_idx == index_no_n) {
-                  should_continue = false;
-                } else if (input_data_n[input_data_idx] ==
-                           output_data_n[output_data_idx]) {
-                  should_continue = false;
-                  is_max = false;
-                }
-              }
-            }
-          }
-          if (is_max) {
-            output_value += input_backprop_n[output_data_idx];
-          }
-        }
-      }
-    }
-    output_backprop[index] = output_value;
-  }
-
- private:
-  const SYCL3DPoolParams p_;
-
-  const read_accessor input_data_accessor_;
-  const read_accessor output_data_accessor_;
-  const read_accessor input_backprop_accessor_;
-  write_accessor output_backprop_accessor_;
-};
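
    // Sketch (annotation, not part of the deleted file): the should_continue
    // scan in operator() above routes the gradient to the *first* maximum in
    // the window, so tied maxima do not each receive the full backprop
    // value. The same tie-break rule reduced to a single window:
    inline int FirstArgMax(const float* window, int size) {
      int arg = 0;
      for (int i = 1; i < size; ++i) {
        if (window[i] > window[arg]) arg = i;  // strict >, ties keep the first
      }
      return arg;
    }
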
-template <typename T>
-struct LaunchMaxPooling3dGradOp<SYCLDevice, T> {
-  static void launch(OpKernelContext* context, const Tensor& tensor_in,
-                     const Tensor& tensor_out, const Tensor& out_backprop,
-                     const std::array<int64, 3>& window,
-                     const std::array<int64, 3>& stride,
-                     const std::array<int64, 3>& out,
-                     const std::array<int64, 3>& padding,
-                     TensorFormat data_format, Tensor* output) {
-    const SYCLDevice& device = context->eigen_device<SYCLDevice>();
-    const int batch = GetTensorDim(tensor_in, data_format, 'N');
-    const int in_planes = GetTensorDim(tensor_in, data_format, '0');
-    const int in_rows = GetTensorDim(tensor_in, data_format, '1');
-    const int in_cols = GetTensorDim(tensor_in, data_format, '2');
-    const int depth = GetTensorDim(tensor_in, data_format, 'C');
-
-    const int output_size = output->NumElements();
-
-    auto input_data_buffer =
-        device.get_sycl_buffer(tensor_in.template flat<T>().data());
-    auto output_data_buffer =
-        device.get_sycl_buffer(tensor_out.template flat<T>().data());
-    auto input_backprop_buffer =
-        device.get_sycl_buffer(out_backprop.template flat<T>().data());
-    auto output_backprop_buffer =
-        device.get_sycl_buffer(output->template flat<T>().data());
-
-    device.sycl_queue().submit([&](cl::sycl::handler& cgh) {
-      auto input_data_access =
-          input_data_buffer.template get_access<cl::sycl::access::mode::read>(
-              cgh);
-      auto output_data_access =
-          output_data_buffer.template get_access<cl::sycl::access::mode::read>(
-              cgh);
-      auto input_backprop_access =
-          input_backprop_buffer
-              .template get_access<cl::sycl::access::mode::read>(cgh);
-      auto output_backprop_access =
-          output_backprop_buffer
-              .template get_access<cl::sycl::access::mode::write>(cgh);
-      MaxPool3DGradSYCL<T> max_pool(
-          depth, batch, in_planes, in_rows, in_cols, out, window, stride,
-          padding, input_data_access, output_data_access, input_backprop_access,
-          output_backprop_access);
-
-      cgh.parallel_for(cl::sycl::range<1>(output_size), max_pool);
-    });
-  }
-};
-// MaxPool3DGradGrad SYCL kernel. Expects the number of threads to be equal to
-// the number of elements in the output backprop tensor, i.e. the number of
-// elements in the output tensor.
-//
-// For each element in the output backprop tensor, find the corresponding input
-// window, and compare the input and output data to find the index of the
-// maximum value in the input tensor. This is then the index of the gradient to
-// pass through to the output backprop tensor.
-template <typename T>
-class MaxPool3DGradGradSYCL {
-  using write_accessor =
-      cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::write,
-                         cl::sycl::access::target::global_buffer>;
-  using read_accessor =
-      cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::read,
-                         cl::sycl::access::target::global_buffer>;
-
- public:
-  MaxPool3DGradGradSYCL(const Pool3dParameters& params,
-                        const read_accessor input_data_accessor,
-                        const read_accessor output_data_accessor,
-                        const read_accessor input_backprop_accessor,
-                        write_accessor output_backprop_accessor)
-      : p_(params),
-        input_data_accessor_(input_data_accessor),
-        output_data_accessor_(output_data_accessor),
-        input_backprop_accessor_(input_backprop_accessor),
-        output_backprop_accessor_(output_backprop_accessor) {}
-  void operator()(cl::sycl::item<1> item) {
-    T* input_data = ConvertToActualTypeSycl(T, input_data_accessor_);
-    T* output_data = ConvertToActualTypeSycl(T, output_data_accessor_);
-    T* input_backprop = ConvertToActualTypeSycl(T, input_backprop_accessor_);
-    T* output_backprop = ConvertToActualTypeSycl(T, output_backprop_accessor_);
-
-    int index = item.get_linear_id();
-    int n = index;
-    int d = n % p_.depth_;
-    n /= p_.depth_;
-    int cstart = (n % p_.out_cols_) * p_.stride_cols_ - p_.pad_cols_;
-    int cend = std::min(cstart + p_.window_cols_, p_.in_cols_);
-    cstart = std::max(cstart, 0);
-    n /= p_.out_cols_;
-    int rstart = (n % p_.out_rows_) * p_.stride_rows_ - p_.pad_rows_;
-    int rend = std::min(rstart + p_.window_rows_, p_.in_rows_);
-    rstart = std::max(rstart, 0);
-    n /= p_.out_rows_;
-    int pstart = (n % p_.out_planes_) * p_.stride_planes_ - p_.pad_planes_;
-    int pend = std::min(pstart + p_.window_planes_, p_.in_planes_);
-    pstart = std::max(pstart, 0);
-    n /= p_.out_planes_;
-    int maxidx = -1;
-    bool should_stop = false;
-    const T* input_data_n =
-        input_data + n * p_.in_planes_ * p_.in_cols_ * p_.in_rows_ * p_.depth_;
-    for (int p = pstart; p < pend && !should_stop; ++p) {
-      for (int r = rstart; r < rend && !should_stop; ++r) {
-        for (int c = cstart; c < cend && !should_stop; ++c) {
-          int idx = ((p * p_.in_rows_ + r) * p_.in_cols_ + c) * p_.depth_ + d;
-          if (output_data[index] == input_data_n[idx]) {
-            maxidx = idx;
-            should_stop = true;
-          }
-        }
-      }
-    }
-    if (maxidx != -1) {
-      output_backprop[index] = input_backprop[n * p_.in_planes_ * p_.in_rows_ *
-                                                  p_.in_cols_ * p_.depth_ +
-                                              maxidx];
-    }
-  }
-
- private:
-  const SYCL3DPoolParams p_;
-
-  const read_accessor input_data_accessor_;
-  const read_accessor output_data_accessor_;
-  const read_accessor input_backprop_accessor_;
-  write_accessor output_backprop_accessor_;
-};
-template <typename T>
-struct LaunchMaxPooling3dGradGradOp<SYCLDevice, T> {
-  static void launch(OpKernelContext* context, const Pool3dParameters& params,
-                     const Tensor& tensor_in, const Tensor& tensor_out,
-                     const Tensor& out_backprop, Tensor* output) {
-    const SYCLDevice& device = context->eigen_device<SYCLDevice>();
-
-    const int num_threads = output->NumElements();
-
-    auto input_data_buffer =
-        device.get_sycl_buffer(tensor_in.template flat<T>().data());
-    auto output_data_buffer =
-        device.get_sycl_buffer(tensor_out.template flat<T>().data());
-    auto input_backprop_buffer =
-        device.get_sycl_buffer(out_backprop.template flat<T>().data());
-    auto output_backprop_buffer =
-        device.get_sycl_buffer(output->template flat<T>().data());
-
-    device.sycl_queue().submit([&](cl::sycl::handler& cgh) {
-      auto input_data_access =
-          input_data_buffer.template get_access<cl::sycl::access::mode::read>(
-              cgh);
-      auto output_data_access =
-          output_data_buffer.template get_access<cl::sycl::access::mode::read>(
-              cgh);
-      auto input_backprop_access =
-          input_backprop_buffer
-              .template get_access<cl::sycl::access::mode::read>(cgh);
-      auto output_backprop_access =
-          output_backprop_buffer
-              .template get_access<cl::sycl::access::mode::write>(cgh);
-      MaxPool3DGradGradSYCL<T> functor(
-          params, input_data_access, output_data_access, input_backprop_access,
-          output_backprop_access);
-
-      cgh.parallel_for(cl::sycl::range<1>(num_threads), functor);
-    });
-  }
-};
-// AvgPool3D SYCL kernel. Expects the number of threads to be equal to the
-// number of elements in the output tensor.
-//
-// For each output value find the corresponding input window, and run through
-// the window accumulating the values to form an average. We divide each value
-// before accumulating to keep the accumulator close in magnitude to the
-// values being added, which reduces floating-point rounding error.
-template <typename T>
-class AvgPool3DSYCL {
-  using write_accessor =
-      cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::write,
-                         cl::sycl::access::target::global_buffer>;
-  using read_accessor =
-      cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::read,
-                         cl::sycl::access::target::global_buffer>;
-
- public:
-  AvgPool3DSYCL(const int depth, const int batch, const int in_planes,
-                const int in_rows, const int in_cols, const int out_planes,
-                const int out_rows, const int out_cols,
-                const std::array<int64, 3>& window,
-                const std::array<int64, 3>& stride,
-                const std::array<int64, 3>& padding,
-                const read_accessor input_accessor,
-                write_accessor output_accessor)
-      : p_(depth, batch, in_planes, in_rows, in_cols, out_planes, out_rows,
-           out_cols, window, stride, padding),
-        input_accessor_(input_accessor),
-        output_accessor_(output_accessor) {}
-  void operator()(cl::sycl::item<1> item) {
-    T* input_data = ConvertToActualTypeSycl(T, input_accessor_);
-    T* output_data = ConvertToActualTypeSycl(T, output_accessor_);
-
-    int index = item.get_linear_id();
-    int n = index;
-    int d = n % p_.depth_;
-    n /= p_.depth_;
-    int cstart = (n % p_.out_cols_) * p_.stride_cols_ - p_.pad_cols_;
-    int cend = std::min(cstart + p_.window_cols_, p_.in_cols_);
-    cstart = std::max(cstart, 0);
-    n /= p_.out_cols_;
-    int rstart = (n % p_.out_rows_) * p_.stride_rows_ - p_.pad_rows_;
-    int rend = std::min(rstart + p_.window_rows_, p_.in_rows_);
-    rstart = std::max(rstart, 0);
-    n /= p_.out_rows_;
-    int pstart = (n % p_.out_planes_) * p_.stride_planes_ - p_.pad_planes_;
-    int pend = std::min(pstart + p_.window_planes_, p_.in_planes_);
-    pstart = std::max(pstart, 0);
-    n /= p_.out_planes_;
-    T accum = T(0);
-    T count =
-        static_cast<T>((pend - pstart) * (rend - rstart) * (cend - cstart));
-    const T* input_data_n =
-        input_data + n * p_.in_planes_ * p_.in_cols_ * p_.in_rows_ * p_.depth_;
-    for (int p = pstart; p < pend; ++p) {
-      for (int r = rstart; r < rend; ++r) {
-        for (int c = cstart; c < cend; ++c) {
-          int idx = ((p * p_.in_rows_ + r) * p_.in_cols_ + c) * p_.depth_ + d;
-          accum += input_data_n[idx] / count;
-        }
-      }
-    }
-    output_data[index] = accum;
-  }
-
- private:
-  const SYCL3DPoolParams p_;
-  const read_accessor input_accessor_;
-  write_accessor output_accessor_;
-};
-template <typename T>
-struct LaunchPoolingOp<SYCLDevice, T, AVG> {
-  static void launch(OpKernelContext* context, const Tensor& tensor_in,
-                     const std::array<int64, 3>& window,
-                     const std::array<int64, 3>& stride,
-                     const std::array<int64, 3>& padding,
-                     TensorFormat data_format, Padding padding_type,
-                     Tensor* output) {
-    const SYCLDevice& device = context->eigen_device<SYCLDevice>();
-    const int out_planes = GetTensorDim(*output, data_format, '0');
-    const int out_rows = GetTensorDim(*output, data_format, '1');
-    const int out_cols = GetTensorDim(*output, data_format, '2');
-    const int batch = GetTensorDim(tensor_in, data_format, 'N');
-    const int in_planes = GetTensorDim(tensor_in, data_format, '0');
-    const int in_rows = GetTensorDim(tensor_in, data_format, '1');
-    const int in_cols = GetTensorDim(tensor_in, data_format, '2');
-    const int depth = GetTensorDim(tensor_in, data_format, 'C');
-
-    const int num_threads = output->NumElements();
-
-    auto input_buffer =
-        device.get_sycl_buffer(tensor_in.template flat<T>().data());
-    auto output_buffer =
-        device.get_sycl_buffer(output->template flat<T>().data());
-
-    device.sycl_queue().submit([&](cl::sycl::handler& cgh) {
-      auto input_access =
-          input_buffer.template get_access<cl::sycl::access::mode::read>(cgh);
-      auto output_access =
-          output_buffer.template get_access<cl::sycl::access::mode::write>(cgh);
-      AvgPool3DSYCL<T> avg_pool(depth, batch, in_planes, in_rows, in_cols,
-                                out_planes, out_rows, out_cols, window, stride,
-                                padding, input_access, output_access);
-
-      cgh.parallel_for(cl::sycl::range<1>(num_threads), avg_pool);
-    });
-  }
-};
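
The divide-before-accumulate rationale in the comment above is easy to check in isolation. A toy sketch (illustrative values, not part of the deleted kernel) showing how a large running sum in float silently stops absorbing small addends, while dividing first keeps the accumulator near the addends' magnitude:

    #include <cstdio>

    int main() {
      const int n = 20 * 1000 * 1000;  // more addends than a float sum can count
      const float v = 1.0f;
      float sum = 0.f, mean = 0.f;
      for (int i = 0; i < n; ++i) {
        sum += v;       // stalls at 2^24, where adding 1.0 is under half an ulp
        mean += v / n;  // accumulator stays the same magnitude as the addends
      }
      sum /= n;
      // Prints roughly 0.8389 vs 1.0000 with IEEE-754 single precision.
      std::printf("sum-then-divide: %f  divide-then-sum: %f\n", sum, mean);
      return 0;
    }
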
-// AvgPool3DGrad SYCL kernel. Expects the number of threads to be equal to the
-// number of elements in the output backprop tensor, i.e. the number of
-// elements in the input tensor.
-//
-// For each output backprop index, find the window in the input backprop
-// tensor covering all output values that were affected by the input value at
-// this index. Then for each gradient in this window, compute the size of the
-// input window that was averaged to give this output, and use this size to
-// scale the gradient accordingly. Add this scaled gradient to the output
-// backprop value.
-template <typename T>
-class AvgPool3DGradSYCL {
-  using write_accessor =
-      cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::write,
-                         cl::sycl::access::target::global_buffer>;
-  using read_accessor =
-      cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::read,
-                         cl::sycl::access::target::global_buffer>;
-
- public:
-  AvgPool3DGradSYCL(const int depth, const int batch, const int in_planes,
-                    const int in_rows, const int in_cols,
-                    const std::array<int64, 3>& out_shape,
-                    const std::array<int64, 3>& window,
-                    const std::array<int64, 3>& stride,
-                    const std::array<int64, 3>& padding,
-                    const read_accessor input_backprop_accessor,
-                    write_accessor output_backprop_accessor)
-      : p_(depth, batch, in_planes, in_rows, in_cols, out_shape, window, stride,
-           padding),
-        input_backprop_accessor_(input_backprop_accessor),
-        output_backprop_accessor_(output_backprop_accessor) {}
-  void operator()(cl::sycl::item<1> item) {
-    T* input_backprop = ConvertToActualTypeSycl(T, input_backprop_accessor_);
-    T* output_backprop = ConvertToActualTypeSycl(T, output_backprop_accessor_);
-
-    const int index = item.get_linear_id();
-    int n = index;
-    const int d = n % p_.depth_;
-    n /= p_.depth_;
-    const int c = (n % p_.in_cols_) + p_.pad_cols_;
-    const int poolcstart =
-        (c < p_.window_cols_) ? 0 : (c - p_.window_cols_) / p_.stride_cols_ + 1;
-    const int poolcend = std::min(c / p_.stride_cols_ + 1, p_.out_cols_);
-    n /= p_.in_cols_;
-    const int r = (n % p_.in_rows_) + p_.pad_rows_;
-    const int poolrstart =
-        (r < p_.window_rows_) ? 0 : (r - p_.window_rows_) / p_.stride_rows_ + 1;
-    const int poolrend = std::min(r / p_.stride_rows_ + 1, p_.out_rows_);
-    n /= p_.in_rows_;
-    const int p = (n % p_.in_planes_) + p_.pad_planes_;
-    const int poolpstart =
-        (p < p_.window_planes_)
-            ? 0
-            : (p - p_.window_planes_) / p_.stride_planes_ + 1;
-    const int poolpend = std::min(p / p_.stride_planes_ + 1, p_.out_planes_);
-    n /= p_.in_planes_;
-
-    T gradient = T(0);
-    const T* input_backprop_n = input_backprop + n * p_.out_planes_ *
-                                                     p_.out_cols_ *
-                                                     p_.out_rows_ * p_.depth_;
-    for (int poolp = poolpstart; poolp < poolpend; ++poolp) {
-      int pstart = poolp * p_.stride_planes_ - p_.pad_planes_;
-      const int pend = std::min(pstart + p_.window_planes_, p_.in_planes_);
-      pstart = std::max(pstart, 0);
-      const int plane_window_size = pend - pstart;
-      for (int poolr = poolrstart; poolr < poolrend; ++poolr) {
-        int rstart = poolr * p_.stride_rows_ - p_.pad_rows_;
-        const int rend = std::min(rstart + p_.window_rows_, p_.in_rows_);
-        rstart = std::max(rstart, 0);
-        const int row_window_size = rend - rstart;
-        for (int poolc = poolcstart; poolc < poolcend; ++poolc) {
-          const int idx =
-              ((poolp * p_.out_rows_ + poolr) * p_.out_cols_ + poolc) *
-                  p_.depth_ +
-              d;
-          int cstart = poolc * p_.stride_cols_ - p_.pad_cols_;
-          const int cend = std::min(cstart + p_.window_cols_, p_.in_cols_);
-          cstart = std::max(cstart, 0);
-          const int col_window_size = cend - cstart;
-          const int window_size =
-              plane_window_size * row_window_size * col_window_size;
-          gradient += input_backprop_n[idx] / static_cast<T>(window_size);
-        }
-      }
-    }
-    output_backprop[index] = gradient;
-  }
-
- private:
-  const SYCL3DPoolParams p_;
-  const read_accessor input_backprop_accessor_;
-  write_accessor output_backprop_accessor_;
-};
-template <typename T>
-struct LaunchAvgPooling3dGradOp<SYCLDevice, T> {
-  static void launch(OpKernelContext* context,
-                     const TensorShape& tensor_in_shape,
-                     const Tensor& out_backprop,
-                     const std::array<int64, 3>& window,
-                     const std::array<int64, 3>& stride,
-                     const std::array<int64, 3>& output_shape,
-                     const std::array<int64, 3>& padding,
-                     TensorFormat data_format, Tensor* output) {
-    const SYCLDevice& device = context->eigen_device<SYCLDevice>();
-    const int batch = GetTensorDim(tensor_in_shape, data_format, 'N');
-    const int in_planes = GetTensorDim(tensor_in_shape, data_format, '0');
-    const int in_rows = GetTensorDim(tensor_in_shape, data_format, '1');
-    const int in_cols = GetTensorDim(tensor_in_shape, data_format, '2');
-    const int depth = GetTensorDim(tensor_in_shape, data_format, 'C');
-
-    const int num_threads = output->NumElements();
-
-    auto input_backprop_buffer =
-        device.get_sycl_buffer(out_backprop.template flat<T>().data());
-    auto output_backprop_buffer =
-        device.get_sycl_buffer(output->template flat<T>().data());
-
-    device.sycl_queue().submit([&](cl::sycl::handler& cgh) {
-      auto input_backprop_access =
-          input_backprop_buffer
-              .template get_access<cl::sycl::access::mode::read>(cgh);
-      auto output_backprop_access =
-          output_backprop_buffer
-              .template get_access<cl::sycl::access::mode::write>(cgh);
-      AvgPool3DGradSYCL<T> functor(
-          depth, batch, in_planes, in_rows, in_cols, output_shape, window,
-          stride, padding, input_backprop_access, output_backprop_access);
-
-      cgh.parallel_for(cl::sycl::range<1>(num_threads), functor);
-    });
-  }
-};
-
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_CORE_KERNELS_POOLING_OP_3D_SYCL_H_
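
In the backprop kernel above, each input position receives contributions from every pooling window that covers it, with each incoming gradient scaled by that window's clipped size. The same index arithmetic in one dimension, as a standalone sketch with illustrative sizes:

    #include <algorithm>
    #include <cstdio>

    int main() {
      const int in_size = 10, out_size = 5, window = 4, stride = 2, pad = 1;
      const int x = 6;        // input coordinate receiving gradient
      const int xp = x + pad; // shift into padded coordinates, as in the kernel
      const int pool_start = (xp < window) ? 0 : (xp - window) / stride + 1;
      const int pool_end = std::min(xp / stride + 1, out_size);
      for (int o = pool_start; o < pool_end; ++o) {
        int start = o * stride - pad;
        const int end = std::min(start + window, in_size);
        start = std::max(start, 0);
        // The gradient from output o would be divided by (end - start).
        std::printf("output %d covers input %d; window size %d\n",
                    o, x, end - start);
      }
      return 0;
    }
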
diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc
index 152ab5f..e72f1d9 100644
--- a/tensorflow/core/kernels/random_op.cc
+++ b/tensorflow/core/kernels/random_op.cc
@@ -48,9 +48,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace {
 
@@ -457,52 +454,5 @@
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-
-#define REGISTER(TYPE)                                                         \
-  template struct functor::FillPhiloxRandom<                                   \
-      SYCLDevice, random::UniformDistribution<random::PhiloxRandom, TYPE>>;    \
-  REGISTER_KERNEL_BUILDER(                                                     \
-      Name("RandomUniform")                                                    \
-          .Device(DEVICE_SYCL)                                                 \
-          .HostMemory("shape")                                                 \
-          .TypeConstraint<TYPE>("dtype"),                                      \
-      PhiloxRandomOp<SYCLDevice, random::UniformDistribution<                  \
-                                     random::PhiloxRandom, TYPE>>);            \
-  REGISTER_KERNEL_BUILDER(                                                     \
-      Name("RandomStandardNormal")                                             \
-          .Device(DEVICE_SYCL)                                                 \
-          .HostMemory("shape")                                                 \
-          .TypeConstraint<TYPE>("dtype"),                                      \
-      PhiloxRandomOp<SYCLDevice,                                               \
-                     random::NormalDistribution<random::PhiloxRandom, TYPE>>); \
-  REGISTER_KERNEL_BUILDER(                                                     \
-      Name("TruncatedNormal")                                                  \
-          .Device(DEVICE_SYCL)                                                 \
-          .HostMemory("shape")                                                 \
-          .TypeConstraint<TYPE>("dtype"),                                      \
-      PhiloxRandomOp<                                                          \
-          SYCLDevice,                                                          \
-          random::TruncatedNormalDistribution<                                 \
-              random::SingleSampleAdapter<random::PhiloxRandom>, TYPE>>);
-
-#define REGISTER_INT(IntType)                                   \
-  REGISTER_KERNEL_BUILDER(Name("RandomUniformInt")              \
-                              .Device(DEVICE_SYCL)              \
-                              .HostMemory("shape")              \
-                              .HostMemory("minval")             \
-                              .HostMemory("maxval")             \
-                              .TypeConstraint<IntType>("Tout"), \
-                          RandomUniformIntOp<SYCLDevice, IntType>);
-
-TF_CALL_float(REGISTER);
-TF_CALL_double(REGISTER);
-TF_CALL_int32(REGISTER_INT);
-TF_CALL_int64(REGISTER_INT);
-
-#undef REGISTER
-#undef REGISTER_INT
-
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/random_op.h b/tensorflow/core/kernels/random_op.h
index c3f138a..f610b35 100644
--- a/tensorflow/core/kernels/random_op.h
+++ b/tensorflow/core/kernels/random_op.h
@@ -54,17 +54,6 @@
 };
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#if TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-// Declares the partially SYCL-specialized functor struct.
-template <class Distribution>
-struct FillPhiloxRandom<SYCLDevice, Distribution> {
-  void operator()(OpKernelContext* ctx, const SYCLDevice& d,
-                  random::PhiloxRandom gen,
-                  typename Distribution::ResultElementType* data, int64 size,
-                  Distribution dist);
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace functor
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/random_op_cpu.h b/tensorflow/core/kernels/random_op_cpu.h
index eac1fae..bdf5162 100644
--- a/tensorflow/core/kernels/random_op_cpu.h
+++ b/tensorflow/core/kernels/random_op_cpu.h
@@ -48,9 +48,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace functor {
 using random::PhiloxRandom;
@@ -182,146 +179,6 @@
 
 }  // namespace functor
 
-#ifdef TENSORFLOW_USE_SYCL
-
-namespace functor {
-
-template <class Distribution, bool VariableSamplesPerOutput>
-struct FillPhiloxRandomKernel;
-
-template <class Distribution>
-struct FillPhiloxRandomKernel<Distribution, false> {
-  typedef typename Distribution::ResultElementType T;
-  using write_accessor = sycl::accessor<uint8_t, 1, sycl::access::mode::write,
-                                        sycl::access::target::global_buffer>;
-
-  FillPhiloxRandomKernel(write_accessor& data, random::PhiloxRandom& gen,
-                         Distribution& dist)
-      : data_(data), gen_(gen), dist_(dist) {}
-
-  void operator()(sycl::nd_item<1> item) {
-    const size_t kGroupSize = Distribution::kResultElementCount;
-
-    const size_t item_id = item.get_global(0);
-    const size_t total_item_count = item.get_global_range();
-    size_t offset = item_id * kGroupSize;
-    gen_.Skip(item_id);
-
-    const size_t size = data_.get_size() / sizeof(T);
-    T* data = ConvertToActualTypeSycl(T, data_);
-
-    while (offset + kGroupSize <= size) {
-      const typename Distribution::ResultType samples = dist_(&gen_);
-      for (size_t i = 0; i < kGroupSize; ++i) {
-        data[offset + i] = samples[i];
-      }
-
-      offset += (total_item_count - 1) * kGroupSize;
-      gen_.Skip(total_item_count - 1);
-    }
-
-    const typename Distribution::ResultType samples = dist_(&gen_);
-    for (size_t i = 0; i < kGroupSize; ++i) {
-      if (offset >= size) {
-        return;
-      }
-      data[offset] = samples[i];
-      ++offset;
-    }
-  }
-
- private:
-  write_accessor data_;
-  random::PhiloxRandom gen_;
-  Distribution dist_;
-};
-
-template <class Distribution>
-struct FillPhiloxRandomKernel<Distribution, true> {
-  typedef typename Distribution::ResultElementType T;
-  using write_accessor = sycl::accessor<uint8_t, 1, sycl::access::mode::write,
-                                        sycl::access::target::global_buffer>;
-
-  FillPhiloxRandomKernel(write_accessor& data, random::PhiloxRandom& gen,
-                         Distribution& dist)
-      : data_(data), gen_(gen), dist_(dist) {}
-
-  void operator()(sycl::nd_item<1> item) {
-    using random::PhiloxRandom;
-    using random::SingleSampleAdapter;
-
-    const size_t kReservedSamplesPerOutput = 256;
-    const size_t kGroupSize = Distribution::kResultElementCount;
-    const size_t kGeneratorSkipPerOutputGroup =
-        kGroupSize * kReservedSamplesPerOutput /
-        PhiloxRandom::kResultElementCount;
-
-    const size_t item_id = item.get_global(0);
-    const size_t total_item_count = item.get_global_range();
-    size_t group_index = item_id;
-    size_t offset = group_index * kGroupSize;
-
-    T* data = ConvertToActualTypeSycl(T, data_);
-    const size_t size = data_.get_size() / sizeof(T);
-
-    while (offset < size) {
-      // Since each output takes a variable number of samples, we need to
-      // realign the generator to the beginning for the current output group
-      PhiloxRandom gen = gen_;
-      gen.Skip(group_index * kGeneratorSkipPerOutputGroup);
-      SingleSampleAdapter<PhiloxRandom> single_samples(&gen);
-
-      const typename Distribution::ResultType samples = dist_(&single_samples);
-
-      for (size_t i = 0; i < kGroupSize; ++i) {
-        if (offset >= size) {
-          return;
-        }
-        data[offset] = samples[i];
-        ++offset;
-      }
-
-      offset += (total_item_count - 1) * kGroupSize;
-      group_index += total_item_count;
-    }
-  }
-
- private:
-  write_accessor data_;
-  random::PhiloxRandom gen_;
-  Distribution dist_;
-};
-
-template <typename T>
-class FillRandomKernel;
-// Partial specialization for SYCL to fill the entire region with randoms.
-// It splits the work into several tasks and runs them in parallel.
-template <class Distribution>
-void FillPhiloxRandom<SYCLDevice, Distribution>::operator()(
-    OpKernelContext* context, const SYCLDevice& device,
-    random::PhiloxRandom gen, typename Distribution::ResultElementType* data,
-    int64 size, Distribution dist) {
-  const size_t group_size = device.maxSyclThreadsPerBlock();
-  const size_t group_count = (size + group_size - 1) / group_size;
-
-  auto buffer = device.get_sycl_buffer(data);
-
-  device.sycl_queue().submit([&](sycl::handler& cgh) {
-    auto access = buffer.template get_access<sycl::access::mode::write>(cgh);
-
-    FillPhiloxRandomKernel<Distribution,
-                           Distribution::kVariableSamplesPerOutput>
-        task(access, gen, dist);
-    cgh.parallel_for<class FillRandomKernel<Distribution>>(
-        sycl::nd_range<1>(sycl::range<1>(group_count * group_size),
-                          sycl::range<1>(group_size)),
-        task);
-  });
-}
-
-}  // namespace functor
-
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // end namespace tensorflow
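
The deleted fill kernels above use a grid-stride pattern: each work-item owns every total_item_count-th group of kResultElementCount outputs and calls Skip() so its generator stream stays aligned with its groups. A host-side sketch of the same partitioning, with a splitmix-style hash standing in for Philox (the stand-in generator and all sizes are illustrative):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Stand-in counter-based generator; NOT Philox.
    static uint64_t CounterRand(uint64_t ctr) {
      uint64_t z = ctr + 0x9E3779B97F4A7C15ull;
      z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ull;
      z = (z ^ (z >> 27)) * 0x94D049BB133111EBull;
      return z ^ (z >> 31);
    }

    int main() {
      const size_t kGroup = 4, kItems = 8, kSize = 100;
      std::vector<uint64_t> data(kSize);
      for (size_t item = 0; item < kItems; ++item) {  // one "work-item" each
        uint64_t group = item;                        // like gen.Skip(item_id)
        for (size_t off = item * kGroup; off < kSize; off += kItems * kGroup) {
          for (size_t i = 0; i < kGroup && off + i < kSize; ++i)
            data[off + i] = CounterRand(group * kGroup + i);
          group += kItems;  // skip past the groups owned by other items
        }
      }
      std::printf("data[0]=%llu data[99]=%llu\n",
                  (unsigned long long)data[0], (unsigned long long)data[99]);
      return 0;
    }
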
 
diff --git a/tensorflow/core/kernels/reduction_ops_common.h b/tensorflow/core/kernels/reduction_ops_common.h
index 0726992..2dbf5f7 100644
--- a/tensorflow/core/kernels/reduction_ops_common.h
+++ b/tensorflow/core/kernels/reduction_ops_common.h
@@ -41,9 +41,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename Device>
 struct Constants {
@@ -71,10 +68,6 @@
 };
 template <>
 struct Constants<CPUDevice> : ConstantsBase {};
-#ifdef TENSORFLOW_USE_SYCL
-template <>
-struct Constants<SYCLDevice> : ConstantsBase {};
-#endif  // TENSORFLOW_USE_SYCL
 #endif  // EIGEN_HAS_INDEX_LIST
 
 class ReductionHelper {
@@ -279,11 +272,6 @@
 template <typename Reducer>
 struct ReduceFunctor<CPUDevice, Reducer>
     : ReduceFunctorBase<CPUDevice, Reducer> {};
-#if TENSORFLOW_USE_SYCL
-template <typename Reducer>
-struct ReduceFunctor<SYCLDevice, Reducer>
-    : ReduceFunctorBase<SYCLDevice, Reducer> {};
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace functor
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/reduction_ops_euclidean.cc b/tensorflow/core/kernels/reduction_ops_euclidean.cc
index 9bc11e2..370328a 100644
--- a/tensorflow/core/kernels/reduction_ops_euclidean.cc
+++ b/tensorflow/core/kernels/reduction_ops_euclidean.cc
@@ -58,25 +58,5 @@
 
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNELS(type)                                          \
-  REGISTER_KERNEL_BUILDER(Name("EuclideanNorm")                              \
-                              .Device(DEVICE_SYCL)                           \
-                              .TypeConstraint<type>("T")                     \
-                              .TypeConstraint<int32>("Tidx")                 \
-                              .HostMemory("reduction_indices"),              \
-                          ReductionOp<SYCLDevice, type, int32,               \
-                                      functor::EuclideanNormReducer<type>>); \
-  REGISTER_KERNEL_BUILDER(Name("EuclideanNorm")                              \
-                              .Device(DEVICE_SYCL)                           \
-                              .TypeConstraint<type>("T")                     \
-                              .TypeConstraint<int64>("Tidx")                 \
-                              .HostMemory("reduction_indices"),              \
-                          ReductionOp<SYCLDevice, type, int64,               \
-                                      functor::EuclideanNormReducer<type>>);
-REGISTER_SYCL_KERNELS(float);
-REGISTER_SYCL_KERNELS(double);
-#undef REGISTER_SYCL_KERNELS
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/reduction_ops_max.cc b/tensorflow/core/kernels/reduction_ops_max.cc
index fe9775f..99b17f4 100644
--- a/tensorflow/core/kernels/reduction_ops_max.cc
+++ b/tensorflow/core/kernels/reduction_ops_max.cc
@@ -82,44 +82,5 @@
 
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNELS(type)                                        \
-  REGISTER_KERNEL_BUILDER(Name("Max")                                      \
-                              .Device(DEVICE_SYCL)                         \
-                              .TypeConstraint<type>("T")                   \
-                              .TypeConstraint<int32>("Tidx")               \
-                              .HostMemory("reduction_indices"),            \
-                          ReductionOp<SYCLDevice, type, int32,             \
-                                      Eigen::internal::MaxReducer<type>>); \
-  REGISTER_KERNEL_BUILDER(Name("Max")                                      \
-                              .Device(DEVICE_SYCL)                         \
-                              .TypeConstraint<type>("T")                   \
-                              .TypeConstraint<int64>("Tidx")               \
-                              .HostMemory("reduction_indices"),            \
-                          ReductionOp<SYCLDevice, type, int64,             \
-                                      Eigen::internal::MaxReducer<type>>);
-REGISTER_SYCL_KERNELS(float);
-REGISTER_SYCL_KERNELS(double);
-
-REGISTER_KERNEL_BUILDER(
-    Name("Max")
-        .Device(DEVICE_SYCL)
-        .HostMemory("reduction_indices")
-        .HostMemory("input")
-        .HostMemory("output")
-        .TypeConstraint<int32>("T")
-        .TypeConstraint<int32>("Tidx"),
-    ReductionOp<CPUDevice, int32, int32, Eigen::internal::MaxReducer<int32>>);
-REGISTER_KERNEL_BUILDER(
-    Name("Max")
-        .Device(DEVICE_SYCL)
-        .HostMemory("reduction_indices")
-        .HostMemory("input")
-        .HostMemory("output")
-        .TypeConstraint<int32>("T")
-        .TypeConstraint<int64>("Tidx"),
-    ReductionOp<CPUDevice, int32, int64, Eigen::internal::MaxReducer<int32>>);
-#undef REGISTER_SYCL_KERNELS
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/reduction_ops_mean.cc b/tensorflow/core/kernels/reduction_ops_mean.cc
index e96d6f8..2eff475 100644
--- a/tensorflow/core/kernels/reduction_ops_mean.cc
+++ b/tensorflow/core/kernels/reduction_ops_mean.cc
@@ -58,25 +58,5 @@
 
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNELS(type)                                      \
-  REGISTER_KERNEL_BUILDER(                                               \
-      Name("Mean")                                                       \
-          .Device(DEVICE_SYCL)                                           \
-          .TypeConstraint<type>("T")                                     \
-          .TypeConstraint<int32>("Tidx")                                 \
-          .HostMemory("reduction_indices"),                              \
-      ReductionOp<SYCLDevice, type, int32, functor::MeanReducer<type>>); \
-  REGISTER_KERNEL_BUILDER(                                               \
-      Name("Mean")                                                       \
-          .Device(DEVICE_SYCL)                                           \
-          .TypeConstraint<type>("T")                                     \
-          .TypeConstraint<int64>("Tidx")                                 \
-          .HostMemory("reduction_indices"),                              \
-      ReductionOp<SYCLDevice, type, int64, functor::MeanReducer<type>>);
-REGISTER_SYCL_KERNELS(float);
-REGISTER_SYCL_KERNELS(double);
-#undef REGISTER_SYCL_KERNELS
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/reduction_ops_min.cc b/tensorflow/core/kernels/reduction_ops_min.cc
index 9f1feae..be1d093 100644
--- a/tensorflow/core/kernels/reduction_ops_min.cc
+++ b/tensorflow/core/kernels/reduction_ops_min.cc
@@ -80,44 +80,5 @@
 
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNELS(type)                                        \
-  REGISTER_KERNEL_BUILDER(Name("Min")                                      \
-                              .Device(DEVICE_SYCL)                         \
-                              .TypeConstraint<type>("T")                   \
-                              .TypeConstraint<int32>("Tidx")               \
-                              .HostMemory("reduction_indices"),            \
-                          ReductionOp<SYCLDevice, type, int32,             \
-                                      Eigen::internal::MinReducer<type>>); \
-  REGISTER_KERNEL_BUILDER(Name("Min")                                      \
-                              .Device(DEVICE_SYCL)                         \
-                              .TypeConstraint<type>("T")                   \
-                              .TypeConstraint<int64>("Tidx")               \
-                              .HostMemory("reduction_indices"),            \
-                          ReductionOp<SYCLDevice, type, int64,             \
-                                      Eigen::internal::MinReducer<type>>);
-REGISTER_SYCL_KERNELS(float);
-REGISTER_SYCL_KERNELS(double);
-
-REGISTER_KERNEL_BUILDER(
-    Name("Min")
-        .Device(DEVICE_SYCL)
-        .HostMemory("reduction_indices")
-        .HostMemory("input")
-        .HostMemory("output")
-        .TypeConstraint<int32>("T")
-        .TypeConstraint<int32>("Tidx"),
-    ReductionOp<CPUDevice, int32, int32, Eigen::internal::MinReducer<int32>>);
-REGISTER_KERNEL_BUILDER(
-    Name("Min")
-        .Device(DEVICE_SYCL)
-        .HostMemory("reduction_indices")
-        .HostMemory("input")
-        .HostMemory("output")
-        .TypeConstraint<int32>("T")
-        .TypeConstraint<int64>("Tidx"),
-    ReductionOp<CPUDevice, int32, int64, Eigen::internal::MinReducer<int32>>);
-#undef REGISTER_SYCL_KERNELS
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/reduction_ops_prod.cc b/tensorflow/core/kernels/reduction_ops_prod.cc
index 33742e9..a9dfbbc 100644
--- a/tensorflow/core/kernels/reduction_ops_prod.cc
+++ b/tensorflow/core/kernels/reduction_ops_prod.cc
@@ -59,26 +59,5 @@
 
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNELS(type)                                         \
-  REGISTER_KERNEL_BUILDER(Name("Prod")                                      \
-                              .Device(DEVICE_SYCL)                          \
-                              .TypeConstraint<type>("T")                    \
-                              .TypeConstraint<int32>("Tidx")                \
-                              .HostMemory("reduction_indices"),             \
-                          ReductionOp<SYCLDevice, type, int32,              \
-                                      Eigen::internal::ProdReducer<type>>); \
-  REGISTER_KERNEL_BUILDER(Name("Prod")                                      \
-                              .Device(DEVICE_SYCL)                          \
-                              .TypeConstraint<type>("T")                    \
-                              .TypeConstraint<int64>("Tidx")                \
-                              .HostMemory("reduction_indices"),             \
-                          ReductionOp<SYCLDevice, type, int64,              \
-                                      Eigen::internal::ProdReducer<type>>);
-REGISTER_SYCL_KERNELS(int32);
-REGISTER_SYCL_KERNELS(float);
-REGISTER_SYCL_KERNELS(double);
-#undef REGISTER_SYCL_KERNELS
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/reduction_ops_sum.cc b/tensorflow/core/kernels/reduction_ops_sum.cc
index b5f7a5d..1c3c03f 100644
--- a/tensorflow/core/kernels/reduction_ops_sum.cc
+++ b/tensorflow/core/kernels/reduction_ops_sum.cc
@@ -81,44 +81,5 @@
 
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNELS(type)                                        \
-  REGISTER_KERNEL_BUILDER(Name("Sum")                                      \
-                              .Device(DEVICE_SYCL)                         \
-                              .TypeConstraint<type>("T")                   \
-                              .TypeConstraint<int32>("Tidx")               \
-                              .HostMemory("reduction_indices"),            \
-                          ReductionOp<SYCLDevice, type, int32,             \
-                                      Eigen::internal::SumReducer<type>>); \
-  REGISTER_KERNEL_BUILDER(Name("Sum")                                      \
-                              .Device(DEVICE_SYCL)                         \
-                              .TypeConstraint<type>("T")                   \
-                              .TypeConstraint<int64>("Tidx")               \
-                              .HostMemory("reduction_indices"),            \
-                          ReductionOp<SYCLDevice, type, int64,             \
-                                      Eigen::internal::SumReducer<type>>);
-REGISTER_SYCL_KERNELS(float);
-REGISTER_SYCL_KERNELS(double);
-
-REGISTER_KERNEL_BUILDER(
-    Name("Sum")
-        .Device(DEVICE_SYCL)
-        .TypeConstraint<int32>("T")
-        .TypeConstraint<int32>("Tidx")
-        .HostMemory("input")
-        .HostMemory("output")
-        .HostMemory("reduction_indices"),
-    ReductionOp<CPUDevice, int32, int32, Eigen::internal::SumReducer<int32>>);
-REGISTER_KERNEL_BUILDER(
-    Name("Sum")
-        .Device(DEVICE_SYCL)
-        .TypeConstraint<int32>("T")
-        .TypeConstraint<int64>("Tidx")
-        .HostMemory("input")
-        .HostMemory("output")
-        .HostMemory("reduction_indices"),
-    ReductionOp<CPUDevice, int32, int64, Eigen::internal::SumReducer<int32>>);
-#undef REGISTER_SYCL_KERNELS
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/relu_op.cc b/tensorflow/core/kernels/relu_op.cc
index 784c977..210b994 100644
--- a/tensorflow/core/kernels/relu_op.cc
+++ b/tensorflow/core/kernels/relu_op.cc
@@ -29,9 +29,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 #define REGISTER_RELU_KERNELS(type)                                       \
   REGISTER_KERNEL_BUILDER(                                                \
@@ -211,42 +208,5 @@
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-// Registration of the SYCL implementations.
-#define REGISTER_SYCL_KERNELS(type)                                        \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("Relu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),          \
-      ReluOp<SYCLDevice, type>);                                           \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("ReluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),      \
-      ReluGradOp<SYCLDevice, type>);                                       \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("Relu6").Device(DEVICE_SYCL).TypeConstraint<type>("T"),         \
-      Relu6Op<SYCLDevice, type>);                                          \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("Relu6Grad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),     \
-      Relu6GradOp<SYCLDevice, type>);                                      \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("LeakyRelu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),     \
-      LeakyReluOp<SYCLDevice, type>);                                      \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("LeakyReluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      LeakyReluGradOp<SYCLDevice, type>);                                  \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("Elu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),           \
-      EluOp<SYCLDevice, type>);                                            \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("EluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),       \
-      EluGradOp<SYCLDevice, type>);                                        \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("Selu").Device(DEVICE_SYCL).TypeConstraint<type>("T"),          \
-      SeluOp<SYCLDevice, type>);                                           \
-  REGISTER_KERNEL_BUILDER(                                                 \
-      Name("SeluGrad").Device(DEVICE_SYCL).TypeConstraint<type>("T"),      \
-      SeluGradOp<SYCLDevice, type>)
-
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNELS);
-#undef REGISTER_SYCL_KERNELS
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/reshape_op.cc b/tensorflow/core/kernels/reshape_op.cc
index 9860448..d43cc5a 100644
--- a/tensorflow/core/kernels/reshape_op.cc
+++ b/tensorflow/core/kernels/reshape_op.cc
@@ -46,45 +46,6 @@
 TF_CALL_bool(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                              \
-  REGISTER_KERNEL_BUILDER(Name("Reshape")                       \
-                              .Device(DEVICE_SYCL)              \
-                              .HostMemory("shape")              \
-                              .TypeConstraint<type>("T")        \
-                              .TypeConstraint<int32>("Tshape"), \
-                          ReshapeOp);                           \
-  REGISTER_KERNEL_BUILDER(Name("Reshape")                       \
-                              .Device(DEVICE_SYCL)              \
-                              .HostMemory("shape")              \
-                              .TypeConstraint<type>("T")        \
-                              .TypeConstraint<int64>("Tshape"), \
-                          ReshapeOp);
-REGISTER_SYCL_KERNEL(float)
-REGISTER_SYCL_KERNEL(double)
-REGISTER_SYCL_KERNEL(uint8)
-REGISTER_SYCL_KERNEL(int8)
-REGISTER_SYCL_KERNEL(int64)
-REGISTER_SYCL_KERNEL(uint16)
-
-REGISTER_KERNEL_BUILDER(Name("Reshape")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("tensor")
-                            .HostMemory("shape")
-                            .HostMemory("output")
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int32>("Tshape"),
-                        ReshapeOp);
-REGISTER_KERNEL_BUILDER(Name("Reshape")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("tensor")
-                            .HostMemory("shape")
-                            .HostMemory("output")
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int64>("Tshape"),
-                        ReshapeOp);
-#undef REGISTER_SYCL_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
     (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
diff --git a/tensorflow/core/kernels/reverse_op.cc b/tensorflow/core/kernels/reverse_op.cc
index 393231f..4b4aa05 100644
--- a/tensorflow/core/kernels/reverse_op.cc
+++ b/tensorflow/core/kernels/reverse_op.cc
@@ -34,9 +34,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace {
 
@@ -399,52 +396,4 @@
                         ReverseV2Op<CPUDevice, int32, int64>);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNELS(T)                             \
-  REGISTER_KERNEL_BUILDER(Name("Reverse")                    \
-                              .Device(DEVICE_SYCL)           \
-                              .TypeConstraint<T>("T")        \
-                              .HostMemory("dims"),           \
-                          ReverseOp<SYCLDevice, T>)          \
-  REGISTER_KERNEL_BUILDER(Name("ReverseV2")                  \
-                              .Device(DEVICE_SYCL)           \
-                              .TypeConstraint<T>("T")        \
-                              .TypeConstraint<int32>("Tidx") \
-                              .HostMemory("axis"),           \
-                          ReverseV2Op<SYCLDevice, T, int32>) \
-  REGISTER_KERNEL_BUILDER(Name("ReverseV2")                  \
-                              .Device(DEVICE_SYCL)           \
-                              .TypeConstraint<T>("T")        \
-                              .TypeConstraint<int64>("Tidx") \
-                              .HostMemory("axis"),           \
-                          ReverseV2Op<SYCLDevice, T, int64>)
-TF_CALL_uint8(REGISTER_SYCL_KERNELS);
-TF_CALL_int8(REGISTER_SYCL_KERNELS);
-TF_CALL_float(REGISTER_SYCL_KERNELS);
-TF_CALL_double(REGISTER_SYCL_KERNELS);
-
-REGISTER_KERNEL_BUILDER(Name("Reverse")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .HostMemory("tensor")
-                            .HostMemory("dims")
-                            .HostMemory("output"),
-                        ReverseOp<CPUDevice, int32>);
-REGISTER_KERNEL_BUILDER(Name("ReverseV2")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int32>("Tidx")
-                            .HostMemory("tensor")
-                            .HostMemory("axis")
-                            .HostMemory("output"),
-                        ReverseV2Op<CPUDevice, int32, int32>);
-REGISTER_KERNEL_BUILDER(Name("ReverseV2")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int64>("Tidx")
-                            .HostMemory("tensor")
-                            .HostMemory("axis")
-                            .HostMemory("output"),
-                        ReverseV2Op<CPUDevice, int32, int64>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/scatter_functor.h b/tensorflow/core/kernels/scatter_functor.h
index fd2724a..5af04c7 100644
--- a/tensorflow/core/kernels/scatter_functor.h
+++ b/tensorflow/core/kernels/scatter_functor.h
@@ -33,9 +33,6 @@
 class OpKernelContext;
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace scatter_op {
 
@@ -125,65 +122,6 @@
   }
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-template <scatter_op::UpdateOp Op>
-struct AssignSYCL {};
-template <>
-struct AssignSYCL<scatter_op::UpdateOp::ASSIGN> {
-  template <typename Device, typename Params, typename Update>
-  static void Run(Device d, Params p, Update u) {
-    p.device(d) = u;
-  }
-};
-
-template <>
-struct AssignSYCL<scatter_op::UpdateOp::ADD> {
-  template <typename Device, typename Params, typename Update>
-  static void Run(Device d, Params p, Update u) {
-    p.device(d) += u;
-  }
-};
-
-template <>
-struct AssignSYCL<scatter_op::UpdateOp::SUB> {
-  template <typename Device, typename Params, typename Update>
-  static void Run(Device d, Params p, Update u) {
-    p.device(d) -= u;
-  }
-};
-
-template <>
-struct AssignSYCL<scatter_op::UpdateOp::MUL> {
-  template <typename Device, typename Params, typename Update>
-  static void Run(Device d, Params p, Update u) {
-    p.device(d) = p * u;
-  }
-};
-
-template <>
-struct AssignSYCL<scatter_op::UpdateOp::DIV> {
-  template <typename Device, typename Params, typename Update>
-  static void Run(Device d, Params p, Update u) {
-    p.device(d) = p / u;
-  }
-};
-
-template <>
-struct AssignSYCL<scatter_op::UpdateOp::MIN> {
-  template <typename Device, typename Params, typename Update>
-  static void Run(Device d, Params p, Update u) {
-    p.device(d) = p.cwiseMin(u);
-  }
-};
-
-template <>
-struct AssignSYCL<scatter_op::UpdateOp::MAX> {
-  template <typename Device, typename Params, typename Update>
-  static void Run(Device d, Params p, Update u) {
-    p.device(d) = p.cwiseMax(u);
-  }
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace internal
 }  // namespace scatter_op
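
The AssignSYCL helpers above route each scatter update op to the right Eigen expression through full template specialization. The same dispatch pattern in a self-contained form, with plain floats standing in for Eigen tensor chips:

    #include <cstdio>

    enum class UpdateOp { kAssign, kAdd, kMul };

    template <UpdateOp Op> struct Assign;  // primary template, never defined
    template <> struct Assign<UpdateOp::kAssign> {
      static void Run(float& p, float u) { p = u; }
    };
    template <> struct Assign<UpdateOp::kAdd> {
      static void Run(float& p, float u) { p += u; }
    };
    template <> struct Assign<UpdateOp::kMul> {
      static void Run(float& p, float u) { p *= u; }
    };

    int main() {
      float param = 2.f;
      Assign<UpdateOp::kAdd>::Run(param, 3.f);  // op selected at compile time
      Assign<UpdateOp::kMul>::Run(param, 4.f);
      std::printf("%g\n", param);  // 20
      return 0;
    }
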
@@ -328,30 +266,6 @@
 struct ScatterFunctor<GPUDevice, Variant, Index, scatter_op::UpdateOp::ASSIGN>
     : ScatterFunctorVariantAssignBase<GPUDevice, Index> {};
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T, typename Index, scatter_op::UpdateOp op>
-struct ScatterFunctorBase<SYCLDevice, T, Index, op> {
-  Index operator()(OpKernelContext* c, const SYCLDevice& d,
-                   typename TTypes<T>::Matrix params,
-                   typename TTypes<T>::ConstMatrix updates,
-                   typename TTypes<Index>::ConstFlat indices) {
-    // indices and params sizes were validated in DoCompute().
-    const Index N = static_cast<Index>(indices.size());
-    const Index limit = static_cast<Index>(params.dimension(0));
-    for (Index i = 0; i < N; i++) {
-      // Grab the index and check its validity.  Do this carefully,
-      // to avoid checking the value and grabbing it again from
-      // memory a second time (a security risk since it may change in between).
-      const Index index = ::tensorflow::internal::SubtleMustCopy(indices(i));
-      if (!FastBoundsCheck(index, limit)) return i;
-      // Copy last Ndim-1 dimensions of updates[i] to params[index]
-      scatter_op::internal::AssignSYCL<op>::Run(
-          d, params.template chip<0>(index), updates.template chip<0>(i));
-    }
-    return -1;
-  }
-};
-#endif  // TENSORFLOW_USE_SYCL
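
The "grab the index and check its validity" comment in the functor above describes a time-of-check/time-of-use defense: read the possibly shared index exactly once, validate the private copy, and never touch the original again. A small sketch of that discipline, where SubtleMustCopyDemo is a hypothetical stand-in for tensorflow::internal::SubtleMustCopy:

    #include <cstdio>

    // Force a single read of a value another thread might be mutating.
    static inline int SubtleMustCopyDemo(const volatile int& x) { return x; }

    int main() {
      volatile int shared_index = 3;  // imagine concurrent writers
      const int limit = 8;
      const int index = SubtleMustCopyDemo(shared_index);  // one read
      if (index < 0 || index >= limit) {
        std::printf("out of bounds: %d\n", index);
        return 1;
      }
      // Use only the checked copy; re-reading shared_index here would let a
      // racing write bypass the bounds check above.
      std::printf("params[%d] updated\n", index);
      return 0;
    }
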
 
 template <typename T, typename Index>
 struct ScatterFunctorBase<CPUDevice, T, Index, scatter_op::UpdateOp::ASSIGN> {
@@ -395,27 +309,6 @@
 struct ScatterFunctor<CPUDevice, T, Index, op>
     : ScatterFunctorBase<CPUDevice, T, Index, op> {};
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T, typename Index, scatter_op::UpdateOp op>
-struct ScatterFunctorSYCL {
-  Index operator()(OpKernelContext* c, const SYCLDevice& d,
-                   typename TTypes<T>::Matrix params,
-                   typename TTypes<T>::ConstMatrix updates,
-                   typename TTypes<Index>::Flat indices) {
-    // indices and params sizes were validated in DoCompute().
-    const Index N = static_cast<Index>(indices.size());
-    const Index limit = static_cast<Index>(params.dimension(0));
-    for (Index i = 0; i < N; i++) {
-      const Index index = ::tensorflow::internal::SubtleMustCopy(indices(i));
-      if (!FastBoundsCheck(index, limit)) return i;
-      // Copy last Ndim-1 dimensions of updates[i] to params[index]
-      scatter_op::internal::AssignSYCL<op>::Run(
-          d, params.template chip<0>(index), updates.template chip<0>(i));
-    }
-    return -1;
-  }
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename Device, typename T, typename Index, scatter_op::UpdateOp op>
 struct ScatterScalarFunctor {
@@ -483,30 +376,6 @@
                             scatter_op::UpdateOp::ASSIGN>
     : ScatterScalarFunctorVariantAssignBase<GPUDevice, Index> {};
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T, typename Index, scatter_op::UpdateOp op>
-struct ScatterScalarFunctorBase<SYCLDevice, T, Index, op> {
-  Index operator()(OpKernelContext* c, const SYCLDevice& d,
-                   typename TTypes<T>::Matrix params,
-                   const typename TTypes<T>::ConstScalar update,
-                   typename TTypes<Index>::ConstFlat indices) {
-    // indices and params sizes were validated in DoCompute().
-    const Index N = static_cast<Index>(indices.size());
-    const Index limit = static_cast<Index>(params.dimension(0));
-    for (Index i = 0; i < N; i++) {
-      // Grab the index and check its validity.  Do this carefully,
-      // to avoid checking the value and grabbing it again from
-      // memory a second time (a security risk since it may change in between).
-      const Index index = ::tensorflow::internal::SubtleMustCopy(indices(i));
-      if (!FastBoundsCheck(index, limit)) return i;
-      // Broadcast update to params[index]
-      scatter_op::internal::AssignSYCL<op>::RunScalar(
-          d, params.template chip<0>(index), update);
-    }
-    return -1;
-  }
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename T, typename Index>
 struct ScatterScalarFunctorBase<CPUDevice, T, Index,
@@ -536,27 +405,6 @@
 struct ScatterScalarFunctor<CPUDevice, T, Index, op>
     : ScatterScalarFunctorBase<CPUDevice, T, Index, op> {};
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T, typename Index, scatter_op::UpdateOp op>
-struct ScatterScalarFunctorSYCL {
-  Index operator()(OpKernelContext* c, const SYCLDevice& d,
-                   typename TTypes<T>::Matrix params,
-                   const typename TTypes<T>::ConstScalar update,
-                   typename TTypes<Index>::Flat indices) {
-    // indices and params sizes were validated in DoCompute().
-    const Index N = static_cast<Index>(indices.size());
-    const Index limit = static_cast<Index>(params.dimension(0));
-    for (Index i = 0; i < N; i++) {
-      const Index index = ::tensorflow::internal::SubtleMustCopy(indices(i));
-      if (!FastBoundsCheck(index, limit)) return i;
-      // Broadcast update to params[index]
-      scatter_op::internal::AssignSYCL<op>::Run(
-          d, params.template chip<0>(index), update());
-    }
-    return -1;
-  }
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace functor
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc
index 04a66d3..b50c8d2 100644
--- a/tensorflow/core/kernels/scatter_nd_op.cc
+++ b/tensorflow/core/kernels/scatter_nd_op.cc
@@ -38,17 +38,11 @@
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/util.h"
 
-#ifdef TENSORFLOW_USE_SYCL
-#include "tensorflow/core/common_runtime/sycl/sycl_util.h"
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 // Returns true if the three tensors have valid number of elements
 // If shape_input has 0 elements, then we need to have indices and updates with
@@ -677,28 +671,6 @@
 
 #undef REGISTER_SCATTER_ND_ALL_GPU
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SCATTER_ND_ADD_SUB_SYCL(type) \
-  REGISTER_SCATTER_ND_ADD_SUB(type, SYCL);
-
-#define REGISTER_SCATTER_ND_UPDATE_SYCL(type) \
-  REGISTER_SCATTER_ND_UPDATE(type, SYCL);
-
-#define REGISTER_SCATTER_ND_MIN_MAX_SYCL(type) \
-  REGISTER_SCATTER_ND_MIN_MAX(type, SYCL);
-
-TF_CALL_int32(REGISTER_SCATTER_ND_ADD_SUB_SYCL);
-TF_CALL_int32(REGISTER_SCATTER_ND_UPDATE_SYCL);
-TF_CALL_int32(REGISTER_SCATTER_ND_MIN_MAX_SYCL);
-TF_CALL_bool(REGISTER_SCATTER_ND_UPDATE_SYCL);
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_ADD_SUB_SYCL);
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_UPDATE_SYCL);
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_MIN_MAX_SYCL);
-
-#undef REGISTER_SCATTER_ND_ADD_SUB_SYCL
-#undef REGISTER_SCATTER_ND_MIN_MAX_SYCL
-#undef REGISTER_SCATTER_ND_UPDATE_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 
 #define REGISTER_SCATTER_ND_TENSOR_UPDATE_GPU(type)                    \
   REGISTER_SCATTER_ND_TENSOR_UPDATE_TYPE_INDEX_TYPE(type, int32, GPU); \
@@ -924,30 +896,6 @@
   }
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename Index>
-class IndexFlattener<SYCLDevice, Index> {
- public:
-  IndexFlattener() { indices_host_ = nullptr; }
-  ~IndexFlattener() { delete[] indices_host_; }
-
-  inline typename TTypes<Index, 2>::ConstTensor operator()(
-      OpKernelContext* c, const Tensor& indices) {
-    size_t num_indices = indices.NumElements();
-    indices_host_ = new Index[num_indices];
-    auto device = c->eigen_sycl_device();
-    auto size = sizeof(Index) * num_indices;
-    auto src_ptr = GetBase(&indices);
-    device.memcpyDeviceToHost(indices_host_, static_cast<const Index*>(src_ptr),
-                              size);
-    return typename TTypes<Index, 2>::ConstTensor(
-        indices_host_, indices.shape().AsEigenDSizes<2>());
-  }
-
- private:
-  Index* indices_host_;
-};
-#endif
 
 template <typename Device, typename T, typename Index,
           scatter_nd_op::UpdateOp Op>
diff --git a/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h b/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h
index 948db7f..6cfa1df 100644
--- a/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h
+++ b/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h
@@ -38,9 +38,6 @@
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 class OpKernelContext;
 
@@ -194,97 +191,6 @@
 #undef REGISTER_SCATTER_ND_UPDATE
 #undef REGISTER_SCATTER_ND_INDEX
 #undef REGISTER_SCATTER_ND_FULL
-
-// Implementation of update functor for SYCL.
-#ifdef TENSORFLOW_USE_SYCL
-
-template <typename T, typename Index, scatter_nd_op::UpdateOp OP, int IXDIM>
-struct ScatterNdFunctor<SYCLDevice, T, Index, OP, IXDIM> {
-  Index operator()(
-      const SYCLDevice& d, const Index slice_size,
-      const Eigen::array<Eigen::DenseIndex, IXDIM> output_shape_prefix,
-      typename TTypes<T, 2>::Tensor Tparams,
-      typename TTypes<Index, 2>::ConstTensor Tindices,
-      typename TTypes<T, 2>::ConstTensor Tupdates,
-      typename TTypes<T, 2>::Tensor Toutput) {
-    // error_loc is -1 if there's no out-of-bounds index,
-    // otherwise it is the location of an OOB index in Tindices.
-    Index error_loc = -1;
-
-    const Eigen::DenseIndex batch_size = Tindices.dimension(0);
-
-    Index batch_strides[IXDIM];
-    for (int dim = IXDIM - 1; dim >= 0; --dim) {
-      if (dim == IXDIM - 1) {
-        batch_strides[dim] = 1;
-      } else {
-        batch_strides[dim] =
-            batch_strides[dim + 1] * output_shape_prefix[dim + 1];
-      }
-    }
-
-    for (Eigen::DenseIndex loc = 0; loc < batch_size; ++loc) {
-      Index i = 0;
-      bool out_of_bounds = false;
-      for (int dim = 0; dim < IXDIM; ++dim) {
-        const Index ix_d = internal::SubtleMustCopy(Tindices(loc, dim));
-        out_of_bounds |= !FastBoundsCheck(ix_d, output_shape_prefix[dim]);
-        i += ix_d * batch_strides[dim];
-      }
-      if (TF_PREDICT_FALSE(out_of_bounds)) {
-        error_loc = loc;
-        break;
-      } else {
-        auto input_chip = Toutput.template chip<0>(i);
-        auto output_chip = input_chip;
-        auto update_chip = Tupdates.template chip<0>(loc);
-        update_executor::UpdateExecutor<
-            SYCLDevice, decltype(input_chip), decltype(update_chip),
-            decltype(output_chip), OP>::Execute(d, input_chip, update_chip,
-                                                output_chip);
-      }
-    }
-
-    return error_loc;
-  }
-};
-
-#define REGISTER_SCATTER_ND_FULL_SYCL(T, Index, op)                           \
-  template Index                                                              \
-  ScatterNdFunctor<SYCLDevice, T, Index, op, CPU_PROVIDED_IXDIM>::operator()( \
-      const SYCLDevice& d, const Index slice_size,                            \
-      const Eigen::array<Eigen::DenseIndex, CPU_PROVIDED_IXDIM>               \
-          output_shape_prefix,                                                \
-      typename TTypes<T, 2>::Tensor Tparams,                                  \
-      typename TTypes<Index, 2>::ConstTensor Tindices,                        \
-      typename TTypes<T, 2>::ConstTensor Tupdates,                            \
-      typename TTypes<T, 2>::Tensor Toutput)
-
-#define REGISTER_SCATTER_ND_INDEX_SYCL(type, op)  \
-  REGISTER_SCATTER_ND_FULL_SYCL(type, int32, op); \
-  REGISTER_SCATTER_ND_FULL_SYCL(type, int64, op)
-
-#define REGISTER_SCATTER_ND_UPDATE_SYCL(type) \
-  REGISTER_SCATTER_ND_INDEX_SYCL(type, scatter_nd_op::UpdateOp::ASSIGN);
-
-#define REGISTER_SCATTER_ND_MATH_SYCL(type)                           \
-  REGISTER_SCATTER_ND_INDEX_SYCL(type, scatter_nd_op::UpdateOp::ADD); \
-  REGISTER_SCATTER_ND_INDEX_SYCL(type, scatter_nd_op::UpdateOp::SUB); \
-  REGISTER_SCATTER_ND_INDEX_SYCL(type, scatter_nd_op::UpdateOp::MIN); \
-  REGISTER_SCATTER_ND_INDEX_SYCL(type, scatter_nd_op::UpdateOp::MAX);
-
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_UPDATE_SYCL)
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_MATH_SYCL)
-REGISTER_SCATTER_ND_UPDATE_SYCL(int32);
-REGISTER_SCATTER_ND_MATH_SYCL(int32);
-
-#undef REGISTER_SCATTER_ND_MATH_SYCL
-#undef REGISTER_SCATTER_ND_UPDATE_SYCL
-#undef REGISTER_SCATTER_ND_INDEX_SYCL
-#undef REGISTER_SCATTER_ND_FULL_SYCL
-
-#endif  // TENSORFLOW_USE_SYCL
-
 }  // namespace functor
 
 }  // namespace tensorflow
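
For reference, the ScatterNdFunctor<SYCLDevice, ...> deleted above flattened each IXDIM-dimensional index into a row offset via row-major batch strides and reported the first out-of-bounds location (or -1 if none). A standalone sketch of that stride and bounds logic in plain C++ (std:: containers only, not the TensorFlow types):

#include <array>
#include <cstddef>
#include <cstdint>
#include <vector>

// Returns -1 on success, otherwise the position of the first bad index,
// mirroring the error_loc contract of the deleted functor.
template <int IXDIM>
std::int64_t FlattenScatterIndices(
    const std::array<std::int64_t, IXDIM>& output_shape_prefix,
    const std::vector<std::array<std::int64_t, IXDIM>>& indices,
    std::vector<std::int64_t>* row_offsets) {
  // Row-major strides: the innermost dimension has stride 1.
  std::array<std::int64_t, IXDIM> batch_strides;
  batch_strides[IXDIM - 1] = 1;
  for (int dim = IXDIM - 2; dim >= 0; --dim) {
    batch_strides[dim] = batch_strides[dim + 1] * output_shape_prefix[dim + 1];
  }
  for (std::size_t loc = 0; loc < indices.size(); ++loc) {
    std::int64_t offset = 0;
    for (int dim = 0; dim < IXDIM; ++dim) {
      const std::int64_t ix = indices[loc][dim];
      if (ix < 0 || ix >= output_shape_prefix[dim]) {
        return static_cast<std::int64_t>(loc);  // first out-of-bounds index
      }
      offset += ix * batch_strides[dim];
    }
    row_offsets->push_back(offset);
  }
  return -1;  // no out-of-bounds index
}
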
diff --git a/tensorflow/core/kernels/scatter_op.cc b/tensorflow/core/kernels/scatter_op.cc
index c7ea9de..f551711 100644
--- a/tensorflow/core/kernels/scatter_op.cc
+++ b/tensorflow/core/kernels/scatter_op.cc
@@ -23,17 +23,11 @@
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/util.h"
 
-#ifdef TENSORFLOW_USE_SYCL
-#include "tensorflow/core/common_runtime/sycl/sycl_util.h"
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 // Check whether updates.shape = indices.shape + params.shape[1:]
 static bool ValidShapes(const Tensor& params, const Tensor& updates,
@@ -151,94 +145,6 @@
   }
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T, typename Index, scatter_op::UpdateOp op>
-class ScatterUpdateOp<SYCLDevice, T, Index, op> : public OpKernel {
- public:
-  explicit ScatterUpdateOp(OpKernelConstruction* c) : OpKernel(c) {
-    OP_REQUIRES_OK(c, c->GetAttr("use_locking", &use_exclusive_lock_));
-  }
-
-  void Compute(OpKernelContext* c) override {
-    if (use_exclusive_lock_) {
-      // Hold mutex while we apply updates
-      mutex_lock l(*c->input_ref_mutex(0));
-      DoCompute(c);
-    } else {
-      DoCompute(c);
-    }
-  }
-
- private:
-  bool use_exclusive_lock_;
-
-  void DoCompute(OpKernelContext* c) {
-    Tensor params = c->mutable_input(0, use_exclusive_lock_);
-    const Tensor& indices = c->input(1);
-    const Tensor& updates = c->input(2);
-    DoValidationChecking(c, params, indices, updates);
-    if (!c->status().ok()) return;
-
-    // Check that we have enough index space
-    const int64 N_big = indices.NumElements();
-    OP_REQUIRES(
-        c, N_big <= std::numeric_limits<Index>::max(),
-        errors::InvalidArgument("indices has too many elements for ",
-                                DataTypeString(DataTypeToEnum<Index>::v()),
-                                " indexing: ", N_big, " > ",
-                                std::numeric_limits<Index>::max()));
-    const Index N = static_cast<Index>(indices.NumElements());
-    OP_REQUIRES(
-        c, params.dim_size(0) <= std::numeric_limits<Index>::max(),
-        errors::InvalidArgument("params.shape[0] too large for ",
-                                DataTypeString(DataTypeToEnum<Index>::v()),
-                                " indexing: ", params.dim_size(0), " > ",
-                                std::numeric_limits<Index>::max()));
-
-    // We always return the input ref.
-    c->forward_ref_input_to_ref_output(0, 0);
-
-    if (N > 0) {
-      auto index_size = indices.NumElements() * sizeof(Index);
-      Tensor indices_host = Tensor(indices.dtype(), indices.shape());
-
-      auto src_ptr = GetBase(&indices);
-      auto dst_ptr = GetBase(&indices_host);
-
-      c->eigen_sycl_device().memcpyDeviceToHost(
-          dst_ptr, static_cast<const Index*>(src_ptr), index_size);
-
-      auto indices_flat = indices_host.flat<Index>();
-      auto params_flat = params.flat_outer_dims<T>();
-
-      if (TensorShapeUtils::IsScalar(updates.shape())) {
-        const auto update = updates.scalar<T>();
-
-        functor::ScatterScalarFunctorSYCL<T, Index, op> functor;
-        const Index bad_i = functor(c, c->template eigen_device<SYCLDevice>(),
-                                    params_flat, update, indices_flat);
-        OP_REQUIRES(c, bad_i < 0,
-                    errors::InvalidArgument(
-                        "indices", SliceDebugString(indices.shape(), bad_i),
-                        " = ", indices_flat(bad_i), " is not in [0, ",
-                        params.dim_size(0), ")"));
-      } else {
-        auto updates_flat =
-            updates.shaped<T, 2>({N, updates.NumElements() / N});
-
-        functor::ScatterFunctorSYCL<T, Index, op> functor;
-        const Index bad_i = functor(c, c->template eigen_device<SYCLDevice>(),
-                                    params_flat, updates_flat, indices_flat);
-        OP_REQUIRES(c, bad_i < 0,
-                    errors::InvalidArgument(
-                        "indices", SliceDebugString(indices.shape(), bad_i),
-                        " = ", indices_flat(bad_i), " is not in [0, ",
-                        params.dim_size(0), ")"));
-      }
-    }
-  }
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 #define REGISTER_SCATTER_KERNEL_INDEX(type, index_type, dev, name, op) \
   REGISTER_KERNEL_BUILDER(Name(name)                                   \
@@ -293,22 +199,6 @@
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 // Registers GPU kernels.
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SCATTER_ARITHMETIC_SYCL(type) \
-  REGISTER_SCATTER_ARITHMETIC(type, SYCL);
-
-#define REGISTER_SCATTER_MINMAX_SYCL(type) REGISTER_SCATTER_MINMAX(type, SYCL);
-
-#define REGISTER_SCATTER_UPDATE_SYCL(type) REGISTER_SCATTER_UPDATE(type, SYCL);
-
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ARITHMETIC_SYCL);
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_MINMAX_SYCL);
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_UPDATE_SYCL);
-
-#undef REGISTER_SCATTER_ARITHMETIC_SYCL
-#undef REGISTER_SCATTER_MINMAX_SYCL
-#undef REGISTER_SCATTER_UPDATE_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 
 #undef REGISTER_SCATTER_ARITHMETIC
 #undef REGISTER_SCATTER_ARITHMETIC_CPU
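
The ValidShapes check kept in this file enforces updates.shape = indices.shape + params.shape[1:]. A plain-C++ sketch of that rule (a hypothetical helper over dimension vectors, not the TF function itself):

#include <cstddef>
#include <cstdint>
#include <vector>

// Sketch of the shape rule scatter ops validate:
// updates.shape == indices.shape + params.shape[1:].
bool ValidScatterShapes(const std::vector<std::int64_t>& params,
                        const std::vector<std::int64_t>& indices,
                        const std::vector<std::int64_t>& updates) {
  if (params.empty()) return false;
  if (updates.size() != indices.size() + params.size() - 1) return false;
  for (std::size_t d = 0; d < indices.size(); ++d) {
    if (updates[d] != indices[d]) return false;  // leading dims match indices
  }
  for (std::size_t d = 1; d < params.size(); ++d) {
    if (updates[indices.size() + d - 1] != params[d]) return false;
  }
  return true;
}
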
diff --git a/tensorflow/core/kernels/sequence_ops.cc b/tensorflow/core/kernels/sequence_ops.cc
index 7ce2016..d15f951 100644
--- a/tensorflow/core/kernels/sequence_ops.cc
+++ b/tensorflow/core/kernels/sequence_ops.cc
@@ -99,14 +99,6 @@
 
 #define REGISTER_CPU_KERNEL(T) REGISTER_KERNEL(DEVICE_CPU, T)
 #define REGISTER_GPU_KERNEL(T) REGISTER_KERNEL(DEVICE_GPU, T)
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(T) REGISTER_KERNEL(DEVICE_SYCL, T)
-TF_CALL_float(REGISTER_SYCL_KERNEL);
-TF_CALL_double(REGISTER_SYCL_KERNEL);
-TF_CALL_int32(REGISTER_SYCL_KERNEL);
-TF_CALL_int64(REGISTER_SYCL_KERNEL);
-#undef REGISTER_SYCL_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 TF_CALL_float(REGISTER_CPU_KERNEL);
 TF_CALL_double(REGISTER_CPU_KERNEL);
@@ -189,12 +181,6 @@
 TF_CALL_double(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(T) REGISTER_KERNEL_ALL_NUMS(DEVICE_SYCL, T)
-TF_CALL_float(REGISTER_SYCL_KERNEL);
-TF_CALL_double(REGISTER_SYCL_KERNEL);
-#undef REGISTER_SYCL_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 #undef REGISTER_CPU_KERNEL
 #undef REGISTER_KERNEL_ALL_NUMS
diff --git a/tensorflow/core/kernels/session_ops.cc b/tensorflow/core/kernels/session_ops.cc
index d83a714..9e67fec 100644
--- a/tensorflow/core/kernels/session_ops.cc
+++ b/tensorflow/core/kernels/session_ops.cc
@@ -85,23 +85,6 @@
 REGISTER_GPU_KERNEL(bool);
 #undef REGISTER_GPU_KERNEL
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                        \
-  REGISTER_KERNEL_BUILDER(Name("GetSessionHandle")        \
-                              .Device(DEVICE_SYCL)        \
-                              .HostMemory("handle")       \
-                              .TypeConstraint<type>("T"), \
-                          GetSessionHandleOp)             \
-  REGISTER_KERNEL_BUILDER(Name("GetSessionHandleV2")      \
-                              .Device(DEVICE_SYCL)        \
-                              .HostMemory("handle")       \
-                              .TypeConstraint<type>("T"), \
-                          GetSessionHandleOp)
-
-TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
-REGISTER_SYCL_KERNEL(bool);
-#undef REGISTER_SYCL_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 class GetSessionTensorOp : public OpKernel {
  public:
@@ -133,18 +116,6 @@
 REGISTER_GPU_KERNEL(bool);
 #undef REGISTER_GPU_KERNEL
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                            \
-  REGISTER_KERNEL_BUILDER(Name("GetSessionTensor")            \
-                              .Device(DEVICE_SYCL)            \
-                              .HostMemory("handle")           \
-                              .TypeConstraint<type>("dtype"), \
-                          GetSessionTensorOp)
-
-TF_CALL_NUMBER_TYPES(REGISTER_SYCL_KERNEL);
-REGISTER_SYCL_KERNEL(bool);
-#undef REGISTER_SYCL_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 class DeleteSessionTensorOp : public OpKernel {
  public:
@@ -166,9 +137,4 @@
     Name("DeleteSessionTensor").Device(DEVICE_GPU).HostMemory("handle"),
     DeleteSessionTensorOp);
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(
-    Name("DeleteSessionTensor").Device(DEVICE_SYCL).HostMemory("handle"),
-    DeleteSessionTensorOp);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/shape_ops.cc b/tensorflow/core/kernels/shape_ops.cc
index cf065f7..7b2ffa8 100644
--- a/tensorflow/core/kernels/shape_ops.cc
+++ b/tensorflow/core/kernels/shape_ops.cc
@@ -33,40 +33,6 @@
                             .TypeConstraint<int64>("out_type"),
                         ShapeOp<int64>);
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                               \
-  REGISTER_KERNEL_BUILDER(Name("Shape")                          \
-                              .Device(DEVICE_SYCL)               \
-                              .HostMemory("output")              \
-                              .TypeConstraint<int32>("out_type") \
-                              .TypeConstraint<type>("T"),        \
-                          ShapeOp<int32>);                       \
-  REGISTER_KERNEL_BUILDER(Name("Shape")                          \
-                              .Device(DEVICE_SYCL)               \
-                              .HostMemory("output")              \
-                              .TypeConstraint<int64>("out_type") \
-                              .TypeConstraint<type>("T"),        \
-                          ShapeOp<int64>);
-
-TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
-TF_CALL_bool(REGISTER_SYCL_KERNEL);
-#undef REGISTER_SYCL_KERNEL
-
-REGISTER_KERNEL_BUILDER(Name("Shape")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("input")
-                            .HostMemory("output")
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int32>("out_type"),
-                        ShapeOp<int32>);
-REGISTER_KERNEL_BUILDER(Name("Shape")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("input")
-                            .HostMemory("output")
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int64>("out_type"),
-                        ShapeOp<int64>);
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #define REGISTER_GPU_KERNEL(type)                                \
@@ -158,69 +124,11 @@
                         ShapeNOp<int64>);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                               \
-  REGISTER_KERNEL_BUILDER(Name("ShapeN")                         \
-                              .Device(DEVICE_SYCL)               \
-                              .HostMemory("output")              \
-                              .TypeConstraint<int32>("out_type") \
-                              .TypeConstraint<type>("T"),        \
-                          ShapeNOp<int32>);                      \
-  REGISTER_KERNEL_BUILDER(Name("ShapeN")                         \
-                              .Device(DEVICE_SYCL)               \
-                              .HostMemory("output")              \
-                              .TypeConstraint<int64>("out_type") \
-                              .TypeConstraint<type>("T"),        \
-                          ShapeNOp<int64>)
-
-TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
-TF_CALL_bool(REGISTER_SYCL_KERNEL);
-#undef REGISTER_SYCL_KERNEL
-
-REGISTER_KERNEL_BUILDER(Name("ShapeN")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("input")
-                            .HostMemory("output")
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int32>("out_type"),
-                        ShapeNOp<int32>);
-REGISTER_KERNEL_BUILDER(Name("ShapeN")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("input")
-                            .HostMemory("output")
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int64>("out_type"),
-                        ShapeNOp<int64>);
-#endif  // TENSORFLOW_USE_SYCL
 
 // Rank ------------------------------------------
 REGISTER_KERNEL_BUILDER(Name("Rank").Device(DEVICE_CPU).HostMemory("output"),
                         RankOp);
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                       \
-  REGISTER_KERNEL_BUILDER(Name("Rank")                   \
-                              .Device(DEVICE_SYCL)       \
-                              .TypeConstraint<type>("T") \
-                              .HostMemory("output"),     \
-                          RankOp);
-TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
-#undef REGISTER_SYCL_KERNEL
-
-REGISTER_KERNEL_BUILDER(Name("Rank")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .HostMemory("input")
-                            .HostMemory("output"),
-                        RankOp);
-
-REGISTER_KERNEL_BUILDER(Name("Rank")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<bool>("T")
-                            .HostMemory("input")
-                            .HostMemory("output"),
-                        RankOp);
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #define REGISTER_GPU_KERNEL(type)                        \
@@ -303,39 +211,6 @@
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                               \
-  REGISTER_KERNEL_BUILDER(Name("Size")                           \
-                              .Device(DEVICE_SYCL)               \
-                              .TypeConstraint<type>("T")         \
-                              .TypeConstraint<int32>("out_type") \
-                              .HostMemory("output"),             \
-                          SizeOp<int32>);                        \
-  REGISTER_KERNEL_BUILDER(Name("Size")                           \
-                              .Device(DEVICE_SYCL)               \
-                              .TypeConstraint<type>("T")         \
-                              .TypeConstraint<int64>("out_type") \
-                              .HostMemory("output"),             \
-                          SizeOp<int64>);
-TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
-TF_CALL_bool(REGISTER_SYCL_KERNEL);
-#undef REGISTER_SYCL_KERNEL
-
-REGISTER_KERNEL_BUILDER(Name("Size")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int32>("out_type")
-                            .HostMemory("input")
-                            .HostMemory("output"),
-                        SizeOp<int32>);
-REGISTER_KERNEL_BUILDER(Name("Size")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int64>("out_type")
-                            .HostMemory("input")
-                            .HostMemory("output"),
-                        SizeOp<int64>);
-#endif  // TENSORFLOW_USE_SYCL
 
 // ExpandDims ------------------------------------
 REGISTER_KERNEL_BUILDER(Name("ExpandDims")
@@ -385,41 +260,6 @@
                         ExpandDimsOp<int64>);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                           \
-  REGISTER_KERNEL_BUILDER(Name("ExpandDims")                 \
-                              .Device(DEVICE_SYCL)           \
-                              .TypeConstraint<type>("T")     \
-                              .TypeConstraint<int32>("Tdim") \
-                              .HostMemory("dim"),            \
-                          ExpandDimsOp<int32>);              \
-  REGISTER_KERNEL_BUILDER(Name("ExpandDims")                 \
-                              .Device(DEVICE_SYCL)           \
-                              .TypeConstraint<type>("T")     \
-                              .TypeConstraint<int64>("Tdim") \
-                              .HostMemory("dim"),            \
-                          ExpandDimsOp<int64>);
-TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
-TF_CALL_bool(REGISTER_SYCL_KERNEL);
-#undef REGISTER_SYCL_KERNEL
-
-REGISTER_KERNEL_BUILDER(Name("ExpandDims")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int32>("Tdim")
-                            .HostMemory("input")
-                            .HostMemory("dim")
-                            .HostMemory("output"),
-                        ExpandDimsOp<int32>);
-REGISTER_KERNEL_BUILDER(Name("ExpandDims")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int64>("Tdim")
-                            .HostMemory("input")
-                            .HostMemory("dim")
-                            .HostMemory("output"),
-                        ExpandDimsOp<int64>);
-#endif  // TENSORFLOW_USE_SYCL
 
 // Squeeze ---------------------------------------
 REGISTER_KERNEL_BUILDER(Name("Squeeze").Device(DEVICE_CPU), SqueezeOp);
@@ -444,22 +284,6 @@
                         SqueezeOp);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                                   \
-  REGISTER_KERNEL_BUILDER(                                           \
-      Name("Squeeze").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      SqueezeOp);
-TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
-TF_CALL_bool(REGISTER_SYCL_KERNEL);
-#undef REGISTER_SYCL_KERNEL
-
-REGISTER_KERNEL_BUILDER(Name("Squeeze")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .HostMemory("input")
-                            .HostMemory("output"),
-                        SqueezeOp);
-#endif  // TENSORFLOW_USE_SYCL
 
 class EnsureShapeOp : public OpKernel {
  public:
@@ -497,30 +321,6 @@
 // constraints.
 REGISTER_KERNEL_BUILDER(Name("EnsureShape").Device(DEVICE_CPU), EnsureShapeOp);
 
-#if TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                                       \
-  REGISTER_KERNEL_BUILDER(                                               \
-      Name("EnsureShape").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      EnsureShapeOp)
-
-TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
-
-#undef REGISTER_SYCL_KERNEL
-
-#define REGISTER_SYCL_HOST_KERNEL(type)                   \
-  REGISTER_KERNEL_BUILDER(Name("EnsureShape")             \
-                              .Device(DEVICE_SYCL)        \
-                              .HostMemory("input")        \
-                              .HostMemory("output")       \
-                              .TypeConstraint<type>("T"), \
-                          EnsureShapeOp)
-
-REGISTER_SYCL_HOST_KERNEL(int32);
-REGISTER_SYCL_HOST_KERNEL(bool);
-
-#undef REGISTER_SYCL_HOST_KERNEL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 #define REGISTER_GPU_KERNEL(type)                                       \
   REGISTER_KERNEL_BUILDER(                                              \
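
Every registration in this file pins "output" (and, for int32 inputs, "input") to HostMemory, since shape metadata is produced and consumed on the host. What ShapeOp<OutType> emits is roughly just the dims vector, as in this sketch (plain C++, not the TF kernel):

#include <cstdint>
#include <vector>

// Rough sketch of ShapeOp<OutType>: the input's dimension sizes, cast to
// the requested out_type (int32 or int64), living in host memory.
template <typename OutType>
std::vector<OutType> ShapeOf(const std::vector<std::int64_t>& dims) {
  std::vector<OutType> shape;
  shape.reserve(dims.size());
  for (std::int64_t d : dims) shape.push_back(static_cast<OutType>(d));
  return shape;
}
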
diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc
index 6d7cd6f..3bf3ce4 100644
--- a/tensorflow/core/kernels/slice_op.cc
+++ b/tensorflow/core/kernels/slice_op.cc
@@ -57,9 +57,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 // Shared code that is not dependent on the type of T.  We do this to reduce
 // code size by not duplicating all this for all T (float, double, int32, etc.)
@@ -339,57 +336,4 @@
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-// Forward declarations of the functor specializations for SYCL.
-namespace functor {
-#define DECLARE_SYCL_SPEC(T, NDIM)                                  \
-  template <>                                                       \
-  void Slice<SYCLDevice, T, NDIM>::operator()(                      \
-      const SYCLDevice& d, typename TTypes<T, NDIM>::Tensor output, \
-      typename TTypes<T, NDIM>::ConstTensor input,                  \
-      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices,        \
-      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes);         \
-  extern template struct Slice<SYCLDevice, T, NDIM>;
-
-#define DECLARE_FOR_N(T)   \
-  DECLARE_SYCL_SPEC(T, 1); \
-  DECLARE_SYCL_SPEC(T, 2); \
-  DECLARE_SYCL_SPEC(T, 3); \
-  DECLARE_SYCL_SPEC(T, 4); \
-  DECLARE_SYCL_SPEC(T, 5); \
-  DECLARE_SYCL_SPEC(T, 6); \
-  DECLARE_SYCL_SPEC(T, 7); \
-  DECLARE_SYCL_SPEC(T, 8);
-
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DECLARE_FOR_N);
-DECLARE_FOR_N(int32);
-DECLARE_FOR_N(bool);
-
-#undef DECLARE_FOR_N
-#undef DECLARE_SYCL_SPEC
-}  // namespace functor
-
-#define REGISTER_SYCL(type)                                    \
-  REGISTER_KERNEL_BUILDER(Name("Slice")                        \
-                              .Device(DEVICE_SYCL)             \
-                              .TypeConstraint<type>("T")       \
-                              .HostMemory("begin")             \
-                              .HostMemory("size")              \
-                              .TypeConstraint<int32>("Index"), \
-                          SliceOp<SYCLDevice, type>)
-
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL);
-
-REGISTER_KERNEL_BUILDER(Name("Slice")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int32>("Index")
-                            .HostMemory("input")
-                            .HostMemory("begin")
-                            .HostMemory("size")
-                            .HostMemory("output"),
-                        SliceOp<CPUDevice, int32>);
-#undef REGISTER_SYCL
-
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
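
The deleted SYCL specializations forwarded to the same Eigen slice used on CPU: the output is the window starting at `begin` with the given `size` along each axis. A 2-D sketch in plain C++ (hypothetical flat row-major storage):

#include <cstddef>
#include <vector>

// 2-D sketch of Slice semantics: out(i, j) = in(begin0 + i, begin1 + j),
// with `in` stored row-major in a flat vector of width in_cols.
std::vector<float> Slice2D(const std::vector<float>& in, std::size_t in_cols,
                           std::size_t begin0, std::size_t begin1,
                           std::size_t size0, std::size_t size1) {
  std::vector<float> out(size0 * size1);
  for (std::size_t i = 0; i < size0; ++i) {
    for (std::size_t j = 0; j < size1; ++j) {
      out[i * size1 + j] = in[(begin0 + i) * in_cols + (begin1 + j)];
    }
  }
  return out;
}
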
diff --git a/tensorflow/core/kernels/slice_op_cpu_impl.h b/tensorflow/core/kernels/slice_op_cpu_impl.h
index 64b6948..9eda840 100644
--- a/tensorflow/core/kernels/slice_op_cpu_impl.h
+++ b/tensorflow/core/kernels/slice_op_cpu_impl.h
@@ -33,17 +33,6 @@
 
 #undef DEFINE_CPU_KERNELS
 
-#ifdef TENSORFLOW_USE_SYCL
-using SyclDevice = Eigen::SyclDevice;
-
-#define DEFINE_SYCL_KERNELS(T) \
-  template struct functor::Slice<SyclDevice, T, CPU_PROVIDED_IXDIM>;
-
-TF_CALL_GPU_NUMBER_TYPES(DEFINE_SYCL_KERNELS);
-DEFINE_SYCL_KERNELS(int32);
-
-#undef DEFINE_SYCL_KERNELS
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/snapshot_op.cc b/tensorflow/core/kernels/snapshot_op.cc
index 95bcfd6..1cbcb49 100644
--- a/tensorflow/core/kernels/snapshot_op.cc
+++ b/tensorflow/core/kernels/snapshot_op.cc
@@ -61,16 +61,5 @@
 #undef REGISTER_KERNEL
 #endif
 
-#if TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SyclDevice;
-#define REGISTER_SYCL_KERNEL(TYPE)                                    \
-  REGISTER_KERNEL_BUILDER(                                            \
-      Name("Snapshot").Device(DEVICE_SYCL).TypeConstraint<TYPE>("T"), \
-      SnapshotOp<SyclDevice, TYPE>);
-
-TF_CALL_POD_TYPES(REGISTER_SYCL_KERNEL);
-
-#undef REGISTER_SYCL_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/softmax_op.cc b/tensorflow/core/kernels/softmax_op.cc
index 7d09b39..5bb6c37 100644
--- a/tensorflow/core/kernels/softmax_op.cc
+++ b/tensorflow/core/kernels/softmax_op.cc
@@ -29,9 +29,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 // Partial specialization for a CPUDevice, that uses the Eigen implementation
 // from SoftmaxEigenImpl.
@@ -46,10 +43,6 @@
 template <typename T>
 struct SoftmaxFunctor<CPUDevice, T> : SoftmaxFunctorBase<CPUDevice, T> {};
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-struct SoftmaxFunctor<SYCLDevice, T> : SoftmaxFunctorBase<SYCLDevice, T> {};
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace functor
 
 template <typename Device, typename T>
@@ -93,12 +86,4 @@
 
 #undef REGISTER_CPU
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(
-    Name("Softmax").Device(DEVICE_SYCL).TypeConstraint<float>("T"),
-    SoftmaxOp<SYCLDevice, float>);
-REGISTER_KERNEL_BUILDER(
-    Name("Softmax").Device(DEVICE_SYCL).TypeConstraint<double>("T"),
-    SoftmaxOp<SYCLDevice, double>);
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
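
SoftmaxFunctorBase, kept above for CPU, is the usual numerically stable softmax: per row, subtract the max before exponentiating so std::exp never overflows, then normalize. A plain-C++ sketch for one non-empty row (an illustration, not the Eigen implementation):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Numerically stable softmax over one row; assumes logits is non-empty.
std::vector<float> SoftmaxRow(const std::vector<float>& logits) {
  const float max_logit = *std::max_element(logits.begin(), logits.end());
  std::vector<float> out(logits.size());
  float sum = 0.0f;
  for (std::size_t i = 0; i < logits.size(); ++i) {
    out[i] = std::exp(logits[i] - max_logit);  // shift by row max
    sum += out[i];
  }
  for (float& v : out) v /= sum;
  return out;
}
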
diff --git a/tensorflow/core/kernels/split_lib.h b/tensorflow/core/kernels/split_lib.h
index 9d43a00..674083b 100644
--- a/tensorflow/core/kernels/split_lib.h
+++ b/tensorflow/core/kernels/split_lib.h
@@ -48,16 +48,6 @@
                   const Eigen::DSizes<Eigen::DenseIndex, NDims>& slice_sizes);
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T, int NDims>
-struct Split<Eigen::SyclDevice, T> {
-  void operator()(const Eigen::SyclDevice& d,
-                  typename TTypes<T, NDims>::Tensor output,
-                  typename TTypes<T, NDims>::ConstTensor input,
-                  const Eigen::DSizes<Eigen::DenseIndex, NDims>& slice_indices,
-                  const Eigen::DSizes<Eigen::DenseIndex, NDims>& slice_sizes);
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace functor
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/split_lib_cpu.cc b/tensorflow/core/kernels/split_lib_cpu.cc
index a3060e4..743ff1f 100644
--- a/tensorflow/core/kernels/split_lib_cpu.cc
+++ b/tensorflow/core/kernels/split_lib_cpu.cc
@@ -44,22 +44,6 @@
 TF_CALL_ALL_TYPES(DEFINE_CPU_KERNELS)
 DEFINE_CPU_KERNELS(quint8)
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T, int NDims>
-void Split<Eigen::SyclDevice, T, NDims>::operator()(
-    const Eigen::SyclDevice& d, typename TTypes<T, NDims>::Tensor output,
-    typename TTypes<T, NDims>::ConstTensor input,
-    const Eigen::DSizes<Eigen::DenseIndex, NDims>& slice_indices,
-    const Eigen::DSizes<Eigen::DenseIndex, NDims>& slice_sizes) {
-  output.device(d) = input.slice(slice_indices, slice_sizes);
-}
-
-#define DEFINE_SYCL_KERNELS(T)                    \
-  template struct Split<Eigen::SyclDevice, T, 2>; \
-  template struct Split<Eigen::SyclDevice, T, 3>;
-
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DEFINE_SYCL_KERNELS);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace functor
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/split_op.cc b/tensorflow/core/kernels/split_op.cc
index 08575f0..6f2cd96 100644
--- a/tensorflow/core/kernels/split_op.cc
+++ b/tensorflow/core/kernels/split_op.cc
@@ -38,9 +38,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename Device, typename T>
 class SplitOpBase : public OpKernel {
@@ -325,75 +322,6 @@
 };
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-class SplitOpSYCL : public SplitOpBase<SYCLDevice, T> {
- public:
-  typedef SplitOpBase<SYCLDevice, T> Base;
-  explicit SplitOpSYCL(OpKernelConstruction* c) : Base(c) {}
-
-  void Compute(OpKernelContext* context) override {
-    bool done = false;
-    Base::ComputeEasyCases(context, &done);
-    if (!context->status().ok() || done) {
-      return;
-    }
-    const Tensor& input = context->input(1);
-    const TensorShape& input_shape = input.shape();
-    const int32 split_dim_orig = context->input(0).flat<int32>()(0);
-    const int32 split_dim =
-        split_dim_orig < 0 ? split_dim_orig + input.dims() : split_dim_orig;
-    const int32 num_split = Base::num_outputs();
-
-    // Android also uses int32 indexing, so check here also.
-    OP_REQUIRES(
-        context,
-        FastBoundsCheck(input.NumElements(),
-                        std::numeric_limits<Eigen::DenseIndex>::max()),
-        errors::InvalidArgument("Split requires input size < ",
-                                std::numeric_limits<Eigen::DenseIndex>::max()));
-
-    Eigen::DenseIndex prefix_dim_size;
-    Eigen::DenseIndex split_dim_size;
-    Eigen::DenseIndex suffix_dim_size;
-
-    std::tie(prefix_dim_size, split_dim_size, suffix_dim_size) =
-        Base::template SetDims<Eigen::DenseIndex>(input_shape, split_dim);
-    auto input_reshaped =
-        input.shaped<T, 3>({prefix_dim_size, split_dim_size, suffix_dim_size});
-
-    const int64 split_dim_output_size = split_dim_size / num_split;
-    TensorShape output_shape(input_shape);
-    output_shape.set_dim(split_dim, split_dim_output_size);
-
-    Eigen::DSizes<Eigen::DenseIndex, 3> indices{0, 0, 0};
-    Eigen::DSizes<Eigen::DenseIndex, 3> sizes{
-        prefix_dim_size, split_dim_output_size, suffix_dim_size};
-
-    for (int i = 0; i < num_split; ++i) {
-      Tensor* result = nullptr;
-      OP_REQUIRES_OK(context,
-                     context->allocate_output(i, output_shape, &result));
-      if (prefix_dim_size * split_dim_output_size * suffix_dim_size > 0) {
-        Eigen::DSizes<Eigen::DenseIndex, 3> slice_indices;
-        Eigen::DSizes<Eigen::DenseIndex, 3> slice_sizes;
-        for (int j = 0; j < 3; ++j) {
-          slice_indices[j] = indices[j];
-          slice_sizes[j] = sizes[j];
-        }
-
-        auto result_shaped = result->shaped<T, 3>(
-            {prefix_dim_size, split_dim_output_size, suffix_dim_size});
-
-        functor::Split<SYCLDevice, T>()(context->eigen_device<SYCLDevice>(),
-                                        result_shaped, input_reshaped,
-                                        slice_indices, slice_sizes);
-      }
-      indices[1] += split_dim_output_size;
-    }
-  }
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 #define REGISTER_SPLIT(type)                             \
   REGISTER_KERNEL_BUILDER(Name("Split")                  \
@@ -423,17 +351,5 @@
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL(type)                              \
-  REGISTER_KERNEL_BUILDER(Name("Split")                  \
-                              .Device(DEVICE_SYCL)       \
-                              .TypeConstraint<type>("T") \
-                              .HostMemory("split_dim"),  \
-                          SplitOpSYCL<type>)
-
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL);
-#undef REGISTER_SYCL
-
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // end namespace tensorflow
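
SplitOpBase, shared by the surviving CPU/GPU paths and by the class deleted above, views a rank-N input as 3-D {prefix, split_dim, suffix} and carves contiguous slices along the middle axis. The decomposition itself is simply (plain-C++ sketch of SetDims):

#include <cstdint>
#include <tuple>
#include <vector>

// Collapse all axes before split_dim into `prefix`, all axes after it
// into `suffix`, leaving the split axis in the middle.
std::tuple<std::int64_t, std::int64_t, std::int64_t> SplitDims(
    const std::vector<std::int64_t>& shape, int split_dim) {
  std::int64_t prefix = 1;
  std::int64_t suffix = 1;
  for (int d = 0; d < split_dim; ++d) prefix *= shape[d];
  for (int d = split_dim + 1; d < static_cast<int>(shape.size()); ++d) {
    suffix *= shape[d];
  }
  return {prefix, shape[split_dim], suffix};
}
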
diff --git a/tensorflow/core/kernels/stage_op.cc b/tensorflow/core/kernels/stage_op.cc
index 9c0f370..58c41c4 100644
--- a/tensorflow/core/kernels/stage_op.cc
+++ b/tensorflow/core/kernels/stage_op.cc
@@ -220,9 +220,6 @@
     (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 REGISTER_KERNEL_BUILDER(Name("Stage").Device(DEVICE_GPU), StageOp);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("Stage").Device(DEVICE_SYCL), StageOp);
-#endif  // TENSORFLOW_USE_SYCL
 
 class UnstageOp : public OpKernel {
  public:
@@ -254,9 +251,6 @@
     (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 REGISTER_KERNEL_BUILDER(Name("Unstage").Device(DEVICE_GPU), UnstageOp);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("Unstage").Device(DEVICE_SYCL), UnstageOp);
-#endif  // TENSORFLOW_USE_SYCL
 
 class StagePeekOp : public OpKernel {
  public:
@@ -291,10 +285,6 @@
 REGISTER_KERNEL_BUILDER(
     Name("StagePeek").HostMemory("index").Device(DEVICE_GPU), StagePeekOp);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(
-    Name("StagePeek").HostMemory("index").Device(DEVICE_SYCL), StagePeekOp);
-#endif  // TENSORFLOW_USE_SYCL
 
 class StageSizeOp : public OpKernel {
  public:
@@ -322,10 +312,6 @@
 REGISTER_KERNEL_BUILDER(Name("StageSize").HostMemory("size").Device(DEVICE_GPU),
                         StageSizeOp);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(
-    Name("StageSize").HostMemory("size").Device(DEVICE_SYCL), StageSizeOp);
-#endif  // TENSORFLOW_USE_SYCL
 
 class StageClearOp : public OpKernel {
  public:
@@ -347,8 +333,5 @@
     (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 REGISTER_KERNEL_BUILDER(Name("StageClear").Device(DEVICE_GPU), StageClearOp);
 #endif
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("StageClear").Device(DEVICE_SYCL), StageClearOp);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
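
Stage/Unstage expose a staging buffer between graph steps; the contract (not the TF implementation, which also supports capacity and memory limits) is essentially a blocking FIFO:

#include <condition_variable>
#include <deque>
#include <mutex>

// Minimal blocking FIFO sketching the Stage/Unstage contract: Stage pushes
// a value, Unstage blocks until one is available and pops it.
template <typename T>
class StagingBuffer {
 public:
  void Stage(T value) {
    {
      std::lock_guard<std::mutex> lock(mu_);
      buf_.push_back(std::move(value));
    }
    cv_.notify_one();
  }
  T Unstage() {
    std::unique_lock<std::mutex> lock(mu_);
    cv_.wait(lock, [this] { return !buf_.empty(); });
    T value = std::move(buf_.front());
    buf_.pop_front();
    return value;
  }

 private:
  std::mutex mu_;
  std::condition_variable cv_;
  std::deque<T> buf_;
};
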
diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc
index 7d9dfa4..4714706 100644
--- a/tensorflow/core/kernels/strided_slice_op.cc
+++ b/tensorflow/core/kernels/strided_slice_op.cc
@@ -529,90 +529,4 @@
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL(type)                                              \
-  REGISTER_KERNEL_BUILDER(Name("StridedSlice")                           \
-                              .Device(DEVICE_SYCL)                       \
-                              .TypeConstraint<type>("T")                 \
-                              .HostMemory("begin")                       \
-                              .HostMemory("end")                         \
-                              .HostMemory("strides"),                    \
-                          StridedSliceOp<SYCLDevice, type>)              \
-  REGISTER_KERNEL_BUILDER(Name("StridedSliceGrad")                       \
-                              .Device(DEVICE_SYCL)                       \
-                              .TypeConstraint<type>("T")                 \
-                              .HostMemory("shape")                       \
-                              .HostMemory("begin")                       \
-                              .HostMemory("end")                         \
-                              .HostMemory("strides"),                    \
-                          StridedSliceGradOp<SYCLDevice, type>)          \
-  REGISTER_KERNEL_BUILDER(Name("StridedSliceAssign")                     \
-                              .Device(DEVICE_SYCL)                       \
-                              .TypeConstraint<type>("T")                 \
-                              .HostMemory("begin")                       \
-                              .HostMemory("end")                         \
-                              .HostMemory("strides"),                    \
-                          StridedSliceAssignOp<SYCLDevice, type, false>) \
-  REGISTER_KERNEL_BUILDER(Name("ResourceStridedSliceAssign")             \
-                              .Device(DEVICE_SYCL)                       \
-                              .TypeConstraint<type>("T")                 \
-                              .HostMemory("ref")                         \
-                              .HostMemory("begin")                       \
-                              .HostMemory("end")                         \
-                              .HostMemory("strides"),                    \
-                          StridedSliceAssignOp<SYCLDevice, type, false>) \
-  REGISTER_KERNEL_BUILDER(Name("TensorStridedSliceUpdate")               \
-                              .Device(DEVICE_SYCL)                       \
-                              .TypeConstraint<type>("T")                 \
-                              .HostMemory("begin")                       \
-                              .HostMemory("end")                         \
-                              .HostMemory("strides"),                    \
-                          StridedSliceAssignOp<SYCLDevice, type, true>)
-
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL);
-
-REGISTER_KERNEL_BUILDER(Name("StridedSlice")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .HostMemory("input")
-                            .HostMemory("begin")
-                            .HostMemory("end")
-                            .HostMemory("strides")
-                            .HostMemory("output"),
-                        StridedSliceOp<CPUDevice, int32>);
-REGISTER_KERNEL_BUILDER(Name("StridedSliceGrad")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .HostMemory("shape")
-                            .HostMemory("begin")
-                            .HostMemory("end")
-                            .HostMemory("strides")
-                            .HostMemory("dy")
-                            .HostMemory("output"),
-                        StridedSliceGradOp<CPUDevice, int32>);
-REGISTER_KERNEL_BUILDER(Name("StridedSliceAssign")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .HostMemory("ref")
-                            .HostMemory("begin")
-                            .HostMemory("end")
-                            .HostMemory("strides"),
-                        StridedSliceAssignOp<CPUDevice, int32, false>);
-REGISTER_KERNEL_BUILDER(Name("ResourceStridedSliceAssign")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .HostMemory("ref")
-                            .HostMemory("begin")
-                            .HostMemory("end")
-                            .HostMemory("strides"),
-                        StridedSliceAssignOp<CPUDevice, int32, false>);
-REGISTER_KERNEL_BUILDER(Name("TensorStridedSliceUpdate")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .HostMemory("begin")
-                            .HostMemory("end")
-                            .HostMemory("strides"),
-                        StridedSliceAssignOp<CPUDevice, int32, true>)
-#undef REGISTER_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
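
In one dimension, StridedSlice reads out[i] = in[begin + i * stride], walking backwards for negative strides. A 1-D sketch (assumes the caller has already resolved begin/end to in-range values, as the op's shape inference does):

#include <cstdint>
#include <vector>

// 1-D sketch of StridedSlice: collect in[begin], in[begin + stride], ...
// until the exclusive end bound is crossed in the stride's direction.
std::vector<float> StridedSlice1D(const std::vector<float>& in,
                                  std::int64_t begin, std::int64_t end,
                                  std::int64_t stride) {
  std::vector<float> out;
  if (stride > 0) {
    for (std::int64_t i = begin; i < end; i += stride) out.push_back(in[i]);
  } else if (stride < 0) {
    for (std::int64_t i = begin; i > end; i += stride) out.push_back(in[i]);
  }
  return out;
}
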
diff --git a/tensorflow/core/kernels/strided_slice_op_impl.h b/tensorflow/core/kernels/strided_slice_op_impl.h
index 5ce1d77..6f4f5fc 100644
--- a/tensorflow/core/kernels/strided_slice_op_impl.h
+++ b/tensorflow/core/kernels/strided_slice_op_impl.h
@@ -288,20 +288,6 @@
 
 TF_CALL_ALL_TYPES(DECLARE_FOR_N_CPU);
 
-#ifdef TENSORFLOW_USE_SYCL
-#define PREVENT_FOR_N_SYCL(T) \
-  PREVENT_INSTANTIATE(T, STRIDED_SLICE_INSTANTIATE_DIM)
-
-#define DECLARE_FOR_N_SYCL(T) \
-  INSTANTIATE(SYCLDevice, T, STRIDED_SLICE_INSTANTIATE_DIM)
-
-TF_CALL_SYCL_PROXY_TYPES(PREVENT_FOR_N_SYCL);
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DECLARE_FOR_N_SYCL);
-DECLARE_FOR_N_SYCL(int32);
-DECLARE_FOR_N_SYCL(int64);
-
-#undef DECLARE_FOR_N_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 
 #undef INSTANTIATE
 #undef DECLARE_FOR_N_CPU
diff --git a/tensorflow/core/kernels/tile_functor.h b/tensorflow/core/kernels/tile_functor.h
index d8ce39d..f2428cd 100644
--- a/tensorflow/core/kernels/tile_functor.h
+++ b/tensorflow/core/kernels/tile_functor.h
@@ -37,10 +37,6 @@
 void TileSimple(const Eigen::GpuDevice& d, Tensor* out, const Tensor& in);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-void TileSimple(const Eigen::SyclDevice& d, Tensor* out, const Tensor& in);
-#endif
 
 template <typename Device, typename T, typename Tmultiples, int NDIM>
 void TileUsingEigen(const Device& d, Tensor* out, const Tensor& in,
diff --git a/tensorflow/core/kernels/tile_functor_cpu.h b/tensorflow/core/kernels/tile_functor_cpu.h
index 5b005e4..2967d56 100644
--- a/tensorflow/core/kernels/tile_functor_cpu.h
+++ b/tensorflow/core/kernels/tile_functor_cpu.h
@@ -48,12 +48,6 @@
                 const Tensor& in) {
   return TileSimpleImpl<Eigen::ThreadPoolDevice, T>(d, out, in);
 }
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-void TileSimple(const Eigen::SyclDevice& d, Tensor* out, const Tensor& in) {
-  return TileSimpleImpl<Eigen::SyclDevice, T>(d, out, in);
-}
-#endif
 
 }  // namespace internal
 }  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/tile_functor_sycl.cc b/tensorflow/core/kernels/tile_functor_sycl.cc
index 2157425..b15a1f2 100644
--- a/tensorflow/core/kernels/tile_functor_sycl.cc
+++ b/tensorflow/core/kernels/tile_functor_sycl.cc
@@ -19,24 +19,6 @@
 namespace tensorflow {
 namespace functor {
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-
-#define DEFINE_TYPE(T)                        \
-  template struct Tile<SYCLDevice, T, int32>; \
-  template struct Tile<SYCLDevice, T, int64>;
-
-TF_CALL_bool(DEFINE_TYPE);
-TF_CALL_float(DEFINE_TYPE);
-TF_CALL_bfloat16(DEFINE_TYPE);
-TF_CALL_double(DEFINE_TYPE);
-TF_CALL_uint8(DEFINE_TYPE);
-TF_CALL_int32(DEFINE_TYPE);
-TF_CALL_int16(DEFINE_TYPE);
-TF_CALL_int64(DEFINE_TYPE);
-
-#undef DEFINE_TYPE
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // end namespace functor
 }  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/tile_ops.cc b/tensorflow/core/kernels/tile_ops.cc
index f733d9b..c24c7f1 100644
--- a/tensorflow/core/kernels/tile_ops.cc
+++ b/tensorflow/core/kernels/tile_ops.cc
@@ -41,9 +41,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 // Forward declarations of functors that will be defined in tile_ops_impl.h
 namespace functor {
@@ -108,26 +105,6 @@
 #define DECLARE_CUDA_DIM(T, NDIM)
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-#define DECLARE_TYPE(T)                              \
-  extern template struct Tile<SYCLDevice, T, int32>; \
-  extern template struct Tile<SYCLDevice, T, int64>;
-TF_CALL_bool(DECLARE_TYPE);
-TF_CALL_float(DECLARE_TYPE);
-TF_CALL_bfloat16(DECLARE_TYPE);
-TF_CALL_double(DECLARE_TYPE);
-TF_CALL_uint8(DECLARE_TYPE);
-TF_CALL_int32(DECLARE_TYPE);
-TF_CALL_int16(DECLARE_TYPE);
-TF_CALL_int64(DECLARE_TYPE);
-#undef DECLARE_TYPE
-#define DECLARE_SYCL_DIM(T, NDIM)                       \
-  extern template struct TileGrad<SYCLDevice, T, NDIM>; \
-  extern template struct ReduceAndReshape<SYCLDevice, T, NDIM, 1>
-#else  // TENSORFLOW_USE_SYCL
-#define DECLARE_SYCL_DIM(T, NDIM)
-#endif  // TENSORFLOW_USE_SYCL
-
 #define DECLARE_TYPE(T)                             \
   extern template struct Tile<CPUDevice, T, int32>; \
   extern template struct Tile<CPUDevice, T, int64>;
@@ -150,7 +127,6 @@
 
 #define DECLARE_DIM(T, NDIM)                           \
   DECLARE_CUDA_DIM(T, NDIM);                           \
-  DECLARE_SYCL_DIM(T, NDIM);                           \
   extern template struct TileGrad<CPUDevice, T, NDIM>; \
   extern template struct ReduceAndReshape<CPUDevice, T, NDIM, 1>;
 
@@ -174,7 +150,6 @@
 #undef DECLARE_TYPE
 
 #undef DECLARE_DIM
-#undef DECLARE_SYCL_DIM
 #undef DECLARE_CUDA_DIM
 
 }  // namespace functor
@@ -310,11 +285,6 @@
   HANDLE_CASE(GPUDevice, DataTypeToEnum<T>::value, int32); \
   HANDLE_CASE(GPUDevice, DataTypeToEnum<T>::value, int64);
 
-#ifdef TENSORFLOW_USE_SYCL
-#define HANDLE_TYPE_NAME_SYCL(T)                            \
-  HANDLE_CASE(SYCLDevice, DataTypeToEnum<T>::value, int32); \
-  HANDLE_CASE(SYCLDevice, DataTypeToEnum<T>::value, int64);
-#endif  // TENSORFLOW_USE_SYCL
 
 TF_CALL_bool(HANDLE_TYPE_NAME_CPU);
 TF_CALL_float(HANDLE_TYPE_NAME_CPU);
@@ -345,19 +315,9 @@
 TF_CALL_complex128(HANDLE_TYPE_NAME_GPU);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-TF_CALL_float(HANDLE_TYPE_NAME_SYCL);
-TF_CALL_double(HANDLE_TYPE_NAME_SYCL);
-TF_CALL_int16(HANDLE_TYPE_NAME_SYCL);
-TF_CALL_int32(HANDLE_TYPE_NAME_SYCL);
-TF_CALL_int64(HANDLE_TYPE_NAME_SYCL);
-#endif  // TENSORFLOW_USE_SYCL
 
 #undef HANDLE_TYPE_NAME_CPU
 #undef HANDLE_TYPE_NAME_GPU
-#ifdef TENSORFLOW_USE_SYCL
-#undef HANDLE_TYPE_NAME_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 #undef HANDLE_CASE
 
 // --------------------------------------------------------------------------
@@ -610,17 +570,6 @@
 TF_CALL_complex128(HANDLE_TYPE_NAME_GPU);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#if TENSORFLOW_USE_SYCL
-#define HANDLE_TYPE_NAME_SYCL(T) \
-  HANDLE_CASE_DIM(SYCLDevice, T, DataTypeToEnum<T>::value);
-
-TF_CALL_float(HANDLE_TYPE_NAME_SYCL);
-TF_CALL_double(HANDLE_TYPE_NAME_SYCL);
-TF_CALL_int16(HANDLE_TYPE_NAME_SYCL);
-TF_CALL_int32(HANDLE_TYPE_NAME_SYCL);
-TF_CALL_int64(HANDLE_TYPE_NAME_SYCL);
-#undef HANDLE_TYPE_NAME_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 
 #undef HANDLE_TYPE_NAME_CPU
 #undef HANDLE_TYPE_NAME_GPU
@@ -696,37 +645,5 @@
 #undef REGISTER_GPU
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL(type)                                        \
-  REGISTER_KERNEL_BUILDER(Name("Tile")                             \
-                              .Device(DEVICE_SYCL)                 \
-                              .TypeConstraint<type>("T")           \
-                              .TypeConstraint<int32>("Tmultiples") \
-                              .HostMemory("multiples"),            \
-                          TileOp<SYCLDevice, int32>);              \
-  REGISTER_KERNEL_BUILDER(Name("Tile")                             \
-                              .Device(DEVICE_SYCL)                 \
-                              .TypeConstraint<type>("T")           \
-                              .TypeConstraint<int64>("Tmultiples") \
-                              .HostMemory("multiples"),            \
-                          TileOp<SYCLDevice, int64>);              \
-  REGISTER_KERNEL_BUILDER(Name("TileGrad")                         \
-                              .Device(DEVICE_SYCL)                 \
-                              .TypeConstraint<type>("T")           \
-                              .TypeConstraint<int32>("Tmultiples") \
-                              .HostMemory("multiples"),            \
-                          TileGradientOp<SYCLDevice, int32>);      \
-  REGISTER_KERNEL_BUILDER(Name("TileGrad")                         \
-                              .Device(DEVICE_SYCL)                 \
-                              .TypeConstraint<type>("T")           \
-                              .TypeConstraint<int64>("Tmultiples") \
-                              .HostMemory("multiples"),            \
-                          TileGradientOp<SYCLDevice, int64>);
-
-    TF_CALL_float(REGISTER_SYCL);
-TF_CALL_double(REGISTER_SYCL);
-
-#undef REGISTER_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
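
Along each tiled axis, Tile maps output coordinate i back to input coordinate i % input_dim. A 1-D sketch in plain C++:

#include <cstddef>
#include <vector>

// 1-D sketch of Tile: the input repeated `multiples` times; output index i
// reads input index i % in.size().
std::vector<float> Tile1D(const std::vector<float>& in,
                          std::size_t multiples) {
  std::vector<float> out(in.size() * multiples);
  for (std::size_t i = 0; i < out.size(); ++i) {
    out[i] = in[i % in.size()];
  }
  return out;
}
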
diff --git a/tensorflow/core/kernels/tile_ops_cpu_impl.h b/tensorflow/core/kernels/tile_ops_cpu_impl.h
index 8b0c801..066954a 100644
--- a/tensorflow/core/kernels/tile_ops_cpu_impl.h
+++ b/tensorflow/core/kernels/tile_ops_cpu_impl.h
@@ -45,27 +45,6 @@
 #undef DEFINE_DIM
 #undef DEFINE_TYPE
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-
-// Register functors used for TileGradientOp.
-#define DEFINE_DIM(T, NDIM)                      \
-  template struct TileGrad<SYCLDevice, T, NDIM>; \
-  template struct ReduceAndReshape<SYCLDevice, T, NDIM, 1>;
-#define DEFINE_TYPE(T) DEFINE_DIM(T, CPU_PROVIDED_IXDIM)
-
-TF_CALL_bool(DEFINE_TYPE);
-TF_CALL_float(DEFINE_TYPE);
-TF_CALL_bfloat16(DEFINE_TYPE);
-TF_CALL_double(DEFINE_TYPE);
-TF_CALL_uint8(DEFINE_TYPE);
-TF_CALL_int16(DEFINE_TYPE);
-TF_CALL_int32(DEFINE_TYPE);
-TF_CALL_int64(DEFINE_TYPE);
-
-#undef DEFINE_DIM
-#undef DEFINE_TYPE
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // end namespace functor
 }  // end namespace tensorflow
diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc
index 557e73e..bdb0747 100644
--- a/tensorflow/core/kernels/training_ops.cc
+++ b/tensorflow/core/kernels/training_ops.cc
@@ -27,15 +27,11 @@
 #include "tensorflow/core/platform/bfloat16.h"
 #include "tensorflow/core/util/util.h"
 
-#ifdef TENSORFLOW_USE_SYCL
-#include "tensorflow/core/common_runtime/sycl/sycl_util.h"
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace tensorflow {
 
 using CPUDevice = Eigen::ThreadPoolDevice;
 using GPUDevice = Eigen::GpuDevice;
-using SYCLDevice = Eigen::SyclDevice;
 using Index = Eigen::Index;
 
 namespace {
@@ -57,15 +53,6 @@
   }
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-struct ApplyGradientDescentSYCL {
-  void operator()(const SYCLDevice& d, typename TTypes<T>::Flat var, T lr,
-                  typename TTypes<T>::ConstFlat grad) {
-    var.device(d) -= grad * lr;
-  }
-};
-#endif
 
 template <typename T>
 struct ApplyAdadelta<CPUDevice, T> {
@@ -496,21 +483,6 @@
   }
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-struct ApplyAdamSYCL {
-  void operator()(const SYCLDevice& d, typename TTypes<T>::Flat var,
-                  typename TTypes<T>::Flat m, typename TTypes<T>::Flat v,
-                  T beta1_power, T beta2_power, T lr, T beta1, T beta2,
-                  T epsilon, typename TTypes<T>::ConstFlat grad) {
-    const T alpha =
-        lr * Eigen::numext::sqrt(T(1) - beta2_power) / (T(1) - beta1_power);
-    m.device(d) += (grad - m) * (T(1) - beta1);
-    v.device(d) += (grad.square() - v) * (T(1) - beta2);
-    var.device(d) -= (m * alpha) / (v.sqrt() + epsilon);
-  }
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename T>
 struct ApplyAdam<CPUDevice, T> : ApplyAdamNonCuda<CPUDevice, T> {};
@@ -666,53 +638,6 @@
   bool use_exclusive_lock_;
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-class ApplyGradientDescentOp<SYCLDevice, T> : public OpKernel {
- public:
-  explicit ApplyGradientDescentOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_));
-  }
-
-  void Compute(OpKernelContext* ctx) override {
-    const bool sparse = false;
-    auto locks = MaybeLockVariableInputMutexesInOrder<SYCLDevice, T>(
-        ctx, use_exclusive_lock_, sparse, {0});
-    Tensor var;
-    OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<SYCLDevice, T>(
-                            ctx, 0, use_exclusive_lock_, sparse, &var));
-
-    OP_REQUIRES(
-        ctx, var.IsInitialized(),
-        errors::FailedPrecondition(
-            "Attempting to use uninitialized variables: ", requested_input(0)));
-    const Tensor& alpha_dev = ctx->input(1);
-    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(alpha_dev.shape()),
-                errors::InvalidArgument("alpha is not a scalar: ",
-                                        alpha_dev.shape().DebugString()));
-    const Tensor& delta = ctx->input(2);
-    OP_REQUIRES(
-        ctx, var.shape().IsSameSize(delta.shape()),
-        errors::InvalidArgument("var and delta do not have the same shape",
-                                var.shape().DebugString(), " ",
-                                delta.shape().DebugString()));
-
-    auto device = ctx->eigen_sycl_device();
-    auto size = sizeof(T);
-    T alpha = T(0);
-    auto src_ptr = GetBase(&alpha_dev);
-    device.memcpyDeviceToHost(&alpha, static_cast<const T*>(src_ptr), size);
-
-    functor::ApplyGradientDescentSYCL<T>()(device, var.flat<T>(), alpha,
-                                           delta.flat<T>());
-
-    MaybeForwardRefInputToRefOutput(ctx, 0, 0);
-  }
-
- private:
-  bool use_exclusive_lock_;
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 #define REGISTER_KERNELS(D, T)                                                \
   REGISTER_KERNEL_BUILDER(                                                    \
@@ -757,12 +682,6 @@
 #endif
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNELS(T) REGISTER_KERNELS(SYCL, T);
-TF_CALL_float(REGISTER_SYCL_KERNELS);
-TF_CALL_double(REGISTER_SYCL_KERNELS);
-#undef REGISTER_SYCL_KERNELS
-#endif  // TENSORFLOW_USE_SYCL
 
 #undef REGISTER_CPU_KERNELS
 #undef REGISTER_KERNELS
@@ -3523,123 +3442,6 @@
   bool use_nesterov_;
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-class ApplyAdamOp<SYCLDevice, T> : public OpKernel {
- public:
-  explicit ApplyAdamOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_));
-  }
-
-  void Compute(OpKernelContext* ctx) override {
-    const bool sparse = false;
-    auto locks = MaybeLockVariableInputMutexesInOrder<SYCLDevice, T>(
-        ctx, use_exclusive_lock_, sparse, {0, 1, 2});
-
-    Tensor var;
-    OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<SYCLDevice, T>(
-                            ctx, 0, use_exclusive_lock_, sparse, &var));
-    Tensor m;
-    OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<SYCLDevice, T>(
-                            ctx, 1, use_exclusive_lock_, sparse, &m));
-    Tensor v;
-    OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<SYCLDevice, T>(
-                            ctx, 2, use_exclusive_lock_, sparse, &v));
-    OP_REQUIRES(
-        ctx, var.IsInitialized(),
-        errors::FailedPrecondition(
-            "Attempting to use uninitialized variables: ", requested_input(0)));
-    OP_REQUIRES(
-        ctx, m.IsInitialized(),
-        errors::FailedPrecondition(
-            "Attempting to use uninitialized variables: ", requested_input(1)));
-    OP_REQUIRES(
-        ctx, v.IsInitialized(),
-        errors::FailedPrecondition(
-            "Attempting to use uninitialized variables: ", requested_input(2)));
-
-    const Tensor& beta1_power_dev = ctx->input(3);
-    const Tensor& beta2_power_dev = ctx->input(4);
-    const Tensor& lr_dev = ctx->input(5);
-    const Tensor& beta1_dev = ctx->input(6);
-    const Tensor& beta2_dev = ctx->input(7);
-    const Tensor& epsilon_dev = ctx->input(8);
-
-    T beta1_power = 0;
-    T beta2_power = 0;
-    T lr = 0;
-    T beta1 = 0;
-    T beta2 = 0;
-    T epsilon = 0;
-
-    auto device = ctx->eigen_sycl_device();
-    auto size = sizeof(T);
-    auto src_ptr = GetBase(&beta1_power_dev);
-    device.memcpyDeviceToHost(&beta1_power, static_cast<const T*>(src_ptr),
-                              size);
-
-    src_ptr = GetBase(&beta2_power_dev);
-    device.memcpyDeviceToHost(&beta2_power, static_cast<const T*>(src_ptr),
-                              size);
-
-    src_ptr = GetBase(&lr_dev);
-    device.memcpyDeviceToHost(&lr, static_cast<const T*>(src_ptr), size);
-
-    src_ptr = GetBase(&beta1_dev);
-    device.memcpyDeviceToHost(&beta1, static_cast<const T*>(src_ptr), size);
-
-    src_ptr = GetBase(&beta2_dev);
-    device.memcpyDeviceToHost(&beta2, static_cast<const T*>(src_ptr), size);
-
-    src_ptr = GetBase(&epsilon_dev);
-    device.memcpyDeviceToHost(&epsilon, static_cast<const T*>(src_ptr), size);
-
-    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(beta1_power_dev.shape()),
-                errors::InvalidArgument("beta1_power is not a scalar: ",
-                                        beta1_power_dev.shape().DebugString()));
-    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(beta2_power_dev.shape()),
-                errors::InvalidArgument("beta2_power is not a scalar: ",
-                                        beta2_power_dev.shape().DebugString()));
-    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(lr_dev.shape()),
-                errors::InvalidArgument("lr is not a scalar : ",
-                                        lr_dev.shape().DebugString()));
-    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(beta1_dev.shape()),
-                errors::InvalidArgument("beta1 is not a scalar: ",
-                                        beta1_dev.shape().DebugString()));
-    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(beta2_dev.shape()),
-                errors::InvalidArgument("beta2 is not a scalar: ",
-                                        beta2_dev.shape().DebugString()));
-    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(epsilon_dev.shape()),
-                errors::InvalidArgument("epsilon is not a scalar: ",
-                                        epsilon_dev.shape().DebugString()));
-
-    const Tensor& grad = ctx->input(9);
-
-    OP_REQUIRES(ctx, var.shape().IsSameSize(m.shape()),
-                errors::InvalidArgument("var and m do not have the same shape",
-                                        var.shape().DebugString(), " ",
-                                        m.shape().DebugString()));
-    OP_REQUIRES(ctx, var.shape().IsSameSize(v.shape()),
-                errors::InvalidArgument("var and v do not have the same shape",
-                                        var.shape().DebugString(), " ",
-                                        v.shape().DebugString()));
-    OP_REQUIRES(
-        ctx, var.shape().IsSameSize(grad.shape()),
-        errors::InvalidArgument("var and grad do not have the same shape",
-                                var.shape().DebugString(), " ",
-                                grad.shape().DebugString()));
-
-    functor::ApplyAdamSYCL<T>()(device, var.flat<T>(), m.flat<T>(), v.flat<T>(),
-                                beta1_power, beta2_power, lr, beta1, beta2,
-                                epsilon, grad.flat<T>());
-
-    MaybeForwardRefInputToRefOutput(ctx, 0, 0);
-  }
-
- private:
-  bool use_exclusive_lock_;
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 #define REGISTER_KERNELS(D, T)                                     \
   REGISTER_KERNEL_BUILDER(                                         \
@@ -3657,12 +3459,6 @@
 TF_CALL_FLOAT_TYPES(REGISTER_CPU_KERNELS);
 TF_CALL_COMPLEX_TYPES(REGISTER_CPU_KERNELS);
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNELS(T) REGISTER_KERNELS(SYCL, T);
-
-TF_CALL_float(REGISTER_SYCL_KERNELS);
-TF_CALL_double(REGISTER_SYCL_KERNELS);
-#endif
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 // Forward declarations of the functor specializations for GPU.
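
For reference while reading the deletion above: ApplyAdamOp<SYCLDevice, T> staged the six scalar hyperparameters on the host, one memcpyDeviceToHost at a time, before invoking functor::ApplyAdamSYCL. The dense Adam update that functor evaluated reduces to the following minimal host-side sketch; plain loops stand in for the Eigen expressions and all names are illustrative:

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // alpha folds the bias-correction terms into the learning rate, exactly
    // as the removed functor did.
    void ApplyAdamReference(std::vector<float>& var, std::vector<float>& m,
                            std::vector<float>& v,
                            const std::vector<float>& grad, float beta1_power,
                            float beta2_power, float lr, float beta1,
                            float beta2, float epsilon) {
      const float alpha =
          lr * std::sqrt(1.0f - beta2_power) / (1.0f - beta1_power);
      for (std::size_t i = 0; i < var.size(); ++i) {
        m[i] += (1.0f - beta1) * (grad[i] - m[i]);
        v[i] += (1.0f - beta2) * (grad[i] * grad[i] - v[i]);
        var[i] -= alpha * m[i] / (std::sqrt(v[i]) + epsilon);
      }
    }
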
diff --git a/tensorflow/core/kernels/transpose_functor.h b/tensorflow/core/kernels/transpose_functor.h
index 0c22b11..e6aaca8 100644
--- a/tensorflow/core/kernels/transpose_functor.h
+++ b/tensorflow/core/kernels/transpose_functor.h
@@ -247,13 +247,6 @@
   return DoTransposeImpl(device, in, perm, conjugate, out);
 }
 
-#ifdef TENSORFLOW_USE_SYCL
-// For SYCL lets always go through Eigen
-template <typename Device, typename T>
-void TransposeSYCL(const Device& d, const Tensor& in,
-                   const gtl::ArraySlice<int32> perm, bool conjugate,
-                   Tensor* out);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace internal
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/transpose_functor_cpu.cc b/tensorflow/core/kernels/transpose_functor_cpu.cc
index 1271c02..6d0dd98 100644
--- a/tensorflow/core/kernels/transpose_functor_cpu.cc
+++ b/tensorflow/core/kernels/transpose_functor_cpu.cc
@@ -136,69 +136,5 @@
 
 INSTANTIATE(CPUDevice)
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-
-namespace internal {
-template <typename T>
-void TransposeSYCL(const SYCLDevice& d, const Tensor& in,
-                   const gtl::ArraySlice<int32> perm, bool conjugate,
-                   Tensor* out) {
-  switch (in.dims()) {
-    case 1:
-      TransposeUsingEigen<SYCLDevice, T, 1>(d, in, perm, conjugate, out);
-      break;
-    case 2:
-      TransposeUsingEigen<SYCLDevice, T, 2>(d, in, perm, conjugate, out);
-      break;
-    case 3:
-      TransposeUsingEigen<SYCLDevice, T, 3>(d, in, perm, conjugate, out);
-      break;
-    case 4:
-      TransposeUsingEigen<SYCLDevice, T, 4>(d, in, perm, conjugate, out);
-      break;
-    case 5:
-      TransposeUsingEigen<SYCLDevice, T, 5>(d, in, perm, conjugate, out);
-      break;
-    case 6:
-      TransposeUsingEigen<SYCLDevice, T, 6>(d, in, perm, conjugate, out);
-      break;
-    case 7:
-      TransposeUsingEigen<SYCLDevice, T, 7>(d, in, perm, conjugate, out);
-      break;
-    case 8:
-      TransposeUsingEigen<SYCLDevice, T, 8>(d, in, perm, conjugate, out);
-      break;
-    default:
-      LOG(FATAL) << "Unsupported TransposeUsingEigen for: " << in.dims();
-      break;
-  }
-}
-
-}  // namespace internal
-
-template <typename T, bool conjugate>
-struct Transpose<SYCLDevice, T, conjugate> {
-  static void run(const SYCLDevice& d, const Tensor& in,
-                  const gtl::ArraySlice<int32> perm, Tensor* out) {
-    internal::TransposeSYCL<T>(d, in, perm, conjugate, out);
-  }
-};
-
-template <bool conjugate>
-struct Transpose<SYCLDevice, tstring, conjugate> {
-  static void run(const SYCLDevice& d, const Tensor& in,
-                  const gtl::ArraySlice<int32> perm, Tensor* out) {
-    LOG(FATAL) << "DT_STRING not supported on SYCL device.";
-  }
-};
-
-// Explicit instantiation.
-template struct Transpose<SYCLDevice, tstring, false>;
-
-INSTANTIATE(SYCLDevice)
-#undef INSTANTIATE
-
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
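
Everything the deleted rank-1 through rank-8 cases funnelled into TransposeUsingEigen, which is a rank-N Eigen shuffle. A self-contained sketch of that primitive, using the unsupported Eigen Tensor module directly (the rank-3 case stands in for the full dispatch above):

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<float, 3> in(2, 3, 4);
      in.setRandom();
      // perm[i] names the input dimension that becomes output dimension i,
      // mirroring the perm argument of the removed TransposeSYCL.
      const Eigen::array<int, 3> perm = {2, 0, 1};
      Eigen::Tensor<float, 3> out = in.shuffle(perm);
      return out.dimension(0) == 4 ? 0 : 1;  // 4 == in.dimension(perm[0])
    }
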
diff --git a/tensorflow/core/kernels/transpose_op.cc b/tensorflow/core/kernels/transpose_op.cc
index acd278d..8c21969 100644
--- a/tensorflow/core/kernels/transpose_op.cc
+++ b/tensorflow/core/kernels/transpose_op.cc
@@ -91,20 +91,6 @@
                             .HostMemory("y"),
                         InvertPermutationOp<int64>);
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .HostMemory("x")
-                            .HostMemory("y"),
-                        InvertPermutationOp<int32>);
-REGISTER_KERNEL_BUILDER(Name("InvertPermutation")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int64>("T")
-                            .HostMemory("x")
-                            .HostMemory("y"),
-                        InvertPermutationOp<int64>);
-#endif  // TENSORFLOW_USE_SYCL
 
 namespace {
 template <typename Tperm>
@@ -263,33 +249,4 @@
 #undef REGISTER
 #endif
 
-#ifdef TENSORFLOW_USE_SYCL
-Status TransposeSyclOp::DoTranspose(OpKernelContext* ctx, const Tensor& in,
-                                    gtl::ArraySlice<int32> perm, Tensor* out) {
-  typedef Eigen::SyclDevice SYCLDevice;
-  return ::tensorflow::DoTranspose(ctx->eigen_device<SYCLDevice>(), in, perm,
-                                   out);
-}
-Status ConjugateTransposeSyclOp::DoTranspose(OpKernelContext* ctx,
-                                             const Tensor& in,
-                                             gtl::ArraySlice<int32> perm,
-                                             Tensor* out) {
-  typedef Eigen::SyclDevice SYCLDevice;
-  return ::tensorflow::DoConjugateTranspose(ctx->eigen_device<SYCLDevice>(), in,
-                                            perm, out);
-}
-#define REGISTER(T)                                   \
-  REGISTER_KERNEL_BUILDER(Name("Transpose")           \
-                              .Device(DEVICE_SYCL)    \
-                              .TypeConstraint<T>("T") \
-                              .HostMemory("perm"),    \
-                          TransposeSyclOp);           \
-  REGISTER_KERNEL_BUILDER(Name("ConjugateTranspose")  \
-                              .Device(DEVICE_SYCL)    \
-                              .TypeConstraint<T>("T") \
-                              .HostMemory("perm"),    \
-                          ConjugateTransposeSyclOp);
-TF_CALL_POD_TYPES(REGISTER);
-#undef REGISTER
-#endif
 }  // namespace tensorflow
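
The InvertPermutation registrations removed earlier in this file implement a one-line contract: for a permutation x of 0..n-1, the output y satisfies y[x[i]] = i. A minimal sketch, independent of TensorFlow:

    #include <cstddef>
    #include <vector>

    std::vector<int> InvertPermutation(const std::vector<int>& x) {
      std::vector<int> y(x.size());
      for (std::size_t i = 0; i < x.size(); ++i) {
        y[x[i]] = static_cast<int>(i);  // assumes x is a valid permutation
      }
      return y;
    }
    // e.g. x = {2, 0, 1} inverts to y = {1, 2, 0}
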
diff --git a/tensorflow/core/kernels/transpose_op.h b/tensorflow/core/kernels/transpose_op.h
index 9e8c573..3ea51c7 100644
--- a/tensorflow/core/kernels/transpose_op.h
+++ b/tensorflow/core/kernels/transpose_op.h
@@ -62,16 +62,6 @@
                      gtl::ArraySlice<int32> perm, Tensor* out) override;
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-class TransposeSyclOp : public TransposeOp {
- public:
-  explicit TransposeSyclOp(OpKernelConstruction* ctx) : TransposeOp(ctx) {}
-
- protected:
-  Status DoTranspose(OpKernelContext* ctx, const Tensor& in,
-                     gtl::ArraySlice<int32> perm, Tensor* out) override;
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 // Conjugating transpose ops.
 class ConjugateTransposeCpuOp : public TransposeOp {
@@ -109,18 +99,6 @@
   bool IsConjugate() const override { return true; }
 };
 
-#ifdef TENSORFLOW_USE_SYCL
-class ConjugateTransposeSyclOp : public TransposeOp {
- public:
-  explicit ConjugateTransposeSyclOp(OpKernelConstruction* ctx)
-      : TransposeOp(ctx) {}
-
- protected:
-  Status DoTranspose(OpKernelContext* ctx, const Tensor& in,
-                     gtl::ArraySlice<int32> perm, Tensor* out) override;
-  bool IsConjugate() const override { return true; }
-};
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/kernels/unique_op.cc b/tensorflow/core/kernels/unique_op.cc
index 20dccdc..d049d1f 100644
--- a/tensorflow/core/kernels/unique_op.cc
+++ b/tensorflow/core/kernels/unique_op.cc
@@ -322,40 +322,6 @@
                             .HostMemory("idx"),
                         UniqueOp<int64, int64>);
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("Unique")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int32>("out_idx")
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("idx"),
-                        UniqueOp<int32, int32>);
-REGISTER_KERNEL_BUILDER(Name("Unique")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int64>("T")
-                            .TypeConstraint<int32>("out_idx")
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("idx"),
-                        UniqueOp<int64, int32>);
-REGISTER_KERNEL_BUILDER(Name("Unique")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int32>("T")
-                            .TypeConstraint<int64>("out_idx")
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("idx"),
-                        UniqueOp<int32, int64>);
-REGISTER_KERNEL_BUILDER(Name("Unique")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<int64>("T")
-                            .TypeConstraint<int64>("out_idx")
-                            .HostMemory("x")
-                            .HostMemory("y")
-                            .HostMemory("idx"),
-                        UniqueOp<int64, int64>);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace
 }  // namespace tensorflow
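
All four deleted Unique registrations share one contract: y lists each value of x once, in order of first occurrence, and idx maps every element of x to its slot in y. A standalone, hash-map-based sketch of that contract, purely illustrative:

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    void Unique(const std::vector<int64_t>& x, std::vector<int64_t>* y,
                std::vector<int>* idx) {
      std::unordered_map<int64_t, int> first_slot;
      y->clear();
      idx->clear();
      for (int64_t value : x) {
        auto it = first_slot.find(value);
        if (it == first_slot.end()) {
          it = first_slot.emplace(value, static_cast<int>(y->size())).first;
          y->push_back(value);
        }
        idx->push_back(it->second);
      }
    }
    // e.g. x = {1, 1, 2, 4, 4, 7} -> y = {1, 2, 4, 7}, idx = {0, 0, 1, 2, 2, 3}
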
diff --git a/tensorflow/core/kernels/unpack_op.cc b/tensorflow/core/kernels/unpack_op.cc
index 7ac02e8..1bdb247 100644
--- a/tensorflow/core/kernels/unpack_op.cc
+++ b/tensorflow/core/kernels/unpack_op.cc
@@ -32,9 +32,6 @@
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
 
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename Device, typename T>
 class UnpackOp : public OpKernel {
@@ -70,8 +67,6 @@
                         std::numeric_limits<Eigen::DenseIndex>::max()),
         errors::InvalidArgument("output size must fit in Eigen DenseIndex"));
 
-// This optimization is currently not applicable for SYCL devices
-#ifndef TENSORFLOW_USE_SYCL
     // Special case: Aligned, so we can share the underlying buffer.
     //
     // Apply this optimization conservatively: if input is aligned,
@@ -88,7 +83,6 @@
       }
       return;
     }
-#endif  // TENSORFLOW_USE_SYCL
 
     Eigen::DenseIndex before_dim = 1;
     for (int i = 0; i < axis; ++i) {
@@ -167,28 +161,5 @@
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL(type)                                         \
-  REGISTER_KERNEL_BUILDER(                                          \
-      Name("Unpack").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
-      UnpackOp<SYCLDevice, type>)
-
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL);
-
-REGISTER_KERNEL_BUILDER(Name("Unpack")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("value")
-                            .HostMemory("output")
-                            .TypeConstraint<int32>("T"),
-                        UnpackOp<CPUDevice, int32>);
-
-REGISTER_KERNEL_BUILDER(Name("Unpack")
-                            .Device(DEVICE_SYCL)
-                            .HostMemory("value")
-                            .HostMemory("output")
-                            .TypeConstraint<int64>("T"),
-                        UnpackOp<CPUDevice, int64>);
-#undef REGISTER_SYCL
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // end namespace tensorflow
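
The #ifndef TENSORFLOW_USE_SYCL guard dropped above re-enables the aligned special case unconditionally: when unpacking along axis 0 with suitable alignment, each output can alias a contiguous slice of the input buffer instead of copying. A pointer-level sketch of that zero-copy view (hypothetical helper, not a TensorFlow API):

    #include <cstddef>

    struct RowView {
      const float* data;
      std::size_t size;
    };

    // View row i of a [num_rows, row_size] buffer in place; valid only while
    // the underlying buffer stays alive, which is what buffer forwarding
    // guarantees in the real kernel.
    RowView UnpackRowView(const float* input, std::size_t row_size,
                          std::size_t i) {
      return RowView{input + i * row_size, row_size};
    }
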
diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc
index ccd33e8..259c8f6 100644
--- a/tensorflow/core/kernels/variable_ops.cc
+++ b/tensorflow/core/kernels/variable_ops.cc
@@ -200,31 +200,6 @@
 REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized").Device(DEVICE_CPU),
                         IsVariableInitializedOp);
 
-#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNEL(type)                                          \
-  REGISTER_KERNEL_BUILDER(                                                  \
-      Name("Variable").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"),   \
-      VariableOp);                                                          \
-  REGISTER_KERNEL_BUILDER(                                                  \
-      Name("VariableV2").Device(DEVICE_SYCL).TypeConstraint<type>("dtype"), \
-      VariableOp);                                                          \
-  REGISTER_KERNEL_BUILDER(Name("TemporaryVariable")                         \
-                              .Device(DEVICE_SYCL)                          \
-                              .TypeConstraint<type>("dtype"),               \
-                          TemporaryVariableOp);                             \
-  REGISTER_KERNEL_BUILDER(Name("DestroyTemporaryVariable")                  \
-                              .Device(DEVICE_SYCL)                          \
-                              .TypeConstraint<type>("T"),                   \
-                          DestroyTemporaryVariableOp);                      \
-  REGISTER_KERNEL_BUILDER(Name("IsVariableInitialized")                     \
-                              .Device(DEVICE_SYCL)                          \
-                              .TypeConstraint<type>("dtype")                \
-                              .HostMemory("is_initialized"),                \
-                          IsVariableInitializedOp);
-
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNEL);
-#undef REGISTER_SYCL_KERNEL
-#endif  // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 // Only register 'Variable' on GPU for the subset of types also supported by
diff --git a/tensorflow/core/kernels/xent_op.cc b/tensorflow/core/kernels/xent_op.cc
index 8a7c163..0e82627 100644
--- a/tensorflow/core/kernels/xent_op.cc
+++ b/tensorflow/core/kernels/xent_op.cc
@@ -30,9 +30,6 @@
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
-#ifdef TENSORFLOW_USE_SYCL
-typedef Eigen::SyclDevice SYCLDevice;
-#endif  // TENSORFLOW_USE_SYCL
 
 template <typename Device, typename T>
 class SoftmaxXentWithLogitsOp : public OpKernel {
@@ -119,10 +116,6 @@
 template <typename T>
 struct XentFunctor<CPUDevice, T> : XentFunctorBase<CPUDevice, T> {};
 
-#ifdef TENSORFLOW_USE_SYCL
-template <typename T>
-struct XentFunctor<SYCLDevice, T> : XentFunctorBase<SYCLDevice, T> {};
-#endif  // TENSORFLOW_USE_SYCL
 }  // namespace functor
 
 #define REGISTER_CPU(T)                                         \
@@ -150,11 +143,5 @@
                         SoftmaxXentWithLogitsOp<GPUDevice, double>);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("SoftmaxCrossEntropyWithLogits")
-                            .Device(DEVICE_SYCL)
-                            .TypeConstraint<float>("T"),
-                        SoftmaxXentWithLogitsOp<SYCLDevice, float>);
-#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow
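
For orientation, the op whose SYCL registration is removed here computes, per row, loss = log(sum_j exp(logit_j)) - sum_j label_j * logit_j after max-subtraction. A numerically stable standalone sketch (assumes each label row sums to 1, as the op requires):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    float SoftmaxXentRow(const std::vector<float>& logits,
                         const std::vector<float>& labels) {
      const float max_logit =
          *std::max_element(logits.begin(), logits.end());
      float sum_exp = 0.0f, dot = 0.0f;
      for (std::size_t j = 0; j < logits.size(); ++j) {
        sum_exp += std::exp(logits[j] - max_logit);
        dot += labels[j] * (logits[j] - max_logit);
      }
      // Equals -sum_j labels[j] * log(softmax(logits)[j]).
      return std::log(sum_exp) - dot;
    }
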
diff --git a/tensorflow/core/lib/random/random_distributions.h b/tensorflow/core/lib/random/random_distributions.h
index 4dc2c7f..79ca724 100644
--- a/tensorflow/core/lib/random/random_distributions.h
+++ b/tensorflow/core/lib/random/random_distributions.h
@@ -710,7 +710,7 @@
   }
   const float v1 = 2.0f * M_PI * Uint32ToFloat(x1);
   const float u2 = Eigen::numext::sqrt(-2.0f * Eigen::numext::log(u1));
-#if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
+#if !defined(__linux__)
   *f0 = Eigen::numext::sin(v1);
   *f1 = Eigen::numext::cos(v1);
 #else
@@ -736,7 +736,7 @@
   }
   const double v1 = 2 * M_PI * Uint64ToDouble(x2, x3);
   const double u2 = Eigen::numext::sqrt(-2.0 * Eigen::numext::log(u1));
-#if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
+#if !defined(__linux__)
   *d0 = Eigen::numext::sin(v1);
   *d1 = Eigen::numext::cos(v1);
 #else
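
Both hunks in this file touch the tail of a Box-Muller transform: sqrt(-2 ln u1) scales a sin/cos pair of the same angle, and the branch merely chooses between a single sincos() call (Linux) and separate sin/cos calls (everywhere else, formerly including SYCL). A plain-C++ sketch of the full step, illustrative only:

    #include <cmath>

    void BoxMuller(float u1, float u2, float* f0, float* f1) {
      // u1, u2 are uniform in (0, 1]; keep u1 away from 0 so log() is finite.
      const float kEpsilon = 1.0e-7f;
      const float kTwoPi = 6.28318530718f;
      if (u1 < kEpsilon) u1 = kEpsilon;
      const float v1 = kTwoPi * u2;
      const float r = std::sqrt(-2.0f * std::log(u1));
      *f0 = r * std::sin(v1);
      *f1 = r * std::cos(v1);
    }
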
diff --git a/tensorflow/core/ops/math_grad_test.cc b/tensorflow/core/ops/math_grad_test.cc
index ef839de..0bc0e35 100644
--- a/tensorflow/core/ops/math_grad_test.cc
+++ b/tensorflow/core/ops/math_grad_test.cc
@@ -434,9 +434,6 @@
   void Compute(OpKernelContext* ctx) override { ctx->set_output(0, Tensor()); }
 };
 REGISTER_KERNEL_BUILDER(Name("TestOpWithNoGrad").Device(DEVICE_CPU), TestOp);
-#ifdef TENSORFLOW_USE_SYCL
-REGISTER_KERNEL_BUILDER(Name("TestOpWithNoGrad").Device(DEVICE_SYCL), TestOp);
-#endif  // TENSORFLOW_USE_SYCL
 
 TEST_F(MathGradTest, Error_Reporting) {
   auto x = test::AsTensor<float>({-3.f});
@@ -893,8 +890,6 @@
   }
 }
 
-// TODO(lukeiwanski): Implement Complex Pow for SYCL
-#ifndef TENSORFLOW_USE_SYCL
 TEST_F(MathGradTest, ComplexPow) {
   auto x = test::AsTensor<complex64>({0.f, 2.f, -2.f}, TensorShape({3}));
   auto y = test::AsTensor<complex64>({2.f, 2.f, 2.f}, TensorShape({3}));
@@ -941,7 +936,6 @@
                                 TensorShape({3})),
       4.5e-6f);
 }
-#endif  // TENSORFLOW_USE_SYCL
 
 TEST_F(MathGradTest, Xlogy) {
   auto x = test::AsTensor<float>({0.f, 0.f, 2.f, 3.f, 4.f, 5.f},
@@ -1185,8 +1179,6 @@
   test::ExpectClose(dy, MatMul(dz, true, x, true));
 }
 
-// TODO(lukeiwanski): Implement BatchMatMul for SYCL
-#ifndef TENSORFLOW_USE_SYCL
 TEST_F(MathGradTest, BatchMatMul_00) {
   auto x = test::AsTensor<float>({1.f, 2.f, 3.f, 4.f, 5.f, 6.f},
                                  TensorShape({1, 2, 3}));
@@ -1234,7 +1226,6 @@
   test::ExpectClose(dx, BatchMatMul(y, true, dz, true));
   test::ExpectClose(dy, BatchMatMul(dz, true, x, true));
 }
-#endif  // TENSORFLOW_USE_SYCL
 
 TEST_F(MathGradTest, BatchMatMulV2_00) {
   auto x = test::AsTensor<float>({1.f, 2.f, 3.f, 4.f, 5.f, 6.f},
diff --git a/tensorflow/core/platform/build_config_root.bzl b/tensorflow/core/platform/build_config_root.bzl
index c5626ca..b82e104 100644
--- a/tensorflow/core/platform/build_config_root.bzl
+++ b/tensorflow/core/platform/build_config_root.bzl
@@ -14,7 +14,6 @@
     _tf_cuda_tests_tags = "tf_cuda_tests_tags",
     _tf_exec_properties = "tf_exec_properties",
     _tf_gpu_tests_tags = "tf_gpu_tests_tags",
-    _tf_sycl_tests_tags = "tf_sycl_tests_tags",
 )
 
 if_dynamic_kernels = _if_dynamic_kernels
@@ -29,4 +28,3 @@
 tf_cuda_tests_tags = _tf_cuda_tests_tags
 tf_exec_properties = _tf_exec_properties
 tf_gpu_tests_tags = _tf_gpu_tests_tags
-tf_sycl_tests_tags = _tf_sycl_tests_tags
diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD
index 20f0e9e..2e2ef2c 100644
--- a/tensorflow/core/platform/default/build_config/BUILD
+++ b/tensorflow/core/platform/default/build_config/BUILD
@@ -3,8 +3,6 @@
 
 load("//tensorflow:tensorflow.bzl", "tf_copts", "tf_cuda_library")
 load("//tensorflow/core/platform:build_config_root.bzl", "if_static")
-load("@local_config_sycl//sycl:platform.bzl", "sycl_library_path")
-load("@local_config_sycl//sycl:build_defs.bzl", "if_ccpp")
 
 package(default_visibility = ["//tensorflow:internal"])
 
@@ -219,17 +217,3 @@
     }),
     deps = [],
 )
-
-cc_library(
-    name = "sycl",
-    data = if_ccpp([
-        "@local_config_sycl//sycl:{}".format(sycl_library_path("ComputeCpp")),
-    ]),
-    linkopts = if_ccpp([
-        "-Wl,-rpath,../local_config_sycl/sycl/lib",
-    ]),
-    deps = if_ccpp(
-        ["@local_config_sycl//sycl:syclrt"],
-        ["@local_config_sycl//sycl:sycl_headers"],
-    ),
-)
diff --git a/tensorflow/core/platform/default/build_config_root.bzl b/tensorflow/core/platform/default/build_config_root.bzl
index 3afe1de..6012b4d 100644
--- a/tensorflow/core/platform/default/build_config_root.bzl
+++ b/tensorflow/core/platform/default/build_config_root.bzl
@@ -18,9 +18,6 @@
 def tf_cuda_tests_tags():
     return tf_gpu_tests_tags()
 
-def tf_sycl_tests_tags():
-    return ["requires-gpu", "gpu"] + gpu_test_tags()
-
 def tf_exec_properties(kwargs):
     if ("tags" in kwargs and kwargs["tags"] != None and
         "remote-gpu" in kwargs["tags"]):
diff --git a/tensorflow/core/profiler/internal/tfprof_node.cc b/tensorflow/core/profiler/internal/tfprof_node.cc
index 8bcec0c..6b1d0ee 100644
--- a/tensorflow/core/profiler/internal/tfprof_node.cc
+++ b/tensorflow/core/profiler/internal/tfprof_node.cc
@@ -23,8 +23,7 @@
   return device.find("stream:all") != device.npos;
 }
 bool CountAsCPUTime(const string& device) {
-  return RE2::FullMatch(device,
-                        ".*/(device:gpu|gpu|device:cpu|cpu|device:sycl):\\d+");
+  return RE2::FullMatch(device, ".*/(device:gpu|gpu|device:cpu|cpu):\\d+");
 }
 bool IsCanonicalDevice(const string& device) { return CountAsCPUTime(device); }
 
@@ -210,11 +209,7 @@
     } else {
       node_.set_canonical_device(dev);
       // TODO(xpan): Support things other than gpu?
-      if (dev.find("sycl") != dev.npos) {
-        node_.set_host_device(StringReplace(dev, "device:sycl:\\d+", "cpu:0"));
-      } else {
-        node_.set_host_device(StringReplace(dev, "gpu:\\d+", "cpu:0"));
-      }
+      node_.set_host_device(StringReplace(dev, "gpu:\\d+", "cpu:0"));
       AddOpType(node_.canonical_device());
     }
   }
@@ -288,8 +283,7 @@
 }
 
 bool IsPlacedOnAccelerator(const string& device) {
-  return device.find("gpu") != device.npos ||
-         device.find("sycl") != device.npos;
+  return device.find("gpu") != device.npos;
 }
 bool IsPlacedOnCPU(const string& device) {
   return device.find("cpu") != device.npos;
diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files
index 212a417..2aad135 100644
--- a/tensorflow/opensource_only.files
+++ b/tensorflow/opensource_only.files
@@ -163,7 +163,6 @@
 tensorflow/third_party/six.BUILD
 tensorflow/third_party/snappy.BUILD
 tensorflow/third_party/sqlite.BUILD
-tensorflow/third_party/sycl/crosstool/BUILD
 tensorflow/third_party/systemlibs/BUILD
 tensorflow/third_party/systemlibs/BUILD.tpl
 tensorflow/third_party/systemlibs/absl_py.BUILD
diff --git a/tensorflow/python/client/device_lib_test.py b/tensorflow/python/client/device_lib_test.py
index fec41f5..431cafa 100644
--- a/tensorflow/python/client/device_lib_test.py
+++ b/tensorflow/python/client/device_lib_test.py
@@ -39,8 +39,7 @@
     # GPU test
     if test.is_gpu_available():
       self.assertGreater(len(devices), 1)
-      self.assertTrue("GPU" in [d.device_type for d in devices] or
-                      "SYCL" in [d.device_type for d in devices])
+      self.assertIn("GPU", [d.device_type for d in devices])
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index bbe2899..1c5ed18 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -133,7 +133,7 @@
 def gpu_device_name():
   """Returns the name of a GPU device if available or the empty string."""
   for x in device_lib.list_local_devices():
-    if x.device_type == "GPU" or x.device_type == "SYCL":
+    if x.device_type == "GPU":
       return compat.as_str(x.name)
   return ""
 
@@ -1563,6 +1563,10 @@
   Returns:
     True if a GPU device of the requested kind is available.
   """
+
+  # The cuda_only argument only mattered while TensorFlow supported SYCL
+  # devices; it is now ignored.
+  del cuda_only
+
   try:
     for local_device in device_lib.list_local_devices():
       if local_device.device_type == "GPU":
@@ -1570,8 +1574,6 @@
         cc = gpu_info.compute_capability or (0, 0)
         if not min_cuda_compute_capability or cc >= min_cuda_compute_capability:
           return True
-      if local_device.device_type == "SYCL" and not cuda_only:
-        return True
     return False
   except errors_impl.NotFoundError as e:
     if not all(x in str(e) for x in ["CUDA", "not find"]):
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index f731b74..2c9de17 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -1,6 +1,6 @@
 # Tests of TensorFlow kernels written using the Python API.
 
-load("//tensorflow:tensorflow.bzl", "sycl_py_test", "tf_custom_op_library")
+load("//tensorflow:tensorflow.bzl", "tf_custom_op_library")
 load("//tensorflow:tensorflow.bzl", "cuda_py_test")
 
 # buildifier: disable=same-origin-load
@@ -3658,20 +3658,6 @@
     ],
 )
 
-sycl_py_test(
-    name = "basic_gpu_test",
-    size = "small",
-    srcs = ["basic_gpu_test.py"],
-    deps = [
-        "//tensorflow/python:array_ops_gen",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:math_ops_gen",
-        "//third_party/py/numpy",
-    ],
-)
-
 tf_py_test(
     name = "sets_test",
     size = "medium",
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 3fbeb02..132ba3b 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -9,7 +9,6 @@
     "tf_additional_xla_deps_py",
     "tf_exec_properties",
     "tf_gpu_tests_tags",
-    "tf_sycl_tests_tags",
 )
 load(
     "//tensorflow/core/platform:rules_cc.bzl",
@@ -2355,44 +2354,6 @@
     label_regex_map = {"deps": "deps:{extension_name}"},
 )
 
-def sycl_py_test(
-        name,
-        srcs,
-        size = "medium",
-        data = [],
-        main = None,
-        args = [],
-        shard_count = 1,
-        kernels = [],
-        tags = [],
-        flaky = 0,
-        xla_enabled = False,
-        grpc_enabled = False,
-        **kwargs):
-    test_tags = tags + tf_sycl_tests_tags()
-    if "additional_deps" in kwargs:
-        fail("Use `deps` to specify dependencies. `additional_deps` has been replaced with the standard pattern of `deps`.")
-    tf_py_test(
-        name = name,
-        size = size,
-        srcs = srcs,
-        args = args,
-        data = data,
-        flaky = flaky,
-        grpc_enabled = grpc_enabled,
-        kernels = kernels,
-        main = main,
-        shard_count = shard_count,
-        tags = test_tags,
-        xla_enabled = xla_enabled,
-        **kwargs
-    )
-
-register_extension_info(
-    extension_name = "sycl_py_test",
-    label_regex_map = {"deps": "deps:{extension_name}"},
-)
-
 def py_tests(
         name,
         srcs,
diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
index fc8fad8..7b2ba29 100755
--- a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
+++ b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh
@@ -60,6 +60,5 @@
   -e "TF_NEED_TENSORRT=${TF_NEED_CUDA}" \
   -e "TF_CUDA_COMPUTE_CAPABILITIES=${TF_CUDA_COMPUTE_CAPABILITIES}" \
   -e "TF_NEED_ROCM=${TF_NEED_ROCM}" \
-  -e "TF_NEED_OPENCL_SYCL=0" \
   "${DOCKER_IMAGE}" \
   "/workspace/tensorflow/tools/ci_build/linux/libtensorflow.sh"
diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
index 06798ad..69f0152 100755
--- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh
@@ -27,7 +27,6 @@
 export TF_NEED_HDFS=0
 export TF_NEED_CUDA=0
 export TF_NEED_ROCM=0
-export TF_NEED_OPENCL_SYCL=0
 export TF_NEED_MKL=0
 export COMPUTECPP_PATH="/usr/local"
 
diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
index 95f1992..73920e9 100755
--- a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh
@@ -28,7 +28,6 @@
 export PYTHON_BIN_PATH="/usr/bin/python"
 export TF_NEED_HDFS=0
 export TF_NEED_ROCM=0
-export TF_NEED_OPENCL_SYCL=0
 export TF_NEED_MKL=0
 export COMPUTECPP_PATH="/usr/local"
 
diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh b/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh
index aeabc0e..4f3b67f 100755
--- a/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh
+++ b/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh
@@ -28,7 +28,6 @@
 export TF_NEED_GCP=0
 export TF_NEED_HDFS=0
 export TF_NEED_CUDA=0
-export TF_NEED_OPENCL_SYCL=0
 export TF_NEED_MKL=0
 export COMPUTECPP_PATH="/usr/local"
 
diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD
index b336ff2..6d5ec6a 100644
--- a/tensorflow/tools/lib_package/BUILD
+++ b/tensorflow/tools/lib_package/BUILD
@@ -157,7 +157,6 @@
         "@llvm-project//llvm:LICENSE.TXT",
         "@llvm-project//mlir:LICENSE.TXT",
         "@lmdb//:LICENSE",
-        "@local_config_sycl//sycl:LICENSE.text",
         "@local_config_tensorrt//:LICENSE",
         "@nasm//:LICENSE",
         "@nsync//:LICENSE",
@@ -238,7 +237,6 @@
         "@llvm-project//llvm:LICENSE.TXT",
         "@llvm-project//mlir:LICENSE.TXT",
         "@lmdb//:LICENSE",
-        "@local_config_sycl//sycl:LICENSE.text",
         "@local_config_tensorrt//:LICENSE",
         "@nasm//:LICENSE",
         "@nsync//:LICENSE",
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index b47924a..01bdde7 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -202,7 +202,6 @@
         "@llvm-project//llvm:LICENSE.TXT",
         "@llvm-project//mlir:LICENSE.TXT",
         "@lmdb//:LICENSE",
-        "@local_config_sycl//sycl:LICENSE.text",
         "@local_config_tensorrt//:LICENSE",
         "@nasm//:LICENSE",
         "@nsync//:LICENSE",
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 232ff64..bbc5d5c 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -7,7 +7,6 @@
 load("//third_party/mkl:build_defs.bzl", "mkl_repository")
 load("//third_party/git:git_configure.bzl", "git_configure")
 load("//third_party/py:python_configure.bzl", "python_configure")
-load("//third_party/sycl:sycl_configure.bzl", "sycl_configure")
 load("//third_party/systemlibs:syslibs_configure.bzl", "syslibs_configure")
 load("//third_party/toolchains/remote:configure.bzl", "remote_execution_configure")
 load("//third_party/toolchains/clang6:repo.bzl", "clang6_configure")
@@ -99,7 +98,6 @@
     tensorrt_configure(name = "local_config_tensorrt")
     nccl_configure(name = "local_config_nccl")
     git_configure(name = "local_config_git")
-    sycl_configure(name = "local_config_sycl")
     syslibs_configure(name = "local_config_syslibs")
     python_configure(name = "local_config_python")
     rocm_configure(name = "local_config_rocm")
diff --git a/third_party/eigen3/BUILD b/third_party/eigen3/BUILD
index 595321f..bbe74cf 100644
--- a/third_party/eigen3/BUILD
+++ b/third_party/eigen3/BUILD
@@ -2,6 +2,8 @@
 #   Eigen is a C++ template library for linear algebra: vectors,
 #   matrices, and related algorithms.
 
+load("//third_party/mkl:build_defs.bzl", "if_mkl")
+
 licenses([
     # Note: Eigen is an MPL2 library that includes GPL v3 and LGPL v2.1+ code.
     #       We've taken special care to not reference any restricted code.
@@ -11,8 +13,6 @@
 
 exports_files(["LICENSE"])
 
-load("//third_party/mkl:build_defs.bzl", "if_mkl")
-
 EIGEN3_THIRD_PARTY_HEADERS = [
     "Eigen/Core",
     "Eigen/LU",
@@ -37,7 +37,6 @@
     visibility = ["//visibility:public"],
     deps = [
         "@eigen_archive//:eigen",
-        "@local_config_sycl//sycl",
     ],
 )
 
diff --git a/third_party/sycl/BUILD b/third_party/sycl/BUILD
deleted file mode 100644
index 2b86f73..0000000
--- a/third_party/sycl/BUILD
+++ /dev/null
@@ -1,4 +0,0 @@
-package(
-    default_visibility = ["//visibility:public"],
-    licenses = ["notice"],  # Apache 2.0
-)
diff --git a/third_party/sycl/crosstool/BUILD b/third_party/sycl/crosstool/BUILD
deleted file mode 100644
index e69de29..0000000
--- a/third_party/sycl/crosstool/BUILD
+++ /dev/null
diff --git a/third_party/sycl/crosstool/BUILD.tpl b/third_party/sycl/crosstool/BUILD.tpl
deleted file mode 100755
index 7274433..0000000
--- a/third_party/sycl/crosstool/BUILD.tpl
+++ /dev/null
@@ -1,27 +0,0 @@
-licenses(["notice"])  # Apache 2.0
-
-package(default_visibility = ["//visibility:public"])
-
-cc_toolchain_suite(
-    name = "toolchain",
-    toolchains = {
-        "local|compiler": ":cc-compiler-local",
-    },
-)
-
-cc_toolchain(
-    name = "cc-compiler-local",
-    all_files = ":empty",
-    compiler_files = ":empty",
-    cpu = "local",
-    dwp_files = ":empty",
-    linker_files = ":empty",
-    objcopy_files = ":empty",
-    strip_files = ":empty",
-    supports_param_files = 1,
-)
-
-filegroup(
-    name = "empty",
-    srcs = [],
-)
diff --git a/third_party/sycl/crosstool/CROSSTOOL.tpl b/third_party/sycl/crosstool/CROSSTOOL.tpl
deleted file mode 100755
index f8e50ef..0000000
--- a/third_party/sycl/crosstool/CROSSTOOL.tpl
+++ /dev/null
@@ -1,217 +0,0 @@
-major_version: "local"
-minor_version: ""
-default_target_cpu: "same_as_host"
-
-default_toolchain {
-  cpu: "k8"
-  toolchain_identifier: "local_linux"
-}
-
-default_toolchain {
-  cpu: "arm"
-  toolchain_identifier: "local_arm"
-}
-
-toolchain {
-  abi_version: "local"
-  abi_libc_version: "local"
-  builtin_sysroot: ""
-  compiler: "compiler"
-  host_system_name: "local"
-  needsPic: true
-  supports_gold_linker: false
-  supports_incremental_linker: false
-  supports_fission: false
-  supports_interface_shared_objects: false
-  supports_normalizing_ar: false
-  supports_start_end_lib: false
-  supports_thin_archives: false
-  target_libc: "local"
-  target_cpu: "local"
-  target_system_name: "local"
-  toolchain_identifier: "local_linux"
-
-  tool_path { name: "ar" path: "/usr/bin/ar" }
-  tool_path { name: "compat-ld" path: "/usr/bin/ld" }
-  tool_path { name: "cpp" path: "/usr/bin/cpp" }
-  tool_path { name: "dwp" path: "/usr/bin/dwp" }
-  tool_path { name: "gcc" path: "%{sycl_impl}" }
-  # Use "-std=c++11" for nvcc. For consistency, force both the host compiler
-  # and the device compiler to use "-std=c++11".
-  cxx_flag: "%{c++_std}"
-  linker_flag: "-Wl,-no-as-needed"
-  linker_flag: "-lstdc++"
-  linker_flag: "-B/usr/bin/"
-
-  # TODO(bazel-team): In theory, the path here ought to exactly match the path
-  # used by gcc. That works because bazel currently doesn't track files at
-  # absolute locations and has no remote execution, yet. However, this will need
-  # to be fixed, maybe with auto-detection?
-  cxx_builtin_include_directory: "/usr/lib/gcc/"
-  cxx_builtin_include_directory: "/usr/lib"
-  cxx_builtin_include_directory: "/usr/lib64"
-  cxx_builtin_include_directory: "/usr/local/include"
-  cxx_builtin_include_directory: "/usr/include"
-
-  cxx_builtin_include_directory: "%{sycl_include_dir}"
-  cxx_builtin_include_directory: "%{python_lib_path}"
-
-  tool_path { name: "gcov" path: "/usr/bin/gcov" }
-
-  # C(++) compiles invoke the compiler (as that is the one knowing where
-  # to find libraries), but we provide LD so other rules can invoke the linker.
-  tool_path { name: "ld" path: "/usr/bin/ld" }
-
-  tool_path { name: "nm" path: "/usr/bin/nm" }
-  tool_path { name: "objcopy" path: "/usr/bin/objcopy" }
-  objcopy_embed_flag: "-I"
-  objcopy_embed_flag: "binary"
-  tool_path { name: "objdump" path: "/usr/bin/objdump" }
-  tool_path { name: "strip" path: "/usr/bin/strip" }
-
-  # Make C++ compilation deterministic. Use linkstamping instead of these
-  # compiler symbols.
-  unfiltered_cxx_flag: "-Wno-builtin-macro-redefined"
-  unfiltered_cxx_flag: "-D__DATE__=\"redacted\""
-  unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\""
-  unfiltered_cxx_flag: "-D__TIME__=\"redacted\""
-
-  compiler_flag: "-fPIE"
-
-  # Keep stack frames for debugging, even in opt mode.
-  compiler_flag: "-fno-omit-frame-pointer"
-
-  # Anticipated future default.
-  linker_flag: "-no-canonical-prefixes"
-  unfiltered_cxx_flag: "-fno-canonical-system-headers"
-
-  # Have gcc return the exit code from ld.
-  linker_flag: "-pass-exit-codes"
-
-  # All warnings are enabled. Maybe enable -Werror as well?
-  compiler_flag: "-Wall"
-
-  # Enable SSE instructions by default
-  compiler_flag: "-msse3"
-
-  # Anticipated future default.
-  linker_flag: "-Wl,-no-as-needed"
-  # Stamp the binary with a unique identifier.
-  linker_flag: "-Wl,--build-id=md5"
-  linker_flag: "-Wl,--hash-style=gnu"
-
-  linking_mode_flags { mode: DYNAMIC }
-
-  compilation_mode_flags {
-    mode: FASTBUILD
-    compiler_flag: "-O0"
-  }
-
-  compilation_mode_flags {
-    mode: DBG
-    compiler_flag: "-g"
-  }
-
-  compilation_mode_flags {
-    mode: OPT
-    compiler_flag: "-g0"
-    compiler_flag: "-O2"
-    compiler_flag: "-DNDEBUG"
-    compiler_flag: "-ffunction-sections"
-    compiler_flag: "-fdata-sections"
-    linker_flag: "-Wl,--gc-sections"
-  }
-}
-
-toolchain {
-  abi_version: "local"
-  abi_libc_version: "local"
-  builtin_sysroot: ""
-  compiler: "compiler"
-  host_system_name: "local"
-  needsPic: true
-  supports_gold_linker: false
-  supports_incremental_linker: false
-  supports_fission: false
-  supports_interface_shared_objects: false
-  supports_normalizing_ar: false
-  supports_start_end_lib: false
-  supports_thin_archives: false
-  target_libc: "local"
-  target_cpu: "local"
-  target_system_name: "local"
-  toolchain_identifier: "local_arm"
-
-  tool_path { name: "ar" path: "/usr/bin/ar" }
-  tool_path { name: "compat-ld" path: "/usr/bin/ld" }
-  tool_path { name: "cpp" path: "/usr/bin/cpp" }
-  tool_path { name: "dwp" path: "/usr/bin/dwp" }
-  tool_path { name: "gcc" path: "computecpp" }
-  # Use "-std=c++11" for nvcc. For consistency, force both the host compiler
-  # and the device compiler to use "-std=c++11".
-  cxx_flag: "-std=c++11"
-  linker_flag: "-Wl,-no-as-needed"
-  linker_flag: "-lstdc++"
-  linker_flag: "-B/usr/bin/"
-
-  # TODO(bazel-team): In theory, the path here ought to exactly match the path
-  # used by gcc. That works because bazel currently doesn't track files at
-  # absolute locations and has no remote execution, yet. However, this will need
-  # to be fixed, maybe with auto-detection?
-  cxx_builtin_include_directory: "/usr/lib/gcc/"
-  cxx_builtin_include_directory: "/usr/lib"
-  cxx_builtin_include_directory: "/usr/lib64"
-  cxx_builtin_include_directory: "/usr/local/include"
-  cxx_builtin_include_directory: "/usr/include"
-
-  cxx_builtin_include_directory: "%{computecpp_toolkit_path}"
-  cxx_builtin_include_directory: "%{python_lib_path}"
-
-  tool_path { name: "gcov" path: "/usr/bin/gcov" }
-
-  # C(++) compiles invoke the compiler (as that is the one knowing where
-  # to find libraries), but we provide LD so other rules can invoke the linker.
-  tool_path { name: "ld" path: "/usr/bin/ld" }
-
-  tool_path { name: "nm" path: "/usr/bin/nm" }
-  tool_path { name: "objcopy" path: "/usr/bin/objcopy" }
-  objcopy_embed_flag: "-I"
-  objcopy_embed_flag: "binary"
-  tool_path { name: "objdump" path: "/usr/bin/objdump" }
-  tool_path { name: "strip" path: "/usr/bin/strip" }
-
-  # Make C++ compilation deterministic. Use linkstamping instead of these
-  # compiler symbols.
-  unfiltered_cxx_flag: "-Wno-builtin-macro-redefined"
-  unfiltered_cxx_flag: "-D__DATE__=\"redacted\""
-  unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\""
-  unfiltered_cxx_flag: "-D__TIME__=\"redacted\""
-
-  # All warnings are enabled. Maybe enable -Werror as well?
-  compiler_flag: "-Wall"
-
-  # Anticipated future default.
-  linker_flag: "-Wl,-no-as-needed"
-  # Stamp the binary with a unique identifier.
-  linker_flag: "-Wl,--build-id=md5"
-  linker_flag: "-Wl,--hash-style=gnu"
-
-  linking_mode_flags { mode: DYNAMIC }
-
-  compilation_mode_flags {
-    mode: FASTBUILD
-    compiler_flag: "-O0"
-  }
-
-  compilation_mode_flags {
-    mode: DBG
-    compiler_flag: "-g"
-  }
-
-  compilation_mode_flags {
-    mode: OPT
-    compiler_flag: "-g0"
-    compiler_flag: "-O2"
-    compiler_flag: "-DNDEBUG"
-  }
-}
\ No newline at end of file
diff --git a/third_party/sycl/crosstool/computecpp.tpl b/third_party/sycl/crosstool/computecpp.tpl
deleted file mode 100755
index ac27e81..0000000
--- a/third_party/sycl/crosstool/computecpp.tpl
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/usr/bin/env python
-
-import os
-import sys
-import tempfile
-from subprocess import call, Popen, PIPE
-
-CPU_CXX_COMPILER = ('%{host_cxx_compiler}')
-CPU_C_COMPILER = ('%{host_c_compiler}')
-
-CURRENT_DIR = os.path.dirname(sys.argv[0])
-COMPUTECPP_ROOT = CURRENT_DIR + '/../sycl/'
-COMPUTECPP_DRIVER= COMPUTECPP_ROOT + 'bin/compute++'
-COMPUTECPP_INCLUDE = COMPUTECPP_ROOT + 'include'
-
-def main():
-  remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable', '-Wignored-attributes')
-  # remove -fsanitize-coverage from string with g++
-  if 'g++' in CPU_CXX_COMPILER:
-    remove_flags += ('-fsanitize-coverage',)
-  compiler_flags = [flag for flag in sys.argv[1:] if not flag.startswith(remove_flags)]
-
-  output_file_index = compiler_flags.index('-o') + 1
-  output_file_name = compiler_flags[output_file_index]
-
-  if output_file_index == 1:
-    # we are linking
-    return call([CPU_CXX_COMPILER] + compiler_flags + ['-Wl,--no-undefined'])
-
-  # find what we compile
-  compiling_cpp = False
-  if '-c' in compiler_flags:
-    compiled_file_index = compiler_flags.index('-c') + 1
-    compiled_file_name = compiler_flags[compiled_file_index]
-    compiling_cpp = compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', '.C', '.cxx'))
-
-  # add -D_GLIBCXX_USE_CXX11_ABI=0 to the command line if you have custom installation of GCC/Clang
-  compiler_flags = compiler_flags + ['-DEIGEN_USE_SYCL=1', '-DTENSORFLOW_USE_SYCL', '-DEIGEN_HAS_C99_MATH']
-
-  if not compiling_cpp:
-    # compile for C
-    return call([CPU_C_COMPILER] + compiler_flags)
-
-  # create a denylist of folders that will be skipped when compiling with ComputeCpp
-  skip_extensions = [".cu.cc"]
-  skip_folders = ["tensorflow/compiler", "tensorflow/docs_src", "third_party", "external", "hexagon"]
-  skip_folders = [(folder + '/') for folder in skip_folders]
-  # if compiling external project skip computecpp
-  if any(compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any(_folder in output_file_name for _folder in skip_folders):
-    return call([CPU_CXX_COMPILER] + compiler_flags)
-
-  # this is an optimisation that will check if compiled file has to be compiled with ComputeCpp
-  flags_without_output = list(compiler_flags)
-  del flags_without_output[output_file_index]   # remove output_file_name
-  del flags_without_output[output_file_index - 1] # remove '-o'
-  # create preprocessed of the file and store it for later use
-  pipe = Popen([CPU_CXX_COMPILER] + flags_without_output + ["-E"], stdout=PIPE)
-  preprocessed_file_str = pipe.communicate()[0]
-  if pipe.returncode != 0:
-    return pipe.returncode
-
-  # check if it has parallel_for in it
-  if not '.parallel_for' in preprocessed_file_str:
-    # call CXX compiler like usual
-    with tempfile.NamedTemporaryFile(suffix=".ii") as preprocessed_file: # Force '.ii' extension so that g++ does not preprocess the file again
-      preprocessed_file.write(preprocessed_file_str)
-      preprocessed_file.flush()
-      compiler_flags[compiled_file_index] = preprocessed_file.name
-      return call([CPU_CXX_COMPILER] + compiler_flags)
-  del preprocessed_file_str   # save some memory as this string can be quite big
-
-  filename, file_extension = os.path.splitext(output_file_name)
-  bc_out = filename + '.sycl'
-
-  # strip asan for the device
-  computecpp_device_compiler_flags = ['-sycl-compress-name', '-Wno-unused-variable', '-Wno-c++11-narrowing',
-                                      '-I', COMPUTECPP_INCLUDE, '-isystem', COMPUTECPP_INCLUDE,
-                                      '-std=c++11', '-sycl', '-emit-llvm', '-no-serial-memop',
-                                      '-Xclang', '-cl-denorms-are-zero', '-Xclang', '-cl-fp32-correctly-rounded-divide-sqrt']
-  # disable flags enabling SIMD instructions
-  computecpp_device_compiler_flags += [flag for flag in compiler_flags if \
-    not any(x in flag.lower() for x in ('-fsanitize', '-fno-canonical-system-headers', '=native', '=core2', 'msse', 'vectorize', 'mavx', 'mmmx', 'm3dnow', 'fma'))]
-
-  x = call([COMPUTECPP_DRIVER] + computecpp_device_compiler_flags)
-  if x == 0:
-    # strip dependency-file flags (-MF, -MD, *.d) from the host pass
-    host_compiler_flags = [flag for flag in compiler_flags if (not flag.startswith(('-MF', '-MD',)) and not '.d' in flag)]
-    host_compiler_flags[host_compiler_flags.index('-c')] = "--include"
-    host_compiler_flags = ['-xc++', '-Wno-unused-variable', '-I', COMPUTECPP_INCLUDE, '-c', bc_out] + host_compiler_flags
-    x = call([CPU_CXX_COMPILER] + host_compiler_flags)
-  return x
-
-if __name__ == '__main__':
-  sys.exit(main())
diff --git a/third_party/sycl/crosstool/trisycl.tpl b/third_party/sycl/crosstool/trisycl.tpl
deleted file mode 100644
index 8206a1a..0000000
--- a/third_party/sycl/crosstool/trisycl.tpl
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/usr/bin/env python
-
-import os
-import sys
-import tempfile
-from subprocess import call
-
-CPU_CXX_COMPILER = ('%{host_cxx_compiler}')
-CPU_C_COMPILER = ('%{host_c_compiler}')
-
-CURRENT_DIR = os.path.dirname(sys.argv[0])
-TRISYCL_INCLUDE_DIR = CURRENT_DIR + '/../sycl/include'
-
-
-def main():
-  compiler_flags = []
-
-  remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable',
-                  '-Wignored-attributes', '-fno-exceptions')
-  # remove -fsanitize-coverage from string with g++
-  if 'g++' in CPU_CXX_COMPILER:
-    remove_flags += ('-fsanitize-coverage',)
-    compiler_flags += ['-fopenmp']
-  else:
-    compiler_flags += ['-fopenmp=libomp']
-
-  compiler_flags += [
-      flag for flag in sys.argv[1:] if not flag.startswith(remove_flags)
-  ]
-
-  output_file_index = compiler_flags.index('-o') + 1
-  output_file_name = compiler_flags[output_file_index]
-
-  if (output_file_index == 1):
-    # we are linking
-    return call([CPU_CXX_COMPILER] + compiler_flags + ['-Wl,--no-undefined'])
-
-  # find what we compile
-  compiling_cpp = 0
-  if ('-c' in compiler_flags):
-    compiled_file_index = compiler_flags.index('-c') + 1
-    compiled_file_name = compiler_flags[compiled_file_index]
-    if (compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', '.C',
-                                     '.cxx'))):
-      compiling_cpp = 1
-
-  debug_flags = [
-      '-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL',
-      '-lpthread', '-lboost_log', '-g', '-rdynamic'
-  ]
-
-  opt_flags = ['-DNDEBUG', '-DBOOST_DISABLE_ASSERTS', '-O3']
-
-  compiler_flags = compiler_flags + [
-      '-DEIGEN_USE_SYCL=1', '-DEIGEN_HAS_C99_MATH',
-      '-DEIGEN_MAX_ALIGN_BYTES=16', '-DTENSORFLOW_USE_SYCL'
-  ] + opt_flags
-
-  if (compiling_cpp == 1):
-    # create a denylist of folders that will be skipped when compiling
-    # with triSYCL
-    skip_extensions = ['.cu.cc']
-    skip_folders = [
-        'tensorflow/compiler', 'tensorflow/docs_src', 'tensorflow/tensorboard',
-        'third_party', 'external', 'hexagon'
-    ]
-    skip_folders = [(folder + '/') for folder in skip_folders]
-    # if compiling external project skip triSYCL
-    if any(
-        compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any(
-            _folder in output_file_name for _folder in skip_folders):
-      return call([CPU_CXX_COMPILER] + compiler_flags)
-
-    host_compiler_flags = [
-        '-xc++', '-Wno-unused-variable', '-I', TRISYCL_INCLUDE_DIR
-    ] + compiler_flags
-    x = call([CPU_CXX_COMPILER] + host_compiler_flags)
-    return x
-  else:
-    # compile for C
-    return call([CPU_C_COMPILER] + compiler_flags)
-
-
-if __name__ == '__main__':
-  sys.exit(main())
diff --git a/third_party/sycl/sycl/BUILD b/third_party/sycl/sycl/BUILD
deleted file mode 100644
index 65f5a84..0000000
--- a/third_party/sycl/sycl/BUILD
+++ /dev/null
@@ -1,8 +0,0 @@
-# Description:
-# A minimal BUILD file to make template files in this folder available. Without this BUILD file,
-# bazel returns errors when trying to access tpl files in this folder.
-
-package(
-    default_visibility = ["//visibility:public"],
-    licenses = ["notice"],  # Apache 2.0
-)
diff --git a/third_party/sycl/sycl/BUILD.tpl b/third_party/sycl/sycl/BUILD.tpl
deleted file mode 100755
index b7e9aa8..0000000
--- a/third_party/sycl/sycl/BUILD.tpl
+++ /dev/null
@@ -1,56 +0,0 @@
-licenses(["notice"])  # Apache 2.0
-
-load("@local_config_sycl//sycl:build_defs.bzl", "if_sycl")
-load(":platform.bzl", "sycl_library_path")
-
-load(":platform.bzl", "readlink_command")
-
-package(default_visibility = ["//visibility:public"])
-
-exports_files(["LICENSE.text"])
-
-config_setting(
-    name = "using_sycl_ccpp",
-    define_values = {
-        "using_sycl": "true",
-        "using_trisycl": "false",
-    },
-)
-
-config_setting(
-    name = "using_sycl_trisycl",
-    define_values = {
-        "using_sycl": "true",
-        "using_trisycl": "true",
-    },
-)
-
-
-cc_library(
-    name = "sycl_headers",
-    hdrs = glob([
-        "**/*.h",
-        "**/*.hpp",
-    ]),
-    includes = [".", "include"],
-)
-
-cc_library(
-    name = "syclrt",
-    srcs = [
-        sycl_library_path("ComputeCpp")
-    ],
-    data = [
-        sycl_library_path("ComputeCpp")
-    ],
-    includes = ["include/"],
-    linkstatic = 0,
-)
-
-cc_library(
-    name = "sycl",
-    deps = if_sycl([
-        ":sycl_headers",
-        ":syclrt",
-    ]),
-)
diff --git a/third_party/sycl/sycl/LICENSE.text b/third_party/sycl/sycl/LICENSE.text
deleted file mode 100644
index 8d3f050..0000000
--- a/third_party/sycl/sycl/LICENSE.text
+++ /dev/null
@@ -1,268 +0,0 @@
-
----------------------------------------------------------------------
-
-SOFTWARE LICENSE AGREEMENT
-
----------------------------------------------------------------------
----------------------------------------------------------------------
-
-By downloading, installing, copying, or otherwise using the
-ComputeCpp Community Edition software, including any associated
-components, media, printed materials, and electronic documentation
-("Software"), the user agrees to the following terms and conditions
-of this Software License Agreement ("Agreement"). Please read the
-terms of this Agreement carefully before beginning your download, as
-pressing the "I AGREE" button at the end of this Agreement will
-confirm your assent. If you do not agree to these terms, then
-Codeplay Software Limited is unwilling to license the Software to
-you; so please press the "CANCEL" button to cancel your download.
-
- 1. License. Codeplay Software Ltd., a company incorporated in
-    England and Wales with registered number 04567874 and having its
-    registered office at Regent House, 316 Beulah Hill, London,
-    United Kingdom, SE19 3HF ("Codeplay") hereby grants the user,
-    free of charge, a non-exclusive worldwide license to use and
-    replicate (but not modify) the Software for any use, whether
-    commercial or non-commercial, in accordance with this Agreement.
-    Codeplay reserves all rights to the Software that are not
-    expressly granted by this Agreement.
- 2. Redistribution. The user may copy and redistribute unmodified
-    copies of only those components of the Software which are
-    specified below ("Redistributable Components"), in object code
-    form, as part of the user’s software applications or libraries
-    ("Applications"). The user acknowledges and agrees that it has no
-    right to modify the Redistributable Components in any way. Any
-    use of the Redistributable Components within the user’s
-    Applications will continue to be subject to the terms and
-    conditions of this Agreement, and the user must also distribute a
-    copy of this Agreement and reproduce and include all notices of
-    copyrights or other proprietary rights in the Software. The
-    user’s redistribution of the Redistributable Components will not
-    entitle it to any payment from Codeplay. The user may not
-    transfer any of its rights or obligations under this Agreement.
-
-+-------------------------------------------+
-|Redistributable Component|File Name        |
-|-------------------------+-----------------|
-|Runtime (for Linux)      |libComputeCpp.so |
-|-------------------------+-----------------|
-|Runtime (for Windows)    |libComputeCpp.dll|
-+-------------------------------------------+
-
- 3. Restrictions. The user shall not:
-
-     a. circumvent or bypass any technological protection measures in
-        or relating to the Software;
-     b. use the Software to perform any unauthorized transfer of
-        information or for any illegal purpose;
-     c. de-compile, decrypt, disassemble, hack, emulate, exploit or
-        reverse-engineer the Software (other than to the limited
-        extent permitted by law);
-     d. copy or redistribute any components of the Software that are
-        not listed in the table of Redistributable Components;
-     e. publish, rent, lease, sell, export, import, or lend the
-        Software;
-     f. represent in any way that it is selling the Software itself
-        or any license to use the Software, nor refer to Codeplay or
-        ComputeCpp within its marketing materials, without the
-        express prior written permission of Codeplay.
- 4. Support. Codeplay does not provide any guarantees of support for
-    the Software to the user. Codeplay will use reasonable endeavors
-    to respond to users' support requests, for the most recent
-    release only, via the community support website at https://
-    computecpp.codeplay.com.
- 5. Intellectual Property. The Software is owned by Codeplay or its
-    licensors, and is protected by the copyright laws of the United
-    Kingdom and other countries and international treaty provisions.
-    Codeplay (and/or its licensors, as the case may be) retains all
-    copyrights, trade secrets and other proprietary rights in the
-    Software, including the rights to make and license the use of all
-    copies. To the extent that any patents owned by Codeplay or its
-    licensors relate to any component of the Software, the license
-    granted to the user in accordance with this Agreement allows for
-    the lawful use of such patents but only for the purposes of this
-    Agreement and not further or otherwise. Therefore, the user may
-    make no copies of the Software, or the written materials that
-    accompany the Software, or reproduce it in any way, except as set
-    forth above.
- 6. Terms. This Agreement is effective until terminated. Codeplay or
-    the user may terminate it immediately at any time. Any violation
-    of the terms of this Agreement by the user will result in
-    immediate termination by Codeplay. Upon termination, the user
-    must return or destroy the Software and accompanying materials
-    and notify Codeplay of its actions by email to info@codeplay.com.
- 7. NO WARRANTIES. Codeplay expressly disclaims any warranty for the
-    Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
-    ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
-    WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
-    AND NON-INFRINGEMENT. IN NO EVENT SHALL CODEPLAY BE LIABLE FOR
-    ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-    CONTRACT, DELICT OR TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-    SOFTWARE. In particular, Codeplay provides no guarantees of
-    application performance on the target hardware.
- 8. General. The invalidity of any portion or provision of this
-    Agreement shall not affect any other portions or provisions. This
-    Agreement shall be governed by the laws of Scotland. This
-    Agreement is the complete and exclusive agreement between the
-    user and Codeplay regarding the Software, and it supersedes any
-    prior agreement, oral or written, and any other communication
-    between the user and Codeplay relating to the subject matter of
-    the Agreement. Any amendment or modification of this Agreement
-    must be in writing and signed by both parties. If the user does
-    not agree to the terms of this Agreement, the user must not
-    install or use the Software.
- 9. Third Party Licenses. The following licenses are for third-party
-    components included in the software.
-
-     a. License for Clang/LLVM compiler technology components:
-
-==============================================================================
-
-LLVM Release License
-
-==============================================================================
-
-University of Illinois/NCSA
-
-Open Source License
-
-Copyright (c) 2007-2014 University of Illinois at Urbana-Champaign.
-
-All rights reserved.
-
-Developed by:
-
- LLVM Team
-
- University of Illinois at Urbana-Champaign
-
- http://llvm.org
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal with
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
-
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimers.
-
- * Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimers in the
- documentation and/or other materials provided with the distribution.
-
- * Neither the names of the LLVM Team, University of Illinois at
- Urbana-Champaign, nor the names of its contributors may be used to
- endorse or promote products derived from this Software without specific
- prior written permission.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
-SOFTWARE.
-
-==============================================================================
-
- b. License for OpenBSD regex components:
-
-$OpenBSD: COPYRIGHT,v 1.3 2003/06/02 20:18:36 millert Exp $
-Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved.
-This software is not subject to any license of the American Telephone
-and Telegraph Company or of the Regents of the University of California.
-Permission is granted to anyone to use this software for any purpose on
-any computer system, and to alter it and redistribute it, subject
-to the following restrictions:
-
-1. The author is not responsible for the consequences of use of this
- software, no matter how awful, even if they arise from flaws in it.
-
-2. The origin of this software must not be misrepresented, either by
- explicit claim or by omission. Since few users ever read sources,
- credits must appear in the documentation.
-
-3. Altered versions must be plainly marked as such, and must not be
- misrepresented as being the original software. Since few users
- ever read sources, credits must appear in the documentation.
-
-4. This notice may not be removed or altered.
-
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
-
-/*-
- * Copyright (c) 1994
- *      The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *                  @(#)COPYRIGHT 8.1 (Berkeley) 3/16/94
- */
-
- c. License for MD5 components:
-
-/*
- * This code is derived from (original license follows):
- *
- * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
- * MD5 Message-Digest Algorithm (RFC 1321).
- *
- * Homepage:
- *  http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
- *
- * Author:
- * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
- *
- * This software was written by Alexander Peslyak in 2001. No copyright is
- * claimed, and the software is hereby placed in the public domain.
- * In case this attempt to disclaim copyright and place the software in the
- * public domain is deemed null and void, then the software is
- * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
- * general public under the following terms:
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted.
- *
- * There's ABSOLUTELY NO WARRANTY, express or implied.
- *
- * (This is a heavily cut-down "BSD license".)
- *
- * This differs from Colin Plumb's older public domain implementation in that
- * no exactly 32-bit integer data type is required (any 32-bit or wider
- * unsigned integer data type will do), there's no compile-time endianness
- * configuration, and the function prototypes match OpenSSL's. No code from
- * Colin Plumb's implementation has been reused; this comment merely compares
- * the properties of the two independent implementations.
- *
- * The primary goals of this implementation are portability and ease of use.
- * It is meant to be fast, but not as fast as possible. Some known
- * optimizations are not included to reduce source code size and avoid
- * compile-time configuration.
- */
-
-
diff --git a/third_party/sycl/sycl/build_defs.bzl.tpl b/third_party/sycl/sycl/build_defs.bzl.tpl
deleted file mode 100755
index a726c8d..0000000
--- a/third_party/sycl/sycl/build_defs.bzl.tpl
+++ /dev/null
@@ -1,28 +0,0 @@
-# Macros for building SYCL code.
-
-def if_sycl(if_true, if_false = []):
-    """Shorthand for select()'ing on whether we're building with SYCL.
-
-    Returns a select statement which evaluates to if_true if we're building
-    with SYCL enabled.  Otherwise, the select statement evaluates to if_false.
-    If we are building with triSYCL instead of ComputeCpp, a list containing
-    only the first element of if_true is returned.
-    """
-    return select({
-        "@local_config_sycl//sycl:using_sycl_ccpp": if_true,
-        "@local_config_sycl//sycl:using_sycl_trisycl": if_true[0:1],
-        "//conditions:default": if_false,
-    })
-
-def if_ccpp(if_true, if_false = []):
-    """Shorthand for select()'ing if we are building with ComputeCPP.
-
-    Returns a select statement which evaluates to if_true if we're building
-    with ComputeCpp enabled. Otherwise, the select statement evaluates
-    to if_false.
-    """
-    return select({
-        "@local_config_sycl//sycl:using_sycl_ccpp": if_true,
-        "@local_config_sycl//sycl:using_sycl_trisycl": if_false,
-        "//conditions:default": if_false,
-    })
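
For context, these macros were consumed from BUILD files roughly as in the
sketch below. The load path follows from the select() keys in the template
above; the target name, source files, and copt are hypothetical illustrations,
not code from this change.

```python
# Hypothetical BUILD snippet: if_sycl() adds extra sources and defines
# only when the SYCL toolchain has been configured.
load("@local_config_sycl//sycl:build_defs.bzl", "if_sycl")

cc_library(
    name = "example_op",
    srcs = ["example_op.cc"] + if_sycl(["example_op_sycl.cc"]),
    copts = if_sycl(["-DTENSORFLOW_USE_SYCL"]),
)
```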
diff --git a/third_party/sycl/sycl/platform.bzl.tpl b/third_party/sycl/sycl/platform.bzl.tpl
deleted file mode 100755
index cb4b335..0000000
--- a/third_party/sycl/sycl/platform.bzl.tpl
+++ /dev/null
@@ -1,5 +0,0 @@
-def sycl_library_path(name):
-    return "lib/lib{}.so".format(name)
-
-def readlink_command():
-    return "readlink"
diff --git a/third_party/sycl/sycl_configure.bzl b/third_party/sycl/sycl_configure.bzl
deleted file mode 100644
index 185160a..0000000
--- a/third_party/sycl/sycl_configure.bzl
+++ /dev/null
@@ -1,260 +0,0 @@
-"""SYCL autoconfiguration.
-`sycl_configure` depends on the following environment variables:
-
-  * HOST_CXX_COMPILER:  The host C++ compiler
-  * HOST_C_COMPILER:    The host C compiler
-  * COMPUTECPP_TOOLKIT_PATH: The path to the ComputeCpp toolkit.
-  * TRISYCL_INCLUDE_DIR: The path to the include directory of triSYCL.
-                         (if using triSYCL instead of ComputeCpp)
-  * PYTHON_LIB_PATH: The path to the python lib
-"""
-
-_HOST_CXX_COMPILER = "HOST_CXX_COMPILER"
-_HOST_C_COMPILER = "HOST_C_COMPILER"
-_COMPUTECPP_TOOLKIT_PATH = "COMPUTECPP_TOOLKIT_PATH"
-_TRISYCL_INCLUDE_DIR = "TRISYCL_INCLUDE_DIR"
-_PYTHON_LIB_PATH = "PYTHON_LIB_PATH"
-
-def _enable_sycl(repository_ctx):
-    if "TF_NEED_OPENCL_SYCL" in repository_ctx.os.environ:
-        enable_sycl = repository_ctx.os.environ["TF_NEED_OPENCL_SYCL"].strip()
-        return enable_sycl == "1"
-    return False
-
-def _enable_compute_cpp(repository_ctx):
-    return _COMPUTECPP_TOOLKIT_PATH in repository_ctx.os.environ
-
-def auto_configure_fail(msg):
-    """Output failure message when auto configuration fails."""
-    red = "\033[0;31m"
-    no_color = "\033[0m"
-    fail("\n%sAuto-Configuration Error:%s %s\n" % (red, no_color, msg))
-
-# END cc_configure common functions.
-
-def find_c(repository_ctx):
-    """Find host C compiler."""
-    c_name = "gcc"
-    if _HOST_C_COMPILER in repository_ctx.os.environ:
-        c_name = repository_ctx.os.environ[_HOST_C_COMPILER].strip()
-    if c_name.startswith("/"):
-        return c_name
-    c = repository_ctx.which(c_name)
-    if c == None:
-        fail("Cannot find C compiler, please correct your path.")
-    return c
-
-def find_cc(repository_ctx):
-    """Find host C++ compiler."""
-    cc_name = "g++"
-    if _HOST_CXX_COMPILER in repository_ctx.os.environ:
-        cc_name = repository_ctx.os.environ[_HOST_CXX_COMPILER].strip()
-    if cc_name.startswith("/"):
-        return cc_name
-    cc = repository_ctx.which(cc_name)
-    if cc == None:
-        fail("Cannot find C++ compiler, please correct your path.")
-    return cc
-
-def find_computecpp_root(repository_ctx):
-    """Find ComputeCpp compiler."""
-    sycl_name = ""
-    if _COMPUTECPP_TOOLKIT_PATH in repository_ctx.os.environ:
-        sycl_name = repository_ctx.os.environ[_COMPUTECPP_TOOLKIT_PATH].strip()
-    if sycl_name.startswith("/"):
-        return sycl_name
-    fail("Cannot find SYCL compiler, please correct your path")
-
-def find_trisycl_include_dir(repository_ctx):
-    """Find triSYCL include directory. """
-    if _TRISYCL_INCLUDE_DIR in repository_ctx.os.environ:
-        sycl_name = repository_ctx.os.environ[_TRISYCL_INCLUDE_DIR].strip()
-        if sycl_name.startswith("/"):
-            return sycl_name
-    fail("Cannot find triSYCL include directory, please correct your path")
-
-def find_python_lib(repository_ctx):
-    """Returns python path."""
-    if _PYTHON_LIB_PATH in repository_ctx.os.environ:
-        return repository_ctx.os.environ[_PYTHON_LIB_PATH].strip()
-    fail("Environment variable PYTHON_LIB_PATH was not specified re-run ./configure")
-
-def _check_lib(repository_ctx, toolkit_path, lib):
-    """Checks if lib exists under sycl_toolkit_path or fail if it doesn't.
-
-    Args:
-      repository_ctx: The repository context.
-      toolkit_path: The toolkit directory containing the libraries.
-      lib: The library to look for under toolkit_path.
-    """
-    lib_path = toolkit_path + "/" + lib
-    if not repository_ctx.path(lib_path).exists:
-        auto_configure_fail("Cannot find %s" % lib_path)
-
-def _check_dir(repository_ctx, directory):
-    """Checks whether the directory exists and fail if it does not.
-
-    Args:
-      repository_ctx: The repository context.
-      directory: The directory to check the existence of.
-    """
-    if not repository_ctx.path(directory).exists:
-        auto_configure_fail("Cannot find dir: %s" % directory)
-
-def _symlink_dir(repository_ctx, src_dir, dest_dir):
-    """Symlinks all the files in a directory.
-
-    Args:
-      repository_ctx: The repository context.
-      src_dir: The source directory.
-      dest_dir: The destination directory to create the symlinks in.
-    """
-    files = repository_ctx.path(src_dir).readdir()
-    for src_file in files:
-        repository_ctx.symlink(src_file, dest_dir + "/" + src_file.basename)
-
-def _tpl(repository_ctx, tpl, substitutions = {}, out = None):
-    if not out:
-        out = tpl.replace(":", "/")
-    repository_ctx.template(
-        out,
-        Label("//third_party/sycl/%s.tpl" % tpl),
-        substitutions,
-    )
-
-def _file(repository_ctx, label):
-    repository_ctx.template(
-        label.replace(":", "/"),
-        Label("//third_party/sycl/%s" % label),
-        {},
-    )
-
-_DUMMY_CROSSTOOL_BZL_FILE = """
-def error_sycl_disabled():
-  fail("ERROR: Building with --config=sycl but TensorFlow is not configured " +
-       "to build with SYCL support. Please re-run ./configure and enter 'Y' " +
-       "at the prompt to build with SYCL support.")
-
-  native.genrule(
-      name = "error_gen_crosstool",
-      outs = ["CROSSTOOL"],
-      cmd = "echo 'Should not be run.' && exit 1",
-  )
-
-  native.filegroup(
-      name = "crosstool",
-      srcs = [":CROSSTOOL"],
-      output_licenses = ["unencumbered"],
-  )
-"""
-
-_DUMMY_CROSSTOOL_BUILD_FILE = """
-load("//crosstool:error_sycl_disabled.bzl", "error_sycl_disabled")
-
-error_sycl_disabled()
-"""
-
-def _create_dummy_repository(repository_ctx):
-    # Set up BUILD file for sycl/.
-    _tpl(repository_ctx, "sycl:build_defs.bzl")
-    _tpl(repository_ctx, "sycl:BUILD")
-    _file(repository_ctx, "sycl:LICENSE.text")
-    _tpl(repository_ctx, "sycl:platform.bzl")
-
-    # Create dummy files for the SYCL toolkit since they are still required by
-    # tensorflow/core/platform/default/build_config:sycl.
-    repository_ctx.file("sycl/include/sycl.hpp", "")
-    repository_ctx.file("sycl/lib/libComputeCpp.so", "")
-
-    # If sycl_configure is not configured to build with SYCL support, and the user
-    # attempts to build with --config=sycl, add a dummy build rule to intercept
-    # this and fail with an actionable error message.
-    repository_ctx.file(
-        "crosstool/error_sycl_disabled.bzl",
-        _DUMMY_CROSSTOOL_BZL_FILE,
-    )
-    repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
-
-def _sycl_autoconf_imp(repository_ctx):
-    """Implementation of the sycl_autoconf rule."""
-    if not _enable_sycl(repository_ctx):
-        _create_dummy_repository(repository_ctx)
-    else:
-        # copy template files
-        _tpl(repository_ctx, "sycl:build_defs.bzl")
-        _tpl(repository_ctx, "sycl:BUILD")
-        _tpl(repository_ctx, "sycl:platform.bzl")
-        _tpl(repository_ctx, "crosstool:BUILD")
-        _file(repository_ctx, "sycl:LICENSE.text")
-
-        if _enable_compute_cpp(repository_ctx):
-            _tpl(
-                repository_ctx,
-                "crosstool:computecpp",
-                {
-                    "%{host_cxx_compiler}": find_cc(repository_ctx),
-                    "%{host_c_compiler}": find_c(repository_ctx),
-                },
-            )
-
-            computecpp_root = find_computecpp_root(repository_ctx)
-            _check_dir(repository_ctx, computecpp_root)
-
-            _tpl(
-                repository_ctx,
-                "crosstool:CROSSTOOL",
-                {
-                    "%{sycl_include_dir}": computecpp_root,
-                    "%{sycl_impl}": "computecpp",
-                    "%{c++_std}": "-std=c++11",
-                    "%{python_lib_path}": find_python_lib(repository_ctx),
-                },
-            )
-
-            # symlink libraries
-            _check_lib(repository_ctx, computecpp_root + "/lib", "libComputeCpp.so")
-            _symlink_dir(repository_ctx, computecpp_root + "/lib", "sycl/lib")
-            _symlink_dir(repository_ctx, computecpp_root + "/include", "sycl/include")
-            _symlink_dir(repository_ctx, computecpp_root + "/bin", "sycl/bin")
-        else:
-            trisycl_include_dir = find_trisycl_include_dir(repository_ctx)
-            _check_dir(repository_ctx, trisycl_include_dir)
-
-            _tpl(
-                repository_ctx,
-                "crosstool:trisycl",
-                {
-                    "%{host_cxx_compiler}": find_cc(repository_ctx),
-                    "%{host_c_compiler}": find_c(repository_ctx),
-                    "%{trisycl_include_dir}": trisycl_include_dir,
-                },
-            )
-
-            _tpl(
-                repository_ctx,
-                "crosstool:CROSSTOOL",
-                {
-                    "%{sycl_include_dir}": trisycl_include_dir,
-                    "%{sycl_impl}": "trisycl",
-                    "%{c++_std}": "-std=c++1y",
-                    "%{python_lib_path}": find_python_lib(repository_ctx),
-                },
-            )
-
-            _symlink_dir(repository_ctx, trisycl_include_dir, "sycl/include")
-
-sycl_configure = repository_rule(
-    implementation = _sycl_autoconf_imp,
-    local = True,
-)
-"""Detects and configures the SYCL toolchain.
-
-Add the following to your WORKSPACE file:
-
-```python
-sycl_configure(name = "local_config_sycl")
-```
-
-Args:
-  name: A unique name for this workspace rule.
-"""