Use LLD when building with downloaded GPU toolchain

This improves build times when using the downloaded clang toolchain.

Additionally, remove the '-B/usr/bin' flags from the cuda CROSSTOOL when using
the downloaded toolchain.
They were forcing 'clang' to search for the linker in '/usr/bin' first,
preventing the downloaded LLD from being selected.

PiperOrigin-RevId: 211430374
diff --git a/configure.py b/configure.py
index 7edab53..361bd47 100644
--- a/configure.py
+++ b/configure.py
@@ -1543,6 +1543,10 @@
       if environ_cp.get('TF_DOWNLOAD_CLANG') != '1':
         # Set up which clang we should use as the cuda / host compiler.
         set_clang_cuda_compiler_path(environ_cp)
+      else:
+        # Use downloaded LLD for linking.
+        write_to_bazelrc('build:cuda_clang --config=download_clang_use_lld')
+        write_to_bazelrc('test:cuda_clang --config=download_clang_use_lld')
     else:
       # Set up which gcc nvcc should use as the host compiler
       # No need to set this on Windows