[RISCV] Don't force Local Exec TLS for non-PIC

Summary:
Forcing Local Exec TLS requires the use of copy relocations. Copy
relocations against TLS symbols need special handling in the runtime
linker; glibc provides it, but neither FreeBSD nor musl does, so it
cannot be relied upon. Moreover, copy relocations are a hack that embeds
the size of an object in the ABI when it otherwise wouldn't be, and
breaks protected symbols (which are expected to be DSO-local), whilst
also wasting space; thus they should be avoided whenever possible. As
discussed in D70398, RISC-V should move away from forcing
Local Exec, and instead use Initial Exec like other targets, with
possible linker relaxation to follow. The RISC-V GCC maintainers also
intend to adopt this more-conventional behaviour (see
https://github.com/riscv/riscv-elf-psabi-doc/issues/122).

Reviewers: asb, MaskRay

Reviewed By: MaskRay

Subscribers: emaste, krytarowski, hiraditya, rbar, johnrusso, simoncook, sabuasal, niosHD, kito-cheng, shiva0217, zzheng, edward-jones, rogfer01, MartinMosbeck, brucehoult, the_o, rkruppe, PkmX, jocewei, psnobl, benna, Jim, lenary, s.egerton, pzheng, sameer.abuasal, apazos, llvm-commits, bsdjhb

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70649
diff --git a/llvm/test/CodeGen/RISCV/tls-models.ll b/llvm/test/CodeGen/RISCV/tls-models.ll
index a2015b0..25a2f71 100644
--- a/llvm/test/CodeGen/RISCV/tls-models.ll
+++ b/llvm/test/CodeGen/RISCV/tls-models.ll
@@ -3,16 +3,17 @@
 ; RUN:     | FileCheck -check-prefix=RV32-PIC %s
 ; RUN: llc -mtriple=riscv64 -relocation-model=pic < %s \
 ; RUN:     | FileCheck -check-prefix=RV64-PIC %s
-; RUN: llc -mtriple=riscv32 < %s | FileCheck -check-prefix=NOPIC %s
-; RUN: llc -mtriple=riscv64 < %s | FileCheck -check-prefix=NOPIC %s
+; RUN: llc -mtriple=riscv32 < %s | FileCheck -check-prefix=RV32-NOPIC %s
+; RUN: llc -mtriple=riscv64 < %s | FileCheck -check-prefix=RV64-NOPIC %s
 
 ; Check that TLS symbols are lowered correctly based on the specified
-; model.
+; model. Make sure they're external to avoid them all being optimised to Local
+; Exec for the executable.
 
-@unspecified = thread_local global i32 42
-@ld = thread_local(localdynamic) global i32 42
-@ie = thread_local(initialexec) global i32 42
-@le = thread_local(localexec) global i32 42
+@unspecified = external thread_local global i32
+@ld = external thread_local(localdynamic) global i32
+@ie = external thread_local(initialexec) global i32
+@le = external thread_local(localexec) global i32
 
 
 ; No model specified
@@ -44,12 +45,23 @@
 ; RV64-PIC-NEXT:    addi sp, sp, 16
 ; RV64-PIC-NEXT:    ret
 ;
-; NOPIC-LABEL: f1:
-; NOPIC:       # %bb.0: # %entry
-; NOPIC-NEXT:    lui a0, %tprel_hi(unspecified)
-; NOPIC-NEXT:    add a0, a0, tp, %tprel_add(unspecified)
-; NOPIC-NEXT:    addi a0, a0, %tprel_lo(unspecified)
-; NOPIC-NEXT:    ret
+; RV32-NOPIC-LABEL: f1:
+; RV32-NOPIC:       # %bb.0: # %entry
+; RV32-NOPIC-NEXT:  .LBB0_1: # %entry
+; RV32-NOPIC-NEXT:    # Label of block must be emitted
+; RV32-NOPIC-NEXT:    auipc a0, %tls_ie_pcrel_hi(unspecified)
+; RV32-NOPIC-NEXT:    lw a0, %pcrel_lo(.LBB0_1)(a0)
+; RV32-NOPIC-NEXT:    add a0, a0, tp
+; RV32-NOPIC-NEXT:    ret
+;
+; RV64-NOPIC-LABEL: f1:
+; RV64-NOPIC:       # %bb.0: # %entry
+; RV64-NOPIC-NEXT:  .LBB0_1: # %entry
+; RV64-NOPIC-NEXT:    # Label of block must be emitted
+; RV64-NOPIC-NEXT:    auipc a0, %tls_ie_pcrel_hi(unspecified)
+; RV64-NOPIC-NEXT:    ld a0, %pcrel_lo(.LBB0_1)(a0)
+; RV64-NOPIC-NEXT:    add a0, a0, tp
+; RV64-NOPIC-NEXT:    ret
 entry:
   ret i32* @unspecified
 }
@@ -84,12 +96,23 @@
 ; RV64-PIC-NEXT:    addi sp, sp, 16
 ; RV64-PIC-NEXT:    ret
 ;
-; NOPIC-LABEL: f2:
-; NOPIC:       # %bb.0: # %entry
-; NOPIC-NEXT:    lui a0, %tprel_hi(ld)
-; NOPIC-NEXT:    add a0, a0, tp, %tprel_add(ld)
-; NOPIC-NEXT:    addi a0, a0, %tprel_lo(ld)
-; NOPIC-NEXT:    ret
+; RV32-NOPIC-LABEL: f2:
+; RV32-NOPIC:       # %bb.0: # %entry
+; RV32-NOPIC-NEXT:  .LBB1_1: # %entry
+; RV32-NOPIC-NEXT:    # Label of block must be emitted
+; RV32-NOPIC-NEXT:    auipc a0, %tls_ie_pcrel_hi(ld)
+; RV32-NOPIC-NEXT:    lw a0, %pcrel_lo(.LBB1_1)(a0)
+; RV32-NOPIC-NEXT:    add a0, a0, tp
+; RV32-NOPIC-NEXT:    ret
+;
+; RV64-NOPIC-LABEL: f2:
+; RV64-NOPIC:       # %bb.0: # %entry
+; RV64-NOPIC-NEXT:  .LBB1_1: # %entry
+; RV64-NOPIC-NEXT:    # Label of block must be emitted
+; RV64-NOPIC-NEXT:    auipc a0, %tls_ie_pcrel_hi(ld)
+; RV64-NOPIC-NEXT:    ld a0, %pcrel_lo(.LBB1_1)(a0)
+; RV64-NOPIC-NEXT:    add a0, a0, tp
+; RV64-NOPIC-NEXT:    ret
 entry:
   ret i32* @ld
 }
@@ -116,12 +139,23 @@
 ; RV64-PIC-NEXT:    add a0, a0, tp
 ; RV64-PIC-NEXT:    ret
 ;
-; NOPIC-LABEL: f3:
-; NOPIC:       # %bb.0: # %entry
-; NOPIC-NEXT:    lui a0, %tprel_hi(ie)
-; NOPIC-NEXT:    add a0, a0, tp, %tprel_add(ie)
-; NOPIC-NEXT:    addi a0, a0, %tprel_lo(ie)
-; NOPIC-NEXT:    ret
+; RV32-NOPIC-LABEL: f3:
+; RV32-NOPIC:       # %bb.0: # %entry
+; RV32-NOPIC-NEXT:  .LBB2_1: # %entry
+; RV32-NOPIC-NEXT:    # Label of block must be emitted
+; RV32-NOPIC-NEXT:    auipc a0, %tls_ie_pcrel_hi(ie)
+; RV32-NOPIC-NEXT:    lw a0, %pcrel_lo(.LBB2_1)(a0)
+; RV32-NOPIC-NEXT:    add a0, a0, tp
+; RV32-NOPIC-NEXT:    ret
+;
+; RV64-NOPIC-LABEL: f3:
+; RV64-NOPIC:       # %bb.0: # %entry
+; RV64-NOPIC-NEXT:  .LBB2_1: # %entry
+; RV64-NOPIC-NEXT:    # Label of block must be emitted
+; RV64-NOPIC-NEXT:    auipc a0, %tls_ie_pcrel_hi(ie)
+; RV64-NOPIC-NEXT:    ld a0, %pcrel_lo(.LBB2_1)(a0)
+; RV64-NOPIC-NEXT:    add a0, a0, tp
+; RV64-NOPIC-NEXT:    ret
 entry:
   ret i32* @ie
 }
@@ -144,12 +178,19 @@
 ; RV64-PIC-NEXT:    addi a0, a0, %tprel_lo(le)
 ; RV64-PIC-NEXT:    ret
 ;
-; NOPIC-LABEL: f4:
-; NOPIC:       # %bb.0: # %entry
-; NOPIC-NEXT:    lui a0, %tprel_hi(le)
-; NOPIC-NEXT:    add a0, a0, tp, %tprel_add(le)
-; NOPIC-NEXT:    addi a0, a0, %tprel_lo(le)
-; NOPIC-NEXT:    ret
+; RV32-NOPIC-LABEL: f4:
+; RV32-NOPIC:       # %bb.0: # %entry
+; RV32-NOPIC-NEXT:    lui a0, %tprel_hi(le)
+; RV32-NOPIC-NEXT:    add a0, a0, tp, %tprel_add(le)
+; RV32-NOPIC-NEXT:    addi a0, a0, %tprel_lo(le)
+; RV32-NOPIC-NEXT:    ret
+;
+; RV64-NOPIC-LABEL: f4:
+; RV64-NOPIC:       # %bb.0: # %entry
+; RV64-NOPIC-NEXT:    lui a0, %tprel_hi(le)
+; RV64-NOPIC-NEXT:    add a0, a0, tp, %tprel_add(le)
+; RV64-NOPIC-NEXT:    addi a0, a0, %tprel_lo(le)
+; RV64-NOPIC-NEXT:    ret
 entry:
   ret i32* @le
 }