[CodeGen][X86] Fix handling of __fp16 vectors.

This commit fixes a bug in IRGen where it generates completely broken
code for __fp16 vectors on X86. For example, when the following code is
compiled:

    half4 hv0, hv1, hv2; // these are vectors of __fp16.

    void foo221() {
      hv0 = hv1 + hv2;
    }

clang generates the following IR, in which two i16 vectors are added:

    @hv1 = common global <4 x i16> zeroinitializer, align 8
    @hv2 = common global <4 x i16> zeroinitializer, align 8
    @hv0 = common global <4 x i16> zeroinitializer, align 8

    define void @foo221() {
      %0 = load <4 x i16>, <4 x i16>* @hv1, align 8
      %1 = load <4 x i16>, <4 x i16>* @hv2, align 8
      %add = add <4 x i16> %0, %1
      store <4 x i16> %add, <4 x i16>* @hv0, align 8
      ret void
    }

To fix the bug, this commit uses the code committed in r314056, which
modified clang to promote and truncate __fp16 vectors to and from float
vectors in the AST. It also fixes another IRGen bug where a short value
is assigned to an __fp16 variable without any integer-to-floating-point
conversion, as shown in the following example:

    __fp16 a;
    short b;

    void foo1() {
      a = b;
    }

    @b = common global i16 0, align 2
    @a = common global i16 0, align 2

    define void @foo1() #0 {
      %0 = load i16, i16* @b, align 2
      store i16 %0, i16* @a, align 2
      ret void
    }

rdar://problem/20625184

Differential Revision: https://reviews.llvm.org/D40112

llvm-svn: 320215

commit: 502775a2ee08e2fe1df7dd0741722a31debffa16 [log] [tgz]
author: Akira Hatanaka <ahatanaka@apple.com> Sat Dec 09 00:02:37 2017 +0000
committer: Akira Hatanaka <ahatanaka@apple.com> Sat Dec 09 00:02:37 2017 +0000
tree: 7d41f265f7421a4f62356f723683626a072646a3
parent: 01fb31cc89d3ce145f3cacd669981de5edd5de8d [diff]
diff --git a/clang/test/CodeGen/fp16-ops.c b/clang/test/CodeGen/fp16-ops.c
index c96727f..f2ed667 100644
--- a/clang/test/CodeGen/fp16-ops.c
+++ b/clang/test/CodeGen/fp16-ops.c

@@ -1,8 +1,9 @@
 // REQUIRES: arm-registered-target
-// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOHALF --check-prefix=CHECK
-// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOHALF --check-prefix=CHECK
-// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=HALF --check-prefix=CHECK
-// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=HALF --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-linux-gnu %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
+// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK
 // RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi -fnative-half-type %s \
 // RUN:   | FileCheck %s --check-prefix=NATIVE-HALF
 // RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi -fnative-half-type %s \
@@ -16,20 +17,19 @@
 volatile __fp16 h0 = 0.0, h1 = 1.0, h2;
 volatile float f0, f1, f2;
 volatile double d0;
+short s0;
 
 void foo(void) {
   // CHECK-LABEL: define void @foo()
 
   // Check unary ops
 
-  // NOHALF: [[F16TOF32:call float @llvm.convert.from.fp16.f32]]
-  // HALF: [[F16TOF32:fpext half]]
+  // NOTNATIVE: [[F16TOF32:fpext half]]
   // CHECK: fptoui float
   // NATIVE-HALF: fptoui half
   test = (h0);
   // CHECK: uitofp i32
-  // NOHALF: [[F32TOF16:call i16 @llvm.convert.to.fp16.f32]]
-  // HALF: [[F32TOF16:fptrunc float]]
+  // NOTNATIVE: [[F32TOF16:fptrunc float]]
   // NATIVE-HALF: uitofp i32 {{.*}} to half
   h0 = (test);
   // CHECK: [[F16TOF32]]
@@ -38,8 +38,7 @@
   test = (!h1);
   // CHECK: [[F16TOF32]]
   // CHECK: fsub float
-  // NOHALF: [[F32TOF16]]
-  // HALF: [[F32TOF16]]
+  // NOTNATIVE: [[F32TOF16]]
   // NATIVE-HALF: fsub half
   h1 = -h1;
   // CHECK: [[F16TOF32]]
@@ -76,8 +75,6 @@
   // NATIVE-HALF: fmul half
   h1 = h0 * h2;
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F32TOF16]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fmul float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fmul half
@@ -107,7 +104,6 @@
   // NATIVE-HALF: fdiv half
   h1 = (h0 / h2);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fdiv float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fdiv half
@@ -137,7 +133,6 @@
   // NATIVE-HALF: fadd half
   h1 = (h2 + h0);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fadd float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fadd half
@@ -167,7 +162,6 @@
   // NATIVE-HALF: fsub half
   h1 = (h2 - h0);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fsub float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fsub half
@@ -196,7 +190,6 @@
   // NATIVE-HALF: fcmp olt half
   test = (h2 < h0);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp olt float
   // NATIVE-HALF: fcmp olt half
   test = (h2 < (__fp16)42.0);
@@ -225,7 +218,6 @@
   // NATIVE-HALF: fcmp ogt half
   test = (h0 > h2);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp ogt float
   // NATIVE-HALF: fcmp ogt half
   test = ((__fp16)42.0 > h2);
@@ -254,7 +246,6 @@
   // NATIVE-HALF: fcmp ole half
   test = (h2 <= h0);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp ole float
   // NATIVE-HALF: fcmp ole half
   test = (h2 <= (__fp16)42.0);
@@ -284,7 +275,6 @@
   // NATIVE-HALF: fcmp oge half
   test = (h0 >= h2);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp oge float
   // NATIVE-HALF: fcmp oge half
   test = (h0 >= (__fp16)-2.0);
@@ -313,7 +303,6 @@
   // NATIVE-HALF: fcmp oeq half
   test = (h1 == h2);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp oeq float
   // NATIVE-HALF: fcmp oeq half
   test = (h1 == (__fp16)1.0);
@@ -342,7 +331,6 @@
   // NATIVE-HALF: fcmp une half
   test = (h1 != h2);
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fcmp une float
   // NATIVE-HALF: fcmp une half
   test = (h1 != (__fp16)1.0);
@@ -374,8 +362,7 @@
   h1 = (h1 ? h2 : h0);
   // Check assignments (inc. compound)
   h0 = h1;
-  // NOHALF: [[F32TOF16]]
-  // HALF: store {{.*}} half 0xHC000
+  // NOTNATIVE: store {{.*}} half 0xHC000
   // NATIVE-HALF: store {{.*}} half 0xHC000
   h0 = (__fp16)-2.0f;
   // CHECK: [[F32TOF16]]
@@ -398,7 +385,6 @@
   // NATIVE-HALF: fadd half
   h0 += h1;
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fadd float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fadd half
@@ -433,7 +419,6 @@
   // NATIVE-HALF: fsub half
   h0 -= h1;
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fsub float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fsub half
@@ -468,7 +453,6 @@
   // NATIVE-HALF: fmul half
   h0 *= h1;
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fmul float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fmul half
@@ -503,7 +487,6 @@
   // NATIVE-HALF: fdiv half
   h0 /= h1;
   // CHECK: [[F16TOF32]]
-  // NOHALF: [[F16TOF32]]
   // CHECK: fdiv float
   // CHECK: [[F32TOF16]]
   // NATIVE-HALF: fdiv half
@@ -532,27 +515,29 @@
   h0 /= i0;
 
   // Check conversions to/from double
-  // NOHALF: call i16 @llvm.convert.to.fp16.f64(
-  // HALF: fptrunc double {{.*}} to half
+  // NOTNATIVE: fptrunc double {{.*}} to half
   // NATIVE-HALF: fptrunc double {{.*}} to half
   h0 = d0;
 
   // CHECK: [[MID:%.*]] = fptrunc double {{%.*}} to float
-  // NOHALF: call i16 @llvm.convert.to.fp16.f32(float [[MID]])
-  // HALF: fptrunc float [[MID]] to half
+  // NOTNATIVE: fptrunc float [[MID]] to half
   // NATIVE-HALF: [[MID:%.*]] = fptrunc double {{%.*}} to float
   // NATIVE-HALF: fptrunc float {{.*}} to half
   h0 = (float)d0;
 
-  // NOHALF: call double @llvm.convert.from.fp16.f64(
-  // HALF: fpext half {{.*}} to double
+  // NOTNATIVE: fpext half {{.*}} to double
   // NATIVE-HALF: fpext half {{.*}} to double
   d0 = h0;
 
-  // NOHALF: [[MID:%.*]] = call float @llvm.convert.from.fp16.f32(
-  // HALF: [[MID:%.*]] = fpext half {{.*}} to float
+  // NOTNATIVE: [[MID:%.*]] = fpext half {{.*}} to float
   // CHECK: fpext float [[MID]] to double
   // NATIVE-HALF: [[MID:%.*]] = fpext half {{.*}} to float
   // NATIVE-HALF: fpext float [[MID]] to double
   d0 = (float)h0;
+
+  // NOTNATIVE: [[V1:%.*]] = load i16, i16* @s0
+  // NOTNATIVE: [[CONV:%.*]] = sitofp i16 [[V1]] to float
+  // NOTNATIVE: [[TRUNC:%.*]] = fptrunc float [[CONV]] to half
+  // NOTNATIVE: store volatile half [[TRUNC]], half* @h0
+  h0 = s0;
 }

diff --git a/clang/test/CodeGen/fp16vec-ops.c b/clang/test/CodeGen/fp16vec-ops.c
index a99be41..2eb75a4 100644
--- a/clang/test/CodeGen/fp16vec-ops.c
+++ b/clang/test/CodeGen/fp16vec-ops.c

@@ -1,6 +1,7 @@
 // REQUIRES: arm-registered-target
 // RUN: %clang_cc1 -triple arm64-apple-ios9 -emit-llvm -o - -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=CHECK
 // RUN: %clang_cc1 -triple armv7-apple-ios9 -emit-llvm -o - -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 -triple x86_64-apple-macos10.13 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK
 
 typedef __fp16 half4 __attribute__ ((vector_size (8)));
 typedef short short4 __attribute__ ((vector_size (8)));

diff --git a/clang/test/CodeGenCXX/float16-declarations.cpp b/clang/test/CodeGenCXX/float16-declarations.cpp
index b97f9aa..87ef139 100644
--- a/clang/test/CodeGenCXX/float16-declarations.cpp
+++ b/clang/test/CodeGenCXX/float16-declarations.cpp

@@ -11,16 +11,14 @@
 // CHECK-DAG: @_ZN12_GLOBAL__N_13f1nE = internal global half 0xH0000, align 2
 
   _Float16 f2n = 33.f16;
-// CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_13f2nE = internal global half 0xH5020, align 2
-// CHECK-X86-DAG:     @_ZN12_GLOBAL__N_13f2nE = internal global i16 20512, align 2
+// CHECK-DAG: @_ZN12_GLOBAL__N_13f2nE = internal global half 0xH5020, align 2
 
   _Float16 arr1n[10];
 // CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_15arr1nE = internal global [10 x half] zeroinitializer, align 2
 // CHECK-X86-DAG:     @_ZN12_GLOBAL__N_15arr1nE = internal global [10 x half] zeroinitializer, align 16
 
   _Float16 arr2n[] = { 1.2, 3.0, 3.e4 };
-// CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x half] [half 0xH3CCD, half 0xH4200, half 0xH7753], align 2
-// CHECK-X86-DAG:     @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x i16] [i16 15565, i16 16896, i16 30547], align 2
+// CHECK-DAG: @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x half] [half 0xH3CCD, half 0xH4200, half 0xH7753], align 2
 
   const volatile _Float16 func1n(const _Float16 &arg) {
     return arg + f2n + arr1n[4] - arr2n[1];
@@ -35,16 +33,14 @@
 // CHECK-X86-DAG: @f1f = global half 0xH0000, align 2
 
 _Float16 f2f = 32.4;
-// CHECK-AARCH64-DAG: @f2f = global half 0xH500D, align 2
-// CHECK-X86-DAG: @f2f = global i16 20493, align 2
+// CHECK-DAG: @f2f = global half 0xH500D, align 2
 
 _Float16 arr1f[10];
 // CHECK-AARCH64-DAG: @arr1f = global [10 x half] zeroinitializer, align 2
 // CHECK-X86-DAG: @arr1f = global [10 x half] zeroinitializer, align 16
 
 _Float16 arr2f[] = { -1.2, -3.0, -3.e4 };
-// CHECK-AARCH64-DAG: @arr2f = global [3 x half] [half 0xHBCCD, half 0xHC200, half 0xHF753], align 2
-// CHECK-X86-DAG: @arr2f = global [3 x i16] [i16 -17203, i16 -15872, i16 -2221], align 2
+// CHECK-DAG: @arr2f = global [3 x half] [half 0xHBCCD, half 0xHC200, half 0xHF753], align 2
 
 _Float16 func1f(_Float16 arg);
 
@@ -110,11 +106,9 @@
 // CHECK-DAG:  call void @_ZN2C1C2EDF16_(%class.C1* %{{.*}}, half %{{.*}})
 
   S1<_Float16> s1 = { 132.f16 };
-// CHECK-AARCH64-DAG: @_ZZ4mainE2s1 = private unnamed_addr constant %struct.S1 { half 0xH5820 }, align 2
-// CHECK-X86-DAG:     @_ZZ4mainE2s1 = private unnamed_addr constant { i16 } { i16 22560 }, align 2
+// CHECK-DAG: @_ZZ4mainE2s1 = private unnamed_addr constant %struct.S1 { half 0xH5820 }, align 2
 // CHECK-DAG:  [[S1:%[0-9]+]] = bitcast %struct.S1* %{{.*}} to i8*
-// CHECK-AARCH64-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[S1]], i8* bitcast (%struct.S1* @_ZZ4mainE2s1 to i8*), i64 2, i32 2, i1 false)
-// CHECK-X86-DAG:     call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* bitcast ({ i16 }* @_ZZ4mainE2s1 to i8*), i64 2, i32 2, i1 false)
+// CHECK-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[S1]], i8* bitcast (%struct.S1* @_ZZ4mainE2s1 to i8*), i64 2, i32 2, i1 false)
 
   _Float16 f4l = func1n(f1l)  + func1f(f2l) + c1.func1c(f3l) + c1.func2c(f1l) +
     func1t(f1l) + s1.mem2 - f1n + f2n;
@@ -129,8 +123,7 @@
 // CHECK-DAG:  store half [[INC]], half* %{{.*}}, align 2
 
   _Float16 arr1l[] = { -1.f16, -0.f16, -11.f16 };
-// CHECK-AARCH64-DAG: @_ZZ4mainE5arr1l = private unnamed_addr constant [3 x half] [half 0xHBC00, half 0xH8000, half 0xHC980], align 2
-// CHECK-X86-DAG:     @_ZZ4mainE5arr1l = private unnamed_addr constant [3 x i16] [i16 -17408, i16 -32768, i16 -13952], align 2
+// CHECK-DAG: @_ZZ4mainE5arr1l = private unnamed_addr constant [3 x half] [half 0xHBC00, half 0xH8000, half 0xHC980], align 2
 
   float cvtf = f2n;
 //CHECK-DAG: [[H2F:%[a-z0-9]+]] = fpext half {{%[0-9]+}} to float

diff --git a/clang/test/CodeGenCXX/fp16-mangle.cpp b/clang/test/CodeGenCXX/fp16-mangle.cpp
index bd5a319..5827fd5 100644
--- a/clang/test/CodeGenCXX/fp16-mangle.cpp
+++ b/clang/test/CodeGenCXX/fp16-mangle.cpp

@@ -4,9 +4,9 @@
 template <typename T, typename U> struct S { static int i; };
 template <> int S<__fp16, __fp16>::i = 3;
 
-// CHECK-LABEL: define void @_Z1fPDh(i16* %x)
+// CHECK-LABEL: define void @_Z1fPDh(half* %x)
 void f (__fp16 *x) { }
 
-// CHECK-LABEL: define void @_Z1gPDhS_(i16* %x, i16* %y)
+// CHECK-LABEL: define void @_Z1gPDhS_(half* %x, half* %y)
 void g (__fp16 *x, __fp16 *y) { }
commit	502775a2ee08e2fe1df7dd0741722a31debffa16	[log] [tgz]
author	Akira Hatanaka <ahatanaka@apple.com>	Sat Dec 09 00:02:37 2017 +0000
committer	Akira Hatanaka <ahatanaka@apple.com>	Sat Dec 09 00:02:37 2017 +0000
tree	7d41f265f7421a4f62356f723683626a072646a3
parent	01fb31cc89d3ce145f3cacd669981de5edd5de8d [diff]