[OpenMP] Codegen for the 'target parallel' directive on the NVPTX device. This patch adds codegen for the 'target parallel' directive on the NVPTX device. We term offload OpenMP directives such as 'target parallel' and 'target teams distribute parallel for' as SPMD constructs. SPMD constructs, in contrast to Generic ones like the plain 'target', can never contain a serial region. SPMD constructs can be handled more efficiently on the GPU and do not require the Warp Loop of the Generic codegen scheme. This patch adds SPMD codegen support for 'target parallel' on the NVPTX device and can be reused for other SPMD constructs. Reviewers: ABataev Differential Revision: https://reviews.llvm.org/D28755 llvm-svn: 292428

commit: 44a87c9f1b330d243eae87c60151711c5b0b486a [log] [tgz]
author: Arpith Chacko Jacob <acjacob@us.ibm.com> Wed Jan 18 19:35:00 2017 +0000
committer: Arpith Chacko Jacob <acjacob@us.ibm.com> Wed Jan 18 19:35:00 2017 +0000
tree: 66af7f701bfa080c252bcea9549369c3533c4088
parent: 11255ec7659cec933e0b0415c14d50ca80cb16cd [diff] [blame]
diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp
index 59c4d5b..dab0c5a 100644
--- a/clang/test/OpenMP/nvptx_target_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_codegen.cpp

@@ -8,6 +8,14 @@
 #ifndef HEADER
 #define HEADER
 
+// Check that the execution mode of all 6 target regions is set to Generic Mode.
+// CHECK-DAG: {{@__omp_offloading_.+l98}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l175}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l284}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l321}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l339}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l304}}_exec_mode = weak constant i8 1
+
 template<typename tx, typename ty>
 struct TT{
   tx X;
@@ -23,7 +31,7 @@
   double cn[5][n];
   TT<long long, char> d;
 
-  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l90}}_worker()
+  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l98}}_worker()
   // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
   // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
   // CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -54,7 +62,7 @@
   // CHECK: [[EXIT]]
   // CHECK: ret void
 
-  // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+foo.+l90]]()
+  // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+foo.+l98]]()
   // CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
   // CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
   // CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
@@ -96,7 +104,7 @@
   {
   }
 
-  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l167}}_worker()
+  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l175}}_worker()
   // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
   // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
   // CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -127,7 +135,7 @@
   // CHECK: [[EXIT]]
   // CHECK: ret void
 
-  // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+foo.+l167]](i[[SZ:32|64]] [[ARG1:%[a-zA-Z_]+]])
+  // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+foo.+l175]](i[[SZ:32|64]] [[ARG1:%[a-zA-Z_]+]])
   // CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]],
   // CHECK: store i[[SZ]] [[ARG1]], i[[SZ]]* [[AA_ADDR]],
   // CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
@@ -169,7 +177,7 @@
     aa += 1;
   }
 
-  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l276}}_worker()
+  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+foo.+l284}}_worker()
   // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
   // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
   // CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -200,7 +208,7 @@
   // CHECK: [[EXIT]]
   // CHECK: ret void
 
-  // CHECK: define {{.*}}void [[T3:@__omp_offloading_.+foo.+l276]](i[[SZ]]
+  // CHECK: define {{.*}}void [[T3:@__omp_offloading_.+foo.+l284]](i[[SZ]]
   // Create local storage for each capture.
   // CHECK:    [[LOCAL_A:%.+]] = alloca i[[SZ]]
   // CHECK:    [[LOCAL_B:%.+]] = alloca [10 x float]*
@@ -353,7 +361,7 @@
   return a;
 }
 
-  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+static.+313}}_worker()
+  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+static.+321}}_worker()
   // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
   // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
   // CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -384,7 +392,7 @@
   // CHECK: [[EXIT]]
   // CHECK: ret void
 
-  // CHECK: define {{.*}}void [[T4:@__omp_offloading_.+static.+l313]](i[[SZ]]
+  // CHECK: define {{.*}}void [[T4:@__omp_offloading_.+static.+l321]](i[[SZ]]
   // Create local storage for each capture.
   // CHECK:  [[LOCAL_A:%.+]] = alloca i[[SZ]]
   // CHECK:  [[LOCAL_AA:%.+]] = alloca i[[SZ]]
@@ -439,7 +447,7 @@
 
 
 
-  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+S1.+l331}}_worker()
+  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+S1.+l339}}_worker()
   // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
   // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
   // CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -470,7 +478,7 @@
   // CHECK: [[EXIT]]
   // CHECK: ret void
 
-  // CHECK: define {{.*}}void [[T5:@__omp_offloading_.+S1.+l331]](
+  // CHECK: define {{.*}}void [[T5:@__omp_offloading_.+S1.+l339]](
   // Create local storage for each capture.
   // CHECK:       [[LOCAL_THIS:%.+]] = alloca [[S1:%struct.*]]*
   // CHECK:       [[LOCAL_B:%.+]] = alloca i[[SZ]]
@@ -529,7 +537,7 @@
 
 
 
-  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l296}}_worker()
+  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l304}}_worker()
   // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
   // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
   // CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -560,7 +568,7 @@
   // CHECK: [[EXIT]]
   // CHECK: ret void
 
-  // CHECK: define {{.*}}void [[T6:@__omp_offloading_.+template.+l296]](i[[SZ]]
+  // CHECK: define {{.*}}void [[T6:@__omp_offloading_.+template.+l304]](i[[SZ]]
   // Create local storage for each capture.
   // CHECK:  [[LOCAL_A:%.+]] = alloca i[[SZ]]
   // CHECK:  [[LOCAL_AA:%.+]] = alloca i[[SZ]]
commit	44a87c9f1b330d243eae87c60151711c5b0b486a	[log] [tgz]
author	Arpith Chacko Jacob <acjacob@us.ibm.com>	Wed Jan 18 19:35:00 2017 +0000
committer	Arpith Chacko Jacob <acjacob@us.ibm.com>	Wed Jan 18 19:35:00 2017 +0000
tree	66af7f701bfa080c252bcea9549369c3533c4088
parent	11255ec7659cec933e0b0415c14d50ca80cb16cd [diff] [blame]