[OPENMP][NVPTX] Support memory coalescing for globalized variables.

Added support for memory coalescing of globalized variables to improve
performance. From now on, every globalized variable is represented as
an array of 32 elements, and each thread accesses its own element of
that array using `tid & 31` as the index.

llvm-svn: 344049
diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
index 6925b17..b73b5a8 100644
--- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
@@ -597,9 +597,9 @@
   // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 1
   // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]]
   // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1
-  // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256
+  // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 128
   // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1
-  // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256
+  // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 128
   //
   // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1
   // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
@@ -643,9 +643,9 @@
   // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 1
   // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]]
   // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1
-  // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256
+  // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 128
   // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1
-  // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256
+  // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 128
   //
   // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 4, [[TEAM]]
   // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
@@ -1024,9 +1024,9 @@
   // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 4
   // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]]
   // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1
-  // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256
+  // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 128
   // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1
-  // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256
+  // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 128
   //
   // CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1
   // CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
@@ -1072,9 +1072,9 @@
   // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 4
   // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]]
   // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1
-  // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256
+  // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 128
   // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1
-  // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256
+  // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 128
   //
   // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 2, [[TEAM]]
   // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]