Add recursive task scheduling strategy to taskloop implementation Summary: Taskloop implementation is extended by using recursive task scheduling. Envirable KMP_TASKLOOP_MIN_TASKS added as a manual threshold for the user to switch from recursive to linear tasks scheduling. Details: * The calculations for the loop parameters are moved from __kmp_taskloop_linear upper level * Initial calculation is done in the __kmpc_taskloop, further range splitting is done in the __kmp_taskloop_recur. * Added threshold to switch from recursive to linear tasks scheduling; * One half of split range is scheduled as an internal task which just moves sub-range parameters to the stealing thread that continues recursive scheduling (if number of tasks still enough), the other half is processed recursively; * Internal task duplication routine fixed to assign parent task, that was not needed when all tasks were scheduled by same thread, but is needed now. Patch by Andrey Churbanov Differential Revision: https://reviews.llvm.org/D35273 llvm-svn: 308338

commit: 93e17cfe6c2e8e20f99d45241804732a78737baf [log] [tgz]
author: Jonathan Peyton <jonathan.l.peyton@intel.com> Tue Jul 18 18:50:13 2017 +0000
committer: Jonathan Peyton <jonathan.l.peyton@intel.com> Tue Jul 18 18:50:13 2017 +0000
tree: 674194c7c383f1f20131be6cfe0224849d67a03c
parent: d01e956d38b60508cef4851a6668838261e9b700 [diff]
diff --git a/openmp/runtime/test/tasking/kmp_taskloop.c b/openmp/runtime/test/tasking/kmp_taskloop.c
index 3b318bd..4b13793 100644
--- a/openmp/runtime/test/tasking/kmp_taskloop.c
+++ b/openmp/runtime/test/tasking/kmp_taskloop.c

@@ -1,4 +1,5 @@
 // RUN: %libomp-compile-and-run
+// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
 #include <stdio.h>
 #include <omp.h>
 #include "omp_my_sleep.h"

diff --git a/openmp/runtime/test/tasking/omp_taskloop_grainsize.c b/openmp/runtime/test/tasking/omp_taskloop_grainsize.c
new file mode 100644
index 0000000..ab5001d
--- /dev/null
+++ b/openmp/runtime/test/tasking/omp_taskloop_grainsize.c

@@ -0,0 +1,106 @@
+// RUN: %libomp-compile-and-run
+// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
+/*
+ * Test for taskloop
+ * Method: caculate how many times the iteration space is dispatched
+ *     and judge if each dispatch has the requested grainsize
+ * It is possible for two adjacent chunks are executed by the same thread
+ */
+#include <stdio.h>
+#include <omp.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+
+#define CFDMAX_SIZE 1120
+
+int test_omp_taskloop_grainsize()
+{
+  int i, grainsize, count, tmp_count, result, num_off;
+  int *tmp, *tids, *tidsArray;
+
+  tidsArray = (int *)malloc(sizeof(int) * CFDMAX_SIZE);
+  tids = tidsArray;
+
+  for (grainsize = 1; grainsize < 48; ++grainsize) {
+    fprintf(stderr, "Grainsize %d\n", grainsize);
+    count = tmp_count = num_off = 0;
+
+    for (i = 0; i < CFDMAX_SIZE; ++i) {
+      tids[i] = -1;
+    }
+
+    #pragma omp parallel shared(tids)
+    {
+      #pragma omp master
+      #pragma omp taskloop grainsize(grainsize)
+      for (i = 0; i < CFDMAX_SIZE; i++) {
+        tids[i] = omp_get_thread_num();
+      }
+    }
+
+    for (i = 0; i < CFDMAX_SIZE; ++i) {
+      if (tids[i] == -1) {
+        fprintf(stderr, "  Iteration %d not touched!\n", i);
+        result++;
+      }
+    }
+
+    for (i = 0; i < CFDMAX_SIZE - 1; ++i) {
+      if (tids[i] != tids[i + 1]) {
+        count++;
+      }
+    }
+
+    tmp = (int *)malloc(sizeof(int) * (count + 1));
+    tmp[0] = 1;
+
+    for (i = 0; i < CFDMAX_SIZE - 1; ++i) {
+      if (tmp_count > count) {
+        printf("--------------------\nTestinternal Error: List too "
+               "small!!!\n--------------------\n");
+        break;
+      }
+      if (tids[i] != tids[i + 1]) {
+        tmp_count++;
+        tmp[tmp_count] = 1;
+      } else {
+        tmp[tmp_count]++;
+      }
+    }
+
+    // is grainsize statement working?
+    int num_tasks = CFDMAX_SIZE / grainsize;
+    int multiple1 = CFDMAX_SIZE / num_tasks;
+    int multiple2 = CFDMAX_SIZE / num_tasks + 1;
+    for (i = 0; i < count; i++) {
+      // it is possible for 2 adjacent chunks assigned to a same thread
+      if (tmp[i] % multiple1 != 0 && tmp[i] % multiple2 != 0) {
+        num_off++;
+      }
+    }
+
+    if (num_off > 1) {
+      fprintf(stderr, "  The number of bad chunks is %d\n", num_off);
+      result++;
+    } else {
+      fprintf(stderr, "  Everything ok\n");
+    }
+
+    free(tmp);
+  }
+  free(tidsArray);
+  return (result==0);
+}
+
+int main()
+{
+  int i;
+  int num_failed=0;
+
+  for (i = 0; i < REPETITIONS; i++) {
+    if (!test_omp_taskloop_grainsize()) {
+      num_failed++;
+    }
+  }
+  return num_failed;
+}

diff --git a/openmp/runtime/test/tasking/omp_taskloop_num_tasks.c b/openmp/runtime/test/tasking/omp_taskloop_num_tasks.c
new file mode 100644
index 0000000..2a932e3
--- /dev/null
+++ b/openmp/runtime/test/tasking/omp_taskloop_num_tasks.c

@@ -0,0 +1,67 @@
+// RUN: %libomp-compile-and-run
+// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
+/*
+ * Test for taskloop
+ * Method: caculate how many times the iteration space is dispatched
+ *     and judge if each dispatch has the requested grainsize
+ * It is possible for two adjacent chunks are executed by the same thread
+ */
+#include <stdio.h>
+#include <omp.h>
+#include <stdlib.h>
+#include "omp_testsuite.h"
+
+#define CFDMAX_SIZE 1120
+
+int test_omp_taskloop_num_tasks()
+{
+  int i;
+  int *tids;
+  int *tidsArray;
+  int count;
+  int result = 0;
+  int num_tasks;
+
+  for (num_tasks = 1; num_tasks < 120; ++num_tasks) {
+    count = 0;
+    tidsArray = (int *)malloc(sizeof(int) * CFDMAX_SIZE);
+    tids = tidsArray;
+
+    #pragma omp parallel shared(tids)
+    {
+      int i;
+      #pragma omp master
+      #pragma omp taskloop num_tasks(num_tasks)
+      for (i = 0; i < CFDMAX_SIZE; i++) {
+        tids[i] = omp_get_thread_num();
+      }
+    }
+
+    for (i = 0; i < CFDMAX_SIZE - 1; ++i) {
+      if (tids[i] != tids[i + 1]) {
+        count++;
+      }
+    }
+
+    if (count > num_tasks) {
+      fprintf(stderr, "counted too many tasks: (wanted %d, got %d)\n",
+              num_tasks, count);
+      result++;
+    }
+  }
+
+  return (result==0);
+}
+
+int main()
+{
+  int i;
+  int num_failed=0;
+
+  for (i = 0; i < REPETITIONS; i++) {
+    if (!test_omp_taskloop_num_tasks()) {
+      num_failed++;
+    }
+  }
+  return num_failed;
+}
commit	93e17cfe6c2e8e20f99d45241804732a78737baf	[log] [tgz]
author	Jonathan Peyton <jonathan.l.peyton@intel.com>	Tue Jul 18 18:50:13 2017 +0000
committer	Jonathan Peyton <jonathan.l.peyton@intel.com>	Tue Jul 18 18:50:13 2017 +0000
tree	674194c7c383f1f20131be6cfe0224849d67a03c
parent	d01e956d38b60508cef4851a6668838261e9b700 [diff]