[OMPT] Fix thread_num for implicit_task_end callbacks in nested parallel regions

implicit_task_end callbacks in nested parallel regions did not always give the
correct thread_num, since the inner parallel region may have already been
finalized.
Now, the thread_num is stored at the beginning of the implicit task and
retrieved at the end, whenever necessary.

A testcase was added as well.

Differential Revision: https://reviews.llvm.org/D46260

llvm-svn: 331632
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 0e96190..c4e2579 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -1386,6 +1386,8 @@
       ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
           ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
           OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid));
+      OMPT_CUR_TASK_INFO(this_thr)
+          ->thread_num = __kmp_tid_from_gtid(global_tid);
     }
 
     /* OMPT state */
@@ -1551,6 +1553,8 @@
             ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                 implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
+            OMPT_CUR_TASK_INFO(master_th)
+                ->thread_num = __kmp_tid_from_gtid(gtid);
           }
 
           /* OMPT state */
@@ -1578,7 +1582,7 @@
           if (ompt_enabled.ompt_callback_implicit_task) {
             ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                 ompt_scope_end, NULL, implicit_task_data, 1,
-                __kmp_tid_from_gtid(gtid));
+                OMPT_CUR_TASK_INFO(master_th)->thread_num);
           }
           __ompt_lw_taskteam_unlink(master_th);
 
@@ -1762,6 +1766,8 @@
               ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                   ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                   &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid));
+              OMPT_CUR_TASK_INFO(master_th)
+                  ->thread_num = __kmp_tid_from_gtid(gtid);
             }
 
             /* OMPT state */
@@ -1789,7 +1795,7 @@
             if (ompt_enabled.ompt_callback_implicit_task) {
               ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                   ompt_scope_end, NULL, &(task_info->task_data), 1,
-                  __kmp_tid_from_gtid(gtid));
+                  OMPT_CUR_TASK_INFO(master_th)->thread_num);
             }
 
             __ompt_lw_taskteam_unlink(master_th);
@@ -1865,6 +1871,8 @@
               ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                   ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                   implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
+              OMPT_CUR_TASK_INFO(master_th)
+                  ->thread_num = __kmp_tid_from_gtid(gtid);
             }
 
             /* OMPT state */
@@ -1891,7 +1899,7 @@
             if (ompt_enabled.ompt_callback_implicit_task) {
               ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                   ompt_scope_end, NULL, &(task_info->task_data), 1,
-                  __kmp_tid_from_gtid(gtid));
+                  OMPT_CUR_TASK_INFO(master_th)->thread_num);
             }
 
             ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
@@ -2494,7 +2502,7 @@
       int ompt_team_size = team->t.t_nproc;
       ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
           ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
-          __kmp_tid_from_gtid(gtid));
+          OMPT_CUR_TASK_INFO(master_th)->thread_num);
     }
 
     task_info->frame.exit_frame = NULL;
@@ -6919,6 +6927,7 @@
     ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
         ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
         __kmp_tid_from_gtid(gtid));
+    OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
   }
 #endif