For your Christmas hacking pleasure.
This release aligns with Intel(r) Composer XE 2013 SP1 Product Update 2

New features
* The library can now be built with clang (though with some
  limitations since clang does not support 128 bit floats)
* Support for VTune analysis of load imbalance
* Code contribution from Steven Noonan to build the runtime for ARM*
  architecture processors 
* First implementation of runtime API for OpenMP cancellation

Bug Fixes
* Fixed hang on Windows (only) when using KMP_BLOCKTIME=0

llvm-svn: 197914
diff --git a/openmp/CREDITS.txt b/openmp/CREDITS.txt
index 67b3e9e..c054358 100644
--- a/openmp/CREDITS.txt
+++ b/openmp/CREDITS.txt
@@ -12,3 +12,6 @@
 W: http://openmprtl.org
 D: Created the runtime.
 
+N: Steven Noonan
+E: steven@uplinklabs.net
+D: Patches for the ARM architecture and several inconsistency removals.
diff --git a/openmp/runtime/README.txt b/openmp/runtime/README.txt
index 3880bf0..6ecca7f 100644
--- a/openmp/runtime/README.txt
+++ b/openmp/runtime/README.txt
@@ -74,13 +74,13 @@
 Supported Architectures: IA-32 architecture, Intel(R) 64, and 
 Intel(R) Many Integrated Core Architecture
 
-              -----------------------------------------------------------  
-              |           icc/icl            |           gcc            |
---------------|------------------------------|--------------------------|
-| Linux* OS   |            Yes(1,5)          |         Yes(2,4)         | 
-| OS X*       |            Yes(1,3,4)        |          No              |
-| Windows* OS |            Yes(1,4)          |          No              |
--------------------------------------------------------------------------
+              --------------------------------------------  
+              |   icc/icl     |    gcc      |   clang    |
+--------------|---------------|-------------|------------|
+| Linux* OS   |   Yes(1,5)    |  Yes(2,4)   | Yes(4,6,7) |
+| OS X*       |   Yes(1,3,4)  |  No         | Yes(4,6,7) |
+| Windows* OS |   Yes(1,4)    |  No         | No         |
+----------------------------------------------------------
 
 (1) On IA-32 architecture and Intel(R) 64, icc/icl versions 12.x are 
     supported (12.1 is recommended).
@@ -89,6 +89,14 @@
 (4) Intel(R) Many Integrated Core Architecture not supported.
 (5) On Intel(R) Many Integrated Core Architecture, icc/icl versions 13.0 
     or later are required.
+(6) clang version 3.3 is supported.
+(7) clang currently does not offer a software-implemented 128 bit extended 
+    precision type.  Thus, all entry points reliant on this type are removed
+    from the library and cannot be called in the user program.  The following
+    functions are not available:
+    __kmpc_atomic_cmplx16_*
+    __kmpc_atomic_float16_*
+    __kmpc_atomic_*_fp
 
 Front-end Compilers that work with this RTL
 ===========================================
diff --git a/openmp/runtime/doc/Reference.pdf b/openmp/runtime/doc/Reference.pdf
index 60ce400..680f98c 100644
--- a/openmp/runtime/doc/Reference.pdf
+++ b/openmp/runtime/doc/Reference.pdf
Binary files differ
diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
index cfcbdeb9..779f1d4 100644
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -357,6 +357,9 @@
         __kmpc_fork_teams                   241
         __kmpc_omp_task_with_deps           242
         __kmpc_omp_wait_deps                243
+        __kmpc_cancel                       244
+        __kmpc_cancellationpoint            245
+        __kmpc_cancel_barrier               246
     %endif # OMP_40
 %endif
 
@@ -455,6 +458,8 @@
    #omp_curr_proc_bind                      864
     omp_get_num_teams                       865
     omp_get_team_num                        866
+    omp_get_cancellation                    867
+    kmp_get_cancellation_status             868
 %endif # OMP_40
 
 %ifndef stub
diff --git a/openmp/runtime/src/exports_so.txt b/openmp/runtime/src/exports_so.txt
index 4ddf575..9ace78f 100644
--- a/openmp/runtime/src/exports_so.txt
+++ b/openmp/runtime/src/exports_so.txt
@@ -80,4 +80,26 @@
 
 }; # VERSION
 
+# sets up GCC OMP_ version dependency chain
+OMP_1.0 {
+};
+OMP_2.0 {
+} OMP_1.0;
+OMP_3.0 {
+} OMP_2.0;
+OMP_3.1 {
+} OMP_3.0;
+OMP_4.0 {
+} OMP_3.1;
+
+# sets up GCC GOMP_ version dependency chain
+GOMP_1.0 {
+};
+GOMP_2.0 {
+} GOMP_1.0;
+GOMP_3.0 {
+} GOMP_2.0;
+GOMP_4.0 {
+} GOMP_3.0;
+
 # end of file #
diff --git a/openmp/runtime/src/include/40/iomp.h.var b/openmp/runtime/src/include/40/iomp.h.var
index 88b74f3..8aeb38c 100644
--- a/openmp/runtime/src/include/40/iomp.h.var
+++ b/openmp/runtime/src/include/40/iomp.h.var
@@ -82,6 +82,16 @@
     extern void   __KAI_KMPC_CONVENTION  kmp_set_warnings_on(void);
     extern void   __KAI_KMPC_CONVENTION  kmp_set_warnings_off(void);
 
+    /* cancellation kind constants */
+    typedef enum kmp_cancel_kind_t {
+        kmp_cancel_parallel  = 1,
+        kmp_cancel_loop = 2,
+        kmp_cancel_sections  = 3,
+        kmp_cancel_taskgroup = 4
+    } kmp_cancel_kind_t;
+
+    extern int    __KAI_KMPC_CONVENTION  kmp_get_cancellation_status(kmp_cancel_kind_t);
+    
 #   undef __KAI_KMPC_CONVENTION
 
     /* Warning:
diff --git a/openmp/runtime/src/include/40/omp.h.var b/openmp/runtime/src/include/40/omp.h.var
index 38400d4..c6dd4cd 100644
--- a/openmp/runtime/src/include/40/omp.h.var
+++ b/openmp/runtime/src/include/40/omp.h.var
@@ -27,30 +27,6 @@
     extern "C" {
 #   endif
 
-#       define omp_set_num_threads          ompc_set_num_threads
-#       define omp_set_dynamic              ompc_set_dynamic
-#       define omp_set_nested               ompc_set_nested
-#       define omp_set_max_active_levels    ompc_set_max_active_levels
-#       define omp_set_schedule             ompc_set_schedule
-#       define omp_get_ancestor_thread_num  ompc_get_ancestor_thread_num
-#       define omp_get_team_size            ompc_get_team_size
-
-
-#       define kmp_set_stacksize            kmpc_set_stacksize
-#       define kmp_set_stacksize_s          kmpc_set_stacksize_s
-#       define kmp_set_blocktime            kmpc_set_blocktime
-#       define kmp_set_library              kmpc_set_library
-#       define kmp_set_defaults             kmpc_set_defaults
-#       define kmp_set_affinity_mask_proc   kmpc_set_affinity_mask_proc
-#       define kmp_unset_affinity_mask_proc kmpc_unset_affinity_mask_proc
-#       define kmp_get_affinity_mask_proc   kmpc_get_affinity_mask_proc
-
-#       define kmp_malloc                   kmpc_malloc
-#       define kmp_calloc                   kmpc_calloc
-#       define kmp_realloc                  kmpc_realloc
-#       define kmp_free                     kmpc_free
-
-
 #   if defined(_WIN32)
 #       define __KAI_KMPC_CONVENTION __cdecl
 #   else
@@ -120,6 +96,7 @@
     extern int  __KAI_KMPC_CONVENTION  omp_get_num_devices (void);
     extern int  __KAI_KMPC_CONVENTION  omp_get_num_teams (void);
     extern int  __KAI_KMPC_CONVENTION  omp_get_team_num (void);
+    extern int  __KAI_KMPC_CONVENTION  omp_get_cancellation (void);
 
 #   include <stdlib.h>
     /* kmp API functions */
diff --git a/openmp/runtime/src/include/40/omp_lib.f.var b/openmp/runtime/src/include/40/omp_lib.f.var
index 0adadb1..fb9b2f2 100644
--- a/openmp/runtime/src/include/40/omp_lib.f.var
+++ b/openmp/runtime/src/include/40/omp_lib.f.var
@@ -32,6 +32,7 @@
         integer, parameter :: kmp_pointer_kind       = int_ptr_kind()
         integer, parameter :: kmp_size_t_kind        = int_ptr_kind()
         integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
+        integer, parameter :: kmp_cancel_kind        = omp_integer_kind
 
       end module omp_lib_kinds
 
@@ -56,6 +57,11 @@
         integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3
         integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4
 
+        integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1
+        integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2
+        integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3
+        integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4
+
         interface
 
 !         ***
@@ -199,6 +205,11 @@
             integer (kind=omp_integer_kind) omp_get_team_num
           end function omp_get_team_num
 
+          function omp_get_cancellation()
+            use omp_lib_kinds
+            integer (kind=omp_integer_kind) omp_get_cancellation
+          end function omp_get_cancellation
+
           subroutine omp_init_lock(lockvar)
 !DIR$ IF(__INTEL_COMPILER.GE.1400)
 !DIR$ attributes known_intrinsic :: omp_init_lock
@@ -417,6 +428,11 @@
           subroutine kmp_set_warnings_off()
           end subroutine kmp_set_warnings_off
 
+          function kmp_get_cancellation_status(cancelkind)
+            use omp_lib_kinds
+            integer (kind=kmp_cancel_kind) cancelkind
+            logical (kind=omp_logical_kind) kmp_get_cancellation_status
+          end function kmp_get_cancellation_status
         end interface
 
 !dec$ if defined(_WIN32)
@@ -459,6 +475,7 @@
 !dec$ attributes alias:'OMP_GET_NUM_DEVICES' :: omp_get_num_devices
 !dec$ attributes alias:'OMP_GET_NUM_TEAMS' :: omp_get_num_teams
 !dec$ attributes alias:'OMP_GET_TEAM_NUM' :: omp_get_team_num
+!dec$ attributes alias:'OMP_GET_CANCELLATION' :: omp_get_cancellation
 
 !dec$ attributes alias:'omp_init_lock' :: omp_init_lock
 !dec$ attributes alias:'omp_destroy_lock' :: omp_destroy_lock
@@ -498,6 +515,8 @@
 !dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
 !dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
 
+!dec$ attributes alias:'KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status
+
 !dec$   else
 
 !***
@@ -531,6 +550,7 @@
 !dec$ attributes alias:'_OMP_GET_NUM_DEVICES' :: omp_get_num_devices
 !dec$ attributes alias:'_OMP_GET_NUM_TEAMS' :: omp_get_num_teams
 !dec$ attributes alias:'_OMP_GET_TEAM_NUM' :: omp_get_team_num
+!dec$ attributes alias:'_OMP_GET_CANCELLATION' :: omp_get_cancellation
 
 !dec$ attributes alias:'_omp_init_lock' :: omp_init_lock
 !dec$ attributes alias:'_omp_destroy_lock' :: omp_destroy_lock
@@ -570,6 +590,8 @@
 !dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
 !dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
 
+!dec$ attributes alias:'_KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status
+
 !dec$   endif
 !dec$ endif
 
@@ -606,6 +628,7 @@
 !dec$ attributes alias:'omp_get_num_devices_'::omp_get_num_devices
 !dec$ attributes alias:'omp_get_num_teams_'::omp_get_num_teams
 !dec$ attributes alias:'omp_get_team_num_'::omp_get_team_num
+!dec$ attributes alias:'omp_get_cancellation_'::omp_get_cancellation
 
 !dec$ attributes alias:'omp_init_lock_'::omp_init_lock
 !dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock
@@ -644,6 +667,7 @@
 
 !dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on
 !dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off
+!dec$ attributes alias:'kmp_get_cancellation_status_'::kmp_get_cancellation_status
 
 !dec$ endif
 
@@ -678,6 +702,7 @@
 !dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick
 !dec$ attributes alias:'_omp_get_num_teams_'::omp_get_num_teams
 !dec$ attributes alias:'_omp_get_team_num_'::omp_get_team_num
+!dec$ attributes alias:'_omp_get_cancellation_'::omp_get_cancellation
 
 !dec$ attributes alias:'_omp_init_lock_'::omp_init_lock
 !dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock
@@ -717,6 +742,8 @@
 !dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on
 !dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off
 
+!dec$ attributes alias:'_kmp_get_cancellation_status_'::kmp_get_cancellation_status
+
 !dec$ endif
 
       end module omp_lib
diff --git a/openmp/runtime/src/include/40/omp_lib.f90.var b/openmp/runtime/src/include/40/omp_lib.f90.var
index 5cac259..f785352 100644
--- a/openmp/runtime/src/include/40/omp_lib.f90.var
+++ b/openmp/runtime/src/include/40/omp_lib.f90.var
@@ -28,6 +28,7 @@
         integer, parameter :: kmp_pointer_kind       = c_intptr_t
         integer, parameter :: kmp_size_t_kind        = c_size_t
         integer, parameter :: kmp_affinity_mask_kind = c_intptr_t
+        integer, parameter :: kmp_cancel_kind        = omp_integer_kind
 
       end module omp_lib_kinds
 
@@ -47,12 +48,18 @@
         integer(kind=omp_sched_kind), parameter :: omp_sched_guided  = 3
         integer(kind=omp_sched_kind), parameter :: omp_sched_auto    = 4
 
+
         integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0
         integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1
         integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2
         integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3
         integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4
 
+        integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1
+        integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2
+        integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3
+        integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4
+
         interface
 
 !         ***
@@ -198,6 +205,11 @@
             integer (kind=omp_integer_kind) omp_get_team_num
           end function omp_get_team_num
 
+          function omp_get_cancellation() bind(c)
+            use omp_lib_kinds
+            integer (kind=omp_integer_kind) omp_get_cancellation
+          end function omp_get_cancellation
+
           subroutine omp_init_lock(lockvar) bind(c)
 !DIR$ IF(__INTEL_COMPILER.GE.1400)
 !DIR$ attributes known_intrinsic :: omp_init_lock
@@ -417,6 +429,12 @@
           subroutine kmp_set_warnings_off() bind(c)
           end subroutine kmp_set_warnings_off
 
+          function kmp_get_cancellation_status(cancelkind) bind(c)
+            use omp_lib_kinds
+            integer (kind=kmp_cancel_kind), value :: cancelkind
+            logical (kind=omp_logical_kind) kmp_get_cancellation_status
+          end function kmp_get_cancellation_status
+
         end interface
 
       end module omp_lib
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 7117571..37c7f41 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -1,8 +1,8 @@
 /*! \file */
 /*
  * kmp.h -- KPTS runtime header file.
- * $Revision: 42642 $
- * $Date: 2013-09-06 01:57:24 -0500 (Fri, 06 Sep 2013) $
+ * $Revision: 42816 $
+ * $Date: 2013-11-11 15:33:37 -0600 (Mon, 11 Nov 2013) $
  */
 
 
@@ -26,10 +26,6 @@
 */
 //#define FIX_SGI_CLOCK
 
-#if defined( __GNUC__ ) && !defined( __INTEL_COMPILER )
-typedef __float128 _Quad;
-#endif
-
 /* Defines for OpenMP 3.0 tasking and auto scheduling */
 
 #if OMP_30_ENABLED
@@ -81,9 +77,12 @@
 
 #include <errno.h>
 
-#include <xmmintrin.h>
-
 #include "kmp_os.h"
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+#include <xmmintrin.h>
+#endif
+
 #include "kmp_version.h"
 #include "kmp_debug.h"
 #include "kmp_lock.h"
@@ -188,7 +187,7 @@
                             /*  contextual information. */
 #endif /* USE_ITT_BUILD */
     kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for C++  */
-    char     *psource;      /**< String describing the source location.
+    char const *psource;    /**< String describing the source location.
                             The string is composed of semi-colon separated fields which describe the source file,
                             the function and a pair of line numbers that delimit the construct.
                              */
@@ -231,6 +230,13 @@
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
+#define KMP_MAX( x, y ) ( (x) > (y) ? (x) : (y) )
+#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+
 /* Enumeration types */
 
 enum kmp_state_timer {
@@ -752,6 +758,16 @@
 
 #endif /* OMP_40_ENABLED */
 
+#if OMP_40_ENABLED
+typedef enum kmp_cancel_kind_t {
+    cancel_noreq = 0,
+    cancel_parallel = 1,
+    cancel_loop = 2,
+    cancel_sections = 3,
+    cancel_taskgroup = 4
+} kmp_cancel_kind_t;
+#endif // OMP_40_ENABLED
+
 #if KMP_MIC
 extern unsigned int __kmp_place_num_cores;
 extern unsigned int __kmp_place_num_threads_per_core;
@@ -777,7 +793,7 @@
 #define __kmp_entry_gtid()             __kmp_get_global_thread_id_reg()
 
 #define __kmp_tid_from_gtid(gtid)     ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \
-                                        /*(__kmp_threads[ (gtid) ]->th.th_team_serialized) ? 0 : /* TODO remove this check, it is redundant */ \
+                                        /*(__kmp_threads[ (gtid) ]->th.th_team_serialized) ? 0 : */ /* TODO remove this check, it is redundant */ \
                                         __kmp_threads[ (gtid) ]->th.th_info.ds.ds_tid )
 
 #define __kmp_get_tid()               ( __kmp_tid_from_gtid( __kmp_get_gtid() ) )
@@ -1078,14 +1094,6 @@
 #endif /* BUILD_TV */
 
 /* ------------------------------------------------------------------------ */
-// Some forward declarations.
-
-typedef union  kmp_team      kmp_team_t;
-typedef struct kmp_taskdata  kmp_taskdata_t;
-typedef union  kmp_task_team kmp_task_team_t;
-typedef union  kmp_team      kmp_team_p;
-typedef union  kmp_info      kmp_info_p;
-typedef union  kmp_root      kmp_root_p;
 
 #if USE_ITT_BUILD
 // We cannot include "kmp_itt.h" due to circular dependency. Declare the only required type here.
@@ -1883,8 +1891,12 @@
     void *              shareds;            /**< pointer to block of pointers to shared vars   */
     kmp_routine_entry_t routine;            /**< pointer to routine to call for executing task */
     kmp_int32           part_id;            /**< part id for the task                          */
+#if OMP_40_ENABLED
+    kmp_routine_entry_t destructors;        /* pointer to function to invoke deconstructors of firstprivate C++ objects */
+#endif // OMP_40_ENABLED
     /*  private vars  */
 } kmp_task_t;
+
 /*!
 @}
 */
@@ -1892,6 +1904,7 @@
 #if OMP_40_ENABLED
 typedef struct kmp_taskgroup {
     kmp_uint32            count;   // number of allocated and not yet complete tasks
+    kmp_int32             cancel_request; // request for cancellation of this taskgroup
     struct kmp_taskgroup *parent;  // parent taskgroup
 } kmp_taskgroup_t;
 
@@ -1974,7 +1987,12 @@
     unsigned tiedness    : 1;               /* task is either tied (1) or untied (0) */
     unsigned final       : 1;               /* task is final(1) so execute immediately */
     unsigned merged_if0  : 1;               /* no __kmpc_task_{begin/complete}_if0 calls in if0 code path */
-    unsigned reserved13  : 13;              /* reserved for compiler use */
+#if OMP_40_ENABLED
+    unsigned destructors_thunk : 1;         /* set if the compiler creates a thunk to invoke destructors from the runtime */
+    unsigned reserved    : 12;              /* reserved for compiler use */
+#else // OMP_40_ENABLED
+    unsigned reserved    : 13;              /* reserved for compiler use */
+#endif // OMP_40_ENABLED
 
     /* Library flags */                     /* Total library flags must be 16 bits */
     unsigned tasktype    : 1;               /* task is either explicit(1) or implicit (0) */
@@ -2014,7 +2032,11 @@
     kmp_dephash_t *         td_dephash;           // Dependencies for children tasks are tracked from here
     kmp_depnode_t *         td_depnode;           // Pointer to graph node if this task has dependencies
 #endif
+#if KMP_HAVE_QUAD
     _Quad                   td_dummy;             // Align structure 16-byte size since allocated just before kmp_task_t
+#else
+    kmp_uint32              td_dummy[2];
+#endif
 }; // struct kmp_taskdata
 
 // Make sure padding above worked
@@ -2121,6 +2143,8 @@
     int               th_team_bt_intervals;
     int               th_team_bt_set;
 
+    kmp_internal_control_t  th_fixed_icvs;            /* Initial ICVs for the thread */
+
 
 #if KMP_OS_WINDOWS || KMP_OS_LINUX
     kmp_affin_mask_t  *th_affin_mask; /* thread's current affinity mask */
@@ -2142,6 +2166,7 @@
 # endif
 #endif
 #if USE_ITT_BUILD
+    kmp_uint64              th_bar_arrive_time;           /* arrival to barrier timestamp */
     kmp_uint64              th_frame_time;                /* frame timestamp */
     kmp_uint64              th_frame_time_serialized;     /* frame timestamp in serialized parallel */
 #endif /* USE_ITT_BUILD */
@@ -2328,15 +2353,6 @@
     kmp_uint32               t_mxcsr;
 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
 
-#if KMP_BARRIER_ICV_PULL
-   //
-   // Note: Putting ICV's before the fp control info causes a very slight
-   // ~1% improvement for EPCC parallel on fxe256lin01 / 256 threads, but
-   // causes a 17% regression on fxe64lin01 / 64 threads.
-   //
-   kmp_internal_control_t    t_initial_icvs;
-#endif // KMP_BARRIER_ICV_PULL
-
 #if (KMP_PERF_V106 == KMP_ON)
     void                    *t_inline_argv[ KMP_INLINE_ARGV_ENTRIES ];
 #endif
@@ -2398,6 +2414,9 @@
 
     kmp_internal_control_t  *t_control_stack_top;  /* internal control stack for additional nested teams.
                                                       for SERIALIZED teams nested 2 or more levels deep */
+#if OMP_40_ENABLED
+    kmp_int32                t_cancel_request; /* typed flag to store request state of cancellation */
+#endif
 
     int                      t_master_active;/* save on fork, restore on join */
     kmp_taskq_t              t_taskq;        /* this team's task queue */
@@ -2479,8 +2498,6 @@
 #if USE_ITT_BUILD
 extern int      __kmp_forkjoin_frames;
 extern int      __kmp_forkjoin_frames_mode;
-extern FILE *        __kmp_itt_csv_file;
-extern kmp_str_buf_t __kmp_itt_frame_buffer;
 #endif
 extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;
 extern int      __kmp_determ_red;
@@ -2526,9 +2543,6 @@
 extern int      __kmp_storage_map_verbose_specified;
 
 extern kmp_cpuinfo_t    __kmp_cpuinfo;
-extern kmp_uint64       __kmp_cpu_frequency;
-    // CPU frequency, in Hz. Set by __kmp_runtime_initialize(). 0 means "is not set yet",
-    // ~ 0 signals an errror.
 
 extern volatile int __kmp_init_serial;
 extern volatile int __kmp_init_gtid;
@@ -2678,13 +2692,13 @@
 # endif /* USE_LOAD_BALANCE */
 
 // OpenMP 3.1 - Nested num threads array
-struct kmp_nested_nthreads_t {
+typedef struct kmp_nested_nthreads_t {
     int * nth;
     int   size;
     int   used;
-};
+} kmp_nested_nthreads_t;
 
-extern struct kmp_nested_nthreads_t __kmp_nested_nth;
+extern kmp_nested_nthreads_t __kmp_nested_nth;
 
 #if KMP_USE_ADAPTIVE_LOCKS
 
@@ -2707,6 +2721,7 @@
 #if OMP_40_ENABLED
 extern int __kmp_display_env;           /* TRUE or FALSE */
 extern int __kmp_display_env_verbose;   /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
+extern int __kmp_omp_cancellation;      /* TRUE or FALSE */
 #endif
 
 /* ------------------------------------------------------------------------- */
@@ -2796,7 +2811,7 @@
 extern void __kmp_set_num_threads( int new_nth, int gtid );
 
 // Returns current thread (pointer to kmp_info_t). Current thread *must* be registered.
-inline kmp_info_t * __kmp_entry_thread()
+static inline kmp_info_t * __kmp_entry_thread()
 {
       int gtid = __kmp_entry_gtid();
 
@@ -2976,11 +2991,11 @@
 
 #endif /* KMP_OS_LINUX || KMP_OS_WINDOWS */
 
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
 
 extern int __kmp_futex_determine_capable( void );
 
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
 
 extern void __kmp_gtid_set_specific( int gtid );
 extern int  __kmp_gtid_get_specific( void );
@@ -3067,7 +3082,7 @@
 extern int __kmp_fork_call( ident_t *loc, int gtid, int exec_master,
   kmp_int32 argc, microtask_t microtask, launch_t invoker,
 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
-#if KMP_ARCH_X86_64 && KMP_OS_LINUX
+#if (KMP_ARCH_ARM || KMP_ARCH_X86_64) && KMP_OS_LINUX
                              va_list *ap
 #else
                              va_list ap
@@ -3120,7 +3135,7 @@
 #if USE_ITT_BUILD
                                  void * itt_sync_obj,
 #endif /* USE_ITT_BUILD */
-                                 int c = 0 );
+                                 int c );
 extern void __kmp_reap_task_teams( void );
 extern void __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread );
 extern void __kmp_wait_to_unref_task_teams( void );
@@ -3138,6 +3153,9 @@
 extern int  __kmp_is_address_mapped( void *addr );
 extern kmp_uint64 __kmp_hardware_timestamp(void);
 
+#if KMP_OS_UNIX
+extern int  __kmp_read_from_file( char const *path, char const *format, ... );
+#endif
 
 /* ------------------------------------------------------------------------ */
 //
@@ -3148,7 +3166,7 @@
 
 extern void       __kmp_query_cpuid( kmp_cpuinfo_t *p );
 
-static inline void __kmp_load_mxcsr ( kmp_uint32 *p ) { _mm_setcsr( *p ); }
+#define __kmp_load_mxcsr(p) _mm_setcsr(*(p))
 static inline void __kmp_store_mxcsr( kmp_uint32 *p ) { *p = _mm_getcsr(); }
 
 extern void __kmp_load_x87_fpu_control_word( kmp_int16 *p );
@@ -3258,8 +3276,8 @@
 #endif // OMP_30_ENABLED
 
 #if OMP_40_ENABLED
-KMP_EXPORT void __kmpc_taskgroup( ident* loc, int gtid );
-KMP_EXPORT void __kmpc_end_taskgroup( ident* loc, int gtid );
+KMP_EXPORT void __kmpc_taskgroup( ident_t * loc, int gtid );
+KMP_EXPORT void __kmpc_end_taskgroup( ident_t * loc, int gtid );
 
 KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task,
                                                  kmp_int32 ndeps, kmp_depend_info_t *dep_list,
@@ -3270,6 +3288,13 @@
 
 #endif
 
+#if OMP_40_ENABLED
+KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind);
+KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind);
+KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t* loc_ref, kmp_int32 gtid);
+KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind);
+#endif
+
 /*
  * Lock interface routines (fast versions with gtid passed in)
  */
@@ -3355,6 +3380,42 @@
 struct private_common *
 kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
 
+//
+// ompc_, kmpc_ entries moved from omp.h.
+//
+#if KMP_OS_WINDOWS
+#   define KMPC_CONVENTION __cdecl
+#else
+#   define KMPC_CONVENTION
+#endif
+
+#if OMP_30_ENABLED
+
+#ifndef __OMP_H
+typedef enum omp_sched_t {
+    omp_sched_static  = 1,
+    omp_sched_dynamic = 2,
+    omp_sched_guided  = 3,
+    omp_sched_auto    = 4
+} omp_sched_t;
+typedef void * kmp_affinity_mask_t;
+#endif
+
+KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int);
+KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int);
+KMP_EXPORT int  KMPC_CONVENTION ompc_get_ancestor_thread_num(int);
+KMP_EXPORT int  KMPC_CONVENTION ompc_get_team_size(int);
+KMP_EXPORT int  KMPC_CONVENTION kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *);
+KMP_EXPORT int  KMPC_CONVENTION kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *);
+KMP_EXPORT int  KMPC_CONVENTION kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *);
+
+#endif // OMP_30_ENABLED
+
+KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int);
+KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);
+KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int);
+KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index 0840fa3..644251d 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -1,7 +1,7 @@
 /*
  * kmp_affinity.cpp -- affinity management
- * $Revision: 42613 $
- * $Date: 2013-08-23 13:29:50 -0500 (Fri, 23 Aug 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
  */
 
 
@@ -1885,7 +1885,19 @@
                 if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
                 if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
                 threadInfo[num_avail][osIdIndex] = val;
+#if KMP_OS_LINUX && USE_SYSFS_INFO
+                char path[256];
+                snprintf(path, sizeof(path),
+                    "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
+                    threadInfo[num_avail][osIdIndex]);
+                __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
+
+                snprintf(path, sizeof(path),
+                    "/sys/devices/system/cpu/cpu%u/topology/core_id",
+                    threadInfo[num_avail][osIdIndex]);
+                __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
                 continue;
+#else
             }
             char s2[] = "physical id";
             if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
@@ -1906,6 +1918,7 @@
                 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
                 threadInfo[num_avail][coreIdIndex] = val;
                 continue;
+#endif // KMP_OS_LINUX && USE_SYSFS_INFO
             }
             char s4[] = "thread id";
             if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
@@ -3058,8 +3071,6 @@
     int setSize = 0;
 
     for (;;) {
-        int start, count, stride;
-
         __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
 
         //
@@ -3090,7 +3101,7 @@
           "bad explicit places list");
         next = scan;
         SKIP_DIGITS(next);
-        count = __kmp_str_to_int(scan, *next);
+        int count = __kmp_str_to_int(scan, *next);
         KMP_ASSERT(count >= 0);
         scan = next;
 
@@ -3112,7 +3123,7 @@
                     // Use a temp var in case macro is changed to evaluate
                     // args multiple times.
                     //
-                    if (KMP_CPU_ISSET(j - stride, tempMask)) {
+                    if (KMP_CPU_ISSET(j - 1, tempMask)) {
                         KMP_CPU_SET(j, tempMask);
                         setSize++;
                     }
@@ -3159,7 +3170,7 @@
           "bad explicit places list");
         next = scan;
         SKIP_DIGITS(next);
-        stride = __kmp_str_to_int(scan, *next);
+        int stride = __kmp_str_to_int(scan, *next);
         KMP_DEBUG_ASSERT(stride >= 0);
         scan = next;
         stride *= sign;
diff --git a/openmp/runtime/src/kmp_alloc.c b/openmp/runtime/src/kmp_alloc.c
index 30ab4bd..885754f 100644
--- a/openmp/runtime/src/kmp_alloc.c
+++ b/openmp/runtime/src/kmp_alloc.c
@@ -1,7 +1,7 @@
 /*
  * kmp_alloc.c -- private/shared dyanmic memory allocation and management
- * $Revision: 42613 $
- * $Date: 2013-08-23 13:29:50 -0500 (Fri, 23 Aug 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
  */
 
 
@@ -31,7 +31,7 @@
 /* NOTE: bufsize must be a signed datatype */
 
 #if KMP_OS_WINDOWS
-# if KMP_ARCH_X86
+# if KMP_ARCH_X86 || KMP_ARCH_ARM
    typedef kmp_int32 bufsize;
 # else
    typedef kmp_int64 bufsize;
@@ -74,7 +74,7 @@
                                          malloc() does not
                                          ensure 16 byte alignmnent */
 
-#if KMP_ARCH_X86
+#if KMP_ARCH_X86 || !KMP_HAVE_QUAD
 
 #define SizeQuant   8
 #define AlignType   double
diff --git a/openmp/runtime/src/kmp_atomic.c b/openmp/runtime/src/kmp_atomic.c
index 547aad5..3e9c82f 100644
--- a/openmp/runtime/src/kmp_atomic.c
+++ b/openmp/runtime/src/kmp_atomic.c
@@ -1,7 +1,7 @@
 /*
  * kmp_atomic.c -- ATOMIC implementation routines
- * $Revision: 42582 $
- * $Date: 2013-08-09 06:30:22 -0500 (Fri, 09 Aug 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
  */
 
 
@@ -574,7 +574,7 @@
 */
 #define KMP_ATOMIC_VOLATILE volatile
 
-#if ( KMP_ARCH_X86 )
+#if ( KMP_ARCH_X86 ) && KMP_HAVE_QUAD
 
     static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; };
     static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; };
@@ -608,7 +608,7 @@
 /* ------------------------------------------------------------------------ */
 
 // All routines declarations looks like
-// void __kmpc_atomic_RTYPE_OP( ident_t*, int*, TYPE *lhs, TYPE rhs );
+// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
 // ------------------------------------------------------------------------
 
 #define KMP_CHECK_GTID                                                    \
@@ -721,6 +721,7 @@
         }                                                                 \
     }
 
+#if USE_CMPXCHG_FIX
 // 2007-06-25:
 // workaround for C78287 (complex(kind=4) data type)
 // lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm)
@@ -751,6 +752,7 @@
         }                                                                 \
     }
 // end of the first part of the workaround for C78287
+#endif // USE_CMPXCHG_FIX
 
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
 
@@ -775,6 +777,7 @@
     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                      \
     OP_CMPXCHG(TYPE,BITS,OP)                                               \
 }
+#if USE_CMPXCHG_FIX
 // -------------------------------------------------------------------------
 // workaround for C78287 (complex(kind=4) data type)
 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG)   \
@@ -783,6 +786,7 @@
     OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP)                                               \
 }
 // end of the second part of the workaround for C78287
+#endif
 
 #else
 // -------------------------------------------------------------------------
@@ -820,6 +824,7 @@
         OP_CRITICAL(OP##=,LCK_ID)  /* unaligned address - use critical */  \
     }                                                                      \
 }
+#if USE_CMPXCHG_FIX
 // -------------------------------------------------------------------------
 // workaround for C78287 (complex(kind=4) data type)
 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG)   \
@@ -833,6 +838,7 @@
     }                                                                                 \
 }
 // end of the second part of the workaround for C78287
+#endif // USE_CMPXCHG_FIX
 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
 
 // Routines for ATOMIC 4-byte operands addition and subtraction
@@ -1068,12 +1074,14 @@
 MIN_MAX_COMPXCHG( float4,  min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min
 MIN_MAX_COMPXCHG( float8,  max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max
 MIN_MAX_COMPXCHG( float8,  min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min
+#if KMP_HAVE_QUAD
 MIN_MAX_CRITICAL( float16, max,     QUAD_LEGACY,      <, 16r,   1 )            // __kmpc_atomic_float16_max
 MIN_MAX_CRITICAL( float16, min,     QUAD_LEGACY,      >, 16r,   1 )            // __kmpc_atomic_float16_min
 #if ( KMP_ARCH_X86 )
     MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t,     <, 16r,   1 )            // __kmpc_atomic_float16_max_a16
     MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t,     >, 16r,   1 )            // __kmpc_atomic_float16_min_a16
 #endif
+#endif
 // ------------------------------------------------------------------------
 // Need separate macros for .EQV. because of the need of complement (~)
 // OP ignored for critical sections, ^=~ used instead
@@ -1135,6 +1143,7 @@
 ATOMIC_CRITICAL( float10, sub, long double,     -, 10r,   1 )            // __kmpc_atomic_float10_sub
 ATOMIC_CRITICAL( float10, mul, long double,     *, 10r,   1 )            // __kmpc_atomic_float10_mul
 ATOMIC_CRITICAL( float10, div, long double,     /, 10r,   1 )            // __kmpc_atomic_float10_div
+#if KMP_HAVE_QUAD
 // routines for _Quad type
 ATOMIC_CRITICAL( float16, add, QUAD_LEGACY,     +, 16r,   1 )            // __kmpc_atomic_float16_add
 ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY,     -, 16r,   1 )            // __kmpc_atomic_float16_sub
@@ -1146,14 +1155,22 @@
     ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 )           // __kmpc_atomic_float16_mul_a16
     ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 )           // __kmpc_atomic_float16_div_a16
 #endif
+#endif
 // routines for complex types
 
+#if USE_CMPXCHG_FIX
 // workaround for C78287 (complex(kind=4) data type)
 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 )   // __kmpc_atomic_cmplx4_add
 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 )   // __kmpc_atomic_cmplx4_sub
 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 )   // __kmpc_atomic_cmplx4_mul
 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 )   // __kmpc_atomic_cmplx4_div
 // end of the workaround for C78287
+#else
+ATOMIC_CRITICAL( cmplx4,  add, kmp_cmplx32,     +,  8c,   1 )            // __kmpc_atomic_cmplx4_add
+ATOMIC_CRITICAL( cmplx4,  sub, kmp_cmplx32,     -,  8c,   1 )            // __kmpc_atomic_cmplx4_sub
+ATOMIC_CRITICAL( cmplx4,  mul, kmp_cmplx32,     *,  8c,   1 )            // __kmpc_atomic_cmplx4_mul
+ATOMIC_CRITICAL( cmplx4,  div, kmp_cmplx32,     /,  8c,   1 )            // __kmpc_atomic_cmplx4_div
+#endif // USE_CMPXCHG_FIX
 
 ATOMIC_CRITICAL( cmplx8,  add, kmp_cmplx64,     +, 16c,   1 )            // __kmpc_atomic_cmplx8_add
 ATOMIC_CRITICAL( cmplx8,  sub, kmp_cmplx64,     -, 16c,   1 )            // __kmpc_atomic_cmplx8_sub
@@ -1163,6 +1180,7 @@
 ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80,     -, 20c,   1 )            // __kmpc_atomic_cmplx10_sub
 ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80,     *, 20c,   1 )            // __kmpc_atomic_cmplx10_mul
 ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80,     /, 20c,   1 )            // __kmpc_atomic_cmplx10_div
+#if KMP_HAVE_QUAD
 ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG,     +, 32c,   1 )            // __kmpc_atomic_cmplx16_add
 ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG,     -, 32c,   1 )            // __kmpc_atomic_cmplx16_sub
 ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG,     *, 32c,   1 )            // __kmpc_atomic_cmplx16_mul
@@ -1173,6 +1191,7 @@
     ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 )   // __kmpc_atomic_cmplx16_mul_a16
     ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 )   // __kmpc_atomic_cmplx16_div_a16
 #endif
+#endif
 
 #if OMP_40_ENABLED
 
@@ -1312,6 +1331,7 @@
 // routines for long double type
 ATOMIC_CRITICAL_REV( float10, sub, long double,     -, 10r,   1 )            // __kmpc_atomic_float10_sub_rev
 ATOMIC_CRITICAL_REV( float10, div, long double,     /, 10r,   1 )            // __kmpc_atomic_float10_div_rev
+#if KMP_HAVE_QUAD
 // routines for _Quad type
 ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY,     -, 16r,   1 )            // __kmpc_atomic_float16_sub_rev
 ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY,     /, 16r,   1 )            // __kmpc_atomic_float16_div_rev
@@ -1319,6 +1339,7 @@
     ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 )           // __kmpc_atomic_float16_sub_a16_rev
     ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 )           // __kmpc_atomic_float16_div_a16_rev
 #endif
+#endif
 
 // routines for complex types
 ATOMIC_CRITICAL_REV( cmplx4,  sub, kmp_cmplx32,     -, 8c,    1 )            // __kmpc_atomic_cmplx4_sub_rev
@@ -1327,12 +1348,14 @@
 ATOMIC_CRITICAL_REV( cmplx8,  div, kmp_cmplx64,     /, 16c,   1 )            // __kmpc_atomic_cmplx8_div_rev
 ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80,     -, 20c,   1 )            // __kmpc_atomic_cmplx10_sub_rev
 ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80,     /, 20c,   1 )            // __kmpc_atomic_cmplx10_div_rev
+#if KMP_HAVE_QUAD
 ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG,     -, 32c,   1 )            // __kmpc_atomic_cmplx16_sub_rev
 ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG,     /, 32c,   1 )            // __kmpc_atomic_cmplx16_div_rev
 #if ( KMP_ARCH_X86 )
     ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 )   // __kmpc_atomic_cmplx16_sub_a16_rev
     ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 )   // __kmpc_atomic_cmplx16_div_a16_rev
 #endif
+#endif
 
 
 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
@@ -1405,7 +1428,7 @@
 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8
 
 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them)
-
+#if KMP_HAVE_QUAD
 ATOMIC_CMPXCHG_MIX( fixed1,  char,       add,  8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp
 ATOMIC_CMPXCHG_MIX( fixed1,  char,       sub,  8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp
 ATOMIC_CMPXCHG_MIX( fixed1,  char,       mul,  8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp
@@ -1444,10 +1467,12 @@
 ATOMIC_CRITICAL_FP( float10, long double,    sub, -, fp, _Quad, 10r,   1 )            // __kmpc_atomic_float10_sub_fp
 ATOMIC_CRITICAL_FP( float10, long double,    mul, *, fp, _Quad, 10r,   1 )            // __kmpc_atomic_float10_mul_fp
 ATOMIC_CRITICAL_FP( float10, long double,    div, /, fp, _Quad, 10r,   1 )            // __kmpc_atomic_float10_div_fp
+#endif
 
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
 // ------------------------------------------------------------------------
 // X86 or X86_64: no alignment problems ====================================
+#if USE_CMPXCHG_FIX
 // workaround for C78287 (complex(kind=4) data type)
 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE)                                           \
@@ -1456,6 +1481,13 @@
 }
 // end of the second part of the workaround for C78287
 #else
+#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
+ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE)                                           \
+    OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                                         \
+    OP_CMPXCHG(TYPE,BITS,OP)                                                                  \
+}
+#endif // USE_CMPXCHG_FIX
+#else
 // ------------------------------------------------------------------------
 // Code for other architectures that don't handle unaligned accesses.
 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
@@ -1624,7 +1656,9 @@
 ATOMIC_CMPXCHG_READ( fixed2,  rd, kmp_int16,  16, +,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_rd
 
 ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r,   1 )         // __kmpc_atomic_float10_rd
+#if KMP_HAVE_QUAD
 ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r,   1 )         // __kmpc_atomic_float16_rd
+#endif // KMP_HAVE_QUAD
 
 // Fix for CQ220361 on Windows* OS
 #if ( KMP_OS_WINDOWS )
@@ -1634,11 +1668,13 @@
 #endif
 ATOMIC_CRITICAL_READ( cmplx8,  rd, kmp_cmplx64, +, 16c, 1 )           // __kmpc_atomic_cmplx8_rd
 ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 )           // __kmpc_atomic_cmplx10_rd
+#if KMP_HAVE_QUAD
 ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 )           // __kmpc_atomic_cmplx16_rd
 #if ( KMP_ARCH_X86 )
     ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 )         // __kmpc_atomic_float16_a16_rd
     ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd
 #endif
+#endif
 
 
 // ------------------------------------------------------------------------
@@ -1720,15 +1756,19 @@
 #endif
 
 ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r,   1 )         // __kmpc_atomic_float10_wr
+#if KMP_HAVE_QUAD
 ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r,   1 )         // __kmpc_atomic_float16_wr
+#endif
 ATOMIC_CRITICAL_WR( cmplx4,  wr, kmp_cmplx32, =,  8c,   1 )         // __kmpc_atomic_cmplx4_wr
 ATOMIC_CRITICAL_WR( cmplx8,  wr, kmp_cmplx64, =, 16c,   1 )         // __kmpc_atomic_cmplx8_wr
 ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c,   1 )         // __kmpc_atomic_cmplx10_wr
+#if KMP_HAVE_QUAD
 ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c,   1 )         // __kmpc_atomic_cmplx16_wr
 #if ( KMP_ARCH_X86 )
     ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t,         =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr
     ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr
 #endif
+#endif
 
 
 // ------------------------------------------------------------------------
@@ -2058,12 +2098,14 @@
 MIN_MAX_COMPXCHG_CPT( float4,  min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt
 MIN_MAX_COMPXCHG_CPT( float8,  max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt
 MIN_MAX_COMPXCHG_CPT( float8,  min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt
+#if KMP_HAVE_QUAD
 MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY,    <, 16r,   1 )     // __kmpc_atomic_float16_max_cpt
 MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY,    >, 16r,   1 )     // __kmpc_atomic_float16_min_cpt
 #if ( KMP_ARCH_X86 )
     MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r,  1 )  // __kmpc_atomic_float16_max_a16_cpt
     MIN_MAX_CRITICAL_CPT( float16, min_a16_cpt, Quad_a16_t, >, 16r,  1 )  // __kmpc_atomic_float16_mix_a16_cpt
 #endif
+#endif
 
 // ------------------------------------------------------------------------
 #ifdef KMP_GOMP_COMPAT
@@ -2156,6 +2198,7 @@
 ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double,     -, 10r,   1 )            // __kmpc_atomic_float10_sub_cpt
 ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double,     *, 10r,   1 )            // __kmpc_atomic_float10_mul_cpt
 ATOMIC_CRITICAL_CPT( float10, div_cpt, long double,     /, 10r,   1 )            // __kmpc_atomic_float10_div_cpt
+#if KMP_HAVE_QUAD
 // routines for _Quad type
 ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY,     +, 16r,   1 )            // __kmpc_atomic_float16_add_cpt
 ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY,     -, 16r,   1 )            // __kmpc_atomic_float16_sub_cpt
@@ -2167,6 +2210,7 @@
     ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r,  1 )          // __kmpc_atomic_float16_mul_a16_cpt
     ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r,  1 )          // __kmpc_atomic_float16_div_a16_cpt
 #endif
+#endif
 
 // routines for complex types
 
@@ -2184,6 +2228,7 @@
 ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c,   1 )            // __kmpc_atomic_cmplx10_sub_cpt
 ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c,   1 )            // __kmpc_atomic_cmplx10_mul_cpt
 ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c,   1 )            // __kmpc_atomic_cmplx10_div_cpt
+#if KMP_HAVE_QUAD
 ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c,   1 )            // __kmpc_atomic_cmplx16_add_cpt
 ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c,   1 )            // __kmpc_atomic_cmplx16_sub_cpt
 ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c,   1 )            // __kmpc_atomic_cmplx16_mul_cpt
@@ -2194,6 +2239,7 @@
     ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,   1 )   // __kmpc_atomic_cmplx16_mul_a16_cpt
     ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,   1 )   // __kmpc_atomic_cmplx16_div_a16_cpt
 #endif
+#endif
 
 #if OMP_40_ENABLED
 
@@ -2321,6 +2367,7 @@
 // routines for long double type
 ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double,     -, 10r,   1 )            // __kmpc_atomic_float10_sub_cpt_rev
 ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double,     /, 10r,   1 )            // __kmpc_atomic_float10_div_cpt_rev
+#if KMP_HAVE_QUAD
 // routines for _Quad type
 ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY,     -, 16r,   1 )            // __kmpc_atomic_float16_sub_cpt_rev
 ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY,     /, 16r,   1 )            // __kmpc_atomic_float16_div_cpt_rev
@@ -2328,6 +2375,7 @@
     ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,  1 )          // __kmpc_atomic_float16_sub_a16_cpt_rev
     ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,  1 )          // __kmpc_atomic_float16_div_a16_cpt_rev
 #endif
+#endif
 
 // routines for complex types
 
@@ -2378,12 +2426,14 @@
 ATOMIC_CRITICAL_CPT_REV( cmplx8,  div_cpt_rev, kmp_cmplx64, /, 16c,   1 )            // __kmpc_atomic_cmplx8_div_cpt_rev
 ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,   1 )            // __kmpc_atomic_cmplx10_sub_cpt_rev
 ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,   1 )            // __kmpc_atomic_cmplx10_div_cpt_rev
+#if KMP_HAVE_QUAD
 ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,   1 )            // __kmpc_atomic_cmplx16_sub_cpt_rev
 ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,   1 )            // __kmpc_atomic_cmplx16_div_cpt_rev
 #if ( KMP_ARCH_X86 )
     ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,   1 )   // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
     ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,   1 )   // __kmpc_atomic_cmplx16_div_a16_cpt_rev
 #endif
+#endif
 
 //   OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
 
@@ -2527,7 +2577,9 @@
 
 
 ATOMIC_CRITICAL_SWP( float10, long double, 10r,   1 )              // __kmpc_atomic_float10_swp
+#if KMP_HAVE_QUAD
 ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r,   1 )              // __kmpc_atomic_float16_swp
+#endif
 // cmplx4 routine to return void
 ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32,  8c,   1 )           // __kmpc_atomic_cmplx4_swp
 
@@ -2536,11 +2588,13 @@
 
 ATOMIC_CRITICAL_SWP( cmplx8,  kmp_cmplx64, 16c,   1 )              // __kmpc_atomic_cmplx8_swp
 ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c,   1 )              // __kmpc_atomic_cmplx10_swp
+#if KMP_HAVE_QUAD
 ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c,   1 )              // __kmpc_atomic_cmplx16_swp
 #if ( KMP_ARCH_X86 )
     ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t,         16r, 1 )  // __kmpc_atomic_float16_a16_swp
     ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 )  // __kmpc_atomic_cmplx16_a16_swp
 #endif
+#endif
 
 
 // End of OpenMP 4.0 Capture
diff --git a/openmp/runtime/src/kmp_atomic.h b/openmp/runtime/src/kmp_atomic.h
index 2243ba7..361dce9 100644
--- a/openmp/runtime/src/kmp_atomic.h
+++ b/openmp/runtime/src/kmp_atomic.h
@@ -1,7 +1,7 @@
 /*
  * kmp_atomic.h - ATOMIC header file
- * $Revision: 42195 $
- * $Date: 2013-03-27 16:10:35 -0500 (Wed, 27 Mar 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
  */
 
 
@@ -30,10 +30,6 @@
 //                  to use typedef'ed types on win.
 // Condition for WIN64 was modified in anticipation of 10.1 build compiler.
 
-#if defined( __GNUC__ ) && !defined( __INTEL_COMPILER )
-typedef __float128 _Quad;
-#endif
-
 #if defined( __cplusplus ) && ( KMP_OS_WINDOWS )
     // create shortcuts for c99 complex types
 
@@ -173,6 +169,7 @@
     typedef KMP_DO_ALIGN( 16 )  struct __kmp_cmplx80_t kmp_cmplx80;
 
     // complex16
+    #if KMP_HAVE_QUAD
     struct __kmp_cmplx128_t : std::complex< _Quad > {
 
             __kmp_cmplx128_t() : std::complex< _Quad > () {}
@@ -192,6 +189,7 @@
 
     };
     typedef struct __kmp_cmplx128_t kmp_cmplx128;
+    #endif /* KMP_HAVE_QUAD */
 
     #ifdef _DEBUG_TEMPORARILY_UNSET_
         #undef _DEBUG_TEMPORARILY_UNSET_
@@ -204,19 +202,22 @@
     typedef float _Complex       kmp_cmplx32;
     typedef double _Complex      kmp_cmplx64;
     typedef long double _Complex kmp_cmplx80;
+    #if KMP_HAVE_QUAD
     typedef _Quad _Complex       kmp_cmplx128;
+    #endif
 #endif
 
 // Compiler 12.0 changed alignment of 16 and 32-byte arguments (like _Quad
 // and kmp_cmplx128) on IA-32 architecture. The following aligned structures
 // are implemented to support the old alignment in 10.1, 11.0, 11.1 and 
 // introduce the new alignment in 12.0. See CQ88405.
-#if ( KMP_ARCH_X86 )
+#if KMP_ARCH_X86 && KMP_HAVE_QUAD
 
     // 4-byte aligned structures for backward compatibility.
 
     #pragma pack( push, 4 )
 
+    
     struct KMP_DO_ALIGN( 4 ) Quad_a4_t {
         _Quad q;
 
@@ -364,31 +365,31 @@
 
 typedef kmp_queuing_lock_t kmp_atomic_lock_t;
 
-inline void
+static inline void
 __kmp_acquire_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
 {
     __kmp_acquire_queuing_lock( lck, gtid );
 }
 
-inline int
+static inline int
 __kmp_test_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
 {
     return __kmp_test_queuing_lock( lck, gtid );
 }
 
-inline void
+static inline void
 __kmp_release_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
 {
     __kmp_release_queuing_lock( lck, gtid );
 }
 
-inline void
+static inline void
 __kmp_init_atomic_lock( kmp_atomic_lock_t *lck )
 {
     __kmp_init_queuing_lock( lck );
 }
 
-inline void
+static inline void
 __kmp_destroy_atomic_lock( kmp_atomic_lock_t *lck )
 {
     __kmp_destroy_queuing_lock( lck );
@@ -498,6 +499,7 @@
 void __kmpc_atomic_float4_min(  ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
 void __kmpc_atomic_float8_max(  ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
 void __kmpc_atomic_float8_min(  ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
+#if KMP_HAVE_QUAD
 void __kmpc_atomic_float16_max( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
 void __kmpc_atomic_float16_min( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
 #if ( KMP_ARCH_X86 )
@@ -505,6 +507,7 @@
     void __kmpc_atomic_float16_max_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
     void __kmpc_atomic_float16_min_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
 #endif
+#endif
 // .NEQV. (same as xor)
 void __kmpc_atomic_fixed1_neqv( ident_t *id_ref, int gtid, char * lhs, char rhs );
 void __kmpc_atomic_fixed2_neqv( ident_t *id_ref, int gtid, short * lhs, short rhs );
@@ -521,6 +524,7 @@
 void __kmpc_atomic_float10_mul( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
 void __kmpc_atomic_float10_div( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
 // _Quad type
+#if KMP_HAVE_QUAD
 void __kmpc_atomic_float16_add( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
 void __kmpc_atomic_float16_sub( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
 void __kmpc_atomic_float16_mul( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
@@ -532,6 +536,7 @@
     void __kmpc_atomic_float16_mul_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
     void __kmpc_atomic_float16_div_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
 #endif
+#endif
 // routines for complex types
 void __kmpc_atomic_cmplx4_add(  ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
 void __kmpc_atomic_cmplx4_sub(  ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
@@ -545,6 +550,7 @@
 void __kmpc_atomic_cmplx10_sub( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
 void __kmpc_atomic_cmplx10_mul( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
 void __kmpc_atomic_cmplx10_div( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
+#if KMP_HAVE_QUAD
 void __kmpc_atomic_cmplx16_add( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
 void __kmpc_atomic_cmplx16_sub( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
 void __kmpc_atomic_cmplx16_mul( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
@@ -556,6 +562,7 @@
     void __kmpc_atomic_cmplx16_mul_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
     void __kmpc_atomic_cmplx16_div_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
 #endif
+#endif
 
 #if OMP_40_ENABLED
 
@@ -593,14 +600,17 @@
 void __kmpc_atomic_float8_div_rev(  ident_t *id_ref, int gtid, double * lhs, double rhs );
 void __kmpc_atomic_float10_sub_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
 void __kmpc_atomic_float10_div_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
+#if KMP_HAVE_QUAD
 void __kmpc_atomic_float16_sub_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
 void __kmpc_atomic_float16_div_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
+#endif
 void __kmpc_atomic_cmplx4_sub_rev(  ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
 void __kmpc_atomic_cmplx4_div_rev(  ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
 void __kmpc_atomic_cmplx8_sub_rev(  ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
 void __kmpc_atomic_cmplx8_div_rev(  ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
 void __kmpc_atomic_cmplx10_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
 void __kmpc_atomic_cmplx10_div_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
+#if KMP_HAVE_QUAD
 void __kmpc_atomic_cmplx16_sub_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
 void __kmpc_atomic_cmplx16_div_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
 #if ( KMP_ARCH_X86 )
@@ -610,6 +620,7 @@
     void __kmpc_atomic_cmplx16_sub_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
     void __kmpc_atomic_cmplx16_div_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
 #endif
+#endif // KMP_HAVE_QUAD
 
 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
 
@@ -632,6 +643,7 @@
 void __kmpc_atomic_float4_div_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs );
 
 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them)
+#if KMP_HAVE_QUAD
 void __kmpc_atomic_fixed1_add_fp(  ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
 void __kmpc_atomic_fixed1_sub_fp(  ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
 void __kmpc_atomic_fixed1_mul_fp(  ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
@@ -670,6 +682,7 @@
 void __kmpc_atomic_float10_sub_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
 void __kmpc_atomic_float10_mul_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
 void __kmpc_atomic_float10_div_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
+#endif // KMP_HAVE_QUAD
 
 // RHS=cmplx8
 void __kmpc_atomic_cmplx4_add_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs );
@@ -701,7 +714,9 @@
 kmp_real32   __kmpc_atomic_float4_rd(  ident_t *id_ref, int gtid, kmp_real32  * loc );
 kmp_real64   __kmpc_atomic_float8_rd(  ident_t *id_ref, int gtid, kmp_real64  * loc );
 long double  __kmpc_atomic_float10_rd( ident_t *id_ref, int gtid, long double * loc );
+#if KMP_HAVE_QUAD
 QUAD_LEGACY  __kmpc_atomic_float16_rd( ident_t *id_ref, int gtid, QUAD_LEGACY * loc );
+#endif
 // Fix for CQ220361: cmplx4 READ will return void on Windows* OS; read value will be
 // returned through an additional parameter
 #if ( KMP_OS_WINDOWS )
@@ -711,12 +726,14 @@
 #endif
 kmp_cmplx64  __kmpc_atomic_cmplx8_rd(  ident_t *id_ref, int gtid, kmp_cmplx64 * loc );
 kmp_cmplx80  __kmpc_atomic_cmplx10_rd( ident_t *id_ref, int gtid, kmp_cmplx80 * loc );
+#if KMP_HAVE_QUAD
 CPLX128_LEG  __kmpc_atomic_cmplx16_rd( ident_t *id_ref, int gtid, CPLX128_LEG * loc );
 #if ( KMP_ARCH_X86 )
     // Routines with 16-byte arguments aligned to 16-byte boundary
     Quad_a16_t         __kmpc_atomic_float16_a16_rd( ident_t * id_ref, int gtid, Quad_a16_t         * loc );
     kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_rd( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * loc );
 #endif
+#endif
 
 
 //
@@ -730,17 +747,20 @@
 void __kmpc_atomic_float4_wr(  ident_t *id_ref, int gtid, kmp_real32  * lhs, kmp_real32  rhs );
 void __kmpc_atomic_float8_wr(  ident_t *id_ref, int gtid, kmp_real64  * lhs, kmp_real64  rhs );
 void __kmpc_atomic_float10_wr( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
+#if KMP_HAVE_QUAD
 void __kmpc_atomic_float16_wr( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
+#endif
 void __kmpc_atomic_cmplx4_wr(  ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
 void __kmpc_atomic_cmplx8_wr(  ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
 void __kmpc_atomic_cmplx10_wr( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
+#if KMP_HAVE_QUAD
 void __kmpc_atomic_cmplx16_wr( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
 #if ( KMP_ARCH_X86 )
     // Routines with 16-byte arguments aligned to 16-byte boundary
     void __kmpc_atomic_float16_a16_wr( ident_t * id_ref, int gtid, Quad_a16_t         * lhs, Quad_a16_t         rhs );
     void __kmpc_atomic_cmplx16_a16_wr( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
 #endif
-
+#endif
 
 //
 //  Below routines for atomic CAPTURE are listed
@@ -830,8 +850,10 @@
 kmp_real32  __kmpc_atomic_float4_min_cpt(  ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
 kmp_real64  __kmpc_atomic_float8_max_cpt(  ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
 kmp_real64  __kmpc_atomic_float8_min_cpt(  ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
+#if KMP_HAVE_QUAD
 QUAD_LEGACY __kmpc_atomic_float16_max_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
 QUAD_LEGACY __kmpc_atomic_float16_min_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
+#endif
 // .NEQV. (same as xor)
 char      __kmpc_atomic_fixed1_neqv_cpt( ident_t *id_ref, int gtid, char      * lhs, char      rhs, int flag);
 short     __kmpc_atomic_fixed2_neqv_cpt( ident_t *id_ref, int gtid, short     * lhs, short     rhs, int flag);
@@ -847,11 +869,13 @@
 long double __kmpc_atomic_float10_sub_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
 long double __kmpc_atomic_float10_mul_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
 long double __kmpc_atomic_float10_div_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
+#if KMP_HAVE_QUAD
 // _Quad type
 QUAD_LEGACY __kmpc_atomic_float16_add_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
 QUAD_LEGACY __kmpc_atomic_float16_sub_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
 QUAD_LEGACY __kmpc_atomic_float16_mul_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
 QUAD_LEGACY __kmpc_atomic_float16_div_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
+#endif
 // routines for complex types
 // Workaround for cmplx4 routines - return void; captured value is returned via the argument
 void __kmpc_atomic_cmplx4_add_cpt(  ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag);
@@ -867,6 +891,7 @@
 kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
 kmp_cmplx80 __kmpc_atomic_cmplx10_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
 kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
+#if KMP_HAVE_QUAD
 CPLX128_LEG __kmpc_atomic_cmplx16_add_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
 CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
 CPLX128_LEG __kmpc_atomic_cmplx16_mul_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
@@ -884,6 +909,7 @@
     kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_mul_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
     kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
 #endif
+#endif
 
 void __kmpc_atomic_start(void);
 void __kmpc_atomic_end(void);
@@ -922,8 +948,10 @@
 double 		__kmpc_atomic_float8_div_cpt_rev(  ident_t *id_ref, int gtid, double * lhs, double rhs, int flag );
 long double 	__kmpc_atomic_float10_sub_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag );
 long double 	__kmpc_atomic_float10_div_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag );
+#if KMP_HAVE_QUAD
 QUAD_LEGACY	__kmpc_atomic_float16_sub_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag );
 QUAD_LEGACY	__kmpc_atomic_float16_div_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag );
+#endif
 // Workaround for cmplx4 routines - return void; captured value is returned via the argument
 void     	__kmpc_atomic_cmplx4_sub_cpt_rev(  ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
 void 	        __kmpc_atomic_cmplx4_div_cpt_rev(  ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
@@ -931,6 +959,7 @@
 kmp_cmplx64 	__kmpc_atomic_cmplx8_div_cpt_rev(  ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag );
 kmp_cmplx80 	__kmpc_atomic_cmplx10_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag );
 kmp_cmplx80 	__kmpc_atomic_cmplx10_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag );
+#if KMP_HAVE_QUAD
 CPLX128_LEG  	__kmpc_atomic_cmplx16_sub_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag );
 CPLX128_LEG  	__kmpc_atomic_cmplx16_div_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag );
 #if ( KMP_ARCH_X86 )
@@ -939,6 +968,7 @@
     kmp_cmplx128_a16_t 	__kmpc_atomic_cmplx16_sub_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag );
     kmp_cmplx128_a16_t 	__kmpc_atomic_cmplx16_div_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag );
 #endif
+#endif
 
 //   OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
 char 		__kmpc_atomic_fixed1_swp(  ident_t *id_ref, int gtid, char        * lhs, char        rhs );
@@ -948,18 +978,22 @@
 float 		__kmpc_atomic_float4_swp(  ident_t *id_ref, int gtid, float       * lhs, float  rhs );
 double		__kmpc_atomic_float8_swp(  ident_t *id_ref, int gtid, double      * lhs, double  rhs );
 long double	__kmpc_atomic_float10_swp( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
+#if KMP_HAVE_QUAD
 QUAD_LEGACY    	__kmpc_atomic_float16_swp( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
+#endif
 // !!! TODO: check if we need a workaround here
 void        	__kmpc_atomic_cmplx4_swp(  ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out );
 //kmp_cmplx32   	__kmpc_atomic_cmplx4_swp(  ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
 
 kmp_cmplx64 	__kmpc_atomic_cmplx8_swp(  ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
 kmp_cmplx80	__kmpc_atomic_cmplx10_swp( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
+#if KMP_HAVE_QUAD
 CPLX128_LEG 	__kmpc_atomic_cmplx16_swp( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
 #if ( KMP_ARCH_X86 )
     Quad_a16_t		__kmpc_atomic_float16_a16_swp( ident_t *id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
     kmp_cmplx128_a16_t 	__kmpc_atomic_cmplx16_a16_swp( ident_t *id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
 #endif
+#endif
 
 // End of OpenMP 4.0 capture
 
diff --git a/openmp/runtime/src/kmp_cancel.cpp b/openmp/runtime/src/kmp_cancel.cpp
new file mode 100644
index 0000000..e5a76d2
--- /dev/null
+++ b/openmp/runtime/src/kmp_cancel.cpp
@@ -0,0 +1,282 @@
+
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_i18n.h"
+#include "kmp_io.h"
+#include "kmp_str.h"
+
+#if OMP_40_ENABLED
+
+/*!
+@ingroup CANCELLATION
+@param loc_ref location of the original task directive
+@param gtid Global thread ID of encountering thread
+@param cncl_kind Cancellation kind (parallel, for, sections, taskgroup)
+
+@return returns true if the cancellation request has been activated and the execution thread
+needs to proceed to the end of the canceled region.
+
+Request cancellation of the binding OpenMP region.
+*/
+kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) {
+    kmp_info_t *this_thr = __kmp_threads [ gtid ];
+    
+    KC_TRACE( 10, ("__kmpc_cancel: T#%d request %d OMP_CANCELLATION=%d\n", gtid, cncl_kind, __kmp_omp_cancellation) );
+
+    KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq);
+    KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop || 
+                     cncl_kind == cancel_sections || cncl_kind == cancel_taskgroup); 
+    KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid);
+
+    if (__kmp_omp_cancellation) {
+        switch (cncl_kind) {
+        case cancel_parallel:
+        case cancel_loop:
+        case cancel_sections:
+            // cancellation requests for parallel and worksharing constructs
+            // are handled through the team structure
+            {
+                kmp_team_t *this_team = this_thr->th.th_team;
+                KMP_DEBUG_ASSERT(this_team);
+                kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(&(this_team->t.t_cancel_request), cancel_noreq, cncl_kind);
+                if (old == cancel_noreq || old == cncl_kind) {
+                    //printf("__kmpc_cancel: this_team->t.t_cancel_request=%d @ %p\n", 
+                    //       this_team->t.t_cancel_request, &(this_team->t.t_cancel_request));
+                    // we do not have a cancellation request in this team or we do have one
+                    // that matches the current request -> cancel
+                    return 1 /* true */;
+                }
+                break;
+            }
+        case cancel_taskgroup:
+            // cancellation requests for parallel and worksharing constructs
+            // are handled through the taskgroup structure
+            {
+                kmp_taskdata_t*  task; 
+                kmp_taskgroup_t* taskgroup;
+                
+                task = this_thr->th.th_current_task;
+                KMP_DEBUG_ASSERT( task );
+                
+                taskgroup = task->td_taskgroup;
+                if (taskgroup) {
+                    kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(&(taskgroup->cancel_request), cancel_noreq, cncl_kind);
+                    if (old == cancel_noreq || old == cncl_kind) {
+                        // we do not have a cancellation request in this taskgroup or we do have one
+                        // that matches the current request -> cancel
+                        return 1 /* true */;
+                    }
+                }
+                else {
+                    // TODO: what needs to happen here?
+                    // the specification disallows cancellation w/o taskgroups
+                    // so we might do anything here, let's abort for now
+                    KMP_ASSERT( 0 /* false */);
+                }
+            }
+            break;
+        default:
+            KMP_ASSERT (0 /* false */);
+        }
+    }
+
+    // ICV OMP_CANCELLATION=false, so we ignored this cancel request
+    KMP_DEBUG_ASSERT(!__kmp_omp_cancellation);
+    return 0 /* false */;
+}
+
+/*!
+@ingroup CANCELLATION
+@param loc_ref location of the original task directive
+@param gtid Global thread ID of encountering thread
+@param cncl_kind Cancellation kind (parallel, for, sections, taskgroup)
+
+@return returns true if a matching cancellation request has been flagged in the RTL and the 
+encountering thread has to cancel..
+
+Cancellation point for the encountering thread.
+*/
+kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) {
+    kmp_info_t *this_thr = __kmp_threads [ gtid ];
+
+    KC_TRACE( 10, ("__kmpc_cancellationpoint: T#%d request %d OMP_CANCELLATION=%d\n", gtid, cncl_kind, __kmp_omp_cancellation) );
+
+    KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq);
+    KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop || 
+                     cncl_kind == cancel_sections || cncl_kind == cancel_taskgroup); 
+    KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid);
+
+    if (__kmp_omp_cancellation) {
+        switch (cncl_kind) {
+        case cancel_parallel:
+        case cancel_loop:
+        case cancel_sections:
+            // cancellation requests for parallel and worksharing constructs
+            // are handled through the team structure
+            {
+                kmp_team_t *this_team = this_thr->th.th_team;
+                KMP_DEBUG_ASSERT(this_team);
+                if (this_team->t.t_cancel_request) {
+                    if (cncl_kind == this_team->t.t_cancel_request) {
+                        // the request in the team structure matches the type of
+                        // cancellation point so we can cancel
+                        return 1 /* true */;
+                    }
+                    KMP_ASSERT( 0 /* false */);
+                }
+                else {
+                    // we do not have a cancellation request pending, so we just
+                    // ignore this cancellation point
+                    return 0;
+                }
+                break;
+            }
+        case cancel_taskgroup:
+            // cancellation requests for parallel and worksharing constructs
+            // are handled through the taskgroup structure
+            {
+                kmp_taskdata_t*  task; 
+                kmp_taskgroup_t* taskgroup;
+                
+                task = this_thr->th.th_current_task;
+                KMP_DEBUG_ASSERT( task );
+                
+                taskgroup = task->td_taskgroup;
+                if (taskgroup) {
+                    // return the current status of cancellation for the 
+                    // taskgroup
+                    return !!taskgroup->cancel_request;
+                }
+                else {
+                    // if a cancellation point is encountered by a task
+                    // that does not belong to a taskgroup, it is OK
+                    // to ignore it
+                    return 0 /* false */;
+                }
+            }
+        default:
+            KMP_ASSERT (0 /* false */);
+        }
+    }
+
+    // ICV OMP_CANCELLATION=false, so we ignore the cancellation point
+    KMP_DEBUG_ASSERT(!__kmp_omp_cancellation);
+    return 0 /* false */;
+}
+
+/*!
+@ingroup CANCELLATION
+@param loc_ref location of the original task directive
+@param gtid Global thread ID of encountering thread
+
+@return returns true if a matching cancellation request has been flagged in the RTL and the 
+encountering thread has to cancel..
+
+Barrier with cancellation point to send threads from the barrier to the
+end of the parallel region.  Needs a special code pattern as documented 
+in the design document for the cancellation feature.
+*/
+kmp_int32
+__kmpc_cancel_barrier(ident_t *loc, kmp_int32 gtid) {
+    int ret = 0 /* false */;
+    kmp_info_t *this_thr = __kmp_threads [ gtid ];
+    kmp_team_t *this_team = this_thr->th.th_team;
+
+    KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid);
+
+    // call into the standard barrier
+    __kmpc_barrier(loc, gtid);
+
+    // if cancellation is active, check cancellation flag
+    if (__kmp_omp_cancellation) {
+        // depending on which construct to cancel, check the flag and
+        // reset the flag
+        switch (this_team->t.t_cancel_request) {
+        case cancel_parallel:
+            ret = 1;
+            // ensure that threads have checked the flag, when
+            // leaving the above barrier
+            __kmpc_barrier(loc, gtid);
+            this_team->t.t_cancel_request = cancel_noreq;
+            // the next barrier is the fork/join barrier, which
+            // synchronizes the threads leaving here        
+            break;
+        case cancel_loop:
+        case cancel_sections:
+            ret = 1;
+            // ensure that threads have checked the flag, when
+            // leaving the above barrier
+            __kmpc_barrier(loc, gtid);
+            this_team->t.t_cancel_request = cancel_noreq;
+            // synchronize the threads again to make sure we
+            // do not have any run-away threads that cause a race
+            // on the cancellation flag
+            __kmpc_barrier(loc, gtid);
+            break;
+        case cancel_taskgroup:
+            // this case should not occur
+            KMP_ASSERT (0 /* false */ );
+            break;
+        case cancel_noreq:
+            // do nothing
+            break;
+        default:
+            KMP_ASSERT ( 0 /* false */);
+        }
+    }
+    
+    return ret;
+}
+
+/*!
+@ingroup CANCELLATION
+@param loc_ref location of the original task directive
+@param gtid Global thread ID of encountering thread
+
+@return returns true if a matching cancellation request has been flagged in the RTL and the 
+encountering thread has to cancel..
+
+Query function to query the current status of cancellation requests.
+Can be used to implement the following pattern:
+ 
+if (kmp_get_cancellation_status(kmp_cancel_parallel)) {
+    perform_cleanup();
+    #pragma omp cancellation point parallel      
+}
+*/
+int __kmp_get_cancellation_status(int cancel_kind) {
+    if (__kmp_omp_cancellation) {
+        kmp_info_t *this_thr = __kmp_entry_thread();
+        
+        switch (cancel_kind) {
+        case cancel_parallel:
+        case cancel_loop:
+        case cancel_sections:
+            {
+                kmp_team_t *this_team = this_thr->th.th_team;
+                return this_team->t.t_cancel_request == cancel_kind;
+            }
+        case cancel_taskgroup:
+            {
+                kmp_taskdata_t*  task; 
+                kmp_taskgroup_t* taskgroup;
+                task = this_thr->th.th_current_task;
+                taskgroup = task->td_taskgroup;
+                return taskgroup && taskgroup->cancel_request;
+            }
+        }
+    }
+
+    return 0 /* false */;
+}
+
+#endif
diff --git a/openmp/runtime/src/kmp_csupport.c b/openmp/runtime/src/kmp_csupport.c
index 8ca4612..17cc534 100644
--- a/openmp/runtime/src/kmp_csupport.c
+++ b/openmp/runtime/src/kmp_csupport.c
@@ -1,7 +1,7 @@
 /*
  * kmp_csupport.c -- kfront linkage support for OpenMP.
- * $Revision: 42642 $
- * $Date: 2013-09-06 01:57:24 -0500 (Fri, 06 Sep 2013) $
+ * $Revision: 42826 $
+ * $Date: 2013-11-20 03:39:45 -0600 (Wed, 20 Nov 2013) $
  */
 
 
@@ -287,7 +287,7 @@
             VOLATILE_CAST(microtask_t) microtask,
             VOLATILE_CAST(launch_t)    __kmp_invoke_task_func,
 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
-#if KMP_ARCH_X86_64 && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
             &ap
 #else
             ap
@@ -351,7 +351,7 @@
             argc,
             VOLATILE_CAST(microtask_t) __kmp_teams_master,
             VOLATILE_CAST(launch_t)    __kmp_invoke_teams_master,
-#if KMP_ARCH_X86_64 && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
             &ap
 #else
             ap
@@ -622,28 +622,20 @@
     if ( __kmp_env_consistency_check )
         __kmp_push_parallel( global_tid, NULL );
 
-#if USE_ITT_BUILD
+// t_level is not available in 2.5 build, so check for OMP_30_ENABLED
+#if USE_ITT_BUILD && OMP_30_ENABLED
     // Mark the start of the "parallel" region for VTune. Only use one of frame notification scheme at the moment.
     if ( ( __itt_frame_begin_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG )
     {
         __kmp_itt_region_forking( global_tid, 1 );
     }
-    // Collect information only if the file was opened succesfully.
-    if( __kmp_forkjoin_frames_mode == 1 && __kmp_itt_csv_file )
+    if( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) && __itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr )
     {
+#if USE_ITT_NOTIFY
         if( this_thr->th.th_team->t.t_level == 1 ) {
-            kmp_uint64 fr_begin;
-#if defined( __GNUC__ )
-# if !defined( __INTEL_COMPILER )
-            fr_begin = __kmp_hardware_timestamp();
-# else
-            fr_begin = __rdtsc();
-# endif
-#else
-            fr_begin = __rdtsc();
-#endif
-            this_thr->th.th_frame_time_serialized = fr_begin;
+            this_thr->th.th_frame_time_serialized = __itt_get_timestamp();
         }
+#endif
     }
 #endif /* USE_ITT_BUILD */
 
@@ -774,39 +766,17 @@
 
     }
 
-#if USE_ITT_BUILD
+// t_level is not available in 2.5 build, so check for OMP_30_ENABLED
+#if USE_ITT_BUILD && OMP_30_ENABLED
     // Mark the end of the "parallel" region for VTune. Only use one of frame notification scheme at the moment.
     if ( ( __itt_frame_end_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG )
     {
+        this_thr->th.th_ident = loc;
         __kmp_itt_region_joined( global_tid, 1 );
     }
-    // Collect information only if the file was opened succesfully.
-    if( __kmp_forkjoin_frames_mode == 1 && __kmp_itt_csv_file )
-    {
+    if( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) && __itt_frame_submit_v3_ptr ) {
         if( this_thr->th.th_team->t.t_level == 0 ) {
-            ident_t * loc  = this_thr->th.th_ident;
-            if (loc) {
-                // Use compiler-generated location to mark the frame:
-                // "<func>$omp$frame@[file:]<line>[:<col>]"
-                kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
-
-                kmp_uint64 fr_end;
-#if defined( __GNUC__ )
-# if !defined( __INTEL_COMPILER )
-                fr_end = __kmp_hardware_timestamp();
-# else
-                fr_end = __rdtsc();
-# endif
-#else
-                fr_end = __rdtsc();
-#endif
-                K_DIAG( 3, ( "__kmpc_end_serialized_parallel: T#%d frame_begin = %llu, frame_end = %llu\n",
-                             global_tid, this_thr->th.th_frame_time, fr_end ) );
-
-                __kmp_str_buf_print( &__kmp_itt_frame_buffer, "%s$omp$frame@%s:%d:%d,%llu,%llu,,\n",
-                                     str_loc.func, str_loc.file, str_loc.line, str_loc.col, this_thr->th.th_frame_time_serialized, fr_end );
-                __kmp_str_loc_free( &str_loc );
-            }
+            __kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized, __itt_timestamp_none, 0, loc );
         }
     }
 #endif /* USE_ITT_BUILD */
@@ -858,13 +828,15 @@
                 if ( ! __kmp_cpuinfo.sse2 ) {
                     // CPU cannot execute SSE2 instructions.
                 } else {
-                    #if defined( __GNUC__ ) && !defined( __INTEL_COMPILER )
-                    __sync_synchronize();
-                    #else
+                    #if KMP_COMPILER_ICC
                     _mm_mfence();
-                    #endif // __GNUC__
+                    #else
+                    __sync_synchronize();
+                    #endif // KMP_COMPILER_ICC
                 }; // if
             #endif // KMP_MIC
+        #elif KMP_ARCH_ARM
+            // Nothing yet
         #else
             #error Unknown or unsupported architecture
         #endif
@@ -1110,7 +1082,7 @@
       && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
         lck = (kmp_user_lock_p)crit;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
     else if ( ( __kmp_user_lock_kind == lk_futex )
       && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
         lck = (kmp_user_lock_p)crit;
@@ -1163,7 +1135,7 @@
       && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
         lck = (kmp_user_lock_p)crit;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
     else if ( ( __kmp_user_lock_kind == lk_futex )
       && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
         lck = (kmp_user_lock_p)crit;
@@ -1598,14 +1570,14 @@
       && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
     else if ( ( __kmp_user_lock_kind == lk_futex )
       && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
     }
 #endif
     else {
-        lck = __kmp_user_lock_allocate( user_lock, gtid );
+        lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
     }
     INIT_LOCK( lck );
     __kmp_set_user_lock_location( lck, loc );
@@ -1634,7 +1606,7 @@
       + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
     else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
      <= OMP_NEST_LOCK_T_SIZE ) ) {
@@ -1642,7 +1614,7 @@
     }
 #endif
     else {
-        lck = __kmp_user_lock_allocate( user_lock, gtid );
+        lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
     }
 
     INIT_NESTED_LOCK( lck );
@@ -1662,7 +1634,7 @@
       && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
     else if ( ( __kmp_user_lock_kind == lk_futex )
       && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
@@ -1681,7 +1653,7 @@
       && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         ;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
     else if ( ( __kmp_user_lock_kind == lk_futex )
       && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         ;
@@ -1702,7 +1674,7 @@
       + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
     else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
      <= OMP_NEST_LOCK_T_SIZE ) ) {
@@ -1723,7 +1695,7 @@
      + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
         ;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
     else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
      <= OMP_NEST_LOCK_T_SIZE ) ) {
@@ -1743,7 +1715,7 @@
       && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
     else if ( ( __kmp_user_lock_kind == lk_futex )
       && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
@@ -1773,7 +1745,7 @@
       + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
     else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
      <= OMP_NEST_LOCK_T_SIZE ) ) {
@@ -1805,7 +1777,7 @@
 
     if ( ( __kmp_user_lock_kind == lk_tas )
       && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
         // "fast" path implemented to fix customer performance issue
 #if USE_ITT_BUILD
         __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
@@ -1817,7 +1789,7 @@
         lck = (kmp_user_lock_p)user_lock;
 #endif
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
     else if ( ( __kmp_user_lock_kind == lk_futex )
       && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
@@ -1844,7 +1816,7 @@
 
     if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
       + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
         // "fast" path implemented to fix customer performance issue
         kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
 #if USE_ITT_BUILD
@@ -1859,7 +1831,7 @@
         lck = (kmp_user_lock_p)user_lock;
 #endif
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
     else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
      <= OMP_NEST_LOCK_T_SIZE ) ) {
@@ -1888,7 +1860,7 @@
       && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
     else if ( ( __kmp_user_lock_kind == lk_futex )
       && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
@@ -1926,7 +1898,7 @@
       + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
         lck = (kmp_user_lock_p)user_lock;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
     else if ( ( __kmp_user_lock_kind == lk_futex )
      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
      <= OMP_NEST_LOCK_T_SIZE ) ) {
diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp
index 1128b87..cb5bdac 100644
--- a/openmp/runtime/src/kmp_dispatch.cpp
+++ b/openmp/runtime/src/kmp_dispatch.cpp
@@ -1,7 +1,7 @@
 /*
  * kmp_dispatch.cpp: dynamic scheduling - iteration initialization and dispatch.
- * $Revision: 42624 $
- * $Date: 2013-08-27 10:53:11 -0500 (Tue, 27 Aug 2013) $
+ * $Revision: 42674 $
+ * $Date: 2013-09-18 11:12:49 -0500 (Wed, 18 Sep 2013) $
  */
 
 
@@ -916,7 +916,8 @@
                     */
                     // save original FPCW and set precision to 64-bit, as
                     // Windows* OS on IA-32 architecture defaults to 53-bit
-                    unsigned int oldFpcw = _control87(0,0x30000);
+                    unsigned int oldFpcw = _control87(0,0);
+                    _control87(_PC_64,_MCW_PC); // 0,0x30000
                     #endif
                     /* value used for comparison in solver for cross-over point */
                     long double target = ((long double)chunk * 2 + 1) * nproc / tc;
@@ -995,7 +996,7 @@
                     pr->u.p.count = tc - __kmp_dispatch_guided_remaining(tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - cross * chunk;
                     #if KMP_OS_WINDOWS && KMP_ARCH_X86
                         // restore FPCW
-                        _control87(oldFpcw,0x30000);
+                        _control87(oldFpcw,_MCW_PC);
                     #endif
                 } // if
             } else {
@@ -1836,7 +1837,7 @@
                     /* for storing original FPCW value for Windows* OS on
 		       IA-32 architecture 8-byte version */
                     unsigned int oldFpcw;
-                    int fpcwSet = 0;
+                    unsigned int fpcwSet = 0;
     #endif
                     KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked analytical case\n",
                                    gtid ) );
@@ -1870,7 +1871,8 @@
 			       FPCW and set precision to 64-bit, as Windows* OS
 			       on IA-32 architecture defaults to 53-bit */
                             if ( !fpcwSet ) {
-                                oldFpcw = _control87(0,0x30000);
+                                oldFpcw = _control87(0,0);
+                                _control87(_PC_64,_MCW_PC);
                                 fpcwSet = 0x30000;
                             }
     #endif
@@ -1893,9 +1895,11 @@
                         } // if
                     } // while (1)
     #if KMP_OS_WINDOWS && KMP_ARCH_X86
-                    /* restore FPCW if necessary */
-                    if ( oldFpcw & fpcwSet != 0 )
-                        _control87(oldFpcw,0x30000);
+                    /* restore FPCW if necessary
+                       AC: check fpcwSet flag first because oldFpcw can be uninitialized here
+                    */
+                    if ( fpcwSet && ( oldFpcw & fpcwSet ) )
+                        _control87(oldFpcw,_MCW_PC);
     #endif
                     if ( status != 0 ) {
                         start = pr->u.p.lb;
diff --git a/openmp/runtime/src/kmp_ftn_cdecl.c b/openmp/runtime/src/kmp_ftn_cdecl.c
index 7079ee9..135a7cb 100644
--- a/openmp/runtime/src/kmp_ftn_cdecl.c
+++ b/openmp/runtime/src/kmp_ftn_cdecl.c
@@ -1,7 +1,7 @@
 /*
  * kmp_ftn_cdecl.c -- Fortran __cdecl linkage support for OpenMP.
- * $Revision: 42061 $
- * $Date: 2013-02-28 16:36:24 -0600 (Thu, 28 Feb 2013) $
+ * $Revision: 42757 $
+ * $Date: 2013-10-18 08:20:57 -0500 (Fri, 18 Oct 2013) $
  */
 
 
@@ -17,21 +17,21 @@
 
 #include "kmp.h"
 
-// Note: This string is not printed when KMP_VERSION=1.
-char const __kmp_version_ftncdecl[] = KMP_VERSION_PREFIX "Fortran __cdecl OMP support: "
-#ifdef USE_FTN_CDECL
-    "yes";
-#else
-    "no";
+#if KMP_OS_WINDOWS
+#   if defined  KMP_WIN_CDECL ||  !defined GUIDEDLL_EXPORTS
+#       define KMP_FTN_ENTRIES      KMP_FTN_UPPER
+#   endif
+#elif KMP_OS_UNIX
+#   define KMP_FTN_ENTRIES  KMP_FTN_PLAIN
 #endif
 
-#ifdef USE_FTN_CDECL
-
-#define FTN_STDCALL 	/* no stdcall */
-#define KMP_FTN_ENTRIES	USE_FTN_CDECL
-
-#include "kmp_ftn_os.h"
-#include "kmp_ftn_entry.h"
-
-#endif /* USE_FTN_CDECL */
-
+// Note: This string is not printed when KMP_VERSION=1.
+char const __kmp_version_ftncdecl[] = KMP_VERSION_PREFIX "Fortran __cdecl OMP support: "
+#ifdef KMP_FTN_ENTRIES
+    "yes";
+#   define FTN_STDCALL 	/* no stdcall */
+#   include "kmp_ftn_os.h"
+#   include "kmp_ftn_entry.h"
+#else
+    "no";
+#endif /* KMP_FTN_ENTRIES */
diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h
index f2c6440..dbbca19 100644
--- a/openmp/runtime/src/kmp_ftn_entry.h
+++ b/openmp/runtime/src/kmp_ftn_entry.h
@@ -1,7 +1,7 @@
 /*
  * kmp_ftn_entry.h -- Fortran entry linkage support for OpenMP.
- * $Revision: 42507 $
- * $Date: 2013-07-11 07:55:25 -0500 (Thu, 11 Jul 2013) $
+ * $Revision: 42798 $
+ * $Date: 2013-10-30 16:39:54 -0500 (Wed, 30 Oct 2013) $
  */
 
 
@@ -356,7 +356,7 @@
 /* sets the requested number of threads for the next parallel region */
 
 void FTN_STDCALL
-FTN_SET_NUM_THREADS( int KMP_DEREF arg )
+xexpand(FTN_SET_NUM_THREADS)( int KMP_DEREF arg )
 {
     #ifdef KMP_STUB
         // Nothing.
@@ -368,7 +368,7 @@
 
 /* returns the number of threads in current team */
 int FTN_STDCALL
-FTN_GET_NUM_THREADS( void )
+xexpand(FTN_GET_NUM_THREADS)( void )
 {
     #ifdef KMP_STUB
         return 1;
@@ -379,7 +379,7 @@
 }
 
 int FTN_STDCALL
-FTN_GET_MAX_THREADS( void )
+xexpand(FTN_GET_MAX_THREADS)( void )
 {
     #ifdef KMP_STUB
         return 1;
@@ -401,7 +401,7 @@
 }
 
 int FTN_STDCALL
-FTN_GET_THREAD_NUM( void )
+xexpand(FTN_GET_THREAD_NUM)( void )
 {
     #ifdef KMP_STUB
         return 0;
@@ -458,7 +458,7 @@
 }
 
 int FTN_STDCALL
-FTN_GET_NUM_PROCS( void )
+xexpand(FTN_GET_NUM_PROCS)( void )
 {
     #ifdef KMP_STUB
         return 1;
@@ -472,7 +472,7 @@
 }
 
 void FTN_STDCALL
-FTN_SET_NESTED( int KMP_DEREF flag )
+xexpand(FTN_SET_NESTED)( int KMP_DEREF flag )
 {
     #ifdef KMP_STUB
         __kmps_set_nested( KMP_DEREF flag );
@@ -487,7 +487,7 @@
 
 
 int FTN_STDCALL
-FTN_GET_NESTED( void )
+xexpand(FTN_GET_NESTED)( void )
 {
     #ifdef KMP_STUB
         return __kmps_get_nested();
@@ -499,7 +499,7 @@
 }
 
 void FTN_STDCALL
-FTN_SET_DYNAMIC( int KMP_DEREF flag )
+xexpand(FTN_SET_DYNAMIC)( int KMP_DEREF flag )
 {
     #ifdef KMP_STUB
         __kmps_set_dynamic( KMP_DEREF flag ? TRUE : FALSE );
@@ -515,7 +515,7 @@
 
 
 int FTN_STDCALL
-FTN_GET_DYNAMIC( void )
+xexpand(FTN_GET_DYNAMIC)( void )
 {
     #ifdef KMP_STUB
         return __kmps_get_dynamic();
@@ -527,7 +527,7 @@
 }
 
 int FTN_STDCALL
-FTN_IN_PARALLEL( void )
+xexpand(FTN_IN_PARALLEL)( void )
 {
     #ifdef KMP_STUB
         return 0;
@@ -550,7 +550,7 @@
 #if OMP_30_ENABLED
 
 void FTN_STDCALL
-FTN_SET_SCHEDULE( kmp_sched_t KMP_DEREF kind, int KMP_DEREF modifier )
+xexpand(FTN_SET_SCHEDULE)( kmp_sched_t KMP_DEREF kind, int KMP_DEREF modifier )
 {
     #ifdef KMP_STUB
         __kmps_set_schedule( KMP_DEREF kind, KMP_DEREF modifier );
@@ -562,7 +562,7 @@
 }
 
 void FTN_STDCALL
-FTN_GET_SCHEDULE( kmp_sched_t * kind, int * modifier )
+xexpand(FTN_GET_SCHEDULE)( kmp_sched_t * kind, int * modifier )
 {
     #ifdef KMP_STUB
         __kmps_get_schedule( kind, modifier );
@@ -574,7 +574,7 @@
 }
 
 void FTN_STDCALL
-FTN_SET_MAX_ACTIVE_LEVELS( int KMP_DEREF arg )
+xexpand(FTN_SET_MAX_ACTIVE_LEVELS)( int KMP_DEREF arg )
 {
     #ifdef KMP_STUB
 	// Nothing.
@@ -586,7 +586,7 @@
 }
 
 int FTN_STDCALL
-FTN_GET_MAX_ACTIVE_LEVELS( void )
+xexpand(FTN_GET_MAX_ACTIVE_LEVELS)( void )
 {
     #ifdef KMP_STUB
 	return 0;
@@ -598,7 +598,7 @@
 }
 
 int FTN_STDCALL
-FTN_GET_ACTIVE_LEVEL( void )
+xexpand(FTN_GET_ACTIVE_LEVEL)( void )
 {
     #ifdef KMP_STUB
 	return 0; // returns 0 if it is called from the sequential part of the program
@@ -610,7 +610,7 @@
 }
 
 int FTN_STDCALL
-FTN_GET_LEVEL( void )
+xexpand(FTN_GET_LEVEL)( void )
 {
     #ifdef KMP_STUB
 	return 0; // returns 0 if it is called from the sequential part of the program
@@ -622,7 +622,7 @@
 }
 
 int FTN_STDCALL
-FTN_GET_ANCESTOR_THREAD_NUM( int KMP_DEREF level )
+xexpand(FTN_GET_ANCESTOR_THREAD_NUM)( int KMP_DEREF level )
 {
     #ifdef KMP_STUB
 	return ( KMP_DEREF level ) ? ( -1 ) : ( 0 );
@@ -632,7 +632,7 @@
 }
 
 int FTN_STDCALL
-FTN_GET_TEAM_SIZE( int KMP_DEREF level )
+xexpand(FTN_GET_TEAM_SIZE)( int KMP_DEREF level )
 {
     #ifdef KMP_STUB
         return ( KMP_DEREF level ) ? ( -1 ) : ( 1 );
@@ -642,7 +642,7 @@
 }
 
 int FTN_STDCALL
-FTN_GET_THREAD_LIMIT( void )
+xexpand(FTN_GET_THREAD_LIMIT)( void )
 {
     #ifdef KMP_STUB
 	return 1;   // TO DO: clarify whether it returns 1 or 0?
@@ -656,7 +656,7 @@
 }
 
 int FTN_STDCALL
-FTN_IN_FINAL( void )
+xexpand(FTN_IN_FINAL)( void )
 {
     #ifdef KMP_STUB
 	return 0;   // TO DO: clarify whether it returns 1 or 0?
@@ -674,7 +674,7 @@
 
 
 kmp_proc_bind_t FTN_STDCALL
-FTN_GET_PROC_BIND( void )
+xexpand(FTN_GET_PROC_BIND)( void )
 {
     #ifdef KMP_STUB
         return __kmps_get_proc_bind();
@@ -684,7 +684,7 @@
 }
 
 int FTN_STDCALL
-FTN_GET_NUM_TEAMS( void )
+xexpand(FTN_GET_NUM_TEAMS)( void )
 {
     #ifdef KMP_STUB
         return 1;
@@ -723,7 +723,7 @@
 }
 
 int FTN_STDCALL
-FTN_GET_TEAM_NUM( void )
+xexpand(FTN_GET_TEAM_NUM)( void )
 {
     #ifdef KMP_STUB
         return 0;
@@ -793,7 +793,7 @@
 
 /* initialize the lock */
 void FTN_STDCALL
-FTN_INIT_LOCK( void **user_lock )
+xexpand(FTN_INIT_LOCK)( void **user_lock )
 {
     #ifdef KMP_STUB
         *((kmp_stub_lock_t *)user_lock) = UNLOCKED;
@@ -804,7 +804,7 @@
 
 /* initialize the lock */
 void FTN_STDCALL
-FTN_INIT_NEST_LOCK( void **user_lock )
+xexpand(FTN_INIT_NEST_LOCK)( void **user_lock )
 {
     #ifdef KMP_STUB
         *((kmp_stub_lock_t *)user_lock) = UNLOCKED;
@@ -814,7 +814,7 @@
 }
 
 void FTN_STDCALL
-FTN_DESTROY_LOCK( void **user_lock )
+xexpand(FTN_DESTROY_LOCK)( void **user_lock )
 {
     #ifdef KMP_STUB
         *((kmp_stub_lock_t *)user_lock) = UNINIT;
@@ -824,7 +824,7 @@
 }
 
 void FTN_STDCALL
-FTN_DESTROY_NEST_LOCK( void **user_lock )
+xexpand(FTN_DESTROY_NEST_LOCK)( void **user_lock )
 {
     #ifdef KMP_STUB
         *((kmp_stub_lock_t *)user_lock) = UNINIT;
@@ -834,7 +834,7 @@
 }
 
 void FTN_STDCALL
-FTN_SET_LOCK( void **user_lock )
+xexpand(FTN_SET_LOCK)( void **user_lock )
 {
     #ifdef KMP_STUB
         if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
@@ -850,7 +850,7 @@
 }
 
 void FTN_STDCALL
-FTN_SET_NEST_LOCK( void **user_lock )
+xexpand(FTN_SET_NEST_LOCK)( void **user_lock )
 {
     #ifdef KMP_STUB
         if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
@@ -863,7 +863,7 @@
 }
 
 void FTN_STDCALL
-FTN_UNSET_LOCK( void **user_lock )
+xexpand(FTN_UNSET_LOCK)( void **user_lock )
 {
     #ifdef KMP_STUB
         if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
@@ -879,7 +879,7 @@
 }
 
 void FTN_STDCALL
-FTN_UNSET_NEST_LOCK( void **user_lock )
+xexpand(FTN_UNSET_NEST_LOCK)( void **user_lock )
 {
     #ifdef KMP_STUB
         if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
@@ -895,7 +895,7 @@
 }
 
 int FTN_STDCALL
-FTN_TEST_LOCK( void **user_lock )
+xexpand(FTN_TEST_LOCK)( void **user_lock )
 {
     #ifdef KMP_STUB
         if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
@@ -912,7 +912,7 @@
 }
 
 int FTN_STDCALL
-FTN_TEST_NEST_LOCK( void **user_lock )
+xexpand(FTN_TEST_NEST_LOCK)( void **user_lock )
 {
     #ifdef KMP_STUB
         if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
@@ -925,7 +925,7 @@
 }
 
 double FTN_STDCALL
-FTN_GET_WTIME( void )
+xexpand(FTN_GET_WTIME)( void )
 {
     #ifdef KMP_STUB
         return __kmps_get_wtime();
@@ -944,7 +944,7 @@
 }
 
 double FTN_STDCALL
-FTN_GET_WTICK( void )
+xexpand(FTN_GET_WTICK)( void )
 {
     #ifdef KMP_STUB
         return __kmps_get_wtick();
@@ -1022,6 +1022,191 @@
 /* ------------------------------------------------------------------------ */
 
 
+#if OMP_40_ENABLED
+/* returns the status of cancellation */
+int FTN_STDCALL
+xexpand(FTN_GET_CANCELLATION)(void) {
+#ifdef KMP_STUB
+    return 0 /* false */;
+#else
+    // initialize the library if needed
+    if ( ! __kmp_init_serial ) {
+        __kmp_serial_initialize();
+    }
+    return __kmp_omp_cancellation;
+#endif
+}
+
+int FTN_STDCALL
+FTN_GET_CANCELLATION_STATUS(int cancel_kind) {
+#ifdef KMP_STUB
+    return 0 /* false */;
+#else
+    return __kmp_get_cancellation_status(cancel_kind);
+#endif
+}
+
+#endif // OMP_40_ENABLED
+
+// GCC compatibility (versioned symbols)
+#if KMP_OS_LINUX
+
+/*
+    The following sections create function aliases (dummy symbols) for the omp_* routines.
+    These aliases will then be versioned according to how libgomp ``versions'' its 
+    symbols (OMP_1.0, OMP_2.0, OMP_3.0, ...) while also retaining the 
+    default version which libiomp5 uses: VERSION (defined in exports_so.txt)
+    If you want to see the versioned symbols for libgomp.so.1 then just type: 
+
+    objdump -T /path/to/libgomp.so.1 | grep omp_
+
+    Example:
+    Step 1)  Create __kmp_api_omp_set_num_threads_10_alias 
+             which is alias of __kmp_api_omp_set_num_threads
+    Step 2)  Set __kmp_api_omp_set_num_threads_10_alias to version: omp_set_num_threads@OMP_1.0
+    Step 2B) Set __kmp_api_omp_set_num_threads to default version : omp_set_num_threads@@VERSION
+*/
+
+// OMP_1.0 aliases
+xaliasify(FTN_SET_NUM_THREADS,   10);
+xaliasify(FTN_GET_NUM_THREADS,   10);
+xaliasify(FTN_GET_MAX_THREADS,   10);
+xaliasify(FTN_GET_THREAD_NUM,    10);
+xaliasify(FTN_GET_NUM_PROCS,     10);
+xaliasify(FTN_IN_PARALLEL,       10);
+xaliasify(FTN_SET_DYNAMIC,       10);
+xaliasify(FTN_GET_DYNAMIC,       10);
+xaliasify(FTN_SET_NESTED,        10);
+xaliasify(FTN_GET_NESTED,        10);
+xaliasify(FTN_INIT_LOCK,         10);
+xaliasify(FTN_INIT_NEST_LOCK,    10);
+xaliasify(FTN_DESTROY_LOCK,      10);
+xaliasify(FTN_DESTROY_NEST_LOCK, 10);
+xaliasify(FTN_SET_LOCK,          10);
+xaliasify(FTN_SET_NEST_LOCK,     10);
+xaliasify(FTN_UNSET_LOCK,        10);
+xaliasify(FTN_UNSET_NEST_LOCK,   10);
+xaliasify(FTN_TEST_LOCK,         10);
+xaliasify(FTN_TEST_NEST_LOCK,    10);
+
+// OMP_2.0 aliases
+xaliasify(FTN_GET_WTICK, 20);
+xaliasify(FTN_GET_WTIME, 20);
+
+#if OMP_30_ENABLED
+// OMP_3.0 aliases
+xaliasify(FTN_SET_SCHEDULE,            30);
+xaliasify(FTN_GET_SCHEDULE,            30);
+xaliasify(FTN_GET_THREAD_LIMIT,        30);
+xaliasify(FTN_SET_MAX_ACTIVE_LEVELS,   30);
+xaliasify(FTN_GET_MAX_ACTIVE_LEVELS,   30);
+xaliasify(FTN_GET_LEVEL,               30);
+xaliasify(FTN_GET_ANCESTOR_THREAD_NUM, 30);
+xaliasify(FTN_GET_TEAM_SIZE,           30);
+xaliasify(FTN_GET_ACTIVE_LEVEL,        30);
+xaliasify(FTN_INIT_LOCK,               30);
+xaliasify(FTN_INIT_NEST_LOCK,          30);
+xaliasify(FTN_DESTROY_LOCK,            30);
+xaliasify(FTN_DESTROY_NEST_LOCK,       30);
+xaliasify(FTN_SET_LOCK,                30);
+xaliasify(FTN_SET_NEST_LOCK,           30);
+xaliasify(FTN_UNSET_LOCK,              30);
+xaliasify(FTN_UNSET_NEST_LOCK,         30);
+xaliasify(FTN_TEST_LOCK,               30);
+xaliasify(FTN_TEST_NEST_LOCK,          30);
+
+// OMP_3.1 aliases
+xaliasify(FTN_IN_FINAL, 31);
+#endif /* OMP_30_ENABLED */
+
+#if OMP_40_ENABLED
+// OMP_4.0 aliases
+xaliasify(FTN_GET_PROC_BIND, 40);
+xaliasify(FTN_GET_NUM_TEAMS, 40);
+xaliasify(FTN_GET_TEAM_NUM, 40);
+xaliasify(FTN_GET_CANCELLATION, 40);
+#endif /* OMP_40_ENABLED */
+
+#if OMP_41_ENABLED
+// OMP_4.1 aliases
+#endif
+
+#if OMP_50_ENABLED
+// OMP_5.0 aliases
+#endif
+
+// OMP_1.0 versioned symbols
+xversionify(FTN_SET_NUM_THREADS,   10, "OMP_1.0");
+xversionify(FTN_GET_NUM_THREADS,   10, "OMP_1.0");
+xversionify(FTN_GET_MAX_THREADS,   10, "OMP_1.0");
+xversionify(FTN_GET_THREAD_NUM,    10, "OMP_1.0");
+xversionify(FTN_GET_NUM_PROCS,     10, "OMP_1.0");
+xversionify(FTN_IN_PARALLEL,       10, "OMP_1.0");
+xversionify(FTN_SET_DYNAMIC,       10, "OMP_1.0");
+xversionify(FTN_GET_DYNAMIC,       10, "OMP_1.0");
+xversionify(FTN_SET_NESTED,        10, "OMP_1.0");
+xversionify(FTN_GET_NESTED,        10, "OMP_1.0");
+xversionify(FTN_INIT_LOCK,         10, "OMP_1.0");
+xversionify(FTN_INIT_NEST_LOCK,    10, "OMP_1.0");
+xversionify(FTN_DESTROY_LOCK,      10, "OMP_1.0");
+xversionify(FTN_DESTROY_NEST_LOCK, 10, "OMP_1.0");
+xversionify(FTN_SET_LOCK,          10, "OMP_1.0");
+xversionify(FTN_SET_NEST_LOCK,     10, "OMP_1.0");
+xversionify(FTN_UNSET_LOCK,        10, "OMP_1.0");
+xversionify(FTN_UNSET_NEST_LOCK,   10, "OMP_1.0");
+xversionify(FTN_TEST_LOCK,         10, "OMP_1.0");
+xversionify(FTN_TEST_NEST_LOCK,    10, "OMP_1.0");
+
+// OMP_2.0 versioned symbols
+xversionify(FTN_GET_WTICK,         20, "OMP_2.0");
+xversionify(FTN_GET_WTIME,         20, "OMP_2.0");
+
+#if OMP_30_ENABLED
+// OMP_3.0 versioned symbols
+xversionify(FTN_SET_SCHEDULE,      30, "OMP_3.0");
+xversionify(FTN_GET_SCHEDULE,      30, "OMP_3.0");
+xversionify(FTN_GET_THREAD_LIMIT,        30, "OMP_3.0");
+xversionify(FTN_SET_MAX_ACTIVE_LEVELS,   30, "OMP_3.0");
+xversionify(FTN_GET_MAX_ACTIVE_LEVELS,   30, "OMP_3.0");
+xversionify(FTN_GET_ANCESTOR_THREAD_NUM, 30, "OMP_3.0");
+xversionify(FTN_GET_LEVEL,               30, "OMP_3.0");
+xversionify(FTN_GET_TEAM_SIZE,     30, "OMP_3.0");
+xversionify(FTN_GET_ACTIVE_LEVEL,  30, "OMP_3.0");
+
+// the lock routines have a 1.0 and 3.0 version
+xversionify(FTN_INIT_LOCK,         30, "OMP_3.0");
+xversionify(FTN_INIT_NEST_LOCK,    30, "OMP_3.0");
+xversionify(FTN_DESTROY_LOCK,      30, "OMP_3.0");
+xversionify(FTN_DESTROY_NEST_LOCK, 30, "OMP_3.0");
+xversionify(FTN_SET_LOCK,          30, "OMP_3.0");
+xversionify(FTN_SET_NEST_LOCK,     30, "OMP_3.0");
+xversionify(FTN_UNSET_LOCK,        30, "OMP_3.0");
+xversionify(FTN_UNSET_NEST_LOCK,   30, "OMP_3.0");
+xversionify(FTN_TEST_LOCK,         30, "OMP_3.0");
+xversionify(FTN_TEST_NEST_LOCK,    30, "OMP_3.0");
+
+// OMP_3.1 versioned symbol
+xversionify(FTN_IN_FINAL,          31, "OMP_3.1");
+#endif /* OMP_30_ENABLED */
+
+#if OMP_40_ENABLED
+// OMP_4.0 versioned symbols
+xversionify(FTN_GET_PROC_BIND,     40, "OMP_4.0");
+xversionify(FTN_GET_NUM_TEAMS,     40, "OMP_4.0");
+xversionify(FTN_GET_TEAM_NUM,      40, "OMP_4.0");
+xversionify(FTN_GET_CANCELLATION,  40, "OMP_4.0");
+#endif /* OMP_40_ENABLED */
+
+#if OMP_41_ENABLED
+// OMP_4.1 versioned symbols
+#endif
+
+#if OMP_50_ENABLED
+// OMP_5.0 versioned symbols
+#endif
+
+#endif /* KMP_OS_LINUX */
+
 #ifdef __cplusplus
     } //extern "C"
 #endif // __cplusplus
diff --git a/openmp/runtime/src/kmp_ftn_extra.c b/openmp/runtime/src/kmp_ftn_extra.c
index 6e1bb7e..6777e01 100644
--- a/openmp/runtime/src/kmp_ftn_extra.c
+++ b/openmp/runtime/src/kmp_ftn_extra.c
@@ -1,7 +1,7 @@
 /*
  * kmp_ftn_extra.c -- Fortran 'extra' linkage support for OpenMP.
- * $Revision: 42061 $
- * $Date: 2013-02-28 16:36:24 -0600 (Thu, 28 Feb 2013) $
+ * $Revision: 42757 $
+ * $Date: 2013-10-18 08:20:57 -0500 (Fri, 18 Oct 2013) $
  */
 
 
@@ -17,21 +17,19 @@
 
 #include "kmp.h"
 
-// Note: This string is not printed when KMP_VERSION=1.
-char const __kmp_version_ftnextra[] = KMP_VERSION_PREFIX "Fortran \"extra\" OMP support: "
-#ifdef USE_FTN_EXTRA
-    "yes";
-#else
-    "no";
+#if KMP_OS_WINDOWS
+#   define KMP_FTN_ENTRIES KMP_FTN_PLAIN
+#elif KMP_OS_UNIX
+#   define KMP_FTN_ENTRIES KMP_FTN_APPEND
 #endif
 
-#ifdef USE_FTN_EXTRA
-
-#define FTN_STDCALL /* nothing to do */
-#define KMP_FTN_ENTRIES	USE_FTN_EXTRA
-
-#include "kmp_ftn_os.h"
-#include "kmp_ftn_entry.h"
-
-#endif /* USE_FTN_EXTRA */
-
+// Note: This string is not printed when KMP_VERSION=1.
+char const __kmp_version_ftnextra[] = KMP_VERSION_PREFIX "Fortran \"extra\" OMP support: "
+#ifdef KMP_FTN_ENTRIES
+    "yes";
+#   define FTN_STDCALL /* nothing to do */
+#   include "kmp_ftn_os.h"
+#   include "kmp_ftn_entry.h"
+#else
+    "no";
+#endif /* KMP_FTN_ENTRIES */
diff --git a/openmp/runtime/src/kmp_ftn_os.h b/openmp/runtime/src/kmp_ftn_os.h
index c52ca1e..f241751 100644
--- a/openmp/runtime/src/kmp_ftn_os.h
+++ b/openmp/runtime/src/kmp_ftn_os.h
@@ -1,7 +1,7 @@
 /*
  * kmp_ftn_os.h -- KPTS Fortran defines header file.
- * $Revision: 42478 $
- * $Date: 2013-07-02 15:15:08 -0500 (Tue, 02 Jul 2013) $
+ * $Revision: 42745 $
+ * $Date: 2013-10-14 17:02:04 -0500 (Mon, 14 Oct 2013) $
  */
 
 
@@ -105,6 +105,11 @@
 #endif
 #endif
 
+#if OMP_40_ENABLED
+    #define FTN_GET_CANCELLATION                 omp_get_cancellation
+    #define FTN_GET_CANCELLATION_STATUS          kmp_get_cancellation_status
+#endif
+
 #endif /* KMP_FTN_PLAIN */
 
 /* ------------------------------------------------------------------------ */
@@ -192,6 +197,11 @@
 #endif
 
 
+#if OMP_40_ENABLED
+    #define FTN_GET_CANCELLATION                 omp_get_cancellation_
+    #define FTN_GET_CANCELLATION_STATUS          kmp_get_cancellation_status_
+#endif
+
 #endif /* KMP_FTN_APPEND */
 
 /* ------------------------------------------------------------------------ */
@@ -279,6 +289,11 @@
 #endif
 
 
+#if OMP_40_ENABLED
+    #define FTN_GET_CANCELLATION                 OMP_GET_CANCELLATION
+    #define FTN_GET_CANCELLATION_STATUS          KMP_GET_CANCELLATION_STATUS
+#endif
+
 #endif /* KMP_FTN_UPPER */
 
 /* ------------------------------------------------------------------------ */
@@ -366,7 +381,134 @@
 #endif
 
 
+#if OMP_40_ENABLED
+    #define FTN_GET_CANCELLATION                 OMP_GET_CANCELLATION_
+    #define FTN_GET_CANCELLATION_STATUS          KMP_GET_CANCELLATION_STATUS_
+#endif
+
 #endif /* KMP_FTN_UAPPEND */
 
+/* ------------------------------------------------------------------ */
+/* -------------------------- GOMP API NAMES ------------------------ */
+// All GOMP_1.0 symbols
+#define KMP_API_NAME_GOMP_ATOMIC_END                   GOMP_atomic_end
+#define KMP_API_NAME_GOMP_ATOMIC_START                 GOMP_atomic_start
+#define KMP_API_NAME_GOMP_BARRIER                      GOMP_barrier
+#define KMP_API_NAME_GOMP_CRITICAL_END                 GOMP_critical_end
+#define KMP_API_NAME_GOMP_CRITICAL_NAME_END            GOMP_critical_name_end
+#define KMP_API_NAME_GOMP_CRITICAL_NAME_START          GOMP_critical_name_start
+#define KMP_API_NAME_GOMP_CRITICAL_START               GOMP_critical_start
+#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT            GOMP_loop_dynamic_next
+#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_START           GOMP_loop_dynamic_start
+#define KMP_API_NAME_GOMP_LOOP_END                     GOMP_loop_end
+#define KMP_API_NAME_GOMP_LOOP_END_NOWAIT              GOMP_loop_end_nowait
+#define KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT             GOMP_loop_guided_next
+#define KMP_API_NAME_GOMP_LOOP_GUIDED_START            GOMP_loop_guided_start
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT    GOMP_loop_ordered_dynamic_next
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START   GOMP_loop_ordered_dynamic_start
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT     GOMP_loop_ordered_guided_next
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START    GOMP_loop_ordered_guided_start
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT    GOMP_loop_ordered_runtime_next
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START   GOMP_loop_ordered_runtime_start
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT     GOMP_loop_ordered_static_next
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START    GOMP_loop_ordered_static_start
+#define KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT            GOMP_loop_runtime_next
+#define KMP_API_NAME_GOMP_LOOP_RUNTIME_START           GOMP_loop_runtime_start
+#define KMP_API_NAME_GOMP_LOOP_STATIC_NEXT             GOMP_loop_static_next
+#define KMP_API_NAME_GOMP_LOOP_STATIC_START            GOMP_loop_static_start
+#define KMP_API_NAME_GOMP_ORDERED_END                  GOMP_ordered_end
+#define KMP_API_NAME_GOMP_ORDERED_START                GOMP_ordered_start
+#define KMP_API_NAME_GOMP_PARALLEL_END                 GOMP_parallel_end
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START  GOMP_parallel_loop_dynamic_start
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START   GOMP_parallel_loop_guided_start
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START  GOMP_parallel_loop_runtime_start
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START   GOMP_parallel_loop_static_start
+#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START      GOMP_parallel_sections_start
+#define KMP_API_NAME_GOMP_PARALLEL_START               GOMP_parallel_start
+#define KMP_API_NAME_GOMP_SECTIONS_END                 GOMP_sections_end
+#define KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT          GOMP_sections_end_nowait
+#define KMP_API_NAME_GOMP_SECTIONS_NEXT                GOMP_sections_next
+#define KMP_API_NAME_GOMP_SECTIONS_START               GOMP_sections_start
+#define KMP_API_NAME_GOMP_SINGLE_COPY_END              GOMP_single_copy_end
+#define KMP_API_NAME_GOMP_SINGLE_COPY_START            GOMP_single_copy_start
+#define KMP_API_NAME_GOMP_SINGLE_START                 GOMP_single_start
+
+// All GOMP_2.0 symbols
+#define KMP_API_NAME_GOMP_TASK                           GOMP_task
+#define KMP_API_NAME_GOMP_TASKWAIT                       GOMP_taskwait
+#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT          GOMP_loop_ull_dynamic_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START         GOMP_loop_ull_dynamic_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT           GOMP_loop_ull_guided_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START          GOMP_loop_ull_guided_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT  GOMP_loop_ull_ordered_dynamic_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START GOMP_loop_ull_ordered_dynamic_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT   GOMP_loop_ull_ordered_guided_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START  GOMP_loop_ull_ordered_guided_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT  GOMP_loop_ull_ordered_runtime_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START GOMP_loop_ull_ordered_runtime_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT   GOMP_loop_ull_ordered_static_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START  GOMP_loop_ull_ordered_static_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT          GOMP_loop_ull_runtime_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START         GOMP_loop_ull_runtime_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT           GOMP_loop_ull_static_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START          GOMP_loop_ull_static_start
+
+// All GOMP_3.0 symbols
+#define KMP_API_NAME_GOMP_TASKYIELD                      GOMP_taskyield
+
+// All GOMP_4.0 symbols 
+// TODO: As of 2013-10-14, none of the GOMP_4.0 functions are implemented in libiomp5
+#define KMP_API_NAME_GOMP_BARRIER_CANCEL                 GOMP_barrier_cancel
+#define KMP_API_NAME_GOMP_CANCEL                         GOMP_cancel
+#define KMP_API_NAME_GOMP_CANCELLATION_POINT             GOMP_cancellation_point
+#define KMP_API_NAME_GOMP_LOOP_END_CANCEL                GOMP_loop_end_cancel
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC          GOMP_parallel_loop_dynamic
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED           GOMP_parallel_loop_guided
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME          GOMP_parallel_loop_runtime
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC           GOMP_parallel_loop_static
+#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS              GOMP_parallel_sections
+#define KMP_API_NAME_GOMP_PARALLEL                       GOMP_parallel
+#define KMP_API_NAME_GOMP_SECTIONS_END_CANCEL            GOMP_sections_end_cancel
+#define KMP_API_NAME_GOMP_TASKGROUP_START                GOMP_taskgroup_start
+#define KMP_API_NAME_GOMP_TASKGROUP_END                  GOMP_taskgroup_end
+/* Target functions should be taken care of by liboffload */
+//#define KMP_API_NAME_GOMP_TARGET                       GOMP_target
+//#define KMP_API_NAME_GOMP_TARGET_DATA                  GOMP_target_data
+//#define KMP_API_NAME_GOMP_TARGET_END_DATA              GOMP_target_end_data
+//#define KMP_API_NAME_GOMP_TARGET_UPDATE                GOMP_target_update
+#define KMP_API_NAME_GOMP_TEAMS                          GOMP_teams
+
+#if KMP_OS_LINUX
+    #define xstr(x) str(x) 
+    #define str(x) #x
+
+    // If Linux, xexpand prepends __kmp_api_ to the real API name
+    #define xexpand(api_name) expand(api_name)
+    #define expand(api_name) __kmp_api_##api_name
+
+    #define xaliasify(api_name,ver) aliasify(api_name,ver)
+    #define aliasify(api_name,ver) __typeof__(__kmp_api_##api_name) __kmp_api_##api_name##_##ver##_alias __attribute__((alias(xstr(__kmp_api_##api_name))))
+
+    #define xversionify(api_name, version_num, version_str) versionify(api_name, version_num, version_str, "VERSION")
+    #define versionify(api_name, version_num, version_str, default_ver) \
+    __asm__(".symver " xstr(__kmp_api_##api_name##_##version_num##_alias) "," xstr(api_name) "@" version_str "\n\t"); \
+    __asm__(".symver " xstr(__kmp_api_##api_name) "," xstr(api_name) "@@" default_ver "\n\t")
+
+#else /* KMP_OS_LINUX */
+    #define xstr(x) /* Nothing */
+    #define str(x)  /* Nothing */
+
+    // If Windows or Mac, xexpand does no name transformation
+    #define xexpand(api_name) expand(api_name)
+    #define expand(api_name) api_name
+
+    #define xaliasify(api_name,ver) /* Nothing */
+    #define aliasify(api_name,ver)  /* Nothing */
+
+    #define xversionify(api_name, version_num, version_str) /* Nothing */
+    #define versionify(api_name, version_num, version_str, default_ver) /* Nothing */
+
+#endif /* KMP_OS_LINUX */
+
 #endif /* KMP_FTN_OS_H */
 
diff --git a/openmp/runtime/src/kmp_global.c b/openmp/runtime/src/kmp_global.c
index db81764..b27b1716 100644
--- a/openmp/runtime/src/kmp_global.c
+++ b/openmp/runtime/src/kmp_global.c
@@ -1,7 +1,7 @@
 /*
  * kmp_global.c -- KPTS global variables for runtime support library
- * $Revision: 42642 $
- * $Date: 2013-09-06 01:57:24 -0500 (Fri, 06 Sep 2013) $
+ * $Revision: 42816 $
+ * $Date: 2013-11-11 15:33:37 -0600 (Mon, 11 Nov 2013) $
  */
 
 
@@ -24,7 +24,6 @@
 kmp_key_t __kmp_gtid_threadprivate_key;
 
 kmp_cpuinfo_t   __kmp_cpuinfo = { 0 }; // Not initialized
-kmp_uint64      __kmp_cpu_frequency = 0;
 
 
 /* ----------------------------------------------------- */
@@ -181,6 +180,7 @@
 #if OMP_40_ENABLED
 int __kmp_display_env           = FALSE;
 int __kmp_display_env_verbose   = FALSE;
+int __kmp_omp_cancellation      = FALSE;
 #endif
 
 /* map OMP 3.0 schedule types with our internal schedule types */
@@ -266,9 +266,6 @@
 #if USE_ITT_BUILD
 int     __kmp_forkjoin_frames = 1;
 int     __kmp_forkjoin_frames_mode = 0;
-FILE * __kmp_itt_csv_file;
-kmp_str_buf_t __kmp_itt_frame_buffer;
-
 #endif
 PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method = reduction_method_not_defined;
 int     __kmp_determ_red = FALSE;
diff --git a/openmp/runtime/src/kmp_gsupport.c b/openmp/runtime/src/kmp_gsupport.c
index 33e8fba..9d8e553 100644
--- a/openmp/runtime/src/kmp_gsupport.c
+++ b/openmp/runtime/src/kmp_gsupport.c
@@ -1,7 +1,7 @@
 /*
  * kmp_gsupport.c
- * $Revision: 42181 $
- * $Date: 2013-03-26 15:04:45 -0500 (Tue, 26 Mar 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
  */
 
 
@@ -28,9 +28,10 @@
 #define MKLOC(loc,routine) \
     static ident_t (loc) = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;" };
 
+#include "kmp_ftn_os.h"
 
 void
-GOMP_barrier(void)
+xexpand(KMP_API_NAME_GOMP_BARRIER)(void)
 {
     int gtid = __kmp_entry_gtid();
     MKLOC(loc, "GOMP_barrier");
@@ -58,7 +59,7 @@
 
 
 void
-GOMP_critical_start(void)
+xexpand(KMP_API_NAME_GOMP_CRITICAL_START)(void)
 {
     int gtid = __kmp_entry_gtid();
     MKLOC(loc, "GOMP_critical_start");
@@ -68,7 +69,7 @@
 
 
 void
-GOMP_critical_end(void)
+xexpand(KMP_API_NAME_GOMP_CRITICAL_END)(void)
 {
     int gtid = __kmp_get_gtid();
     MKLOC(loc, "GOMP_critical_end");
@@ -78,7 +79,7 @@
 
 
 void
-GOMP_critical_name_start(void **pptr)
+xexpand(KMP_API_NAME_GOMP_CRITICAL_NAME_START)(void **pptr)
 {
     int gtid = __kmp_entry_gtid();
     MKLOC(loc, "GOMP_critical_name_start");
@@ -88,7 +89,7 @@
 
 
 void
-GOMP_critical_name_end(void **pptr)
+xexpand(KMP_API_NAME_GOMP_CRITICAL_NAME_END)(void **pptr)
 {
     int gtid = __kmp_get_gtid();
     MKLOC(loc, "GOMP_critical_name_end");
@@ -104,7 +105,7 @@
 //
 
 void
-GOMP_atomic_start(void)
+xexpand(KMP_API_NAME_GOMP_ATOMIC_START)(void)
 {
     int gtid = __kmp_entry_gtid();
     KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid));
@@ -113,7 +114,7 @@
 
 
 void
-GOMP_atomic_end(void)
+xexpand(KMP_API_NAME_GOMP_ATOMIC_END)(void)
 {
     int gtid = __kmp_get_gtid();
     KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid));
@@ -122,7 +123,7 @@
 
 
 int
-GOMP_single_start(void)
+xexpand(KMP_API_NAME_GOMP_SINGLE_START)(void)
 {
     int gtid = __kmp_entry_gtid();
     MKLOC(loc, "GOMP_single_start");
@@ -141,7 +142,7 @@
 
 
 void *
-GOMP_single_copy_start(void)
+xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void)
 {
     void *retval;
     int gtid = __kmp_entry_gtid();
@@ -176,7 +177,7 @@
 
 
 void
-GOMP_single_copy_end(void *data)
+xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data)
 {
     int gtid = __kmp_get_gtid();
     MKLOC(loc, "GOMP_single_copy_end");
@@ -196,7 +197,7 @@
 
 
 void
-GOMP_ordered_start(void)
+xexpand(KMP_API_NAME_GOMP_ORDERED_START)(void)
 {
     int gtid = __kmp_entry_gtid();
     MKLOC(loc, "GOMP_ordered_start");
@@ -206,7 +207,7 @@
 
 
 void
-GOMP_ordered_end(void)
+xexpand(KMP_API_NAME_GOMP_ORDERED_END)(void)
 {
     int gtid = __kmp_get_gtid();
     MKLOC(loc, "GOMP_ordered_end");
@@ -223,7 +224,7 @@
 // (IA-32 architecture) or 64-bit signed (Intel(R) 64).
 //
 
-#if KMP_ARCH_X86
+#if KMP_ARCH_X86 || KMP_ARCH_ARM
 # define KMP_DISPATCH_INIT              __kmp_aux_dispatch_init_4
 # define KMP_DISPATCH_FINI_CHUNK        __kmp_aux_dispatch_fini_chunk_4
 # define KMP_DISPATCH_NEXT              __kmpc_dispatch_next_4
@@ -287,7 +288,7 @@
     va_start(ap, argc);
 
     rc = __kmp_fork_call(loc, gtid, FALSE, argc, wrapper, __kmp_invoke_task_func,
-#if KMP_ARCH_X86_64 && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
       &ap
 #else
       ap
@@ -305,7 +306,7 @@
 
 
 void
-GOMP_parallel_start(void (*task)(void *), void *data, unsigned num_threads)
+xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data, unsigned num_threads)
 {
     int gtid = __kmp_entry_gtid();
     MKLOC(loc, "GOMP_parallel_start");
@@ -325,7 +326,7 @@
 
 
 void
-GOMP_parallel_end(void)
+xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void)
 {
     int gtid = __kmp_get_gtid();
     MKLOC(loc, "GOMP_parallel_end");
@@ -457,31 +458,31 @@
     }
 
 
-LOOP_START(GOMP_loop_static_start, kmp_sch_static)
-LOOP_NEXT(GOMP_loop_static_next, {})
-LOOP_START(GOMP_loop_dynamic_start, kmp_sch_dynamic_chunked)
-LOOP_NEXT(GOMP_loop_dynamic_next, {})
-LOOP_START(GOMP_loop_guided_start, kmp_sch_guided_chunked)
-LOOP_NEXT(GOMP_loop_guided_next, {})
-LOOP_RUNTIME_START(GOMP_loop_runtime_start, kmp_sch_runtime)
-LOOP_NEXT(GOMP_loop_runtime_next, {})
+LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_STATIC_START), kmp_sch_static)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {})
+LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START), kmp_sch_dynamic_chunked)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {})
+LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_GUIDED_START), kmp_sch_guided_chunked)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {})
+LOOP_RUNTIME_START(xexpand(KMP_API_NAME_GOMP_LOOP_RUNTIME_START), kmp_sch_runtime)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {})
 
-LOOP_START(GOMP_loop_ordered_static_start, kmp_ord_static)
-LOOP_NEXT(GOMP_loop_ordered_static_next, \
+LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START), kmp_ord_static)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT), \
     { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
-LOOP_START(GOMP_loop_ordered_dynamic_start, kmp_ord_dynamic_chunked)
-LOOP_NEXT(GOMP_loop_ordered_dynamic_next, \
+LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START), kmp_ord_dynamic_chunked)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT), \
     { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
-LOOP_START(GOMP_loop_ordered_guided_start, kmp_ord_guided_chunked)
-LOOP_NEXT(GOMP_loop_ordered_guided_next, \
+LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START), kmp_ord_guided_chunked)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT), \
     { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
-LOOP_RUNTIME_START(GOMP_loop_ordered_runtime_start, kmp_ord_runtime)
-LOOP_NEXT(GOMP_loop_ordered_runtime_next, \
+LOOP_RUNTIME_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START), kmp_ord_runtime)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT), \
     { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
 
 
 void
-GOMP_loop_end(void)
+xexpand(KMP_API_NAME_GOMP_LOOP_END)(void)
 {
     int gtid = __kmp_get_gtid();
     KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid))
@@ -493,7 +494,7 @@
 
 
 void
-GOMP_loop_end_nowait(void)
+xexpand(KMP_API_NAME_GOMP_LOOP_END_NOWAIT)(void)
 {
     KA_TRACE(20, ("GOMP_loop_end_nowait: T#%d\n", __kmp_get_gtid()))
 }
@@ -598,26 +599,26 @@
     }
 
 
-LOOP_START_ULL(GOMP_loop_ull_static_start, kmp_sch_static)
-LOOP_NEXT_ULL(GOMP_loop_ull_static_next, {})
-LOOP_START_ULL(GOMP_loop_ull_dynamic_start, kmp_sch_dynamic_chunked)
-LOOP_NEXT_ULL(GOMP_loop_ull_dynamic_next, {})
-LOOP_START_ULL(GOMP_loop_ull_guided_start, kmp_sch_guided_chunked)
-LOOP_NEXT_ULL(GOMP_loop_ull_guided_next, {})
-LOOP_RUNTIME_START_ULL(GOMP_loop_ull_runtime_start, kmp_sch_runtime)
-LOOP_NEXT_ULL(GOMP_loop_ull_runtime_next, {})
+LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START), kmp_sch_static)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT), {})
+LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START), kmp_sch_dynamic_chunked)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT), {})
+LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START), kmp_sch_guided_chunked)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {})
+LOOP_RUNTIME_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {})
 
-LOOP_START_ULL(GOMP_loop_ull_ordered_static_start, kmp_ord_static)
-LOOP_NEXT_ULL(GOMP_loop_ull_ordered_static_next, \
+LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START), kmp_ord_static)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT), \
     { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
-LOOP_START_ULL(GOMP_loop_ull_ordered_dynamic_start, kmp_ord_dynamic_chunked)
-LOOP_NEXT_ULL(GOMP_loop_ull_ordered_dynamic_next, \
+LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START), kmp_ord_dynamic_chunked)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT), \
     { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
-LOOP_START_ULL(GOMP_loop_ull_ordered_guided_start, kmp_ord_guided_chunked)
-LOOP_NEXT_ULL(GOMP_loop_ull_ordered_guided_next, \
+LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START), kmp_ord_guided_chunked)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT), \
     { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
-LOOP_RUNTIME_START_ULL(GOMP_loop_ull_ordered_runtime_start, kmp_ord_runtime)
-LOOP_NEXT_ULL(GOMP_loop_ull_ordered_runtime_next, \
+LOOP_RUNTIME_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START), kmp_ord_runtime)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT), \
     { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
 
 
@@ -659,10 +660,10 @@
     }
 
 
-PARALLEL_LOOP_START(GOMP_parallel_loop_static_start, kmp_sch_static)
-PARALLEL_LOOP_START(GOMP_parallel_loop_dynamic_start, kmp_sch_dynamic_chunked)
-PARALLEL_LOOP_START(GOMP_parallel_loop_guided_start, kmp_sch_guided_chunked)
-PARALLEL_LOOP_START(GOMP_parallel_loop_runtime_start, kmp_sch_runtime)
+PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START), kmp_sch_static)
+PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START), kmp_sch_dynamic_chunked)
+PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START), kmp_sch_guided_chunked)
+PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START), kmp_sch_runtime)
 
 
 #if OMP_30_ENABLED
@@ -674,7 +675,7 @@
 //
 
 void
-GOMP_task(void (*func)(void *), void *data, void (*copy_func)(void *, void *),
+xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, void (*copy_func)(void *, void *),
   long arg_size, long arg_align, int if_cond, unsigned gomp_flags)
 {
     MKLOC(loc, "GOMP_task");
@@ -728,7 +729,7 @@
 
 
 void
-GOMP_taskwait(void)
+xexpand(KMP_API_NAME_GOMP_TASKWAIT)(void)
 {
     MKLOC(loc, "GOMP_taskwait");
     int gtid = __kmp_entry_gtid();
@@ -759,7 +760,7 @@
 //
 
 unsigned
-GOMP_sections_start(unsigned count)
+xexpand(KMP_API_NAME_GOMP_SECTIONS_START)(unsigned count)
 {
     int status;
     kmp_int lb, ub, stride;
@@ -786,7 +787,7 @@
 
 
 unsigned
-GOMP_sections_next(void)
+xexpand(KMP_API_NAME_GOMP_SECTIONS_NEXT)(void)
 {
     int status;
     kmp_int lb, ub, stride;
@@ -811,7 +812,7 @@
 
 
 void
-GOMP_parallel_sections_start(void (*task) (void *), void *data,
+xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(void (*task) (void *), void *data,
   unsigned num_threads, unsigned count)
 {
     int gtid = __kmp_entry_gtid();
@@ -839,7 +840,7 @@
 
 
 void
-GOMP_sections_end(void)
+xexpand(KMP_API_NAME_GOMP_SECTIONS_END)(void)
 {
     int gtid = __kmp_get_gtid();
     KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid))
@@ -851,11 +852,175 @@
 
 
 void
-GOMP_sections_end_nowait(void)
+xexpand(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT)(void)
 {
     KA_TRACE(20, ("GOMP_sections_end_nowait: T#%d\n", __kmp_get_gtid()))
 }
 
+// libgomp has an empty function for GOMP_taskyield as of 2013-10-10
+void
+xexpand(KMP_API_NAME_GOMP_TASKYIELD)(void)
+{
+
+}
+
+/*
+    The following sections of code create aliases for the GOMP_* functions,
+    then create versioned symbols using the assembler directive .symver.
+    This is only pertinent for ELF .so libraries.
+    xaliasify and xversionify are defined in kmp_ftn_os.h.
+*/
+
+#if KMP_OS_LINUX
+
+// GOMP_1.0 aliases
+xaliasify(KMP_API_NAME_GOMP_ATOMIC_END, 10);
+xaliasify(KMP_API_NAME_GOMP_ATOMIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_BARRIER, 10);
+xaliasify(KMP_API_NAME_GOMP_CRITICAL_END, 10);
+xaliasify(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10);
+xaliasify(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10);
+xaliasify(KMP_API_NAME_GOMP_CRITICAL_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_END, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_ORDERED_END, 10);
+xaliasify(KMP_API_NAME_GOMP_ORDERED_START, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_END, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_START, 10);
+xaliasify(KMP_API_NAME_GOMP_SECTIONS_END, 10);
+xaliasify(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10);
+xaliasify(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_SECTIONS_START, 10);
+xaliasify(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10);
+xaliasify(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10);
+xaliasify(KMP_API_NAME_GOMP_SINGLE_START, 10);
+
+// GOMP_2.0 aliases
+#if OMP_30_ENABLED
+xaliasify(KMP_API_NAME_GOMP_TASK, 20);
+xaliasify(KMP_API_NAME_GOMP_TASKWAIT, 20);
+#endif
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20);
+
+// GOMP_3.0 aliases
+xaliasify(KMP_API_NAME_GOMP_TASKYIELD, 30);
+
+// GOMP_4.0 aliases
+/* TODO: add GOMP_4.0 aliases when corresponding
+         GOMP_* functions are implemented
+*/
+
+// GOMP_1.0 versioned symbols
+xversionify(KMP_API_NAME_GOMP_ATOMIC_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_ATOMIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_BARRIER, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_CRITICAL_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_CRITICAL_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_ORDERED_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_ORDERED_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SECTIONS_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SECTIONS_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SINGLE_START, 10, "GOMP_1.0");
+
+// GOMP_2.0 versioned symbols
+#if OMP_30_ENABLED
+xversionify(KMP_API_NAME_GOMP_TASK, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_TASKWAIT, 20, "GOMP_2.0");
+#endif
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20, "GOMP_2.0");
+
+// GOMP_3.0 versioned symbols
+xversionify(KMP_API_NAME_GOMP_TASKYIELD, 30, "GOMP_3.0");
+
+// GOMP_4.0 versioned symbols
+/* TODO: add GOMP_4.0 versioned symbols when corresponding
+         GOMP_* functions are implemented
+*/
+
+#endif /* KMP_OS_LINUX */
+
 #ifdef __cplusplus
     } //extern "C"
 #endif // __cplusplus
diff --git a/openmp/runtime/src/kmp_i18n.c b/openmp/runtime/src/kmp_i18n.c
index e23e9f1..5cca6e8 100644
--- a/openmp/runtime/src/kmp_i18n.c
+++ b/openmp/runtime/src/kmp_i18n.c
@@ -1,7 +1,7 @@
 /*
  * kmp_i18n.c
- * $Revision: 42181 $
- * $Date: 2013-03-26 15:04:45 -0500 (Tue, 26 Mar 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
  */
 
 
@@ -668,7 +668,7 @@
 
 void
 __kmp_i18n_dump_catalog(
-    kmp_str_buf_t & buffer
+    kmp_str_buf_t * buffer
 ) {
 
     struct kmp_i18n_id_range_t {
@@ -676,7 +676,7 @@
         kmp_i18n_id_t  last;
     }; // struct kmp_i18n_id_range_t
 
-    static kmp_i18n_id_range_t ranges[] = {
+    static struct kmp_i18n_id_range_t ranges[] = {
         { kmp_i18n_prp_first, kmp_i18n_prp_last },
         { kmp_i18n_str_first, kmp_i18n_str_last },
         { kmp_i18n_fmt_first, kmp_i18n_fmt_last },
@@ -684,18 +684,20 @@
         { kmp_i18n_hnt_first, kmp_i18n_hnt_last }
     }; // ranges
 
-    int           num_of_ranges = sizeof( ranges ) / sizeof( kmp_i18n_id_range_t );
+    int           num_of_ranges = sizeof( ranges ) / sizeof( struct kmp_i18n_id_range_t );
     int           range;
     kmp_i18n_id_t id;
 
     for ( range = 0; range < num_of_ranges; ++ range ) {
-        __kmp_str_buf_print( & buffer, "*** Set #%d ***\n", range + 1 );
-        for ( id = kmp_i18n_id_t( ranges[ range ].first + 1 ); id < ranges[ range ].last; id = kmp_i18n_id_t( id + 1 ) ) {
-             __kmp_str_buf_print( & buffer, "%d: <<%s>>\n", id, __kmp_i18n_catgets( id ) );
+        __kmp_str_buf_print( buffer, "*** Set #%d ***\n", range + 1 );
+        for ( id = (kmp_i18n_id_t)( ranges[ range ].first + 1 );
+              id < ranges[ range ].last;
+              id = (kmp_i18n_id_t)( id + 1 ) ) {
+             __kmp_str_buf_print( buffer, "%d: <<%s>>\n", id, __kmp_i18n_catgets( id ) );
         }; // for id
     }; // for range
 
-    __kmp_printf( "%s", buffer.str );
+    __kmp_printf( "%s", buffer->str );
 
 } // __kmp_i18n_dump_catalog
 
diff --git a/openmp/runtime/src/kmp_i18n.h b/openmp/runtime/src/kmp_i18n.h
index 9392e62..fea8de4 100644
--- a/openmp/runtime/src/kmp_i18n.h
+++ b/openmp/runtime/src/kmp_i18n.h
@@ -1,7 +1,7 @@
 /*
  * kmp_i18n.h
- * $Revision: 42061 $
- * $Date: 2013-02-28 16:36:24 -0600 (Thu, 28 Feb 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
  */
 
 
@@ -183,7 +183,7 @@
     }
 
 #ifdef KMP_DEBUG
-    void __kmp_i18n_dump_catalog( kmp_str_buf_t & buffer );
+    void __kmp_i18n_dump_catalog( kmp_str_buf_t * buffer );
 #endif // KMP_DEBUG
 
 #ifdef __cplusplus
diff --git a/openmp/runtime/src/kmp_itt.h b/openmp/runtime/src/kmp_itt.h
index ced8fc8..0ee79b6 100644
--- a/openmp/runtime/src/kmp_itt.h
+++ b/openmp/runtime/src/kmp_itt.h
@@ -1,8 +1,8 @@
 #if USE_ITT_BUILD
 /*
  * kmp_itt.h -- ITT Notify interface.
- * $Revision: 42616 $
- * $Date: 2013-08-26 11:47:32 -0500 (Mon, 26 Aug 2013) $
+ * $Revision: 42829 $
+ * $Date: 2013-11-21 05:44:01 -0600 (Thu, 21 Nov 2013) $
  */
 
 
@@ -59,6 +59,9 @@
 __kmp_inline void __kmp_itt_region_joined(   int gtid, int serialized = 0 ); // Master only, after joining threads.
     // (*) Note: A thread may execute tasks after this point, though.
 
+// --- Frame reporting ---
+__kmp_inline void __kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t *loc );
+
 // --- Barrier reporting ---
 __kmp_inline void * __kmp_itt_barrier_object( int gtid, int bt, int set_name = 0, int delta = 0 );
 __kmp_inline void   __kmp_itt_barrier_starting( int gtid, void * object );
@@ -265,6 +268,6 @@
 
 # define KMP_ITT_IGNORE(stmt ) do { stmt } while (0)
 
-# define USE_ITT_BUILD_ARG(x) 
+# define USE_ITT_BUILD_ARG(x)
 
 #endif /* USE_ITT_BUILD */
diff --git a/openmp/runtime/src/kmp_itt.inl b/openmp/runtime/src/kmp_itt.inl
index bedcca1..6976e7f 100644
--- a/openmp/runtime/src/kmp_itt.inl
+++ b/openmp/runtime/src/kmp_itt.inl
@@ -1,8 +1,8 @@
 #if USE_ITT_BUILD
 /*
  * kmp_itt.inl -- Inline functions of ITT Notify.
- * $Revision: 42616 $
- * $Date: 2013-08-26 11:47:32 -0500 (Mon, 26 Aug 2013) $
+ * $Revision: 42866 $
+ * $Date: 2013-12-10 15:15:58 -0600 (Tue, 10 Dec 2013) $
  */
 
 
@@ -49,6 +49,20 @@
 # define LINKAGE static inline
 #endif
 
+// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses this
+// API to support user-defined synchronization primitives, but does not use ZCA;
+// it would be safe to turn this off until wider support becomes available.
+#if USE_ITT_ZCA
+#ifdef __INTEL_COMPILER
+#   if __INTEL_COMPILER >= 1200
+#       undef __itt_sync_acquired
+#       undef __itt_sync_releasing
+#       define __itt_sync_acquired(addr)    __notify_zc_intrinsic((char *)"sync_acquired", addr)
+#       define __itt_sync_releasing(addr)   __notify_intrinsic((char *)"sync_releasing", addr)
+#   endif
+#endif
+#endif
+
 /*
     ------------------------------------------------------------------------------------------------
     Parallel region reporting.
@@ -79,10 +93,6 @@
 #if USE_ITT_NOTIFY
     kmp_team_t *      team = __kmp_team_from_gtid( gtid );
 #if OMP_30_ENABLED
-    KMP_ITT_DEBUG_LOCK();
-    KMP_ITT_DEBUG_PRINT( "[frm beg] gtid=%d, idx=%d, serialized:%d, empty:%d\n", gtid,
-                         __kmp_threads[gtid]->th.th_ident->reserved_2 - 1, serialized,
-                         (team->t.t_active_level + serialized > 1) );
     if (team->t.t_active_level + serialized > 1)
 #endif
     {
@@ -116,13 +126,19 @@
                                         str_loc.line, str_loc.col);
                 __kmp_str_loc_free( &str_loc );
 
+                __itt_suppress_push(__itt_suppress_memory_errors);
                 __kmp_itt_domains[ frm ] = __itt_domain_create( buff );
+                __itt_suppress_pop();
+
                 __kmp_str_free( &buff );
                 __itt_frame_begin_v3(__kmp_itt_domains[ frm ], NULL);
             }
         } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
             __itt_frame_begin_v3(__kmp_itt_domains[loc->reserved_2 - 1], NULL);
         }
+        KMP_ITT_DEBUG_LOCK();
+        KMP_ITT_DEBUG_PRINT( "[frm beg] gtid=%d, idx=%d, serialized:%d, loc:%p\n",
+                         gtid, loc->reserved_2 - 1, serialized, loc );
     }
 #endif
 } // __kmp_itt_region_forking
@@ -130,6 +146,51 @@
 // -------------------------------------------------------------------------------------------------
 
 LINKAGE void
+__kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t * loc ) {
+#if USE_ITT_NOTIFY
+        if (loc) {
+            if (loc->reserved_2 == 0) {
+                if (__kmp_frame_domain_count < KMP_MAX_FRAME_DOMAINS) {
+                    int frm = KMP_TEST_THEN_INC32( & __kmp_frame_domain_count ); // get "old" value
+                    if (frm >= KMP_MAX_FRAME_DOMAINS) {
+                        KMP_TEST_THEN_DEC32( & __kmp_frame_domain_count );       // revert the count
+                        return;                      // loc->reserved_2 is still 0
+                    }
+                    // Should it be synchronized? See the comment in __kmp_itt_region_forking
+                    loc->reserved_2 = frm + 1;                                   // save "new" value
+
+                    // Transform compiler-generated region location into the format
+                    // that the tools more or less standardized on:
+                    //                               "<func>$omp$frame@[file:]<line>[:<col>]"
+                    const char * buff = NULL;
+                    kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
+                    if( imbalance ) {
+                        buff = __kmp_str_format("%s$omp$barrier-imbalance@%s:%d",
+                                                str_loc.func, str_loc.file, str_loc.col);
+                    } else {
+                        buff = __kmp_str_format("%s$omp$barrier@%s:%d",
+                                                str_loc.func, str_loc.file, str_loc.col);
+                    }
+                    __kmp_str_loc_free( &str_loc );
+
+                    __itt_suppress_push(__itt_suppress_memory_errors);
+                    __kmp_itt_domains[ frm ] = __itt_domain_create( buff );
+                    __itt_suppress_pop();
+
+                    __kmp_str_free( &buff );
+                    __itt_frame_submit_v3(__kmp_itt_domains[ frm ], NULL, begin, end );
+                }
+            } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
+                __itt_frame_submit_v3(__kmp_itt_domains[loc->reserved_2 - 1], NULL, begin, end );
+            }
+    }
+
+#endif
+} // __kmp_itt_frame_submit
+
+// -------------------------------------------------------------------------------------------------
+
+LINKAGE void
 __kmp_itt_region_starting( int gtid ) {
 #if USE_ITT_NOTIFY
 #endif
@@ -150,10 +211,6 @@
 #if USE_ITT_NOTIFY
     kmp_team_t *      team = __kmp_team_from_gtid( gtid );
 #if OMP_30_ENABLED
-    KMP_ITT_DEBUG_LOCK();
-    KMP_ITT_DEBUG_PRINT( "[frm end] gtid=%d, idx=%d, serialized:%d, empty:%d\n", gtid,
-                         __kmp_threads[gtid]->th.th_ident->reserved_2 - 1, serialized,
-                         (team->t.t_active_level + serialized > 1) );
     if (team->t.t_active_level + serialized > 1)
 #endif
     {
@@ -162,7 +219,10 @@
     }
     ident_t *         loc  = __kmp_thread_from_gtid( gtid )->th.th_ident;
     if (loc && loc->reserved_2 && loc->reserved_2 <= KMP_MAX_FRAME_DOMAINS) {
+        KMP_ITT_DEBUG_LOCK();
         __itt_frame_end_v3(__kmp_itt_domains[loc->reserved_2 - 1], NULL);
+        KMP_ITT_DEBUG_PRINT( "[frm end] gtid=%d, idx=%d, serialized:%d, loc:%p\n",
+                         gtid, loc->reserved_2 - 1, serialized, loc );
     }
 #endif
 } // __kmp_itt_region_joined
@@ -577,7 +637,7 @@
 void
 __kmp_itt_single_start( int gtid ) {
 #if USE_ITT_NOTIFY
-    if ( __itt_mark_create_ptr ) {
+    if ( __itt_mark_create_ptr || KMP_ITT_DEBUG ) {
         kmp_info_t *   thr = __kmp_thread_from_gtid( (gtid) );
         ident_t *      loc = thr->th.th_ident;
         char const *   src = ( loc == NULL ? NULL : loc->psource );
diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp
index d042019..766cf83 100644
--- a/openmp/runtime/src/kmp_lock.cpp
+++ b/openmp/runtime/src/kmp_lock.cpp
@@ -1,7 +1,7 @@
 /*
  * kmp_lock.cpp -- lock-related functions
- * $Revision: 42613 $
- * $Date: 2013-08-23 13:29:50 -0500 (Fri, 23 Aug 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
  */
 
 
@@ -23,7 +23,7 @@
 #include "kmp_lock.h"
 #include "kmp_io.h"
 
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
 # include <unistd.h>
 # include <sys/syscall.h>
 // We should really include <futex.h>, but that causes compatibility problems on different
@@ -398,7 +398,7 @@
 }
 
 
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
 
 /* ------------------------------------------------------------------------ */
 /* futex locks */
@@ -755,7 +755,7 @@
     __kmp_destroy_nested_futex_lock( lck );
 }
 
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
 
 
 /* ------------------------------------------------------------------------ */
@@ -2199,10 +2199,10 @@
 
     // We need a fence here, since we must ensure that no memory operations
     // from later in this thread float above that read.
-#if defined( __GNUC__ ) && !defined( __INTEL_COMPILER )
-    __sync_synchronize();
-#else
+#if KMP_COMPILER_ICC
     _mm_mfence();
+#else
+    __sync_synchronize();
 #endif
 
     return res;
@@ -3167,7 +3167,7 @@
         }
         break;
 
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
 
         case lk_futex: {
             __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
@@ -3238,7 +3238,7 @@
         }
         break;
 
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
 
         case lk_ticket: {
             __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h
index bb80b5a..5191cea 100644
--- a/openmp/runtime/src/kmp_lock.h
+++ b/openmp/runtime/src/kmp_lock.h
@@ -1,7 +1,7 @@
 /*
  * kmp_lock.h -- lock header file
- * $Revision: 42590 $
- * $Date: 2013-08-13 20:55:19 -0500 (Tue, 13 Aug 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
  */
 
 
@@ -174,7 +174,7 @@
 extern void __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck );
 
 
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
 
 // ----------------------------------------------------------------------------
 // futex locks.  futex locks are only available on Linux* OS.
@@ -224,7 +224,7 @@
 extern void __kmp_init_nested_futex_lock( kmp_futex_lock_t *lck );
 extern void __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck );
 
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
 
 
 // ----------------------------------------------------------------------------
@@ -479,31 +479,31 @@
 
 #define KMP_BOOTSTRAP_LOCK_INITIALIZER( lock ) KMP_TICKET_LOCK_INITIALIZER( (lock) )
 
-inline void
+static inline void
 __kmp_acquire_bootstrap_lock( kmp_bootstrap_lock_t *lck )
 {
     __kmp_acquire_ticket_lock( lck, KMP_GTID_DNE );
 }
 
-inline int
+static inline int
 __kmp_test_bootstrap_lock( kmp_bootstrap_lock_t *lck )
 {
     return __kmp_test_ticket_lock( lck, KMP_GTID_DNE );
 }
 
-inline void
+static inline void
 __kmp_release_bootstrap_lock( kmp_bootstrap_lock_t *lck )
 {
     __kmp_release_ticket_lock( lck, KMP_GTID_DNE );
 }
 
-inline void
+static inline void
 __kmp_init_bootstrap_lock( kmp_bootstrap_lock_t *lck )
 {
     __kmp_init_ticket_lock( lck );
 }
 
-inline void
+static inline void
 __kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t *lck )
 {
     __kmp_destroy_ticket_lock( lck );
@@ -524,31 +524,31 @@
 
 typedef kmp_ticket_lock_t kmp_lock_t;
 
-inline void
+static inline void
 __kmp_acquire_lock( kmp_lock_t *lck, kmp_int32 gtid )
 {
     __kmp_acquire_ticket_lock( lck, gtid );
 }
 
-inline int
+static inline int
 __kmp_test_lock( kmp_lock_t *lck, kmp_int32 gtid )
 {
     return __kmp_test_ticket_lock( lck, gtid );
 }
 
-inline void
+static inline void
 __kmp_release_lock( kmp_lock_t *lck, kmp_int32 gtid )
 {
     __kmp_release_ticket_lock( lck, gtid );
 }
 
-inline void
+static inline void
 __kmp_init_lock( kmp_lock_t *lck )
 {
     __kmp_init_ticket_lock( lck );
 }
 
-inline void
+static inline void
 __kmp_destroy_lock( kmp_lock_t *lck )
 {
     __kmp_destroy_ticket_lock( lck );
@@ -570,7 +570,7 @@
 enum kmp_lock_kind {
     lk_default = 0,
     lk_tas,
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
     lk_futex,
 #endif
     lk_ticket,
@@ -587,7 +587,7 @@
 
 union kmp_user_lock {
     kmp_tas_lock_t     tas;
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
     kmp_futex_lock_t   futex;
 #endif
     kmp_ticket_lock_t  ticket;
@@ -606,7 +606,7 @@
 
 extern kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck );
 
-inline kmp_int32
+static inline kmp_int32
 __kmp_get_user_lock_owner( kmp_user_lock_p lck )
 {
     KMP_DEBUG_ASSERT( __kmp_get_user_lock_owner_ != NULL );
@@ -615,7 +615,7 @@
 
 extern void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
 
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
 
 #define __kmp_acquire_user_lock_with_checks(lck,gtid)                                           \
     if (__kmp_user_lock_kind == lk_tas) {                                                       \
@@ -655,7 +655,7 @@
     }
 
 #else
-inline void
+static inline void
 __kmp_acquire_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
 {
     KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL );
@@ -665,11 +665,11 @@
 
 extern int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
 
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
 
 #include "kmp_i18n.h"                       /* AC: KMP_FATAL definition */
 extern int __kmp_env_consistency_check;     /* AC: copy from kmp.h here */
-inline int
+static inline int
 __kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
 {
     if ( __kmp_user_lock_kind == lk_tas ) {
@@ -688,7 +688,7 @@
     }
 }
 #else
-inline int
+static inline int
 __kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
 {
     KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL );
@@ -698,7 +698,7 @@
 
 extern void ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
 
-inline void
+static inline void
 __kmp_release_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
 {
     KMP_DEBUG_ASSERT( __kmp_release_user_lock_with_checks_ != NULL );
@@ -707,7 +707,7 @@
 
 extern void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck );
 
-inline void
+static inline void
 __kmp_init_user_lock_with_checks( kmp_user_lock_p lck )
 {
     KMP_DEBUG_ASSERT( __kmp_init_user_lock_with_checks_ != NULL );
@@ -720,7 +720,7 @@
 //
 extern void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck );
 
-inline void
+static inline void
 __kmp_destroy_user_lock( kmp_user_lock_p lck )
 {
     KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_ != NULL );
@@ -729,7 +729,7 @@
 
 extern void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck );
 
-inline void
+static inline void
 __kmp_destroy_user_lock_with_checks( kmp_user_lock_p lck )
 {
     KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_with_checks_ != NULL );
@@ -780,7 +780,7 @@
     }
 
 #else
-inline void
+static inline void
 __kmp_acquire_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
 {
     KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL );
@@ -791,7 +791,7 @@
 extern int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
 
 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
-inline int
+static inline int
 __kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
 {
     if ( __kmp_user_lock_kind == lk_tas ) {
@@ -820,7 +820,7 @@
     }
 }
 #else
-inline int
+static inline int
 __kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
 {
     KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL );
@@ -830,7 +830,7 @@
 
 extern void ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
 
-inline void
+static inline void
 __kmp_release_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
 {
     KMP_DEBUG_ASSERT( __kmp_release_nested_user_lock_with_checks_ != NULL );
@@ -839,7 +839,7 @@
 
 extern void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck );
 
-inline void __kmp_init_nested_user_lock_with_checks( kmp_user_lock_p lck )
+static inline void __kmp_init_nested_user_lock_with_checks( kmp_user_lock_p lck )
 {
     KMP_DEBUG_ASSERT( __kmp_init_nested_user_lock_with_checks_ != NULL );
     ( *__kmp_init_nested_user_lock_with_checks_ )( lck );
@@ -847,7 +847,7 @@
 
 extern void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck );
 
-inline void
+static inline void
 __kmp_destroy_nested_user_lock_with_checks( kmp_user_lock_p lck )
 {
     KMP_DEBUG_ASSERT( __kmp_destroy_nested_user_lock_with_checks_ != NULL );
@@ -875,7 +875,7 @@
 
 extern const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck );
 
-inline const ident_t *
+static inline const ident_t *
 __kmp_get_user_lock_location( kmp_user_lock_p lck )
 {
     if ( __kmp_get_user_lock_location_  != NULL ) {
@@ -888,7 +888,7 @@
 
 extern void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc );
 
-inline void
+static inline void
 __kmp_set_user_lock_location( kmp_user_lock_p lck, const ident_t *loc )
 {
     if ( __kmp_set_user_lock_location_  != NULL ) {
@@ -900,7 +900,7 @@
 
 extern void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags );
 
-inline void
+static inline void
 __kmp_set_user_lock_flags( kmp_user_lock_p lck, kmp_lock_flags_t flags )
 {
     if ( __kmp_set_user_lock_flags_  != NULL ) {
@@ -962,7 +962,7 @@
 extern kmp_block_of_locks_t *__kmp_lock_blocks;
 extern int __kmp_num_locks_in_block;
 
-extern kmp_user_lock_p __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags = 0 );
+extern kmp_user_lock_p __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags );
 extern void __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck );
 extern kmp_user_lock_p __kmp_lookup_user_lock( void **user_lock, char const *func );
 extern void __kmp_cleanup_user_locks();
diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h
index 9a5d948..f167605 100644
--- a/openmp/runtime/src/kmp_os.h
+++ b/openmp/runtime/src/kmp_os.h
@@ -1,7 +1,7 @@
 /*
  * kmp_os.h -- KPTS runtime header file.
- * $Revision: 42588 $
- * $Date: 2013-08-13 01:26:00 -0500 (Tue, 13 Aug 2013) $
+ * $Revision: 42820 $
+ * $Date: 2013-11-13 16:53:44 -0600 (Wed, 13 Nov 2013) $
  */
 
 
@@ -42,6 +42,24 @@
 # define KMP_MEM_CONS_MODEL	 KMP_MEM_CONS_VOLATILE
 #endif
 
+/* ------------------------- Compiler recognition ---------------------- */
+#define KMP_COMPILER_ICC 0
+#define KMP_COMPILER_GCC 0
+#define KMP_COMPILER_CLANG 0
+
+#if defined( __INTEL_COMPILER )
+# undef KMP_COMPILER_ICC
+# define KMP_COMPILER_ICC 1
+#elif defined( __clang__ )
+# undef KMP_COMPILER_CLANG
+# define KMP_COMPILER_CLANG 1
+#elif defined( __GNUC__ )
+# undef KMP_COMPILER_GCC
+# define KMP_COMPILER_GCC 1
+#else
+# error Unknown compiler
+#endif
+
 /* ---------------------- Operating system recognition ------------------- */
 
 #define KMP_OS_LINUX    0
@@ -90,28 +108,77 @@
 # if defined __x86_64
 #  undef KMP_ARCH_X86_64
 #  define KMP_ARCH_X86_64 1
-# else
+# elif defined __i386
 #  undef KMP_ARCH_X86
 #  define KMP_ARCH_X86 1
 # endif
 #endif
 
-#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64)
+#if defined(__ARM_ARCH_7__)   || defined(__ARM_ARCH_7R__)  || \
+    defined(__ARM_ARCH_7A__)
+# define KMP_ARCH_ARMV7 1
+#endif
+
+#if defined(KMP_ARCH_ARMV7)   || defined(__ARM_ARCH_6__)   || \
+    defined(__ARM_ARCH_6J__)  || defined(__ARM_ARCH_6K__)  || \
+    defined(__ARM_ARCH_6Z__)  || defined(__ARM_ARCH_6T2__) || \
+    defined(__ARM_ARCH_6ZK__)
+# define KMP_ARCH_ARMV6 1
+#endif
+
+#if defined(KMP_ARCH_ARMV6)   || defined(__ARM_ARCH_5T__)  || \
+    defined(__ARM_ARCH_5E__)  || defined(__ARM_ARCH_5TE__) || \
+    defined(__ARM_ARCH_5TEJ__)
+# define KMP_ARCH_ARMV5 1
+#endif
+
+#if defined(KMP_ARCH_ARMV5)   || defined(__ARM_ARCH_4__)   || \
+    defined(__ARM_ARCH_4T__)
+# define KMP_ARCH_ARMV4 1
+#endif
+
+#if defined(KMP_ARCH_ARMV4)   || defined(__ARM_ARCH_3__)   || \
+    defined(__ARM_ARCH_3M__)
+# define KMP_ARCH_ARMV3 1
+#endif
+
+#if defined(KMP_ARCH_ARMV3)   || defined(__ARM_ARCH_2__)
+# define KMP_ARCH_ARMV2 1
+#endif
+
+#if defined(KMP_ARCH_ARMV2)
+# define KMP_ARCH_ARM 1
+#endif
+
+#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM)
 # error Unknown or unsupported architecture
 #endif
 
-#if KMP_OS_WINDOWS
-# if defined  KMP_WIN_CDECL ||  !defined GUIDEDLL_EXPORTS
-#   define USE_FTN_CDECL      KMP_FTN_UPPER
+/* Check for quad-precision extension. */
+#define KMP_HAVE_QUAD 0
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+# if KMP_COMPILER_ICC
+   /* _Quad is already defined for icc */
+#  undef  KMP_HAVE_QUAD
+#  define KMP_HAVE_QUAD 1
+# elif KMP_COMPILER_CLANG
+   /* Clang doesn't support a software-implemented
+      128-bit extended precision type yet */
+   typedef long double _Quad;
+# elif KMP_COMPILER_GCC
+   typedef __float128 _Quad;
+#  undef  KMP_HAVE_QUAD
+#  define KMP_HAVE_QUAD 1
 # endif
+#else
+# if __LDBL_MAX_EXP__ >= 16384 && KMP_COMPILER_GCC
+   typedef long double _Quad;
+#  undef  KMP_HAVE_QUAD
+#  define KMP_HAVE_QUAD 1
+# endif
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
 
-# define KMP_FTN            KMP_FTN_PLAIN
-# define USE_FTN_EXTRA      KMP_FTN_PLAIN
-# if KMP_ARCH_X86
-#  if defined KMP_WIN_STDCALL || !defined GUIDEDLL_EXPORTS
-#   define USE_FTN_STDCALL   KMP_FTN_UPPER
-#  endif
-# endif
+#if KMP_OS_WINDOWS
   typedef char              kmp_int8;
   typedef unsigned char     kmp_uint8;
   typedef short             kmp_int16;
@@ -143,9 +210,6 @@
 #endif /* KMP_OS_WINDOWS */
 
 #if KMP_OS_UNIX
-# define KMP_FTN        KMP_FTN_PLAIN
-# define USE_FTN_CDECL  KMP_FTN_PLAIN
-# define USE_FTN_EXTRA  KMP_FTN_APPEND
   typedef char               kmp_int8;
   typedef unsigned char      kmp_uint8;
   typedef short              kmp_int16;
@@ -160,7 +224,7 @@
 # define KMP_UINT64_SPEC     "llu"
 #endif /* KMP_OS_UNIX */
 
-#if KMP_ARCH_X86
+#if KMP_ARCH_X86 || KMP_ARCH_ARM
 # define KMP_SIZE_T_SPEC KMP_UINT32_SPEC
 #elif KMP_ARCH_X86_64
 # define KMP_SIZE_T_SPEC KMP_UINT64_SPEC
@@ -199,7 +263,7 @@
 # define  KMP_INT_SPEC	 KMP_INT32_SPEC
 # define  KMP_UINT_SPEC	 KMP_UINT32_SPEC
 # define  KMP_INT_MAX    ((kmp_int32)0x7FFFFFFF)
-# define  KMP_INT_MIN    ((kmp_int64)0x80000000)
+# define  KMP_INT_MIN    ((kmp_int32)0x80000000)
 #endif /* KMP_I8 */
 
 #ifdef __cplusplus
@@ -248,14 +312,6 @@
     //-------------------------------------------------------------------------
 #endif // __cplusplus
 
-#if KMP_OS_WINDOWS
-# define KMP_STDCALL      __stdcall
-#endif
-
-#ifndef KMP_STDCALL
-# define KMP_STDCALL    /* nothing */
-#endif
-
 #define KMP_EXPORT	extern	/* export declaration in guide libraries */
 
 #if __GNUC__ == 4
@@ -336,7 +392,113 @@
 // Synchronization primitives
 //
 
-#if KMP_ASM_INTRINS
+#if KMP_ASM_INTRINS && KMP_OS_WINDOWS
+
+#include <Windows.h>
+
+#pragma intrinsic(InterlockedExchangeAdd)
+#pragma intrinsic(InterlockedCompareExchange)
+#pragma intrinsic(InterlockedExchange)
+#pragma intrinsic(InterlockedExchange64)
+
+//
+// Using InterlockedIncrement / InterlockedDecrement causes a library loading
+// ordering problem, so we use InterlockedExchangeAdd instead.
+//
+# define KMP_TEST_THEN_INC32(p)                 InterlockedExchangeAdd( (volatile long *)(p), 1 )
+# define KMP_TEST_THEN_INC_ACQ32(p)             InterlockedExchangeAdd( (volatile long *)(p), 1 )
+# define KMP_TEST_THEN_ADD4_32(p)               InterlockedExchangeAdd( (volatile long *)(p), 4 )
+# define KMP_TEST_THEN_ADD4_ACQ32(p)            InterlockedExchangeAdd( (volatile long *)(p), 4 )
+# define KMP_TEST_THEN_DEC32(p)                 InterlockedExchangeAdd( (volatile long *)(p), -1 )
+# define KMP_TEST_THEN_DEC_ACQ32(p)             InterlockedExchangeAdd( (volatile long *)(p), -1 )
+# define KMP_TEST_THEN_ADD32(p, v)              InterlockedExchangeAdd( (volatile long *)(p), (v) )
+
+# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) InterlockedCompareExchange( (volatile long *)(p),(long)(sv),(long)(cv) )
+
+# define KMP_XCHG_FIXED32(p, v)                 InterlockedExchange( (volatile long *)(p), (long)(v) )
+# define KMP_XCHG_FIXED64(p, v)                 InterlockedExchange64( (volatile kmp_int64 *)(p), (kmp_int64)(v) )
+
+inline kmp_real32 KMP_XCHG_REAL32( volatile kmp_real32 *p, kmp_real32 v)
+{
+    kmp_int32 tmp = InterlockedExchange( (volatile long *)p, *(long *)&v);
+    return *(kmp_real32*)&tmp;
+}
+
+//
+// Routines that we still need to implement in assembly.
+//
+extern kmp_int32 __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 v );
+extern kmp_int32 __kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 v );
+extern kmp_int32 __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 v );
+extern kmp_int64 __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 v );
+extern kmp_int64 __kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 v );
+extern kmp_int64 __kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 v );
+
+extern kmp_int8 __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
+extern kmp_int16 __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
+extern kmp_int32 __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
+extern kmp_int32 __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
+extern kmp_int8  __kmp_compare_and_store_ret8(  volatile kmp_int8  *p, kmp_int8  cv, kmp_int8  sv );
+extern kmp_int16 __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
+extern kmp_int32 __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
+extern kmp_int64 __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
+
+extern kmp_int8  __kmp_xchg_fixed8( volatile kmp_int8  *p, kmp_int8  v );
+extern kmp_int16 __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 v );
+extern kmp_int32 __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 v );
+extern kmp_int64 __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 v );
+extern kmp_real32 __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 v );
+extern kmp_real64 __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 v );
+
+//# define KMP_TEST_THEN_INC32(p)                 __kmp_test_then_add32( (p), 1 )
+//# define KMP_TEST_THEN_INC_ACQ32(p)             __kmp_test_then_add32( (p), 1 )
+# define KMP_TEST_THEN_INC64(p)                 __kmp_test_then_add64( (p), 1LL )
+# define KMP_TEST_THEN_INC_ACQ64(p)             __kmp_test_then_add64( (p), 1LL )
+//# define KMP_TEST_THEN_ADD4_32(p)               __kmp_test_then_add32( (p), 4 )
+//# define KMP_TEST_THEN_ADD4_ACQ32(p)            __kmp_test_then_add32( (p), 4 )
+# define KMP_TEST_THEN_ADD4_64(p)               __kmp_test_then_add64( (p), 4LL )
+# define KMP_TEST_THEN_ADD4_ACQ64(p)            __kmp_test_then_add64( (p), 4LL )
+//# define KMP_TEST_THEN_DEC32(p)                 __kmp_test_then_add32( (p), -1 )
+//# define KMP_TEST_THEN_DEC_ACQ32(p)             __kmp_test_then_add32( (p), -1 )
+# define KMP_TEST_THEN_DEC64(p)                 __kmp_test_then_add64( (p), -1LL )
+# define KMP_TEST_THEN_DEC_ACQ64(p)             __kmp_test_then_add64( (p), -1LL )
+//# define KMP_TEST_THEN_ADD32(p, v)              __kmp_test_then_add32( (p), (v) )
+# define KMP_TEST_THEN_ADD64(p, v)              __kmp_test_then_add64( (p), (v) )
+
+# define KMP_TEST_THEN_OR32(p, v)               __kmp_test_then_or32( (p), (v) )
+# define KMP_TEST_THEN_AND32(p, v)              __kmp_test_then_and32( (p), (v) )
+# define KMP_TEST_THEN_OR64(p, v)               __kmp_test_then_or64( (p), (v) )
+# define KMP_TEST_THEN_AND64(p, v)              __kmp_test_then_and64( (p), (v) )
+
+# define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv)  __kmp_compare_and_store8( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_REL8(p, cv, sv)  __kmp_compare_and_store8( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) )
+
+# if KMP_ARCH_X86
+#  define KMP_COMPARE_AND_STORE_PTR(p, cv, sv)  __kmp_compare_and_store32( (volatile kmp_int32*)(p), (kmp_int32)(cv), (kmp_int32)(sv) )
+# else /* 64 bit pointers */
+#  define KMP_COMPARE_AND_STORE_PTR(p, cv, sv)  __kmp_compare_and_store64( (volatile kmp_int64*)(p), (kmp_int64)(cv), (kmp_int64)(sv) )
+# endif /* KMP_ARCH_X86 */
+
+# define KMP_COMPARE_AND_STORE_RET8(p, cv, sv)  __kmp_compare_and_store_ret8( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) __kmp_compare_and_store_ret16( (p), (cv), (sv) )
+//# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __kmp_compare_and_store_ret32( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) __kmp_compare_and_store_ret64( (p), (cv), (sv) )
+
+# define KMP_XCHG_FIXED8(p, v)                  __kmp_xchg_fixed8( (p), (v) );
+# define KMP_XCHG_FIXED16(p, v)                 __kmp_xchg_fixed16( (p), (v) );
+//# define KMP_XCHG_FIXED32(p, v)                 __kmp_xchg_fixed32( (p), (v) );
+//# define KMP_XCHG_FIXED64(p, v)                 __kmp_xchg_fixed64( (p), (v) );
+//# define KMP_XCHG_REAL32(p, v)                  __kmp_xchg_real32( (p), (v) );
+# define KMP_XCHG_REAL64(p, v)                  __kmp_xchg_real64( (p), (v) );
+
+
+#elif (KMP_ASM_INTRINS && (KMP_OS_LINUX || KMP_OS_DARWIN)) || !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
 
 /* cast p to correct type so that proper intrinsic will be used */
 # define KMP_TEST_THEN_INC32(p)                 __sync_fetch_and_add( (kmp_int32 *)(p), 1 )
@@ -385,7 +547,7 @@
     return *(kmp_real32*)&tmp;
 }
 
-static kmp_real64 KMP_XCHG_REAL64( volatile kmp_real64 *p, kmp_real64 v)
+inline kmp_real64 KMP_XCHG_REAL64( volatile kmp_real64 *p, kmp_real64 v)
 {
     kmp_int64 tmp = __sync_lock_test_and_set( (kmp_int64*)p, *(kmp_int64*)&v);
     return *(kmp_real64*)&tmp;
@@ -607,6 +769,14 @@
 #endif
 
 
+// Switches for OSS builds
+#ifndef USE_SYSFS_INFO
+# define USE_SYSFS_INFO  0
+#endif
+#ifndef USE_CMPXCHG_FIX
+# define USE_CMPXCHG_FIX 1
+#endif
+
 // Warning levels
 enum kmp_warnings_level {
     kmp_warnings_off = 0,		/* No warnings */
diff --git a/openmp/runtime/src/kmp_runtime.c b/openmp/runtime/src/kmp_runtime.c
index 7d66b9b..37c372b 100644
--- a/openmp/runtime/src/kmp_runtime.c
+++ b/openmp/runtime/src/kmp_runtime.c
@@ -1,7 +1,7 @@
 /*
  * kmp_runtime.c -- KPTS runtime support library
- * $Revision: 42642 $
- * $Date: 2013-09-06 01:57:24 -0500 (Fri, 06 Sep 2013) $
+ * $Revision: 42839 $
+ * $Date: 2013-11-24 13:01:00 -0600 (Sun, 24 Nov 2013) $
  */
 
 
@@ -88,6 +88,8 @@
 #endif /* KMP_DEBUG */
 
 
+#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
+
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
@@ -472,8 +474,7 @@
                     __kmp_unref_task_team( task_team, this_thr );
                 } else if ( KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) {
                     __kmp_execute_tasks( this_thr, th_gtid, spin, check, final_spin, &flag
-                                         USE_ITT_BUILD_ARG( itt_sync_obj )
-                                         );
+                                         USE_ITT_BUILD_ARG( itt_sync_obj ), 0);
                 }
             }; // if
         }; // if
@@ -994,7 +995,7 @@
 }
 
 # endif /* KMP_OS_WINDOWS */
-#endif /* GUIDEDLL_EXPORTS
+#endif /* GUIDEDLL_EXPORTS */
 
 
 /* ------------------------------------------------------------------------ */
@@ -1190,10 +1191,8 @@
         register kmp_balign_team_t *team_bar  = & team -> t.t_bar[ bt ];
         register int                nproc     = this_thr -> th.th_team_nproc;
         register int                i;
-        register kmp_uint           new_state;
-
         /* Don't have to worry about sleep bit here or atomic since team setting */
-        new_state = team_bar -> b_arrived + KMP_BARRIER_STATE_BUMP;
+        register kmp_uint           new_state  = team_bar -> b_arrived + KMP_BARRIER_STATE_BUMP;
 
         /* Collect all the worker team member threads. */
         for (i = 1; i < nproc; i++) {
@@ -1341,7 +1340,7 @@
         /* Need to update the team arrived pointer if we are the master thread */
 
         if ( nproc > 1 )
-            /* New value was already computed in above loop */
+            /* New value was already computed above */
             team -> t.t_bar[ bt ].b_arrived = new_state;
         else
             team -> t.t_bar[ bt ].b_arrived += KMP_BARRIER_STATE_BUMP;
@@ -1380,6 +1379,12 @@
 
     KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
 
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+    // Barrier imbalance - save arrive time to the thread
+    if( __kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3 ) {
+        this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
+    }
+#endif
     /*
      * We now perform a hypercube-embedded tree gather to wait until all
      * of the threads have arrived, and reduce any required data
@@ -1417,6 +1422,9 @@
 
         /* parent threads wait for children to arrive */
 
+        if (new_state == KMP_BARRIER_UNUSED_STATE)
+            new_state = team -> t.t_bar[ bt ].b_arrived + KMP_BARRIER_STATE_BUMP;
+
         for ( child = 1, child_tid = tid + (1 << level);
               child < branch_factor && child_tid < num_threads;
               child++, child_tid += (1 << level) )
@@ -1429,10 +1437,6 @@
             if ( child+1 < branch_factor && next_child_tid < num_threads )
                 KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ] -> th.th_bar[ bt ].bb.b_arrived );
 #endif /* KMP_CACHE_MANAGE */
-            /* Only read this arrived flag once per thread that needs it */
-            if (new_state == KMP_BARRIER_UNUSED_STATE)
-                new_state = team -> t.t_bar[ bt ].b_arrived + KMP_BARRIER_STATE_BUMP;
-
             KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                             "arrived(%p) == %u\n",
                             gtid, team->t.t_id, tid,
@@ -1444,6 +1448,12 @@
                               USE_ITT_BUILD_ARG (itt_sync_obj)
                               );
 
+#if USE_ITT_BUILD
+            // Barrier imbalance: keep the earliest arrival time (min of this thread's and the child's).
+            if( __kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3 ) {
+                this_thr->th.th_bar_arrive_time = KMP_MIN( this_thr->th.th_bar_arrive_time, child_thr->th.th_bar_arrive_time );
+            }
+#endif
             if (reduce) {
 
                 KA_TRACE( 100, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
@@ -1729,7 +1739,6 @@
 
 /* The reverse versions seem to beat the forward versions overall */
 #define KMP_REVERSE_HYPER_BAR
-#ifdef KMP_REVERSE_HYPER_BAR
 static void
 __kmp_hyper_barrier_release( enum barrier_type bt,
                              kmp_info_t *this_thr,
@@ -1751,15 +1760,13 @@
     register kmp_uint32     offset;
     register kmp_uint32     level;
 
-    /*
-     * We now perform a hypercube-embedded tree release for all
-     * of the threads that have been gathered, but in the exact
-     * reverse order from the corresponding gather (for load balance.
-     */
+    /* Perform a hypercube-embedded tree release for all of the threads
+       that have been gathered.  If KMP_REVERSE_HYPER_BAR is defined (the
+       default), the threads are released in the reverse order of the
+       corresponding gather; otherwise they are released in the same order. */
 
     if ( ! KMP_MASTER_TID( tid )) {
         /* worker threads */
-
         KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
           gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
 
@@ -1807,7 +1814,7 @@
 
         TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
         KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
-          gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
+                        gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
 
         KMP_MB();       /* Flush all pending memory write invalidates.  */
 
@@ -1822,6 +1829,7 @@
     num_threads = this_thr -> th.th_team_nproc;
     other_threads = team -> t.t_threads;
 
+#ifdef KMP_REVERSE_HYPER_BAR
     /* count up to correct level for parent */
     for ( level = 0, offset = 1;
           offset < num_threads && (((tid >> level) & (branch_factor-1)) == 0);
@@ -1831,7 +1839,14 @@
     for ( level -= branch_bits, offset >>= branch_bits;
           offset != 0;
           level -= branch_bits, offset >>= branch_bits )
+#else
+    /* Go down the tree, level by level */
+    for ( level = 0, offset = 1;
+          offset < num_threads;
+          level += branch_bits, offset <<= branch_bits )
+#endif // KMP_REVERSE_HYPER_BAR
     {
+#ifdef KMP_REVERSE_HYPER_BAR
         /* Now go in reverse order through the children, highest to lowest.
            Initial setting of child is conservative here. */
         child = num_threads >> ((level==0)?level:level-1);
@@ -1839,8 +1854,18 @@
                   child_tid = tid + (child << level);
               child >= 1;
               child--, child_tid -= (1 << level) )
-        {
+#else
+        if (((tid >> level) & (branch_factor - 1)) != 0)
+            /* No need to go any lower than this, since this is the level
+               parent would be notified */
+            break;
 
+        /* iterate through children on this level of the tree */
+        for ( child = 1, child_tid = tid + (1 << level);
+              child < branch_factor && child_tid < num_threads;
+              child++, child_tid += (1 << level) )
+#endif // KMP_REVERSE_HYPER_BAR
+        {
             if ( child_tid >= num_threads ) continue;   /* child doesn't exist so keep going */
             else {
                 register kmp_info_t   *child_thr = other_threads[ child_tid ];
@@ -1848,7 +1873,11 @@
 #if KMP_CACHE_MANAGE
                 register kmp_uint32 next_child_tid = child_tid - (1 << level);
                 /* prefetch next thread's go count */
+#ifdef KMP_REVERSE_HYPER_BAR
                 if ( child-1 >= 1 && next_child_tid < num_threads )
+#else
+                if ( child+1 < branch_factor && next_child_tid < num_threads )
+#endif // KMP_REVERSE_HYPER_BAR
                     KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ]->th.th_bar[ bt ].bb.b_go );
 #endif /* KMP_CACHE_MANAGE */
 
@@ -1880,154 +1909,6 @@
       gtid, team->t.t_id, tid, bt ) );
 }
 
-#else /* !KMP_REVERSE_HYPER_BAR */
-
-static void
-__kmp_hyper_barrier_release( enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, int propagate_icvs )
-{
-    /* handle fork barrier workers who aren't part of a team yet */
-    register kmp_team_t    *team;
-    register kmp_bstate_t  *thr_bar       = & this_thr -> th.th_bar[ bt ].bb;
-    register kmp_info_t   **other_threads;
-    register kmp_uint32     num_threads;
-    register kmp_uint32     branch_bits   = __kmp_barrier_release_branch_bits[ bt ];
-    register kmp_uint32     branch_factor = 1 << branch_bits;
-    register kmp_uint32     child;
-    register kmp_uint32     child_tid;
-    register kmp_uint32     offset;
-    register kmp_uint32     level;
-
-    /*
-     * We now perform a hypercube-embedded tree release for all
-     * of the threads that have been gathered, but in the same order
-     * as the gather.
-     */
-
-    if ( ! KMP_MASTER_TID( tid )) {
-        /* worker threads */
-
-        KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
-          gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
-
-        /* wait for parent thread to release us */
-        __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE, NULL );
-
-#if USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY
-        if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
-            // we are on a fork barrier where we could not get the object reliably
-            itt_sync_obj  = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 0, -1 );
-            // cancel wait on previous parallel region...
-            __kmp_itt_task_starting( itt_sync_obj );
-
-            if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
-                return;
-
-            itt_sync_obj  = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
-            if ( itt_sync_obj != NULL )
-                __kmp_itt_task_finished( itt_sync_obj );  // call prepare as early as possible for "new" barrier
-
-        } else
-#endif /* USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY */
-        //
-        // early exit for reaping threads releasing forkjoin barrier
-        //
-        if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
-            return;
-
-        //
-        // The worker thread may now assume that the team is valid.
-        //
-#if USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY
-        // libguide only code (cannot use *itt_task* routines)
-        if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
-            // we are on a fork barrier where we could not get the object reliably
-            itt_sync_obj  = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
-            __kmp_itt_barrier_starting( gtid, itt_sync_obj );  // no need to call releasing, but we have paired calls...
-        }
-#endif /* USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY */
-        team = __kmp_threads[ gtid ]-> th.th_team;
-        KMP_DEBUG_ASSERT( team != NULL );
-        tid = __kmp_tid_from_gtid( gtid );
-
-        TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
-        KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
-                        gtid, ( team != NULL ) ? team->t.t_id : -1, tid,
-                        &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
-
-        KMP_MB();       /* Flush all pending memory write invalidates.  */
-
-    } else {  /* KMP_MASTER_TID(tid) */
-        team = __kmp_threads[ gtid ]-> th.th_team;
-        KMP_DEBUG_ASSERT( team != NULL );
-
-        KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) enter for barrier type %d\n",
-          gtid, team->t.t_id, tid, bt ) );
-    }
-
-    /* Now set up team parameters since workers have been released */
-    if ( team == NULL )  {
-        /* handle fork barrier workers who are now part of a team */
-        tid = __kmp_tid_from_gtid( gtid );
-        team = __kmp_threads[ gtid ]-> th.th_team;
-    }
-    num_threads = this_thr -> th.th_team_nproc;
-    other_threads = team -> t.t_threads;
-
-    /* Go down the tree, level by level */
-    for ( level = 0, offset = 1;
-          offset < num_threads;
-          level += branch_bits, offset <<= branch_bits )
-    {
-        if (((tid >> level) & (branch_factor - 1)) != 0)
-            /* No need to go any lower than this, since this is the level
-               parent would be notified */
-            break;
-
-        /* iterate through children on this level of the tree */
-        for ( child = 1, child_tid = tid + (1 << level);
-              child < branch_factor && child_tid < num_threads;
-              child++, child_tid += (1 << level) )
-        {
-            register kmp_info_t   *child_thr = other_threads[ child_tid ];
-            register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
-#if KMP_CACHE_MANAGE
-            {
-                register kmp_uint32 next_child_tid = child_tid + (1 << level);
-                /* prefetch next thread's go count */
-                if ( child+1 < branch_factor && next_child_tid < num_threads )
-                    KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ]->th.th_bar[ bt ].bb.b_go );
-            }
-#endif /* KMP_CACHE_MANAGE */
-
-#if KMP_BARRIER_ICV_PUSH
-            if ( propagate_icvs ) {
-                KMP_DEBUG_ASSERT( team != NULL );
-                __kmp_init_implicit_task( team->t.t_ident,
-                  team->t.t_threads[child_tid], team, child_tid, FALSE );
-                load_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs);
-                store_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs, &team->t.t_implicit_task_taskdata[0].td_icvs);
-                sync_icvs();
-            }
-#endif // KMP_BARRIER_ICV_PUSH
-
-            KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) releasing "
-                            "T#%d(%d:%u) go(%p): %u => %u\n",
-                            gtid, team->t.t_id, tid,
-                            __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
-                            child_tid, &child_bar -> b_go, child_bar -> b_go,
-                            child_bar -> b_go + KMP_BARRIER_STATE_BUMP ) );
-
-            /* release child from barrier */
-            __kmp_release( child_thr, &child_bar -> b_go, kmp_acquire_fence );
-        }
-    }
-
-    KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
-      gtid, team->t.t_id, tid, bt ) );
-}
-#endif /* KMP_REVERSE_HYPER_BAR */
-
-
 /*
  * Internal function to do a barrier.
  * If is_split is true, do a split barrier, otherwise, do a plain barrier
@@ -2043,6 +1924,8 @@
     register kmp_team_t  *team            = this_thr -> th.th_team;
     register int status = 0;
 
+    ident_t * tmp_loc = __kmp_threads[ gtid ]->th.th_ident;
+
     KA_TRACE( 15, ( "__kmp_barrier: T#%d(%d:%d) has arrived\n",
                     gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid) ) );
 
@@ -2126,34 +2009,23 @@
             #endif /* OMP_30_ENABLED */
 
 
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
             // Barrier - report frame end
-#if USE_ITT_BUILD
-            // Collect information only if the file was opened succesfully.
-            if( __kmp_forkjoin_frames_mode == 1 && __kmp_itt_csv_file )
-            {
-                ident_t * loc  = this_thr->th.th_ident;
-                if (loc) {
-                    // Use compiler-generated location to mark the frame:
-                    // "<func>$omp$frame@[file:]<line>[:<col>]"
-                    kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
-
-                    kmp_uint64 fr_end;
-#if defined( __GNUC__ )
-# if !defined( __INTEL_COMPILER )
-                    fr_end = __kmp_hardware_timestamp();
-# else
-                    fr_end = __rdtsc();
-# endif
-#else
-                    fr_end = __rdtsc();
-#endif
-                    K_DIAG( 3, ( "__kmp_barrier: T#%d(%d:%d) frame_begin = %llu, frame_end = %llu\n",
-                                 gtid, ( team != NULL ) ? team->t.t_id : -1, tid, this_thr->th.th_frame_time, fr_end ) );
-
-                    __kmp_str_buf_print( &__kmp_itt_frame_buffer, "%s$omp$frame@%s:%d:%d,%llu,%llu,,\n",
-                                         str_loc.func, str_loc.file, str_loc.line, str_loc.col, this_thr->th.th_frame_time, fr_end );
-                    __kmp_str_loc_free( &str_loc );
-                    this_thr->th.th_frame_time = fr_end;
+            if( __itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode ) {
+                kmp_uint64 tmp = __itt_get_timestamp();
+                switch( __kmp_forkjoin_frames_mode ) {
+                case 1:
+                  __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, tmp_loc );
+                  this_thr->th.th_frame_time = tmp;
+                  break;
+                case 2:
+                  __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, tmp_loc );
+                  break;
+                case 3:
+                  __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, tmp_loc );
+                  __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, tmp_loc );
+                  this_thr->th.th_frame_time = tmp;
+                  break;
                 }
             }
 #endif /* USE_ITT_BUILD */
@@ -2465,7 +2337,7 @@
     KMP_MB();
 
     /* first, let's setup the master thread */
-    master_th -> th.th_info .ds.ds_tid = 0;
+    master_th -> th.th_info.ds.ds_tid  = 0;
     master_th -> th.th_team            = team;
     master_th -> th.th_team_nproc      = team -> t.t_nproc;
     master_th -> th.th_team_master     = master_th;
@@ -2514,6 +2386,17 @@
 static void
 __kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration
 
+static void
+__kmp_setup_icv_copy( kmp_team_t *team, int new_nproc,
+#if OMP_30_ENABLED
+                 kmp_internal_control_t * new_icvs,
+                 ident_t *                loc
+#else
+                 int new_set_nproc, int new_set_dynamic, int new_set_nested,
+                 int new_set_blocktime, int new_bt_intervals, int new_bt_set
+#endif // OMP_30_ENABLED
+                 ); // forward declaration
+
 /* most of the work for a fork */
 /* return true if we really went parallel, false if serialized */
 int
@@ -2527,7 +2410,7 @@
     microtask_t microtask,
     launch_t    invoker,
 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
-#if KMP_ARCH_X86_64 && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
     va_list   * ap
 #else
     va_list     ap
@@ -2576,7 +2459,6 @@
 #endif
 
 
-
     master_th->th.th_ident = loc;
 
 #if OMP_40_ENABLED
@@ -2590,7 +2472,7 @@
         argv = (void**)parent_team->t.t_argv;
         for( i=argc-1; i >= 0; --i )
 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
-#if KMP_ARCH_X86_64 && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
             *argv++ = va_arg( *ap, void * );
 #else
             *argv++ = va_arg( ap, void * );
@@ -2686,11 +2568,11 @@
     /* create a serialized parallel region? */
     if ( nthreads == 1 ) {
         /* josh todo: hypothetical question: what do we do for OS X*? */
-#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
+#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM )
         void *   args[ argc ];
 #else
         void * * args = (void**) alloca( argc * sizeof( void * ) );
-#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 ) */
+#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM ) */
 
         __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
         KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
@@ -2721,7 +2603,7 @@
                 if ( ap ) {
                     for( i=argc-1; i >= 0; --i )
                       /* TODO: revert workaround for Intel(R) 64 tracker #96 */
-                      #if KMP_ARCH_X86_64 && KMP_OS_LINUX
+                      #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
                         *argv++ = va_arg( *ap, void * );
                       #else
                         *argv++ = va_arg( ap, void * );
@@ -2741,7 +2623,7 @@
                 argv = args;
                 for( i=argc-1; i >= 0; --i )
                 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
-                #if KMP_ARCH_X86_64 && KMP_OS_LINUX
+                #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
                     *argv++ = va_arg( *ap, void * );
                 #else
                     *argv++ = va_arg( ap, void * );
@@ -2957,7 +2839,7 @@
 #endif /* OMP_40_ENABLED */
         for( i=argc-1; i >= 0; --i )
 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
-#if KMP_ARCH_X86_64 && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
             *argv++ = va_arg( *ap, void * );
 #else
             *argv++ = va_arg( ap, void * );
@@ -2977,6 +2859,18 @@
         root -> r.r_active = TRUE;
 
     __kmp_fork_team_threads( root, team, master_th, gtid );
+    __kmp_setup_icv_copy(team, nthreads
+#if OMP_30_ENABLED
+			 , &master_th->th.th_current_task->td_icvs, loc
+#else
+			 , parent_team->t.t_set_nproc[master_tid],
+			 parent_team->t.t_set_dynamic[master_tid],
+			 parent_team->t.t_set_nested[master_tid],
+			 parent_team->t.t_set_blocktime[master_tid],
+			 parent_team->t.t_set_bt_intervals[master_tid],
+			 parent_team->t.t_set_bt_set[master_tid]
+#endif /* OMP_30_ENABLED */
+			 );
 
 
     __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
@@ -2992,23 +2886,12 @@
         __kmp_itt_region_forking( gtid );
 #endif /* USE_ITT_BUILD */
 
+#if USE_ITT_BUILD && USE_ITT_NOTIFY && OMP_30_ENABLED
     // Internal fork - report frame begin
-#if USE_ITT_BUILD
-    // Collect information only if the file was opened succesfully.
-    if( __kmp_forkjoin_frames_mode == 1 && __kmp_itt_csv_file )
+    if( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) && __itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr )
     {
-        kmp_uint64 fr_begin;
-#if defined( __GNUC__ )
-# if !defined( __INTEL_COMPILER )
-        fr_begin = __kmp_hardware_timestamp();
-# else
-        fr_begin = __rdtsc();
-# endif
-#else
-        fr_begin = __rdtsc();
-#endif
         if( ! ( team->t.t_active_level > 1 ) ) {
-            master_th->th.th_frame_time   = fr_begin;
+            master_th->th.th_frame_time   = __itt_get_timestamp();
         }
     }
 #endif /* USE_ITT_BUILD */
@@ -3134,7 +3017,10 @@
         // Either not in teams or exiting teams region
         // (teams is a frame and no other frames inside the teams)
 # endif /* OMP_40_ENABLED */
+    {
+        master_th->th.th_ident = loc;
         __kmp_itt_region_joined( gtid );
+    }
 #endif /* USE_ITT_BUILD */
 
 #if OMP_40_ENABLED
@@ -4644,6 +4530,7 @@
     root -> r.r_root_team -> t.t_threads[0] = root_thread;
     root -> r.r_hot_team  -> t.t_threads[0] = root_thread;
     root_thread -> th.th_serial_team -> t.t_threads[0] = root_thread;
+    root_thread -> th.th_serial_team -> t.t_serialized = 0; // AC: the team is created in reserve, not for execution (it is unused for now).
     root -> r.r_uber_thread = root_thread;
 
     /* initialize the thread, get it ready to go */
@@ -5007,6 +4894,19 @@
             TCW_4( __kmp_init_monitor, 1 );
             __kmp_create_monitor( & __kmp_monitor );
             KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
+            #if KMP_OS_WINDOWS
+                // AC: wait until monitor has started. This is a fix for CQ232808.
+                //     The reason is that if the library is loaded/unloaded in a loop with small (parallel)
+                //     work in between, then there is high probability that monitor thread started after
+                //     the library shutdown. At shutdown it is too late to cope with the problem, because
+                //     when the master is in DllMain (process detach) the monitor has no chances to start
+                //     (it is blocked), and master has no means to inform the monitor that the library has gone,
+                //     because all the memory which the monitor can access is going to be released/reset.
+                while ( TCR_4(__kmp_init_monitor) < 2 ) {
+                    KMP_YIELD( TRUE );
+                }
+                KF_TRACE( 10, ( "after monitor thread has started\n" ) );
+            #endif
         }
         __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
     }
@@ -5049,6 +4949,7 @@
                                            0 );
     }
     KMP_ASSERT ( serial_team );
+    serial_team -> t.t_serialized = 0;   // AC: the team is created in reserve, not for execution (it is unused for now).
     serial_team -> t.t_threads[0] = new_thr;
     KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
       new_thr ) );
@@ -5144,76 +5045,94 @@
  * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!!
  */
 static void
-__kmp_reinitialize_team(
-    kmp_team_t *  team,
-    int           new_nproc,
-    #if OMP_30_ENABLED
-        kmp_internal_control_t * new_icvs,
-        ident_t *                loc
-    #else
-        int new_set_nproc, int new_set_dynamic, int new_set_nested,
-        int new_set_blocktime, int new_bt_intervals, int new_bt_set
-    #endif // OMP_30_ENABLED
-) {
-    int f;
-    #if OMP_30_ENABLED
-        KMP_DEBUG_ASSERT( team && new_nproc && new_icvs );
-        KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
-        team->t.t_ident = loc;
-    #else
-        KMP_DEBUG_ASSERT( team && new_nproc && new_set_nproc );
-    #endif // OMP_30_ENABLED
+__kmp_reinitialize_team( kmp_team_t *team,
+#if OMP_30_ENABLED
+                         kmp_internal_control_t *new_icvs, ident_t *loc
+#else
+                         int new_set_nproc, int new_set_dynamic, int new_set_nested,
+                         int new_set_blocktime, int new_bt_intervals, int new_bt_set
+#endif
+                         ) {
+    KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
+                    team->t.t_threads[0], team ) );
+#if OMP_30_ENABLED
+    KMP_DEBUG_ASSERT( team && new_icvs);
+    KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
+    team->t.t_ident = loc;
+#else
+    KMP_DEBUG_ASSERT( team && new_set_nproc );
+#endif // OMP_30_ENABLED
 
     team->t.t_id = KMP_GEN_TEAM_ID();
 
-#if KMP_BARRIER_ICV_PULL
-    //
-    // Copy the ICV's to the team structure, where all of the worker threads
-    // can access them and make their own copies after the barrier.
-    //
-    load_icvs(new_icvs);
-    store_icvs(&team->t.t_initial_icvs, new_icvs);
-
-    //
-    // Set up the master thread's copy of the ICV's.  __kmp_fork_call()
-    // assumes they are already set in the master thread.
-    // FIXME - change that code to use the team->t.t_initial_icvs copy
-    // and eliminate this copy.
-    //
-    __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
-    store_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
-    sync_icvs();
-    KF_TRACE( 10, ( "__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n",
-                    0, team->t.t_threads[0], team ) );
-
-#elif KMP_BARRIER_ICV_PUSH
-    //
-    // Set the ICV's in the master thread only.
-    // They will be propagated by the fork barrier.
-    //
-    __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
-    load_icvs(new_icvs);
-    store_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
-    sync_icvs();
-
-    KF_TRACE( 10, ( "__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n",
-                    0, team->t.t_threads[0], team ) );
-
-#else
-    //
-    // Copy the icvs to each of the threads.  This takes O(nthreads) time.
-    //
+    // Copy ICVs to the master thread's implicit taskdata
 #if OMP_30_ENABLED
     load_icvs(new_icvs);
-#endif
-    for( f=0 ; f<new_nproc ; f++) {
+    __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
+    store_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
+    sync_icvs();
+# else
+    team -> t.t_set_nproc[0]   = new_set_nproc;
+    team -> t.t_set_dynamic[0] = new_set_dynamic;
+    team -> t.t_set_nested[0]  = new_set_nested;
+    team -> t.t_set_blocktime[0]   = new_set_blocktime;
+    team -> t.t_set_bt_intervals[0] = new_bt_intervals;
+    team -> t.t_set_bt_set[0]  = new_bt_set;
+# endif // OMP_30_ENABLED
+
+    KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
+                    team->t.t_threads[0], team ) );
+}
+
+static void
+__kmp_setup_icv_copy(kmp_team_t *  team, int           new_nproc,
+#if OMP_30_ENABLED
+                kmp_internal_control_t * new_icvs,
+                ident_t *                loc
+#else
+                int new_set_nproc, int new_set_dynamic, int new_set_nested,
+                int new_set_blocktime, int new_bt_intervals, int new_bt_set
+#endif // OMP_30_ENABLED
+                )
+{
+    int f;
+
+#if OMP_30_ENABLED
+    KMP_DEBUG_ASSERT( team && new_nproc && new_icvs );
+    KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
+#else
+    KMP_DEBUG_ASSERT( team && new_nproc && new_set_nproc );
+#endif // OMP_30_ENABLED
+
+    // Master thread's copy of the ICVs was set up on the implicit taskdata in __kmp_reinitialize_team.
+    // __kmp_fork_call() assumes the master thread's implicit task has this data before this function is called.
+#if KMP_BARRIER_ICV_PULL
+    // Copy the ICVs into th_fixed_icvs in the master thread's structure (which remains untouched), where all of the
+    // worker threads can access them and make their own copies after the barrier.
+    load_icvs(new_icvs);
+    KMP_DEBUG_ASSERT(team->t.t_threads[0]);  // the threads arrays should be allocated at this point
+    store_icvs(&team->t.t_threads[0]->th.th_fixed_icvs, new_icvs);
+    sync_icvs();
+    KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0, team->t.t_threads[0], team));
+
+#elif KMP_BARRIER_ICV_PUSH
+    // The ICVs will be propagated in the fork barrier, so nothing needs to be done here.
+    KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0, team->t.t_threads[0], team));
+
+#else
+    // Copy the ICVs to each of the non-master threads.  This takes O(nthreads) time.
+# if OMP_30_ENABLED
+    load_icvs(new_icvs);
+# endif // OMP_30_ENABLED
+    KMP_DEBUG_ASSERT(team->t.t_threads[0]);  // the threads arrays should be allocated at this point
+    for(f=1 ; f<new_nproc ; f++) { // skip the master thread
 # if OMP_30_ENABLED
         // TODO: GEH - pass in better source location info since usually NULL here
-        KF_TRACE( 10, ( "__kmp_reinitialize_team1: T#%d this_thread=%p team=%p\n",
+        KF_TRACE( 10, ( "__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                         f, team->t.t_threads[f], team ) );
         __kmp_init_implicit_task( loc, team->t.t_threads[f], team, f, FALSE );
         store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
-        KF_TRACE( 10, ( "__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n",
+        KF_TRACE( 10, ( "__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                         f, team->t.t_threads[f], team ) );
 # else
         team -> t.t_set_nproc[f]   = new_set_nproc;
@@ -5226,9 +5145,8 @@
     }
 # if OMP_30_ENABLED
     sync_icvs();
-# endif
-#endif // KMP_BARRIER_ICV_PUSH || KMP_BARRIER_ICV_PULL
-
+# endif // OMP_30_ENABLED
+#endif // KMP_BARRIER_ICV_PULL
 }
 
 /* initialize the team data structure
@@ -5246,6 +5164,8 @@
         int new_set_blocktime, int new_bt_intervals, int new_bt_set
     #endif // OMP_30_ENABLED
 ) {
+    KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
+
     /* verify */
     KMP_DEBUG_ASSERT( team );
     KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
@@ -5290,18 +5210,18 @@
 
     team -> t.t_control_stack_top = NULL;
 
-    __kmp_reinitialize_team(
-        team, new_nproc,
-        #if OMP_30_ENABLED
-            new_icvs,
-            loc
-        #else
-            new_set_nproc, new_set_dynamic, new_set_nested,
-            new_set_blocktime, new_bt_intervals, new_bt_set
-        #endif // OMP_30_ENABLED
-    );
+    __kmp_reinitialize_team( team,
+#if OMP_30_ENABLED
+                             new_icvs, loc
+#else
+                             new_set_nproc, new_set_dynamic, new_set_nested,
+                             new_set_blocktime, new_bt_intervals, new_bt_set
+#endif // OMP_30_ENABLED
+                             );
+
 
     KMP_MB();
+    KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
 }
 
 #if KMP_OS_LINUX
@@ -5700,15 +5620,15 @@
             // TODO???: team -> t.t_max_active_levels = new_max_active_levels;
             team -> t.t_sched =  new_icvs->sched;
 #endif
-            __kmp_reinitialize_team( team, new_nproc,
+            __kmp_reinitialize_team( team,
 #if OMP_30_ENABLED
-              new_icvs,
-              root->r.r_uber_thread->th.th_ident
+                                     new_icvs, root->r.r_uber_thread->th.th_ident
 #else
-              new_set_nproc, new_set_dynamic, new_set_nested,
-              new_set_blocktime, new_bt_intervals, new_bt_set
-#endif
-            );
+                                     new_set_nproc, new_set_dynamic, new_set_nested,
+                                     new_set_blocktime, new_bt_intervals, new_bt_set
+#endif // OMP_30_ENABLED
+                                     );
+
 
 #if OMP_30_ENABLED
             if ( __kmp_tasking_mode != tskm_immediate_exec ) {
@@ -5768,15 +5688,14 @@
             if(team -> t.t_max_nproc < new_nproc) {
                 /* reallocate larger arrays */
                 __kmp_reallocate_team_arrays(team, new_nproc);
-                __kmp_reinitialize_team( team, new_nproc,
+                __kmp_reinitialize_team( team,
 #if OMP_30_ENABLED
-                  new_icvs,
-                  NULL  // TODO: !!!
+                                         new_icvs, NULL
 #else
-                  new_set_nproc, new_set_dynamic, new_set_nested,
-                  new_set_blocktime, new_bt_intervals, new_bt_set
-#endif
-                );
+                                         new_set_nproc, new_set_dynamic, new_set_nested,
+                                         new_set_blocktime, new_bt_intervals, new_bt_set
+#endif // OMP_30_ENABLED
+                                         );
             }
 
 #if KMP_OS_LINUX
@@ -5859,8 +5778,8 @@
 # endif
 #endif
 
-         }
-         else {
+        }
+        else {
             KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" ));
 #if KMP_MIC
             // This case can mean that omp_set_num_threads() was called and the hot team size
@@ -5877,15 +5796,14 @@
             team -> t.t_sched =  new_icvs->sched;
 #endif
 
-            __kmp_reinitialize_team( team, new_nproc,
+            __kmp_reinitialize_team( team,
 #if OMP_30_ENABLED
-              new_icvs,
-              root->r.r_uber_thread->th.th_ident
+                                     new_icvs, root->r.r_uber_thread->th.th_ident
 #else
-              new_set_nproc, new_set_dynamic, new_set_nested,
-              new_set_blocktime, new_bt_intervals, new_bt_set
-#endif
-            );
+                                     new_set_nproc, new_set_dynamic, new_set_nested,
+                                     new_set_blocktime, new_bt_intervals, new_bt_set
+#endif // OMP_30_ENABLED
+                                     );
 
 #if OMP_30_ENABLED
             KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
@@ -6000,6 +5918,8 @@
      * up seems to really hurt performance a lot on the P4, so, let's not use
      * this... */
     __kmp_allocate_team_arrays( team, max_nproc );
+
+    KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
     __kmp_initialize_team( team, new_nproc,
 #if OMP_30_ENABLED
       new_icvs,
@@ -6293,7 +6213,6 @@
     KA_TRACE( 10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",
                    gtid, team_id, tid ));
 
-
     #if OMP_30_ENABLED
         if ( __kmp_tasking_mode == tskm_extra_barrier ) {
             __kmp_tasking_barrier( team, this_thr, gtid );
@@ -6329,25 +6248,6 @@
         #endif // OMP_30_ENABLED
     }
 
-    #if KMP_OS_WINDOWS
-        // AC: wait here until monitor has started. This is a fix for CQ232808.
-        //     The reason is that if the library is loaded/unloaded in a loop with small (parallel)
-        //     work in between, then there is high probability that monitor thread started after
-        //     the library shutdown. At shutdown it is too late to cope with the problem, because
-        //     when the master is in DllMain (process detach) the monitor has no chances to start
-        //     (it is blocked), and master has no means to inform the monitor that the library has gone,
-        //     because all the memory which the monitor can access is going to be released/reset.
-        //
-        //     The moment before barrier_gather sounds appropriate, because master needs to
-        //     wait for all workers anyway, and we want this to happen as late as possible,
-        //     but before the shutdown which may happen after the barrier.
-        if( KMP_MASTER_TID( tid ) && TCR_4(__kmp_init_monitor) < 2 ) {
-            __kmp_wait_sleep( this_thr, (volatile kmp_uint32*)&__kmp_init_monitor, 2, 0
-                              USE_ITT_BUILD_ARG( itt_sync_obj )
-                              );
-        }
-    #endif
-
 #if USE_ITT_BUILD
     if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
         __kmp_itt_barrier_starting( gtid, itt_sync_obj );
@@ -6390,34 +6290,22 @@
                                       USE_ITT_BUILD_ARG( itt_sync_obj )
                                       );
             }
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
             // Join barrier - report frame end
-#if USE_ITT_BUILD
-            // Collect information only if the file was opened successfully.
-            if( __kmp_forkjoin_frames_mode == 1 && __kmp_itt_csv_file )
-            {
-                ident_t * loc  = this_thr->th.th_ident;
-                if (loc) {
-                    // Use compiler-generated location to mark the frame:
-                    // "<func>$omp$frame@[file:]<line>[:<col>]"
-                    kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
-
-                    kmp_uint64 fr_end;
-#if defined( __GNUC__ )
-# if !defined( __INTEL_COMPILER )
-                    fr_end = __kmp_hardware_timestamp();
-# else
-                    fr_end = __rdtsc();
-# endif
-#else
-                    fr_end = __rdtsc();
-#endif
-                    K_DIAG( 3, ( "__kmp_join_barrier: T#%d(%d:%d) frame_begin = %llu, frame_end = %llu\n",
-                                 gtid, ( team != NULL ) ? team->t.t_id : -1, tid, this_thr->th.th_frame_time, fr_end ) );
-
-                    __kmp_str_buf_print( &__kmp_itt_frame_buffer, "%s$omp$frame@%s:%d:%d,%llu,%llu,,\n",
-                                         str_loc.func, str_loc.file, str_loc.line, str_loc.col, this_thr->th.th_frame_time, fr_end );
-
-                    __kmp_str_loc_free( &str_loc );
+            if( __itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode ) {
+                kmp_uint64 tmp = __itt_get_timestamp();
+                ident_t * loc = team->t.t_ident;
+                switch( __kmp_forkjoin_frames_mode ) {
+                case 1:
+                  __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, loc );
+                  break;
+                case 2:
+                  __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, loc );
+                  break;
+                case 3:
+                  __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, loc );
+                  __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, loc );
+                  break;
                 }
             }
 #endif /* USE_ITT_BUILD */
@@ -6571,20 +6459,16 @@
 #if OMP_30_ENABLED
 
 # if KMP_BARRIER_ICV_PULL
-    //
-    // FIXME - after __kmp_fork_call() is modified to not look at the
-    // master thread's implicit task ICV's, remove the ! KMP_MASTER_TID
-    // restriction from this if condition.
-    //
-    if (! KMP_MASTER_TID( tid ) ) {
-        //
-        // Copy the initial ICV's from the team struct to the implicit task
-        // for this tid.
-        //
-        __kmp_init_implicit_task( team->t.t_ident, team->t.t_threads[tid],
-          team, tid, FALSE );
-        load_icvs(&team->t.t_initial_icvs);
-        store_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &team->t.t_initial_icvs);
+    // Master thread's copy of the ICVs was set up on the implicit taskdata in __kmp_reinitialize_team.
+    // __kmp_fork_call() assumes the master thread's implicit task has this data before this function is called.
+    // We cannot modify __kmp_fork_call() to look at the fixed ICVs in the master's thread struct, because it is
+    // not always the case that the threads arrays have been allocated when __kmp_fork_call() is executed.
+    if (! KMP_MASTER_TID( tid ) ) {  // master thread already has ICVs
+        // Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
+        KA_TRACE( 10, ( "__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid ));
+        load_icvs(&team->t.t_threads[0]->th.th_fixed_icvs);
+        __kmp_init_implicit_task( team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE );
+        store_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &team->t.t_threads[0]->th.th_fixed_icvs);
         sync_icvs();
     }
 # endif // KMP_BARRIER_ICV_PULL
@@ -6716,13 +6600,13 @@
 void
 __kmp_internal_end_dest( void *specific_gtid )
 {
-    #ifdef __INTEL_COMPILER
+    #if KMP_COMPILER_ICC
         #pragma warning( push )
         #pragma warning( disable:  810 ) // conversion from "void *" to "int" may lose significant bits
     #endif
     // Make sure no significant bits are lost
     int gtid = (kmp_intptr_t)specific_gtid - 1;
-    #ifdef __INTEL_COMPILER
+    #if KMP_COMPILER_ICC
         #pragma warning( pop )
     #endif
 
@@ -7503,7 +7387,6 @@
         __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
     }
     __kmp_max_nth = __kmp_sys_max_nth;
-    __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
 
     // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part
     __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
@@ -7572,18 +7455,17 @@
         if ( __kmp_str_match_true( val ) ) {
             kmp_str_buf_t buffer;
             __kmp_str_buf_init( & buffer );
-            __kmp_i18n_dump_catalog( buffer );
+            __kmp_i18n_dump_catalog( & buffer );
             __kmp_printf( "%s", buffer.str );
             __kmp_str_buf_free( & buffer );
         }; // if
         __kmp_env_free( & val );
     #endif
 
+    __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
     // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
     __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
 
-    //  omalyshe: This initialisation beats env var setting.
-    //__kmp_load_balance_interval = 1.0;
 
     // If the library is shut down properly, both pools must be NULL. Just in case, set them
     // to NULL -- some memory may leak, but subsequent code will work even if pools are not freed.
@@ -7876,38 +7758,6 @@
         __kmp_print_version_2();
     }
 
-#if USE_ITT_BUILD
-    // Create CSV file to report frames
-    if( __kmp_forkjoin_frames_mode == 1 )
-    {
-        // Open CSV file to write itt frame information
-        const char * csv_file;
-/*        Internal AXE variables
-        char * host_name = __kmp_env_get("INTEL_MRTE_HOST_NAME");
-        char * out_dir   = __kmp_env_get("INTEL_MRTE_DATA_DIR");*/
-        char * host_name = __kmp_env_get("AMPLXE_HOSTNAME");
-        char * out_dir   = __kmp_env_get("AMPLXE_DATA_DIR");
-
-        if( out_dir && host_name ) {
-            csv_file = __kmp_str_format( "%s/omp-frames-hostname-%s.csv", out_dir, host_name );
-            __kmp_itt_csv_file = fopen( csv_file, "w" );
-            __kmp_str_free( &csv_file );
-        } else {
-#ifdef KMP_DEBUG
-            // Create CSV file in the current dir
-            csv_file = __kmp_str_format( "./omp-frames-hostname-xxx.csv" );
-            __kmp_itt_csv_file = fopen( csv_file, "w" );
-            __kmp_str_free( &csv_file );
-#endif
-        }
-        if( __kmp_itt_csv_file ) {
-            __kmp_str_buf_init( & __kmp_itt_frame_buffer );
-            __kmp_str_buf_print( & __kmp_itt_frame_buffer, "name,start_tsc.TSC,end_tsc,pid,tid\n" );
-        }
-    }
-
-#endif /* USE_ITT_BUILD */
-
     /* we have finished parallel initialization */
     TCW_SYNC_4(__kmp_init_parallel, TRUE);
 
@@ -8347,16 +8197,6 @@
 
     __kmp_i18n_catclose();
 
-#if USE_ITT_BUILD
-    // Close CSV file for frames
-    if( __kmp_forkjoin_frames_mode && __kmp_itt_csv_file ) {
-        fprintf( __kmp_itt_csv_file, __kmp_itt_frame_buffer.str );
-
-        __kmp_str_buf_free( & __kmp_itt_frame_buffer );
-        fclose( __kmp_itt_csv_file );
-    }
-#endif /* USE_ITT_BUILD */
-
     KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
 }
 
@@ -8576,14 +8416,6 @@
  * internal fast reduction routines
  */
 
-// implementation rev. 0.4
-// AT: determine CPU, and always use 'critical method' if non-Intel
-// AT: test loc != NULL
-// AT: what to return if lck == NULL
-// AT: tune the cut-off point for atomic reduce method
-// AT: tune what to return depending on the CPU and platform configuration
-// AT: tune what to return depending on team size
-// AT: move this function out to kmp_csupport.c
 PACKED_REDUCTION_METHOD_T
 __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
         kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
@@ -8641,22 +8473,10 @@
                 #error "Unknown or unsupported OS"
             #endif // KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_DARWIN
 
-        #elif KMP_ARCH_X86
+        #elif KMP_ARCH_X86 || KMP_ARCH_ARM
 
             #if KMP_OS_LINUX || KMP_OS_WINDOWS
 
-                // similar to win_32
-                // 4x1x2 fxqlin04, the 'linear,linear' barrier
-
-                // similar to lin_32
-                // 4x1x2 fxqwin04, the 'linear,linear' barrier
-
-                // actual measurement shows that the critical section method is better if team_size <= 8;
-                // what happenes when team_size > 8 ? ( no machine to test )
-
-                // TO DO: need to run a 32-bit code on Intel(R) 64
-                // TO DO: test the 'hyper,hyper,1,1' barrier
-
                 // basic tuning
 
                 if( atomic_available ) {
@@ -8667,7 +8487,6 @@
 
             #elif KMP_OS_DARWIN
 
-
                 if( atomic_available && ( num_vars <= 3 ) ) {
                         retval = atomic_reduce_block;
                 } else if( tree_available ) {
@@ -8686,18 +8505,6 @@
 
     }
 
-    //AT: TO DO: critical block method not implemented by PAROPT
-    //if( retval == __kmp_critical_reduce_block ) {
-    //  if( lck == NULL ) { // critical block method not implemented by PAROPT
-    //  }
-    //}
-
-    // tune what to return depending on the CPU and platform configuration
-    //           (sometimes tree method is slower than critical)
-
-    // probably tune what to return depending on team size
-
-
     // KMP_FORCE_REDUCTION
 
     if( __kmp_force_reduction_method != reduction_method_not_defined ) {
diff --git a/openmp/runtime/src/kmp_settings.c b/openmp/runtime/src/kmp_settings.c
index b190cce..3a0f6ce 100644
--- a/openmp/runtime/src/kmp_settings.c
+++ b/openmp/runtime/src/kmp_settings.c
@@ -1,7 +1,7 @@
 /*
  * kmp_settings.c -- Initialize environment variables
- * $Revision: 42642 $
- * $Date: 2013-09-06 01:57:24 -0500 (Fri, 06 Sep 2013) $
+ * $Revision: 42816 $
+ * $Date: 2013-11-11 15:33:37 -0600 (Mon, 11 Nov 2013) $
  */
 
 
@@ -26,9 +26,6 @@
 #include "kmp_io.h"
 
 
-#define KMP_MAX( x, y ) ( (x) > (y) ? (x) : (y) )
-#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
-
 static int __kmp_env_isDefined( char const * name );
 static int __kmp_env_toPrint( char const * name, int flag );
 
@@ -3915,7 +3912,7 @@
       || __kmp_str_match( "testandset", 2, value ) ) {
         __kmp_user_lock_kind = lk_tas;
     }
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
     else if ( __kmp_str_match( "futex", 1, value ) ) {
         if ( __kmp_futex_determine_capable() ) {
             __kmp_user_lock_kind = lk_futex;
@@ -4322,6 +4319,16 @@
     }
 } // __kmp_stg_print_omp_display_env
 
+static void
+__kmp_stg_parse_omp_cancellation( char const * name, char const * value, void * data ) {
+    __kmp_stg_parse_bool( name, value, & __kmp_omp_cancellation );
+} // __kmp_stg_parse_omp_cancellation
+
+static void
+__kmp_stg_print_omp_cancellation( kmp_str_buf_t * buffer, char const * name, void * data ) {
+    __kmp_stg_print_bool( buffer, name, __kmp_omp_cancellation );
+} // __kmp_stg_print_omp_cancellation
+
 #endif
 
 // -------------------------------------------------------------------------------------------------
@@ -4476,6 +4483,7 @@
 
 # if OMP_40_ENABLED
     { "OMP_DISPLAY_ENV",                   __kmp_stg_parse_omp_display_env,    __kmp_stg_print_omp_display_env,    NULL, 0, 0 },
+    { "OMP_CANCELLATION",                  __kmp_stg_parse_omp_cancellation,   __kmp_stg_print_omp_cancellation,   NULL, 0, 0 },
 #endif
     { "",                                  NULL,                               NULL,                               NULL, 0, 0 }
 }; // settings
diff --git a/openmp/runtime/src/kmp_str.c b/openmp/runtime/src/kmp_str.c
index d9b98ab..c1f9e9b 100644
--- a/openmp/runtime/src/kmp_str.c
+++ b/openmp/runtime/src/kmp_str.c
@@ -1,7 +1,7 @@
 /*
  * kmp_str.c -- String manipulation routines.
- * $Revision: 42613 $
- * $Date: 2013-08-23 13:29:50 -0500 (Fri, 23 Aug 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
  */
 
 
@@ -329,9 +329,9 @@
 __kmp_str_fname_free(
     kmp_str_fname_t * fname
 ) {
-    __kmp_str_free( const_cast< char const ** >( & fname->path ) );
-    __kmp_str_free( const_cast< char const ** >( & fname->dir  ) );
-    __kmp_str_free( const_cast< char const ** >( & fname->base ) );
+    __kmp_str_free( (char const **)( & fname->path ) );
+    __kmp_str_free( (char const **)( & fname->dir  ) );
+    __kmp_str_free( (char const **)( & fname->base ) );
 } // kmp_str_fname_free
 
 
diff --git a/openmp/runtime/src/kmp_stub.c b/openmp/runtime/src/kmp_stub.c
index e72b196..c1914f4 100644
--- a/openmp/runtime/src/kmp_stub.c
+++ b/openmp/runtime/src/kmp_stub.c
@@ -1,7 +1,7 @@
 /*
  * kmp_stub.c -- stub versions of user-callable OpenMP RT functions.
- * $Revision: 42150 $
- * $Date: 2013-03-15 15:40:38 -0500 (Fri, 15 Mar 2013) $
+ * $Revision: 42826 $
+ * $Date: 2013-11-20 03:39:45 -0600 (Wed, 20 Nov 2013) $
  */
 
 
@@ -29,11 +29,32 @@
     #include <sys/time.h>
 #endif
 
+#include "omp.h"                // Function renamings.
 #include "kmp.h"                // KMP_DEFAULT_STKSIZE
 #include "kmp_version.h"
 
-#include "omp.h"                // Function renamings.
+// Moved from omp.h
+#if OMP_30_ENABLED
 
+#define omp_set_max_active_levels    ompc_set_max_active_levels
+#define omp_set_schedule             ompc_set_schedule
+#define omp_get_ancestor_thread_num  ompc_get_ancestor_thread_num
+#define omp_get_team_size            ompc_get_team_size
+
+#endif // OMP_30_ENABLED
+
+#define omp_set_num_threads          ompc_set_num_threads
+#define omp_set_dynamic              ompc_set_dynamic
+#define omp_set_nested               ompc_set_nested
+#define kmp_set_stacksize            kmpc_set_stacksize
+#define kmp_set_stacksize_s          kmpc_set_stacksize_s
+#define kmp_set_blocktime            kmpc_set_blocktime
+#define kmp_set_library              kmpc_set_library
+#define kmp_set_defaults             kmpc_set_defaults
+#define kmp_malloc                   kmpc_malloc
+#define kmp_calloc                   kmpc_calloc
+#define kmp_realloc                  kmpc_realloc
+#define kmp_free                     kmpc_free
 
 static double frequency = 0.0;
 
@@ -243,29 +264,5 @@
     return wtick;
 }; // __kmps_get_wtick
 
-
-/*
-    These functions are exported from libraries, but not declared in omp,h and omp_lib.f:
-
-        // omalyshe: eight entries below removed from the library (2011-11-22)
-        kmpc_get_banner
-        kmpc_get_poolmode
-        kmpc_get_poolsize
-        kmpc_get_poolstat
-        kmpc_poolprint
-        kmpc_print_banner
-        kmpc_set_poolmode
-        kmpc_set_poolsize
-
-        kmpc_set_affinity
-        kmp_threadprivate_insert
-        kmp_threadprivate_insert_private_data
-        VT_getthid
-        vtgthid
-
-    The list is collected on lin_32.
-
-*/
-
 // end of file //
 
diff --git a/openmp/runtime/src/kmp_tasking.c b/openmp/runtime/src/kmp_tasking.c
index ea5cdc0..8cac009 100644
--- a/openmp/runtime/src/kmp_tasking.c
+++ b/openmp/runtime/src/kmp_tasking.c
@@ -1,7 +1,7 @@
 /*
  * kmp_tasking.c -- OpenMP 3.0 tasking support.
- * $Revision: 42522 $
- * $Date: 2013-07-16 05:28:49 -0500 (Tue, 16 Jul 2013) $
+ * $Revision: 42852 $
+ * $Date: 2013-12-04 10:50:49 -0600 (Wed, 04 Dec 2013) $
  */
 
 
@@ -620,13 +620,28 @@
 #if OMP_40_ENABLED
         if ( taskdata->td_taskgroup )
             KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
-        __kmp_release_deps(gtid,taskdata);    
+        __kmp_release_deps(gtid,taskdata);
 #endif
     }
 
     KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
                   gtid, taskdata, children) );
 
+#if OMP_40_ENABLED
+    /* If the tasks' destructor thunk flag has been set, we need to invoke the
+       destructor thunk that has been generated by the compiler.
+       The code is placed here, since at this point other tasks might have been released
+       hence overlapping the destructor invokations with some other work in the
+       released tasks.  The OpenMP spec is not specific on when the destructors are
+       invoked, so we should be free to choose.
+     */
+    if (taskdata->td_flags.destructors_thunk) {
+        kmp_routine_entry_t destr_thunk = task->destructors;
+        KMP_ASSERT(destr_thunk);
+        destr_thunk(gtid, task);
+    }
+#endif // OMP_40_ENABLED
+
     // bookkeeping for resuming task:
     // GEH - note tasking_ser => task_serial
     KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
@@ -739,10 +754,10 @@
     task->td_flags.complete    = 0;
     task->td_flags.freed       = 0;
 
-#if OMP_40_ENABLED    
+#if OMP_40_ENABLED
     task->td_dephash = NULL;
     task->td_depnode = NULL;
-#endif    
+#endif
 
     if (set_curr_task) {  // only do this initialization the first time a thread is created
         task->td_incomplete_child_tasks = 0;
@@ -850,7 +865,7 @@
 
     taskdata->td_task_id      = KMP_GEN_TASK_ID();
     taskdata->td_team         = team;
-    taskdata->td_alloc_thread = thread; 
+    taskdata->td_alloc_thread = thread;
     taskdata->td_parent       = parent_task;
     taskdata->td_level        = parent_task->td_level + 1; // increment nesting level
     taskdata->td_ident        = loc_ref;
@@ -863,6 +878,9 @@
     taskdata->td_flags.tiedness    = flags->tiedness;
     taskdata->td_flags.final       = flags->final;
     taskdata->td_flags.merged_if0  = flags->merged_if0;
+#if OMP_40_ENABLED
+    taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
+#endif // OMP_40_ENABLED
     taskdata->td_flags.tasktype    = TASK_EXPLICIT;
 
     // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
@@ -890,7 +908,7 @@
     taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
     taskdata->td_dephash = NULL;
     taskdata->td_depnode = NULL;
-#endif
+#endif 
     // Only need to keep track of child task counts if team parallel and tasking not serialized
     if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
         KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
@@ -946,24 +964,46 @@
 __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
 {
     kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
+#if OMP_40_ENABLED
+    int discard = 0 /* false */;
+#endif
     KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
                   gtid, taskdata, current_task) );
 
     __kmp_task_start( gtid, task, current_task );
 
+#if OMP_40_ENABLED
+    // TODO: cancel tasks if the parallel region has also been cancelled
+    // TODO: check if this sequence can be hoisted above __kmp_task_start
+    // if cancellation has been enabled for this run ...
+    if (__kmp_omp_cancellation) {
+        kmp_info_t *this_thr = __kmp_threads [ gtid ];
+        kmp_team_t * this_team = this_thr->th.th_team;
+        kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
+        if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
+            // this task belongs to a task group and we need to cancel it
+            discard = 1 /* true */;
+        }
+    }
+
     //
     // Invoke the task routine and pass in relevant data.
     // Thunks generated by gcc take a different argument list.
     //
+    if (!discard) {
+#endif // OMP_40_ENABLED
 #ifdef KMP_GOMP_COMPAT
-    if (taskdata->td_flags.native) {
-        ((void (*)(void *))(*(task->routine)))(task->shareds);
-    }
-    else
+        if (taskdata->td_flags.native) {
+            ((void (*)(void *))(*(task->routine)))(task->shareds);
+        }
+        else
 #endif /* KMP_GOMP_COMPAT */
-    {
-        (*(task->routine))(gtid, task);
+        {
+            (*(task->routine))(gtid, task);
+        }
+#if OMP_40_ENABLED
     }
+#endif // OMP_40_ENABLED
 
     __kmp_task_finish( gtid, task, current_task );
 
@@ -1079,10 +1119,8 @@
             // GEH: if team serialized, avoid reading the volatile variable below.
             while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
                 __kmp_execute_tasks( thread, gtid, &(taskdata->td_incomplete_child_tasks),
-                                     0, FALSE, &thread_finished, 
-#if USE_ITT_BUILD
-                                     itt_sync_obj, 
-#endif /* USE_ITT_BUILD */
+                                     0, FALSE, &thread_finished
+                                     USE_ITT_BUILD_ARG(itt_sync_obj),
                                      __kmp_task_stealing_constraint );
             }
         }
@@ -1134,10 +1172,8 @@
             __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
 #endif /* USE_ITT_BUILD */
         if ( ! taskdata->td_flags.team_serial ) {
-            __kmp_execute_tasks( thread, gtid, NULL, 0, FALSE, &thread_finished,
-#if USE_ITT_BUILD
-                                 itt_sync_obj, 
-#endif /* USE_ITT_BUILD */
+            __kmp_execute_tasks( thread, gtid, NULL, 0, FALSE, &thread_finished
+                                 USE_ITT_BUILD_ARG(itt_sync_obj),
                                  __kmp_task_stealing_constraint );
         }
 
@@ -1162,7 +1198,7 @@
 // __kmpc_taskgroup: Start a new taskgroup
 
 void
-__kmpc_taskgroup( ident* loc, int gtid )
+__kmpc_taskgroup( ident_t* loc, int gtid )
 {
     kmp_info_t      * thread = __kmp_threads[ gtid ];
     kmp_taskdata_t  * taskdata = thread->th.th_current_task;
@@ -1170,6 +1206,7 @@
         (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
     KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
     tg_new->count = 0;
+    tg_new->cancel_request = cancel_noreq;
     tg_new->parent = taskdata->td_taskgroup;
     taskdata->td_taskgroup = tg_new;
 }
@@ -1180,7 +1217,7 @@
 //                       and its descendants are complete
 
 void
-__kmpc_end_taskgroup( ident* loc, int gtid )
+__kmpc_end_taskgroup( ident_t* loc, int gtid )
 {
     kmp_info_t      * thread = __kmp_threads[ gtid ];
     kmp_taskdata_t  * taskdata = thread->th.th_current_task;
@@ -1201,10 +1238,8 @@
         if ( ! taskdata->td_flags.team_serial ) {
             while ( TCR_4(taskgroup->count) != 0 ) {
                 __kmp_execute_tasks( thread, gtid, &(taskgroup->count),
-                                     0, FALSE, &thread_finished, 
-#if USE_ITT_BUILD
-                                     itt_sync_obj,
-#endif /* USE_ITT_BUILD */
+                                     0, FALSE, &thread_finished
+                                     USE_ITT_BUILD_ARG(itt_sync_obj),
                                      __kmp_task_stealing_constraint );
             }
         }
@@ -1420,15 +1455,13 @@
 // checker is the value to check to terminate the spin.
 
 int
-__kmp_execute_tasks( kmp_info_t *thread, 
-                     kmp_int32 gtid, 
+__kmp_execute_tasks( kmp_info_t *thread,
+                     kmp_int32 gtid,
                      volatile kmp_uint *spinner,
                      kmp_uint checker,
-                     int final_spin, 
-                     int *thread_finished, 
-#if USE_ITT_BUILD
-                     void * itt_sync_obj,
-#endif /* USE_ITT_BUILD */
+                     int final_spin,
+                     int *thread_finished
+                     USE_ITT_BUILD_ARG(void * itt_sync_obj),
                      kmp_int32 is_constrained )
 {
     kmp_task_team_t *     task_team;
@@ -2297,11 +2330,9 @@
 // in team > 1 !
 
 void
-__kmp_task_team_wait( kmp_info_t *this_thr, 
+__kmp_task_team_wait( kmp_info_t *this_thr,
                       kmp_team_t *team
-#if USE_ITT_BUILD
-                      , void * itt_sync_obj
-#endif /* USE_ITT_BUILD */
+                      USE_ITT_BUILD_ARG(void * itt_sync_obj)
                       )
 {
     kmp_task_team_t *task_team = team->t.t_task_team;
@@ -2320,9 +2351,7 @@
         // termination condition.
         //
         __kmp_wait_sleep( this_thr, &task_team->tt.tt_unfinished_threads, 0, TRUE
-#if USE_ITT_BUILD
-                          , itt_sync_obj
-#endif /* USE_ITT_BUILD */
+                          USE_ITT_BUILD_ARG(itt_sync_obj)
                           );
 
         //
@@ -2361,7 +2390,8 @@
 #if USE_ITT_BUILD
     KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
 #endif /* USE_ITT_BUILD */
-    while (! __kmp_execute_tasks( thread, gtid, spin, 0, TRUE, &flag, NULL ) ) {
+    while (! __kmp_execute_tasks( thread, gtid, spin, 0, TRUE, &flag 
+                                  USE_ITT_BUILD_ARG(NULL), 0 ) ) {
 #if USE_ITT_BUILD
         // TODO: What about itt_sync_obj??
         KMP_FSYNC_SPIN_PREPARE( spin );
diff --git a/openmp/runtime/src/kmp_version.c b/openmp/runtime/src/kmp_version.c
index 0beb824..5d0de77 100644
--- a/openmp/runtime/src/kmp_version.c
+++ b/openmp/runtime/src/kmp_version.c
@@ -1,7 +1,7 @@
 /*
  * kmp_version.c
- * $Revision: 42594 $
- * $Date: 2013-08-16 04:14:33 -0500 (Fri, 16 Aug 2013) $
+ * $Revision: 42806 $
+ * $Date: 2013-11-05 16:16:45 -0600 (Tue, 05 Nov 2013) $
  */
 
 
@@ -27,7 +27,7 @@
 #define stringer( x )  _stringer( x )
 
 // Detect compiler.
-#ifdef __INTEL_COMPILER
+#if KMP_COMPILER_ICC
     #if   __INTEL_COMPILER == 1010
         #define KMP_COMPILER "Intel C++ Compiler 10.1"
     #elif __INTEL_COMPILER == 1100
@@ -49,7 +49,9 @@
     #elif __INTEL_COMPILER == 9999
         #define KMP_COMPILER "Intel C++ Compiler mainline"
     #endif
-#elif defined( __GNUC__ )
+#elif KMP_COMPILER_CLANG
+    #define KMP_COMPILER "Clang " stringer( __clang_major__ ) "." stringer( __clang_minor__ )
+#elif KMP_COMPILER_GCC
     #define KMP_COMPILER "GCC " stringer( __GNUC__ ) "." stringer( __GNUC_MINOR__ )
 #endif
 #ifndef KMP_COMPILER
diff --git a/openmp/runtime/src/makefile.mk b/openmp/runtime/src/makefile.mk
index d7c8266..8185e78 100644
--- a/openmp/runtime/src/makefile.mk
+++ b/openmp/runtime/src/makefile.mk
@@ -1,6 +1,6 @@
 # makefile.mk #
-# $Revision: 42661 $
-# $Date: 2013-09-12 11:37:13 -0500 (Thu, 12 Sep 2013) $
+# $Revision: 42820 $
+# $Date: 2013-11-13 16:53:44 -0600 (Wed, 13 Nov 2013) $
 
 #
 #//===----------------------------------------------------------------------===//
@@ -37,7 +37,7 @@
 # --------------------------------------------------------------------------------------------------
 
 # Build compiler
-BUILD_COMPILER := $(call check_variable,BUILD_COMPILER,icc gcc icl icl.exe)
+BUILD_COMPILER := $(call check_variable,BUILD_COMPILER,icc gcc clang icl icl.exe)
 # Distribution type: com (commercial) or oss (open-source)
 DISTRIBUTION  := $(call check_variable,DISTRIBUTION,com oss)
 
@@ -161,6 +161,18 @@
     endif
 endif
 
+ifeq "$(c)" "clang"
+    c-flags += -Wno-unused-value -Wno-switch
+    cxx-flags += -Wno-unused-value -Wno-switch
+    ifeq "$(arch)" "32"
+        c-flags += -m32 -msse
+        cxx-flags += -m32 -msse
+        fort-flags += -m32 -msse
+        ld-flags += -m32 -msse
+        as-flags += -m32 -msse
+    endif
+endif
+
 ifeq "$(LINK_TYPE)" "dyna"
 # debug-info
     ifeq "$(os)" "win"
@@ -186,7 +198,7 @@
 endif
 
 # Enable saving compiler options and version in object files and libraries.
-ifneq "$(c)" "gcc"
+ifeq "$(filter gcc clang,$(c))" ""
     ifeq "$(os)" "win"
         # Newer MS linker issues warnings if -Qsox is used:
         # "warning LNK4224: /COMMENT is no longer supported;  ignored"
@@ -231,24 +243,17 @@
 # Disable use of EBP as general purpose register.
 ifeq "$(os)" "win"
     ifeq "$(arch)" "32"
-        # ??? In original makefile, this option was specified only in debug builds.
-        # Compare with Linux* OS/OS X* -fno-omit-frame-pointer, which defined always.
         c-flags   += -Oy-
         cxx-flags += -Oy-
     endif
-else
-    ifneq "$(arch)" "64"
-        c-flags   += -fno-omit-frame-pointer
-        cxx-flags += -fno-omit-frame-pointer
-    endif
 endif
 
 ifeq "$(os)" "lin"
     c-flags   += -Wsign-compare
     cxx-flags += -Wsign-compare
     ld-flags  += -Wsign-compare
-    ifneq "$(c)" "gcc"	
-	c-flags   += -Werror	
+    ifeq "$(filter gcc clang,$(c))" ""
+        c-flags   += -Werror
         cxx-flags += -Werror
         ld-flags  += -Werror
     endif
@@ -306,7 +311,7 @@
     ifeq "$(os)" "win"
         c-flags   += -TP
     else
-        ifeq "$(c)" "gcc"
+        ifneq "$(filter gcc clang,$(c))" ""
             c-flags   += -x c++ -std=c++0x
         else
             c-flags   += -Kc++
@@ -352,12 +357,18 @@
             ld-flags-dll += -static-libgcc
             ld-flags-extra += -Wl,-ldl
         endif
+        ifeq "$(c)" "clang"
+            ld-flags-extra += -Wl,-ldl
+        endif
         ifeq "$(arch)" "32"
-            ifneq "$(c)" "gcc"
+            ifeq "$(filter gcc clang,$(c))" ""
             # to workaround CQ215229 link libirc_pic manually
             ld-flags-extra += -lirc_pic
             endif
         endif
+        ifeq "$(filter 32 32e 64,$(arch))" ""
+            ld-flags-extra += $(shell pkg-config --libs libffi)
+        endif
     else
         ifeq "$(arch)" "32e"
             # ???
@@ -452,13 +463,13 @@
 cpp-flags += -D CACHE_LINE=64
 cpp-flags += -D KMP_ADJUST_BLOCKTIME=1
 cpp-flags += -D BUILD_PARALLEL_ORDERED
+cpp-flags += -D KMP_ASM_INTRINS
 ifneq "$(os)" "lrb"
     cpp-flags += -D USE_LOAD_BALANCE
 endif
 ifneq "$(os)" "win"
     cpp-flags += -D USE_CBLKDATA
     # ??? Windows* OS: USE_CBLKDATA defined in kmp.h.
-    cpp-flags += -D KMP_ASM_INTRINS
 endif
 ifeq "$(os)" "win"
     cpp-flags += -D KMP_WIN_CDECL
@@ -477,23 +488,43 @@
     endif
 endif
 
+ifneq "$(filter 32 32e,$(arch))" ""
 cpp-flags += -D KMP_USE_ADAPTIVE_LOCKS=1 -D KMP_DEBUG_ADAPTIVE_LOCKS=0
-
-# define compatibility with OMP 3.0
-ifeq "$(OMP_VERSION)" "40"
-    cpp-flags += -D OMP_40_ENABLED=1
-    cpp-flags += -D OMP_30_ENABLED=1
-else
-    ifeq "$(OMP_VERSION)" "30"
-        cpp-flags += -D OMP_40_ENABLED=0
-        cpp-flags += -D OMP_30_ENABLED=1
-    else
-        cpp-flags += -D OMP_40_ENABLED=0
-        cpp-flags += -D OMP_30_ENABLED=0
-    # TODO: Check OMP_30_ENABLED == 0 is processed correctly.
-    endif
 endif
 
+# define compatibility with different OpenMP versions
+have_omp_50=0
+have_omp_41=0
+have_omp_40=0
+have_omp_30=0
+ifeq "$(OMP_VERSION)" "50"
+	have_omp_50=1
+	have_omp_41=1
+	have_omp_40=1
+	have_omp_30=1
+endif
+ifeq "$(OMP_VERSION)" "41"
+	have_omp_50=0
+	have_omp_41=1
+	have_omp_40=1
+	have_omp_30=1
+endif
+ifeq "$(OMP_VERSION)" "40"
+	have_omp_50=0
+	have_omp_41=0
+	have_omp_40=1
+	have_omp_30=1
+endif
+ifeq "$(OMP_VERSION)" "30"
+	have_omp_50=0
+	have_omp_41=0
+	have_omp_40=0
+	have_omp_30=1
+endif
+cpp-flags += -D OMP_50_ENABLED=$(have_omp_50) -D OMP_41_ENABLED=$(have_omp_41)
+cpp-flags += -D OMP_40_ENABLED=$(have_omp_40) -D OMP_30_ENABLED=$(have_omp_30)
+
+
 # Using ittnotify is enabled by default.
 USE_ITT_NOTIFY = 1
 ifeq "$(os)-$(arch)" "win-64"
@@ -541,8 +572,13 @@
 # only one, target architecture). So we cannot autodetect target architecture
 # within the file, and have to pass target architecture from command line.
 ifneq "$(os)" "win"
-    z_Linux_asm$(obj) : \
-        cpp-flags += -D KMP_ARCH_X86$(if $(filter 32e,$(arch)),_64)
+    ifeq "$(arch)" "arm"
+        z_Linux_asm$(obj) : \
+		    cpp-flags += -D KMP_ARCH_ARM
+    else
+        z_Linux_asm$(obj) : \
+            cpp-flags += -D KMP_ARCH_X86$(if $(filter 32e,$(arch)),_64)
+    endif
 endif
 
 # Defining KMP_BUILD_DATE for all files leads to warning "incompatible redefinition", because the
@@ -606,7 +642,6 @@
 lib_c_items :=      \
     kmp_ftn_cdecl   \
     kmp_ftn_extra   \
-    kmp_ftn_stdcall \
     kmp_version     \
     $(empty)
 lib_cpp_items :=
@@ -653,6 +688,7 @@
 
 ifeq "$(OMP_VERSION)" "40"
     lib_cpp_items += kmp_taskdeps
+    lib_cpp_items += kmp_cancel
 endif
 
     # OS-specific files.
@@ -1214,7 +1250,9 @@
             tt-c-flags  += -pthread
         endif
         tt-c-flags += -o $(tt-exe-file)
-        tt-c-flags += $(if $(filter 64,$(arch)),,$(if $(filter 32,$(arch)),-m32,-m64))
+        ifneq "$(filter 32 32e 64,$(arch))" ""
+            tt-c-flags += $(if $(filter 64,$(arch)),,$(if $(filter 32,$(arch)),-m32,-m64))
+        endif
         tt-libs    += $(lib_file)
         ifeq "$(os)-$(COVERAGE)-$(LINK_TYPE)" "lin-on-stat"
             # Static coverage build on Linux* OS fails due to unresolved symbols dlopen, dlsym, dlclose.
@@ -1343,8 +1381,16 @@
         ifeq "$(arch)" "64"
             td_exp += libc.so.6.1
         endif
+        ifeq "$(arch)" "arm"
+            td_exp += libc.so.6
+            td_exp += ld-linux-armhf.so.3
+        endif
         td_exp += libdl.so.2
         td_exp += libgcc_s.so.1
+        ifeq "$(filter 32 32e 64,$(arch))" ""
+            td_exp += libffi.so.6
+            td_exp += libffi.so.5
+        endif
         ifneq "$(LIB_TYPE)" "stub"
             td_exp += libpthread.so.0
         endif
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h
index 9cc398c..9df6e2f 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h
@@ -109,12 +109,18 @@
 #  define ITT_PLATFORM_POSIX 2
 #endif /* ITT_PLATFORM_POSIX */
 
+#ifndef ITT_PLATFORM_MAC
+#  define ITT_PLATFORM_MAC 3
+#endif /* ITT_PLATFORM_MAC */
+
 #ifndef ITT_PLATFORM
 #  if ITT_OS==ITT_OS_WIN
 #    define ITT_PLATFORM ITT_PLATFORM_WIN
+#  elif ITT_OS==ITT_OS_MAC
+#    define ITT_PLATFORM ITT_PLATFORM_MAC
 #  else
 #    define ITT_PLATFORM ITT_PLATFORM_POSIX
-#  endif /* _WIN32 */
+#  endif
 #endif /* ITT_PLATFORM */
 
 #if defined(_UNICODE) && !defined(UNICODE)
@@ -135,11 +141,11 @@
 #  if ITT_PLATFORM==ITT_PLATFORM_WIN
 #    define CDECL __cdecl
 #  else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#    if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
-#      define CDECL /* not actual on x86_64 platform */
-#    else  /* _M_X64 || _M_AMD64 || __x86_64__ */
+#    if defined _M_IX86 || defined __i386__
 #      define CDECL __attribute__ ((cdecl))
-#    endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+#    else  /* _M_IX86 || __i386__ */
+#      define CDECL /* actual only on x86 platform */
+#    endif /* _M_IX86 || __i386__ */
 #  endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 #endif /* CDECL */
 
@@ -147,11 +153,11 @@
 #  if ITT_PLATFORM==ITT_PLATFORM_WIN
 #    define STDCALL __stdcall
 #  else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#    if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
-#      define STDCALL /* not supported on x86_64 platform */
-#    else  /* _M_X64 || _M_AMD64 || __x86_64__ */
+#    if defined _M_IX86 || defined __i386__
 #      define STDCALL __attribute__ ((stdcall))
-#    endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+#    else  /* _M_IX86 || __i386__ */
+#      define STDCALL /* supported only on x86 platform */
+#    endif /* _M_IX86 || __i386__ */
 #  endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 #endif /* STDCALL */
 
@@ -164,8 +170,8 @@
 
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 /* use __forceinline (VC++ specific) */
-#define INLINE           __forceinline
-#define INLINE_ATTRIBUTE /* nothing */
+#define ITT_INLINE           __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
 #else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 /*
  * Generally, functions are not inlined unless optimization is specified.
@@ -173,11 +179,11 @@
  * if no optimization level was specified.
  */
 #ifdef __STRICT_ANSI__
-#define INLINE           static
+#define ITT_INLINE           static
 #else  /* __STRICT_ANSI__ */
-#define INLINE           static inline
+#define ITT_INLINE           static inline
 #endif /* __STRICT_ANSI__ */
-#define INLINE_ATTRIBUTE __attribute__ ((always_inline))
+#define ITT_INLINE_ATTRIBUTE __attribute__ ((always_inline, unused))
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 /** @endcond */
 
@@ -398,6 +404,128 @@
 /** @} threads group */
 
 /**
+ * @defgroup suppress Error suppression
+ * @ingroup public
+ * General behavior: application continues to run, but errors are suppressed
+ *
+ * @{
+ */
+
+/*****************************************************************//**
+ * @name group of functions used for error suppression in correctness tools
+ *********************************************************************/
+/** @{ */
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask
+ */
+#define __itt_suppress_all_errors 0x7fffffff
+
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask (suppresses errors from threading analysis)
+ */
+#define __itt_suppress_threading_errors 0x000000ff
+
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask (suppresses errors from memory analysis)
+ */
+#define __itt_suppress_memory_errors 0x0000ff00
+
+/**
+ * @brief Start suppressing errors identified in mask on this thread
+ */
+void ITTAPI __itt_suppress_push(unsigned int mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask))
+#define __itt_suppress_push     ITTNOTIFY_VOID(suppress_push)
+#define __itt_suppress_push_ptr ITTNOTIFY_NAME(suppress_push)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_push(mask)
+#define __itt_suppress_push_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_push_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Undo the effects of the matching call to __itt_suppress_push
+ */
+void ITTAPI __itt_suppress_pop(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_pop, (void))
+#define __itt_suppress_pop     ITTNOTIFY_VOID(suppress_pop)
+#define __itt_suppress_pop_ptr ITTNOTIFY_NAME(suppress_pop)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_pop()
+#define __itt_suppress_pop_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_pop_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @enum __itt_model_disable
+ * @brief Enumerator for the disable methods
+ */
+typedef enum __itt_suppress_mode {
+    __itt_unsuppress_range,
+    __itt_suppress_range
+} __itt_suppress_mode_t;
+
+/**
+ * @brief Mark a range of memory for error suppression or unsuppression for error types included in mask
+ */
+void ITTAPI __itt_suppress_mark_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
+#define __itt_suppress_mark_range     ITTNOTIFY_VOID(suppress_mark_range)
+#define __itt_suppress_mark_range_ptr ITTNOTIFY_NAME(suppress_mark_range)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_mark_range(mask)
+#define __itt_suppress_mark_range_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_mark_range_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Undo the effect of a matching call to __itt_suppress_mark_range.   If not matching
+ *        call is found, nothing is changed.
+ */
+void ITTAPI __itt_suppress_clear_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_clear_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
+#define __itt_suppress_clear_range     ITTNOTIFY_VOID(suppress_clear_range)
+#define __itt_suppress_clear_range_ptr ITTNOTIFY_NAME(suppress_clear_range)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_clear_range(mask)
+#define __itt_suppress_clear_range_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_clear_range_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} */
+/** @} suppress group */
+
+/**
  * @defgroup sync Synchronization
  * @ingroup public
  * Indicate user-written synchronization code
@@ -820,8 +948,10 @@
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 void ITTAPI __itt_model_site_beginW(const wchar_t *name);
 #endif
+void ITTAPI __itt_model_site_beginA(const char *name);
 void ITTAPI __itt_model_site_beginAL(const char *name, size_t siteNameLen);
 void ITTAPI __itt_model_site_end  (__itt_model_site *site, __itt_model_site_instance *instance);
+void ITTAPI __itt_model_site_end_2(void);
 
 /** @cond exclude_from_documentation */
 #ifndef INTEL_NO_MACRO_BODY
@@ -830,18 +960,24 @@
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 ITT_STUBV(ITTAPI, void, model_site_beginW,  (const wchar_t *name))
 #endif
+ITT_STUBV(ITTAPI, void, model_site_beginA,  (const char *name))
 ITT_STUBV(ITTAPI, void, model_site_beginAL,  (const char *name, size_t siteNameLen))
 ITT_STUBV(ITTAPI, void, model_site_end,    (__itt_model_site *site, __itt_model_site_instance *instance))
+ITT_STUBV(ITTAPI, void, model_site_end_2,  (void))
 #define __itt_model_site_begin      ITTNOTIFY_VOID(model_site_begin)
 #define __itt_model_site_begin_ptr  ITTNOTIFY_NAME(model_site_begin)
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 #define __itt_model_site_beginW      ITTNOTIFY_VOID(model_site_beginW)
 #define __itt_model_site_beginW_ptr  ITTNOTIFY_NAME(model_site_beginW)
 #endif
+#define __itt_model_site_beginA      ITTNOTIFY_VOID(model_site_beginA)
+#define __itt_model_site_beginA_ptr  ITTNOTIFY_NAME(model_site_beginA)
 #define __itt_model_site_beginAL      ITTNOTIFY_VOID(model_site_beginAL)
 #define __itt_model_site_beginAL_ptr  ITTNOTIFY_NAME(model_site_beginAL)
 #define __itt_model_site_end        ITTNOTIFY_VOID(model_site_end)
 #define __itt_model_site_end_ptr    ITTNOTIFY_NAME(model_site_end)
+#define __itt_model_site_end_2        ITTNOTIFY_VOID(model_site_end_2)
+#define __itt_model_site_end_2_ptr    ITTNOTIFY_NAME(model_site_end_2)
 #else  /* INTEL_NO_ITTNOTIFY_API */
 #define __itt_model_site_begin(site, instance, name)
 #define __itt_model_site_begin_ptr  0
@@ -849,18 +985,24 @@
 #define __itt_model_site_beginW(name)
 #define __itt_model_site_beginW_ptr  0
 #endif
+#define __itt_model_site_beginA(name)
+#define __itt_model_site_beginA_ptr  0
 #define __itt_model_site_beginAL(name, siteNameLen)
 #define __itt_model_site_beginAL_ptr  0
 #define __itt_model_site_end(site, instance)
 #define __itt_model_site_end_ptr    0
+#define __itt_model_site_end_2()
+#define __itt_model_site_end_2_ptr    0
 #endif /* INTEL_NO_ITTNOTIFY_API */
 #else  /* INTEL_NO_MACRO_BODY */
 #define __itt_model_site_begin_ptr  0
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 #define __itt_model_site_beginW_ptr  0
 #endif
+#define __itt_model_site_beginA_ptr  0
 #define __itt_model_site_beginAL_ptr  0
 #define __itt_model_site_end_ptr    0
+#define __itt_model_site_end_2_ptr    0
 #endif /* INTEL_NO_MACRO_BODY */
 /** @endcond */
 
@@ -878,9 +1020,14 @@
 void ITTAPI __itt_model_task_begin(__itt_model_task *task, __itt_model_task_instance *instance, const char *name);
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 void ITTAPI __itt_model_task_beginW(const wchar_t *name);
+void ITTAPI __itt_model_iteration_taskW(const wchar_t *name);
 #endif
+void ITTAPI __itt_model_task_beginA(const char *name);
 void ITTAPI __itt_model_task_beginAL(const char *name, size_t taskNameLen);
+void ITTAPI __itt_model_iteration_taskA(const char *name);
+void ITTAPI __itt_model_iteration_taskAL(const char *name, size_t taskNameLen);
 void ITTAPI __itt_model_task_end  (__itt_model_task *task, __itt_model_task_instance *instance);
+void ITTAPI __itt_model_task_end_2(void);
 
 /** @cond exclude_from_documentation */
 #ifndef INTEL_NO_MACRO_BODY
@@ -888,19 +1035,34 @@
 ITT_STUBV(ITTAPI, void, model_task_begin,  (__itt_model_task *task, __itt_model_task_instance *instance, const char *name))
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 ITT_STUBV(ITTAPI, void, model_task_beginW,  (const wchar_t *name))
+ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name))
 #endif
+ITT_STUBV(ITTAPI, void, model_task_beginA,  (const char *name))
 ITT_STUBV(ITTAPI, void, model_task_beginAL,  (const char *name, size_t taskNameLen))
+ITT_STUBV(ITTAPI, void, model_iteration_taskA,  (const char *name))
+ITT_STUBV(ITTAPI, void, model_iteration_taskAL,  (const char *name, size_t taskNameLen))
 ITT_STUBV(ITTAPI, void, model_task_end,    (__itt_model_task *task, __itt_model_task_instance *instance))
+ITT_STUBV(ITTAPI, void, model_task_end_2,  (void))
 #define __itt_model_task_begin      ITTNOTIFY_VOID(model_task_begin)
 #define __itt_model_task_begin_ptr  ITTNOTIFY_NAME(model_task_begin)
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 #define __itt_model_task_beginW     ITTNOTIFY_VOID(model_task_beginW)
 #define __itt_model_task_beginW_ptr ITTNOTIFY_NAME(model_task_beginW)
+#define __itt_model_iteration_taskW     ITTNOTIFY_VOID(model_iteration_taskW)
+#define __itt_model_iteration_taskW_ptr ITTNOTIFY_NAME(model_iteration_taskW)
 #endif
+#define __itt_model_task_beginA    ITTNOTIFY_VOID(model_task_beginA)
+#define __itt_model_task_beginA_ptr ITTNOTIFY_NAME(model_task_beginA)
 #define __itt_model_task_beginAL    ITTNOTIFY_VOID(model_task_beginAL)
 #define __itt_model_task_beginAL_ptr ITTNOTIFY_NAME(model_task_beginAL)
+#define __itt_model_iteration_taskA    ITTNOTIFY_VOID(model_iteration_taskA)
+#define __itt_model_iteration_taskA_ptr ITTNOTIFY_NAME(model_iteration_taskA)
+#define __itt_model_iteration_taskAL    ITTNOTIFY_VOID(model_iteration_taskAL)
+#define __itt_model_iteration_taskAL_ptr ITTNOTIFY_NAME(model_iteration_taskAL)
 #define __itt_model_task_end        ITTNOTIFY_VOID(model_task_end)
 #define __itt_model_task_end_ptr    ITTNOTIFY_NAME(model_task_end)
+#define __itt_model_task_end_2        ITTNOTIFY_VOID(model_task_end_2)
+#define __itt_model_task_end_2_ptr    ITTNOTIFY_NAME(model_task_end_2)
 #else  /* INTEL_NO_ITTNOTIFY_API */
 #define __itt_model_task_begin(task, instance, name)
 #define __itt_model_task_begin_ptr  0
@@ -908,18 +1070,30 @@
 #define __itt_model_task_beginW(name)
 #define __itt_model_task_beginW_ptr  0
 #endif
+#define __itt_model_task_beginA(name)
+#define __itt_model_task_beginA_ptr  0
 #define __itt_model_task_beginAL(name, siteNameLen)
 #define __itt_model_task_beginAL_ptr  0
+#define __itt_model_iteration_taskA(name)
+#define __itt_model_iteration_taskA_ptr  0
+#define __itt_model_iteration_taskAL(name, siteNameLen)
+#define __itt_model_iteration_taskAL_ptr  0
 #define __itt_model_task_end(task, instance)
 #define __itt_model_task_end_ptr    0
+#define __itt_model_task_end_2()
+#define __itt_model_task_end_2_ptr    0
 #endif /* INTEL_NO_ITTNOTIFY_API */
 #else  /* INTEL_NO_MACRO_BODY */
 #define __itt_model_task_begin_ptr  0
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 #define __itt_model_task_beginW_ptr 0
 #endif
+#define __itt_model_task_beginA_ptr  0
 #define __itt_model_task_beginAL_ptr  0
+#define __itt_model_iteration_taskA_ptr    0
+#define __itt_model_iteration_taskAL_ptr    0
 #define __itt_model_task_end_ptr    0
+#define __itt_model_task_end_2_ptr    0
 #endif /* INTEL_NO_MACRO_BODY */
 /** @endcond */
 
@@ -936,26 +1110,40 @@
  * but may not have identical semantics.)
  */
 void ITTAPI __itt_model_lock_acquire(void *lock);
+void ITTAPI __itt_model_lock_acquire_2(void *lock);
 void ITTAPI __itt_model_lock_release(void *lock);
+void ITTAPI __itt_model_lock_release_2(void *lock);
 
 /** @cond exclude_from_documentation */
 #ifndef INTEL_NO_MACRO_BODY
 #ifndef INTEL_NO_ITTNOTIFY_API
 ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock))
 ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock))
 #define __itt_model_lock_acquire     ITTNOTIFY_VOID(model_lock_acquire)
 #define __itt_model_lock_acquire_ptr ITTNOTIFY_NAME(model_lock_acquire)
+#define __itt_model_lock_acquire_2     ITTNOTIFY_VOID(model_lock_acquire_2)
+#define __itt_model_lock_acquire_2_ptr ITTNOTIFY_NAME(model_lock_acquire_2)
 #define __itt_model_lock_release     ITTNOTIFY_VOID(model_lock_release)
 #define __itt_model_lock_release_ptr ITTNOTIFY_NAME(model_lock_release)
+#define __itt_model_lock_release_2     ITTNOTIFY_VOID(model_lock_release_2)
+#define __itt_model_lock_release_2_ptr ITTNOTIFY_NAME(model_lock_release_2)
 #else  /* INTEL_NO_ITTNOTIFY_API */
 #define __itt_model_lock_acquire(lock)
 #define __itt_model_lock_acquire_ptr 0
+#define __itt_model_lock_acquire_2(lock)
+#define __itt_model_lock_acquire_2_ptr 0
 #define __itt_model_lock_release(lock)
 #define __itt_model_lock_release_ptr 0
+#define __itt_model_lock_release_2(lock)
+#define __itt_model_lock_release_2_ptr 0
 #endif /* INTEL_NO_ITTNOTIFY_API */
 #else  /* INTEL_NO_MACRO_BODY */
 #define __itt_model_lock_acquire_ptr 0
+#define __itt_model_lock_acquire_2_ptr 0
 #define __itt_model_lock_release_ptr 0
+#define __itt_model_lock_release_2_ptr 0
 #endif /* INTEL_NO_MACRO_BODY */
 /** @endcond */
 
@@ -1104,25 +1292,32 @@
  */
 void ITTAPI __itt_model_disable_push(__itt_model_disable x);
 void ITTAPI __itt_model_disable_pop(void);
+void ITTAPI __itt_model_aggregate_task(size_t x);
 
 /** @cond exclude_from_documentation */
 #ifndef INTEL_NO_MACRO_BODY
 #ifndef INTEL_NO_ITTNOTIFY_API
 ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x))
 ITT_STUBV(ITTAPI, void, model_disable_pop,  (void))
+ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t x))
 #define __itt_model_disable_push     ITTNOTIFY_VOID(model_disable_push)
 #define __itt_model_disable_push_ptr ITTNOTIFY_NAME(model_disable_push)
 #define __itt_model_disable_pop      ITTNOTIFY_VOID(model_disable_pop)
 #define __itt_model_disable_pop_ptr  ITTNOTIFY_NAME(model_disable_pop)
+#define __itt_model_aggregate_task      ITTNOTIFY_VOID(model_aggregate_task)
+#define __itt_model_aggregate_task_ptr  ITTNOTIFY_NAME(model_aggregate_task)
 #else  /* INTEL_NO_ITTNOTIFY_API */
 #define __itt_model_disable_push(x)
 #define __itt_model_disable_push_ptr 0
 #define __itt_model_disable_pop()
 #define __itt_model_disable_pop_ptr 0
+#define __itt_model_aggregate_task(x)
+#define __itt_model_aggregate_task_ptr 0
 #endif /* INTEL_NO_ITTNOTIFY_API */
 #else  /* INTEL_NO_MACRO_BODY */
 #define __itt_model_disable_push_ptr 0
 #define __itt_model_disable_pop_ptr 0
+#define __itt_model_aggregate_task_ptr 0
 #endif /* INTEL_NO_MACRO_BODY */
 /** @endcond */
 /** @} model group */
@@ -1348,9 +1543,97 @@
 #define __itt_heap_internal_access_end_ptr 0
 #endif /* INTEL_NO_MACRO_BODY */
 /** @endcond */
-/** @} heap group */
+
+/** @brief record memory growth begin */
+void ITTAPI __itt_heap_record_memory_growth_begin(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin,  (void))
+#define __itt_heap_record_memory_growth_begin      ITTNOTIFY_VOID(heap_record_memory_growth_begin)
+#define __itt_heap_record_memory_growth_begin_ptr  ITTNOTIFY_NAME(heap_record_memory_growth_begin)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_record_memory_growth_begin()
+#define __itt_heap_record_memory_growth_begin_ptr  0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_record_memory_growth_begin_ptr  0
+#endif /* INTEL_NO_MACRO_BODY */
 /** @endcond */
 
+/** @brief record memory growth end */
+void ITTAPI __itt_heap_record_memory_growth_end(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void))
+#define __itt_heap_record_memory_growth_end     ITTNOTIFY_VOID(heap_record_memory_growth_end)
+#define __itt_heap_record_memory_growth_end_ptr ITTNOTIFY_NAME(heap_record_memory_growth_end)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_record_memory_growth_end()
+#define __itt_heap_record_memory_growth_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_record_memory_growth_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Specify the type of heap detection/reporting to modify.
+ */
+/**
+ * @hideinitializer
+ * @brief Report on memory leaks.
+ */
+#define __itt_heap_leaks 0x00000001
+
+/**
+ * @hideinitializer
+ * @brief Report on memory growth.
+ */
+#define __itt_heap_growth 0x00000002
+
+
+/** @brief heap reset detection */
+void ITTAPI __itt_heap_reset_detection(unsigned int reset_mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reset_detection,  (unsigned int reset_mask))
+#define __itt_heap_reset_detection      ITTNOTIFY_VOID(heap_reset_detection)
+#define __itt_heap_reset_detection_ptr  ITTNOTIFY_NAME(heap_reset_detection)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_reset_detection()
+#define __itt_heap_reset_detection_ptr  0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_reset_detection_ptr  0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief report */
+void ITTAPI __itt_heap_record(unsigned int record_mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask))
+#define __itt_heap_record     ITTNOTIFY_VOID(heap_record)
+#define __itt_heap_record_ptr ITTNOTIFY_NAME(heap_record)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_record()
+#define __itt_heap_record_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_record_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @} heap group */
+/** @endcond */
 /* ========================================================================== */
 
 /**
@@ -1475,8 +1758,8 @@
  * @param[in] extra The extra data to unique identify object; low QWORD of the ID value.
  */
 
-INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) INLINE_ATTRIBUTE;
-INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra)
+ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra)
 {
     __itt_id id = __itt_null;
     id.d1 = (unsigned long long)((uintptr_t)addr);
@@ -1633,6 +1916,40 @@
 /** @endcond */
 /** @} handles group */
 
+/** @cond exclude_from_documentation */
+typedef unsigned long long __itt_timestamp;
+/** @endcond */
+
+static const __itt_timestamp __itt_timestamp_none = (__itt_timestamp)-1LL;
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @ingroup timestamps
+ * @brief Return timestamp corresponding to current moment.
+ * This returns the timestamp in format that is most relevant for the current
+ * host or platform.  Do not rely on it being an RDTSC value.  It is possible
+ * to compare __itt_timestamp values with "<" operator.
+ */
+__itt_timestamp ITTAPI __itt_get_timestamp(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void))
+#define __itt_get_timestamp      ITTNOTIFY_DATA(get_timestamp)
+#define __itt_get_timestamp_ptr  ITTNOTIFY_NAME(get_timestamp)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_get_timestamp()
+#define __itt_get_timestamp_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_get_timestamp_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} timestamps */
+/** @endcond */
+
 /** @cond exclude_from_gpa_documentation */
 
 /**
@@ -1717,24 +2034,46 @@
  */
 void ITTAPI __itt_frame_end_v3(const __itt_domain *domain, __itt_id *id);
 
+/**
+ * @ingroup frames
+ * @brief Submits a frame instance.
+ * Successive calls to __itt_frame_begin or __itt_frame_submit with the
+ * same ID are ignored until a call to __itt_frame_end or __itt_frame_submit
+ * with the same ID.
+ * Passing special __itt_timestamp_none value as "end" argument means
+ * take the current timestamp as the end timestamp.
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL
+ * @param[in] begin Timestamp of the beginning of the frame
+ * @param[in] end Timestamp of the end of the frame
+ */
+void ITTAPI __itt_frame_submit_v3(const __itt_domain *domain, __itt_id *id,
+    __itt_timestamp begin, __itt_timestamp end);
+
 /** @cond exclude_from_documentation */
 #ifndef INTEL_NO_MACRO_BODY
 #ifndef INTEL_NO_ITTNOTIFY_API
 ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id))
 ITT_STUBV(ITTAPI, void, frame_end_v3,   (const __itt_domain *domain, __itt_id *id))
+ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end))
 #define __itt_frame_begin_v3(d,x)   ITTNOTIFY_VOID_D1(frame_begin_v3,d,x)
 #define __itt_frame_begin_v3_ptr    ITTNOTIFY_NAME(frame_begin_v3)
 #define __itt_frame_end_v3(d,x)     ITTNOTIFY_VOID_D1(frame_end_v3,d,x)
 #define __itt_frame_end_v3_ptr      ITTNOTIFY_NAME(frame_end_v3)
+#define __itt_frame_submit_v3(d,x,b,e) ITTNOTIFY_VOID_D3(frame_submit_v3,d,x,b,e)
+#define __itt_frame_submit_v3_ptr      ITTNOTIFY_NAME(frame_submit_v3)
 #else  /* INTEL_NO_ITTNOTIFY_API */
 #define __itt_frame_begin_v3(domain,id)
 #define __itt_frame_begin_v3_ptr 0
 #define __itt_frame_end_v3(domain,id)
 #define __itt_frame_end_v3_ptr   0
+#define __itt_frame_submit_v3(domain,id,begin,end)
+#define __itt_frame_submit_v3_ptr   0
 #endif /* INTEL_NO_ITTNOTIFY_API */
 #else  /* INTEL_NO_MACRO_BODY */
 #define __itt_frame_begin_v3_ptr 0
 #define __itt_frame_end_v3_ptr   0
+#define __itt_frame_submit_v3_ptr   0
 #endif /* INTEL_NO_MACRO_BODY */
 /** @endcond */
 /** @} frames group */
@@ -2730,8 +3069,125 @@
 #endif /* INTEL_NO_MACRO_BODY */
 /** @endcond */
 /** @} events group */
+
+
+/**
+ * @defgroup arrays Arrays Visualizer
+ * @ingroup public
+ * Visualize arrays
+ * @{
+ */
+
+/**
+ * @enum __itt_av_data_type
+ * @brief Defines types of arrays data (for C/C++ intrinsic types)
+ */
+typedef enum
+{
+    __itt_e_first = 0,
+    __itt_e_char = 0,  /* 1-byte integer */
+    __itt_e_uchar,     /* 1-byte unsigned integer */
+    __itt_e_int16,     /* 2-byte integer */
+    __itt_e_uint16,    /* 2-byte unsigned integer  */
+    __itt_e_int32,     /* 4-byte integer */
+    __itt_e_uint32,    /* 4-byte unsigned integer */
+    __itt_e_int64,     /* 8-byte integer */
+    __itt_e_uint64,    /* 8-byte unsigned integer */
+    __itt_e_float,     /* 4-byte floating */
+    __itt_e_double,    /* 8-byte floating */
+    __itt_e_last = __itt_e_double
+} __itt_av_data_type;
+
+/**
+ * @brief Save an array data to a file.
+ * Output format is defined by the file extension. The csv and bmp formats are supported (bmp - for 2-dimensional array only).
+ * @param[in] data - pointer to the array data
+ * @param[in] rank - the rank of the array
+ * @param[in] dimensions - pointer to an array of integers, which specifies the array dimensions.
+ * The size of dimensions must be equal to the rank
+ * @param[in] type - the type of the array, specified as one of the __itt_av_data_type values (for intrinsic types)
+ * @param[in] filePath - the file path; the output format is defined by the file extension
+ * @param[in] columnOrder - defines how the array is stored in the linear memory.
+ * It should be 1 for column-major order (e.g. in FORTRAN) or 0 - for row-major order (e.g. in C).
+ */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_av_saveA(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder);
+int ITTAPI __itt_av_saveW(void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_av_save     __itt_av_saveW
+#  define __itt_av_save_ptr __itt_av_saveW_ptr
+#else /* UNICODE */
+#  define __itt_av_save     __itt_av_saveA
+#  define __itt_av_save_ptr __itt_av_saveA_ptr
+#endif /* UNICODE */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_av_save(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder))
+ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, av_save,  (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA     ITTNOTIFY_DATA(av_saveA)
+#define __itt_av_saveA_ptr ITTNOTIFY_NAME(av_saveA)
+#define __itt_av_saveW     ITTNOTIFY_DATA(av_saveW)
+#define __itt_av_saveW_ptr ITTNOTIFY_NAME(av_saveW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save     ITTNOTIFY_DATA(av_save)
+#define __itt_av_save_ptr ITTNOTIFY_NAME(av_save)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA(name)
+#define __itt_av_saveA_ptr 0
+#define __itt_av_saveW(name)
+#define __itt_av_saveW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save(name)
+#define __itt_av_save_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA_ptr 0
+#define __itt_av_saveW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
 /** @endcond */
 
+void ITTAPI __itt_enable_attach(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, enable_attach, (void))
+#define __itt_enable_attach     ITTNOTIFY_VOID(enable_attach)
+#define __itt_enable_attach_ptr ITTNOTIFY_NAME(enable_attach)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_enable_attach()
+#define __itt_enable_attach_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_enable_attach_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_gpa_documentation */
+
+/** @} arrays group */
+
+/** @endcond */
+
+
 #ifdef __cplusplus
 }
 #endif /* __cplusplus */
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h
index bccaa38..40c8614 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h
@@ -42,12 +42,18 @@
 #  define ITT_PLATFORM_POSIX 2
 #endif /* ITT_PLATFORM_POSIX */
 
+#ifndef ITT_PLATFORM_MAC
+#  define ITT_PLATFORM_MAC 3
+#endif /* ITT_PLATFORM_MAC */
+
 #ifndef ITT_PLATFORM
 #  if ITT_OS==ITT_OS_WIN
 #    define ITT_PLATFORM ITT_PLATFORM_WIN
+#  elif ITT_OS==ITT_OS_MAC
+#    define ITT_PLATFORM ITT_PLATFORM_MAC
 #  else
 #    define ITT_PLATFORM ITT_PLATFORM_POSIX
-#  endif /* _WIN32 */
+#  endif
 #endif /* ITT_PLATFORM */
 
 #if defined(_UNICODE) && !defined(UNICODE)
@@ -68,11 +74,11 @@
 #  if ITT_PLATFORM==ITT_PLATFORM_WIN
 #    define CDECL __cdecl
 #  else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#    if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
-#      define CDECL /* not actual on x86_64 platform */
-#    else  /* _M_X64 || _M_AMD64 || __x86_64__ */
+#    if defined _M_IX86 || defined __i386__
 #      define CDECL __attribute__ ((cdecl))
-#    endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+#    else  /* _M_IX86 || __i386__ */
+#      define CDECL /* actual only on x86 platform */
+#    endif /* _M_IX86 || __i386__ */
 #  endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 #endif /* CDECL */
 
@@ -80,11 +86,11 @@
 #  if ITT_PLATFORM==ITT_PLATFORM_WIN
 #    define STDCALL __stdcall
 #  else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#    if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
-#      define STDCALL /* not supported on x86_64 platform */
-#    else  /* _M_X64 || _M_AMD64 || __x86_64__ */
+#    if defined _M_IX86 || defined __i386__
 #      define STDCALL __attribute__ ((stdcall))
-#    endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+#    else  /* _M_IX86 || __i386__ */
+#      define STDCALL /* supported only on x86 platform */
+#    endif /* _M_IX86 || __i386__ */
 #  endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 #endif /* STDCALL */
 
@@ -97,8 +103,8 @@
 
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 /* use __forceinline (VC++ specific) */
-#define INLINE           __forceinline
-#define INLINE_ATTRIBUTE /* nothing */
+#define ITT_INLINE           __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
 #else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 /*
  * Generally, functions are not inlined unless optimization is specified.
@@ -106,11 +112,11 @@
  * if no optimization level was specified.
  */
 #ifdef __STRICT_ANSI__
-#define INLINE           static
+#define ITT_INLINE           static
 #else  /* __STRICT_ANSI__ */
-#define INLINE           static inline
+#define ITT_INLINE           static inline
 #endif /* __STRICT_ANSI__ */
-#define INLINE_ATTRIBUTE __attribute__ ((always_inline))
+#define ITT_INLINE_ATTRIBUTE __attribute__ ((always_inline, unused))
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 /** @endcond */
 
@@ -122,17 +128,19 @@
 #  define ITT_ARCH_IA32E 2
 #endif /* ITT_ARCH_IA32E */
 
-#ifndef ITT_ARCH_IA64
-#  define ITT_ARCH_IA64  3
-#endif /* ITT_ARCH_IA64 */
+#ifndef ITT_ARCH_ARM
+#  define ITT_ARCH_ARM  4
+#endif /* ITT_ARCH_ARM */
 
 #ifndef ITT_ARCH
-#  if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
-#    define ITT_ARCH ITT_ARCH_IA32E
-#  elif defined _M_IA64 || defined __ia64
-#    define ITT_ARCH ITT_ARCH_IA64
-#  else
+#  if defined _M_IX86 || defined __i386__
 #    define ITT_ARCH ITT_ARCH_IA32
+#  elif defined _M_X64 || defined _M_AMD64 || defined __x86_64__
+#    define ITT_ARCH ITT_ARCH_IA32E
+#  elif defined _M_IA64 || defined __ia64__
+#    define ITT_ARCH ITT_ARCH_IA64
+#  elif defined _M_ARM || __arm__
+#    define ITT_ARCH ITT_ARCH_ARM
 #  endif
 #endif
 
@@ -145,7 +153,10 @@
 #define ITT_TO_STR_AUX(x) #x
 #define ITT_TO_STR(x)     ITT_TO_STR_AUX(x)
 
-#define __ITT_BUILD_ASSERT(expr, suffix) do { static char __itt_build_check_##suffix[(expr) ? 1 : -1]; __itt_build_check_##suffix[0] = 0; } while(0)
+#define __ITT_BUILD_ASSERT(expr, suffix) do { \
+    static char __itt_build_check_##suffix[(expr) ? 1 : -1]; \
+    __itt_build_check_##suffix[0] = 0; \
+} while(0)
 #define _ITT_BUILD_ASSERT(expr, suffix)  __ITT_BUILD_ASSERT((expr), suffix)
 #define ITT_BUILD_ASSERT(expr)           _ITT_BUILD_ASSERT((expr), __LINE__)
 
@@ -158,7 +169,8 @@
 #define API_VERSION_NUM 0.0.0
 #endif /* API_VERSION_NUM */
 
-#define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) " (" ITT_TO_STR(API_VERSION_BUILD) ")"
+#define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \
+                                " (" ITT_TO_STR(API_VERSION_BUILD) ")"
 
 /* OS communication functions */
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -176,12 +188,16 @@
 #ifndef _GNU_SOURCE
 #define _GNU_SOURCE 1 /* need for PTHREAD_MUTEX_RECURSIVE */
 #endif /* _GNU_SOURCE */
+#ifndef __USE_UNIX98
+#define __USE_UNIX98 1 /* need for PTHREAD_MUTEX_RECURSIVE, on SLES11.1 with gcc 4.3.4 wherein pthread.h missing dependency on __USE_XOPEN2K8 */
+#endif /*__USE_UNIX98*/
 #include <pthread.h>
 typedef void*             lib_t;
 typedef pthread_t         TIDT;
 typedef pthread_mutex_t   mutex_t;
 #define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
-#define _strong_alias(name, aliasname) extern __typeof (name) aliasname __attribute__ ((alias (#name)));
+#define _strong_alias(name, aliasname) \
+            extern __typeof (name) aliasname __attribute__ ((alias (#name)));
 #define strong_alias(name, aliasname) _strong_alias(name, aliasname)
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 
@@ -200,29 +216,35 @@
 #define __itt_thread_id()         GetCurrentThreadId()
 #define __itt_thread_yield()      SwitchToThread()
 #ifndef ITT_SIMPLE_INIT
-INLINE int __itt_interlocked_increment(volatile long* ptr)
+ITT_INLINE long
+__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
 {
     return InterlockedIncrement(ptr);
 }
 #endif /* ITT_SIMPLE_INIT */
 #else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
 #define __itt_get_proc(lib, name) dlsym(lib, name)
-#define __itt_mutex_init(mutex)   \
-    {                                                                                        \
-        pthread_mutexattr_t mutex_attr;                                                      \
-        int error_code = pthread_mutexattr_init(&mutex_attr);                                \
-        if (error_code)                                                                      \
-            __itt_report_error(__itt_error_system, "pthread_mutexattr_init", error_code);    \
-        error_code = pthread_mutexattr_settype(&mutex_attr, PTHREAD_MUTEX_RECURSIVE);        \
-        if (error_code)                                                                      \
-            __itt_report_error(__itt_error_system, "pthread_mutexattr_settype", error_code); \
-        error_code = pthread_mutex_init(mutex, &mutex_attr);                                 \
-        if (error_code)                                                                      \
-            __itt_report_error(__itt_error_system, "pthread_mutex_init", error_code);        \
-        error_code = pthread_mutexattr_destroy(&mutex_attr);                                 \
-        if (error_code)                                                                      \
-            __itt_report_error(__itt_error_system, "pthread_mutexattr_destroy", error_code); \
-    }
+#define __itt_mutex_init(mutex)   {\
+    pthread_mutexattr_t mutex_attr;                                         \
+    int error_code = pthread_mutexattr_init(&mutex_attr);                   \
+    if (error_code)                                                         \
+        __itt_report_error(__itt_error_system, "pthread_mutexattr_init",    \
+                           error_code);                                     \
+    error_code = pthread_mutexattr_settype(&mutex_attr,                     \
+                                           PTHREAD_MUTEX_RECURSIVE);        \
+    if (error_code)                                                         \
+        __itt_report_error(__itt_error_system, "pthread_mutexattr_settype", \
+                           error_code);                                     \
+    error_code = pthread_mutex_init(mutex, &mutex_attr);                    \
+    if (error_code)                                                         \
+        __itt_report_error(__itt_error_system, "pthread_mutex_init",        \
+                           error_code);                                     \
+    error_code = pthread_mutexattr_destroy(&mutex_attr);                    \
+    if (error_code)                                                         \
+        __itt_report_error(__itt_error_system, "pthread_mutexattr_destroy", \
+                           error_code);                                     \
+}
 #define __itt_mutex_lock(mutex)   pthread_mutex_lock(mutex)
 #define __itt_mutex_unlock(mutex) pthread_mutex_unlock(mutex)
 #define __itt_load_lib(name)      dlopen(name, RTLD_LAZY)
@@ -238,23 +260,29 @@
 #ifdef __INTEL_COMPILER
 #define __TBB_machine_fetchadd4(addr, val) __fetchadd4_acq((void *)addr, val)
 #else  /* __INTEL_COMPILER */
-/* TODO: Add Support for not Intel compilers for IA64 */
+/* TODO: Add support for non-Intel compilers for the IA-64 architecture */
 #endif /* __INTEL_COMPILER */
-#else /* ITT_ARCH!=ITT_ARCH_IA64 */
-INLINE int __TBB_machine_fetchadd4(volatile void* ptr, long addend)
+#elif ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_IA32E /* ITT_ARCH!=ITT_ARCH_IA64 */
+ITT_INLINE long
+__TBB_machine_fetchadd4(volatile void* ptr, long addend) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend)
 {
-    int result;
-    __asm__ __volatile__("lock\nxaddl %0,%1"
-                          : "=r"(result),"=m"(*(long*)ptr)
-                          : "0"(addend), "m"(*(long*)ptr)
+    long result;
+    __asm__ __volatile__("lock\nxadd %0,%1"
+                          : "=r"(result),"=m"(*(int*)ptr)
+                          : "0"(addend), "m"(*(int*)ptr)
                           : "memory");
     return result;
 }
+#elif ITT_ARCH==ITT_ARCH_ARM
+#define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val)
 #endif /* ITT_ARCH==ITT_ARCH_IA64 */
 #ifndef ITT_SIMPLE_INIT
-INLINE int __itt_interlocked_increment(volatile long* ptr)
+ITT_INLINE long
+__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
 {
-    return __TBB_machine_fetchadd4(ptr, 1) + 1;
+    return __TBB_machine_fetchadd4(ptr, 1) + 1L;
 }
 #endif /* ITT_SIMPLE_INIT */
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.c b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.c
index 5257d0d..4b5f464 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.c
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.c
@@ -29,7 +29,7 @@
 
 #include "disable_warnings.h"
 
-static const char api_version[] = API_VERSION "\0\n@(#) 201495 2011-12-01 14:14:56Z\n";
+static const char api_version[] = API_VERSION "\0\n@(#) $Revision: 42754 $\n";
 
 #define _N_(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n)
 
@@ -43,6 +43,12 @@
 #error Unsupported or unknown OS.
 #endif
 
+#ifdef __ANDROID__
+/* default location of userapi collector on Android */
+#define ANDROID_ITTNOTIFY_DEFAULT_PATH  "/data/data/com.intel.vtune/intel/libittnotify.so"
+#endif
+
+
 #ifndef LIB_VAR_NAME
 #if ITT_ARCH==ITT_ARCH_IA32
 #define LIB_VAR_NAME INTEL_LIBITTNOTIFY32
@@ -146,7 +152,7 @@
 
 static __itt_group_alias group_alias[] = {
     { "KMP_FOR_TPROFILE", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync  | __itt_group_mark) },
-    { "KMP_FOR_TCHECK",   (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync  | __itt_group_fsync | __itt_group_mark) },
+    { "KMP_FOR_TCHECK",   (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync  | __itt_group_fsync | __itt_group_mark | __itt_group_suppress) },
     { NULL,               (__itt_group_none) },
     { api_version,        (__itt_group_none) } /* !!! Just to avoid unused code elimination !!! */
 };
@@ -162,7 +168,7 @@
 /* Define functions with static implementation */
 #undef ITT_STUB
 #undef ITT_STUBV
-#define ITT_STUB(api,type,name,args,params,nameindll,group,format) {ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (void*)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (__itt_group_id)(group)},
+#define ITT_STUB(api,type,name,args,params,nameindll,group,format) { ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (__itt_group_id)(group)},
 #define ITT_STUBV ITT_STUB
 #define __ITT_INTERNAL_INIT
 #include "ittnotify_static.h"
@@ -170,7 +176,7 @@
 /* Define functions without static implementation */
 #undef ITT_STUB
 #undef ITT_STUBV
-#define ITT_STUB(api,type,name,args,params,nameindll,group,format) {ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), NULL, (__itt_group_id)(group)},
+#define ITT_STUB(api,type,name,args,params,nameindll,group,format) {ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), NULL, (__itt_group_id)(group)},
 #define ITT_STUBV ITT_STUB
 #include "ittnotify_static.h"
     {NULL, NULL, NULL, NULL, __itt_group_none}
@@ -225,7 +231,7 @@
 static const char dll_path[PATH_MAX] = { 0 };
 
 /* static part descriptor which handles. all notification api attributes. */
-static __itt_global __itt_ittapi_global = {
+__itt_global _N_(_ittapi_global) = {
     ITT_MAGIC,                                     /* identification info */
     ITT_MAJOR, ITT_MINOR, API_VERSION_BUILD,       /* version info */
     0,                                             /* api_initialized */
@@ -261,9 +267,9 @@
 {
     va_list args;
     va_start(args, code);
-    if (__itt_ittapi_global.error_handler != NULL)
+    if (_N_(_ittapi_global).error_handler != NULL)
     {
-        __itt_error_handler_t* handler = (__itt_error_handler_t*)__itt_ittapi_global.error_handler;
+        __itt_error_handler_t* handler = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler;
         handler(code, args);
     }
 #ifdef ITT_NOTIFY_EXT_REPORT
@@ -281,7 +287,7 @@
 {
     __itt_domain *h_tail, *h;
 
-    if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0)
+    if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
     {
         __itt_init_ittlib_name(NULL, __itt_group_all);
         if (ITTNOTIFY_NAME(domain_createW) && ITTNOTIFY_NAME(domain_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init)))
@@ -289,16 +295,16 @@
     }
 
     if (name == NULL)
-        return __itt_ittapi_global.domain_list;
+        return _N_(_ittapi_global).domain_list;
 
-    ITT_MUTEX_INIT_AND_LOCK(__itt_ittapi_global);
-    for (h_tail = NULL, h = __itt_ittapi_global.domain_list; h != NULL; h_tail = h, h = h->next)
+    ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+    for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next)
         if (h->nameW != NULL && !wcscmp(h->nameW, name))
             break;
     if (h == NULL) {
-        NEW_DOMAIN_W(&__itt_ittapi_global,h,h_tail,name);
+        NEW_DOMAIN_W(&_N_(_ittapi_global),h,h_tail,name);
     }
-    __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+    __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
     return h;
 }
 
@@ -309,7 +315,7 @@
 {
     __itt_domain *h_tail, *h;
 
-    if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0)
+    if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
     {
         __itt_init_ittlib_name(NULL, __itt_group_all);
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -322,16 +328,16 @@
     }
 
     if (name == NULL)
-        return __itt_ittapi_global.domain_list;
+        return _N_(_ittapi_global).domain_list;
 
-    ITT_MUTEX_INIT_AND_LOCK(__itt_ittapi_global);
-    for (h_tail = NULL, h = __itt_ittapi_global.domain_list; h != NULL; h_tail = h, h = h->next)
+    ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+    for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next)
         if (h->nameA != NULL && !__itt_fstrcmp(h->nameA, name))
             break;
     if (h == NULL) {
-        NEW_DOMAIN_A(&__itt_ittapi_global,h,h_tail,name);
+        NEW_DOMAIN_A(&_N_(_ittapi_global),h,h_tail,name);
     }
-    __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+    __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
     return h;
 }
 
@@ -340,7 +346,7 @@
 {
     __itt_string_handle *h_tail, *h;
 
-    if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0)
+    if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
     {
         __itt_init_ittlib_name(NULL, __itt_group_all);
         if (ITTNOTIFY_NAME(string_handle_createW) && ITTNOTIFY_NAME(string_handle_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init)))
@@ -348,16 +354,16 @@
     }
 
     if (name == NULL)
-        return __itt_ittapi_global.string_list;
+        return _N_(_ittapi_global).string_list;
 
-    ITT_MUTEX_INIT_AND_LOCK(__itt_ittapi_global);
-    for (h_tail = NULL, h = __itt_ittapi_global.string_list; h != NULL; h_tail = h, h = h->next)
+    ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+    for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next)
         if (h->strW != NULL && !wcscmp(h->strW, name))
             break;
     if (h == NULL) {
-        NEW_STRING_HANDLE_W(&__itt_ittapi_global,h,h_tail,name);
+        NEW_STRING_HANDLE_W(&_N_(_ittapi_global),h,h_tail,name);
     }
-    __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+    __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
     return h;
 }
 
@@ -368,7 +374,7 @@
 {
     __itt_string_handle *h_tail, *h;
 
-    if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0)
+    if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
     {
         __itt_init_ittlib_name(NULL, __itt_group_all);
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -381,16 +387,16 @@
     }
 
     if (name == NULL)
-        return __itt_ittapi_global.string_list;
+        return _N_(_ittapi_global).string_list;
 
-    ITT_MUTEX_INIT_AND_LOCK(__itt_ittapi_global);
-    for (h_tail = NULL, h = __itt_ittapi_global.string_list; h != NULL; h_tail = h, h = h->next)
+    ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+    for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next)
         if (h->strA != NULL && !__itt_fstrcmp(h->strA, name))
             break;
     if (h == NULL) {
-        NEW_STRING_HANDLE_A(&__itt_ittapi_global,h,h_tail,name);
+        NEW_STRING_HANDLE_A(&_N_(_ittapi_global),h,h_tail,name);
     }
-    __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+    __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
     return h;
 }
 
@@ -398,7 +404,7 @@
 
 static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))(void)
 {
-    if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0)
+    if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
     {
         __itt_init_ittlib_name(NULL, __itt_group_all);
         if (ITTNOTIFY_NAME(pause) && ITTNOTIFY_NAME(pause) != ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init)))
@@ -407,12 +413,12 @@
             return;
         }
     }
-    __itt_ittapi_global.state = __itt_collection_paused;
+    _N_(_ittapi_global).state = __itt_collection_paused;
 }
 
 static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))(void)
 {
-    if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0)
+    if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
     {
         __itt_init_ittlib_name(NULL, __itt_group_all);
         if (ITTNOTIFY_NAME(resume) && ITTNOTIFY_NAME(resume) != ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init)))
@@ -421,7 +427,7 @@
             return;
         }
     }
-    __itt_ittapi_global.state = __itt_collection_normal;
+    _N_(_ittapi_global).state = __itt_collection_normal;
 }
 
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -430,7 +436,7 @@
     TIDT tid = __itt_thread_id();
     __itt_thread_info *h_tail, *h;
 
-    if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0)
+    if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
     {
         __itt_init_ittlib_name(NULL, __itt_group_all);
         if (ITTNOTIFY_NAME(thread_set_nameW) && ITTNOTIFY_NAME(thread_set_nameW) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init)))
@@ -440,18 +446,18 @@
         }
     }
 
-    __itt_mutex_lock(&__itt_ittapi_global.mutex);
-    for (h_tail = NULL, h = __itt_ittapi_global.thread_list; h != NULL; h_tail = h, h = h->next)
+    __itt_mutex_lock(&_N_(_ittapi_global).mutex);
+    for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next)
         if (h->tid == tid)
             break;
     if (h == NULL) {
-        NEW_THREAD_INFO_W(&__itt_ittapi_global, h, h_tail, tid, __itt_thread_normal, name);
+        NEW_THREAD_INFO_W(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_normal, name);
     }
     else
     {
         h->nameW = name ? _wcsdup(name) : NULL;
     }
-    __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+    __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
 }
 
 static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setW),_init))(const wchar_t* name, int namelen)
@@ -469,7 +475,7 @@
     TIDT tid = __itt_thread_id();
     __itt_thread_info *h_tail, *h;
 
-    if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0)
+    if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
     {
         __itt_init_ittlib_name(NULL, __itt_group_all);
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -487,18 +493,18 @@
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
     }
 
-    __itt_mutex_lock(&__itt_ittapi_global.mutex);
-    for (h_tail = NULL, h = __itt_ittapi_global.thread_list; h != NULL; h_tail = h, h = h->next)
+    __itt_mutex_lock(&_N_(_ittapi_global).mutex);
+    for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next)
         if (h->tid == tid)
             break;
     if (h == NULL) {
-        NEW_THREAD_INFO_A(&__itt_ittapi_global, h, h_tail, tid, __itt_thread_normal, name);
+        NEW_THREAD_INFO_A(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_normal, name);
     }
     else
     {
         h->nameA = name ? __itt_fstrdup(name) : NULL;
     }
-    __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+    __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
 }
 
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -522,7 +528,7 @@
     TIDT tid = __itt_thread_id();
     __itt_thread_info *h_tail, *h;
 
-    if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0)
+    if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
     {
         __itt_init_ittlib_name(NULL, __itt_group_all);
         if (ITTNOTIFY_NAME(thread_ignore) && ITTNOTIFY_NAME(thread_ignore) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init)))
@@ -532,19 +538,19 @@
         }
     }
 
-    __itt_mutex_lock(&__itt_ittapi_global.mutex);
-    for (h_tail = NULL, h = __itt_ittapi_global.thread_list; h != NULL; h_tail = h, h = h->next)
+    __itt_mutex_lock(&_N_(_ittapi_global).mutex);
+    for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next)
         if (h->tid == tid)
             break;
     if (h == NULL) {
         static const char* name = "unknown";
-        NEW_THREAD_INFO_A(&__itt_ittapi_global, h, h_tail, tid, __itt_thread_ignored, name);
+        NEW_THREAD_INFO_A(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_ignored, name);
     }
     else
     {
         h->state = __itt_thread_ignored;
     }
-    __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+    __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
 }
 
 static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_ignore),_init))(void)
@@ -552,6 +558,17 @@
     ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))();
 }
 
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(enable_attach),_init))(void)
+{
+#ifdef __ANDROID__
+    /*
+     * If the LIB_VAR_NAME environment variable was already set, keep its
+     * previous value; otherwise set the default collector path.
+    */
+    setenv(ITT_TO_STR(LIB_VAR_NAME), ANDROID_ITTNOTIFY_DEFAULT_PATH, 0);
+#endif
+}
+
 /* -------------------------------------------------------------------------- */
 
 static const char* __itt_fsplit(const char* s, const char* sep, const char** out, int* len)
@@ -666,80 +683,10 @@
     return NULL;
 }
 
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-
-#include <Winreg.h>
-
-typedef LONG (APIENTRY* RegCloseKeyProcType)(HKEY);
-typedef LONG (APIENTRY* RegOpenKeyExAProcType)(HKEY, LPCTSTR, DWORD, REGSAM, PHKEY);
-typedef LONG (APIENTRY* RegGetValueAProcType)(HKEY, LPCTSTR, LPCTSTR, DWORD, LPDWORD, PVOID, LPDWORD);
-
-/* This function return value of registry key that placed into static buffer.
- * This was done to aviod dynamic memory allocation.
- */
-static const char* __itt_get_lib_name_registry(void)
-{
-#define MAX_REG_VALUE_SIZE 4086
-    static char reg_buff[MAX_REG_VALUE_SIZE];
-    DWORD size;
-    LONG  res;
-    HKEY  hKey;
-    RegCloseKeyProcType   pRegCloseKey;
-    RegOpenKeyExAProcType pRegOpenKeyExA;
-    RegGetValueAProcType  pRegGetValueA;
-    HMODULE h_advapi32 = LoadLibraryA("advapi32.dll");
-    DWORD autodetect = 0;
-
-    if (h_advapi32 == NULL)
-    {
-        return NULL;
-    }
-
-    pRegCloseKey   =   (RegCloseKeyProcType)GetProcAddress(h_advapi32, "RegCloseKey");
-    pRegOpenKeyExA = (RegOpenKeyExAProcType)GetProcAddress(h_advapi32, "RegOpenKeyExA");
-    pRegGetValueA  =  (RegGetValueAProcType)GetProcAddress(h_advapi32, "RegGetValueA");
-
-    if (pRegCloseKey   == NULL ||
-        pRegOpenKeyExA == NULL ||
-        pRegGetValueA  == NULL)
-    {
-        FreeLibrary(h_advapi32);
-        return NULL;
-    }
-
-    res = pRegOpenKeyExA(HKEY_CURRENT_USER, (LPCTSTR)"Software\\Intel Corporation\\ITT Environment\\Collector", 0, KEY_READ, &hKey);
-    if (res != ERROR_SUCCESS || hKey == 0)
-    {
-        FreeLibrary(h_advapi32);
-        return NULL;
-    }
-
-    size = sizeof(DWORD);
-    res = pRegGetValueA(hKey, (LPCTSTR)"AutoDetect", NULL, RRF_RT_REG_DWORD, NULL, (BYTE*)&autodetect, &size);
-    if (res != ERROR_SUCCESS || size == 0 || autodetect == 0)
-    {
-        pRegCloseKey(hKey);
-        FreeLibrary(h_advapi32);
-        return NULL;
-    }
-
-    size = MAX_REG_VALUE_SIZE-1;
-    res = pRegGetValueA(hKey, (LPCTSTR)ITT_TO_STR(LIB_VAR_NAME), NULL, REG_SZ, NULL, (BYTE*)&reg_buff, &size);
-    pRegCloseKey(hKey);
-    FreeLibrary(h_advapi32);
-
-    return (res == ERROR_SUCCESS && size > 0) ? reg_buff : NULL;
-}
-
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
 static const char* __itt_get_lib_name(void)
 {
     const char* lib_name = __itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME));
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-    if (lib_name == NULL)
-        lib_name = __itt_get_lib_name_registry();
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
     return lib_name;
 }
 
@@ -761,9 +708,8 @@
         const char* chunk;
         while ((group_str = __itt_fsplit(group_str, ",; ", &chunk, &len)) != NULL)
         {
-            __itt_fstrcpyn(gr, chunk, sizeof(gr));
-
-            gr[min((unsigned int)len, sizeof(gr) - 1)] = 0;
+            __itt_fstrcpyn(gr, chunk, sizeof(gr) - 1);
+            gr[min(len, (int)(sizeof(gr) - 1))] = 0;
 
             for (i = 0; group_list[i].name != NULL; i++)
             {
@@ -810,8 +756,8 @@
 {
     register int i;
     // Fill all pointers with initial stubs
-    for (i = 0; __itt_ittapi_global.api_list_ptr[i].name != NULL; i++)
-        *__itt_ittapi_global.api_list_ptr[i].func_ptr = __itt_ittapi_global.api_list_ptr[i].init_func;
+    for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+        *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].init_func;
 }
 */
 
@@ -819,8 +765,8 @@
 {
     register int i;
     /* Nulify all pointers except domain_create and string_handle_create */
-    for (i = 0; __itt_ittapi_global.api_list_ptr[i].name != NULL; i++)
-        *__itt_ittapi_global.api_list_ptr[i].func_ptr = __itt_ittapi_global.api_list_ptr[i].null_func;
+    for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+        *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
 }
 
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -834,30 +780,30 @@
     __itt_api_fini_t* __itt_api_fini_ptr;
     static volatile TIDT current_thread = 0;
 
-    if (__itt_ittapi_global.api_initialized)
+    if (_N_(_ittapi_global).api_initialized)
     {
-        __itt_mutex_lock(&__itt_ittapi_global.mutex);
-        if (__itt_ittapi_global.api_initialized)
+        __itt_mutex_lock(&_N_(_ittapi_global).mutex);
+        if (_N_(_ittapi_global).api_initialized)
         {
             if (current_thread == 0)
             {
                 current_thread = __itt_thread_id();
-                __itt_api_fini_ptr = (__itt_api_fini_t*)__itt_get_proc(__itt_ittapi_global.lib, "__itt_api_fini");
+                __itt_api_fini_ptr = (__itt_api_fini_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_fini");
                 if (__itt_api_fini_ptr)
-                    __itt_api_fini_ptr(&__itt_ittapi_global);
+                    __itt_api_fini_ptr(&_N_(_ittapi_global));
 
                 __itt_nullify_all_pointers();
 
  /* TODO: !!! not safe !!! don't support unload so far.
-  *             if (__itt_ittapi_global.lib != NULL)
-  *                 __itt_unload_lib(__itt_ittapi_global.lib);
-  *             __itt_ittapi_global.lib = NULL;
+  *             if (_N_(_ittapi_global).lib != NULL)
+  *                 __itt_unload_lib(_N_(_ittapi_global).lib);
+  *             _N_(_ittapi_global).lib = NULL;
   */
-                __itt_ittapi_global.api_initialized = 0;
+                _N_(_ittapi_global).api_initialized = 0;
                 current_thread = 0;
             }
         }
-        __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+        __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
     }
 }
 
@@ -870,51 +816,52 @@
 #endif /* ITT_COMPLETE_GROUP */
     static volatile TIDT current_thread = 0;
 
-    if (!__itt_ittapi_global.api_initialized)
+    if (!_N_(_ittapi_global).api_initialized)
     {
 #ifndef ITT_SIMPLE_INIT
-        ITT_MUTEX_INIT_AND_LOCK(__itt_ittapi_global);
+        ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
 #endif /* ITT_SIMPLE_INIT */
 
-        if (!__itt_ittapi_global.api_initialized)
+        if (!_N_(_ittapi_global).api_initialized)
         {
             if (current_thread == 0)
             {
                 current_thread = __itt_thread_id();
-                __itt_ittapi_global.thread_list->tid = current_thread;
+                _N_(_ittapi_global).thread_list->tid = current_thread;
                 if (lib_name == NULL)
                     lib_name = __itt_get_lib_name();
                 groups = __itt_get_groups();
                 if (groups != __itt_group_none || lib_name != NULL)
                 {
-                    __itt_ittapi_global.lib = __itt_load_lib((lib_name == NULL) ? ittnotify_lib_name : lib_name);
-                    if (__itt_ittapi_global.lib != NULL)
+                    _N_(_ittapi_global).lib = __itt_load_lib((lib_name == NULL) ? ittnotify_lib_name : lib_name);
+
+                    if (_N_(_ittapi_global).lib != NULL)
                     {
                         __itt_api_init_t* __itt_api_init_ptr;
-                        int lib_version = __itt_lib_version(__itt_ittapi_global.lib);
+                        int lib_version = __itt_lib_version(_N_(_ittapi_global).lib);
 
                         switch (lib_version) {
                         case 0:
                             groups = __itt_group_legacy;
                         case 1:
                             /* Fill all pointers from dynamic library */
-                            for (i = 0; __itt_ittapi_global.api_list_ptr[i].name != NULL; i++)
+                            for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
                             {
-                                if (__itt_ittapi_global.api_list_ptr[i].group & groups & init_groups)
+                                if (_N_(_ittapi_global).api_list_ptr[i].group & groups & init_groups)
                                 {
-                                    *__itt_ittapi_global.api_list_ptr[i].func_ptr = (void*)__itt_get_proc(__itt_ittapi_global.lib, __itt_ittapi_global.api_list_ptr[i].name);
-                                    if (*__itt_ittapi_global.api_list_ptr[i].func_ptr == NULL)
+                                    *_N_(_ittapi_global).api_list_ptr[i].func_ptr = (void*)__itt_get_proc(_N_(_ittapi_global).lib, _N_(_ittapi_global).api_list_ptr[i].name);
+                                    if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr == NULL)
                                     {
                                         /* Restore pointers for function with static implementation */
-                                        *__itt_ittapi_global.api_list_ptr[i].func_ptr = __itt_ittapi_global.api_list_ptr[i].null_func;
-                                        __itt_report_error(__itt_error_no_symbol, lib_name, __itt_ittapi_global.api_list_ptr[i].name);
+                                        *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
+                                        __itt_report_error(__itt_error_no_symbol, lib_name, _N_(_ittapi_global).api_list_ptr[i].name);
 #ifdef ITT_COMPLETE_GROUP
-                                        zero_group = (__itt_group_id)(zero_group | __itt_ittapi_global.api_list_ptr[i].group);
+                                        zero_group = (__itt_group_id)(zero_group | _N_(_ittapi_global).api_list_ptr[i].group);
 #endif /* ITT_COMPLETE_GROUP */
                                     }
                                 }
                                 else
-                                    *__itt_ittapi_global.api_list_ptr[i].func_ptr = __itt_ittapi_global.api_list_ptr[i].null_func;
+                                    *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
                             }
 
                             if (groups == __itt_group_legacy)
@@ -934,15 +881,15 @@
                             }
 
 #ifdef ITT_COMPLETE_GROUP
-                            for (i = 0; __itt_ittapi_global.api_list_ptr[i].name != NULL; i++)
-                                if (__itt_ittapi_global.api_list_ptr[i].group & zero_group)
-                                    *__itt_ittapi_global.api_list_ptr[i].func_ptr = __itt_ittapi_global.api_list_ptr[i].null_func;
+                            for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+                                if (_N_(_ittapi_global).api_list_ptr[i].group & zero_group)
+                                    *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
 #endif /* ITT_COMPLETE_GROUP */
                             break;
                         case 2:
-                            __itt_api_init_ptr = (__itt_api_init_t*)__itt_get_proc(__itt_ittapi_global.lib, "__itt_api_init");
+                            __itt_api_init_ptr = (__itt_api_init_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_init");
                             if (__itt_api_init_ptr)
-                                __itt_api_init_ptr(&__itt_ittapi_global, init_groups);
+                                __itt_api_init_ptr(&_N_(_ittapi_global), init_groups);
                             break;
                         }
                     }
@@ -963,7 +910,7 @@
                 {
                     __itt_nullify_all_pointers();
                 }
-                __itt_ittapi_global.api_initialized = 1;
+                _N_(_ittapi_global).api_initialized = 1;
                 current_thread = 0;
                 /* !!! Just to avoid unused code elimination !!! */
                 if (__itt_fini_ittlib_ptr == _N_(fini_ittlib)) current_thread = 0;
@@ -971,25 +918,26 @@
         }
 
 #ifndef ITT_SIMPLE_INIT
-        __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+        __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
 #endif /* ITT_SIMPLE_INIT */
     }
 
     /* Evaluating if any function ptr is non empty and it's in init_groups */
-    for (i = 0; __itt_ittapi_global.api_list_ptr[i].name != NULL; i++)
-        if (*__itt_ittapi_global.api_list_ptr[i].func_ptr != __itt_ittapi_global.api_list_ptr[i].null_func &&
-            __itt_ittapi_global.api_list_ptr[i].group & init_groups)
+    for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+        if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr != _N_(_ittapi_global).api_list_ptr[i].null_func &&
+            _N_(_ittapi_global).api_list_ptr[i].group & init_groups)
             return 1;
     return 0;
 }
 
 ITT_EXTERN_C __itt_error_handler_t* _N_(set_error_handler)(__itt_error_handler_t* handler)
 {
-    __itt_error_handler_t* prev = (__itt_error_handler_t*)__itt_ittapi_global.error_handler;
-    __itt_ittapi_global.error_handler = (void*)handler;
+    __itt_error_handler_t* prev = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler;
+    _N_(_ittapi_global).error_handler = (void*)(size_t)handler;
     return prev;
 }
 
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 #pragma warning(pop)
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.h
index 1e9eb43..fe1fe3c 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.h
@@ -60,6 +60,8 @@
 ITT_STUBV(LIBITTAPI, void, thr_ignore,   (void),                             (ITT_NO_PARAMS),            thr_ignore,    __itt_group_thread | __itt_group_legacy, "no args")
 #endif /* __ITT_INTERNAL_BODY */
 
+ITT_STUBV(ITTAPI, void, enable_attach, (void), (ITT_NO_PARAMS), enable_attach, __itt_group_all, "no args")
+
 #else  /* __ITT_INTERNAL_INIT */
 
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -78,6 +80,11 @@
 ITT_STUBV(ITTAPI, void, sync_acquired,   (void *addr), (ITT_FORMAT addr), sync_acquired,  __itt_group_sync,  "%p")
 ITT_STUBV(ITTAPI, void, sync_releasing,  (void* addr), (ITT_FORMAT addr), sync_releasing, __itt_group_sync,  "%p")
 
+ITT_STUBV(ITTAPI, void, suppress_push,       (unsigned int mask),                             (ITT_FORMAT mask), suppress_push,  __itt_group_suppress,  "%p")
+ITT_STUBV(ITTAPI, void, suppress_pop,        (void),                                          (ITT_NO_PARAMS),   suppress_pop,   __itt_group_suppress,  "no args")
+ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_mark_range, __itt_group_suppress, "%d, %p, %p, %d")
+ITT_STUBV(ITTAPI, void, suppress_clear_range,(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_clear_range,__itt_group_suppress, "%d, %p, %p, %d")
+
 ITT_STUBV(ITTAPI, void, fsync_prepare,   (void* addr), (ITT_FORMAT addr), sync_prepare,   __itt_group_fsync, "%p")
 ITT_STUBV(ITTAPI, void, fsync_cancel,    (void *addr), (ITT_FORMAT addr), sync_cancel,    __itt_group_fsync, "%p")
 ITT_STUBV(ITTAPI, void, fsync_acquired,  (void *addr), (ITT_FORMAT addr), sync_acquired,  __itt_group_fsync, "%p")
@@ -95,16 +102,26 @@
 ITT_STUBV(ITTAPI, void, model_reduction_uses,      (void* addr, size_t size), (ITT_FORMAT addr, size), model_reduction_uses,      __itt_group_model, "%p, %d")
 ITT_STUBV(ITTAPI, void, model_observe_uses,        (void* addr, size_t size), (ITT_FORMAT addr, size), model_observe_uses,        __itt_group_model, "%p, %d")
 ITT_STUBV(ITTAPI, void, model_clear_uses,          (void* addr),              (ITT_FORMAT addr),       model_clear_uses,          __itt_group_model, "%p")
-ITT_STUBV(ITTAPI, void, model_disable_push,        (__itt_model_disable x),   (ITT_FORMAT x),          model_disable_push,        __itt_group_model, "%p")
-ITT_STUBV(ITTAPI, void, model_disable_pop,         (void),                    (ITT_NO_PARAMS),         model_disable_pop,         __itt_group_model, "no args")
 
 #ifndef __ITT_INTERNAL_BODY
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 ITT_STUBV(ITTAPI, void, model_site_beginW,         (const wchar_t *name), (ITT_FORMAT name), model_site_beginW, __itt_group_model, "\"%s\"")
 ITT_STUBV(ITTAPI, void, model_task_beginW,         (const wchar_t *name), (ITT_FORMAT name), model_task_beginW, __itt_group_model, "\"%s\"")
+ITT_STUBV(ITTAPI, void, model_iteration_taskW,     (const wchar_t *name),     (ITT_FORMAT name),       model_iteration_taskW,     __itt_group_model, "\"%s\"")
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, model_site_beginA,         (const char *name),        (ITT_FORMAT name),       model_site_beginA,         __itt_group_model, "\"%s\"")
 ITT_STUBV(ITTAPI, void, model_site_beginAL,        (const char *name, size_t len), (ITT_FORMAT name, len), model_site_beginAL, __itt_group_model, "\"%s\", %d")
+ITT_STUBV(ITTAPI, void, model_task_beginA,         (const char *name),        (ITT_FORMAT name),       model_task_beginA,         __itt_group_model, "\"%s\"")
 ITT_STUBV(ITTAPI, void, model_task_beginAL,        (const char *name, size_t len), (ITT_FORMAT name, len), model_task_beginAL, __itt_group_model, "\"%s\", %d")
+ITT_STUBV(ITTAPI, void, model_iteration_taskA,     (const char *name),        (ITT_FORMAT name),       model_iteration_taskA,     __itt_group_model, "\"%s\"")
+ITT_STUBV(ITTAPI, void, model_iteration_taskAL,    (const char *name, size_t len), (ITT_FORMAT name, len), model_iteration_taskAL, __itt_group_model, "\"%s\", %d")
+ITT_STUBV(ITTAPI, void, model_site_end_2,          (void),                    (ITT_NO_PARAMS),         model_site_end_2,          __itt_group_model, "no args")
+ITT_STUBV(ITTAPI, void, model_task_end_2,          (void),                    (ITT_NO_PARAMS),         model_task_end_2,          __itt_group_model, "no args")
+ITT_STUBV(ITTAPI, void, model_lock_acquire_2,      (void *lock),              (ITT_FORMAT lock),       model_lock_acquire_2,      __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_lock_release_2,      (void *lock),              (ITT_FORMAT lock),       model_lock_release_2,      __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_aggregate_task,      (size_t count),            (ITT_FORMAT count),      model_aggregate_task,      __itt_group_model, "%d")
+ITT_STUBV(ITTAPI, void, model_disable_push,        (__itt_model_disable x),   (ITT_FORMAT x),          model_disable_push,        __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_disable_pop,         (void),                    (ITT_NO_PARAMS),         model_disable_pop,         __itt_group_model, "no args")
 #endif /* __ITT_INTERNAL_BODY */
 
 #ifndef __ITT_INTERNAL_BODY
@@ -123,16 +140,23 @@
 ITT_STUBV(ITTAPI, void, heap_reallocate_end,   (__itt_heap_function h, void*  addr, void** new_addr, size_t new_size, int initialized), (ITT_FORMAT h, addr, new_addr, new_size, initialized), heap_reallocate_end,   __itt_group_heap, "%p, %p, %p, %lu, %d")
 ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void), (ITT_NO_PARAMS), heap_internal_access_begin, __itt_group_heap, "no args")
 ITT_STUBV(ITTAPI, void, heap_internal_access_end,   (void), (ITT_NO_PARAMS), heap_internal_access_end,   __itt_group_heap, "no args")
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void), (ITT_NO_PARAMS), heap_record_memory_growth_begin, __itt_group_heap, "no args")
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end,   (void), (ITT_NO_PARAMS), heap_record_memory_growth_end,   __itt_group_heap, "no args")
+ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask),  (ITT_FORMAT reset_mask), heap_reset_detection, __itt_group_heap, "%u")
+ITT_STUBV(ITTAPI, void, heap_record,          (unsigned int record_mask), (ITT_FORMAT record_mask),  heap_record,        __itt_group_heap, "%u")
 
 ITT_STUBV(ITTAPI, void, id_create,  (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_create,  __itt_group_structure, "%p, %lu")
 ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_destroy, __itt_group_structure, "%p, %lu")
 
+ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void), (ITT_NO_PARAMS), get_timestamp,  __itt_group_structure, "no args")
+
 ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), region_begin, __itt_group_structure, "%p, %lu, %lu, %p")
 ITT_STUBV(ITTAPI, void, region_end,   (const __itt_domain *domain, __itt_id id),                                             (ITT_FORMAT domain, id),               region_end,   __itt_group_structure, "%p, %lu")
 
 #ifndef __ITT_INTERNAL_BODY
 ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_begin_v3, __itt_group_structure, "%p, %p")
 ITT_STUBV(ITTAPI, void, frame_end_v3,   (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_end_v3,   __itt_group_structure, "%p, %p")
+ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end), (ITT_FORMAT domain, id, begin, end), frame_submit_v3, __itt_group_structure, "%p, %p, %lu, %lu")
 #endif /* __ITT_INTERNAL_BODY */
 
 ITT_STUBV(ITTAPI, void, task_group,   (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_group,  __itt_group_structure, "%p, %lu, %lu, %p")
@@ -280,4 +304,13 @@
 ITT_STUB(ITTAPI, const char*, api_version, (void), (ITT_NO_PARAMS), api_version, __itt_group_all & ~__itt_group_legacy, "no args")
 #endif /* __ITT_INTERNAL_BODY */
 
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveA, __itt_group_arrays, "%p, %d, %p, %d, \"%s\", %d")
+ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveW, __itt_group_arrays, "%p, %d, %p, %d, \"%S\", %d")
+#else  /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, av_save,  (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_save,  __itt_group_arrays, "%p, %d, %p, %d, \"%s\", %d")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* __ITT_INTERNAL_BODY */
+
 #endif /* __ITT_INTERNAL_INIT */
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_types.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_types.h
index 2799173..3695a67 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_types.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_types.h
@@ -29,6 +29,8 @@
     __itt_group_heap      = 1<<11,
     __itt_group_splitter_max = 1<<12,
     __itt_group_structure = 1<<12,
+    __itt_group_suppress  = 1<<13,
+    __itt_group_arrays    = 1<<14,
     __itt_group_all       = -1
 } __itt_group_id;
 
@@ -57,6 +59,8 @@
         { __itt_group_stitch,    "stitch"    }, \
         { __itt_group_heap,      "heap"      }, \
         { __itt_group_structure, "structure" }, \
+        { __itt_group_suppress,  "suppress"  }, \
+        { __itt_group_arrays,    "arrays"    }, \
         { __itt_group_none,      NULL        }  \
     }
 
diff --git a/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h b/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h
index b10676f..9919294 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h
@@ -47,12 +47,18 @@
 #  define ITT_PLATFORM_POSIX 2
 #endif /* ITT_PLATFORM_POSIX */
 
+#ifndef ITT_PLATFORM_MAC
+#  define ITT_PLATFORM_MAC 3
+#endif /* ITT_PLATFORM_MAC */
+
 #ifndef ITT_PLATFORM
 #  if ITT_OS==ITT_OS_WIN
 #    define ITT_PLATFORM ITT_PLATFORM_WIN
+#  elif ITT_OS==ITT_OS_MAC
+#    define ITT_PLATFORM ITT_PLATFORM_MAC
 #  else
 #    define ITT_PLATFORM ITT_PLATFORM_POSIX
-#  endif /* _WIN32 */
+#  endif
 #endif /* ITT_PLATFORM */
 
 #if defined(_UNICODE) && !defined(UNICODE)
@@ -73,11 +79,11 @@
 #  if ITT_PLATFORM==ITT_PLATFORM_WIN
 #    define CDECL __cdecl
 #  else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#    if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
-#      define CDECL /* not actual on x86_64 platform */
-#    else  /* _M_X64 || _M_AMD64 || __x86_64__ */
+#    if defined _M_IX86 || defined __i386__
 #      define CDECL __attribute__ ((cdecl))
-#    endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+#    else  /* _M_IX86 || __i386__ */
+#      define CDECL /* actual only on x86 platform */
+#    endif /* _M_IX86 || __i386__ */
 #  endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 #endif /* CDECL */
 
@@ -85,11 +91,11 @@
 #  if ITT_PLATFORM==ITT_PLATFORM_WIN
 #    define STDCALL __stdcall
 #  else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#    if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
-#      define STDCALL /* not supported on x86_64 platform */
-#    else  /* _M_X64 || _M_AMD64 || __x86_64__ */
+#    if defined _M_IX86 || defined __i386__
 #      define STDCALL __attribute__ ((stdcall))
-#    endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+#    else  /* _M_IX86 || __i386__ */
+#      define STDCALL /* supported only on x86 platform */
+#    endif /* _M_IX86 || __i386__ */
 #  endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 #endif /* STDCALL */
 
@@ -102,8 +108,8 @@
 
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
 /* use __forceinline (VC++ specific) */
-#define INLINE           __forceinline
-#define INLINE_ATTRIBUTE /* nothing */
+#define ITT_INLINE           __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
 #else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 /*
  * Generally, functions are not inlined unless optimization is specified.
@@ -111,11 +117,11 @@
  * if no optimization level was specified.
  */
 #ifdef __STRICT_ANSI__
-#define INLINE           static
+#define ITT_INLINE           static
 #else  /* __STRICT_ANSI__ */
-#define INLINE           static inline
+#define ITT_INLINE           static inline
 #endif /* __STRICT_ANSI__ */
-#define INLINE_ATTRIBUTE __attribute__ ((always_inline))
+#define ITT_INLINE_ATTRIBUTE __attribute__ ((always_inline, unused))
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 /** @endcond */
 
diff --git a/openmp/runtime/src/z_Linux_asm.s b/openmp/runtime/src/z_Linux_asm.s
index 1bfdc0b..1f1ba1b 100644
--- a/openmp/runtime/src/z_Linux_asm.s
+++ b/openmp/runtime/src/z_Linux_asm.s
@@ -1,7 +1,7 @@
 //  z_Linux_asm.s:  - microtasking routines specifically
 //                    written for Intel platforms running Linux* OS
-// $Revision: 42582 $
-// $Date: 2013-08-09 06:30:22 -0500 (Fri, 09 Aug 2013) $
+// $Revision: 42810 $
+// $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
 
 //
 ////===----------------------------------------------------------------------===//
@@ -77,7 +77,7 @@
 KMP_PREFIX_UNDERSCORE(\proc):
 .endm
 # endif // defined __APPLE__ && defined __MACH__
-#endif // __i386 || defined __x86_64
+#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
 
 
 // -----------------------------------------------------------------------
@@ -1573,3 +1573,19 @@
 	
 // -----------------------------------------------------------------------
 #endif /* KMP_ARCH_X86_64 */
+
+#if KMP_ARCH_ARM
+    .data
+    .comm .gomp_critical_user_,32,8
+    .data
+    .align 4
+    .global __kmp_unnamed_critical_addr
+__kmp_unnamed_critical_addr:
+    .4byte .gomp_critical_user_
+    .size __kmp_unnamed_critical_addr,4
+#endif /* KMP_ARCH_ARM */
+
+
+#if defined(__linux__)
+.section .note.GNU-stack,"",@progbits
+#endif
diff --git a/openmp/runtime/src/z_Linux_util.c b/openmp/runtime/src/z_Linux_util.c
index 4675302..27e394f 100644
--- a/openmp/runtime/src/z_Linux_util.c
+++ b/openmp/runtime/src/z_Linux_util.c
@@ -1,7 +1,7 @@
 /*
  * z_Linux_util.c -- platform specific routines.
- * $Revision: 42582 $
- * $Date: 2013-08-09 06:30:22 -0500 (Fri, 09 Aug 2013) $
+ * $Revision: 42847 $
+ * $Date: 2013-11-26 09:10:01 -0600 (Tue, 26 Nov 2013) $
  */
 
 
@@ -32,7 +32,7 @@
 
 #if KMP_OS_LINUX
 # include <sys/sysinfo.h>
-# if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+# if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
 // We should really include <futex.h>, but that causes compatibility problems on different
 // Linux* OS distributions that either require that you include (or break when you try to include)
 // <pci/types.h>.
@@ -55,6 +55,12 @@
 #include <ctype.h>
 #include <fcntl.h>
 
+// For non-x86 architecture
+#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
+# include <stdbool.h>
+# include <ffi.h>
+#endif
+
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
@@ -112,7 +118,7 @@
  * stone forever.
  */
 
-#  if KMP_ARCH_X86
+#  if KMP_ARCH_X86 || KMP_ARCH_ARM
 #   ifndef __NR_sched_setaffinity
 #    define __NR_sched_setaffinity  241
 #   elif __NR_sched_setaffinity != 241
@@ -434,7 +440,7 @@
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
 
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
 
 int
 __kmp_futex_determine_capable()
@@ -451,7 +457,7 @@
     return retval;
 }
 
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
 
 /* ------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------ */
@@ -2004,43 +2010,21 @@
 
 } // __kmp_get_xproc
 
-/*
-    Parse /proc/cpuinfo file for processor frequency, return frequency in Hz, or ~ 0 in case of
-    error.
-*/
-static
-kmp_uint64
-__kmp_get_frequency_from_proc(
-) {
+int
+__kmp_read_from_file( char const *path, char const *format, ... )
+{
+    int result;
+    va_list args;
 
-    kmp_uint64 result = ~ 0;
-    FILE *     file   = NULL;
-    double     freq   = HUGE_VAL;
-    int        rc;
+    va_start(args, format);
+    FILE *f = fopen(path, "rb");
+    if ( f == NULL ) { va_end(args); return 0; }
+    result = vfscanf(f, format, args);
+    fclose(f);
+    va_end(args);
 
-    //
-    // FIXME - use KMP_CPUINFO_FILE here if it is set!!!
-    //
-    file = fopen( "/proc/cpuinfo", "r" );
-    if ( file == NULL ) {
-        return result;
-    }; // if
-    for ( ; ; ) {
-        rc = fscanf( file, "cpu MHz : %lf\n", & freq );  // Try to scan frequency.
-        if ( rc == 1 ) {                                 // Success.
-            break;
-        }; // if
-        fscanf( file, "%*[^\n]\n" );                     // Failure -- skip line.
-    }; // for
-    fclose( file );
-    if ( freq == HUGE_VAL || freq <= 0 ) {
-        return result;
-    }; // if
-    result = (kmp_uint64)( freq * 1.0E+6 );
-    KA_TRACE( 5, ( "cpu frequency from /proc/cpuinfo: %" KMP_UINT64_SPEC "\n", result ) );
     return result;
-} // func __kmp_get_frequency_from_proc
-
+}
 
 void
 __kmp_runtime_initialize( void )
@@ -2059,15 +2043,6 @@
         }; // if
     #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
 
-    if ( __kmp_cpu_frequency == 0 ) {
-        // First try nominal frequency.
-        __kmp_cpu_frequency = __kmp_cpuinfo.frequency;
-        if ( __kmp_cpu_frequency == 0 || __kmp_cpu_frequency == ~ 0 ) {
-            // Next Try to get CPU frequency from /proc/cpuinfo.
-            __kmp_cpu_frequency = __kmp_get_frequency_from_proc();
-        }; // if
-    }; // if
-
     __kmp_xproc = __kmp_get_xproc();
 
     if ( sysconf( _SC_THREADS ) ) {
@@ -2536,5 +2511,42 @@
 
 #endif // USE_LOAD_BALANCE
 
+
+#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
+
+int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, int argc,
+        void *p_argv[] )
+{
+    int argc_full = argc + 2;
+    int i;
+    ffi_cif cif;
+    ffi_type *types[argc_full];
+    void *args[argc_full];
+    void *idp[2];
+
+    /* We're only passing pointers to the target. */
+    for (i = 0; i < argc_full; i++)
+        types[i] = &ffi_type_pointer;
+
+    /* Ugly double-indirection, but that's how it goes... */
+    idp[0] = &gtid;
+    idp[1] = &tid;
+    args[0] = &idp[0];
+    args[1] = &idp[1];
+
+    for (i = 0; i < argc; i++)
+        args[2 + i] = &p_argv[i];
+
+    if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, argc_full,
+                &ffi_type_void, types) != FFI_OK)
+        abort();
+
+    ffi_call(&cif, (void (*)(void))pkfn, NULL, args);
+
+    return 1;
+}
+
+#endif // KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
+
 // end of file //
 
diff --git a/openmp/runtime/src/z_Windows_NT_util.c b/openmp/runtime/src/z_Windows_NT_util.c
index bd22c25..ba59110 100644
--- a/openmp/runtime/src/z_Windows_NT_util.c
+++ b/openmp/runtime/src/z_Windows_NT_util.c
@@ -1,7 +1,7 @@
 /*
  * z_Windows_NT_util.c -- platform specific routines.
- * $Revision: 42518 $
- * $Date: 2013-07-15 11:12:26 -0500 (Mon, 15 Jul 2013) $
+ * $Revision: 42816 $
+ * $Date: 2013-11-11 15:33:37 -0600 (Mon, 11 Nov 2013) $
  */
 
 
@@ -391,14 +391,14 @@
     /* TODO: shouldn't this use release semantics to ensure that __kmp_suspend_initialize_thread
        gets called first?
     */
-    old_spin = __kmp_test_then_or32( (volatile kmp_int32 *) spinner,
+    old_spin = KMP_TEST_THEN_OR32( (volatile kmp_int32 *) spinner,
                                      KMP_BARRIER_SLEEP_STATE );
 
     KF_TRACE( 5, ( "__kmp_suspend: T#%d set sleep bit for spin(%p)==%d\n",
                                    th_gtid, spinner, *spinner ) );
 
     if ( old_spin == checker ) {
-        __kmp_test_then_and32( (volatile kmp_int32 *) spinner, ~(KMP_BARRIER_SLEEP_STATE) );
+        KMP_TEST_THEN_AND32( (volatile kmp_int32 *) spinner, ~(KMP_BARRIER_SLEEP_STATE) );
 
         KF_TRACE( 5, ( "__kmp_suspend: T#%d false alarm, reset sleep bit for spin(%p)\n",
                        th_gtid, spinner) );
@@ -501,7 +501,7 @@
     }
 
     TCW_PTR(th->th.th_sleep_loc, NULL);
-    old_spin = __kmp_test_then_and32( (kmp_int32 volatile *) spin, ~( KMP_BARRIER_SLEEP_STATE ) );
+    old_spin = KMP_TEST_THEN_AND32( (kmp_int32 volatile *) spin, ~( KMP_BARRIER_SLEEP_STATE ) );
 
     if ( ( old_spin & KMP_BARRIER_SLEEP_STATE ) == 0 ) {
         KF_TRACE( 5, ( "__kmp_resume: T#%d exiting, thread T#%d already awake - spin(%p): "
@@ -874,24 +874,6 @@
         }; // if
     #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
 
-    if ( __kmp_cpu_frequency == 0 ) {
-        // __kmp_hardware_timestamp() calls to QueryPerformanceCounter(). If
-        // __kmp_hardware_timestamp() rewritten to use RDTSC instruction (or its 64 analog),
-        // probably we should try to get frequency from __kmp_cpuinfo.frequency first (see
-        // z_Linux_util.c).
-        LARGE_INTEGER freq;
-        BOOL          rc;
-        rc = QueryPerformanceFrequency( & freq );
-        if ( rc ) {
-            KMP_DEBUG_ASSERT( sizeof( __kmp_cpu_frequency ) >= sizeof( freq.QuadPart ) );
-            KMP_DEBUG_ASSERT( freq.QuadPart >= 0 );
-            __kmp_cpu_frequency = freq.QuadPart;
-            KA_TRACE( 5, ( "cpu frequency: %" KMP_UINT64_SPEC "\n", __kmp_cpu_frequency ) );
-        } else {
-            __kmp_cpu_frequency = ~ 0;
-        }; // if
-    }; // if
-
     /* Set up minimum number of threads to switch to TLS gtid */
     #if KMP_OS_WINDOWS && ! defined GUIDEDLL_EXPORTS 
         // Windows* OS, static library.
diff --git a/openmp/runtime/tools/check-tools.pl b/openmp/runtime/tools/check-tools.pl
index 8140e11..1878ca5 100755
--- a/openmp/runtime/tools/check-tools.pl
+++ b/openmp/runtime/tools/check-tools.pl
@@ -268,6 +268,9 @@
             } elsif ( $stdout =~ m{^.*? \(SUSE Linux\) (\d+\.\d+\.\d+)\s+\[.*? (\d+)\]}m ) {
                 # gcc (SUSE Linux) 4.3.2 [gcc-4_3-branch revision 141291]
                 ( $ver, $bld ) = ( $1, $2 );
+            } elsif ( $stdout =~ m{^.*? \(SUSE Linux\) (\d+\.\d+\.\d+)\s+\d+\s+\[.*? (\d+)\]}m ) {
+                # gcc (SUSE Linux) 4.7.2 20130108 [gcc-4_7-branch revision 195012]
+                ( $ver, $bld ) = ( $1, $2 );
             } elsif ( $stdout =~ m{^.*? \((Debian|Ubuntu).*?\) (\d+\.\d+\.\d+)}m ) {
                 # gcc (Debian 4.7.2-22) 4.7.2
                 # Debian support from Sylvestre Ledru 
@@ -286,6 +289,35 @@
 }; # sub get_gnu_compiler_version
 
 
+sub get_clang_compiler_version($) {
+    my ( $tool ) = @_;
+    my ( @ret ) = ( $tool );
+    my ( $rc, $stdout, $stderr, $version );
+    $rc = run( [ $tool, "--version" ], $stdout, $stderr );
+    if ( $rc >= 0 ) {
+        my ( $ver, $bld );
+        if ( $target_os eq "mac" ) {
+            # Apple LLVM version 4.2 (clang-425.0.28) (based on LLVM 3.2svn)
+            $stdout =~ m{^.*? (\d+\.\d+) \(.*-(\d+\.\d+\.\d+)\)}m;
+            ( $ver, $bld ) = ( $1, $2 );
+        } else {
+            if ( 0 ) {
+            } elsif ( $stdout =~ m{^.*? (\d+\.\d+) \((.*)\)}m ) {
+                # clang version 3.3 (tags/RELEASE_33/final)
+                ( $ver, $bld ) = ( $1, $2 );
+            } 
+        }; # if
+        if ( defined( $ver ) ) {
+            $version = $ver . ( defined( $bld ) ? " ($bld)" : "" );
+        } else {
+            warning( "Cannot parse Clang compiler version:", $stdout, "(eof)" );
+        }; # if
+    }; # if
+    push( @ret, $version );
+    return @ret;
+}; # sub get_clang_compiler_version
+
+
 sub get_ms_compiler_version() {
     my ( $rc, $stdout, $stderr, $version );
     my $tool = "cl";
@@ -349,18 +381,30 @@
 
 my $make;
 my $intel       = 1;             # Check Intel compilers.
-my $gnu_fortran = 0;             # Check GNU Fortran.
+my $fortran     = 0;             # Check for corresponding Fortran compiler, ifort for intel 
+                                 #                                           gfortran for gnu 
+                                 #                                           gfortran for clang 
+my $clang       = 0;             # Check Clang Compilers.
 my $intel_compilers = {
     "lin" => { c => "icc", cpp => "icpc", f => "ifort" },
     "lrb" => { c => "icc", cpp => "icpc", f => "ifort" },
     "mac" => { c => "icc", cpp => "icpc", f => "ifort" },
     "win" => { c => "icl", cpp => undef,  f => "ifort" },
 };
+my $gnu_compilers = {
+    "lin" => { c => "gcc", cpp =>  "g++", f => "gfortran" },
+    "mac" => { c => "gcc", cpp =>  "g++", f => "gfortran" },
+};
+my $clang_compilers = {
+    "lin" => { c => "clang", cpp =>  "clang++" },
+    "mac" => { c => "clang", cpp =>  "clang++" },
+};
 
 get_options(
     Platform::target_options(),
     "intel!"         => \$intel,
-    "gnu-fortran!"   => \$gnu_fortran,
+    "fortran"        => \$fortran,
+    "clang"          => \$clang,
     "make"           => \$make,
     "pedantic"       => \$pedantic,
 );
@@ -375,21 +419,32 @@
         # If Intel C++ compiler has a name different from C compiler, check it as well.
         push( @versions, [ "Intel C++ Compiler", get_intel_compiler_version( $ic->{ cpp } ) ] );
     }; # if
-    if ( defined( $ic->{ f } ) ) {
-        push( @versions, [ "Intel Fortran Compiler", get_intel_compiler_version( $ic->{ f } ) ] );
-    }; # if
+    # fortran check must be explicitly specified on command line with --fortran
+    if ( $fortran ) {
+        if ( defined( $ic->{ f } ) ) {
+            push( @versions, [ "Intel Fortran Compiler", get_intel_compiler_version( $ic->{ f } ) ] );
+        }; # if
+    };
 }; # if
 if ( $target_os eq "lin" or $target_os eq "mac" ) {
-    push( @versions, [ "GNU C Compiler",     get_gnu_compiler_version( "gcc" ) ] );
-    push( @versions, [ "GNU C++ Compiler",   get_gnu_compiler_version( "g++" ) ] );
-    if ( $gnu_fortran ) {
-        push( @versions, [ "GNU Fortran Compiler", get_gnu_compiler_version( "gfortran" ) ] );
-    }; # if
-}; # if
+    # check for gnu tools by default because touch-test.c is compiled with them.
+    push( @versions, [ "GNU C Compiler",     get_gnu_compiler_version( $gnu_compilers->{ $target_os }->{ c   } ) ] );
+    push( @versions, [ "GNU C++ Compiler",   get_gnu_compiler_version( $gnu_compilers->{ $target_os }->{ cpp } ) ] );
+    if ( $clang ) {
+        push( @versions, [ "Clang C Compiler",     get_clang_compiler_version( $clang_compilers->{ $target_os }->{ c   } ) ] );
+        push( @versions, [ "Clang C++ Compiler",   get_clang_compiler_version( $clang_compilers->{ $target_os }->{ cpp } ) ] );
+    }; 
+    # if intel fortran has been checked then gnu fortran is unnecessary
+    # also, if user specifies clang as build compiler, then gfortran is assumed fortran compiler
+    if ( $fortran and not $intel ) {
+        push( @versions, [ "GNU Fortran Compiler", get_gnu_compiler_version( $gnu_compilers->{ $target_os }->{ f } ) ] );
+    }; 
+}; 
 if ( $target_os eq "win" ) {
     push( @versions, [ "MS C/C++ Compiler",  get_ms_compiler_version() ] );
     push( @versions, [ "MS Linker",          get_ms_linker_version() ] );
 }; # if
+
 my $count = 0;
 foreach my $item ( @versions ) {
     my ( $title, $tool, $version ) = @$item;
diff --git a/openmp/runtime/tools/common.inc b/openmp/runtime/tools/common.inc
index 4154b29..8eceb98 100644
--- a/openmp/runtime/tools/common.inc
+++ b/openmp/runtime/tools/common.inc
@@ -56,10 +56,14 @@
 # Setting defaults
 mode?=release
 
-ifeq "$(omp_os)" "windows"
-    compiler?=icl
+ifeq "$(filter 32 32e 64,$(arch))" ""
+    compiler?=gcc
 else
-    compiler?=icc
+    ifeq "$(omp_os)" "windows"
+        compiler?=icl
+    else
+        compiler?=icc
+    endif
 endif
 
 ifneq "$(mic)" "no"
diff --git a/openmp/runtime/tools/lib/Platform.pm b/openmp/runtime/tools/lib/Platform.pm
index 584eeb7..d723174 100644
--- a/openmp/runtime/tools/lib/Platform.pm
+++ b/openmp/runtime/tools/lib/Platform.pm
@@ -48,6 +48,8 @@
             $arch = "32";
         } elsif ( $arch =~ m{\A\s*(?:48|(?:ia)?32e|Intel\s*64|Intel\(R\)\s*64|x86[_-]64|x64|AMD64)\s*\z}i ) {
             $arch = "32e";
+        } elsif ( $arch =~ m{\Aarm(?:v7\D*)?\z} ) {
+            $arch = "arm";
         } else {
             $arch = undef;
         }; # if
@@ -59,6 +61,7 @@
     my %legal = (
         "32"  => "IA-32 architecture",
         "32e" => "Intel(R) 64",
+        "arm" => "ARM",
     );
 
     sub legal_arch($) {
@@ -76,6 +79,7 @@
         "32"  => "ia32",
         "32e" => "intel64",
         "64"  => "ia64",
+        "arm" => "arm",
     );
 
     sub arch_opt($) {
@@ -153,6 +157,8 @@
         $_host_arch = "64";
     } elsif ( $hardware_platform eq "x86_64" ) {
         $_host_arch = "32e";
+    } elsif ( $hardware_platform eq "arm" ) {
+        $_host_arch = "arm";
     } else {
         die "Unsupported host hardware platform: \"$hardware_platform\"; stopped";
     }; # if
@@ -178,7 +184,7 @@
     # Use arch specified in LIBOMP_ARCH.
     $_target_arch = canon_arch( $ENV{ LIBOMP_ARCH } );
     if ( not defined( $_target_arch ) ) {
-        die "Uknown architecture specified in LIBOMP_ARCH environment variable: \"$ENV{ LIBOMP_ARCH }\"";
+        die "Unknown architecture specified in LIBOMP_ARCH environment variable: \"$ENV{ LIBOMP_ARCH }\"";
     }; # if
 } else {
     # Otherwise use host architecture.
@@ -191,7 +197,7 @@
     # Use OS specified in LIBOMP_OS.
     $_target_os = canon_os( $ENV{ LIBOMP_OS } );
     if ( not defined( $_target_os ) ) {
-        die "Uknown OS specified in LIBOMP_OS environment variable: \"$ENV{ LIBOMP_OS }\"";
+        die "Unknown OS specified in LIBOMP_OS environment variable: \"$ENV{ LIBOMP_OS }\"";
     }; # if
 } else {
     # Otherwise use host OS.
diff --git a/openmp/runtime/tools/lib/Uname.pm b/openmp/runtime/tools/lib/Uname.pm
index f978f8b8..9556884 100644
--- a/openmp/runtime/tools/lib/Uname.pm
+++ b/openmp/runtime/tools/lib/Uname.pm
@@ -145,6 +145,8 @@
         $values{ hardware_platform } = "i386";
     } elsif ( $values{ machine } =~ m{\Ax86_64\z} ) {
         $values{ hardware_platform } = "x86_64";
+    } elsif ( $values{ machine } =~ m{\Aarmv7\D*\z} ) {
+        $values{ hardware_platform } = "arm";
     } else {
         die "Unsupported machine (\"$values{ machine }\") returned by POSIX::uname(); stopped";
     }; # if
@@ -276,7 +278,7 @@
                     or runtime_error( "$release: Cannot find the first line:", $bulk, "(eof)" );
                 my $first_line = $1;
                 $values{ operating_system_description } = $first_line;
-                $first_line =~ m{\A(.*?)\s+release\s+(.*?)\s+\((.*?)(?:\s+Update\s+(.*?))?\)\s*$}
+                $first_line =~ m{\A(.*?)\s+release\s+(.*?)(?:\s+\((.*?)(?:\s+Update\s+(.*?))?\))?\s*$}
                     or runtime_error( "$release:1: Cannot parse line:", $first_line );
                 $values{ operating_system_name    }  = $1;
                 $values{ operating_system_release }  = $2 . ( defined( $4 ) ? ".$4" : "" );
diff --git a/openmp/runtime/tools/src/common-checks.mk b/openmp/runtime/tools/src/common-checks.mk
index 08c246f..0959fc6 100644
--- a/openmp/runtime/tools/src/common-checks.mk
+++ b/openmp/runtime/tools/src/common-checks.mk
@@ -19,17 +19,27 @@
 # Check tools versions.
 #
 ifeq "$(clean)" ""    # Do not check tools if clean goal specified.
-    ifeq "$(c)" "gcc"
-        curr_tools := $(strip $(shell $(perl) $(tools_dir)check-tools.pl $(oa-opts) --no-intel --gnu-fortran --make))
-        ifneq "$(findstring N/A,$(curr_tools))" ""
-            curr_tools := $(strip $(shell $(perl) $(tools_dir)check-tools.pl $(oa-opts) --make))
-            fort = ifort
-        else
-            fort = gfortran
-        endif
+
+    check_tools_flags = --make
+
+    # determine if fortran check is required from goals
+    # MAKECMDGOALS is like argv for gnu make
+    ifneq "$(filter mod all,$(MAKECMDGOALS))" ""
+        check_tools_flags += --fortran
     else
-        curr_tools := $(strip $(shell $(perl) $(tools_dir)check-tools.pl $(oa-opts) --make))
+        ifeq "$(MAKECMDGOALS)" "" # will default to all if no goals specified on command line
+            check_tools_flags += --fortran
+        endif
     endif
+    ifneq "$(filter gcc clang,$(c))" "" # if build compiler is gcc or clang
+        check_tools_flags += --no-intel
+    endif
+    ifeq "$(c)" "clang"
+        check_tools_flags += --clang
+    endif
+
+    curr_tools := $(strip $(shell $(perl) $(tools_dir)check-tools.pl $(oa-opts) $(check_tools_flags)))
+
     ifeq "$(curr_tools)" ""
         $(error check-tools.pl failed)
     endif
diff --git a/openmp/runtime/tools/src/common-defs.mk b/openmp/runtime/tools/src/common-defs.mk
index 1c164bc..ebd1922 100644
--- a/openmp/runtime/tools/src/common-defs.mk
+++ b/openmp/runtime/tools/src/common-defs.mk
@@ -45,7 +45,7 @@
 # Description:
 #     The function return printable name of specified architecture, IA-32 architecture or Intel(R) 64.
 #
-legal_arch = $(if $(filter 32,$(1)),IA-32,$(if $(filter 32e,$(1)),Intel(R) 64,$(if $(filter l1,$(1)),L1OM,$(error Bad architecture specified: $(1))))))
+legal_arch = $(if $(filter 32,$(1)),IA-32,$(if $(filter 32e,$(1)),Intel(R) 64,$(if $(filter l1,$(1)),L1OM,$(if $(filter arm,$(1)),ARM,$(error Bad architecture specified: $(1))))))
 
 # Synopsis:
 #     var_name = $(call check_variable,var,list)
@@ -128,9 +128,9 @@
 # --------------------------------------------------------------------------------------------------
 
 os       := $(call check_variable,os,lin lrb mac win)
-arch     := $(call check_variable,arch,32 32e 64)
+arch     := $(call check_variable,arch,32 32e 64 arm)
 platform := $(os)_$(arch)
-platform := $(call check_variable,platform,lin_32 lin_32e lin_64 lrb_32e mac_32 mac_32e win_32 win_32e win_64)
+platform := $(call check_variable,platform,lin_32 lin_32e lin_64 lin_arm lrb_32e mac_32 mac_32e win_32 win_32e win_64)
 # oa-opts means "os and arch options". They are passed to almost all perl scripts.
 oa-opts  := --os=$(os) --arch=$(arch)
 
diff --git a/openmp/runtime/tools/src/common-tools.mk b/openmp/runtime/tools/src/common-tools.mk
index 65bc92e..a9c9fbc 100644
--- a/openmp/runtime/tools/src/common-tools.mk
+++ b/openmp/runtime/tools/src/common-tools.mk
@@ -33,6 +33,10 @@
 # on Windows* OS generates such a dependency: "kmp_runtime.obj: .\kmp_i18n.inc", and make complains
 # "No rule to build .\kmp_i18n.inc". Using "./" solves the problem.
 cpp-flags += -I ./
+# For non-x86 architectures
+ifeq "$(filter 32 32e 64,$(arch))" ""
+    cpp-flags += $(shell pkg-config --cflags libffi)
+endif
 # Add all VPATH directories to path for searching include files.
 cpp-flags += $(foreach i,$(VPATH),-I $(i))
 
@@ -60,6 +64,9 @@
     ifeq "$(c)" "gcc"
         cxx = g++
     endif
+    ifeq "$(c)" "clang"
+        cxx = clang++
+    endif
     # Output file flag.
     c-out   = -o$(space)
     cxx-out = -o$(space)
@@ -70,7 +77,9 @@
     c-flags-m   += -M -MG
     cxx-flags-m += -M -MG
     # Enable C99 language.
-    c-flags += -std=c99
+    ifneq "$(CPLUSPLUS)" "on"
+        c-flags += -std=gnu99
+    endif
     # Generate position-independent code (a must for shared objects).
     ifeq "$(LINK_TYPE)" "dyna"
         c-flags   += -fPIC
@@ -118,12 +127,24 @@
     ifeq "$(c)" "gcc"
         as        = gcc
     endif
+    ifeq "$(c)" "clang"
+        as        = clang
+    endif
     as-out    = -o$(space)
     as-flags += $(cpp-flags)
     # Compile only, no link.
     as-flags += -c
     as-flags += -x assembler-with-cpp
     # --- Fortran ---
+    ifeq "$(c)" "icc"
+        fort = ifort
+    endif
+    ifeq "$(c)" "gcc"
+        fort = gfortran
+    endif
+    ifeq "$(c)" "clang"
+        fort = gfortran
+    endif
     ifeq "$(fort)" ""
         fort = ifort
     endif
@@ -148,6 +169,11 @@
             cxx-flags += -mia32
         endif
     endif
+    ifeq "$(c)" "gcc"
+        ifeq "$(arch)" "arm"
+            c-flags   += -marm
+        endif
+    endif
     # --- Librarian ---
     ar        = ar
     ar-out    = $(empty)
@@ -298,7 +324,9 @@
     c-flags-m   += -QM -QMM -QMG
     cxx-flags-m += -QM -QMM -QMG
     # Enable C99 language.
-    c-flags   += -Qstd=c99
+    ifneq "$(CPLUSPLUS)" "on"
+        c-flags   += -Qstd=gnu99
+    endif
     # Enable C++ exception handling.
     # ??? Why we disable it on Linux* OS?
     cxx-flags += -EHsc
diff --git a/openmp/www/README.txt b/openmp/www/README.txt
index 3880bf0..6ecca7f 100644
--- a/openmp/www/README.txt
+++ b/openmp/www/README.txt
@@ -74,13 +74,13 @@
 Supported Architectures: IA-32 architecture, Intel(R) 64, and 
 Intel(R) Many Integrated Core Architecture
 
-              -----------------------------------------------------------  
-              |           icc/icl            |           gcc            |
---------------|------------------------------|--------------------------|
-| Linux* OS   |            Yes(1,5)          |         Yes(2,4)         | 
-| OS X*       |            Yes(1,3,4)        |          No              |
-| Windows* OS |            Yes(1,4)          |          No              |
--------------------------------------------------------------------------
+              --------------------------------------------  
+              |   icc/icl     |    gcc      |   clang    |
+--------------|---------------|-------------|------------|
+| Linux* OS   |   Yes(1,5)    |  Yes(2,4)   | Yes(4,6,7) |
+| OS X*       |   Yes(1,3,4)  |  No         | Yes(4,6,7) |
+| Windows* OS |   Yes(1,4)    |  No         | No         |
+----------------------------------------------------------
 
 (1) On IA-32 architecture and Intel(R) 64, icc/icl versions 12.x are 
     supported (12.1 is recommended).
@@ -89,6 +89,14 @@
 (4) Intel(R) Many Integrated Core Architecture not supported.
 (5) On Intel(R) Many Integrated Core Architecture, icc/icl versions 13.0 
     or later are required.
+(6) clang version 3.3 is supported.
+(7) clang currently does not offer a software-implemented 128 bit extended 
+    precision type.  Thus, all entry points reliant on this type are removed
+    from the library and cannot be called in the user program.  The following
+    functions are not available:
+    __kmpc_atomic_cmplx16_*
+    __kmpc_atomic_float16_*
+    __kmpc_atomic_*_fp
 
 Front-end Compilers that work with this RTL
 ===========================================
diff --git a/openmp/www/Reference.pdf b/openmp/www/Reference.pdf
index 60ce400..680f98c 100644
--- a/openmp/www/Reference.pdf
+++ b/openmp/www/Reference.pdf
Binary files differ