For your Christmas hacking pleasure.
This release aligns with Intel(r) Composer XE 2013 SP1 Product Update 2
New features
* The library can now be built with clang (though with some
  limitations since clang does not support 128 bit floats)
* Support for Vtune analysis of load imbalance
* Code contribution from Steven Noonan to build the runtime for ARM*
architecture processors
* First implementation of runtime API for OpenMP cancellation
Bug Fixes
* Fixed hang on Windows (only) when using KMP_BLOCKTIME=0
llvm-svn: 197914
diff --git a/openmp/CREDITS.txt b/openmp/CREDITS.txt
index 67b3e9e..c054358 100644
--- a/openmp/CREDITS.txt
+++ b/openmp/CREDITS.txt
@@ -12,3 +12,6 @@
W: http://openmprtl.org
D: Created the runtime.
+N: Steven Noonan
+E: steven@uplinklabs.net
+D: Patches for the ARM architecture and several inconsistency removal.
diff --git a/openmp/runtime/README.txt b/openmp/runtime/README.txt
index 3880bf0..6ecca7f 100644
--- a/openmp/runtime/README.txt
+++ b/openmp/runtime/README.txt
@@ -74,13 +74,13 @@
Supported Architectures: IA-32 architecture, Intel(R) 64, and
Intel(R) Many Integrated Core Architecture
- -----------------------------------------------------------
- | icc/icl | gcc |
---------------|------------------------------|--------------------------|
-| Linux* OS | Yes(1,5) | Yes(2,4) |
-| OS X* | Yes(1,3,4) | No |
-| Windows* OS | Yes(1,4) | No |
--------------------------------------------------------------------------
+ --------------------------------------------
+ | icc/icl | gcc | clang |
+--------------|---------------|--------------------------|
+| Linux* OS | Yes(1,5) | Yes(2,4) | Yes(4,6,7) |
+| OS X* | Yes(1,3,4) | No | Yes(4,6,7) |
+| Windows* OS | Yes(1,4) | No | No |
+----------------------------------------------------------
(1) On IA-32 architecture and Intel(R) 64, icc/icl versions 12.x are
supported (12.1 is recommended).
@@ -89,6 +89,14 @@
(4) Intel(R) Many Integrated Core Architecture not supported.
(5) On Intel(R) Many Integrated Core Architecture, icc/icl versions 13.0
or later are required.
+(6) clang version 3.3 is supported.
+(7) clang currently does not offer a software-implemented 128 bit extended
+ precision type. Thus, all entry points reliant on this type are removed
+ from the library and cannot be called in the user program. The following
+ functions are not available:
+ __kmpc_atomic_cmplx16_*
+ __kmpc_atomic_float16_*
+ __kmpc_atomic_*_fp
Front-end Compilers that work with this RTL
===========================================
diff --git a/openmp/runtime/doc/Reference.pdf b/openmp/runtime/doc/Reference.pdf
index 60ce400..680f98c 100644
--- a/openmp/runtime/doc/Reference.pdf
+++ b/openmp/runtime/doc/Reference.pdf
Binary files differ
diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
index cfcbdeb9..779f1d4 100644
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -357,6 +357,9 @@
__kmpc_fork_teams 241
__kmpc_omp_task_with_deps 242
__kmpc_omp_wait_deps 243
+ __kmpc_cancel 244
+ __kmpc_cancellationpoint 245
+ __kmpc_cancel_barrier 246
%endif # OMP_40
%endif
@@ -455,6 +458,8 @@
#omp_curr_proc_bind 864
omp_get_num_teams 865
omp_get_team_num 866
+ omp_get_cancellation 867
+ kmp_get_cancellation_status 868
%endif # OMP_40
%ifndef stub
diff --git a/openmp/runtime/src/exports_so.txt b/openmp/runtime/src/exports_so.txt
index 4ddf575..9ace78f 100644
--- a/openmp/runtime/src/exports_so.txt
+++ b/openmp/runtime/src/exports_so.txt
@@ -80,4 +80,26 @@
}; # VERSION
+# sets up GCC OMP_ version dependency chain
+OMP_1.0 {
+};
+OMP_2.0 {
+} OMP_1.0;
+OMP_3.0 {
+} OMP_2.0;
+OMP_3.1 {
+} OMP_3.0;
+OMP_4.0 {
+} OMP_3.1;
+
+# sets up GCC GOMP_ version dependency chain
+GOMP_1.0 {
+};
+GOMP_2.0 {
+} GOMP_1.0;
+GOMP_3.0 {
+} GOMP_2.0;
+GOMP_4.0 {
+} GOMP_3.0;
+
# end of file #
diff --git a/openmp/runtime/src/include/40/iomp.h.var b/openmp/runtime/src/include/40/iomp.h.var
index 88b74f3..8aeb38c 100644
--- a/openmp/runtime/src/include/40/iomp.h.var
+++ b/openmp/runtime/src/include/40/iomp.h.var
@@ -82,6 +82,16 @@
extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void);
extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void);
+ /* schedule kind constants */
+ typedef enum kmp_cancel_kind_t {
+ kmp_cancel_parallel = 1,
+ kmp_cancel_loop = 2,
+ kmp_cancel_sections = 3,
+ kmp_cancel_taskgroup = 4
+ } kmp_cancel_kind_t;
+
+ extern int __KAI_KMPC_CONVENTION kmp_get_cancellation_status(kmp_cancel_kind_t);
+
# undef __KAI_KMPC_CONVENTION
/* Warning:
diff --git a/openmp/runtime/src/include/40/omp.h.var b/openmp/runtime/src/include/40/omp.h.var
index 38400d4..c6dd4cd 100644
--- a/openmp/runtime/src/include/40/omp.h.var
+++ b/openmp/runtime/src/include/40/omp.h.var
@@ -27,30 +27,6 @@
extern "C" {
# endif
-# define omp_set_num_threads ompc_set_num_threads
-# define omp_set_dynamic ompc_set_dynamic
-# define omp_set_nested ompc_set_nested
-# define omp_set_max_active_levels ompc_set_max_active_levels
-# define omp_set_schedule ompc_set_schedule
-# define omp_get_ancestor_thread_num ompc_get_ancestor_thread_num
-# define omp_get_team_size ompc_get_team_size
-
-
-# define kmp_set_stacksize kmpc_set_stacksize
-# define kmp_set_stacksize_s kmpc_set_stacksize_s
-# define kmp_set_blocktime kmpc_set_blocktime
-# define kmp_set_library kmpc_set_library
-# define kmp_set_defaults kmpc_set_defaults
-# define kmp_set_affinity_mask_proc kmpc_set_affinity_mask_proc
-# define kmp_unset_affinity_mask_proc kmpc_unset_affinity_mask_proc
-# define kmp_get_affinity_mask_proc kmpc_get_affinity_mask_proc
-
-# define kmp_malloc kmpc_malloc
-# define kmp_calloc kmpc_calloc
-# define kmp_realloc kmpc_realloc
-# define kmp_free kmpc_free
-
-
# if defined(_WIN32)
# define __KAI_KMPC_CONVENTION __cdecl
# else
@@ -120,6 +96,7 @@
extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void);
extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void);
extern int __KAI_KMPC_CONVENTION omp_get_team_num (void);
+ extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void);
# include <stdlib.h>
/* kmp API functions */
diff --git a/openmp/runtime/src/include/40/omp_lib.f.var b/openmp/runtime/src/include/40/omp_lib.f.var
index 0adadb1..fb9b2f2 100644
--- a/openmp/runtime/src/include/40/omp_lib.f.var
+++ b/openmp/runtime/src/include/40/omp_lib.f.var
@@ -32,6 +32,7 @@
integer, parameter :: kmp_pointer_kind = int_ptr_kind()
integer, parameter :: kmp_size_t_kind = int_ptr_kind()
integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
+ integer, parameter :: kmp_cancel_kind = omp_integer_kind
end module omp_lib_kinds
@@ -56,6 +57,11 @@
integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3
integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4
+
interface
! ***
@@ -199,6 +205,11 @@
integer (kind=omp_integer_kind) omp_get_team_num
end function omp_get_team_num
+ function omp_get_cancellation()
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_cancellation
+ end function omp_get_cancellation
+
subroutine omp_init_lock(lockvar)
!DIR$ IF(__INTEL_COMPILER.GE.1400)
!DIR$ attributes known_intrinsic :: omp_init_lock
@@ -417,6 +428,11 @@
subroutine kmp_set_warnings_off()
end subroutine kmp_set_warnings_off
+ function kmp_get_cancellation_status(cancelkind)
+ use omp_lib_kinds
+ integer (kind=kmp_cancel_kind) cancelkind
+ logical (kind=omp_logical_kind) kmp_get_cancellation_status
+ end function kmp_get_cancellation_status
end interface
!dec$ if defined(_WIN32)
@@ -459,6 +475,7 @@
!dec$ attributes alias:'OMP_GET_NUM_DEVICES' :: omp_get_num_devices
!dec$ attributes alias:'OMP_GET_NUM_TEAMS' :: omp_get_num_teams
!dec$ attributes alias:'OMP_GET_TEAM_NUM' :: omp_get_team_num
+!dec$ attributes alias:'OMP_GET_CANCELLATION' :: omp_get_cancellation
!dec$ attributes alias:'omp_init_lock' :: omp_init_lock
!dec$ attributes alias:'omp_destroy_lock' :: omp_destroy_lock
@@ -498,6 +515,8 @@
!dec$ attributes alias:'KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
!dec$ attributes alias:'KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
+!dec$ attributes alias:'KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status
+
!dec$ else
!***
@@ -531,6 +550,7 @@
!dec$ attributes alias:'_OMP_GET_NUM_DEVICES' :: omp_get_num_devices
!dec$ attributes alias:'_OMP_GET_NUM_TEAMS' :: omp_get_num_teams
!dec$ attributes alias:'_OMP_GET_TEAM_NUM' :: omp_get_team_num
+!dec$ attributes alias:'_OMP_GET_CANCELLATION' :: omp_get_cancellation
!dec$ attributes alias:'_omp_init_lock' :: omp_init_lock
!dec$ attributes alias:'_omp_destroy_lock' :: omp_destroy_lock
@@ -570,6 +590,8 @@
!dec$ attributes alias:'_KMP_SET_WARNINGS_ON'::kmp_set_warnings_on
!dec$ attributes alias:'_KMP_SET_WARNINGS_OFF'::kmp_set_warnings_off
+!dec$ attributes alias:'_KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status
+
!dec$ endif
!dec$ endif
@@ -606,6 +628,7 @@
!dec$ attributes alias:'omp_get_num_devices_'::omp_get_num_devices
!dec$ attributes alias:'omp_get_num_teams_'::omp_get_num_teams
!dec$ attributes alias:'omp_get_team_num_'::omp_get_team_num
+!dec$ attributes alias:'omp_get_cancellation_'::omp_get_cancellation
!dec$ attributes alias:'omp_init_lock_'::omp_init_lock
!dec$ attributes alias:'omp_destroy_lock_'::omp_destroy_lock
@@ -644,6 +667,7 @@
!dec$ attributes alias:'kmp_set_warnings_on_'::kmp_set_warnings_on
!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off
+!dec$ attributes alias:'kmp_get_cancellation_status_'::kmp_get_cancellation_status
!dec$ endif
@@ -678,6 +702,7 @@
!dec$ attributes alias:'_omp_get_wtick_'::omp_get_wtick
!dec$ attributes alias:'_omp_get_num_teams_'::omp_get_num_teams
!dec$ attributes alias:'_omp_get_team_num_'::omp_get_team_num
+!dec$ attributes alias:'_omp_get_cancellation_'::omp_get_cancellation
!dec$ attributes alias:'_omp_init_lock_'::omp_init_lock
!dec$ attributes alias:'_omp_destroy_lock_'::omp_destroy_lock
@@ -717,6 +742,8 @@
!dec$ attributes alias:'_kmp_set_warnings_on_'::kmp_set_warnings_on
!dec$ attributes alias:'_kmp_set_warnings_off_'::kmp_set_warnings_off
+!dec$ attributes alias:'_kmp_get_cancellation_status_'::kmp_get_cancellation_status
+
!dec$ endif
end module omp_lib
diff --git a/openmp/runtime/src/include/40/omp_lib.f90.var b/openmp/runtime/src/include/40/omp_lib.f90.var
index 5cac259..f785352 100644
--- a/openmp/runtime/src/include/40/omp_lib.f90.var
+++ b/openmp/runtime/src/include/40/omp_lib.f90.var
@@ -28,6 +28,7 @@
integer, parameter :: kmp_pointer_kind = c_intptr_t
integer, parameter :: kmp_size_t_kind = c_size_t
integer, parameter :: kmp_affinity_mask_kind = c_intptr_t
+ integer, parameter :: kmp_cancel_kind = omp_integer_kind
end module omp_lib_kinds
@@ -47,12 +48,18 @@
integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
+
integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0
integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1
integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2
integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3
integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_parallel = 1
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_loop = 2
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3
+ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4
+
interface
! ***
@@ -198,6 +205,11 @@
integer (kind=omp_integer_kind) omp_get_team_num
end function omp_get_team_num
+ function omp_get_cancellation() bind(c)
+ use omp_lib_kinds
+ integer (kind=omp_integer_kind) omp_get_cancellation
+ end function omp_get_cancellation
+
subroutine omp_init_lock(lockvar) bind(c)
!DIR$ IF(__INTEL_COMPILER.GE.1400)
!DIR$ attributes known_intrinsic :: omp_init_lock
@@ -417,6 +429,12 @@
subroutine kmp_set_warnings_off() bind(c)
end subroutine kmp_set_warnings_off
+ function kmp_get_cancellation_status(cancelkind) bind(c)
+ use omp_lib_kinds
+ integer (kind=kmp_cancel_kind), value :: cancelkind
+ logical (kind=omp_logical_kind) kmp_get_cancellation_status
+ end function kmp_get_cancellation_status
+
end interface
end module omp_lib
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 7117571..37c7f41 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -1,8 +1,8 @@
/*! \file */
/*
* kmp.h -- KPTS runtime header file.
- * $Revision: 42642 $
- * $Date: 2013-09-06 01:57:24 -0500 (Fri, 06 Sep 2013) $
+ * $Revision: 42816 $
+ * $Date: 2013-11-11 15:33:37 -0600 (Mon, 11 Nov 2013) $
*/
@@ -26,10 +26,6 @@
*/
//#define FIX_SGI_CLOCK
-#if defined( __GNUC__ ) && !defined( __INTEL_COMPILER )
-typedef __float128 _Quad;
-#endif
-
/* Defines for OpenMP 3.0 tasking and auto scheduling */
#if OMP_30_ENABLED
@@ -81,9 +77,12 @@
#include <errno.h>
-#include <xmmintrin.h>
-
#include "kmp_os.h"
+
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+#include <xmmintrin.h>
+#endif
+
#include "kmp_version.h"
#include "kmp_debug.h"
#include "kmp_lock.h"
@@ -188,7 +187,7 @@
/* contextual information. */
#endif /* USE_ITT_BUILD */
kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */
- char *psource; /**< String describing the source location.
+ char const *psource; /**< String describing the source location.
The string is composed of semi-colon separated fields which describe the source file,
the function and a pair of line numbers that delimit the construct.
*/
@@ -231,6 +230,13 @@
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
+#define KMP_MAX( x, y ) ( (x) > (y) ? (x) : (y) )
+#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
+
+/* ------------------------------------------------------------------------ */
+/* ------------------------------------------------------------------------ */
+
+
/* Enumeration types */
enum kmp_state_timer {
@@ -752,6 +758,16 @@
#endif /* OMP_40_ENABLED */
+#if OMP_40_ENABLED
+typedef enum kmp_cancel_kind_t {
+ cancel_noreq = 0,
+ cancel_parallel = 1,
+ cancel_loop = 2,
+ cancel_sections = 3,
+ cancel_taskgroup = 4
+} kmp_cancel_kind_t;
+#endif // OMP_40_ENABLED
+
#if KMP_MIC
extern unsigned int __kmp_place_num_cores;
extern unsigned int __kmp_place_num_threads_per_core;
@@ -777,7 +793,7 @@
#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()
#define __kmp_tid_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \
- /*(__kmp_threads[ (gtid) ]->th.th_team_serialized) ? 0 : /* TODO remove this check, it is redundant */ \
+ /*(__kmp_threads[ (gtid) ]->th.th_team_serialized) ? 0 : */ /* TODO remove this check, it is redundant */ \
__kmp_threads[ (gtid) ]->th.th_info.ds.ds_tid )
#define __kmp_get_tid() ( __kmp_tid_from_gtid( __kmp_get_gtid() ) )
@@ -1078,14 +1094,6 @@
#endif /* BUILD_TV */
/* ------------------------------------------------------------------------ */
-// Some forward declarations.
-
-typedef union kmp_team kmp_team_t;
-typedef struct kmp_taskdata kmp_taskdata_t;
-typedef union kmp_task_team kmp_task_team_t;
-typedef union kmp_team kmp_team_p;
-typedef union kmp_info kmp_info_p;
-typedef union kmp_root kmp_root_p;
#if USE_ITT_BUILD
// We cannot include "kmp_itt.h" due to circular dependency. Declare the only required type here.
@@ -1883,8 +1891,12 @@
void * shareds; /**< pointer to block of pointers to shared vars */
kmp_routine_entry_t routine; /**< pointer to routine to call for executing task */
kmp_int32 part_id; /**< part id for the task */
+#if OMP_40_ENABLED
+ kmp_routine_entry_t destructors; /* pointer to function to invoke deconstructors of firstprivate C++ objects */
+#endif // OMP_40_ENABLED
/* private vars */
} kmp_task_t;
+
/*!
@}
*/
@@ -1892,6 +1904,7 @@
#if OMP_40_ENABLED
typedef struct kmp_taskgroup {
kmp_uint32 count; // number of allocated and not yet complete tasks
+ kmp_int32 cancel_request; // request for cancellation of this taskgroup
struct kmp_taskgroup *parent; // parent taskgroup
} kmp_taskgroup_t;
@@ -1974,7 +1987,12 @@
unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
unsigned final : 1; /* task is final(1) so execute immediately */
unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0 code path */
- unsigned reserved13 : 13; /* reserved for compiler use */
+#if OMP_40_ENABLED
+ unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to invoke destructors from the runtime */
+ unsigned reserved : 12; /* reserved for compiler use */
+#else // OMP_40_ENABLED
+ unsigned reserved : 13; /* reserved for compiler use */
+#endif // OMP_40_ENABLED
/* Library flags */ /* Total library flags must be 16 bits */
unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
@@ -2014,7 +2032,11 @@
kmp_dephash_t * td_dephash; // Dependencies for children tasks are tracked from here
kmp_depnode_t * td_depnode; // Pointer to graph node if this task has dependencies
#endif
+#if KMP_HAVE_QUAD
_Quad td_dummy; // Align structure 16-byte size since allocated just before kmp_task_t
+#else
+ kmp_uint32 td_dummy[2];
+#endif
}; // struct kmp_taskdata
// Make sure padding above worked
@@ -2121,6 +2143,8 @@
int th_team_bt_intervals;
int th_team_bt_set;
+ kmp_internal_control_t th_fixed_icvs; /* Initial ICVs for the thread */
+
#if KMP_OS_WINDOWS || KMP_OS_LINUX
kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
@@ -2142,6 +2166,7 @@
# endif
#endif
#if USE_ITT_BUILD
+ kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
kmp_uint64 th_frame_time; /* frame timestamp */
kmp_uint64 th_frame_time_serialized; /* frame timestamp in serialized parallel */
#endif /* USE_ITT_BUILD */
@@ -2328,15 +2353,6 @@
kmp_uint32 t_mxcsr;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-#if KMP_BARRIER_ICV_PULL
- //
- // Note: Putting ICV's before the fp control info causes a very slight
- // ~1% improvement for EPCC parallel on fxe256lin01 / 256 threads, but
- // causes a 17% regression on fxe64lin01 / 64 threads.
- //
- kmp_internal_control_t t_initial_icvs;
-#endif // KMP_BARRIER_ICV_PULL
-
#if (KMP_PERF_V106 == KMP_ON)
void *t_inline_argv[ KMP_INLINE_ARGV_ENTRIES ];
#endif
@@ -2398,6 +2414,9 @@
kmp_internal_control_t *t_control_stack_top; /* internal control stack for additional nested teams.
for SERIALIZED teams nested 2 or more levels deep */
+#if OMP_40_ENABLED
+ kmp_int32 t_cancel_request; /* typed flag to store request state of cancellation */
+#endif
int t_master_active;/* save on fork, restore on join */
kmp_taskq_t t_taskq; /* this team's task queue */
@@ -2479,8 +2498,6 @@
#if USE_ITT_BUILD
extern int __kmp_forkjoin_frames;
extern int __kmp_forkjoin_frames_mode;
-extern FILE * __kmp_itt_csv_file;
-extern kmp_str_buf_t __kmp_itt_frame_buffer;
#endif
extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;
extern int __kmp_determ_red;
@@ -2526,9 +2543,6 @@
extern int __kmp_storage_map_verbose_specified;
extern kmp_cpuinfo_t __kmp_cpuinfo;
-extern kmp_uint64 __kmp_cpu_frequency;
- // CPU frequency, in Hz. Set by __kmp_runtime_initialize(). 0 means "is not set yet",
- // ~ 0 signals an errror.
extern volatile int __kmp_init_serial;
extern volatile int __kmp_init_gtid;
@@ -2678,13 +2692,13 @@
# endif /* USE_LOAD_BALANCE */
// OpenMP 3.1 - Nested num threads array
-struct kmp_nested_nthreads_t {
+typedef struct kmp_nested_nthreads_t {
int * nth;
int size;
int used;
-};
+} kmp_nested_nthreads_t;
-extern struct kmp_nested_nthreads_t __kmp_nested_nth;
+extern kmp_nested_nthreads_t __kmp_nested_nth;
#if KMP_USE_ADAPTIVE_LOCKS
@@ -2707,6 +2721,7 @@
#if OMP_40_ENABLED
extern int __kmp_display_env; /* TRUE or FALSE */
extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
+extern int __kmp_omp_cancellation; /* TRUE or FALSE */
#endif
/* ------------------------------------------------------------------------- */
@@ -2796,7 +2811,7 @@
extern void __kmp_set_num_threads( int new_nth, int gtid );
// Returns current thread (pointer to kmp_info_t). Current thread *must* be registered.
-inline kmp_info_t * __kmp_entry_thread()
+static inline kmp_info_t * __kmp_entry_thread()
{
int gtid = __kmp_entry_gtid();
@@ -2976,11 +2991,11 @@
#endif /* KMP_OS_LINUX || KMP_OS_WINDOWS */
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
extern int __kmp_futex_determine_capable( void );
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
extern void __kmp_gtid_set_specific( int gtid );
extern int __kmp_gtid_get_specific( void );
@@ -3067,7 +3082,7 @@
extern int __kmp_fork_call( ident_t *loc, int gtid, int exec_master,
kmp_int32 argc, microtask_t microtask, launch_t invoker,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
-#if KMP_ARCH_X86_64 && KMP_OS_LINUX
+#if (KMP_ARCH_ARM || KMP_ARCH_X86_64) && KMP_OS_LINUX
va_list *ap
#else
va_list ap
@@ -3120,7 +3135,7 @@
#if USE_ITT_BUILD
void * itt_sync_obj,
#endif /* USE_ITT_BUILD */
- int c = 0 );
+ int c );
extern void __kmp_reap_task_teams( void );
extern void __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread );
extern void __kmp_wait_to_unref_task_teams( void );
@@ -3138,6 +3153,9 @@
extern int __kmp_is_address_mapped( void *addr );
extern kmp_uint64 __kmp_hardware_timestamp(void);
+#if KMP_OS_UNIX
+extern int __kmp_read_from_file( char const *path, char const *format, ... );
+#endif
/* ------------------------------------------------------------------------ */
//
@@ -3148,7 +3166,7 @@
extern void __kmp_query_cpuid( kmp_cpuinfo_t *p );
-static inline void __kmp_load_mxcsr ( kmp_uint32 *p ) { _mm_setcsr( *p ); }
+#define __kmp_load_mxcsr(p) _mm_setcsr(*(p))
static inline void __kmp_store_mxcsr( kmp_uint32 *p ) { *p = _mm_getcsr(); }
extern void __kmp_load_x87_fpu_control_word( kmp_int16 *p );
@@ -3258,8 +3276,8 @@
#endif // OMP_30_ENABLED
#if OMP_40_ENABLED
-KMP_EXPORT void __kmpc_taskgroup( ident* loc, int gtid );
-KMP_EXPORT void __kmpc_end_taskgroup( ident* loc, int gtid );
+KMP_EXPORT void __kmpc_taskgroup( ident_t * loc, int gtid );
+KMP_EXPORT void __kmpc_end_taskgroup( ident_t * loc, int gtid );
KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task,
kmp_int32 ndeps, kmp_depend_info_t *dep_list,
@@ -3270,6 +3288,13 @@
#endif
+#if OMP_40_ENABLED
+KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind);
+KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind);
+KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t* loc_ref, kmp_int32 gtid);
+KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind);
+#endif
+
/*
* Lock interface routines (fast versions with gtid passed in)
*/
@@ -3355,6 +3380,42 @@
struct private_common *
kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
+//
+// ompc_, kmpc_ entries moved from omp.h.
+//
+#if KMP_OS_WINDOWS
+# define KMPC_CONVENTION __cdecl
+#else
+# define KMPC_CONVENTION
+#endif
+
+#if OMP_30_ENABLED
+
+#ifndef __OMP_H
+typedef enum omp_sched_t {
+ omp_sched_static = 1,
+ omp_sched_dynamic = 2,
+ omp_sched_guided = 3,
+ omp_sched_auto = 4
+} omp_sched_t;
+typedef void * kmp_affinity_mask_t;
+#endif
+
+KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int);
+KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int);
+KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int);
+KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int);
+KMP_EXPORT int KMPC_CONVENTION kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *);
+KMP_EXPORT int KMPC_CONVENTION kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *);
+KMP_EXPORT int KMPC_CONVENTION kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *);
+
+#endif // OMP_30_ENABLED
+
+KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int);
+KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);
+KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int);
+KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *);
+
#ifdef __cplusplus
}
#endif
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index 0840fa3..644251d 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -1,7 +1,7 @@
/*
* kmp_affinity.cpp -- affinity management
- * $Revision: 42613 $
- * $Date: 2013-08-23 13:29:50 -0500 (Fri, 23 Aug 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
*/
@@ -1885,7 +1885,19 @@
if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
threadInfo[num_avail][osIdIndex] = val;
+#if KMP_OS_LINUX && USE_SYSFS_INFO
+ char path[256];
+ snprintf(path, sizeof(path),
+ "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
+ threadInfo[num_avail][osIdIndex]);
+ __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
+
+ snprintf(path, sizeof(path),
+ "/sys/devices/system/cpu/cpu%u/topology/core_id",
+ threadInfo[num_avail][osIdIndex]);
+ __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
continue;
+#else
}
char s2[] = "physical id";
if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
@@ -1906,6 +1918,7 @@
if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
threadInfo[num_avail][coreIdIndex] = val;
continue;
+#endif // KMP_OS_LINUX && USE_SYSFS_INFO
}
char s4[] = "thread id";
if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
@@ -3058,8 +3071,6 @@
int setSize = 0;
for (;;) {
- int start, count, stride;
-
__kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
//
@@ -3090,7 +3101,7 @@
"bad explicit places list");
next = scan;
SKIP_DIGITS(next);
- count = __kmp_str_to_int(scan, *next);
+ int count = __kmp_str_to_int(scan, *next);
KMP_ASSERT(count >= 0);
scan = next;
@@ -3112,7 +3123,7 @@
// Use a temp var in case macro is changed to evaluate
// args multiple times.
//
- if (KMP_CPU_ISSET(j - stride, tempMask)) {
+ if (KMP_CPU_ISSET(j - 1, tempMask)) {
KMP_CPU_SET(j, tempMask);
setSize++;
}
@@ -3159,7 +3170,7 @@
"bad explicit places list");
next = scan;
SKIP_DIGITS(next);
- stride = __kmp_str_to_int(scan, *next);
+ int stride = __kmp_str_to_int(scan, *next);
KMP_DEBUG_ASSERT(stride >= 0);
scan = next;
stride *= sign;
diff --git a/openmp/runtime/src/kmp_alloc.c b/openmp/runtime/src/kmp_alloc.c
index 30ab4bd..885754f 100644
--- a/openmp/runtime/src/kmp_alloc.c
+++ b/openmp/runtime/src/kmp_alloc.c
@@ -1,7 +1,7 @@
/*
* kmp_alloc.c -- private/shared dyanmic memory allocation and management
- * $Revision: 42613 $
- * $Date: 2013-08-23 13:29:50 -0500 (Fri, 23 Aug 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
*/
@@ -31,7 +31,7 @@
/* NOTE: bufsize must be a signed datatype */
#if KMP_OS_WINDOWS
-# if KMP_ARCH_X86
+# if KMP_ARCH_X86 || KMP_ARCH_ARM
typedef kmp_int32 bufsize;
# else
typedef kmp_int64 bufsize;
@@ -74,7 +74,7 @@
malloc() does not
ensure 16 byte alignmnent */
-#if KMP_ARCH_X86
+#if KMP_ARCH_X86 || !KMP_HAVE_QUAD
#define SizeQuant 8
#define AlignType double
diff --git a/openmp/runtime/src/kmp_atomic.c b/openmp/runtime/src/kmp_atomic.c
index 547aad5..3e9c82f 100644
--- a/openmp/runtime/src/kmp_atomic.c
+++ b/openmp/runtime/src/kmp_atomic.c
@@ -1,7 +1,7 @@
/*
* kmp_atomic.c -- ATOMIC implementation routines
- * $Revision: 42582 $
- * $Date: 2013-08-09 06:30:22 -0500 (Fri, 09 Aug 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
*/
@@ -574,7 +574,7 @@
*/
#define KMP_ATOMIC_VOLATILE volatile
-#if ( KMP_ARCH_X86 )
+#if ( KMP_ARCH_X86 ) && KMP_HAVE_QUAD
static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; };
static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; };
@@ -608,7 +608,7 @@
/* ------------------------------------------------------------------------ */
// All routines declarations looks like
-// void __kmpc_atomic_RTYPE_OP( ident_t*, int*, TYPE *lhs, TYPE rhs );
+// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
// ------------------------------------------------------------------------
#define KMP_CHECK_GTID \
@@ -721,6 +721,7 @@
} \
}
+#if USE_CMPXCHG_FIX
// 2007-06-25:
// workaround for C78287 (complex(kind=4) data type)
// lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm)
@@ -751,6 +752,7 @@
} \
}
// end of the first part of the workaround for C78287
+#endif // USE_CMPXCHG_FIX
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
@@ -775,6 +777,7 @@
OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
OP_CMPXCHG(TYPE,BITS,OP) \
}
+#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
@@ -783,6 +786,7 @@
OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
}
// end of the second part of the workaround for C78287
+#endif
#else
// -------------------------------------------------------------------------
@@ -820,6 +824,7 @@
OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
} \
}
+#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
@@ -833,6 +838,7 @@
} \
}
// end of the second part of the workaround for C78287
+#endif // USE_CMPXCHG_FIX
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
// Routines for ATOMIC 4-byte operands addition and subtraction
@@ -1068,12 +1074,14 @@
MIN_MAX_COMPXCHG( float4, min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min
MIN_MAX_COMPXCHG( float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max
MIN_MAX_COMPXCHG( float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min
+#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL( float16, max, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max
MIN_MAX_CRITICAL( float16, min, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min
#if ( KMP_ARCH_X86 )
MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16
MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16
#endif
+#endif
// ------------------------------------------------------------------------
// Need separate macros for .EQV. because of the need of complement (~)
// OP ignored for critical sections, ^=~ used instead
@@ -1135,6 +1143,7 @@
ATOMIC_CRITICAL( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub
ATOMIC_CRITICAL( float10, mul, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul
ATOMIC_CRITICAL( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div
+#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL( float16, add, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add
ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub
@@ -1146,14 +1155,22 @@
ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16
ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16
#endif
+#endif
// routines for complex types
+#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_add
ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_sub
ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_mul
ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_div
// end of the workaround for C78287
+#else
+ATOMIC_CRITICAL( cmplx4, add, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add
+ATOMIC_CRITICAL( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub
+ATOMIC_CRITICAL( cmplx4, mul, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul
+ATOMIC_CRITICAL( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div
+#endif // USE_CMPXCHG_FIX
ATOMIC_CRITICAL( cmplx8, add, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add
ATOMIC_CRITICAL( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub
@@ -1163,6 +1180,7 @@
ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub
ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul
ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div
+#if KMP_HAVE_QUAD
ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add
ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub
ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul
@@ -1173,6 +1191,7 @@
ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16
ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16
#endif
+#endif
#if OMP_40_ENABLED
@@ -1312,6 +1331,7 @@
// routines for long double type
ATOMIC_CRITICAL_REV( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_rev
ATOMIC_CRITICAL_REV( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_rev
+#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_rev
ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_rev
@@ -1319,6 +1339,7 @@
ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_rev
ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_rev
#endif
+#endif
// routines for complex types
ATOMIC_CRITICAL_REV( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_rev
@@ -1327,12 +1348,14 @@
ATOMIC_CRITICAL_REV( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_rev
ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_rev
ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_rev
+#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_rev
ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_rev
#if ( KMP_ARCH_X86 )
ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_rev
ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_rev
#endif
+#endif
#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
@@ -1405,7 +1428,7 @@
ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8
// RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them)
-
+#if KMP_HAVE_QUAD
ATOMIC_CMPXCHG_MIX( fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp
ATOMIC_CMPXCHG_MIX( fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp
ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp
@@ -1444,10 +1467,12 @@
ATOMIC_CRITICAL_FP( float10, long double, sub, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_fp
ATOMIC_CRITICAL_FP( float10, long double, mul, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_fp
ATOMIC_CRITICAL_FP( float10, long double, div, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_fp
+#endif
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
+#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
@@ -1456,6 +1481,13 @@
}
// end of the second part of the workaround for C78287
#else
+#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
+ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
+ OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
+ OP_CMPXCHG(TYPE,BITS,OP) \
+}
+#endif // USE_CMPXCHG_FIX
+#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
@@ -1624,7 +1656,9 @@
ATOMIC_CMPXCHG_READ( fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_rd
ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r, 1 ) // __kmpc_atomic_float10_rd
+#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_rd
+#endif // KMP_HAVE_QUAD
// Fix for CQ220361 on Windows* OS
#if ( KMP_OS_WINDOWS )
@@ -1634,11 +1668,13 @@
#endif
ATOMIC_CRITICAL_READ( cmplx8, rd, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_rd
ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_rd
+#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_rd
#if ( KMP_ARCH_X86 )
ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_a16_rd
ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd
#endif
+#endif
// ------------------------------------------------------------------------
@@ -1720,15 +1756,19 @@
#endif
ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r, 1 ) // __kmpc_atomic_float10_wr
+#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r, 1 ) // __kmpc_atomic_float16_wr
+#endif
ATOMIC_CRITICAL_WR( cmplx4, wr, kmp_cmplx32, =, 8c, 1 ) // __kmpc_atomic_cmplx4_wr
ATOMIC_CRITICAL_WR( cmplx8, wr, kmp_cmplx64, =, 16c, 1 ) // __kmpc_atomic_cmplx8_wr
ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c, 1 ) // __kmpc_atomic_cmplx10_wr
+#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c, 1 ) // __kmpc_atomic_cmplx16_wr
#if ( KMP_ARCH_X86 )
ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t, =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr
ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr
#endif
+#endif
// ------------------------------------------------------------------------
@@ -2058,12 +2098,14 @@
MIN_MAX_COMPXCHG_CPT( float4, min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt
MIN_MAX_COMPXCHG_CPT( float8, max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt
MIN_MAX_COMPXCHG_CPT( float8, min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt
+#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max_cpt
MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min_cpt
#if ( KMP_ARCH_X86 )
MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16_cpt
MIN_MAX_CRITICAL_CPT( float16, min_a16_cpt, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_mix_a16_cpt
#endif
+#endif
// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
@@ -2156,6 +2198,7 @@
ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt
ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt
ATOMIC_CRITICAL_CPT( float10, div_cpt, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt
+#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add_cpt
ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt
@@ -2167,6 +2210,7 @@
ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16_cpt
ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt
#endif
+#endif
// routines for complex types
@@ -2184,6 +2228,7 @@
ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt
ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul_cpt
ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt
+#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_cpt
ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt
ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_cpt
@@ -2194,6 +2239,7 @@
ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16_cpt
ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt
#endif
+#endif
#if OMP_40_ENABLED
@@ -2321,6 +2367,7 @@
// routines for long double type
ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev
+#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt_rev
@@ -2328,6 +2375,7 @@
ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt_rev
#endif
+#endif
// routines for complex types
@@ -2378,12 +2426,14 @@
ATOMIC_CRITICAL_CPT_REV( cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt_rev
ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt_rev
+#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt_rev
#if ( KMP_ARCH_X86 )
ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
#endif
+#endif
// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
@@ -2527,7 +2577,9 @@
ATOMIC_CRITICAL_SWP( float10, long double, 10r, 1 ) // __kmpc_atomic_float10_swp
+#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r, 1 ) // __kmpc_atomic_float16_swp
+#endif
// cmplx4 routine to return void
ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
@@ -2536,11 +2588,13 @@
ATOMIC_CRITICAL_SWP( cmplx8, kmp_cmplx64, 16c, 1 ) // __kmpc_atomic_cmplx8_swp
ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c, 1 ) // __kmpc_atomic_cmplx10_swp
+#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c, 1 ) // __kmpc_atomic_cmplx16_swp
#if ( KMP_ARCH_X86 )
ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t, 16r, 1 ) // __kmpc_atomic_float16_a16_swp
ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_swp
#endif
+#endif
// End of OpenMP 4.0 Capture
diff --git a/openmp/runtime/src/kmp_atomic.h b/openmp/runtime/src/kmp_atomic.h
index 2243ba7..361dce9 100644
--- a/openmp/runtime/src/kmp_atomic.h
+++ b/openmp/runtime/src/kmp_atomic.h
@@ -1,7 +1,7 @@
/*
* kmp_atomic.h - ATOMIC header file
- * $Revision: 42195 $
- * $Date: 2013-03-27 16:10:35 -0500 (Wed, 27 Mar 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
*/
@@ -30,10 +30,6 @@
// to use typedef'ed types on win.
// Condition for WIN64 was modified in anticipation of 10.1 build compiler.
-#if defined( __GNUC__ ) && !defined( __INTEL_COMPILER )
-typedef __float128 _Quad;
-#endif
-
#if defined( __cplusplus ) && ( KMP_OS_WINDOWS )
// create shortcuts for c99 complex types
@@ -173,6 +169,7 @@
typedef KMP_DO_ALIGN( 16 ) struct __kmp_cmplx80_t kmp_cmplx80;
// complex16
+ #if KMP_HAVE_QUAD
struct __kmp_cmplx128_t : std::complex< _Quad > {
__kmp_cmplx128_t() : std::complex< _Quad > () {}
@@ -192,6 +189,7 @@
};
typedef struct __kmp_cmplx128_t kmp_cmplx128;
+ #endif /* KMP_HAVE_QUAD */
#ifdef _DEBUG_TEMPORARILY_UNSET_
#undef _DEBUG_TEMPORARILY_UNSET_
@@ -204,19 +202,22 @@
typedef float _Complex kmp_cmplx32;
typedef double _Complex kmp_cmplx64;
typedef long double _Complex kmp_cmplx80;
+ #if KMP_HAVE_QUAD
typedef _Quad _Complex kmp_cmplx128;
+ #endif
#endif
// Compiler 12.0 changed alignment of 16 and 32-byte arguments (like _Quad
// and kmp_cmplx128) on IA-32 architecture. The following aligned structures
// are implemented to support the old alignment in 10.1, 11.0, 11.1 and
// introduce the new alignment in 12.0. See CQ88405.
-#if ( KMP_ARCH_X86 )
+#if KMP_ARCH_X86 && KMP_HAVE_QUAD
// 4-byte aligned structures for backward compatibility.
#pragma pack( push, 4 )
+
struct KMP_DO_ALIGN( 4 ) Quad_a4_t {
_Quad q;
@@ -364,31 +365,31 @@
typedef kmp_queuing_lock_t kmp_atomic_lock_t;
-inline void
+static inline void
__kmp_acquire_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
{
__kmp_acquire_queuing_lock( lck, gtid );
}
-inline int
+static inline int
__kmp_test_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
{
return __kmp_test_queuing_lock( lck, gtid );
}
-inline void
+static inline void
__kmp_release_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
{
__kmp_release_queuing_lock( lck, gtid );
}
-inline void
+static inline void
__kmp_init_atomic_lock( kmp_atomic_lock_t *lck )
{
__kmp_init_queuing_lock( lck );
}
-inline void
+static inline void
__kmp_destroy_atomic_lock( kmp_atomic_lock_t *lck )
{
__kmp_destroy_queuing_lock( lck );
@@ -498,6 +499,7 @@
void __kmpc_atomic_float4_min( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
void __kmpc_atomic_float8_max( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
void __kmpc_atomic_float8_min( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
+#if KMP_HAVE_QUAD
void __kmpc_atomic_float16_max( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
void __kmpc_atomic_float16_min( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
#if ( KMP_ARCH_X86 )
@@ -505,6 +507,7 @@
void __kmpc_atomic_float16_max_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
void __kmpc_atomic_float16_min_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
#endif
+#endif
// .NEQV. (same as xor)
void __kmpc_atomic_fixed1_neqv( ident_t *id_ref, int gtid, char * lhs, char rhs );
void __kmpc_atomic_fixed2_neqv( ident_t *id_ref, int gtid, short * lhs, short rhs );
@@ -521,6 +524,7 @@
void __kmpc_atomic_float10_mul( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
void __kmpc_atomic_float10_div( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
// _Quad type
+#if KMP_HAVE_QUAD
void __kmpc_atomic_float16_add( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
void __kmpc_atomic_float16_sub( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
void __kmpc_atomic_float16_mul( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
@@ -532,6 +536,7 @@
void __kmpc_atomic_float16_mul_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
void __kmpc_atomic_float16_div_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
#endif
+#endif
// routines for complex types
void __kmpc_atomic_cmplx4_add( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
void __kmpc_atomic_cmplx4_sub( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
@@ -545,6 +550,7 @@
void __kmpc_atomic_cmplx10_sub( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
void __kmpc_atomic_cmplx10_mul( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
void __kmpc_atomic_cmplx10_div( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
+#if KMP_HAVE_QUAD
void __kmpc_atomic_cmplx16_add( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
void __kmpc_atomic_cmplx16_sub( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
void __kmpc_atomic_cmplx16_mul( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
@@ -556,6 +562,7 @@
void __kmpc_atomic_cmplx16_mul_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
void __kmpc_atomic_cmplx16_div_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
#endif
+#endif
#if OMP_40_ENABLED
@@ -593,14 +600,17 @@
void __kmpc_atomic_float8_div_rev( ident_t *id_ref, int gtid, double * lhs, double rhs );
void __kmpc_atomic_float10_sub_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
void __kmpc_atomic_float10_div_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
+#if KMP_HAVE_QUAD
void __kmpc_atomic_float16_sub_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
void __kmpc_atomic_float16_div_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
+#endif
void __kmpc_atomic_cmplx4_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
void __kmpc_atomic_cmplx4_div_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
void __kmpc_atomic_cmplx8_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
void __kmpc_atomic_cmplx8_div_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
void __kmpc_atomic_cmplx10_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
void __kmpc_atomic_cmplx10_div_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
+#if KMP_HAVE_QUAD
void __kmpc_atomic_cmplx16_sub_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
void __kmpc_atomic_cmplx16_div_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
#if ( KMP_ARCH_X86 )
@@ -610,6 +620,7 @@
void __kmpc_atomic_cmplx16_sub_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
void __kmpc_atomic_cmplx16_div_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
#endif
+#endif // KMP_HAVE_QUAD
#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
@@ -632,6 +643,7 @@
void __kmpc_atomic_float4_div_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs );
// RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them)
+#if KMP_HAVE_QUAD
void __kmpc_atomic_fixed1_add_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
void __kmpc_atomic_fixed1_sub_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
void __kmpc_atomic_fixed1_mul_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
@@ -670,6 +682,7 @@
void __kmpc_atomic_float10_sub_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
void __kmpc_atomic_float10_mul_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
void __kmpc_atomic_float10_div_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
+#endif // KMP_HAVE_QUAD
// RHS=cmplx8
void __kmpc_atomic_cmplx4_add_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs );
@@ -701,7 +714,9 @@
kmp_real32 __kmpc_atomic_float4_rd( ident_t *id_ref, int gtid, kmp_real32 * loc );
kmp_real64 __kmpc_atomic_float8_rd( ident_t *id_ref, int gtid, kmp_real64 * loc );
long double __kmpc_atomic_float10_rd( ident_t *id_ref, int gtid, long double * loc );
+#if KMP_HAVE_QUAD
QUAD_LEGACY __kmpc_atomic_float16_rd( ident_t *id_ref, int gtid, QUAD_LEGACY * loc );
+#endif
// Fix for CQ220361: cmplx4 READ will return void on Windows* OS; read value will be
// returned through an additional parameter
#if ( KMP_OS_WINDOWS )
@@ -711,12 +726,14 @@
#endif
kmp_cmplx64 __kmpc_atomic_cmplx8_rd( ident_t *id_ref, int gtid, kmp_cmplx64 * loc );
kmp_cmplx80 __kmpc_atomic_cmplx10_rd( ident_t *id_ref, int gtid, kmp_cmplx80 * loc );
+#if KMP_HAVE_QUAD
CPLX128_LEG __kmpc_atomic_cmplx16_rd( ident_t *id_ref, int gtid, CPLX128_LEG * loc );
#if ( KMP_ARCH_X86 )
// Routines with 16-byte arguments aligned to 16-byte boundary
Quad_a16_t __kmpc_atomic_float16_a16_rd( ident_t * id_ref, int gtid, Quad_a16_t * loc );
kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_rd( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * loc );
#endif
+#endif
//
@@ -730,17 +747,20 @@
void __kmpc_atomic_float4_wr( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
void __kmpc_atomic_float8_wr( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
void __kmpc_atomic_float10_wr( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
+#if KMP_HAVE_QUAD
void __kmpc_atomic_float16_wr( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
+#endif
void __kmpc_atomic_cmplx4_wr( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
void __kmpc_atomic_cmplx8_wr( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
void __kmpc_atomic_cmplx10_wr( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
+#if KMP_HAVE_QUAD
void __kmpc_atomic_cmplx16_wr( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
#if ( KMP_ARCH_X86 )
// Routines with 16-byte arguments aligned to 16-byte boundary
void __kmpc_atomic_float16_a16_wr( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
void __kmpc_atomic_cmplx16_a16_wr( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
#endif
-
+#endif
//
// Below routines for atomic CAPTURE are listed
@@ -830,8 +850,10 @@
kmp_real32 __kmpc_atomic_float4_min_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
kmp_real64 __kmpc_atomic_float8_max_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
kmp_real64 __kmpc_atomic_float8_min_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
+#if KMP_HAVE_QUAD
QUAD_LEGACY __kmpc_atomic_float16_max_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
QUAD_LEGACY __kmpc_atomic_float16_min_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
+#endif
// .NEQV. (same as xor)
char __kmpc_atomic_fixed1_neqv_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
short __kmpc_atomic_fixed2_neqv_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
@@ -847,11 +869,13 @@
long double __kmpc_atomic_float10_sub_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
long double __kmpc_atomic_float10_mul_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
long double __kmpc_atomic_float10_div_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
+#if KMP_HAVE_QUAD
// _Quad type
QUAD_LEGACY __kmpc_atomic_float16_add_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
QUAD_LEGACY __kmpc_atomic_float16_sub_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
QUAD_LEGACY __kmpc_atomic_float16_mul_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
QUAD_LEGACY __kmpc_atomic_float16_div_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
+#endif
// routines for complex types
// Workaround for cmplx4 routines - return void; captured value is returned via the argument
void __kmpc_atomic_cmplx4_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag);
@@ -867,6 +891,7 @@
kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
kmp_cmplx80 __kmpc_atomic_cmplx10_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
+#if KMP_HAVE_QUAD
CPLX128_LEG __kmpc_atomic_cmplx16_add_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
CPLX128_LEG __kmpc_atomic_cmplx16_mul_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
@@ -884,6 +909,7 @@
kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_mul_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
#endif
+#endif
void __kmpc_atomic_start(void);
void __kmpc_atomic_end(void);
@@ -922,8 +948,10 @@
double __kmpc_atomic_float8_div_cpt_rev( ident_t *id_ref, int gtid, double * lhs, double rhs, int flag );
long double __kmpc_atomic_float10_sub_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag );
long double __kmpc_atomic_float10_div_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag );
+#if KMP_HAVE_QUAD
QUAD_LEGACY __kmpc_atomic_float16_sub_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag );
QUAD_LEGACY __kmpc_atomic_float16_div_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag );
+#endif
// Workaround for cmplx4 routines - return void; captured value is returned via the argument
void __kmpc_atomic_cmplx4_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
void __kmpc_atomic_cmplx4_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
@@ -931,6 +959,7 @@
kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag );
kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag );
kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag );
+#if KMP_HAVE_QUAD
CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag );
CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag );
#if ( KMP_ARCH_X86 )
@@ -939,6 +968,7 @@
kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag );
kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag );
#endif
+#endif
// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
char __kmpc_atomic_fixed1_swp( ident_t *id_ref, int gtid, char * lhs, char rhs );
@@ -948,18 +978,22 @@
float __kmpc_atomic_float4_swp( ident_t *id_ref, int gtid, float * lhs, float rhs );
double __kmpc_atomic_float8_swp( ident_t *id_ref, int gtid, double * lhs, double rhs );
long double __kmpc_atomic_float10_swp( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
+#if KMP_HAVE_QUAD
QUAD_LEGACY __kmpc_atomic_float16_swp( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
+#endif
// !!! TODO: check if we need a workaround here
void __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out );
//kmp_cmplx32 __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
kmp_cmplx64 __kmpc_atomic_cmplx8_swp( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
kmp_cmplx80 __kmpc_atomic_cmplx10_swp( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
+#if KMP_HAVE_QUAD
CPLX128_LEG __kmpc_atomic_cmplx16_swp( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
#if ( KMP_ARCH_X86 )
Quad_a16_t __kmpc_atomic_float16_a16_swp( ident_t *id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_swp( ident_t *id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
#endif
+#endif
// End of OpenMP 4.0 capture
diff --git a/openmp/runtime/src/kmp_cancel.cpp b/openmp/runtime/src/kmp_cancel.cpp
new file mode 100644
index 0000000..e5a76d2
--- /dev/null
+++ b/openmp/runtime/src/kmp_cancel.cpp
@@ -0,0 +1,282 @@
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "kmp.h"
+#include "kmp_i18n.h"
+#include "kmp_io.h"
+#include "kmp_str.h"
+
+#if OMP_40_ENABLED
+
+/*!
+@ingroup CANCELLATION
+@param loc_ref location of the cancel construct
+@param gtid Global thread ID of encountering thread
+@param cncl_kind Cancellation kind (parallel, for, sections, taskgroup)
+
+@return returns true if the cancellation request has been activated and the execution thread
+needs to proceed to the end of the canceled region.
+
+Request cancellation of the binding OpenMP region.
+*/
+kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) {
+ kmp_info_t *this_thr = __kmp_threads [ gtid ];
+
+ KC_TRACE( 10, ("__kmpc_cancel: T#%d request %d OMP_CANCELLATION=%d\n", gtid, cncl_kind, __kmp_omp_cancellation) );
+
+ KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq);
+ KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop ||
+ cncl_kind == cancel_sections || cncl_kind == cancel_taskgroup);
+ KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid);
+
+ if (__kmp_omp_cancellation) {
+ switch (cncl_kind) {
+ case cancel_parallel:
+ case cancel_loop:
+ case cancel_sections:
+ // cancellation requests for parallel and worksharing constructs
+ // are handled through the team structure
+ {
+ kmp_team_t *this_team = this_thr->th.th_team;
+ KMP_DEBUG_ASSERT(this_team);
+ kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(&(this_team->t.t_cancel_request), cancel_noreq, cncl_kind);
+ if (old == cancel_noreq || old == cncl_kind) {
+ //printf("__kmpc_cancel: this_team->t.t_cancel_request=%d @ %p\n",
+ // this_team->t.t_cancel_request, &(this_team->t.t_cancel_request));
+ // we do not have a cancellation request in this team or we do have one
+ // that matches the current request -> cancel
+ return 1 /* true */;
+ }
+ break;
+ }
+ case cancel_taskgroup:
+            // cancellation requests for a taskgroup
+            // are handled through the taskgroup structure
+ {
+ kmp_taskdata_t* task;
+ kmp_taskgroup_t* taskgroup;
+
+ task = this_thr->th.th_current_task;
+ KMP_DEBUG_ASSERT( task );
+
+ taskgroup = task->td_taskgroup;
+ if (taskgroup) {
+ kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(&(taskgroup->cancel_request), cancel_noreq, cncl_kind);
+ if (old == cancel_noreq || old == cncl_kind) {
+ // we do not have a cancellation request in this taskgroup or we do have one
+ // that matches the current request -> cancel
+ return 1 /* true */;
+ }
+ }
+ else {
+ // TODO: what needs to happen here?
+ // the specification disallows cancellation w/o taskgroups
+ // so we might do anything here, let's abort for now
+ KMP_ASSERT( 0 /* false */);
+ }
+ }
+ break;
+ default:
+ KMP_ASSERT (0 /* false */);
+ }
+ }
+
+ // ICV OMP_CANCELLATION=false, so we ignored this cancel request
+ KMP_DEBUG_ASSERT(!__kmp_omp_cancellation);
+ return 0 /* false */;
+}
+
+/*!
+@ingroup CANCELLATION
+@param loc_ref location of the cancellation point construct
+@param gtid Global thread ID of encountering thread
+@param cncl_kind Cancellation kind (parallel, for, sections, taskgroup)
+
+@return returns true if a matching cancellation request has been flagged in the RTL and the
+encountering thread has to cancel.
+
+Cancellation point for the encountering thread.
+*/
+kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) {
+ kmp_info_t *this_thr = __kmp_threads [ gtid ];
+
+ KC_TRACE( 10, ("__kmpc_cancellationpoint: T#%d request %d OMP_CANCELLATION=%d\n", gtid, cncl_kind, __kmp_omp_cancellation) );
+
+ KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq);
+ KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop ||
+ cncl_kind == cancel_sections || cncl_kind == cancel_taskgroup);
+ KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid);
+
+ if (__kmp_omp_cancellation) {
+ switch (cncl_kind) {
+ case cancel_parallel:
+ case cancel_loop:
+ case cancel_sections:
+ // cancellation requests for parallel and worksharing constructs
+ // are handled through the team structure
+ {
+ kmp_team_t *this_team = this_thr->th.th_team;
+ KMP_DEBUG_ASSERT(this_team);
+ if (this_team->t.t_cancel_request) {
+ if (cncl_kind == this_team->t.t_cancel_request) {
+ // the request in the team structure matches the type of
+ // cancellation point so we can cancel
+ return 1 /* true */;
+ }
+ KMP_ASSERT( 0 /* false */);
+ }
+ else {
+ // we do not have a cancellation request pending, so we just
+ // ignore this cancellation point
+ return 0;
+ }
+ break;
+ }
+ case cancel_taskgroup:
+            // cancellation requests for a taskgroup
+            // are handled through the taskgroup structure
+ {
+ kmp_taskdata_t* task;
+ kmp_taskgroup_t* taskgroup;
+
+ task = this_thr->th.th_current_task;
+ KMP_DEBUG_ASSERT( task );
+
+ taskgroup = task->td_taskgroup;
+ if (taskgroup) {
+ // return the current status of cancellation for the
+ // taskgroup
+ return !!taskgroup->cancel_request;
+ }
+ else {
+ // if a cancellation point is encountered by a task
+ // that does not belong to a taskgroup, it is OK
+ // to ignore it
+ return 0 /* false */;
+ }
+ }
+ default:
+ KMP_ASSERT (0 /* false */);
+ }
+ }
+
+ // ICV OMP_CANCELLATION=false, so we ignore the cancellation point
+ KMP_DEBUG_ASSERT(!__kmp_omp_cancellation);
+ return 0 /* false */;
+}
+
+/*!
+@ingroup CANCELLATION
+@param loc location of the barrier construct
+@param gtid Global thread ID of encountering thread
+
+@return returns true if a matching cancellation request has been flagged in the RTL and the
+encountering thread has to cancel.
+
+Barrier with cancellation point to send threads from the barrier to the
+end of the parallel region. Needs a special code pattern as documented
+in the design document for the cancellation feature.
+*/
+kmp_int32
+__kmpc_cancel_barrier(ident_t *loc, kmp_int32 gtid) {
+ int ret = 0 /* false */;
+ kmp_info_t *this_thr = __kmp_threads [ gtid ];
+ kmp_team_t *this_team = this_thr->th.th_team;
+
+ KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid);
+
+ // call into the standard barrier
+ __kmpc_barrier(loc, gtid);
+
+ // if cancellation is active, check cancellation flag
+ if (__kmp_omp_cancellation) {
+ // depending on which construct to cancel, check the flag and
+ // reset the flag
+ switch (this_team->t.t_cancel_request) {
+ case cancel_parallel:
+ ret = 1;
+ // ensure that threads have checked the flag, when
+ // leaving the above barrier
+ __kmpc_barrier(loc, gtid);
+ this_team->t.t_cancel_request = cancel_noreq;
+ // the next barrier is the fork/join barrier, which
+ // synchronizes the threads leaving here
+ break;
+ case cancel_loop:
+ case cancel_sections:
+ ret = 1;
+ // ensure that threads have checked the flag, when
+ // leaving the above barrier
+ __kmpc_barrier(loc, gtid);
+ this_team->t.t_cancel_request = cancel_noreq;
+ // synchronize the threads again to make sure we
+ // do not have any run-away threads that cause a race
+ // on the cancellation flag
+ __kmpc_barrier(loc, gtid);
+ break;
+ case cancel_taskgroup:
+ // this case should not occur
+ KMP_ASSERT (0 /* false */ );
+ break;
+ case cancel_noreq:
+ // do nothing
+ break;
+ default:
+ KMP_ASSERT ( 0 /* false */);
+ }
+ }
+
+ return ret;
+}
+
+/*!
+@ingroup CANCELLATION
+@param cancel_kind cancellation kind to query
+(parallel, loop, sections, taskgroup)
+
+@return returns true if a cancellation request of the given
+kind has been flagged in the runtime and is still pending.
+
+Query function to query the current status of cancellation requests.
+Can be used to implement the following pattern:
+
+if (kmp_get_cancellation_status(kmp_cancel_parallel)) {
+ perform_cleanup();
+ #pragma omp cancellation point parallel
+}
+*/
+int __kmp_get_cancellation_status(int cancel_kind) {
+ if (__kmp_omp_cancellation) {
+ kmp_info_t *this_thr = __kmp_entry_thread();
+
+ switch (cancel_kind) {
+ case cancel_parallel:
+ case cancel_loop:
+ case cancel_sections:
+ {
+ kmp_team_t *this_team = this_thr->th.th_team;
+ return this_team->t.t_cancel_request == cancel_kind;
+ }
+ case cancel_taskgroup:
+ {
+ kmp_taskdata_t* task;
+ kmp_taskgroup_t* taskgroup;
+ task = this_thr->th.th_current_task;
+ taskgroup = task->td_taskgroup;
+ return taskgroup && taskgroup->cancel_request;
+ }
+ }
+ }
+
+ return 0 /* false */;
+}
+
+#endif
diff --git a/openmp/runtime/src/kmp_csupport.c b/openmp/runtime/src/kmp_csupport.c
index 8ca4612..17cc534 100644
--- a/openmp/runtime/src/kmp_csupport.c
+++ b/openmp/runtime/src/kmp_csupport.c
@@ -1,7 +1,7 @@
/*
* kmp_csupport.c -- kfront linkage support for OpenMP.
- * $Revision: 42642 $
- * $Date: 2013-09-06 01:57:24 -0500 (Fri, 06 Sep 2013) $
+ * $Revision: 42826 $
+ * $Date: 2013-11-20 03:39:45 -0600 (Wed, 20 Nov 2013) $
*/
@@ -287,7 +287,7 @@
VOLATILE_CAST(microtask_t) microtask,
VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
-#if KMP_ARCH_X86_64 && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
&ap
#else
ap
@@ -351,7 +351,7 @@
argc,
VOLATILE_CAST(microtask_t) __kmp_teams_master,
VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
-#if KMP_ARCH_X86_64 && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
&ap
#else
ap
@@ -622,28 +622,20 @@
if ( __kmp_env_consistency_check )
__kmp_push_parallel( global_tid, NULL );
-#if USE_ITT_BUILD
+// t_level is not available in 2.5 build, so check for OMP_30_ENABLED
+#if USE_ITT_BUILD && OMP_30_ENABLED
// Mark the start of the "parallel" region for VTune. Only use one of frame notification scheme at the moment.
if ( ( __itt_frame_begin_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG )
{
__kmp_itt_region_forking( global_tid, 1 );
}
- // Collect information only if the file was opened succesfully.
- if( __kmp_forkjoin_frames_mode == 1 && __kmp_itt_csv_file )
+ if( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) && __itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr )
{
+#if USE_ITT_NOTIFY
if( this_thr->th.th_team->t.t_level == 1 ) {
- kmp_uint64 fr_begin;
-#if defined( __GNUC__ )
-# if !defined( __INTEL_COMPILER )
- fr_begin = __kmp_hardware_timestamp();
-# else
- fr_begin = __rdtsc();
-# endif
-#else
- fr_begin = __rdtsc();
-#endif
- this_thr->th.th_frame_time_serialized = fr_begin;
+ this_thr->th.th_frame_time_serialized = __itt_get_timestamp();
}
+#endif
}
#endif /* USE_ITT_BUILD */
@@ -774,39 +766,17 @@
}
-#if USE_ITT_BUILD
+// t_level is not available in 2.5 build, so check for OMP_30_ENABLED
+#if USE_ITT_BUILD && OMP_30_ENABLED
// Mark the end of the "parallel" region for VTune. Only use one of frame notification scheme at the moment.
if ( ( __itt_frame_end_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG )
{
+ this_thr->th.th_ident = loc;
__kmp_itt_region_joined( global_tid, 1 );
}
- // Collect information only if the file was opened succesfully.
- if( __kmp_forkjoin_frames_mode == 1 && __kmp_itt_csv_file )
- {
+ if( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) && __itt_frame_submit_v3_ptr ) {
if( this_thr->th.th_team->t.t_level == 0 ) {
- ident_t * loc = this_thr->th.th_ident;
- if (loc) {
- // Use compiler-generated location to mark the frame:
- // "<func>$omp$frame@[file:]<line>[:<col>]"
- kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
-
- kmp_uint64 fr_end;
-#if defined( __GNUC__ )
-# if !defined( __INTEL_COMPILER )
- fr_end = __kmp_hardware_timestamp();
-# else
- fr_end = __rdtsc();
-# endif
-#else
- fr_end = __rdtsc();
-#endif
- K_DIAG( 3, ( "__kmpc_end_serialized_parallel: T#%d frame_begin = %llu, frame_end = %llu\n",
- global_tid, this_thr->th.th_frame_time, fr_end ) );
-
- __kmp_str_buf_print( &__kmp_itt_frame_buffer, "%s$omp$frame@%s:%d:%d,%llu,%llu,,\n",
- str_loc.func, str_loc.file, str_loc.line, str_loc.col, this_thr->th.th_frame_time_serialized, fr_end );
- __kmp_str_loc_free( &str_loc );
- }
+ __kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized, __itt_timestamp_none, 0, loc );
}
}
#endif /* USE_ITT_BUILD */
@@ -858,13 +828,15 @@
if ( ! __kmp_cpuinfo.sse2 ) {
// CPU cannot execute SSE2 instructions.
} else {
- #if defined( __GNUC__ ) && !defined( __INTEL_COMPILER )
- __sync_synchronize();
- #else
+ #if KMP_COMPILER_ICC
_mm_mfence();
- #endif // __GNUC__
+ #else
+ __sync_synchronize();
+ #endif // KMP_COMPILER_ICC
}; // if
#endif // KMP_MIC
+ #elif KMP_ARCH_ARM
+ // Nothing yet
#else
#error Unknown or unsupported architecture
#endif
@@ -1110,7 +1082,7 @@
&& ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
lck = (kmp_user_lock_p)crit;
}
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
else if ( ( __kmp_user_lock_kind == lk_futex )
&& ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
lck = (kmp_user_lock_p)crit;
@@ -1163,7 +1135,7 @@
&& ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
lck = (kmp_user_lock_p)crit;
}
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
else if ( ( __kmp_user_lock_kind == lk_futex )
&& ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
lck = (kmp_user_lock_p)crit;
@@ -1598,14 +1570,14 @@
&& ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
lck = (kmp_user_lock_p)user_lock;
}
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
else if ( ( __kmp_user_lock_kind == lk_futex )
&& ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
lck = (kmp_user_lock_p)user_lock;
}
#endif
else {
- lck = __kmp_user_lock_allocate( user_lock, gtid );
+ lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
}
INIT_LOCK( lck );
__kmp_set_user_lock_location( lck, loc );
@@ -1634,7 +1606,7 @@
+ sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
lck = (kmp_user_lock_p)user_lock;
}
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
else if ( ( __kmp_user_lock_kind == lk_futex )
&& ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
<= OMP_NEST_LOCK_T_SIZE ) ) {
@@ -1642,7 +1614,7 @@
}
#endif
else {
- lck = __kmp_user_lock_allocate( user_lock, gtid );
+ lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
}
INIT_NESTED_LOCK( lck );
@@ -1662,7 +1634,7 @@
&& ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
lck = (kmp_user_lock_p)user_lock;
}
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
else if ( ( __kmp_user_lock_kind == lk_futex )
&& ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
lck = (kmp_user_lock_p)user_lock;
@@ -1681,7 +1653,7 @@
&& ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
;
}
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
else if ( ( __kmp_user_lock_kind == lk_futex )
&& ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
;
@@ -1702,7 +1674,7 @@
+ sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
lck = (kmp_user_lock_p)user_lock;
}
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
else if ( ( __kmp_user_lock_kind == lk_futex )
&& ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
<= OMP_NEST_LOCK_T_SIZE ) ) {
@@ -1723,7 +1695,7 @@
+ sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
;
}
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
else if ( ( __kmp_user_lock_kind == lk_futex )
&& ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
<= OMP_NEST_LOCK_T_SIZE ) ) {
@@ -1743,7 +1715,7 @@
&& ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
lck = (kmp_user_lock_p)user_lock;
}
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
else if ( ( __kmp_user_lock_kind == lk_futex )
&& ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
lck = (kmp_user_lock_p)user_lock;
@@ -1773,7 +1745,7 @@
+ sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
lck = (kmp_user_lock_p)user_lock;
}
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
else if ( ( __kmp_user_lock_kind == lk_futex )
&& ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
<= OMP_NEST_LOCK_T_SIZE ) ) {
@@ -1805,7 +1777,7 @@
if ( ( __kmp_user_lock_kind == lk_tas )
&& ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
// "fast" path implemented to fix customer performance issue
#if USE_ITT_BUILD
__kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
@@ -1817,7 +1789,7 @@
lck = (kmp_user_lock_p)user_lock;
#endif
}
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
else if ( ( __kmp_user_lock_kind == lk_futex )
&& ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
lck = (kmp_user_lock_p)user_lock;
@@ -1844,7 +1816,7 @@
if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
+ sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
// "fast" path implemented to fix customer performance issue
kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
#if USE_ITT_BUILD
@@ -1859,7 +1831,7 @@
lck = (kmp_user_lock_p)user_lock;
#endif
}
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
else if ( ( __kmp_user_lock_kind == lk_futex )
&& ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
<= OMP_NEST_LOCK_T_SIZE ) ) {
@@ -1888,7 +1860,7 @@
&& ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
lck = (kmp_user_lock_p)user_lock;
}
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
else if ( ( __kmp_user_lock_kind == lk_futex )
&& ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
lck = (kmp_user_lock_p)user_lock;
@@ -1926,7 +1898,7 @@
+ sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
lck = (kmp_user_lock_p)user_lock;
}
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
else if ( ( __kmp_user_lock_kind == lk_futex )
&& ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
<= OMP_NEST_LOCK_T_SIZE ) ) {
diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp
index 1128b87..cb5bdac 100644
--- a/openmp/runtime/src/kmp_dispatch.cpp
+++ b/openmp/runtime/src/kmp_dispatch.cpp
@@ -1,7 +1,7 @@
/*
* kmp_dispatch.cpp: dynamic scheduling - iteration initialization and dispatch.
- * $Revision: 42624 $
- * $Date: 2013-08-27 10:53:11 -0500 (Tue, 27 Aug 2013) $
+ * $Revision: 42674 $
+ * $Date: 2013-09-18 11:12:49 -0500 (Wed, 18 Sep 2013) $
*/
@@ -916,7 +916,8 @@
*/
// save original FPCW and set precision to 64-bit, as
// Windows* OS on IA-32 architecture defaults to 53-bit
- unsigned int oldFpcw = _control87(0,0x30000);
+ unsigned int oldFpcw = _control87(0,0);
+ _control87(_PC_64,_MCW_PC); // 0,0x30000
#endif
/* value used for comparison in solver for cross-over point */
long double target = ((long double)chunk * 2 + 1) * nproc / tc;
@@ -995,7 +996,7 @@
pr->u.p.count = tc - __kmp_dispatch_guided_remaining(tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - cross * chunk;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
// restore FPCW
- _control87(oldFpcw,0x30000);
+ _control87(oldFpcw,_MCW_PC);
#endif
} // if
} else {
@@ -1836,7 +1837,7 @@
/* for storing original FPCW value for Windows* OS on
IA-32 architecture 8-byte version */
unsigned int oldFpcw;
- int fpcwSet = 0;
+ unsigned int fpcwSet = 0;
#endif
KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked analytical case\n",
gtid ) );
@@ -1870,7 +1871,8 @@
FPCW and set precision to 64-bit, as Windows* OS
on IA-32 architecture defaults to 53-bit */
if ( !fpcwSet ) {
- oldFpcw = _control87(0,0x30000);
+ oldFpcw = _control87(0,0);
+ _control87(_PC_64,_MCW_PC);
fpcwSet = 0x30000;
}
#endif
@@ -1893,9 +1895,11 @@
} // if
} // while (1)
#if KMP_OS_WINDOWS && KMP_ARCH_X86
- /* restore FPCW if necessary */
- if ( oldFpcw & fpcwSet != 0 )
- _control87(oldFpcw,0x30000);
+ /* restore FPCW if necessary
+ AC: check fpcwSet flag first because oldFpcw can be uninitialized here
+ */
+ if ( fpcwSet && ( oldFpcw & fpcwSet ) )
+ _control87(oldFpcw,_MCW_PC);
#endif
if ( status != 0 ) {
start = pr->u.p.lb;
diff --git a/openmp/runtime/src/kmp_ftn_cdecl.c b/openmp/runtime/src/kmp_ftn_cdecl.c
index 7079ee9..135a7cb 100644
--- a/openmp/runtime/src/kmp_ftn_cdecl.c
+++ b/openmp/runtime/src/kmp_ftn_cdecl.c
@@ -1,7 +1,7 @@
/*
* kmp_ftn_cdecl.c -- Fortran __cdecl linkage support for OpenMP.
- * $Revision: 42061 $
- * $Date: 2013-02-28 16:36:24 -0600 (Thu, 28 Feb 2013) $
+ * $Revision: 42757 $
+ * $Date: 2013-10-18 08:20:57 -0500 (Fri, 18 Oct 2013) $
*/
@@ -17,21 +17,21 @@
#include "kmp.h"
-// Note: This string is not printed when KMP_VERSION=1.
-char const __kmp_version_ftncdecl[] = KMP_VERSION_PREFIX "Fortran __cdecl OMP support: "
-#ifdef USE_FTN_CDECL
- "yes";
-#else
- "no";
+#if KMP_OS_WINDOWS
+# if defined KMP_WIN_CDECL || !defined GUIDEDLL_EXPORTS
+# define KMP_FTN_ENTRIES KMP_FTN_UPPER
+# endif
+#elif KMP_OS_UNIX
+# define KMP_FTN_ENTRIES KMP_FTN_PLAIN
#endif
-#ifdef USE_FTN_CDECL
-
-#define FTN_STDCALL /* no stdcall */
-#define KMP_FTN_ENTRIES USE_FTN_CDECL
-
-#include "kmp_ftn_os.h"
-#include "kmp_ftn_entry.h"
-
-#endif /* USE_FTN_CDECL */
-
+// Note: This string is not printed when KMP_VERSION=1.
+char const __kmp_version_ftncdecl[] = KMP_VERSION_PREFIX "Fortran __cdecl OMP support: "
+#ifdef KMP_FTN_ENTRIES
+ "yes";
+# define FTN_STDCALL /* no stdcall */
+# include "kmp_ftn_os.h"
+# include "kmp_ftn_entry.h"
+#else
+ "no";
+#endif /* KMP_FTN_ENTRIES */
diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h
index f2c6440..dbbca19 100644
--- a/openmp/runtime/src/kmp_ftn_entry.h
+++ b/openmp/runtime/src/kmp_ftn_entry.h
@@ -1,7 +1,7 @@
/*
* kmp_ftn_entry.h -- Fortran entry linkage support for OpenMP.
- * $Revision: 42507 $
- * $Date: 2013-07-11 07:55:25 -0500 (Thu, 11 Jul 2013) $
+ * $Revision: 42798 $
+ * $Date: 2013-10-30 16:39:54 -0500 (Wed, 30 Oct 2013) $
*/
@@ -356,7 +356,7 @@
/* sets the requested number of threads for the next parallel region */
void FTN_STDCALL
-FTN_SET_NUM_THREADS( int KMP_DEREF arg )
+xexpand(FTN_SET_NUM_THREADS)( int KMP_DEREF arg )
{
#ifdef KMP_STUB
// Nothing.
@@ -368,7 +368,7 @@
/* returns the number of threads in current team */
int FTN_STDCALL
-FTN_GET_NUM_THREADS( void )
+xexpand(FTN_GET_NUM_THREADS)( void )
{
#ifdef KMP_STUB
return 1;
@@ -379,7 +379,7 @@
}
int FTN_STDCALL
-FTN_GET_MAX_THREADS( void )
+xexpand(FTN_GET_MAX_THREADS)( void )
{
#ifdef KMP_STUB
return 1;
@@ -401,7 +401,7 @@
}
int FTN_STDCALL
-FTN_GET_THREAD_NUM( void )
+xexpand(FTN_GET_THREAD_NUM)( void )
{
#ifdef KMP_STUB
return 0;
@@ -458,7 +458,7 @@
}
int FTN_STDCALL
-FTN_GET_NUM_PROCS( void )
+xexpand(FTN_GET_NUM_PROCS)( void )
{
#ifdef KMP_STUB
return 1;
@@ -472,7 +472,7 @@
}
void FTN_STDCALL
-FTN_SET_NESTED( int KMP_DEREF flag )
+xexpand(FTN_SET_NESTED)( int KMP_DEREF flag )
{
#ifdef KMP_STUB
__kmps_set_nested( KMP_DEREF flag );
@@ -487,7 +487,7 @@
int FTN_STDCALL
-FTN_GET_NESTED( void )
+xexpand(FTN_GET_NESTED)( void )
{
#ifdef KMP_STUB
return __kmps_get_nested();
@@ -499,7 +499,7 @@
}
void FTN_STDCALL
-FTN_SET_DYNAMIC( int KMP_DEREF flag )
+xexpand(FTN_SET_DYNAMIC)( int KMP_DEREF flag )
{
#ifdef KMP_STUB
__kmps_set_dynamic( KMP_DEREF flag ? TRUE : FALSE );
@@ -515,7 +515,7 @@
int FTN_STDCALL
-FTN_GET_DYNAMIC( void )
+xexpand(FTN_GET_DYNAMIC)( void )
{
#ifdef KMP_STUB
return __kmps_get_dynamic();
@@ -527,7 +527,7 @@
}
int FTN_STDCALL
-FTN_IN_PARALLEL( void )
+xexpand(FTN_IN_PARALLEL)( void )
{
#ifdef KMP_STUB
return 0;
@@ -550,7 +550,7 @@
#if OMP_30_ENABLED
void FTN_STDCALL
-FTN_SET_SCHEDULE( kmp_sched_t KMP_DEREF kind, int KMP_DEREF modifier )
+xexpand(FTN_SET_SCHEDULE)( kmp_sched_t KMP_DEREF kind, int KMP_DEREF modifier )
{
#ifdef KMP_STUB
__kmps_set_schedule( KMP_DEREF kind, KMP_DEREF modifier );
@@ -562,7 +562,7 @@
}
void FTN_STDCALL
-FTN_GET_SCHEDULE( kmp_sched_t * kind, int * modifier )
+xexpand(FTN_GET_SCHEDULE)( kmp_sched_t * kind, int * modifier )
{
#ifdef KMP_STUB
__kmps_get_schedule( kind, modifier );
@@ -574,7 +574,7 @@
}
void FTN_STDCALL
-FTN_SET_MAX_ACTIVE_LEVELS( int KMP_DEREF arg )
+xexpand(FTN_SET_MAX_ACTIVE_LEVELS)( int KMP_DEREF arg )
{
#ifdef KMP_STUB
// Nothing.
@@ -586,7 +586,7 @@
}
int FTN_STDCALL
-FTN_GET_MAX_ACTIVE_LEVELS( void )
+xexpand(FTN_GET_MAX_ACTIVE_LEVELS)( void )
{
#ifdef KMP_STUB
return 0;
@@ -598,7 +598,7 @@
}
int FTN_STDCALL
-FTN_GET_ACTIVE_LEVEL( void )
+xexpand(FTN_GET_ACTIVE_LEVEL)( void )
{
#ifdef KMP_STUB
return 0; // returns 0 if it is called from the sequential part of the program
@@ -610,7 +610,7 @@
}
int FTN_STDCALL
-FTN_GET_LEVEL( void )
+xexpand(FTN_GET_LEVEL)( void )
{
#ifdef KMP_STUB
return 0; // returns 0 if it is called from the sequential part of the program
@@ -622,7 +622,7 @@
}
int FTN_STDCALL
-FTN_GET_ANCESTOR_THREAD_NUM( int KMP_DEREF level )
+xexpand(FTN_GET_ANCESTOR_THREAD_NUM)( int KMP_DEREF level )
{
#ifdef KMP_STUB
return ( KMP_DEREF level ) ? ( -1 ) : ( 0 );
@@ -632,7 +632,7 @@
}
int FTN_STDCALL
-FTN_GET_TEAM_SIZE( int KMP_DEREF level )
+xexpand(FTN_GET_TEAM_SIZE)( int KMP_DEREF level )
{
#ifdef KMP_STUB
return ( KMP_DEREF level ) ? ( -1 ) : ( 1 );
@@ -642,7 +642,7 @@
}
int FTN_STDCALL
-FTN_GET_THREAD_LIMIT( void )
+xexpand(FTN_GET_THREAD_LIMIT)( void )
{
#ifdef KMP_STUB
return 1; // TO DO: clarify whether it returns 1 or 0?
@@ -656,7 +656,7 @@
}
int FTN_STDCALL
-FTN_IN_FINAL( void )
+xexpand(FTN_IN_FINAL)( void )
{
#ifdef KMP_STUB
return 0; // TO DO: clarify whether it returns 1 or 0?
@@ -674,7 +674,7 @@
kmp_proc_bind_t FTN_STDCALL
-FTN_GET_PROC_BIND( void )
+xexpand(FTN_GET_PROC_BIND)( void )
{
#ifdef KMP_STUB
return __kmps_get_proc_bind();
@@ -684,7 +684,7 @@
}
int FTN_STDCALL
-FTN_GET_NUM_TEAMS( void )
+xexpand(FTN_GET_NUM_TEAMS)( void )
{
#ifdef KMP_STUB
return 1;
@@ -723,7 +723,7 @@
}
int FTN_STDCALL
-FTN_GET_TEAM_NUM( void )
+xexpand(FTN_GET_TEAM_NUM)( void )
{
#ifdef KMP_STUB
return 0;
@@ -793,7 +793,7 @@
/* initialize the lock */
void FTN_STDCALL
-FTN_INIT_LOCK( void **user_lock )
+xexpand(FTN_INIT_LOCK)( void **user_lock )
{
#ifdef KMP_STUB
*((kmp_stub_lock_t *)user_lock) = UNLOCKED;
@@ -804,7 +804,7 @@
/* initialize the lock */
void FTN_STDCALL
-FTN_INIT_NEST_LOCK( void **user_lock )
+xexpand(FTN_INIT_NEST_LOCK)( void **user_lock )
{
#ifdef KMP_STUB
*((kmp_stub_lock_t *)user_lock) = UNLOCKED;
@@ -814,7 +814,7 @@
}
void FTN_STDCALL
-FTN_DESTROY_LOCK( void **user_lock )
+xexpand(FTN_DESTROY_LOCK)( void **user_lock )
{
#ifdef KMP_STUB
*((kmp_stub_lock_t *)user_lock) = UNINIT;
@@ -824,7 +824,7 @@
}
void FTN_STDCALL
-FTN_DESTROY_NEST_LOCK( void **user_lock )
+xexpand(FTN_DESTROY_NEST_LOCK)( void **user_lock )
{
#ifdef KMP_STUB
*((kmp_stub_lock_t *)user_lock) = UNINIT;
@@ -834,7 +834,7 @@
}
void FTN_STDCALL
-FTN_SET_LOCK( void **user_lock )
+xexpand(FTN_SET_LOCK)( void **user_lock )
{
#ifdef KMP_STUB
if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
@@ -850,7 +850,7 @@
}
void FTN_STDCALL
-FTN_SET_NEST_LOCK( void **user_lock )
+xexpand(FTN_SET_NEST_LOCK)( void **user_lock )
{
#ifdef KMP_STUB
if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
@@ -863,7 +863,7 @@
}
void FTN_STDCALL
-FTN_UNSET_LOCK( void **user_lock )
+xexpand(FTN_UNSET_LOCK)( void **user_lock )
{
#ifdef KMP_STUB
if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
@@ -879,7 +879,7 @@
}
void FTN_STDCALL
-FTN_UNSET_NEST_LOCK( void **user_lock )
+xexpand(FTN_UNSET_NEST_LOCK)( void **user_lock )
{
#ifdef KMP_STUB
if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
@@ -895,7 +895,7 @@
}
int FTN_STDCALL
-FTN_TEST_LOCK( void **user_lock )
+xexpand(FTN_TEST_LOCK)( void **user_lock )
{
#ifdef KMP_STUB
if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
@@ -912,7 +912,7 @@
}
int FTN_STDCALL
-FTN_TEST_NEST_LOCK( void **user_lock )
+xexpand(FTN_TEST_NEST_LOCK)( void **user_lock )
{
#ifdef KMP_STUB
if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) {
@@ -925,7 +925,7 @@
}
double FTN_STDCALL
-FTN_GET_WTIME( void )
+xexpand(FTN_GET_WTIME)( void )
{
#ifdef KMP_STUB
return __kmps_get_wtime();
@@ -944,7 +944,7 @@
}
double FTN_STDCALL
-FTN_GET_WTICK( void )
+xexpand(FTN_GET_WTICK)( void )
{
#ifdef KMP_STUB
return __kmps_get_wtick();
@@ -1022,6 +1022,191 @@
/* ------------------------------------------------------------------------ */
+#if OMP_40_ENABLED
+/* returns the status of cancellation */
+int FTN_STDCALL
+xexpand(FTN_GET_CANCELLATION)(void) {
+#ifdef KMP_STUB
+ return 0 /* false */;
+#else
+ // initialize the library if needed
+ if ( ! __kmp_init_serial ) {
+ __kmp_serial_initialize();
+ }
+ return __kmp_omp_cancellation;
+#endif
+}
+
+int FTN_STDCALL
+FTN_GET_CANCELLATION_STATUS(int cancel_kind) {
+#ifdef KMP_STUB
+ return 0 /* false */;
+#else
+ return __kmp_get_cancellation_status(cancel_kind);
+#endif
+}
+
+#endif // OMP_40_ENABLED
+
+// GCC compatibility (versioned symbols)
+#if KMP_OS_LINUX
+
+/*
+ The following sections create function aliases (dummy symbols) for the omp_* routines.
+ These aliases will then be versioned according to how libgomp ``versions'' its
+ symbols (OMP_1.0, OMP_2.0, OMP_3.0, ...) while also retaining the
+ default version which libiomp5 uses: VERSION (defined in exports_so.txt)
+ If you want to see the versioned symbols for libgomp.so.1 then just type:
+
+ objdump -T /path/to/libgomp.so.1 | grep omp_
+
+ Example:
+ Step 1) Create __kmp_api_omp_set_num_threads_10_alias
+ which is alias of __kmp_api_omp_set_num_threads
+ Step 2) Set __kmp_api_omp_set_num_threads_10_alias to version: omp_set_num_threads@OMP_1.0
+ Step 2B) Set __kmp_api_omp_set_num_threads to default version : omp_set_num_threads@@VERSION
+*/
+
+// OMP_1.0 aliases
+xaliasify(FTN_SET_NUM_THREADS, 10);
+xaliasify(FTN_GET_NUM_THREADS, 10);
+xaliasify(FTN_GET_MAX_THREADS, 10);
+xaliasify(FTN_GET_THREAD_NUM, 10);
+xaliasify(FTN_GET_NUM_PROCS, 10);
+xaliasify(FTN_IN_PARALLEL, 10);
+xaliasify(FTN_SET_DYNAMIC, 10);
+xaliasify(FTN_GET_DYNAMIC, 10);
+xaliasify(FTN_SET_NESTED, 10);
+xaliasify(FTN_GET_NESTED, 10);
+xaliasify(FTN_INIT_LOCK, 10);
+xaliasify(FTN_INIT_NEST_LOCK, 10);
+xaliasify(FTN_DESTROY_LOCK, 10);
+xaliasify(FTN_DESTROY_NEST_LOCK, 10);
+xaliasify(FTN_SET_LOCK, 10);
+xaliasify(FTN_SET_NEST_LOCK, 10);
+xaliasify(FTN_UNSET_LOCK, 10);
+xaliasify(FTN_UNSET_NEST_LOCK, 10);
+xaliasify(FTN_TEST_LOCK, 10);
+xaliasify(FTN_TEST_NEST_LOCK, 10);
+
+// OMP_2.0 aliases
+xaliasify(FTN_GET_WTICK, 20);
+xaliasify(FTN_GET_WTIME, 20);
+
+#if OMP_30_ENABLED
+// OMP_3.0 aliases
+xaliasify(FTN_SET_SCHEDULE, 30);
+xaliasify(FTN_GET_SCHEDULE, 30);
+xaliasify(FTN_GET_THREAD_LIMIT, 30);
+xaliasify(FTN_SET_MAX_ACTIVE_LEVELS, 30);
+xaliasify(FTN_GET_MAX_ACTIVE_LEVELS, 30);
+xaliasify(FTN_GET_LEVEL, 30);
+xaliasify(FTN_GET_ANCESTOR_THREAD_NUM, 30);
+xaliasify(FTN_GET_TEAM_SIZE, 30);
+xaliasify(FTN_GET_ACTIVE_LEVEL, 30);
+xaliasify(FTN_INIT_LOCK, 30);
+xaliasify(FTN_INIT_NEST_LOCK, 30);
+xaliasify(FTN_DESTROY_LOCK, 30);
+xaliasify(FTN_DESTROY_NEST_LOCK, 30);
+xaliasify(FTN_SET_LOCK, 30);
+xaliasify(FTN_SET_NEST_LOCK, 30);
+xaliasify(FTN_UNSET_LOCK, 30);
+xaliasify(FTN_UNSET_NEST_LOCK, 30);
+xaliasify(FTN_TEST_LOCK, 30);
+xaliasify(FTN_TEST_NEST_LOCK, 30);
+
+// OMP_3.1 aliases
+xaliasify(FTN_IN_FINAL, 31);
+#endif /* OMP_30_ENABLED */
+
+#if OMP_40_ENABLED
+// OMP_4.0 aliases
+xaliasify(FTN_GET_PROC_BIND, 40);
+xaliasify(FTN_GET_NUM_TEAMS, 40);
+xaliasify(FTN_GET_TEAM_NUM, 40);
+xaliasify(FTN_GET_CANCELLATION, 40);
+#endif /* OMP_40_ENABLED */
+
+#if OMP_41_ENABLED
+// OMP_4.1 aliases
+#endif
+
+#if OMP_50_ENABLED
+// OMP_5.0 aliases
+#endif
+
+// OMP_1.0 versioned symbols
+xversionify(FTN_SET_NUM_THREADS, 10, "OMP_1.0");
+xversionify(FTN_GET_NUM_THREADS, 10, "OMP_1.0");
+xversionify(FTN_GET_MAX_THREADS, 10, "OMP_1.0");
+xversionify(FTN_GET_THREAD_NUM, 10, "OMP_1.0");
+xversionify(FTN_GET_NUM_PROCS, 10, "OMP_1.0");
+xversionify(FTN_IN_PARALLEL, 10, "OMP_1.0");
+xversionify(FTN_SET_DYNAMIC, 10, "OMP_1.0");
+xversionify(FTN_GET_DYNAMIC, 10, "OMP_1.0");
+xversionify(FTN_SET_NESTED, 10, "OMP_1.0");
+xversionify(FTN_GET_NESTED, 10, "OMP_1.0");
+xversionify(FTN_INIT_LOCK, 10, "OMP_1.0");
+xversionify(FTN_INIT_NEST_LOCK, 10, "OMP_1.0");
+xversionify(FTN_DESTROY_LOCK, 10, "OMP_1.0");
+xversionify(FTN_DESTROY_NEST_LOCK, 10, "OMP_1.0");
+xversionify(FTN_SET_LOCK, 10, "OMP_1.0");
+xversionify(FTN_SET_NEST_LOCK, 10, "OMP_1.0");
+xversionify(FTN_UNSET_LOCK, 10, "OMP_1.0");
+xversionify(FTN_UNSET_NEST_LOCK, 10, "OMP_1.0");
+xversionify(FTN_TEST_LOCK, 10, "OMP_1.0");
+xversionify(FTN_TEST_NEST_LOCK, 10, "OMP_1.0");
+
+// OMP_2.0 versioned symbols
+xversionify(FTN_GET_WTICK, 20, "OMP_2.0");
+xversionify(FTN_GET_WTIME, 20, "OMP_2.0");
+
+#if OMP_30_ENABLED
+// OMP_3.0 versioned symbols
+xversionify(FTN_SET_SCHEDULE, 30, "OMP_3.0");
+xversionify(FTN_GET_SCHEDULE, 30, "OMP_3.0");
+xversionify(FTN_GET_THREAD_LIMIT, 30, "OMP_3.0");
+xversionify(FTN_SET_MAX_ACTIVE_LEVELS, 30, "OMP_3.0");
+xversionify(FTN_GET_MAX_ACTIVE_LEVELS, 30, "OMP_3.0");
+xversionify(FTN_GET_ANCESTOR_THREAD_NUM, 30, "OMP_3.0");
+xversionify(FTN_GET_LEVEL, 30, "OMP_3.0");
+xversionify(FTN_GET_TEAM_SIZE, 30, "OMP_3.0");
+xversionify(FTN_GET_ACTIVE_LEVEL, 30, "OMP_3.0");
+
+// The lock routines have both a 1.0 and a 3.0 version
+xversionify(FTN_INIT_LOCK, 30, "OMP_3.0");
+xversionify(FTN_INIT_NEST_LOCK, 30, "OMP_3.0");
+xversionify(FTN_DESTROY_LOCK, 30, "OMP_3.0");
+xversionify(FTN_DESTROY_NEST_LOCK, 30, "OMP_3.0");
+xversionify(FTN_SET_LOCK, 30, "OMP_3.0");
+xversionify(FTN_SET_NEST_LOCK, 30, "OMP_3.0");
+xversionify(FTN_UNSET_LOCK, 30, "OMP_3.0");
+xversionify(FTN_UNSET_NEST_LOCK, 30, "OMP_3.0");
+xversionify(FTN_TEST_LOCK, 30, "OMP_3.0");
+xversionify(FTN_TEST_NEST_LOCK, 30, "OMP_3.0");
+
+// OMP_3.1 versioned symbol
+xversionify(FTN_IN_FINAL, 31, "OMP_3.1");
+#endif /* OMP_30_ENABLED */
+
+#if OMP_40_ENABLED
+// OMP_4.0 versioned symbols
+xversionify(FTN_GET_PROC_BIND, 40, "OMP_4.0");
+xversionify(FTN_GET_NUM_TEAMS, 40, "OMP_4.0");
+xversionify(FTN_GET_TEAM_NUM, 40, "OMP_4.0");
+xversionify(FTN_GET_CANCELLATION, 40, "OMP_4.0");
+#endif /* OMP_40_ENABLED */
+
+#if OMP_41_ENABLED
+// OMP_4.1 versioned symbols
+#endif
+
+#if OMP_50_ENABLED
+// OMP_5.0 versioned symbols
+#endif
+
+#endif /* KMP_OS_LINUX */
+
#ifdef __cplusplus
} //extern "C"
#endif // __cplusplus
diff --git a/openmp/runtime/src/kmp_ftn_extra.c b/openmp/runtime/src/kmp_ftn_extra.c
index 6e1bb7e..6777e01 100644
--- a/openmp/runtime/src/kmp_ftn_extra.c
+++ b/openmp/runtime/src/kmp_ftn_extra.c
@@ -1,7 +1,7 @@
/*
* kmp_ftn_extra.c -- Fortran 'extra' linkage support for OpenMP.
- * $Revision: 42061 $
- * $Date: 2013-02-28 16:36:24 -0600 (Thu, 28 Feb 2013) $
+ * $Revision: 42757 $
+ * $Date: 2013-10-18 08:20:57 -0500 (Fri, 18 Oct 2013) $
*/
@@ -17,21 +17,19 @@
#include "kmp.h"
-// Note: This string is not printed when KMP_VERSION=1.
-char const __kmp_version_ftnextra[] = KMP_VERSION_PREFIX "Fortran \"extra\" OMP support: "
-#ifdef USE_FTN_EXTRA
- "yes";
-#else
- "no";
+#if KMP_OS_WINDOWS
+# define KMP_FTN_ENTRIES KMP_FTN_PLAIN
+#elif KMP_OS_UNIX
+# define KMP_FTN_ENTRIES KMP_FTN_APPEND
#endif
-#ifdef USE_FTN_EXTRA
-
-#define FTN_STDCALL /* nothing to do */
-#define KMP_FTN_ENTRIES USE_FTN_EXTRA
-
-#include "kmp_ftn_os.h"
-#include "kmp_ftn_entry.h"
-
-#endif /* USE_FTN_EXTRA */
-
+// Note: This string is not printed when KMP_VERSION=1.
+char const __kmp_version_ftnextra[] = KMP_VERSION_PREFIX "Fortran \"extra\" OMP support: "
+#ifdef KMP_FTN_ENTRIES
+ "yes";
+# define FTN_STDCALL /* nothing to do */
+# include "kmp_ftn_os.h"
+# include "kmp_ftn_entry.h"
+#else
+ "no";
+#endif /* KMP_FTN_ENTRIES */
diff --git a/openmp/runtime/src/kmp_ftn_os.h b/openmp/runtime/src/kmp_ftn_os.h
index c52ca1e..f241751 100644
--- a/openmp/runtime/src/kmp_ftn_os.h
+++ b/openmp/runtime/src/kmp_ftn_os.h
@@ -1,7 +1,7 @@
/*
* kmp_ftn_os.h -- KPTS Fortran defines header file.
- * $Revision: 42478 $
- * $Date: 2013-07-02 15:15:08 -0500 (Tue, 02 Jul 2013) $
+ * $Revision: 42745 $
+ * $Date: 2013-10-14 17:02:04 -0500 (Mon, 14 Oct 2013) $
*/
@@ -105,6 +105,11 @@
#endif
#endif
+#if OMP_40_ENABLED
+ #define FTN_GET_CANCELLATION omp_get_cancellation
+ #define FTN_GET_CANCELLATION_STATUS kmp_get_cancellation_status
+#endif
+
#endif /* KMP_FTN_PLAIN */
/* ------------------------------------------------------------------------ */
@@ -192,6 +197,11 @@
#endif
+#if OMP_40_ENABLED
+ #define FTN_GET_CANCELLATION omp_get_cancellation_
+ #define FTN_GET_CANCELLATION_STATUS kmp_get_cancellation_status_
+#endif
+
#endif /* KMP_FTN_APPEND */
/* ------------------------------------------------------------------------ */
@@ -279,6 +289,11 @@
#endif
+#if OMP_40_ENABLED
+ #define FTN_GET_CANCELLATION OMP_GET_CANCELLATION
+ #define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS
+#endif
+
#endif /* KMP_FTN_UPPER */
/* ------------------------------------------------------------------------ */
@@ -366,7 +381,134 @@
#endif
+#if OMP_40_ENABLED
+ #define FTN_GET_CANCELLATION OMP_GET_CANCELLATION_
+ #define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS_
+#endif
+
#endif /* KMP_FTN_UAPPEND */
+/* ------------------------------------------------------------------ */
+/* -------------------------- GOMP API NAMES ------------------------ */
+// All GOMP_1.0 symbols
+#define KMP_API_NAME_GOMP_ATOMIC_END GOMP_atomic_end
+#define KMP_API_NAME_GOMP_ATOMIC_START GOMP_atomic_start
+#define KMP_API_NAME_GOMP_BARRIER GOMP_barrier
+#define KMP_API_NAME_GOMP_CRITICAL_END GOMP_critical_end
+#define KMP_API_NAME_GOMP_CRITICAL_NAME_END GOMP_critical_name_end
+#define KMP_API_NAME_GOMP_CRITICAL_NAME_START GOMP_critical_name_start
+#define KMP_API_NAME_GOMP_CRITICAL_START GOMP_critical_start
+#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT GOMP_loop_dynamic_next
+#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_START GOMP_loop_dynamic_start
+#define KMP_API_NAME_GOMP_LOOP_END GOMP_loop_end
+#define KMP_API_NAME_GOMP_LOOP_END_NOWAIT GOMP_loop_end_nowait
+#define KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT GOMP_loop_guided_next
+#define KMP_API_NAME_GOMP_LOOP_GUIDED_START GOMP_loop_guided_start
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT GOMP_loop_ordered_dynamic_next
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START GOMP_loop_ordered_dynamic_start
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT GOMP_loop_ordered_guided_next
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START GOMP_loop_ordered_guided_start
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT GOMP_loop_ordered_runtime_next
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START GOMP_loop_ordered_runtime_start
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT GOMP_loop_ordered_static_next
+#define KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START GOMP_loop_ordered_static_start
+#define KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT GOMP_loop_runtime_next
+#define KMP_API_NAME_GOMP_LOOP_RUNTIME_START GOMP_loop_runtime_start
+#define KMP_API_NAME_GOMP_LOOP_STATIC_NEXT GOMP_loop_static_next
+#define KMP_API_NAME_GOMP_LOOP_STATIC_START GOMP_loop_static_start
+#define KMP_API_NAME_GOMP_ORDERED_END GOMP_ordered_end
+#define KMP_API_NAME_GOMP_ORDERED_START GOMP_ordered_start
+#define KMP_API_NAME_GOMP_PARALLEL_END GOMP_parallel_end
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START GOMP_parallel_loop_dynamic_start
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START GOMP_parallel_loop_guided_start
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START GOMP_parallel_loop_runtime_start
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START GOMP_parallel_loop_static_start
+#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START GOMP_parallel_sections_start
+#define KMP_API_NAME_GOMP_PARALLEL_START GOMP_parallel_start
+#define KMP_API_NAME_GOMP_SECTIONS_END GOMP_sections_end
+#define KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT GOMP_sections_end_nowait
+#define KMP_API_NAME_GOMP_SECTIONS_NEXT GOMP_sections_next
+#define KMP_API_NAME_GOMP_SECTIONS_START GOMP_sections_start
+#define KMP_API_NAME_GOMP_SINGLE_COPY_END GOMP_single_copy_end
+#define KMP_API_NAME_GOMP_SINGLE_COPY_START GOMP_single_copy_start
+#define KMP_API_NAME_GOMP_SINGLE_START GOMP_single_start
+
+// All GOMP_2.0 symbols
+#define KMP_API_NAME_GOMP_TASK GOMP_task
+#define KMP_API_NAME_GOMP_TASKWAIT GOMP_taskwait
+#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT GOMP_loop_ull_dynamic_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START GOMP_loop_ull_dynamic_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT GOMP_loop_ull_guided_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START GOMP_loop_ull_guided_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT GOMP_loop_ull_ordered_dynamic_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START GOMP_loop_ull_ordered_dynamic_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT GOMP_loop_ull_ordered_guided_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START GOMP_loop_ull_ordered_guided_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT GOMP_loop_ull_ordered_runtime_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START GOMP_loop_ull_ordered_runtime_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT GOMP_loop_ull_ordered_static_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START GOMP_loop_ull_ordered_static_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT GOMP_loop_ull_runtime_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START GOMP_loop_ull_runtime_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT GOMP_loop_ull_static_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START GOMP_loop_ull_static_start
+
+// All GOMP_3.0 symbols
+#define KMP_API_NAME_GOMP_TASKYIELD GOMP_taskyield
+
+// All GOMP_4.0 symbols
+// TODO: As of 2013-10-14, none of the GOMP_4.0 functions are implemented in libiomp5
+#define KMP_API_NAME_GOMP_BARRIER_CANCEL GOMP_barrier_cancel
+#define KMP_API_NAME_GOMP_CANCEL GOMP_cancel
+#define KMP_API_NAME_GOMP_CANCELLATION_POINT GOMP_cancellation_point
+#define KMP_API_NAME_GOMP_LOOP_END_CANCEL GOMP_loop_end_cancel
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC GOMP_parallel_loop_dynamic
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED GOMP_parallel_loop_guided
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME GOMP_parallel_loop_runtime
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC GOMP_parallel_loop_static
+#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS GOMP_parallel_sections
+#define KMP_API_NAME_GOMP_PARALLEL GOMP_parallel
+#define KMP_API_NAME_GOMP_SECTIONS_END_CANCEL GOMP_sections_end_cancel
+#define KMP_API_NAME_GOMP_TASKGROUP_START GOMP_taskgroup_start
+#define KMP_API_NAME_GOMP_TASKGROUP_END GOMP_taskgroup_end
+/* Target functions should be taken care of by liboffload */
+//#define KMP_API_NAME_GOMP_TARGET GOMP_target
+//#define KMP_API_NAME_GOMP_TARGET_DATA GOMP_target_data
+//#define KMP_API_NAME_GOMP_TARGET_END_DATA GOMP_target_end_data
+//#define KMP_API_NAME_GOMP_TARGET_UPDATE GOMP_target_update
+#define KMP_API_NAME_GOMP_TEAMS GOMP_teams
+
+#if KMP_OS_LINUX
+ #define xstr(x) str(x)
+ #define str(x) #x
+
+ // If Linux, xexpand prepends __kmp_api_ to the real API name
+ #define xexpand(api_name) expand(api_name)
+ #define expand(api_name) __kmp_api_##api_name
+
+ #define xaliasify(api_name,ver) aliasify(api_name,ver)
+ #define aliasify(api_name,ver) __typeof__(__kmp_api_##api_name) __kmp_api_##api_name##_##ver##_alias __attribute__((alias(xstr(__kmp_api_##api_name))))
+
+ #define xversionify(api_name, version_num, version_str) versionify(api_name, version_num, version_str, "VERSION")
+ #define versionify(api_name, version_num, version_str, default_ver) \
+ __asm__(".symver " xstr(__kmp_api_##api_name##_##version_num##_alias) "," xstr(api_name) "@" version_str "\n\t"); \
+ __asm__(".symver " xstr(__kmp_api_##api_name) "," xstr(api_name) "@@" default_ver "\n\t")
+
+#else /* KMP_OS_LINUX */
+ #define xstr(x) /* Nothing */
+ #define str(x) /* Nothing */
+
+  // If Windows or Mac, xexpand does no name transformation
+ #define xexpand(api_name) expand(api_name)
+ #define expand(api_name) api_name
+
+ #define xaliasify(api_name,ver) /* Nothing */
+ #define aliasify(api_name,ver) /* Nothing */
+
+ #define xversionify(api_name, version_num, version_str) /* Nothing */
+ #define versionify(api_name, version_num, version_str, default_ver) /* Nothing */
+
+#endif /* KMP_OS_LINUX */
+
#endif /* KMP_FTN_OS_H */
diff --git a/openmp/runtime/src/kmp_global.c b/openmp/runtime/src/kmp_global.c
index db81764..b27b1716 100644
--- a/openmp/runtime/src/kmp_global.c
+++ b/openmp/runtime/src/kmp_global.c
@@ -1,7 +1,7 @@
/*
* kmp_global.c -- KPTS global variables for runtime support library
- * $Revision: 42642 $
- * $Date: 2013-09-06 01:57:24 -0500 (Fri, 06 Sep 2013) $
+ * $Revision: 42816 $
+ * $Date: 2013-11-11 15:33:37 -0600 (Mon, 11 Nov 2013) $
*/
@@ -24,7 +24,6 @@
kmp_key_t __kmp_gtid_threadprivate_key;
kmp_cpuinfo_t __kmp_cpuinfo = { 0 }; // Not initialized
-kmp_uint64 __kmp_cpu_frequency = 0;
/* ----------------------------------------------------- */
@@ -181,6 +180,7 @@
#if OMP_40_ENABLED
int __kmp_display_env = FALSE;
int __kmp_display_env_verbose = FALSE;
+int __kmp_omp_cancellation = FALSE;
#endif
/* map OMP 3.0 schedule types with our internal schedule types */
@@ -266,9 +266,6 @@
#if USE_ITT_BUILD
int __kmp_forkjoin_frames = 1;
int __kmp_forkjoin_frames_mode = 0;
-FILE * __kmp_itt_csv_file;
-kmp_str_buf_t __kmp_itt_frame_buffer;
-
#endif
PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method = reduction_method_not_defined;
int __kmp_determ_red = FALSE;
diff --git a/openmp/runtime/src/kmp_gsupport.c b/openmp/runtime/src/kmp_gsupport.c
index 33e8fba..9d8e553 100644
--- a/openmp/runtime/src/kmp_gsupport.c
+++ b/openmp/runtime/src/kmp_gsupport.c
@@ -1,7 +1,7 @@
/*
* kmp_gsupport.c
- * $Revision: 42181 $
- * $Date: 2013-03-26 15:04:45 -0500 (Tue, 26 Mar 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
*/
@@ -28,9 +28,10 @@
#define MKLOC(loc,routine) \
static ident_t (loc) = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;" };
+#include "kmp_ftn_os.h"
void
-GOMP_barrier(void)
+xexpand(KMP_API_NAME_GOMP_BARRIER)(void)
{
int gtid = __kmp_entry_gtid();
MKLOC(loc, "GOMP_barrier");
@@ -58,7 +59,7 @@
void
-GOMP_critical_start(void)
+xexpand(KMP_API_NAME_GOMP_CRITICAL_START)(void)
{
int gtid = __kmp_entry_gtid();
MKLOC(loc, "GOMP_critical_start");
@@ -68,7 +69,7 @@
void
-GOMP_critical_end(void)
+xexpand(KMP_API_NAME_GOMP_CRITICAL_END)(void)
{
int gtid = __kmp_get_gtid();
MKLOC(loc, "GOMP_critical_end");
@@ -78,7 +79,7 @@
void
-GOMP_critical_name_start(void **pptr)
+xexpand(KMP_API_NAME_GOMP_CRITICAL_NAME_START)(void **pptr)
{
int gtid = __kmp_entry_gtid();
MKLOC(loc, "GOMP_critical_name_start");
@@ -88,7 +89,7 @@
void
-GOMP_critical_name_end(void **pptr)
+xexpand(KMP_API_NAME_GOMP_CRITICAL_NAME_END)(void **pptr)
{
int gtid = __kmp_get_gtid();
MKLOC(loc, "GOMP_critical_name_end");
@@ -104,7 +105,7 @@
//
void
-GOMP_atomic_start(void)
+xexpand(KMP_API_NAME_GOMP_ATOMIC_START)(void)
{
int gtid = __kmp_entry_gtid();
KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid));
@@ -113,7 +114,7 @@
void
-GOMP_atomic_end(void)
+xexpand(KMP_API_NAME_GOMP_ATOMIC_END)(void)
{
int gtid = __kmp_get_gtid();
KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid));
@@ -122,7 +123,7 @@
int
-GOMP_single_start(void)
+xexpand(KMP_API_NAME_GOMP_SINGLE_START)(void)
{
int gtid = __kmp_entry_gtid();
MKLOC(loc, "GOMP_single_start");
@@ -141,7 +142,7 @@
void *
-GOMP_single_copy_start(void)
+xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void)
{
void *retval;
int gtid = __kmp_entry_gtid();
@@ -176,7 +177,7 @@
void
-GOMP_single_copy_end(void *data)
+xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data)
{
int gtid = __kmp_get_gtid();
MKLOC(loc, "GOMP_single_copy_end");
@@ -196,7 +197,7 @@
void
-GOMP_ordered_start(void)
+xexpand(KMP_API_NAME_GOMP_ORDERED_START)(void)
{
int gtid = __kmp_entry_gtid();
MKLOC(loc, "GOMP_ordered_start");
@@ -206,7 +207,7 @@
void
-GOMP_ordered_end(void)
+xexpand(KMP_API_NAME_GOMP_ORDERED_END)(void)
{
int gtid = __kmp_get_gtid();
MKLOC(loc, "GOMP_ordered_end");
@@ -223,7 +224,7 @@
// (IA-32 architecture) or 64-bit signed (Intel(R) 64).
//
-#if KMP_ARCH_X86
+#if KMP_ARCH_X86 || KMP_ARCH_ARM
# define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4
# define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4
# define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4
@@ -287,7 +288,7 @@
va_start(ap, argc);
rc = __kmp_fork_call(loc, gtid, FALSE, argc, wrapper, __kmp_invoke_task_func,
-#if KMP_ARCH_X86_64 && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
&ap
#else
ap
@@ -305,7 +306,7 @@
void
-GOMP_parallel_start(void (*task)(void *), void *data, unsigned num_threads)
+xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data, unsigned num_threads)
{
int gtid = __kmp_entry_gtid();
MKLOC(loc, "GOMP_parallel_start");
@@ -325,7 +326,7 @@
void
-GOMP_parallel_end(void)
+xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void)
{
int gtid = __kmp_get_gtid();
MKLOC(loc, "GOMP_parallel_end");
@@ -457,31 +458,31 @@
}
-LOOP_START(GOMP_loop_static_start, kmp_sch_static)
-LOOP_NEXT(GOMP_loop_static_next, {})
-LOOP_START(GOMP_loop_dynamic_start, kmp_sch_dynamic_chunked)
-LOOP_NEXT(GOMP_loop_dynamic_next, {})
-LOOP_START(GOMP_loop_guided_start, kmp_sch_guided_chunked)
-LOOP_NEXT(GOMP_loop_guided_next, {})
-LOOP_RUNTIME_START(GOMP_loop_runtime_start, kmp_sch_runtime)
-LOOP_NEXT(GOMP_loop_runtime_next, {})
+LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_STATIC_START), kmp_sch_static)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {})
+LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START), kmp_sch_dynamic_chunked)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {})
+LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_GUIDED_START), kmp_sch_guided_chunked)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {})
+LOOP_RUNTIME_START(xexpand(KMP_API_NAME_GOMP_LOOP_RUNTIME_START), kmp_sch_runtime)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {})
-LOOP_START(GOMP_loop_ordered_static_start, kmp_ord_static)
-LOOP_NEXT(GOMP_loop_ordered_static_next, \
+LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START), kmp_ord_static)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT), \
{ KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
-LOOP_START(GOMP_loop_ordered_dynamic_start, kmp_ord_dynamic_chunked)
-LOOP_NEXT(GOMP_loop_ordered_dynamic_next, \
+LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START), kmp_ord_dynamic_chunked)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT), \
{ KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
-LOOP_START(GOMP_loop_ordered_guided_start, kmp_ord_guided_chunked)
-LOOP_NEXT(GOMP_loop_ordered_guided_next, \
+LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START), kmp_ord_guided_chunked)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT), \
{ KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
-LOOP_RUNTIME_START(GOMP_loop_ordered_runtime_start, kmp_ord_runtime)
-LOOP_NEXT(GOMP_loop_ordered_runtime_next, \
+LOOP_RUNTIME_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START), kmp_ord_runtime)
+LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT), \
{ KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
void
-GOMP_loop_end(void)
+xexpand(KMP_API_NAME_GOMP_LOOP_END)(void)
{
int gtid = __kmp_get_gtid();
KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid))
@@ -493,7 +494,7 @@
void
-GOMP_loop_end_nowait(void)
+xexpand(KMP_API_NAME_GOMP_LOOP_END_NOWAIT)(void)
{
KA_TRACE(20, ("GOMP_loop_end_nowait: T#%d\n", __kmp_get_gtid()))
}
@@ -598,26 +599,26 @@
}
-LOOP_START_ULL(GOMP_loop_ull_static_start, kmp_sch_static)
-LOOP_NEXT_ULL(GOMP_loop_ull_static_next, {})
-LOOP_START_ULL(GOMP_loop_ull_dynamic_start, kmp_sch_dynamic_chunked)
-LOOP_NEXT_ULL(GOMP_loop_ull_dynamic_next, {})
-LOOP_START_ULL(GOMP_loop_ull_guided_start, kmp_sch_guided_chunked)
-LOOP_NEXT_ULL(GOMP_loop_ull_guided_next, {})
-LOOP_RUNTIME_START_ULL(GOMP_loop_ull_runtime_start, kmp_sch_runtime)
-LOOP_NEXT_ULL(GOMP_loop_ull_runtime_next, {})
+LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START), kmp_sch_static)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT), {})
+LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START), kmp_sch_dynamic_chunked)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT), {})
+LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START), kmp_sch_guided_chunked)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {})
+LOOP_RUNTIME_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {})
-LOOP_START_ULL(GOMP_loop_ull_ordered_static_start, kmp_ord_static)
-LOOP_NEXT_ULL(GOMP_loop_ull_ordered_static_next, \
+LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START), kmp_ord_static)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT), \
{ KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
-LOOP_START_ULL(GOMP_loop_ull_ordered_dynamic_start, kmp_ord_dynamic_chunked)
-LOOP_NEXT_ULL(GOMP_loop_ull_ordered_dynamic_next, \
+LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START), kmp_ord_dynamic_chunked)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT), \
{ KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
-LOOP_START_ULL(GOMP_loop_ull_ordered_guided_start, kmp_ord_guided_chunked)
-LOOP_NEXT_ULL(GOMP_loop_ull_ordered_guided_next, \
+LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START), kmp_ord_guided_chunked)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT), \
{ KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
-LOOP_RUNTIME_START_ULL(GOMP_loop_ull_ordered_runtime_start, kmp_ord_runtime)
-LOOP_NEXT_ULL(GOMP_loop_ull_ordered_runtime_next, \
+LOOP_RUNTIME_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START), kmp_ord_runtime)
+LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT), \
{ KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
@@ -659,10 +660,10 @@
}
-PARALLEL_LOOP_START(GOMP_parallel_loop_static_start, kmp_sch_static)
-PARALLEL_LOOP_START(GOMP_parallel_loop_dynamic_start, kmp_sch_dynamic_chunked)
-PARALLEL_LOOP_START(GOMP_parallel_loop_guided_start, kmp_sch_guided_chunked)
-PARALLEL_LOOP_START(GOMP_parallel_loop_runtime_start, kmp_sch_runtime)
+PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START), kmp_sch_static)
+PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START), kmp_sch_dynamic_chunked)
+PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START), kmp_sch_guided_chunked)
+PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START), kmp_sch_runtime)
#if OMP_30_ENABLED
@@ -674,7 +675,7 @@
//
void
-GOMP_task(void (*func)(void *), void *data, void (*copy_func)(void *, void *),
+xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, void (*copy_func)(void *, void *),
long arg_size, long arg_align, int if_cond, unsigned gomp_flags)
{
MKLOC(loc, "GOMP_task");
@@ -728,7 +729,7 @@
void
-GOMP_taskwait(void)
+xexpand(KMP_API_NAME_GOMP_TASKWAIT)(void)
{
MKLOC(loc, "GOMP_taskwait");
int gtid = __kmp_entry_gtid();
@@ -759,7 +760,7 @@
//
unsigned
-GOMP_sections_start(unsigned count)
+xexpand(KMP_API_NAME_GOMP_SECTIONS_START)(unsigned count)
{
int status;
kmp_int lb, ub, stride;
@@ -786,7 +787,7 @@
unsigned
-GOMP_sections_next(void)
+xexpand(KMP_API_NAME_GOMP_SECTIONS_NEXT)(void)
{
int status;
kmp_int lb, ub, stride;
@@ -811,7 +812,7 @@
void
-GOMP_parallel_sections_start(void (*task) (void *), void *data,
+xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(void (*task) (void *), void *data,
unsigned num_threads, unsigned count)
{
int gtid = __kmp_entry_gtid();
@@ -839,7 +840,7 @@
void
-GOMP_sections_end(void)
+xexpand(KMP_API_NAME_GOMP_SECTIONS_END)(void)
{
int gtid = __kmp_get_gtid();
KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid))
@@ -851,11 +852,175 @@
void
-GOMP_sections_end_nowait(void)
+xexpand(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT)(void)
{
KA_TRACE(20, ("GOMP_sections_end_nowait: T#%d\n", __kmp_get_gtid()))
}
+// libgomp has an empty function for GOMP_taskyield as of 2013-10-10
+void
+xexpand(KMP_API_NAME_GOMP_TASKYIELD)(void)
+{
+
+}
+
+/*
+ The following sections of code create aliases for the GOMP_* functions,
+ then create versioned symbols using the assembler directive .symver.
+ This is only pertinent for ELF .so libraries.
+ xaliasify and xversionify are defined in kmp_ftn_os.h
+*/
+
+#if KMP_OS_LINUX
+
+// GOMP_1.0 aliases
+xaliasify(KMP_API_NAME_GOMP_ATOMIC_END, 10);
+xaliasify(KMP_API_NAME_GOMP_ATOMIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_BARRIER, 10);
+xaliasify(KMP_API_NAME_GOMP_CRITICAL_END, 10);
+xaliasify(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10);
+xaliasify(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10);
+xaliasify(KMP_API_NAME_GOMP_CRITICAL_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_END, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_ORDERED_END, 10);
+xaliasify(KMP_API_NAME_GOMP_ORDERED_START, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_END, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10);
+xaliasify(KMP_API_NAME_GOMP_PARALLEL_START, 10);
+xaliasify(KMP_API_NAME_GOMP_SECTIONS_END, 10);
+xaliasify(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10);
+xaliasify(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10);
+xaliasify(KMP_API_NAME_GOMP_SECTIONS_START, 10);
+xaliasify(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10);
+xaliasify(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10);
+xaliasify(KMP_API_NAME_GOMP_SINGLE_START, 10);
+
+// GOMP_2.0 aliases
+#if OMP_30_ENABLED
+xaliasify(KMP_API_NAME_GOMP_TASK, 20);
+xaliasify(KMP_API_NAME_GOMP_TASKWAIT, 20);
+#endif
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20);
+xaliasify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20);
+
+// GOMP_3.0 aliases
+xaliasify(KMP_API_NAME_GOMP_TASKYIELD, 30);
+
+// GOMP_4.0 aliases
+/* TODO: add GOMP_4.0 aliases when corresponding
+ GOMP_* functions are implemented
+*/
+
+// GOMP_1.0 versioned symbols
+xversionify(KMP_API_NAME_GOMP_ATOMIC_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_ATOMIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_BARRIER, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_CRITICAL_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_CRITICAL_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_ORDERED_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_ORDERED_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_PARALLEL_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SECTIONS_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SECTIONS_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10, "GOMP_1.0");
+xversionify(KMP_API_NAME_GOMP_SINGLE_START, 10, "GOMP_1.0");
+
+// GOMP_2.0 versioned symbols
+#if OMP_30_ENABLED
+xversionify(KMP_API_NAME_GOMP_TASK, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_TASKWAIT, 20, "GOMP_2.0");
+#endif
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20, "GOMP_2.0");
+xversionify(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20, "GOMP_2.0");
+
+// GOMP_3.0 versioned symbols
+xversionify(KMP_API_NAME_GOMP_TASKYIELD, 30, "GOMP_3.0");
+
+// GOMP_4.0 versioned symbols
+/* TODO: add GOMP_4.0 versioned symbols when corresponding
+ GOMP_* functions are implemented
+*/
+
+#endif /* KMP_OS_LINUX */
+
#ifdef __cplusplus
} //extern "C"
#endif // __cplusplus
diff --git a/openmp/runtime/src/kmp_i18n.c b/openmp/runtime/src/kmp_i18n.c
index e23e9f1..5cca6e8 100644
--- a/openmp/runtime/src/kmp_i18n.c
+++ b/openmp/runtime/src/kmp_i18n.c
@@ -1,7 +1,7 @@
/*
* kmp_i18n.c
- * $Revision: 42181 $
- * $Date: 2013-03-26 15:04:45 -0500 (Tue, 26 Mar 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
*/
@@ -668,7 +668,7 @@
void
__kmp_i18n_dump_catalog(
- kmp_str_buf_t & buffer
+ kmp_str_buf_t * buffer
) {
struct kmp_i18n_id_range_t {
@@ -676,7 +676,7 @@
kmp_i18n_id_t last;
}; // struct kmp_i18n_id_range_t
- static kmp_i18n_id_range_t ranges[] = {
+ static struct kmp_i18n_id_range_t ranges[] = {
{ kmp_i18n_prp_first, kmp_i18n_prp_last },
{ kmp_i18n_str_first, kmp_i18n_str_last },
{ kmp_i18n_fmt_first, kmp_i18n_fmt_last },
@@ -684,18 +684,20 @@
{ kmp_i18n_hnt_first, kmp_i18n_hnt_last }
}; // ranges
- int num_of_ranges = sizeof( ranges ) / sizeof( kmp_i18n_id_range_t );
+ int num_of_ranges = sizeof( ranges ) / sizeof( struct kmp_i18n_id_range_t );
int range;
kmp_i18n_id_t id;
for ( range = 0; range < num_of_ranges; ++ range ) {
- __kmp_str_buf_print( & buffer, "*** Set #%d ***\n", range + 1 );
- for ( id = kmp_i18n_id_t( ranges[ range ].first + 1 ); id < ranges[ range ].last; id = kmp_i18n_id_t( id + 1 ) ) {
- __kmp_str_buf_print( & buffer, "%d: <<%s>>\n", id, __kmp_i18n_catgets( id ) );
+ __kmp_str_buf_print( buffer, "*** Set #%d ***\n", range + 1 );
+ for ( id = (kmp_i18n_id_t)( ranges[ range ].first + 1 );
+ id < ranges[ range ].last;
+ id = (kmp_i18n_id_t)( id + 1 ) ) {
+ __kmp_str_buf_print( buffer, "%d: <<%s>>\n", id, __kmp_i18n_catgets( id ) );
}; // for id
}; // for range
- __kmp_printf( "%s", buffer.str );
+ __kmp_printf( "%s", buffer->str );
} // __kmp_i18n_dump_catalog
diff --git a/openmp/runtime/src/kmp_i18n.h b/openmp/runtime/src/kmp_i18n.h
index 9392e62..fea8de4 100644
--- a/openmp/runtime/src/kmp_i18n.h
+++ b/openmp/runtime/src/kmp_i18n.h
@@ -1,7 +1,7 @@
/*
* kmp_i18n.h
- * $Revision: 42061 $
- * $Date: 2013-02-28 16:36:24 -0600 (Thu, 28 Feb 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
*/
@@ -183,7 +183,7 @@
}
#ifdef KMP_DEBUG
- void __kmp_i18n_dump_catalog( kmp_str_buf_t & buffer );
+ void __kmp_i18n_dump_catalog( kmp_str_buf_t * buffer );
#endif // KMP_DEBUG
#ifdef __cplusplus
diff --git a/openmp/runtime/src/kmp_itt.h b/openmp/runtime/src/kmp_itt.h
index ced8fc8..0ee79b6 100644
--- a/openmp/runtime/src/kmp_itt.h
+++ b/openmp/runtime/src/kmp_itt.h
@@ -1,8 +1,8 @@
#if USE_ITT_BUILD
/*
* kmp_itt.h -- ITT Notify interface.
- * $Revision: 42616 $
- * $Date: 2013-08-26 11:47:32 -0500 (Mon, 26 Aug 2013) $
+ * $Revision: 42829 $
+ * $Date: 2013-11-21 05:44:01 -0600 (Thu, 21 Nov 2013) $
*/
@@ -59,6 +59,9 @@
__kmp_inline void __kmp_itt_region_joined( int gtid, int serialized = 0 ); // Master only, after joining threads.
// (*) Note: A thread may execute tasks after this point, though.
+// --- Frame reporting ---
+__kmp_inline void __kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t *loc );
+
// --- Barrier reporting ---
__kmp_inline void * __kmp_itt_barrier_object( int gtid, int bt, int set_name = 0, int delta = 0 );
__kmp_inline void __kmp_itt_barrier_starting( int gtid, void * object );
@@ -265,6 +268,6 @@
# define KMP_ITT_IGNORE(stmt ) do { stmt } while (0)
-# define USE_ITT_BUILD_ARG(x)
+# define USE_ITT_BUILD_ARG(x)
#endif /* USE_ITT_BUILD */
diff --git a/openmp/runtime/src/kmp_itt.inl b/openmp/runtime/src/kmp_itt.inl
index bedcca1..6976e7f 100644
--- a/openmp/runtime/src/kmp_itt.inl
+++ b/openmp/runtime/src/kmp_itt.inl
@@ -1,8 +1,8 @@
#if USE_ITT_BUILD
/*
* kmp_itt.inl -- Inline functions of ITT Notify.
- * $Revision: 42616 $
- * $Date: 2013-08-26 11:47:32 -0500 (Mon, 26 Aug 2013) $
+ * $Revision: 42866 $
+ * $Date: 2013-12-10 15:15:58 -0600 (Tue, 10 Dec 2013) $
*/
@@ -49,6 +49,20 @@
# define LINKAGE static inline
#endif
+// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses this
+// API to support user-defined synchronization primitives, but does not use ZCA;
+// it would be safe to turn this off until wider support becomes available.
+#if USE_ITT_ZCA
+#ifdef __INTEL_COMPILER
+# if __INTEL_COMPILER >= 1200
+# undef __itt_sync_acquired
+# undef __itt_sync_releasing
+# define __itt_sync_acquired(addr) __notify_zc_intrinsic((char *)"sync_acquired", addr)
+# define __itt_sync_releasing(addr) __notify_intrinsic((char *)"sync_releasing", addr)
+# endif
+#endif
+#endif
+
/*
------------------------------------------------------------------------------------------------
Parallel region reporting.
@@ -79,10 +93,6 @@
#if USE_ITT_NOTIFY
kmp_team_t * team = __kmp_team_from_gtid( gtid );
#if OMP_30_ENABLED
- KMP_ITT_DEBUG_LOCK();
- KMP_ITT_DEBUG_PRINT( "[frm beg] gtid=%d, idx=%d, serialized:%d, empty:%d\n", gtid,
- __kmp_threads[gtid]->th.th_ident->reserved_2 - 1, serialized,
- (team->t.t_active_level + serialized > 1) );
if (team->t.t_active_level + serialized > 1)
#endif
{
@@ -116,13 +126,19 @@
str_loc.line, str_loc.col);
__kmp_str_loc_free( &str_loc );
+ __itt_suppress_push(__itt_suppress_memory_errors);
__kmp_itt_domains[ frm ] = __itt_domain_create( buff );
+ __itt_suppress_pop();
+
__kmp_str_free( &buff );
__itt_frame_begin_v3(__kmp_itt_domains[ frm ], NULL);
}
} else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
__itt_frame_begin_v3(__kmp_itt_domains[loc->reserved_2 - 1], NULL);
}
+ KMP_ITT_DEBUG_LOCK();
+ KMP_ITT_DEBUG_PRINT( "[frm beg] gtid=%d, idx=%d, serialized:%d, loc:%p\n",
+ gtid, loc->reserved_2 - 1, serialized, loc );
}
#endif
} // __kmp_itt_region_forking
@@ -130,6 +146,51 @@
// -------------------------------------------------------------------------------------------------
LINKAGE void
+__kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t * loc ) {
+#if USE_ITT_NOTIFY
+ if (loc) {
+ if (loc->reserved_2 == 0) {
+ if (__kmp_frame_domain_count < KMP_MAX_FRAME_DOMAINS) {
+ int frm = KMP_TEST_THEN_INC32( & __kmp_frame_domain_count ); // get "old" value
+ if (frm >= KMP_MAX_FRAME_DOMAINS) {
+ KMP_TEST_THEN_DEC32( & __kmp_frame_domain_count ); // revert the count
+ return; // loc->reserved_2 is still 0
+ }
+ // Should it be synchronized? See the comment in __kmp_itt_region_forking
+ loc->reserved_2 = frm + 1; // save "new" value
+
+ // Transform compiler-generated region location into the format
+ // that the tools more or less standardized on:
+ // "<func>$omp$frame@[file:]<line>[:<col>]"
+ const char * buff = NULL;
+ kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
+ if( imbalance ) {
+ buff = __kmp_str_format("%s$omp$barrier-imbalance@%s:%d",
+ str_loc.func, str_loc.file, str_loc.col);
+ } else {
+ buff = __kmp_str_format("%s$omp$barrier@%s:%d",
+ str_loc.func, str_loc.file, str_loc.col);
+ }
+ __kmp_str_loc_free( &str_loc );
+
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ __kmp_itt_domains[ frm ] = __itt_domain_create( buff );
+ __itt_suppress_pop();
+
+ __kmp_str_free( &buff );
+ __itt_frame_submit_v3(__kmp_itt_domains[ frm ], NULL, begin, end );
+ }
+ } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
+ __itt_frame_submit_v3(__kmp_itt_domains[loc->reserved_2 - 1], NULL, begin, end );
+ }
+ }
+
+#endif
+} // __kmp_itt_frame_submit
+
+// -------------------------------------------------------------------------------------------------
+
+LINKAGE void
__kmp_itt_region_starting( int gtid ) {
#if USE_ITT_NOTIFY
#endif
@@ -150,10 +211,6 @@
#if USE_ITT_NOTIFY
kmp_team_t * team = __kmp_team_from_gtid( gtid );
#if OMP_30_ENABLED
- KMP_ITT_DEBUG_LOCK();
- KMP_ITT_DEBUG_PRINT( "[frm end] gtid=%d, idx=%d, serialized:%d, empty:%d\n", gtid,
- __kmp_threads[gtid]->th.th_ident->reserved_2 - 1, serialized,
- (team->t.t_active_level + serialized > 1) );
if (team->t.t_active_level + serialized > 1)
#endif
{
@@ -162,7 +219,10 @@
}
ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident;
if (loc && loc->reserved_2 && loc->reserved_2 <= KMP_MAX_FRAME_DOMAINS) {
+ KMP_ITT_DEBUG_LOCK();
__itt_frame_end_v3(__kmp_itt_domains[loc->reserved_2 - 1], NULL);
+ KMP_ITT_DEBUG_PRINT( "[frm end] gtid=%d, idx=%d, serialized:%d, loc:%p\n",
+ gtid, loc->reserved_2 - 1, serialized, loc );
}
#endif
} // __kmp_itt_region_joined
@@ -577,7 +637,7 @@
void
__kmp_itt_single_start( int gtid ) {
#if USE_ITT_NOTIFY
- if ( __itt_mark_create_ptr ) {
+ if ( __itt_mark_create_ptr || KMP_ITT_DEBUG ) {
kmp_info_t * thr = __kmp_thread_from_gtid( (gtid) );
ident_t * loc = thr->th.th_ident;
char const * src = ( loc == NULL ? NULL : loc->psource );
diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp
index d042019..766cf83 100644
--- a/openmp/runtime/src/kmp_lock.cpp
+++ b/openmp/runtime/src/kmp_lock.cpp
@@ -1,7 +1,7 @@
/*
* kmp_lock.cpp -- lock-related functions
- * $Revision: 42613 $
- * $Date: 2013-08-23 13:29:50 -0500 (Fri, 23 Aug 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
*/
@@ -23,7 +23,7 @@
#include "kmp_lock.h"
#include "kmp_io.h"
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
# include <unistd.h>
# include <sys/syscall.h>
// We should really include <futex.h>, but that causes compatibility problems on different
@@ -398,7 +398,7 @@
}
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
/* ------------------------------------------------------------------------ */
/* futex locks */
@@ -755,7 +755,7 @@
__kmp_destroy_nested_futex_lock( lck );
}
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
/* ------------------------------------------------------------------------ */
@@ -2199,10 +2199,10 @@
// We need a fence here, since we must ensure that no memory operations
// from later in this thread float above that read.
-#if defined( __GNUC__ ) && !defined( __INTEL_COMPILER )
- __sync_synchronize();
-#else
+#if KMP_COMPILER_ICC
_mm_mfence();
+#else
+ __sync_synchronize();
#endif
return res;
@@ -3167,7 +3167,7 @@
}
break;
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
case lk_futex: {
__kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
@@ -3238,7 +3238,7 @@
}
break;
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
case lk_ticket: {
__kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h
index bb80b5a..5191cea 100644
--- a/openmp/runtime/src/kmp_lock.h
+++ b/openmp/runtime/src/kmp_lock.h
@@ -1,7 +1,7 @@
/*
* kmp_lock.h -- lock header file
- * $Revision: 42590 $
- * $Date: 2013-08-13 20:55:19 -0500 (Tue, 13 Aug 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
*/
@@ -174,7 +174,7 @@
extern void __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck );
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
// ----------------------------------------------------------------------------
// futex locks. futex locks are only available on Linux* OS.
@@ -224,7 +224,7 @@
extern void __kmp_init_nested_futex_lock( kmp_futex_lock_t *lck );
extern void __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck );
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
// ----------------------------------------------------------------------------
@@ -479,31 +479,31 @@
#define KMP_BOOTSTRAP_LOCK_INITIALIZER( lock ) KMP_TICKET_LOCK_INITIALIZER( (lock) )
-inline void
+static inline void
__kmp_acquire_bootstrap_lock( kmp_bootstrap_lock_t *lck )
{
__kmp_acquire_ticket_lock( lck, KMP_GTID_DNE );
}
-inline int
+static inline int
__kmp_test_bootstrap_lock( kmp_bootstrap_lock_t *lck )
{
return __kmp_test_ticket_lock( lck, KMP_GTID_DNE );
}
-inline void
+static inline void
__kmp_release_bootstrap_lock( kmp_bootstrap_lock_t *lck )
{
__kmp_release_ticket_lock( lck, KMP_GTID_DNE );
}
-inline void
+static inline void
__kmp_init_bootstrap_lock( kmp_bootstrap_lock_t *lck )
{
__kmp_init_ticket_lock( lck );
}
-inline void
+static inline void
__kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t *lck )
{
__kmp_destroy_ticket_lock( lck );
@@ -524,31 +524,31 @@
typedef kmp_ticket_lock_t kmp_lock_t;
-inline void
+static inline void
__kmp_acquire_lock( kmp_lock_t *lck, kmp_int32 gtid )
{
__kmp_acquire_ticket_lock( lck, gtid );
}
-inline int
+static inline int
__kmp_test_lock( kmp_lock_t *lck, kmp_int32 gtid )
{
return __kmp_test_ticket_lock( lck, gtid );
}
-inline void
+static inline void
__kmp_release_lock( kmp_lock_t *lck, kmp_int32 gtid )
{
__kmp_release_ticket_lock( lck, gtid );
}
-inline void
+static inline void
__kmp_init_lock( kmp_lock_t *lck )
{
__kmp_init_ticket_lock( lck );
}
-inline void
+static inline void
__kmp_destroy_lock( kmp_lock_t *lck )
{
__kmp_destroy_ticket_lock( lck );
@@ -570,7 +570,7 @@
enum kmp_lock_kind {
lk_default = 0,
lk_tas,
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
lk_futex,
#endif
lk_ticket,
@@ -587,7 +587,7 @@
union kmp_user_lock {
kmp_tas_lock_t tas;
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
kmp_futex_lock_t futex;
#endif
kmp_ticket_lock_t ticket;
@@ -606,7 +606,7 @@
extern kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck );
-inline kmp_int32
+static inline kmp_int32
__kmp_get_user_lock_owner( kmp_user_lock_p lck )
{
KMP_DEBUG_ASSERT( __kmp_get_user_lock_owner_ != NULL );
@@ -615,7 +615,7 @@
extern void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
#define __kmp_acquire_user_lock_with_checks(lck,gtid) \
if (__kmp_user_lock_kind == lk_tas) { \
@@ -655,7 +655,7 @@
}
#else
-inline void
+static inline void
__kmp_acquire_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL );
@@ -665,11 +665,11 @@
extern int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
#include "kmp_i18n.h" /* AC: KMP_FATAL definition */
extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */
-inline int
+static inline int
__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
if ( __kmp_user_lock_kind == lk_tas ) {
@@ -688,7 +688,7 @@
}
}
#else
-inline int
+static inline int
__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL );
@@ -698,7 +698,7 @@
extern void ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
-inline void
+static inline void
__kmp_release_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
KMP_DEBUG_ASSERT( __kmp_release_user_lock_with_checks_ != NULL );
@@ -707,7 +707,7 @@
extern void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck );
-inline void
+static inline void
__kmp_init_user_lock_with_checks( kmp_user_lock_p lck )
{
KMP_DEBUG_ASSERT( __kmp_init_user_lock_with_checks_ != NULL );
@@ -720,7 +720,7 @@
//
extern void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck );
-inline void
+static inline void
__kmp_destroy_user_lock( kmp_user_lock_p lck )
{
KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_ != NULL );
@@ -729,7 +729,7 @@
extern void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck );
-inline void
+static inline void
__kmp_destroy_user_lock_with_checks( kmp_user_lock_p lck )
{
KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_with_checks_ != NULL );
@@ -780,7 +780,7 @@
}
#else
-inline void
+static inline void
__kmp_acquire_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL );
@@ -791,7 +791,7 @@
extern int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
-inline int
+static inline int
__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
if ( __kmp_user_lock_kind == lk_tas ) {
@@ -820,7 +820,7 @@
}
}
#else
-inline int
+static inline int
__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL );
@@ -830,7 +830,7 @@
extern void ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
-inline void
+static inline void
__kmp_release_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
KMP_DEBUG_ASSERT( __kmp_release_nested_user_lock_with_checks_ != NULL );
@@ -839,7 +839,7 @@
extern void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck );
-inline void __kmp_init_nested_user_lock_with_checks( kmp_user_lock_p lck )
+static inline void __kmp_init_nested_user_lock_with_checks( kmp_user_lock_p lck )
{
KMP_DEBUG_ASSERT( __kmp_init_nested_user_lock_with_checks_ != NULL );
( *__kmp_init_nested_user_lock_with_checks_ )( lck );
@@ -847,7 +847,7 @@
extern void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck );
-inline void
+static inline void
__kmp_destroy_nested_user_lock_with_checks( kmp_user_lock_p lck )
{
KMP_DEBUG_ASSERT( __kmp_destroy_nested_user_lock_with_checks_ != NULL );
@@ -875,7 +875,7 @@
extern const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck );
-inline const ident_t *
+static inline const ident_t *
__kmp_get_user_lock_location( kmp_user_lock_p lck )
{
if ( __kmp_get_user_lock_location_ != NULL ) {
@@ -888,7 +888,7 @@
extern void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc );
-inline void
+static inline void
__kmp_set_user_lock_location( kmp_user_lock_p lck, const ident_t *loc )
{
if ( __kmp_set_user_lock_location_ != NULL ) {
@@ -900,7 +900,7 @@
extern void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags );
-inline void
+static inline void
__kmp_set_user_lock_flags( kmp_user_lock_p lck, kmp_lock_flags_t flags )
{
if ( __kmp_set_user_lock_flags_ != NULL ) {
@@ -962,7 +962,7 @@
extern kmp_block_of_locks_t *__kmp_lock_blocks;
extern int __kmp_num_locks_in_block;
-extern kmp_user_lock_p __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags = 0 );
+extern kmp_user_lock_p __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags );
extern void __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck );
extern kmp_user_lock_p __kmp_lookup_user_lock( void **user_lock, char const *func );
extern void __kmp_cleanup_user_locks();
diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h
index 9a5d948..f167605 100644
--- a/openmp/runtime/src/kmp_os.h
+++ b/openmp/runtime/src/kmp_os.h
@@ -1,7 +1,7 @@
/*
* kmp_os.h -- KPTS runtime header file.
- * $Revision: 42588 $
- * $Date: 2013-08-13 01:26:00 -0500 (Tue, 13 Aug 2013) $
+ * $Revision: 42820 $
+ * $Date: 2013-11-13 16:53:44 -0600 (Wed, 13 Nov 2013) $
*/
@@ -42,6 +42,24 @@
# define KMP_MEM_CONS_MODEL KMP_MEM_CONS_VOLATILE
#endif
+/* ------------------------- Compiler recognition ---------------------- */
+#define KMP_COMPILER_ICC 0
+#define KMP_COMPILER_GCC 0
+#define KMP_COMPILER_CLANG 0
+
+#if defined( __INTEL_COMPILER )
+# undef KMP_COMPILER_ICC
+# define KMP_COMPILER_ICC 1
+#elif defined( __clang__ )
+# undef KMP_COMPILER_CLANG
+# define KMP_COMPILER_CLANG 1
+#elif defined( __GNUC__ )
+# undef KMP_COMPILER_GCC
+# define KMP_COMPILER_GCC 1
+#else
+# error Unknown compiler
+#endif
+
/* ---------------------- Operating system recognition ------------------- */
#define KMP_OS_LINUX 0
@@ -90,28 +108,77 @@
# if defined __x86_64
# undef KMP_ARCH_X86_64
# define KMP_ARCH_X86_64 1
-# else
+# elif defined __i386
# undef KMP_ARCH_X86
# define KMP_ARCH_X86 1
# endif
#endif
-#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64)
+#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7R__) || \
+ defined(__ARM_ARCH_7A__)
+# define KMP_ARCH_ARMV7 1
+#endif
+
+#if defined(KMP_ARCH_ARMV7) || defined(__ARM_ARCH_6__) || \
+ defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || \
+ defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6T2__) || \
+ defined(__ARM_ARCH_6ZK__)
+# define KMP_ARCH_ARMV6 1
+#endif
+
+#if defined(KMP_ARCH_ARMV6) || defined(__ARM_ARCH_5T__) || \
+ defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \
+ defined(__ARM_ARCH_5TEJ__)
+# define KMP_ARCH_ARMV5 1
+#endif
+
+#if defined(KMP_ARCH_ARMV5) || defined(__ARM_ARCH_4__) || \
+ defined(__ARM_ARCH_4T__)
+# define KMP_ARCH_ARMV4 1
+#endif
+
+#if defined(KMP_ARCH_ARMV4) || defined(__ARM_ARCH_3__) || \
+ defined(__ARM_ARCH_3M__)
+# define KMP_ARCH_ARMV3 1
+#endif
+
+#if defined(KMP_ARCH_ARMV3) || defined(__ARM_ARCH_2__)
+# define KMP_ARCH_ARMV2 1
+#endif
+
+#if defined(KMP_ARCH_ARMV2)
+# define KMP_ARCH_ARM 1
+#endif
+
+#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM)
# error Unknown or unsupported architecture
#endif
-#if KMP_OS_WINDOWS
-# if defined KMP_WIN_CDECL || !defined GUIDEDLL_EXPORTS
-# define USE_FTN_CDECL KMP_FTN_UPPER
+/* Check for quad-precision extension. */
+#define KMP_HAVE_QUAD 0
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+# if KMP_COMPILER_ICC
+ /* _Quad is already defined for icc */
+# undef KMP_HAVE_QUAD
+# define KMP_HAVE_QUAD 1
+# elif KMP_COMPILER_CLANG
+ /* Clang doesn't support a software-implemented
+ 128-bit extended precision type yet */
+ typedef long double _Quad;
+# elif KMP_COMPILER_GCC
+ typedef __float128 _Quad;
+# undef KMP_HAVE_QUAD
+# define KMP_HAVE_QUAD 1
# endif
+#else
+# if __LDBL_MAX_EXP__ >= 16384 && KMP_COMPILER_GCC
+ typedef long double _Quad;
+# undef KMP_HAVE_QUAD
+# define KMP_HAVE_QUAD 1
+# endif
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-# define KMP_FTN KMP_FTN_PLAIN
-# define USE_FTN_EXTRA KMP_FTN_PLAIN
-# if KMP_ARCH_X86
-# if defined KMP_WIN_STDCALL || !defined GUIDEDLL_EXPORTS
-# define USE_FTN_STDCALL KMP_FTN_UPPER
-# endif
-# endif
+#if KMP_OS_WINDOWS
typedef char kmp_int8;
typedef unsigned char kmp_uint8;
typedef short kmp_int16;
@@ -143,9 +210,6 @@
#endif /* KMP_OS_WINDOWS */
#if KMP_OS_UNIX
-# define KMP_FTN KMP_FTN_PLAIN
-# define USE_FTN_CDECL KMP_FTN_PLAIN
-# define USE_FTN_EXTRA KMP_FTN_APPEND
typedef char kmp_int8;
typedef unsigned char kmp_uint8;
typedef short kmp_int16;
@@ -160,7 +224,7 @@
# define KMP_UINT64_SPEC "llu"
#endif /* KMP_OS_UNIX */
-#if KMP_ARCH_X86
+#if KMP_ARCH_X86 || KMP_ARCH_ARM
# define KMP_SIZE_T_SPEC KMP_UINT32_SPEC
#elif KMP_ARCH_X86_64
# define KMP_SIZE_T_SPEC KMP_UINT64_SPEC
@@ -199,7 +263,7 @@
# define KMP_INT_SPEC KMP_INT32_SPEC
# define KMP_UINT_SPEC KMP_UINT32_SPEC
# define KMP_INT_MAX ((kmp_int32)0x7FFFFFFF)
-# define KMP_INT_MIN ((kmp_int64)0x80000000)
+# define KMP_INT_MIN ((kmp_int32)0x80000000)
#endif /* KMP_I8 */
#ifdef __cplusplus
@@ -248,14 +312,6 @@
//-------------------------------------------------------------------------
#endif // __cplusplus
-#if KMP_OS_WINDOWS
-# define KMP_STDCALL __stdcall
-#endif
-
-#ifndef KMP_STDCALL
-# define KMP_STDCALL /* nothing */
-#endif
-
#define KMP_EXPORT extern /* export declaration in guide libraries */
#if __GNUC__ == 4
@@ -336,7 +392,113 @@
// Synchronization primitives
//
-#if KMP_ASM_INTRINS
+#if KMP_ASM_INTRINS && KMP_OS_WINDOWS
+
+#include <Windows.h>
+
+#pragma intrinsic(InterlockedExchangeAdd)
+#pragma intrinsic(InterlockedCompareExchange)
+#pragma intrinsic(InterlockedExchange)
+#pragma intrinsic(InterlockedExchange64)
+
+//
+// Using InterlockedIncrement / InterlockedDecrement causes a library loading
+// ordering problem, so we use InterlockedExchangeAdd instead.
+//
+# define KMP_TEST_THEN_INC32(p) InterlockedExchangeAdd( (volatile long *)(p), 1 )
+# define KMP_TEST_THEN_INC_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), 1 )
+# define KMP_TEST_THEN_ADD4_32(p) InterlockedExchangeAdd( (volatile long *)(p), 4 )
+# define KMP_TEST_THEN_ADD4_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), 4 )
+# define KMP_TEST_THEN_DEC32(p) InterlockedExchangeAdd( (volatile long *)(p), -1 )
+# define KMP_TEST_THEN_DEC_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), -1 )
+# define KMP_TEST_THEN_ADD32(p, v) InterlockedExchangeAdd( (volatile long *)(p), (v) )
+
+# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) InterlockedCompareExchange( (volatile long *)(p),(long)(sv),(long)(cv) )
+
+# define KMP_XCHG_FIXED32(p, v) InterlockedExchange( (volatile long *)(p), (long)(v) )
+# define KMP_XCHG_FIXED64(p, v) InterlockedExchange64( (volatile kmp_int64 *)(p), (kmp_int64)(v) )
+
+inline kmp_real32 KMP_XCHG_REAL32( volatile kmp_real32 *p, kmp_real32 v)
+{
+ kmp_int32 tmp = InterlockedExchange( (volatile long *)p, *(long *)&v);
+ return *(kmp_real32*)&tmp;
+}
+
+//
+// Routines that we still need to implement in assembly.
+//
+extern kmp_int32 __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 v );
+extern kmp_int32 __kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 v );
+extern kmp_int32 __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 v );
+extern kmp_int64 __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 v );
+extern kmp_int64 __kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 v );
+extern kmp_int64 __kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 v );
+
+extern kmp_int8 __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
+extern kmp_int16 __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
+extern kmp_int32 __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
+extern kmp_int32 __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
+extern kmp_int8 __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
+extern kmp_int16 __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
+extern kmp_int32 __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
+extern kmp_int64 __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
+
+extern kmp_int8 __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 v );
+extern kmp_int16 __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 v );
+extern kmp_int32 __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 v );
+extern kmp_int64 __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 v );
+extern kmp_real32 __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 v );
+extern kmp_real64 __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 v );
+
+//# define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32( (p), 1 )
+//# define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32( (p), 1 )
+# define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64( (p), 1LL )
+# define KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64( (p), 1LL )
+//# define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32( (p), 4 )
+//# define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32( (p), 4 )
+# define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64( (p), 4LL )
+# define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64( (p), 4LL )
+//# define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32( (p), -1 )
+//# define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32( (p), -1 )
+# define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64( (p), -1LL )
+# define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64( (p), -1LL )
+//# define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32( (p), (v) )
+# define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64( (p), (v) )
+
+# define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32( (p), (v) )
+# define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32( (p), (v) )
+# define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64( (p), (v) )
+# define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64( (p), (v) )
+
+# define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) )
+
+# if KMP_ARCH_X86
+# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store32( (volatile kmp_int32*)(p), (kmp_int32)(cv), (kmp_int32)(sv) )
+# else /* 64 bit pointers */
+# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store64( (volatile kmp_int64*)(p), (kmp_int64)(cv), (kmp_int64)(sv) )
+# endif /* KMP_ARCH_X86 */
+
+# define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) __kmp_compare_and_store_ret8( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) __kmp_compare_and_store_ret16( (p), (cv), (sv) )
+//# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __kmp_compare_and_store_ret32( (p), (cv), (sv) )
+# define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) __kmp_compare_and_store_ret64( (p), (cv), (sv) )
+
+# define KMP_XCHG_FIXED8(p, v) __kmp_xchg_fixed8( (p), (v) );
+# define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16( (p), (v) );
+//# define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32( (p), (v) );
+//# define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64( (p), (v) );
+//# define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32( (p), (v) );
+# define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64( (p), (v) );
+
+
+#elif (KMP_ASM_INTRINS && (KMP_OS_LINUX || KMP_OS_DARWIN)) || !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
/* cast p to correct type so that proper intrinsic will be used */
# define KMP_TEST_THEN_INC32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 1 )
@@ -385,7 +547,7 @@
return *(kmp_real32*)&tmp;
}
-static kmp_real64 KMP_XCHG_REAL64( volatile kmp_real64 *p, kmp_real64 v)
+inline kmp_real64 KMP_XCHG_REAL64( volatile kmp_real64 *p, kmp_real64 v)
{
kmp_int64 tmp = __sync_lock_test_and_set( (kmp_int64*)p, *(kmp_int64*)&v);
return *(kmp_real64*)&tmp;
@@ -607,6 +769,14 @@
#endif
+// Switches for OSS builds
+#ifndef USE_SYSFS_INFO
+# define USE_SYSFS_INFO 0
+#endif
+#ifndef USE_CMPXCHG_FIX
+# define USE_CMPXCHG_FIX 1
+#endif
+
// Warning levels
enum kmp_warnings_level {
kmp_warnings_off = 0, /* No warnings */
diff --git a/openmp/runtime/src/kmp_runtime.c b/openmp/runtime/src/kmp_runtime.c
index 7d66b9b..37c372b 100644
--- a/openmp/runtime/src/kmp_runtime.c
+++ b/openmp/runtime/src/kmp_runtime.c
@@ -1,7 +1,7 @@
/*
* kmp_runtime.c -- KPTS runtime support library
- * $Revision: 42642 $
- * $Date: 2013-09-06 01:57:24 -0500 (Fri, 06 Sep 2013) $
+ * $Revision: 42839 $
+ * $Date: 2013-11-24 13:01:00 -0600 (Sun, 24 Nov 2013) $
*/
@@ -88,6 +88,8 @@
#endif /* KMP_DEBUG */
+#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
+
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
@@ -472,8 +474,7 @@
__kmp_unref_task_team( task_team, this_thr );
} else if ( KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) {
__kmp_execute_tasks( this_thr, th_gtid, spin, check, final_spin, &flag
- USE_ITT_BUILD_ARG( itt_sync_obj )
- );
+ USE_ITT_BUILD_ARG( itt_sync_obj ), 0);
}
}; // if
}; // if
@@ -994,7 +995,7 @@
}
# endif /* KMP_OS_WINDOWS */
-#endif /* GUIDEDLL_EXPORTS
+#endif /* GUIDEDLL_EXPORTS */
/* ------------------------------------------------------------------------ */
@@ -1190,10 +1191,8 @@
register kmp_balign_team_t *team_bar = & team -> t.t_bar[ bt ];
register int nproc = this_thr -> th.th_team_nproc;
register int i;
- register kmp_uint new_state;
-
/* Don't have to worry about sleep bit here or atomic since team setting */
- new_state = team_bar -> b_arrived + KMP_BARRIER_STATE_BUMP;
+ register kmp_uint new_state = team_bar -> b_arrived + KMP_BARRIER_STATE_BUMP;
/* Collect all the worker team member threads. */
for (i = 1; i < nproc; i++) {
@@ -1341,7 +1340,7 @@
/* Need to update the team arrived pointer if we are the master thread */
if ( nproc > 1 )
- /* New value was already computed in above loop */
+ /* New value was already computed above */
team -> t.t_bar[ bt ].b_arrived = new_state;
else
team -> t.t_bar[ bt ].b_arrived += KMP_BARRIER_STATE_BUMP;
@@ -1380,6 +1379,12 @@
KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ // Barrier imbalance - save arrive time to the thread
+ if( __kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3 ) {
+ this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
+ }
+#endif
/*
* We now perform a hypercube-embedded tree gather to wait until all
* of the threads have arrived, and reduce any required data
@@ -1417,6 +1422,9 @@
/* parent threads wait for children to arrive */
+ if (new_state == KMP_BARRIER_UNUSED_STATE)
+ new_state = team -> t.t_bar[ bt ].b_arrived + KMP_BARRIER_STATE_BUMP;
+
for ( child = 1, child_tid = tid + (1 << level);
child < branch_factor && child_tid < num_threads;
child++, child_tid += (1 << level) )
@@ -1429,10 +1437,6 @@
if ( child+1 < branch_factor && next_child_tid < num_threads )
KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ] -> th.th_bar[ bt ].bb.b_arrived );
#endif /* KMP_CACHE_MANAGE */
- /* Only read this arrived flag once per thread that needs it */
- if (new_state == KMP_BARRIER_UNUSED_STATE)
- new_state = team -> t.t_bar[ bt ].b_arrived + KMP_BARRIER_STATE_BUMP;
-
KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
"arrived(%p) == %u\n",
gtid, team->t.t_id, tid,
@@ -1444,6 +1448,12 @@
USE_ITT_BUILD_ARG (itt_sync_obj)
);
+#if USE_ITT_BUILD
+ // Barrier imbalance - write min of the thread time and a child time to the thread.
+ if( __kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3 ) {
+ this_thr->th.th_bar_arrive_time = KMP_MIN( this_thr->th.th_bar_arrive_time, child_thr->th.th_bar_arrive_time );
+ }
+#endif
if (reduce) {
KA_TRACE( 100, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
@@ -1729,7 +1739,6 @@
/* The reverse versions seem to beat the forward versions overall */
#define KMP_REVERSE_HYPER_BAR
-#ifdef KMP_REVERSE_HYPER_BAR
static void
__kmp_hyper_barrier_release( enum barrier_type bt,
kmp_info_t *this_thr,
@@ -1751,15 +1760,13 @@
register kmp_uint32 offset;
register kmp_uint32 level;
- /*
- * We now perform a hypercube-embedded tree release for all
- * of the threads that have been gathered, but in the exact
- * reverse order from the corresponding gather (for load balance.
- */
+    /* Perform a hypercube-embedded tree release for all of the threads
+       that have been gathered. If KMP_REVERSE_HYPER_BAR is defined (the
+       default), the threads are released in the reverse order of the
+       corresponding gather; otherwise, they are released in the same order. */
if ( ! KMP_MASTER_TID( tid )) {
/* worker threads */
-
KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
@@ -1807,7 +1814,7 @@
TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
- gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
+ gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
KMP_MB(); /* Flush all pending memory write invalidates. */
@@ -1822,6 +1829,7 @@
num_threads = this_thr -> th.th_team_nproc;
other_threads = team -> t.t_threads;
+#ifdef KMP_REVERSE_HYPER_BAR
/* count up to correct level for parent */
for ( level = 0, offset = 1;
offset < num_threads && (((tid >> level) & (branch_factor-1)) == 0);
@@ -1831,7 +1839,14 @@
for ( level -= branch_bits, offset >>= branch_bits;
offset != 0;
level -= branch_bits, offset >>= branch_bits )
+#else
+ /* Go down the tree, level by level */
+ for ( level = 0, offset = 1;
+ offset < num_threads;
+ level += branch_bits, offset <<= branch_bits )
+#endif // KMP_REVERSE_HYPER_BAR
{
+#ifdef KMP_REVERSE_HYPER_BAR
/* Now go in reverse order through the children, highest to lowest.
Initial setting of child is conservative here. */
child = num_threads >> ((level==0)?level:level-1);
@@ -1839,8 +1854,18 @@
child_tid = tid + (child << level);
child >= 1;
child--, child_tid -= (1 << level) )
- {
+#else
+ if (((tid >> level) & (branch_factor - 1)) != 0)
+                    /* No need to go any lower than this, since this is the
+                       level at which the parent would be notified */
+ break;
+ /* iterate through children on this level of the tree */
+ for ( child = 1, child_tid = tid + (1 << level);
+ child < branch_factor && child_tid < num_threads;
+ child++, child_tid += (1 << level) )
+#endif // KMP_REVERSE_HYPER_BAR
+ {
if ( child_tid >= num_threads ) continue; /* child doesn't exist so keep going */
else {
register kmp_info_t *child_thr = other_threads[ child_tid ];
@@ -1848,7 +1873,11 @@
#if KMP_CACHE_MANAGE
register kmp_uint32 next_child_tid = child_tid - (1 << level);
/* prefetch next thread's go count */
+#ifdef KMP_REVERSE_HYPER_BAR
if ( child-1 >= 1 && next_child_tid < num_threads )
+#else
+ if ( child+1 < branch_factor && next_child_tid < num_threads )
+#endif // KMP_REVERSE_HYPER_BAR
KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ]->th.th_bar[ bt ].bb.b_go );
#endif /* KMP_CACHE_MANAGE */
@@ -1880,154 +1909,6 @@
gtid, team->t.t_id, tid, bt ) );
}
-#else /* !KMP_REVERSE_HYPER_BAR */
-
-static void
-__kmp_hyper_barrier_release( enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, int propagate_icvs )
-{
- /* handle fork barrier workers who aren't part of a team yet */
- register kmp_team_t *team;
- register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
- register kmp_info_t **other_threads;
- register kmp_uint32 num_threads;
- register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[ bt ];
- register kmp_uint32 branch_factor = 1 << branch_bits;
- register kmp_uint32 child;
- register kmp_uint32 child_tid;
- register kmp_uint32 offset;
- register kmp_uint32 level;
-
- /*
- * We now perform a hypercube-embedded tree release for all
- * of the threads that have been gathered, but in the same order
- * as the gather.
- */
-
- if ( ! KMP_MASTER_TID( tid )) {
- /* worker threads */
-
- KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
- gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
-
- /* wait for parent thread to release us */
- __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE, NULL );
-
-#if USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY
- if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
- // we are on a fork barrier where we could not get the object reliably
- itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 0, -1 );
- // cancel wait on previous parallel region...
- __kmp_itt_task_starting( itt_sync_obj );
-
- if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
- return;
-
- itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
- if ( itt_sync_obj != NULL )
- __kmp_itt_task_finished( itt_sync_obj ); // call prepare as early as possible for "new" barrier
-
- } else
-#endif /* USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY */
- //
- // early exit for reaping threads releasing forkjoin barrier
- //
- if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
- return;
-
- //
- // The worker thread may now assume that the team is valid.
- //
-#if USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY
- // libguide only code (cannot use *itt_task* routines)
- if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
- // we are on a fork barrier where we could not get the object reliably
- itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
- __kmp_itt_barrier_starting( gtid, itt_sync_obj ); // no need to call releasing, but we have paired calls...
- }
-#endif /* USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY */
- team = __kmp_threads[ gtid ]-> th.th_team;
- KMP_DEBUG_ASSERT( team != NULL );
- tid = __kmp_tid_from_gtid( gtid );
-
- TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
- KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
- gtid, ( team != NULL ) ? team->t.t_id : -1, tid,
- &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
-
- KMP_MB(); /* Flush all pending memory write invalidates. */
-
- } else { /* KMP_MASTER_TID(tid) */
- team = __kmp_threads[ gtid ]-> th.th_team;
- KMP_DEBUG_ASSERT( team != NULL );
-
- KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) enter for barrier type %d\n",
- gtid, team->t.t_id, tid, bt ) );
- }
-
- /* Now set up team parameters since workers have been released */
- if ( team == NULL ) {
- /* handle fork barrier workers who are now part of a team */
- tid = __kmp_tid_from_gtid( gtid );
- team = __kmp_threads[ gtid ]-> th.th_team;
- }
- num_threads = this_thr -> th.th_team_nproc;
- other_threads = team -> t.t_threads;
-
- /* Go down the tree, level by level */
- for ( level = 0, offset = 1;
- offset < num_threads;
- level += branch_bits, offset <<= branch_bits )
- {
- if (((tid >> level) & (branch_factor - 1)) != 0)
- /* No need to go any lower than this, since this is the level
- parent would be notified */
- break;
-
- /* iterate through children on this level of the tree */
- for ( child = 1, child_tid = tid + (1 << level);
- child < branch_factor && child_tid < num_threads;
- child++, child_tid += (1 << level) )
- {
- register kmp_info_t *child_thr = other_threads[ child_tid ];
- register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
-#if KMP_CACHE_MANAGE
- {
- register kmp_uint32 next_child_tid = child_tid + (1 << level);
- /* prefetch next thread's go count */
- if ( child+1 < branch_factor && next_child_tid < num_threads )
- KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ]->th.th_bar[ bt ].bb.b_go );
- }
-#endif /* KMP_CACHE_MANAGE */
-
-#if KMP_BARRIER_ICV_PUSH
- if ( propagate_icvs ) {
- KMP_DEBUG_ASSERT( team != NULL );
- __kmp_init_implicit_task( team->t.t_ident,
- team->t.t_threads[child_tid], team, child_tid, FALSE );
- load_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs);
- store_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs, &team->t.t_implicit_task_taskdata[0].td_icvs);
- sync_icvs();
- }
-#endif // KMP_BARRIER_ICV_PUSH
-
- KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) releasing "
- "T#%d(%d:%u) go(%p): %u => %u\n",
- gtid, team->t.t_id, tid,
- __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
- child_tid, &child_bar -> b_go, child_bar -> b_go,
- child_bar -> b_go + KMP_BARRIER_STATE_BUMP ) );
-
- /* release child from barrier */
- __kmp_release( child_thr, &child_bar -> b_go, kmp_acquire_fence );
- }
- }
-
- KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
- gtid, team->t.t_id, tid, bt ) );
-}
-#endif /* KMP_REVERSE_HYPER_BAR */
-
-
/*
* Internal function to do a barrier.
* If is_split is true, do a split barrier, otherwise, do a plain barrier
@@ -2043,6 +1924,8 @@
register kmp_team_t *team = this_thr -> th.th_team;
register int status = 0;
+ ident_t * tmp_loc = __kmp_threads[ gtid ]->th.th_ident;
+
KA_TRACE( 15, ( "__kmp_barrier: T#%d(%d:%d) has arrived\n",
gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid) ) );
@@ -2126,34 +2009,23 @@
#endif /* OMP_30_ENABLED */
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
// Barrier - report frame end
-#if USE_ITT_BUILD
- // Collect information only if the file was opened succesfully.
- if( __kmp_forkjoin_frames_mode == 1 && __kmp_itt_csv_file )
- {
- ident_t * loc = this_thr->th.th_ident;
- if (loc) {
- // Use compiler-generated location to mark the frame:
- // "<func>$omp$frame@[file:]<line>[:<col>]"
- kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
-
- kmp_uint64 fr_end;
-#if defined( __GNUC__ )
-# if !defined( __INTEL_COMPILER )
- fr_end = __kmp_hardware_timestamp();
-# else
- fr_end = __rdtsc();
-# endif
-#else
- fr_end = __rdtsc();
-#endif
- K_DIAG( 3, ( "__kmp_barrier: T#%d(%d:%d) frame_begin = %llu, frame_end = %llu\n",
- gtid, ( team != NULL ) ? team->t.t_id : -1, tid, this_thr->th.th_frame_time, fr_end ) );
-
- __kmp_str_buf_print( &__kmp_itt_frame_buffer, "%s$omp$frame@%s:%d:%d,%llu,%llu,,\n",
- str_loc.func, str_loc.file, str_loc.line, str_loc.col, this_thr->th.th_frame_time, fr_end );
- __kmp_str_loc_free( &str_loc );
- this_thr->th.th_frame_time = fr_end;
+ if( __itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode ) {
+ kmp_uint64 tmp = __itt_get_timestamp();
+ switch( __kmp_forkjoin_frames_mode ) {
+ case 1:
+ __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, tmp_loc );
+ this_thr->th.th_frame_time = tmp;
+ break;
+ case 2:
+ __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, tmp_loc );
+ break;
+ case 3:
+ __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, tmp_loc );
+ __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, tmp_loc );
+ this_thr->th.th_frame_time = tmp;
+ break;
}
}
#endif /* USE_ITT_BUILD */
@@ -2465,7 +2337,7 @@
KMP_MB();
/* first, let's setup the master thread */
- master_th -> th.th_info .ds.ds_tid = 0;
+ master_th -> th.th_info.ds.ds_tid = 0;
master_th -> th.th_team = team;
master_th -> th.th_team_nproc = team -> t.t_nproc;
master_th -> th.th_team_master = master_th;
@@ -2514,6 +2386,17 @@
static void
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration
+static void
+__kmp_setup_icv_copy( kmp_team_t *team, int new_nproc,
+#if OMP_30_ENABLED
+ kmp_internal_control_t * new_icvs,
+ ident_t * loc
+#else
+ int new_set_nproc, int new_set_dynamic, int new_set_nested,
+ int new_set_blocktime, int new_bt_intervals, int new_bt_set
+#endif // OMP_30_ENABLED
+ ); // forward declaration
+
/* most of the work for a fork */
/* return true if we really went parallel, false if serialized */
int
@@ -2527,7 +2410,7 @@
microtask_t microtask,
launch_t invoker,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
-#if KMP_ARCH_X86_64 && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
va_list * ap
#else
va_list ap
@@ -2576,7 +2459,6 @@
#endif
-
master_th->th.th_ident = loc;
#if OMP_40_ENABLED
@@ -2590,7 +2472,7 @@
argv = (void**)parent_team->t.t_argv;
for( i=argc-1; i >= 0; --i )
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
-#if KMP_ARCH_X86_64 && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
*argv++ = va_arg( *ap, void * );
#else
*argv++ = va_arg( ap, void * );
@@ -2686,11 +2568,11 @@
/* create a serialized parallel region? */
if ( nthreads == 1 ) {
/* josh todo: hypothetical question: what do we do for OS X*? */
-#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
+#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM )
void * args[ argc ];
#else
void * * args = (void**) alloca( argc * sizeof( void * ) );
-#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 ) */
+#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM ) */
__kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
@@ -2721,7 +2603,7 @@
if ( ap ) {
for( i=argc-1; i >= 0; --i )
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
- #if KMP_ARCH_X86_64 && KMP_OS_LINUX
+ #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
*argv++ = va_arg( *ap, void * );
#else
*argv++ = va_arg( ap, void * );
@@ -2741,7 +2623,7 @@
argv = args;
for( i=argc-1; i >= 0; --i )
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
- #if KMP_ARCH_X86_64 && KMP_OS_LINUX
+ #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
*argv++ = va_arg( *ap, void * );
#else
*argv++ = va_arg( ap, void * );
@@ -2957,7 +2839,7 @@
#endif /* OMP_40_ENABLED */
for( i=argc-1; i >= 0; --i )
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
-#if KMP_ARCH_X86_64 && KMP_OS_LINUX
+#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
*argv++ = va_arg( *ap, void * );
#else
*argv++ = va_arg( ap, void * );
@@ -2977,6 +2859,18 @@
root -> r.r_active = TRUE;
__kmp_fork_team_threads( root, team, master_th, gtid );
+ __kmp_setup_icv_copy(team, nthreads
+#if OMP_30_ENABLED
+ , &master_th->th.th_current_task->td_icvs, loc
+#else
+ , parent_team->t.t_set_nproc[master_tid],
+ parent_team->t.t_set_dynamic[master_tid],
+ parent_team->t.t_set_nested[master_tid],
+ parent_team->t.t_set_blocktime[master_tid],
+ parent_team->t.t_set_bt_intervals[master_tid],
+ parent_team->t.t_set_bt_set[master_tid]
+#endif /* OMP_30_ENABLED */
+ );
__kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
@@ -2992,23 +2886,12 @@
__kmp_itt_region_forking( gtid );
#endif /* USE_ITT_BUILD */
+#if USE_ITT_BUILD && USE_ITT_NOTIFY && OMP_30_ENABLED
// Internal fork - report frame begin
-#if USE_ITT_BUILD
- // Collect information only if the file was opened succesfully.
- if( __kmp_forkjoin_frames_mode == 1 && __kmp_itt_csv_file )
+ if( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) && __itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr )
{
- kmp_uint64 fr_begin;
-#if defined( __GNUC__ )
-# if !defined( __INTEL_COMPILER )
- fr_begin = __kmp_hardware_timestamp();
-# else
- fr_begin = __rdtsc();
-# endif
-#else
- fr_begin = __rdtsc();
-#endif
if( ! ( team->t.t_active_level > 1 ) ) {
- master_th->th.th_frame_time = fr_begin;
+ master_th->th.th_frame_time = __itt_get_timestamp();
}
}
#endif /* USE_ITT_BUILD */
@@ -3134,7 +3017,10 @@
// Either not in teams or exiting teams region
// (teams is a frame and no other frames inside the teams)
# endif /* OMP_40_ENABLED */
+ {
+ master_th->th.th_ident = loc;
__kmp_itt_region_joined( gtid );
+ }
#endif /* USE_ITT_BUILD */
#if OMP_40_ENABLED
@@ -4644,6 +4530,7 @@
root -> r.r_root_team -> t.t_threads[0] = root_thread;
root -> r.r_hot_team -> t.t_threads[0] = root_thread;
root_thread -> th.th_serial_team -> t.t_threads[0] = root_thread;
+    root_thread -> th.th_serial_team -> t.t_serialized = 0; // AC: this team is created in reserve, not for execution (it is unused for now).
root -> r.r_uber_thread = root_thread;
/* initialize the thread, get it ready to go */
@@ -5007,6 +4894,19 @@
TCW_4( __kmp_init_monitor, 1 );
__kmp_create_monitor( & __kmp_monitor );
KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
+ #if KMP_OS_WINDOWS
+ // AC: wait until monitor has started. This is a fix for CQ232808.
+ // The reason is that if the library is loaded/unloaded in a loop with small (parallel)
+ // work in between, then there is high probability that monitor thread started after
+ // the library shutdown. At shutdown it is too late to cope with the problem, because
+ // when the master is in DllMain (process detach) the monitor has no chances to start
+ // (it is blocked), and master has no means to inform the monitor that the library has gone,
+ // because all the memory which the monitor can access is going to be released/reset.
+ while ( TCR_4(__kmp_init_monitor) < 2 ) {
+ KMP_YIELD( TRUE );
+ }
+ KF_TRACE( 10, ( "after monitor thread has started\n" ) );
+ #endif
}
__kmp_release_bootstrap_lock( & __kmp_monitor_lock );
}
@@ -5049,6 +4949,7 @@
0 );
}
KMP_ASSERT ( serial_team );
+        serial_team -> t.t_serialized = 0; // AC: this team is created in reserve, not for execution (it is unused for now).
serial_team -> t.t_threads[0] = new_thr;
KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
new_thr ) );
@@ -5144,76 +5045,94 @@
* IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!!
*/
static void
-__kmp_reinitialize_team(
- kmp_team_t * team,
- int new_nproc,
- #if OMP_30_ENABLED
- kmp_internal_control_t * new_icvs,
- ident_t * loc
- #else
- int new_set_nproc, int new_set_dynamic, int new_set_nested,
- int new_set_blocktime, int new_bt_intervals, int new_bt_set
- #endif // OMP_30_ENABLED
-) {
- int f;
- #if OMP_30_ENABLED
- KMP_DEBUG_ASSERT( team && new_nproc && new_icvs );
- KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
- team->t.t_ident = loc;
- #else
- KMP_DEBUG_ASSERT( team && new_nproc && new_set_nproc );
- #endif // OMP_30_ENABLED
+__kmp_reinitialize_team( kmp_team_t *team,
+#if OMP_30_ENABLED
+ kmp_internal_control_t *new_icvs, ident_t *loc
+#else
+ int new_set_nproc, int new_set_dynamic, int new_set_nested,
+ int new_set_blocktime, int new_bt_intervals, int new_bt_set
+#endif
+ ) {
+ KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
+ team->t.t_threads[0], team ) );
+#if OMP_30_ENABLED
+ KMP_DEBUG_ASSERT( team && new_icvs);
+ KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
+ team->t.t_ident = loc;
+#else
+ KMP_DEBUG_ASSERT( team && new_set_nproc );
+#endif // OMP_30_ENABLED
team->t.t_id = KMP_GEN_TEAM_ID();
-#if KMP_BARRIER_ICV_PULL
- //
- // Copy the ICV's to the team structure, where all of the worker threads
- // can access them and make their own copies after the barrier.
- //
- load_icvs(new_icvs);
- store_icvs(&team->t.t_initial_icvs, new_icvs);
-
- //
- // Set up the master thread's copy of the ICV's. __kmp_fork_call()
- // assumes they are already set in the master thread.
- // FIXME - change that code to use the team->t.t_initial_icvs copy
- // and eliminate this copy.
- //
- __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
- store_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
- sync_icvs();
- KF_TRACE( 10, ( "__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n",
- 0, team->t.t_threads[0], team ) );
-
-#elif KMP_BARRIER_ICV_PUSH
- //
- // Set the ICV's in the master thread only.
- // They will be propagated by the fork barrier.
- //
- __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
- load_icvs(new_icvs);
- store_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
- sync_icvs();
-
- KF_TRACE( 10, ( "__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n",
- 0, team->t.t_threads[0], team ) );
-
-#else
- //
- // Copy the icvs to each of the threads. This takes O(nthreads) time.
- //
+ // Copy ICVs to the master thread's implicit taskdata
#if OMP_30_ENABLED
load_icvs(new_icvs);
-#endif
- for( f=0 ; f<new_nproc ; f++) {
+ __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
+ store_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
+ sync_icvs();
+# else
+ team -> t.t_set_nproc[0] = new_set_nproc;
+ team -> t.t_set_dynamic[0] = new_set_dynamic;
+ team -> t.t_set_nested[0] = new_set_nested;
+ team -> t.t_set_blocktime[0] = new_set_blocktime;
+ team -> t.t_set_bt_intervals[0] = new_bt_intervals;
+ team -> t.t_set_bt_set[0] = new_bt_set;
+# endif // OMP_30_ENABLED
+
+ KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
+ team->t.t_threads[0], team ) );
+}
+
+static void
+__kmp_setup_icv_copy(kmp_team_t * team, int new_nproc,
+#if OMP_30_ENABLED
+ kmp_internal_control_t * new_icvs,
+ ident_t * loc
+#else
+ int new_set_nproc, int new_set_dynamic, int new_set_nested,
+ int new_set_blocktime, int new_bt_intervals, int new_bt_set
+#endif // OMP_30_ENABLED
+ )
+{
+ int f;
+
+#if OMP_30_ENABLED
+ KMP_DEBUG_ASSERT( team && new_nproc && new_icvs );
+ KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
+#else
+ KMP_DEBUG_ASSERT( team && new_nproc && new_set_nproc );
+#endif // OMP_30_ENABLED
+
+ // Master thread's copy of the ICVs was set up on the implicit taskdata in __kmp_reinitialize_team.
+ // __kmp_fork_call() assumes the master thread's implicit task has this data before this function is called.
+#if KMP_BARRIER_ICV_PULL
+ // Copy the ICVs to master's thread structure into th_fixed_icvs (which remains untouched), where all of the
+ // worker threads can access them and make their own copies after the barrier.
+ load_icvs(new_icvs);
+ KMP_DEBUG_ASSERT(team->t.t_threads[0]); // the threads arrays should be allocated at this point
+ store_icvs(&team->t.t_threads[0]->th.th_fixed_icvs, new_icvs);
+ sync_icvs();
+ KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0, team->t.t_threads[0], team));
+
+#elif KMP_BARRIER_ICV_PUSH
+ // The ICVs will be propagated in the fork barrier, so nothing needs to be done here.
+ KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0, team->t.t_threads[0], team));
+
+#else
+ // Copy the ICVs to each of the non-master threads. This takes O(nthreads) time.
+# if OMP_30_ENABLED
+ load_icvs(new_icvs);
+# endif // OMP_30_ENABLED
+ KMP_DEBUG_ASSERT(team->t.t_threads[0]); // the threads arrays should be allocated at this point
+ for(f=1 ; f<new_nproc ; f++) { // skip the master thread
# if OMP_30_ENABLED
// TODO: GEH - pass in better source location info since usually NULL here
- KF_TRACE( 10, ( "__kmp_reinitialize_team1: T#%d this_thread=%p team=%p\n",
+ KF_TRACE( 10, ( "__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
f, team->t.t_threads[f], team ) );
__kmp_init_implicit_task( loc, team->t.t_threads[f], team, f, FALSE );
store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
- KF_TRACE( 10, ( "__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n",
+ KF_TRACE( 10, ( "__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
f, team->t.t_threads[f], team ) );
# else
team -> t.t_set_nproc[f] = new_set_nproc;
@@ -5226,9 +5145,8 @@
}
# if OMP_30_ENABLED
sync_icvs();
-# endif
-#endif // KMP_BARRIER_ICV_PUSH || KMP_BARRIER_ICV_PULL
-
+# endif // OMP_30_ENABLED
+#endif // KMP_BARRIER_ICV_PULL
}
/* initialize the team data structure
@@ -5246,6 +5164,8 @@
int new_set_blocktime, int new_bt_intervals, int new_bt_set
#endif // OMP_30_ENABLED
) {
+ KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
+
/* verify */
KMP_DEBUG_ASSERT( team );
KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
@@ -5290,18 +5210,18 @@
team -> t.t_control_stack_top = NULL;
- __kmp_reinitialize_team(
- team, new_nproc,
- #if OMP_30_ENABLED
- new_icvs,
- loc
- #else
- new_set_nproc, new_set_dynamic, new_set_nested,
- new_set_blocktime, new_bt_intervals, new_bt_set
- #endif // OMP_30_ENABLED
- );
+ __kmp_reinitialize_team( team,
+#if OMP_30_ENABLED
+ new_icvs, loc
+#else
+ new_set_nproc, new_set_dynamic, new_set_nested,
+ new_set_blocktime, new_bt_intervals, new_bt_set
+#endif // OMP_30_ENABLED
+ );
+
KMP_MB();
+ KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
}
#if KMP_OS_LINUX
@@ -5700,15 +5620,15 @@
// TODO???: team -> t.t_max_active_levels = new_max_active_levels;
team -> t.t_sched = new_icvs->sched;
#endif
- __kmp_reinitialize_team( team, new_nproc,
+ __kmp_reinitialize_team( team,
#if OMP_30_ENABLED
- new_icvs,
- root->r.r_uber_thread->th.th_ident
+ new_icvs, root->r.r_uber_thread->th.th_ident
#else
- new_set_nproc, new_set_dynamic, new_set_nested,
- new_set_blocktime, new_bt_intervals, new_bt_set
-#endif
- );
+ new_set_nproc, new_set_dynamic, new_set_nested,
+ new_set_blocktime, new_bt_intervals, new_bt_set
+#endif // OMP_30_ENABLED
+ );
+
#if OMP_30_ENABLED
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
@@ -5768,15 +5688,14 @@
if(team -> t.t_max_nproc < new_nproc) {
/* reallocate larger arrays */
__kmp_reallocate_team_arrays(team, new_nproc);
- __kmp_reinitialize_team( team, new_nproc,
+ __kmp_reinitialize_team( team,
#if OMP_30_ENABLED
- new_icvs,
- NULL // TODO: !!!
+ new_icvs, NULL
#else
- new_set_nproc, new_set_dynamic, new_set_nested,
- new_set_blocktime, new_bt_intervals, new_bt_set
-#endif
- );
+ new_set_nproc, new_set_dynamic, new_set_nested,
+ new_set_blocktime, new_bt_intervals, new_bt_set
+#endif // OMP_30_ENABLED
+ );
}
#if KMP_OS_LINUX
@@ -5859,8 +5778,8 @@
# endif
#endif
- }
- else {
+ }
+ else {
KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" ));
#if KMP_MIC
// This case can mean that omp_set_num_threads() was called and the hot team size
@@ -5877,15 +5796,14 @@
team -> t.t_sched = new_icvs->sched;
#endif
- __kmp_reinitialize_team( team, new_nproc,
+ __kmp_reinitialize_team( team,
#if OMP_30_ENABLED
- new_icvs,
- root->r.r_uber_thread->th.th_ident
+ new_icvs, root->r.r_uber_thread->th.th_ident
#else
- new_set_nproc, new_set_dynamic, new_set_nested,
- new_set_blocktime, new_bt_intervals, new_bt_set
-#endif
- );
+ new_set_nproc, new_set_dynamic, new_set_nested,
+ new_set_blocktime, new_bt_intervals, new_bt_set
+#endif // OMP_30_ENABLED
+ );
#if OMP_30_ENABLED
KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
@@ -6000,6 +5918,8 @@
* up seems to really hurt performance a lot on the P4, so, let's not use
* this... */
__kmp_allocate_team_arrays( team, max_nproc );
+
+ KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
__kmp_initialize_team( team, new_nproc,
#if OMP_30_ENABLED
new_icvs,
@@ -6293,7 +6213,6 @@
KA_TRACE( 10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",
gtid, team_id, tid ));
-
#if OMP_30_ENABLED
if ( __kmp_tasking_mode == tskm_extra_barrier ) {
__kmp_tasking_barrier( team, this_thr, gtid );
@@ -6329,25 +6248,6 @@
#endif // OMP_30_ENABLED
}
- #if KMP_OS_WINDOWS
- // AC: wait here until monitor has started. This is a fix for CQ232808.
- // The reason is that if the library is loaded/unloaded in a loop with small (parallel)
- // work in between, then there is high probability that monitor thread started after
- // the library shutdown. At shutdown it is too late to cope with the problem, because
- // when the master is in DllMain (process detach) the monitor has no chances to start
- // (it is blocked), and master has no means to inform the monitor that the library has gone,
- // because all the memory which the monitor can access is going to be released/reset.
- //
- // The moment before barrier_gather sounds appropriate, because master needs to
- // wait for all workers anyway, and we want this to happen as late as possible,
- // but before the shutdown which may happen after the barrier.
- if( KMP_MASTER_TID( tid ) && TCR_4(__kmp_init_monitor) < 2 ) {
- __kmp_wait_sleep( this_thr, (volatile kmp_uint32*)&__kmp_init_monitor, 2, 0
- USE_ITT_BUILD_ARG( itt_sync_obj )
- );
- }
- #endif
-
#if USE_ITT_BUILD
if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
__kmp_itt_barrier_starting( gtid, itt_sync_obj );
@@ -6390,34 +6290,22 @@
USE_ITT_BUILD_ARG( itt_sync_obj )
);
}
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
// Join barrier - report frame end
-#if USE_ITT_BUILD
- // Collect information only if the file was opened successfully.
- if( __kmp_forkjoin_frames_mode == 1 && __kmp_itt_csv_file )
- {
- ident_t * loc = this_thr->th.th_ident;
- if (loc) {
- // Use compiler-generated location to mark the frame:
- // "<func>$omp$frame@[file:]<line>[:<col>]"
- kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
-
- kmp_uint64 fr_end;
-#if defined( __GNUC__ )
-# if !defined( __INTEL_COMPILER )
- fr_end = __kmp_hardware_timestamp();
-# else
- fr_end = __rdtsc();
-# endif
-#else
- fr_end = __rdtsc();
-#endif
- K_DIAG( 3, ( "__kmp_join_barrier: T#%d(%d:%d) frame_begin = %llu, frame_end = %llu\n",
- gtid, ( team != NULL ) ? team->t.t_id : -1, tid, this_thr->th.th_frame_time, fr_end ) );
-
- __kmp_str_buf_print( &__kmp_itt_frame_buffer, "%s$omp$frame@%s:%d:%d,%llu,%llu,,\n",
- str_loc.func, str_loc.file, str_loc.line, str_loc.col, this_thr->th.th_frame_time, fr_end );
-
- __kmp_str_loc_free( &str_loc );
+ if( __itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode ) {
+ kmp_uint64 tmp = __itt_get_timestamp();
+ ident_t * loc = team->t.t_ident;
+ switch( __kmp_forkjoin_frames_mode ) {
+ case 1:
+ __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, loc );
+ break;
+ case 2:
+ __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, loc );
+ break;
+ case 3:
+ __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, loc );
+ __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, loc );
+ break;
}
}
#endif /* USE_ITT_BUILD */
@@ -6571,20 +6459,16 @@
#if OMP_30_ENABLED
# if KMP_BARRIER_ICV_PULL
- //
- // FIXME - after __kmp_fork_call() is modified to not look at the
- // master thread's implicit task ICV's, remove the ! KMP_MASTER_TID
- // restriction from this if condition.
- //
- if (! KMP_MASTER_TID( tid ) ) {
- //
- // Copy the initial ICV's from the team struct to the implicit task
- // for this tid.
- //
- __kmp_init_implicit_task( team->t.t_ident, team->t.t_threads[tid],
- team, tid, FALSE );
- load_icvs(&team->t.t_initial_icvs);
- store_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &team->t.t_initial_icvs);
+ // Master thread's copy of the ICVs was set up on the implicit taskdata in __kmp_reinitialize_team.
+ // __kmp_fork_call() assumes the master thread's implicit task has this data before this function is called.
+ // We cannot modify __kmp_fork_call() to look at the fixed ICVs in the master's thread struct, because it is
+ // not always the case that the threads arrays have been allocated when __kmp_fork_call() is executed.
+ if (! KMP_MASTER_TID( tid ) ) { // master thread already has ICVs
+ // Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
+ KA_TRACE( 10, ( "__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid ));
+ load_icvs(&team->t.t_threads[0]->th.th_fixed_icvs);
+ __kmp_init_implicit_task( team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE );
+ store_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &team->t.t_threads[0]->th.th_fixed_icvs);
sync_icvs();
}
# endif // KMP_BARRIER_ICV_PULL
@@ -6716,13 +6600,13 @@
void
__kmp_internal_end_dest( void *specific_gtid )
{
- #ifdef __INTEL_COMPILER
+ #if KMP_COMPILER_ICC
#pragma warning( push )
#pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
#endif
// Make sure no significant bits are lost
int gtid = (kmp_intptr_t)specific_gtid - 1;
- #ifdef __INTEL_COMPILER
+ #if KMP_COMPILER_ICC
#pragma warning( pop )
#endif
@@ -7503,7 +7387,6 @@
__kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
}
__kmp_max_nth = __kmp_sys_max_nth;
- __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
// Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part
__kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
@@ -7572,18 +7455,17 @@
if ( __kmp_str_match_true( val ) ) {
kmp_str_buf_t buffer;
__kmp_str_buf_init( & buffer );
- __kmp_i18n_dump_catalog( buffer );
+ __kmp_i18n_dump_catalog( & buffer );
__kmp_printf( "%s", buffer.str );
__kmp_str_buf_free( & buffer );
}; // if
__kmp_env_free( & val );
#endif
+ __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
// Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
__kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
- // omalyshe: This initialisation beats env var setting.
- //__kmp_load_balance_interval = 1.0;
// If the library is shut down properly, both pools must be NULL. Just in case, set them
// to NULL -- some memory may leak, but subsequent code will work even if pools are not freed.
@@ -7876,38 +7758,6 @@
__kmp_print_version_2();
}
-#if USE_ITT_BUILD
- // Create CSV file to report frames
- if( __kmp_forkjoin_frames_mode == 1 )
- {
- // Open CSV file to write itt frame information
- const char * csv_file;
-/* Internal AXE variables
- char * host_name = __kmp_env_get("INTEL_MRTE_HOST_NAME");
- char * out_dir = __kmp_env_get("INTEL_MRTE_DATA_DIR");*/
- char * host_name = __kmp_env_get("AMPLXE_HOSTNAME");
- char * out_dir = __kmp_env_get("AMPLXE_DATA_DIR");
-
- if( out_dir && host_name ) {
- csv_file = __kmp_str_format( "%s/omp-frames-hostname-%s.csv", out_dir, host_name );
- __kmp_itt_csv_file = fopen( csv_file, "w" );
- __kmp_str_free( &csv_file );
- } else {
-#ifdef KMP_DEBUG
- // Create CSV file in the current dir
- csv_file = __kmp_str_format( "./omp-frames-hostname-xxx.csv" );
- __kmp_itt_csv_file = fopen( csv_file, "w" );
- __kmp_str_free( &csv_file );
-#endif
- }
- if( __kmp_itt_csv_file ) {
- __kmp_str_buf_init( & __kmp_itt_frame_buffer );
- __kmp_str_buf_print( & __kmp_itt_frame_buffer, "name,start_tsc.TSC,end_tsc,pid,tid\n" );
- }
- }
-
-#endif /* USE_ITT_BUILD */
-
/* we have finished parallel initialization */
TCW_SYNC_4(__kmp_init_parallel, TRUE);
@@ -8347,16 +8197,6 @@
__kmp_i18n_catclose();
-#if USE_ITT_BUILD
- // Close CSV file for frames
- if( __kmp_forkjoin_frames_mode && __kmp_itt_csv_file ) {
- fprintf( __kmp_itt_csv_file, __kmp_itt_frame_buffer.str );
-
- __kmp_str_buf_free( & __kmp_itt_frame_buffer );
- fclose( __kmp_itt_csv_file );
- }
-#endif /* USE_ITT_BUILD */
-
KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
}
@@ -8576,14 +8416,6 @@
* internal fast reduction routines
*/
-// implementation rev. 0.4
-// AT: determine CPU, and always use 'critical method' if non-Intel
-// AT: test loc != NULL
-// AT: what to return if lck == NULL
-// AT: tune the cut-off point for atomic reduce method
-// AT: tune what to return depending on the CPU and platform configuration
-// AT: tune what to return depending on team size
-// AT: move this function out to kmp_csupport.c
PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
@@ -8641,22 +8473,10 @@
#error "Unknown or unsupported OS"
#endif // KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_DARWIN
- #elif KMP_ARCH_X86
+ #elif KMP_ARCH_X86 || KMP_ARCH_ARM
#if KMP_OS_LINUX || KMP_OS_WINDOWS
- // similar to win_32
- // 4x1x2 fxqlin04, the 'linear,linear' barrier
-
- // similar to lin_32
- // 4x1x2 fxqwin04, the 'linear,linear' barrier
-
- // actual measurement shows that the critical section method is better if team_size <= 8;
- // what happenes when team_size > 8 ? ( no machine to test )
-
- // TO DO: need to run a 32-bit code on Intel(R) 64
- // TO DO: test the 'hyper,hyper,1,1' barrier
-
// basic tuning
if( atomic_available ) {
@@ -8667,7 +8487,6 @@
#elif KMP_OS_DARWIN
-
if( atomic_available && ( num_vars <= 3 ) ) {
retval = atomic_reduce_block;
} else if( tree_available ) {
@@ -8686,18 +8505,6 @@
}
- //AT: TO DO: critical block method not implemented by PAROPT
- //if( retval == __kmp_critical_reduce_block ) {
- // if( lck == NULL ) { // critical block method not implemented by PAROPT
- // }
- //}
-
- // tune what to return depending on the CPU and platform configuration
- // (sometimes tree method is slower than critical)
-
- // probably tune what to return depending on team size
-
-
// KMP_FORCE_REDUCTION
if( __kmp_force_reduction_method != reduction_method_not_defined ) {
diff --git a/openmp/runtime/src/kmp_settings.c b/openmp/runtime/src/kmp_settings.c
index b190cce..3a0f6ce 100644
--- a/openmp/runtime/src/kmp_settings.c
+++ b/openmp/runtime/src/kmp_settings.c
@@ -1,7 +1,7 @@
/*
* kmp_settings.c -- Initialize environment variables
- * $Revision: 42642 $
- * $Date: 2013-09-06 01:57:24 -0500 (Fri, 06 Sep 2013) $
+ * $Revision: 42816 $
+ * $Date: 2013-11-11 15:33:37 -0600 (Mon, 11 Nov 2013) $
*/
@@ -26,9 +26,6 @@
#include "kmp_io.h"
-#define KMP_MAX( x, y ) ( (x) > (y) ? (x) : (y) )
-#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
-
static int __kmp_env_isDefined( char const * name );
static int __kmp_env_toPrint( char const * name, int flag );
@@ -3915,7 +3912,7 @@
|| __kmp_str_match( "testandset", 2, value ) ) {
__kmp_user_lock_kind = lk_tas;
}
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
else if ( __kmp_str_match( "futex", 1, value ) ) {
if ( __kmp_futex_determine_capable() ) {
__kmp_user_lock_kind = lk_futex;
@@ -4322,6 +4319,16 @@
}
} // __kmp_stg_print_omp_display_env
+static void
+__kmp_stg_parse_omp_cancellation( char const * name, char const * value, void * data ) {
+ __kmp_stg_parse_bool( name, value, & __kmp_omp_cancellation );
+} // __kmp_stg_parse_omp_cancellation
+
+static void
+__kmp_stg_print_omp_cancellation( kmp_str_buf_t * buffer, char const * name, void * data ) {
+ __kmp_stg_print_bool( buffer, name, __kmp_omp_cancellation );
+} // __kmp_stg_print_omp_cancellation
+
#endif
// -------------------------------------------------------------------------------------------------
@@ -4476,6 +4483,7 @@
# if OMP_40_ENABLED
{ "OMP_DISPLAY_ENV", __kmp_stg_parse_omp_display_env, __kmp_stg_print_omp_display_env, NULL, 0, 0 },
+ { "OMP_CANCELLATION", __kmp_stg_parse_omp_cancellation, __kmp_stg_print_omp_cancellation, NULL, 0, 0 },
#endif
{ "", NULL, NULL, NULL, 0, 0 }
}; // settings
diff --git a/openmp/runtime/src/kmp_str.c b/openmp/runtime/src/kmp_str.c
index d9b98ab..c1f9e9b 100644
--- a/openmp/runtime/src/kmp_str.c
+++ b/openmp/runtime/src/kmp_str.c
@@ -1,7 +1,7 @@
/*
* kmp_str.c -- String manipulation routines.
- * $Revision: 42613 $
- * $Date: 2013-08-23 13:29:50 -0500 (Fri, 23 Aug 2013) $
+ * $Revision: 42810 $
+ * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
*/
@@ -329,9 +329,9 @@
__kmp_str_fname_free(
kmp_str_fname_t * fname
) {
- __kmp_str_free( const_cast< char const ** >( & fname->path ) );
- __kmp_str_free( const_cast< char const ** >( & fname->dir ) );
- __kmp_str_free( const_cast< char const ** >( & fname->base ) );
+ __kmp_str_free( (char const **)( & fname->path ) );
+ __kmp_str_free( (char const **)( & fname->dir ) );
+ __kmp_str_free( (char const **)( & fname->base ) );
} // kmp_str_fname_free
diff --git a/openmp/runtime/src/kmp_stub.c b/openmp/runtime/src/kmp_stub.c
index e72b196..c1914f4 100644
--- a/openmp/runtime/src/kmp_stub.c
+++ b/openmp/runtime/src/kmp_stub.c
@@ -1,7 +1,7 @@
/*
* kmp_stub.c -- stub versions of user-callable OpenMP RT functions.
- * $Revision: 42150 $
- * $Date: 2013-03-15 15:40:38 -0500 (Fri, 15 Mar 2013) $
+ * $Revision: 42826 $
+ * $Date: 2013-11-20 03:39:45 -0600 (Wed, 20 Nov 2013) $
*/
@@ -29,11 +29,32 @@
#include <sys/time.h>
#endif
+#include "omp.h" // Function renamings.
#include "kmp.h" // KMP_DEFAULT_STKSIZE
#include "kmp_version.h"
-#include "omp.h" // Function renamings.
+// Moved from omp.h
+#if OMP_30_ENABLED
+#define omp_set_max_active_levels ompc_set_max_active_levels
+#define omp_set_schedule ompc_set_schedule
+#define omp_get_ancestor_thread_num ompc_get_ancestor_thread_num
+#define omp_get_team_size ompc_get_team_size
+
+#endif // OMP_30_ENABLED
+
+#define omp_set_num_threads ompc_set_num_threads
+#define omp_set_dynamic ompc_set_dynamic
+#define omp_set_nested ompc_set_nested
+#define kmp_set_stacksize kmpc_set_stacksize
+#define kmp_set_stacksize_s kmpc_set_stacksize_s
+#define kmp_set_blocktime kmpc_set_blocktime
+#define kmp_set_library kmpc_set_library
+#define kmp_set_defaults kmpc_set_defaults
+#define kmp_malloc kmpc_malloc
+#define kmp_calloc kmpc_calloc
+#define kmp_realloc kmpc_realloc
+#define kmp_free kmpc_free
static double frequency = 0.0;
@@ -243,29 +264,5 @@
return wtick;
}; // __kmps_get_wtick
-
-/*
- These functions are exported from libraries, but not declared in omp,h and omp_lib.f:
-
- // omalyshe: eight entries below removed from the library (2011-11-22)
- kmpc_get_banner
- kmpc_get_poolmode
- kmpc_get_poolsize
- kmpc_get_poolstat
- kmpc_poolprint
- kmpc_print_banner
- kmpc_set_poolmode
- kmpc_set_poolsize
-
- kmpc_set_affinity
- kmp_threadprivate_insert
- kmp_threadprivate_insert_private_data
- VT_getthid
- vtgthid
-
- The list is collected on lin_32.
-
-*/
-
// end of file //
diff --git a/openmp/runtime/src/kmp_tasking.c b/openmp/runtime/src/kmp_tasking.c
index ea5cdc0..8cac009 100644
--- a/openmp/runtime/src/kmp_tasking.c
+++ b/openmp/runtime/src/kmp_tasking.c
@@ -1,7 +1,7 @@
/*
* kmp_tasking.c -- OpenMP 3.0 tasking support.
- * $Revision: 42522 $
- * $Date: 2013-07-16 05:28:49 -0500 (Tue, 16 Jul 2013) $
+ * $Revision: 42852 $
+ * $Date: 2013-12-04 10:50:49 -0600 (Wed, 04 Dec 2013) $
*/
@@ -620,13 +620,28 @@
#if OMP_40_ENABLED
if ( taskdata->td_taskgroup )
KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
- __kmp_release_deps(gtid,taskdata);
+ __kmp_release_deps(gtid,taskdata);
#endif
}
KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
gtid, taskdata, children) );
+#if OMP_40_ENABLED
+ /* If the tasks' destructor thunk flag has been set, we need to invoke the
+ destructor thunk that has been generated by the compiler.
+ The code is placed here, since at this point other tasks might have been released
+ hence overlapping the destructor invocations with some other work in the
+ released tasks. The OpenMP spec is not specific on when the destructors are
+ invoked, so we should be free to choose.
+ */
+ if (taskdata->td_flags.destructors_thunk) {
+ kmp_routine_entry_t destr_thunk = task->destructors;
+ KMP_ASSERT(destr_thunk);
+ destr_thunk(gtid, task);
+ }
+#endif // OMP_40_ENABLED
+
// bookkeeping for resuming task:
// GEH - note tasking_ser => task_serial
KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
@@ -739,10 +754,10 @@
task->td_flags.complete = 0;
task->td_flags.freed = 0;
-#if OMP_40_ENABLED
+#if OMP_40_ENABLED
task->td_dephash = NULL;
task->td_depnode = NULL;
-#endif
+#endif
if (set_curr_task) { // only do this initialization the first time a thread is created
task->td_incomplete_child_tasks = 0;
@@ -850,7 +865,7 @@
taskdata->td_task_id = KMP_GEN_TASK_ID();
taskdata->td_team = team;
- taskdata->td_alloc_thread = thread;
+ taskdata->td_alloc_thread = thread;
taskdata->td_parent = parent_task;
taskdata->td_level = parent_task->td_level + 1; // increment nesting level
taskdata->td_ident = loc_ref;
@@ -863,6 +878,9 @@
taskdata->td_flags.tiedness = flags->tiedness;
taskdata->td_flags.final = flags->final;
taskdata->td_flags.merged_if0 = flags->merged_if0;
+#if OMP_40_ENABLED
+ taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
+#endif // OMP_40_ENABLED
taskdata->td_flags.tasktype = TASK_EXPLICIT;
// GEH - TODO: fix this to copy parent task's value of tasking_ser flag
@@ -890,7 +908,7 @@
taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
taskdata->td_dephash = NULL;
taskdata->td_depnode = NULL;
-#endif
+#endif
// Only need to keep track of child task counts if team parallel and tasking not serialized
if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
@@ -946,24 +964,46 @@
__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
{
kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
+#if OMP_40_ENABLED
+ int discard = 0 /* false */;
+#endif
KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
gtid, taskdata, current_task) );
__kmp_task_start( gtid, task, current_task );
+#if OMP_40_ENABLED
+ // TODO: cancel tasks if the parallel region has also been cancelled
+ // TODO: check if this sequence can be hoisted above __kmp_task_start
+ // if cancellation has been enabled for this run ...
+ if (__kmp_omp_cancellation) {
+ kmp_info_t *this_thr = __kmp_threads [ gtid ];
+ kmp_team_t * this_team = this_thr->th.th_team;
+ kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
+ if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
+ // this task belongs to a task group and we need to cancel it
+ discard = 1 /* true */;
+ }
+ }
+
//
// Invoke the task routine and pass in relevant data.
// Thunks generated by gcc take a different argument list.
//
+ if (!discard) {
+#endif // OMP_40_ENABLED
#ifdef KMP_GOMP_COMPAT
- if (taskdata->td_flags.native) {
- ((void (*)(void *))(*(task->routine)))(task->shareds);
- }
- else
+ if (taskdata->td_flags.native) {
+ ((void (*)(void *))(*(task->routine)))(task->shareds);
+ }
+ else
#endif /* KMP_GOMP_COMPAT */
- {
- (*(task->routine))(gtid, task);
+ {
+ (*(task->routine))(gtid, task);
+ }
+#if OMP_40_ENABLED
}
+#endif // OMP_40_ENABLED
__kmp_task_finish( gtid, task, current_task );
@@ -1079,10 +1119,8 @@
// GEH: if team serialized, avoid reading the volatile variable below.
while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
__kmp_execute_tasks( thread, gtid, &(taskdata->td_incomplete_child_tasks),
- 0, FALSE, &thread_finished,
-#if USE_ITT_BUILD
- itt_sync_obj,
-#endif /* USE_ITT_BUILD */
+ 0, FALSE, &thread_finished
+ USE_ITT_BUILD_ARG(itt_sync_obj),
__kmp_task_stealing_constraint );
}
}
@@ -1134,10 +1172,8 @@
__kmp_itt_taskwait_starting( gtid, itt_sync_obj );
#endif /* USE_ITT_BUILD */
if ( ! taskdata->td_flags.team_serial ) {
- __kmp_execute_tasks( thread, gtid, NULL, 0, FALSE, &thread_finished,
-#if USE_ITT_BUILD
- itt_sync_obj,
-#endif /* USE_ITT_BUILD */
+ __kmp_execute_tasks( thread, gtid, NULL, 0, FALSE, &thread_finished
+ USE_ITT_BUILD_ARG(itt_sync_obj),
__kmp_task_stealing_constraint );
}
@@ -1162,7 +1198,7 @@
// __kmpc_taskgroup: Start a new taskgroup
void
-__kmpc_taskgroup( ident* loc, int gtid )
+__kmpc_taskgroup( ident_t* loc, int gtid )
{
kmp_info_t * thread = __kmp_threads[ gtid ];
kmp_taskdata_t * taskdata = thread->th.th_current_task;
@@ -1170,6 +1206,7 @@
(kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
tg_new->count = 0;
+ tg_new->cancel_request = cancel_noreq;
tg_new->parent = taskdata->td_taskgroup;
taskdata->td_taskgroup = tg_new;
}
@@ -1180,7 +1217,7 @@
// and its descendants are complete
void
-__kmpc_end_taskgroup( ident* loc, int gtid )
+__kmpc_end_taskgroup( ident_t* loc, int gtid )
{
kmp_info_t * thread = __kmp_threads[ gtid ];
kmp_taskdata_t * taskdata = thread->th.th_current_task;
@@ -1201,10 +1238,8 @@
if ( ! taskdata->td_flags.team_serial ) {
while ( TCR_4(taskgroup->count) != 0 ) {
__kmp_execute_tasks( thread, gtid, &(taskgroup->count),
- 0, FALSE, &thread_finished,
-#if USE_ITT_BUILD
- itt_sync_obj,
-#endif /* USE_ITT_BUILD */
+ 0, FALSE, &thread_finished
+ USE_ITT_BUILD_ARG(itt_sync_obj),
__kmp_task_stealing_constraint );
}
}
@@ -1420,15 +1455,13 @@
// checker is the value to check to terminate the spin.
int
-__kmp_execute_tasks( kmp_info_t *thread,
- kmp_int32 gtid,
+__kmp_execute_tasks( kmp_info_t *thread,
+ kmp_int32 gtid,
volatile kmp_uint *spinner,
kmp_uint checker,
- int final_spin,
- int *thread_finished,
-#if USE_ITT_BUILD
- void * itt_sync_obj,
-#endif /* USE_ITT_BUILD */
+ int final_spin,
+ int *thread_finished
+ USE_ITT_BUILD_ARG(void * itt_sync_obj),
kmp_int32 is_constrained )
{
kmp_task_team_t * task_team;
@@ -2297,11 +2330,9 @@
// in team > 1 !
void
-__kmp_task_team_wait( kmp_info_t *this_thr,
+__kmp_task_team_wait( kmp_info_t *this_thr,
kmp_team_t *team
-#if USE_ITT_BUILD
- , void * itt_sync_obj
-#endif /* USE_ITT_BUILD */
+ USE_ITT_BUILD_ARG(void * itt_sync_obj)
)
{
kmp_task_team_t *task_team = team->t.t_task_team;
@@ -2320,9 +2351,7 @@
// termination condition.
//
__kmp_wait_sleep( this_thr, &task_team->tt.tt_unfinished_threads, 0, TRUE
-#if USE_ITT_BUILD
- , itt_sync_obj
-#endif /* USE_ITT_BUILD */
+ USE_ITT_BUILD_ARG(itt_sync_obj)
);
//
@@ -2361,7 +2390,8 @@
#if USE_ITT_BUILD
KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
#endif /* USE_ITT_BUILD */
- while (! __kmp_execute_tasks( thread, gtid, spin, 0, TRUE, &flag, NULL ) ) {
+ while (! __kmp_execute_tasks( thread, gtid, spin, 0, TRUE, &flag
+ USE_ITT_BUILD_ARG(NULL), 0 ) ) {
#if USE_ITT_BUILD
// TODO: What about itt_sync_obj??
KMP_FSYNC_SPIN_PREPARE( spin );
diff --git a/openmp/runtime/src/kmp_version.c b/openmp/runtime/src/kmp_version.c
index 0beb824..5d0de77 100644
--- a/openmp/runtime/src/kmp_version.c
+++ b/openmp/runtime/src/kmp_version.c
@@ -1,7 +1,7 @@
/*
* kmp_version.c
- * $Revision: 42594 $
- * $Date: 2013-08-16 04:14:33 -0500 (Fri, 16 Aug 2013) $
+ * $Revision: 42806 $
+ * $Date: 2013-11-05 16:16:45 -0600 (Tue, 05 Nov 2013) $
*/
@@ -27,7 +27,7 @@
#define stringer( x ) _stringer( x )
// Detect compiler.
-#ifdef __INTEL_COMPILER
+#if KMP_COMPILER_ICC
#if __INTEL_COMPILER == 1010
#define KMP_COMPILER "Intel C++ Compiler 10.1"
#elif __INTEL_COMPILER == 1100
@@ -49,7 +49,9 @@
#elif __INTEL_COMPILER == 9999
#define KMP_COMPILER "Intel C++ Compiler mainline"
#endif
-#elif defined( __GNUC__ )
+#elif KMP_COMPILER_CLANG
+ #define KMP_COMPILER "Clang " stringer( __clang_major__ ) "." stringer( __clang_minor__ )
+#elif KMP_COMPILER_GCC
#define KMP_COMPILER "GCC " stringer( __GNUC__ ) "." stringer( __GNUC_MINOR__ )
#endif
#ifndef KMP_COMPILER
diff --git a/openmp/runtime/src/makefile.mk b/openmp/runtime/src/makefile.mk
index d7c8266..8185e78 100644
--- a/openmp/runtime/src/makefile.mk
+++ b/openmp/runtime/src/makefile.mk
@@ -1,6 +1,6 @@
# makefile.mk #
-# $Revision: 42661 $
-# $Date: 2013-09-12 11:37:13 -0500 (Thu, 12 Sep 2013) $
+# $Revision: 42820 $
+# $Date: 2013-11-13 16:53:44 -0600 (Wed, 13 Nov 2013) $
#
#//===----------------------------------------------------------------------===//
@@ -37,7 +37,7 @@
# --------------------------------------------------------------------------------------------------
# Build compiler
-BUILD_COMPILER := $(call check_variable,BUILD_COMPILER,icc gcc icl icl.exe)
+BUILD_COMPILER := $(call check_variable,BUILD_COMPILER,icc gcc clang icl icl.exe)
# Distribution type: com (commercial) or oss (open-source)
DISTRIBUTION := $(call check_variable,DISTRIBUTION,com oss)
@@ -161,6 +161,18 @@
endif
endif
+ifeq "$(c)" "clang"
+ c-flags += -Wno-unused-value -Wno-switch
+ cxx-flags += -Wno-unused-value -Wno-switch
+ ifeq "$(arch)" "32"
+ c-flags += -m32 -msse
+ cxx-flags += -m32 -msse
+ fort-flags += -m32 -msse
+ ld-flags += -m32 -msse
+ as-flags += -m32 -msse
+ endif
+endif
+
ifeq "$(LINK_TYPE)" "dyna"
# debug-info
ifeq "$(os)" "win"
@@ -186,7 +198,7 @@
endif
# Enable saving compiler options and version in object files and libraries.
-ifneq "$(c)" "gcc"
+ifeq "$(filter gcc clang,$(c))" ""
ifeq "$(os)" "win"
# Newer MS linker issues warnings if -Qsox is used:
# "warning LNK4224: /COMMENT is no longer supported; ignored"
@@ -231,24 +243,17 @@
# Disable use of EBP as general purpose register.
ifeq "$(os)" "win"
ifeq "$(arch)" "32"
- # ??? In original makefile, this option was specified only in debug builds.
- # Compare with Linux* OS/OS X* -fno-omit-frame-pointer, which defined always.
c-flags += -Oy-
cxx-flags += -Oy-
endif
-else
- ifneq "$(arch)" "64"
- c-flags += -fno-omit-frame-pointer
- cxx-flags += -fno-omit-frame-pointer
- endif
endif
ifeq "$(os)" "lin"
c-flags += -Wsign-compare
cxx-flags += -Wsign-compare
ld-flags += -Wsign-compare
- ifneq "$(c)" "gcc"
- c-flags += -Werror
+ ifeq "$(filter gcc clang,$(c))" ""
+ c-flags += -Werror
cxx-flags += -Werror
ld-flags += -Werror
endif
@@ -306,7 +311,7 @@
ifeq "$(os)" "win"
c-flags += -TP
else
- ifeq "$(c)" "gcc"
+ ifneq "$(filter gcc clang,$(c))" ""
c-flags += -x c++ -std=c++0x
else
c-flags += -Kc++
@@ -352,12 +357,18 @@
ld-flags-dll += -static-libgcc
ld-flags-extra += -Wl,-ldl
endif
+ ifeq "$(c)" "clang"
+ ld-flags-extra += -Wl,-ldl
+ endif
ifeq "$(arch)" "32"
- ifneq "$(c)" "gcc"
+ ifeq "$(filter gcc clang,$(c))" ""
# to workaround CQ215229 link libirc_pic manually
ld-flags-extra += -lirc_pic
endif
endif
+ ifeq "$(filter 32 32e 64,$(arch))" ""
+ ld-flags-extra += $(shell pkg-config --libs libffi)
+ endif
else
ifeq "$(arch)" "32e"
# ???
@@ -452,13 +463,13 @@
cpp-flags += -D CACHE_LINE=64
cpp-flags += -D KMP_ADJUST_BLOCKTIME=1
cpp-flags += -D BUILD_PARALLEL_ORDERED
+cpp-flags += -D KMP_ASM_INTRINS
ifneq "$(os)" "lrb"
cpp-flags += -D USE_LOAD_BALANCE
endif
ifneq "$(os)" "win"
cpp-flags += -D USE_CBLKDATA
# ??? Windows* OS: USE_CBLKDATA defined in kmp.h.
- cpp-flags += -D KMP_ASM_INTRINS
endif
ifeq "$(os)" "win"
cpp-flags += -D KMP_WIN_CDECL
@@ -477,23 +488,43 @@
endif
endif
+ifneq "$(filter 32 32e,$(arch))" ""
cpp-flags += -D KMP_USE_ADAPTIVE_LOCKS=1 -D KMP_DEBUG_ADAPTIVE_LOCKS=0
-
-# define compatibility with OMP 3.0
-ifeq "$(OMP_VERSION)" "40"
- cpp-flags += -D OMP_40_ENABLED=1
- cpp-flags += -D OMP_30_ENABLED=1
-else
- ifeq "$(OMP_VERSION)" "30"
- cpp-flags += -D OMP_40_ENABLED=0
- cpp-flags += -D OMP_30_ENABLED=1
- else
- cpp-flags += -D OMP_40_ENABLED=0
- cpp-flags += -D OMP_30_ENABLED=0
- # TODO: Check OMP_30_ENABLED == 0 is processed correctly.
- endif
endif
+# define compatibility with different OpenMP versions
+have_omp_50=0
+have_omp_41=0
+have_omp_40=0
+have_omp_30=0
+ifeq "$(OMP_VERSION)" "50"
+ have_omp_50=1
+ have_omp_41=1
+ have_omp_40=1
+ have_omp_30=1
+endif
+ifeq "$(OMP_VERSION)" "41"
+ have_omp_50=0
+ have_omp_41=1
+ have_omp_40=1
+ have_omp_30=1
+endif
+ifeq "$(OMP_VERSION)" "40"
+ have_omp_50=0
+ have_omp_41=0
+ have_omp_40=1
+ have_omp_30=1
+endif
+ifeq "$(OMP_VERSION)" "30"
+ have_omp_50=0
+ have_omp_41=0
+ have_omp_40=0
+ have_omp_30=1
+endif
+cpp-flags += -D OMP_50_ENABLED=$(have_omp_50) -D OMP_41_ENABLED=$(have_omp_41)
+cpp-flags += -D OMP_40_ENABLED=$(have_omp_40) -D OMP_30_ENABLED=$(have_omp_30)
+
+
# Using ittnotify is enabled by default.
USE_ITT_NOTIFY = 1
ifeq "$(os)-$(arch)" "win-64"
@@ -541,8 +572,13 @@
# only one, target architecture). So we cannot autodetect target architecture
# within the file, and have to pass target architecture from command line.
ifneq "$(os)" "win"
- z_Linux_asm$(obj) : \
- cpp-flags += -D KMP_ARCH_X86$(if $(filter 32e,$(arch)),_64)
+ ifeq "$(arch)" "arm"
+ z_Linux_asm$(obj) : \
+ cpp-flags += -D KMP_ARCH_ARM
+ else
+ z_Linux_asm$(obj) : \
+ cpp-flags += -D KMP_ARCH_X86$(if $(filter 32e,$(arch)),_64)
+ endif
endif
# Defining KMP_BUILD_DATE for all files leads to warning "incompatible redefinition", because the
@@ -606,7 +642,6 @@
lib_c_items := \
kmp_ftn_cdecl \
kmp_ftn_extra \
- kmp_ftn_stdcall \
kmp_version \
$(empty)
lib_cpp_items :=
@@ -653,6 +688,7 @@
ifeq "$(OMP_VERSION)" "40"
lib_cpp_items += kmp_taskdeps
+ lib_cpp_items += kmp_cancel
endif
# OS-specific files.
@@ -1214,7 +1250,9 @@
tt-c-flags += -pthread
endif
tt-c-flags += -o $(tt-exe-file)
- tt-c-flags += $(if $(filter 64,$(arch)),,$(if $(filter 32,$(arch)),-m32,-m64))
+ ifneq "$(filter 32 32e 64,$(arch))" ""
+ tt-c-flags += $(if $(filter 64,$(arch)),,$(if $(filter 32,$(arch)),-m32,-m64))
+ endif
tt-libs += $(lib_file)
ifeq "$(os)-$(COVERAGE)-$(LINK_TYPE)" "lin-on-stat"
# Static coverage build on Linux* OS fails due to unresolved symbols dlopen, dlsym, dlclose.
@@ -1343,8 +1381,16 @@
ifeq "$(arch)" "64"
td_exp += libc.so.6.1
endif
+ ifeq "$(arch)" "arm"
+ td_exp += libc.so.6
+ td_exp += ld-linux-armhf.so.3
+ endif
td_exp += libdl.so.2
td_exp += libgcc_s.so.1
+ ifeq "$(filter 32 32e 64,$(arch))" ""
+ td_exp += libffi.so.6
+ td_exp += libffi.so.5
+ endif
ifneq "$(LIB_TYPE)" "stub"
td_exp += libpthread.so.0
endif
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h
index 9cc398c..9df6e2f 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h
@@ -109,12 +109,18 @@
# define ITT_PLATFORM_POSIX 2
#endif /* ITT_PLATFORM_POSIX */
+#ifndef ITT_PLATFORM_MAC
+# define ITT_PLATFORM_MAC 3
+#endif /* ITT_PLATFORM_MAC */
+
#ifndef ITT_PLATFORM
# if ITT_OS==ITT_OS_WIN
# define ITT_PLATFORM ITT_PLATFORM_WIN
+# elif ITT_OS==ITT_OS_MAC
+# define ITT_PLATFORM ITT_PLATFORM_MAC
# else
# define ITT_PLATFORM ITT_PLATFORM_POSIX
-# endif /* _WIN32 */
+# endif
#endif /* ITT_PLATFORM */
#if defined(_UNICODE) && !defined(UNICODE)
@@ -135,11 +141,11 @@
# if ITT_PLATFORM==ITT_PLATFORM_WIN
# define CDECL __cdecl
# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
-# define CDECL /* not actual on x86_64 platform */
-# else /* _M_X64 || _M_AMD64 || __x86_64__ */
+# if defined _M_IX86 || defined __i386__
# define CDECL __attribute__ ((cdecl))
-# endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+# else /* _M_IX86 || __i386__ */
+# define CDECL /* actual only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
#endif /* CDECL */
@@ -147,11 +153,11 @@
# if ITT_PLATFORM==ITT_PLATFORM_WIN
# define STDCALL __stdcall
# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
-# define STDCALL /* not supported on x86_64 platform */
-# else /* _M_X64 || _M_AMD64 || __x86_64__ */
+# if defined _M_IX86 || defined __i386__
# define STDCALL __attribute__ ((stdcall))
-# endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+# else /* _M_IX86 || __i386__ */
+# define STDCALL /* supported only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
#endif /* STDCALL */
@@ -164,8 +170,8 @@
#if ITT_PLATFORM==ITT_PLATFORM_WIN
/* use __forceinline (VC++ specific) */
-#define INLINE __forceinline
-#define INLINE_ATTRIBUTE /* nothing */
+#define ITT_INLINE __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
/*
* Generally, functions are not inlined unless optimization is specified.
@@ -173,11 +179,11 @@
* if no optimization level was specified.
*/
#ifdef __STRICT_ANSI__
-#define INLINE static
+#define ITT_INLINE static
#else /* __STRICT_ANSI__ */
-#define INLINE static inline
+#define ITT_INLINE static inline
#endif /* __STRICT_ANSI__ */
-#define INLINE_ATTRIBUTE __attribute__ ((always_inline))
+#define ITT_INLINE_ATTRIBUTE __attribute__ ((always_inline, unused))
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
/** @endcond */
@@ -398,6 +404,128 @@
/** @} threads group */
/**
+ * @defgroup suppress Error suppression
+ * @ingroup public
+ * General behavior: application continues to run, but errors are suppressed
+ *
+ * @{
+ */
+
+/*****************************************************************//**
+ * @name group of functions used for error suppression in correctness tools
+ *********************************************************************/
+/** @{ */
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask
+ */
+#define __itt_suppress_all_errors 0x7fffffff
+
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask (suppresses errors from threading analysis)
+ */
+#define __itt_suppress_threading_errors 0x000000ff
+
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask (suppresses errors from memory analysis)
+ */
+#define __itt_suppress_memory_errors 0x0000ff00
+
+/**
+ * @brief Start suppressing errors identified in mask on this thread
+ */
+void ITTAPI __itt_suppress_push(unsigned int mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask))
+#define __itt_suppress_push ITTNOTIFY_VOID(suppress_push)
+#define __itt_suppress_push_ptr ITTNOTIFY_NAME(suppress_push)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_push(mask)
+#define __itt_suppress_push_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_push_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Undo the effects of the matching call to __itt_suppress_push
+ */
+void ITTAPI __itt_suppress_pop(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_pop, (void))
+#define __itt_suppress_pop ITTNOTIFY_VOID(suppress_pop)
+#define __itt_suppress_pop_ptr ITTNOTIFY_NAME(suppress_pop)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_pop()
+#define __itt_suppress_pop_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_pop_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @enum __itt_model_disable
+ * @brief Enumerator for the disable methods
+ */
+typedef enum __itt_suppress_mode {
+ __itt_unsuppress_range,
+ __itt_suppress_range
+} __itt_suppress_mode_t;
+
+/**
+ * @brief Mark a range of memory for error suppression or unsuppression for error types included in mask
+ */
+void ITTAPI __itt_suppress_mark_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
+#define __itt_suppress_mark_range ITTNOTIFY_VOID(suppress_mark_range)
+#define __itt_suppress_mark_range_ptr ITTNOTIFY_NAME(suppress_mark_range)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_mark_range(mask)
+#define __itt_suppress_mark_range_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_mark_range_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Undo the effect of a matching call to __itt_suppress_mark_range. If no matching
+ * call is found, nothing is changed.
+ */
+void ITTAPI __itt_suppress_clear_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_clear_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
+#define __itt_suppress_clear_range ITTNOTIFY_VOID(suppress_clear_range)
+#define __itt_suppress_clear_range_ptr ITTNOTIFY_NAME(suppress_clear_range)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_clear_range(mask)
+#define __itt_suppress_clear_range_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_clear_range_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} */
+/** @} suppress group */
+
+/**
* @defgroup sync Synchronization
* @ingroup public
* Indicate user-written synchronization code
@@ -820,8 +948,10 @@
#if ITT_PLATFORM==ITT_PLATFORM_WIN
void ITTAPI __itt_model_site_beginW(const wchar_t *name);
#endif
+void ITTAPI __itt_model_site_beginA(const char *name);
void ITTAPI __itt_model_site_beginAL(const char *name, size_t siteNameLen);
void ITTAPI __itt_model_site_end (__itt_model_site *site, __itt_model_site_instance *instance);
+void ITTAPI __itt_model_site_end_2(void);
/** @cond exclude_from_documentation */
#ifndef INTEL_NO_MACRO_BODY
@@ -830,18 +960,24 @@
#if ITT_PLATFORM==ITT_PLATFORM_WIN
ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name))
#endif
+ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name))
ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t siteNameLen))
ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance))
+ITT_STUBV(ITTAPI, void, model_site_end_2, (void))
#define __itt_model_site_begin ITTNOTIFY_VOID(model_site_begin)
#define __itt_model_site_begin_ptr ITTNOTIFY_NAME(model_site_begin)
#if ITT_PLATFORM==ITT_PLATFORM_WIN
#define __itt_model_site_beginW ITTNOTIFY_VOID(model_site_beginW)
#define __itt_model_site_beginW_ptr ITTNOTIFY_NAME(model_site_beginW)
#endif
+#define __itt_model_site_beginA ITTNOTIFY_VOID(model_site_beginA)
+#define __itt_model_site_beginA_ptr ITTNOTIFY_NAME(model_site_beginA)
#define __itt_model_site_beginAL ITTNOTIFY_VOID(model_site_beginAL)
#define __itt_model_site_beginAL_ptr ITTNOTIFY_NAME(model_site_beginAL)
#define __itt_model_site_end ITTNOTIFY_VOID(model_site_end)
#define __itt_model_site_end_ptr ITTNOTIFY_NAME(model_site_end)
+#define __itt_model_site_end_2 ITTNOTIFY_VOID(model_site_end_2)
+#define __itt_model_site_end_2_ptr ITTNOTIFY_NAME(model_site_end_2)
#else /* INTEL_NO_ITTNOTIFY_API */
#define __itt_model_site_begin(site, instance, name)
#define __itt_model_site_begin_ptr 0
@@ -849,18 +985,24 @@
#define __itt_model_site_beginW(name)
#define __itt_model_site_beginW_ptr 0
#endif
+#define __itt_model_site_beginA(name)
+#define __itt_model_site_beginA_ptr 0
#define __itt_model_site_beginAL(name, siteNameLen)
#define __itt_model_site_beginAL_ptr 0
#define __itt_model_site_end(site, instance)
#define __itt_model_site_end_ptr 0
+#define __itt_model_site_end_2()
+#define __itt_model_site_end_2_ptr 0
#endif /* INTEL_NO_ITTNOTIFY_API */
#else /* INTEL_NO_MACRO_BODY */
#define __itt_model_site_begin_ptr 0
#if ITT_PLATFORM==ITT_PLATFORM_WIN
#define __itt_model_site_beginW_ptr 0
#endif
+#define __itt_model_site_beginA_ptr 0
#define __itt_model_site_beginAL_ptr 0
#define __itt_model_site_end_ptr 0
+#define __itt_model_site_end_2_ptr 0
#endif /* INTEL_NO_MACRO_BODY */
/** @endcond */
@@ -878,9 +1020,14 @@
void ITTAPI __itt_model_task_begin(__itt_model_task *task, __itt_model_task_instance *instance, const char *name);
#if ITT_PLATFORM==ITT_PLATFORM_WIN
void ITTAPI __itt_model_task_beginW(const wchar_t *name);
+void ITTAPI __itt_model_iteration_taskW(const wchar_t *name);
#endif
+void ITTAPI __itt_model_task_beginA(const char *name);
void ITTAPI __itt_model_task_beginAL(const char *name, size_t taskNameLen);
+void ITTAPI __itt_model_iteration_taskA(const char *name);
+void ITTAPI __itt_model_iteration_taskAL(const char *name, size_t taskNameLen);
void ITTAPI __itt_model_task_end (__itt_model_task *task, __itt_model_task_instance *instance);
+void ITTAPI __itt_model_task_end_2(void);
/** @cond exclude_from_documentation */
#ifndef INTEL_NO_MACRO_BODY
@@ -888,19 +1035,34 @@
ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name))
#if ITT_PLATFORM==ITT_PLATFORM_WIN
ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name))
+ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name))
#endif
+ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name))
ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t taskNameLen))
+ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name))
+ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t taskNameLen))
ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance))
+ITT_STUBV(ITTAPI, void, model_task_end_2, (void))
#define __itt_model_task_begin ITTNOTIFY_VOID(model_task_begin)
#define __itt_model_task_begin_ptr ITTNOTIFY_NAME(model_task_begin)
#if ITT_PLATFORM==ITT_PLATFORM_WIN
#define __itt_model_task_beginW ITTNOTIFY_VOID(model_task_beginW)
#define __itt_model_task_beginW_ptr ITTNOTIFY_NAME(model_task_beginW)
+#define __itt_model_iteration_taskW ITTNOTIFY_VOID(model_iteration_taskW)
+#define __itt_model_iteration_taskW_ptr ITTNOTIFY_NAME(model_iteration_taskW)
#endif
+#define __itt_model_task_beginA ITTNOTIFY_VOID(model_task_beginA)
+#define __itt_model_task_beginA_ptr ITTNOTIFY_NAME(model_task_beginA)
#define __itt_model_task_beginAL ITTNOTIFY_VOID(model_task_beginAL)
#define __itt_model_task_beginAL_ptr ITTNOTIFY_NAME(model_task_beginAL)
+#define __itt_model_iteration_taskA ITTNOTIFY_VOID(model_iteration_taskA)
+#define __itt_model_iteration_taskA_ptr ITTNOTIFY_NAME(model_iteration_taskA)
+#define __itt_model_iteration_taskAL ITTNOTIFY_VOID(model_iteration_taskAL)
+#define __itt_model_iteration_taskAL_ptr ITTNOTIFY_NAME(model_iteration_taskAL)
#define __itt_model_task_end ITTNOTIFY_VOID(model_task_end)
#define __itt_model_task_end_ptr ITTNOTIFY_NAME(model_task_end)
+#define __itt_model_task_end_2 ITTNOTIFY_VOID(model_task_end_2)
+#define __itt_model_task_end_2_ptr ITTNOTIFY_NAME(model_task_end_2)
#else /* INTEL_NO_ITTNOTIFY_API */
#define __itt_model_task_begin(task, instance, name)
#define __itt_model_task_begin_ptr 0
@@ -908,18 +1070,30 @@
#define __itt_model_task_beginW(name)
#define __itt_model_task_beginW_ptr 0
#endif
+#define __itt_model_task_beginA(name)
+#define __itt_model_task_beginA_ptr 0
#define __itt_model_task_beginAL(name, siteNameLen)
#define __itt_model_task_beginAL_ptr 0
+#define __itt_model_iteration_taskA(name)
+#define __itt_model_iteration_taskA_ptr 0
+#define __itt_model_iteration_taskAL(name, siteNameLen)
+#define __itt_model_iteration_taskAL_ptr 0
#define __itt_model_task_end(task, instance)
#define __itt_model_task_end_ptr 0
+#define __itt_model_task_end_2()
+#define __itt_model_task_end_2_ptr 0
#endif /* INTEL_NO_ITTNOTIFY_API */
#else /* INTEL_NO_MACRO_BODY */
#define __itt_model_task_begin_ptr 0
#if ITT_PLATFORM==ITT_PLATFORM_WIN
#define __itt_model_task_beginW_ptr 0
#endif
+#define __itt_model_task_beginA_ptr 0
#define __itt_model_task_beginAL_ptr 0
+#define __itt_model_iteration_taskA_ptr 0
+#define __itt_model_iteration_taskAL_ptr 0
#define __itt_model_task_end_ptr 0
+#define __itt_model_task_end_2_ptr 0
#endif /* INTEL_NO_MACRO_BODY */
/** @endcond */
@@ -936,26 +1110,40 @@
* but may not have identical semantics.)
*/
void ITTAPI __itt_model_lock_acquire(void *lock);
+void ITTAPI __itt_model_lock_acquire_2(void *lock);
void ITTAPI __itt_model_lock_release(void *lock);
+void ITTAPI __itt_model_lock_release_2(void *lock);
/** @cond exclude_from_documentation */
#ifndef INTEL_NO_MACRO_BODY
#ifndef INTEL_NO_ITTNOTIFY_API
ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock))
ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock))
#define __itt_model_lock_acquire ITTNOTIFY_VOID(model_lock_acquire)
#define __itt_model_lock_acquire_ptr ITTNOTIFY_NAME(model_lock_acquire)
+#define __itt_model_lock_acquire_2 ITTNOTIFY_VOID(model_lock_acquire_2)
+#define __itt_model_lock_acquire_2_ptr ITTNOTIFY_NAME(model_lock_acquire_2)
#define __itt_model_lock_release ITTNOTIFY_VOID(model_lock_release)
#define __itt_model_lock_release_ptr ITTNOTIFY_NAME(model_lock_release)
+#define __itt_model_lock_release_2 ITTNOTIFY_VOID(model_lock_release_2)
+#define __itt_model_lock_release_2_ptr ITTNOTIFY_NAME(model_lock_release_2)
#else /* INTEL_NO_ITTNOTIFY_API */
#define __itt_model_lock_acquire(lock)
#define __itt_model_lock_acquire_ptr 0
+#define __itt_model_lock_acquire_2(lock)
+#define __itt_model_lock_acquire_2_ptr 0
#define __itt_model_lock_release(lock)
#define __itt_model_lock_release_ptr 0
+#define __itt_model_lock_release_2(lock)
+#define __itt_model_lock_release_2_ptr 0
#endif /* INTEL_NO_ITTNOTIFY_API */
#else /* INTEL_NO_MACRO_BODY */
#define __itt_model_lock_acquire_ptr 0
+#define __itt_model_lock_acquire_2_ptr 0
#define __itt_model_lock_release_ptr 0
+#define __itt_model_lock_release_2_ptr 0
#endif /* INTEL_NO_MACRO_BODY */
/** @endcond */
@@ -1104,25 +1292,32 @@
*/
void ITTAPI __itt_model_disable_push(__itt_model_disable x);
void ITTAPI __itt_model_disable_pop(void);
+void ITTAPI __itt_model_aggregate_task(size_t x);
/** @cond exclude_from_documentation */
#ifndef INTEL_NO_MACRO_BODY
#ifndef INTEL_NO_ITTNOTIFY_API
ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x))
ITT_STUBV(ITTAPI, void, model_disable_pop, (void))
+ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t x))
#define __itt_model_disable_push ITTNOTIFY_VOID(model_disable_push)
#define __itt_model_disable_push_ptr ITTNOTIFY_NAME(model_disable_push)
#define __itt_model_disable_pop ITTNOTIFY_VOID(model_disable_pop)
#define __itt_model_disable_pop_ptr ITTNOTIFY_NAME(model_disable_pop)
+#define __itt_model_aggregate_task ITTNOTIFY_VOID(model_aggregate_task)
+#define __itt_model_aggregate_task_ptr ITTNOTIFY_NAME(model_aggregate_task)
#else /* INTEL_NO_ITTNOTIFY_API */
#define __itt_model_disable_push(x)
#define __itt_model_disable_push_ptr 0
#define __itt_model_disable_pop()
#define __itt_model_disable_pop_ptr 0
+#define __itt_model_aggregate_task(x)
+#define __itt_model_aggregate_task_ptr 0
#endif /* INTEL_NO_ITTNOTIFY_API */
#else /* INTEL_NO_MACRO_BODY */
#define __itt_model_disable_push_ptr 0
#define __itt_model_disable_pop_ptr 0
+#define __itt_model_aggregate_task_ptr 0
#endif /* INTEL_NO_MACRO_BODY */
/** @endcond */
/** @} model group */
@@ -1348,9 +1543,97 @@
#define __itt_heap_internal_access_end_ptr 0
#endif /* INTEL_NO_MACRO_BODY */
/** @endcond */
-/** @} heap group */
+
+/** @brief record memory growth begin */
+void ITTAPI __itt_heap_record_memory_growth_begin(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void))
+#define __itt_heap_record_memory_growth_begin ITTNOTIFY_VOID(heap_record_memory_growth_begin)
+#define __itt_heap_record_memory_growth_begin_ptr ITTNOTIFY_NAME(heap_record_memory_growth_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_record_memory_growth_begin()
+#define __itt_heap_record_memory_growth_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_record_memory_growth_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
/** @endcond */
+/** @brief record memory growth end */
+void ITTAPI __itt_heap_record_memory_growth_end(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void))
+#define __itt_heap_record_memory_growth_end ITTNOTIFY_VOID(heap_record_memory_growth_end)
+#define __itt_heap_record_memory_growth_end_ptr ITTNOTIFY_NAME(heap_record_memory_growth_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_record_memory_growth_end()
+#define __itt_heap_record_memory_growth_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_record_memory_growth_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Specify the type of heap detection/reporting to modify.
+ */
+/**
+ * @hideinitializer
+ * @brief Report on memory leaks.
+ */
+#define __itt_heap_leaks 0x00000001
+
+/**
+ * @hideinitializer
+ * @brief Report on memory growth.
+ */
+#define __itt_heap_growth 0x00000002
+
+
+/** @brief heap reset detection */
+void ITTAPI __itt_heap_reset_detection(unsigned int reset_mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask))
+#define __itt_heap_reset_detection ITTNOTIFY_VOID(heap_reset_detection)
+#define __itt_heap_reset_detection_ptr ITTNOTIFY_NAME(heap_reset_detection)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_reset_detection()
+#define __itt_heap_reset_detection_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_reset_detection_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief report */
+void ITTAPI __itt_heap_record(unsigned int record_mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask))
+#define __itt_heap_record ITTNOTIFY_VOID(heap_record)
+#define __itt_heap_record_ptr ITTNOTIFY_NAME(heap_record)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_record()
+#define __itt_heap_record_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_record_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @} heap group */
+/** @endcond */
/* ========================================================================== */
/**
@@ -1475,8 +1758,8 @@
* @param[in] extra The extra data to unique identify object; low QWORD of the ID value.
*/
-INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) INLINE_ATTRIBUTE;
-INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra)
+ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra)
{
__itt_id id = __itt_null;
id.d1 = (unsigned long long)((uintptr_t)addr);
@@ -1633,6 +1916,40 @@
/** @endcond */
/** @} handles group */
+/** @cond exclude_from_documentation */
+typedef unsigned long long __itt_timestamp;
+/** @endcond */
+
+static const __itt_timestamp __itt_timestamp_none = (__itt_timestamp)-1LL;
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @ingroup timestamps
+ * @brief Return timestamp corresponding to current moment.
+ * This returns the timestamp in format that is most relevant for the current
+ * host or platform. Do not rely that it's RDTSC value. It is possible
+ * to compare __itt_timestamp values with "<" operator.
+ */
+__itt_timestamp ITTAPI __itt_get_timestamp(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void))
+#define __itt_get_timestamp ITTNOTIFY_DATA(get_timestamp)
+#define __itt_get_timestamp_ptr ITTNOTIFY_NAME(get_timestamp)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_get_timestamp()
+#define __itt_get_timestamp_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_get_timestamp_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} timestamps */
+/** @endcond */
+
/** @cond exclude_from_gpa_documentation */
/**
@@ -1717,24 +2034,46 @@
*/
void ITTAPI __itt_frame_end_v3(const __itt_domain *domain, __itt_id *id);
+/**
+ * @ingroup frames
+ * @brief Submits a frame instance.
+ * Successive calls to __itt_frame_begin or __itt_frame_submit with the
+ * same ID are ignored until a call to __itt_frame_end or __itt_frame_submit
+ * with the same ID.
+ * Passing special __itt_timestamp_none value as "end" argument means
+ * take the current timestamp as the end timestamp.
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL
+ * @param[in] begin Timestamp of the beginning of the frame
+ * @param[in] end Timestamp of the end of the frame
+ */
+void ITTAPI __itt_frame_submit_v3(const __itt_domain *domain, __itt_id *id,
+ __itt_timestamp begin, __itt_timestamp end);
+
/** @cond exclude_from_documentation */
#ifndef INTEL_NO_MACRO_BODY
#ifndef INTEL_NO_ITTNOTIFY_API
ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id))
ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id))
+ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end))
#define __itt_frame_begin_v3(d,x) ITTNOTIFY_VOID_D1(frame_begin_v3,d,x)
#define __itt_frame_begin_v3_ptr ITTNOTIFY_NAME(frame_begin_v3)
#define __itt_frame_end_v3(d,x) ITTNOTIFY_VOID_D1(frame_end_v3,d,x)
#define __itt_frame_end_v3_ptr ITTNOTIFY_NAME(frame_end_v3)
+#define __itt_frame_submit_v3(d,x,b,e) ITTNOTIFY_VOID_D3(frame_submit_v3,d,x,b,e)
+#define __itt_frame_submit_v3_ptr ITTNOTIFY_NAME(frame_submit_v3)
#else /* INTEL_NO_ITTNOTIFY_API */
#define __itt_frame_begin_v3(domain,id)
#define __itt_frame_begin_v3_ptr 0
#define __itt_frame_end_v3(domain,id)
#define __itt_frame_end_v3_ptr 0
+#define __itt_frame_submit_v3(domain,id,begin,end)
+#define __itt_frame_submit_v3_ptr 0
#endif /* INTEL_NO_ITTNOTIFY_API */
#else /* INTEL_NO_MACRO_BODY */
#define __itt_frame_begin_v3_ptr 0
#define __itt_frame_end_v3_ptr 0
+#define __itt_frame_submit_v3_ptr 0
#endif /* INTEL_NO_MACRO_BODY */
/** @endcond */
/** @} frames group */
@@ -2730,8 +3069,125 @@
#endif /* INTEL_NO_MACRO_BODY */
/** @endcond */
/** @} events group */
+
+
+/**
+ * @defgroup arrays Arrays Visualizer
+ * @ingroup public
+ * Visualize arrays
+ * @{
+ */
+
+/**
+ * @enum __itt_av_data_type
+ * @brief Defines types of arrays data (for C/C++ intrinsic types)
+ */
+typedef enum
+{
+ __itt_e_first = 0,
+ __itt_e_char = 0, /* 1-byte integer */
+ __itt_e_uchar, /* 1-byte unsigned integer */
+ __itt_e_int16, /* 2-byte integer */
+ __itt_e_uint16, /* 2-byte unsigned integer */
+ __itt_e_int32, /* 4-byte integer */
+ __itt_e_uint32, /* 4-byte unsigned integer */
+ __itt_e_int64, /* 8-byte integer */
+ __itt_e_uint64, /* 8-byte unsigned integer */
+ __itt_e_float, /* 4-byte floating */
+ __itt_e_double, /* 8-byte floating */
+ __itt_e_last = __itt_e_double
+} __itt_av_data_type;
+
+/**
+ * @brief Save an array data to a file.
+ * Output format is defined by the file extension. The csv and bmp formats are supported (bmp - for 2-dimensional array only).
+ * @param[in] data - pointer to the array data
+ * @param[in] rank - the rank of the array
+ * @param[in] dimensions - pointer to an array of integers, which specifies the array dimensions.
+ * The size of dimensions must be equal to the rank
+ * @param[in] type - the type of the array, specified as one of the __itt_av_data_type values (for intrinsic types)
+ * @param[in] filePath - the file path; the output format is defined by the file extension
+ * @param[in] columnOrder - defines how the array is stored in the linear memory.
+ *                 It should be 1 for column-major order (e.g. in FORTRAN) or 0 for row-major order (e.g. in C).
+ */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_av_saveA(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder);
+int ITTAPI __itt_av_saveW(void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_av_save __itt_av_saveW
+# define __itt_av_save_ptr __itt_av_saveW_ptr
+#else /* UNICODE */
+# define __itt_av_save __itt_av_saveA
+# define __itt_av_save_ptr __itt_av_saveA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_av_save(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder))
+ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA ITTNOTIFY_DATA(av_saveA)
+#define __itt_av_saveA_ptr ITTNOTIFY_NAME(av_saveA)
+#define __itt_av_saveW ITTNOTIFY_DATA(av_saveW)
+#define __itt_av_saveW_ptr ITTNOTIFY_NAME(av_saveW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save ITTNOTIFY_DATA(av_save)
+#define __itt_av_save_ptr ITTNOTIFY_NAME(av_save)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA(name)
+#define __itt_av_saveA_ptr 0
+#define __itt_av_saveW(name)
+#define __itt_av_saveW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save(name)
+#define __itt_av_save_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA_ptr 0
+#define __itt_av_saveW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
/** @endcond */
+void ITTAPI __itt_enable_attach(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, enable_attach, (void))
+#define __itt_enable_attach ITTNOTIFY_VOID(enable_attach)
+#define __itt_enable_attach_ptr ITTNOTIFY_NAME(enable_attach)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_enable_attach()
+#define __itt_enable_attach_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_enable_attach_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_gpa_documentation */
+
+/** @} arrays group */
+
+/** @endcond */
+
+
#ifdef __cplusplus
}
#endif /* __cplusplus */
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h
index bccaa38..40c8614 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h
@@ -42,12 +42,18 @@
# define ITT_PLATFORM_POSIX 2
#endif /* ITT_PLATFORM_POSIX */
+#ifndef ITT_PLATFORM_MAC
+# define ITT_PLATFORM_MAC 3
+#endif /* ITT_PLATFORM_MAC */
+
#ifndef ITT_PLATFORM
# if ITT_OS==ITT_OS_WIN
# define ITT_PLATFORM ITT_PLATFORM_WIN
+# elif ITT_OS==ITT_OS_MAC
+# define ITT_PLATFORM ITT_PLATFORM_MAC
# else
# define ITT_PLATFORM ITT_PLATFORM_POSIX
-# endif /* _WIN32 */
+# endif
#endif /* ITT_PLATFORM */
#if defined(_UNICODE) && !defined(UNICODE)
@@ -68,11 +74,11 @@
# if ITT_PLATFORM==ITT_PLATFORM_WIN
# define CDECL __cdecl
# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
-# define CDECL /* not actual on x86_64 platform */
-# else /* _M_X64 || _M_AMD64 || __x86_64__ */
+# if defined _M_IX86 || defined __i386__
# define CDECL __attribute__ ((cdecl))
-# endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+# else /* _M_IX86 || __i386__ */
+# define CDECL /* actual only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
#endif /* CDECL */
@@ -80,11 +86,11 @@
# if ITT_PLATFORM==ITT_PLATFORM_WIN
# define STDCALL __stdcall
# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
-# define STDCALL /* not supported on x86_64 platform */
-# else /* _M_X64 || _M_AMD64 || __x86_64__ */
+# if defined _M_IX86 || defined __i386__
# define STDCALL __attribute__ ((stdcall))
-# endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+# else /* _M_IX86 || __i386__ */
+# define STDCALL /* supported only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
#endif /* STDCALL */
@@ -97,8 +103,8 @@
#if ITT_PLATFORM==ITT_PLATFORM_WIN
/* use __forceinline (VC++ specific) */
-#define INLINE __forceinline
-#define INLINE_ATTRIBUTE /* nothing */
+#define ITT_INLINE __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
/*
* Generally, functions are not inlined unless optimization is specified.
@@ -106,11 +112,11 @@
* if no optimization level was specified.
*/
#ifdef __STRICT_ANSI__
-#define INLINE static
+#define ITT_INLINE static
#else /* __STRICT_ANSI__ */
-#define INLINE static inline
+#define ITT_INLINE static inline
#endif /* __STRICT_ANSI__ */
-#define INLINE_ATTRIBUTE __attribute__ ((always_inline))
+#define ITT_INLINE_ATTRIBUTE __attribute__ ((always_inline, unused))
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
/** @endcond */
@@ -122,17 +128,19 @@
# define ITT_ARCH_IA32E 2
#endif /* ITT_ARCH_IA32E */
-#ifndef ITT_ARCH_IA64
-# define ITT_ARCH_IA64 3
-#endif /* ITT_ARCH_IA64 */
+#ifndef ITT_ARCH_ARM
+# define ITT_ARCH_ARM 4
+#endif /* ITT_ARCH_ARM */
#ifndef ITT_ARCH
-# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
-# define ITT_ARCH ITT_ARCH_IA32E
-# elif defined _M_IA64 || defined __ia64
-# define ITT_ARCH ITT_ARCH_IA64
-# else
+# if defined _M_IX86 || defined __i386__
# define ITT_ARCH ITT_ARCH_IA32
+# elif defined _M_X64 || defined _M_AMD64 || defined __x86_64__
+# define ITT_ARCH ITT_ARCH_IA32E
+# elif defined _M_IA64 || defined __ia64__
+# define ITT_ARCH ITT_ARCH_IA64
+# elif defined _M_ARM || __arm__
+# define ITT_ARCH ITT_ARCH_ARM
# endif
#endif
@@ -145,7 +153,10 @@
#define ITT_TO_STR_AUX(x) #x
#define ITT_TO_STR(x) ITT_TO_STR_AUX(x)
-#define __ITT_BUILD_ASSERT(expr, suffix) do { static char __itt_build_check_##suffix[(expr) ? 1 : -1]; __itt_build_check_##suffix[0] = 0; } while(0)
+#define __ITT_BUILD_ASSERT(expr, suffix) do { \
+ static char __itt_build_check_##suffix[(expr) ? 1 : -1]; \
+ __itt_build_check_##suffix[0] = 0; \
+} while(0)
#define _ITT_BUILD_ASSERT(expr, suffix) __ITT_BUILD_ASSERT((expr), suffix)
#define ITT_BUILD_ASSERT(expr) _ITT_BUILD_ASSERT((expr), __LINE__)
@@ -158,7 +169,8 @@
#define API_VERSION_NUM 0.0.0
#endif /* API_VERSION_NUM */
-#define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) " (" ITT_TO_STR(API_VERSION_BUILD) ")"
+#define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \
+ " (" ITT_TO_STR(API_VERSION_BUILD) ")"
/* OS communication functions */
#if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -176,12 +188,16 @@
#ifndef _GNU_SOURCE
#define _GNU_SOURCE 1 /* need for PTHREAD_MUTEX_RECURSIVE */
#endif /* _GNU_SOURCE */
+#ifndef __USE_UNIX98
+#define __USE_UNIX98 1 /* need for PTHREAD_MUTEX_RECURSIVE, on SLES11.1 with gcc 4.3.4 wherein pthread.h missing dependency on __USE_XOPEN2K8 */
+#endif /*__USE_UNIX98*/
#include <pthread.h>
typedef void* lib_t;
typedef pthread_t TIDT;
typedef pthread_mutex_t mutex_t;
#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
-#define _strong_alias(name, aliasname) extern __typeof (name) aliasname __attribute__ ((alias (#name)));
+#define _strong_alias(name, aliasname) \
+ extern __typeof (name) aliasname __attribute__ ((alias (#name)));
#define strong_alias(name, aliasname) _strong_alias(name, aliasname)
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
@@ -200,29 +216,35 @@
#define __itt_thread_id() GetCurrentThreadId()
#define __itt_thread_yield() SwitchToThread()
#ifndef ITT_SIMPLE_INIT
-INLINE int __itt_interlocked_increment(volatile long* ptr)
+ITT_INLINE long
+__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
{
return InterlockedIncrement(ptr);
}
#endif /* ITT_SIMPLE_INIT */
#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
#define __itt_get_proc(lib, name) dlsym(lib, name)
-#define __itt_mutex_init(mutex) \
- { \
- pthread_mutexattr_t mutex_attr; \
- int error_code = pthread_mutexattr_init(&mutex_attr); \
- if (error_code) \
- __itt_report_error(__itt_error_system, "pthread_mutexattr_init", error_code); \
- error_code = pthread_mutexattr_settype(&mutex_attr, PTHREAD_MUTEX_RECURSIVE); \
- if (error_code) \
- __itt_report_error(__itt_error_system, "pthread_mutexattr_settype", error_code); \
- error_code = pthread_mutex_init(mutex, &mutex_attr); \
- if (error_code) \
- __itt_report_error(__itt_error_system, "pthread_mutex_init", error_code); \
- error_code = pthread_mutexattr_destroy(&mutex_attr); \
- if (error_code) \
- __itt_report_error(__itt_error_system, "pthread_mutexattr_destroy", error_code); \
- }
+#define __itt_mutex_init(mutex) {\
+ pthread_mutexattr_t mutex_attr; \
+ int error_code = pthread_mutexattr_init(&mutex_attr); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutexattr_init", \
+ error_code); \
+ error_code = pthread_mutexattr_settype(&mutex_attr, \
+ PTHREAD_MUTEX_RECURSIVE); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutexattr_settype", \
+ error_code); \
+ error_code = pthread_mutex_init(mutex, &mutex_attr); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutex_init", \
+ error_code); \
+ error_code = pthread_mutexattr_destroy(&mutex_attr); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutexattr_destroy", \
+ error_code); \
+}
#define __itt_mutex_lock(mutex) pthread_mutex_lock(mutex)
#define __itt_mutex_unlock(mutex) pthread_mutex_unlock(mutex)
#define __itt_load_lib(name) dlopen(name, RTLD_LAZY)
@@ -238,23 +260,29 @@
#ifdef __INTEL_COMPILER
#define __TBB_machine_fetchadd4(addr, val) __fetchadd4_acq((void *)addr, val)
#else /* __INTEL_COMPILER */
-/* TODO: Add Support for not Intel compilers for IA64 */
+/* TODO: Add Support for not Intel compilers for IA-64 architecture */
#endif /* __INTEL_COMPILER */
-#else /* ITT_ARCH!=ITT_ARCH_IA64 */
-INLINE int __TBB_machine_fetchadd4(volatile void* ptr, long addend)
+#elif ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_IA32E /* ITT_ARCH!=ITT_ARCH_IA64 */
+ITT_INLINE long
+__TBB_machine_fetchadd4(volatile void* ptr, long addend) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend)
{
- int result;
- __asm__ __volatile__("lock\nxaddl %0,%1"
- : "=r"(result),"=m"(*(long*)ptr)
- : "0"(addend), "m"(*(long*)ptr)
+ long result;
+ __asm__ __volatile__("lock\nxadd %0,%1"
+ : "=r"(result),"=m"(*(int*)ptr)
+ : "0"(addend), "m"(*(int*)ptr)
: "memory");
return result;
}
+#elif ITT_ARCH==ITT_ARCH_ARM
+#define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val)
#endif /* ITT_ARCH==ITT_ARCH_IA64 */
#ifndef ITT_SIMPLE_INIT
-INLINE int __itt_interlocked_increment(volatile long* ptr)
+ITT_INLINE long
+__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
{
- return __TBB_machine_fetchadd4(ptr, 1) + 1;
+ return __TBB_machine_fetchadd4(ptr, 1) + 1L;
}
#endif /* ITT_SIMPLE_INIT */
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.c b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.c
index 5257d0d..4b5f464 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.c
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.c
@@ -29,7 +29,7 @@
#include "disable_warnings.h"
-static const char api_version[] = API_VERSION "\0\n@(#) 201495 2011-12-01 14:14:56Z\n";
+static const char api_version[] = API_VERSION "\0\n@(#) $Revision: 42754 $\n";
#define _N_(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n)
@@ -43,6 +43,12 @@
#error Unsupported or unknown OS.
#endif
+#ifdef __ANDROID__
+/* default location of userapi collector on Android */
+#define ANDROID_ITTNOTIFY_DEFAULT_PATH "/data/data/com.intel.vtune/intel/libittnotify.so"
+#endif
+
+
#ifndef LIB_VAR_NAME
#if ITT_ARCH==ITT_ARCH_IA32
#define LIB_VAR_NAME INTEL_LIBITTNOTIFY32
@@ -146,7 +152,7 @@
static __itt_group_alias group_alias[] = {
{ "KMP_FOR_TPROFILE", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_mark) },
- { "KMP_FOR_TCHECK", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_fsync | __itt_group_mark) },
+ { "KMP_FOR_TCHECK", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_fsync | __itt_group_mark | __itt_group_suppress) },
{ NULL, (__itt_group_none) },
{ api_version, (__itt_group_none) } /* !!! Just to avoid unused code elimination !!! */
};
@@ -162,7 +168,7 @@
/* Define functions with static implementation */
#undef ITT_STUB
#undef ITT_STUBV
-#define ITT_STUB(api,type,name,args,params,nameindll,group,format) {ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (void*)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (__itt_group_id)(group)},
+#define ITT_STUB(api,type,name,args,params,nameindll,group,format) { ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (__itt_group_id)(group)},
#define ITT_STUBV ITT_STUB
#define __ITT_INTERNAL_INIT
#include "ittnotify_static.h"
@@ -170,7 +176,7 @@
/* Define functions without static implementation */
#undef ITT_STUB
#undef ITT_STUBV
-#define ITT_STUB(api,type,name,args,params,nameindll,group,format) {ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), NULL, (__itt_group_id)(group)},
+#define ITT_STUB(api,type,name,args,params,nameindll,group,format) {ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), NULL, (__itt_group_id)(group)},
#define ITT_STUBV ITT_STUB
#include "ittnotify_static.h"
{NULL, NULL, NULL, NULL, __itt_group_none}
@@ -225,7 +231,7 @@
static const char dll_path[PATH_MAX] = { 0 };
/* static part descriptor which handles. all notification api attributes. */
-static __itt_global __itt_ittapi_global = {
+__itt_global _N_(_ittapi_global) = {
ITT_MAGIC, /* identification info */
ITT_MAJOR, ITT_MINOR, API_VERSION_BUILD, /* version info */
0, /* api_initialized */
@@ -261,9 +267,9 @@
{
va_list args;
va_start(args, code);
- if (__itt_ittapi_global.error_handler != NULL)
+ if (_N_(_ittapi_global).error_handler != NULL)
{
- __itt_error_handler_t* handler = (__itt_error_handler_t*)__itt_ittapi_global.error_handler;
+ __itt_error_handler_t* handler = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler;
handler(code, args);
}
#ifdef ITT_NOTIFY_EXT_REPORT
@@ -281,7 +287,7 @@
{
__itt_domain *h_tail, *h;
- if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0)
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
{
__itt_init_ittlib_name(NULL, __itt_group_all);
if (ITTNOTIFY_NAME(domain_createW) && ITTNOTIFY_NAME(domain_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init)))
@@ -289,16 +295,16 @@
}
if (name == NULL)
- return __itt_ittapi_global.domain_list;
+ return _N_(_ittapi_global).domain_list;
- ITT_MUTEX_INIT_AND_LOCK(__itt_ittapi_global);
- for (h_tail = NULL, h = __itt_ittapi_global.domain_list; h != NULL; h_tail = h, h = h->next)
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+ for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next)
if (h->nameW != NULL && !wcscmp(h->nameW, name))
break;
if (h == NULL) {
- NEW_DOMAIN_W(&__itt_ittapi_global,h,h_tail,name);
+ NEW_DOMAIN_W(&_N_(_ittapi_global),h,h_tail,name);
}
- __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
return h;
}
@@ -309,7 +315,7 @@
{
__itt_domain *h_tail, *h;
- if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0)
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
{
__itt_init_ittlib_name(NULL, __itt_group_all);
#if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -322,16 +328,16 @@
}
if (name == NULL)
- return __itt_ittapi_global.domain_list;
+ return _N_(_ittapi_global).domain_list;
- ITT_MUTEX_INIT_AND_LOCK(__itt_ittapi_global);
- for (h_tail = NULL, h = __itt_ittapi_global.domain_list; h != NULL; h_tail = h, h = h->next)
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+ for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next)
if (h->nameA != NULL && !__itt_fstrcmp(h->nameA, name))
break;
if (h == NULL) {
- NEW_DOMAIN_A(&__itt_ittapi_global,h,h_tail,name);
+ NEW_DOMAIN_A(&_N_(_ittapi_global),h,h_tail,name);
}
- __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
return h;
}
@@ -340,7 +346,7 @@
{
__itt_string_handle *h_tail, *h;
- if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0)
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
{
__itt_init_ittlib_name(NULL, __itt_group_all);
if (ITTNOTIFY_NAME(string_handle_createW) && ITTNOTIFY_NAME(string_handle_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init)))
@@ -348,16 +354,16 @@
}
if (name == NULL)
- return __itt_ittapi_global.string_list;
+ return _N_(_ittapi_global).string_list;
- ITT_MUTEX_INIT_AND_LOCK(__itt_ittapi_global);
- for (h_tail = NULL, h = __itt_ittapi_global.string_list; h != NULL; h_tail = h, h = h->next)
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+ for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next)
if (h->strW != NULL && !wcscmp(h->strW, name))
break;
if (h == NULL) {
- NEW_STRING_HANDLE_W(&__itt_ittapi_global,h,h_tail,name);
+ NEW_STRING_HANDLE_W(&_N_(_ittapi_global),h,h_tail,name);
}
- __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
return h;
}
@@ -368,7 +374,7 @@
{
__itt_string_handle *h_tail, *h;
- if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0)
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
{
__itt_init_ittlib_name(NULL, __itt_group_all);
#if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -381,16 +387,16 @@
}
if (name == NULL)
- return __itt_ittapi_global.string_list;
+ return _N_(_ittapi_global).string_list;
- ITT_MUTEX_INIT_AND_LOCK(__itt_ittapi_global);
- for (h_tail = NULL, h = __itt_ittapi_global.string_list; h != NULL; h_tail = h, h = h->next)
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+ for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next)
if (h->strA != NULL && !__itt_fstrcmp(h->strA, name))
break;
if (h == NULL) {
- NEW_STRING_HANDLE_A(&__itt_ittapi_global,h,h_tail,name);
+ NEW_STRING_HANDLE_A(&_N_(_ittapi_global),h,h_tail,name);
}
- __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
return h;
}
@@ -398,7 +404,7 @@
static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))(void)
{
- if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0)
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
{
__itt_init_ittlib_name(NULL, __itt_group_all);
if (ITTNOTIFY_NAME(pause) && ITTNOTIFY_NAME(pause) != ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init)))
@@ -407,12 +413,12 @@
return;
}
}
- __itt_ittapi_global.state = __itt_collection_paused;
+ _N_(_ittapi_global).state = __itt_collection_paused;
}
static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))(void)
{
- if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0)
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
{
__itt_init_ittlib_name(NULL, __itt_group_all);
if (ITTNOTIFY_NAME(resume) && ITTNOTIFY_NAME(resume) != ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init)))
@@ -421,7 +427,7 @@
return;
}
}
- __itt_ittapi_global.state = __itt_collection_normal;
+ _N_(_ittapi_global).state = __itt_collection_normal;
}
#if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -430,7 +436,7 @@
TIDT tid = __itt_thread_id();
__itt_thread_info *h_tail, *h;
- if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0)
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
{
__itt_init_ittlib_name(NULL, __itt_group_all);
if (ITTNOTIFY_NAME(thread_set_nameW) && ITTNOTIFY_NAME(thread_set_nameW) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init)))
@@ -440,18 +446,18 @@
}
}
- __itt_mutex_lock(&__itt_ittapi_global.mutex);
- for (h_tail = NULL, h = __itt_ittapi_global.thread_list; h != NULL; h_tail = h, h = h->next)
+ __itt_mutex_lock(&_N_(_ittapi_global).mutex);
+ for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next)
if (h->tid == tid)
break;
if (h == NULL) {
- NEW_THREAD_INFO_W(&__itt_ittapi_global, h, h_tail, tid, __itt_thread_normal, name);
+ NEW_THREAD_INFO_W(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_normal, name);
}
else
{
h->nameW = name ? _wcsdup(name) : NULL;
}
- __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
}
static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setW),_init))(const wchar_t* name, int namelen)
@@ -469,7 +475,7 @@
TIDT tid = __itt_thread_id();
__itt_thread_info *h_tail, *h;
- if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0)
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
{
__itt_init_ittlib_name(NULL, __itt_group_all);
#if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -487,18 +493,18 @@
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
}
- __itt_mutex_lock(&__itt_ittapi_global.mutex);
- for (h_tail = NULL, h = __itt_ittapi_global.thread_list; h != NULL; h_tail = h, h = h->next)
+ __itt_mutex_lock(&_N_(_ittapi_global).mutex);
+ for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next)
if (h->tid == tid)
break;
if (h == NULL) {
- NEW_THREAD_INFO_A(&__itt_ittapi_global, h, h_tail, tid, __itt_thread_normal, name);
+ NEW_THREAD_INFO_A(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_normal, name);
}
else
{
h->nameA = name ? __itt_fstrdup(name) : NULL;
}
- __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
}
#if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -522,7 +528,7 @@
TIDT tid = __itt_thread_id();
__itt_thread_info *h_tail, *h;
- if (!__itt_ittapi_global.api_initialized && __itt_ittapi_global.thread_list->tid == 0)
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list->tid == 0)
{
__itt_init_ittlib_name(NULL, __itt_group_all);
if (ITTNOTIFY_NAME(thread_ignore) && ITTNOTIFY_NAME(thread_ignore) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init)))
@@ -532,19 +538,19 @@
}
}
- __itt_mutex_lock(&__itt_ittapi_global.mutex);
- for (h_tail = NULL, h = __itt_ittapi_global.thread_list; h != NULL; h_tail = h, h = h->next)
+ __itt_mutex_lock(&_N_(_ittapi_global).mutex);
+ for (h_tail = NULL, h = _N_(_ittapi_global).thread_list; h != NULL; h_tail = h, h = h->next)
if (h->tid == tid)
break;
if (h == NULL) {
static const char* name = "unknown";
- NEW_THREAD_INFO_A(&__itt_ittapi_global, h, h_tail, tid, __itt_thread_ignored, name);
+ NEW_THREAD_INFO_A(&_N_(_ittapi_global), h, h_tail, tid, __itt_thread_ignored, name);
}
else
{
h->state = __itt_thread_ignored;
}
- __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
}
static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_ignore),_init))(void)
@@ -552,6 +558,17 @@
ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))();
}
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(enable_attach),_init))(void)
+{
+#ifdef __ANDROID__
+ /*
+     * If the LIB_VAR_NAME env variable was already set, keep its previous value;
+     * otherwise set the default path.
+ */
+ setenv(ITT_TO_STR(LIB_VAR_NAME), ANDROID_ITTNOTIFY_DEFAULT_PATH, 0);
+#endif
+}
+
/* -------------------------------------------------------------------------- */
static const char* __itt_fsplit(const char* s, const char* sep, const char** out, int* len)
@@ -666,80 +683,10 @@
return NULL;
}
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
-
-#include <Winreg.h>
-
-typedef LONG (APIENTRY* RegCloseKeyProcType)(HKEY);
-typedef LONG (APIENTRY* RegOpenKeyExAProcType)(HKEY, LPCTSTR, DWORD, REGSAM, PHKEY);
-typedef LONG (APIENTRY* RegGetValueAProcType)(HKEY, LPCTSTR, LPCTSTR, DWORD, LPDWORD, PVOID, LPDWORD);
-
-/* This function return value of registry key that placed into static buffer.
- * This was done to aviod dynamic memory allocation.
- */
-static const char* __itt_get_lib_name_registry(void)
-{
-#define MAX_REG_VALUE_SIZE 4086
- static char reg_buff[MAX_REG_VALUE_SIZE];
- DWORD size;
- LONG res;
- HKEY hKey;
- RegCloseKeyProcType pRegCloseKey;
- RegOpenKeyExAProcType pRegOpenKeyExA;
- RegGetValueAProcType pRegGetValueA;
- HMODULE h_advapi32 = LoadLibraryA("advapi32.dll");
- DWORD autodetect = 0;
-
- if (h_advapi32 == NULL)
- {
- return NULL;
- }
-
- pRegCloseKey = (RegCloseKeyProcType)GetProcAddress(h_advapi32, "RegCloseKey");
- pRegOpenKeyExA = (RegOpenKeyExAProcType)GetProcAddress(h_advapi32, "RegOpenKeyExA");
- pRegGetValueA = (RegGetValueAProcType)GetProcAddress(h_advapi32, "RegGetValueA");
-
- if (pRegCloseKey == NULL ||
- pRegOpenKeyExA == NULL ||
- pRegGetValueA == NULL)
- {
- FreeLibrary(h_advapi32);
- return NULL;
- }
-
- res = pRegOpenKeyExA(HKEY_CURRENT_USER, (LPCTSTR)"Software\\Intel Corporation\\ITT Environment\\Collector", 0, KEY_READ, &hKey);
- if (res != ERROR_SUCCESS || hKey == 0)
- {
- FreeLibrary(h_advapi32);
- return NULL;
- }
-
- size = sizeof(DWORD);
- res = pRegGetValueA(hKey, (LPCTSTR)"AutoDetect", NULL, RRF_RT_REG_DWORD, NULL, (BYTE*)&autodetect, &size);
- if (res != ERROR_SUCCESS || size == 0 || autodetect == 0)
- {
- pRegCloseKey(hKey);
- FreeLibrary(h_advapi32);
- return NULL;
- }
-
- size = MAX_REG_VALUE_SIZE-1;
-    res = pRegGetValueA(hKey, (LPCTSTR)ITT_TO_STR(LIB_VAR_NAME), NULL, REG_SZ, NULL, (BYTE*)&reg_buff, &size);
- pRegCloseKey(hKey);
- FreeLibrary(h_advapi32);
-
- return (res == ERROR_SUCCESS && size > 0) ? reg_buff : NULL;
-}
-
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
static const char* __itt_get_lib_name(void)
{
const char* lib_name = __itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME));
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
- if (lib_name == NULL)
- lib_name = __itt_get_lib_name_registry();
-#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
return lib_name;
}
@@ -761,9 +708,8 @@
const char* chunk;
while ((group_str = __itt_fsplit(group_str, ",; ", &chunk, &len)) != NULL)
{
- __itt_fstrcpyn(gr, chunk, sizeof(gr));
-
- gr[min((unsigned int)len, sizeof(gr) - 1)] = 0;
+ __itt_fstrcpyn(gr, chunk, sizeof(gr) - 1);
+ gr[min(len, (int)(sizeof(gr) - 1))] = 0;
for (i = 0; group_list[i].name != NULL; i++)
{
@@ -810,8 +756,8 @@
{
register int i;
// Fill all pointers with initial stubs
- for (i = 0; __itt_ittapi_global.api_list_ptr[i].name != NULL; i++)
- *__itt_ittapi_global.api_list_ptr[i].func_ptr = __itt_ittapi_global.api_list_ptr[i].init_func;
+ for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+ *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].init_func;
}
*/
@@ -819,8 +765,8 @@
{
register int i;
/* Nulify all pointers except domain_create and string_handle_create */
- for (i = 0; __itt_ittapi_global.api_list_ptr[i].name != NULL; i++)
- *__itt_ittapi_global.api_list_ptr[i].func_ptr = __itt_ittapi_global.api_list_ptr[i].null_func;
+ for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+ *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
}
#if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -834,30 +780,30 @@
__itt_api_fini_t* __itt_api_fini_ptr;
static volatile TIDT current_thread = 0;
- if (__itt_ittapi_global.api_initialized)
+ if (_N_(_ittapi_global).api_initialized)
{
- __itt_mutex_lock(&__itt_ittapi_global.mutex);
- if (__itt_ittapi_global.api_initialized)
+ __itt_mutex_lock(&_N_(_ittapi_global).mutex);
+ if (_N_(_ittapi_global).api_initialized)
{
if (current_thread == 0)
{
current_thread = __itt_thread_id();
- __itt_api_fini_ptr = (__itt_api_fini_t*)__itt_get_proc(__itt_ittapi_global.lib, "__itt_api_fini");
+ __itt_api_fini_ptr = (__itt_api_fini_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_fini");
if (__itt_api_fini_ptr)
- __itt_api_fini_ptr(&__itt_ittapi_global);
+ __itt_api_fini_ptr(&_N_(_ittapi_global));
__itt_nullify_all_pointers();
/* TODO: !!! not safe !!! don't support unload so far.
- * if (__itt_ittapi_global.lib != NULL)
- * __itt_unload_lib(__itt_ittapi_global.lib);
- * __itt_ittapi_global.lib = NULL;
+ * if (_N_(_ittapi_global).lib != NULL)
+ * __itt_unload_lib(_N_(_ittapi_global).lib);
+ * _N_(_ittapi_global).lib = NULL;
*/
- __itt_ittapi_global.api_initialized = 0;
+ _N_(_ittapi_global).api_initialized = 0;
current_thread = 0;
}
}
- __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
}
}
@@ -870,51 +816,52 @@
#endif /* ITT_COMPLETE_GROUP */
static volatile TIDT current_thread = 0;
- if (!__itt_ittapi_global.api_initialized)
+ if (!_N_(_ittapi_global).api_initialized)
{
#ifndef ITT_SIMPLE_INIT
- ITT_MUTEX_INIT_AND_LOCK(__itt_ittapi_global);
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
#endif /* ITT_SIMPLE_INIT */
- if (!__itt_ittapi_global.api_initialized)
+ if (!_N_(_ittapi_global).api_initialized)
{
if (current_thread == 0)
{
current_thread = __itt_thread_id();
- __itt_ittapi_global.thread_list->tid = current_thread;
+ _N_(_ittapi_global).thread_list->tid = current_thread;
if (lib_name == NULL)
lib_name = __itt_get_lib_name();
groups = __itt_get_groups();
if (groups != __itt_group_none || lib_name != NULL)
{
- __itt_ittapi_global.lib = __itt_load_lib((lib_name == NULL) ? ittnotify_lib_name : lib_name);
- if (__itt_ittapi_global.lib != NULL)
+ _N_(_ittapi_global).lib = __itt_load_lib((lib_name == NULL) ? ittnotify_lib_name : lib_name);
+
+ if (_N_(_ittapi_global).lib != NULL)
{
__itt_api_init_t* __itt_api_init_ptr;
- int lib_version = __itt_lib_version(__itt_ittapi_global.lib);
+ int lib_version = __itt_lib_version(_N_(_ittapi_global).lib);
switch (lib_version) {
case 0:
groups = __itt_group_legacy;
case 1:
/* Fill all pointers from dynamic library */
- for (i = 0; __itt_ittapi_global.api_list_ptr[i].name != NULL; i++)
+ for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
{
- if (__itt_ittapi_global.api_list_ptr[i].group & groups & init_groups)
+ if (_N_(_ittapi_global).api_list_ptr[i].group & groups & init_groups)
{
- *__itt_ittapi_global.api_list_ptr[i].func_ptr = (void*)__itt_get_proc(__itt_ittapi_global.lib, __itt_ittapi_global.api_list_ptr[i].name);
- if (*__itt_ittapi_global.api_list_ptr[i].func_ptr == NULL)
+ *_N_(_ittapi_global).api_list_ptr[i].func_ptr = (void*)__itt_get_proc(_N_(_ittapi_global).lib, _N_(_ittapi_global).api_list_ptr[i].name);
+ if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr == NULL)
{
/* Restore pointers for function with static implementation */
- *__itt_ittapi_global.api_list_ptr[i].func_ptr = __itt_ittapi_global.api_list_ptr[i].null_func;
- __itt_report_error(__itt_error_no_symbol, lib_name, __itt_ittapi_global.api_list_ptr[i].name);
+ *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
+ __itt_report_error(__itt_error_no_symbol, lib_name, _N_(_ittapi_global).api_list_ptr[i].name);
#ifdef ITT_COMPLETE_GROUP
- zero_group = (__itt_group_id)(zero_group | __itt_ittapi_global.api_list_ptr[i].group);
+ zero_group = (__itt_group_id)(zero_group | _N_(_ittapi_global).api_list_ptr[i].group);
#endif /* ITT_COMPLETE_GROUP */
}
}
else
- *__itt_ittapi_global.api_list_ptr[i].func_ptr = __itt_ittapi_global.api_list_ptr[i].null_func;
+ *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
}
if (groups == __itt_group_legacy)
@@ -934,15 +881,15 @@
}
#ifdef ITT_COMPLETE_GROUP
- for (i = 0; __itt_ittapi_global.api_list_ptr[i].name != NULL; i++)
- if (__itt_ittapi_global.api_list_ptr[i].group & zero_group)
- *__itt_ittapi_global.api_list_ptr[i].func_ptr = __itt_ittapi_global.api_list_ptr[i].null_func;
+ for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+ if (_N_(_ittapi_global).api_list_ptr[i].group & zero_group)
+ *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
#endif /* ITT_COMPLETE_GROUP */
break;
case 2:
- __itt_api_init_ptr = (__itt_api_init_t*)__itt_get_proc(__itt_ittapi_global.lib, "__itt_api_init");
+ __itt_api_init_ptr = (__itt_api_init_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_init");
if (__itt_api_init_ptr)
- __itt_api_init_ptr(&__itt_ittapi_global, init_groups);
+ __itt_api_init_ptr(&_N_(_ittapi_global), init_groups);
break;
}
}
@@ -963,7 +910,7 @@
{
__itt_nullify_all_pointers();
}
- __itt_ittapi_global.api_initialized = 1;
+ _N_(_ittapi_global).api_initialized = 1;
current_thread = 0;
/* !!! Just to avoid unused code elimination !!! */
if (__itt_fini_ittlib_ptr == _N_(fini_ittlib)) current_thread = 0;
@@ -971,25 +918,26 @@
}
#ifndef ITT_SIMPLE_INIT
- __itt_mutex_unlock(&__itt_ittapi_global.mutex);
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
#endif /* ITT_SIMPLE_INIT */
}
/* Evaluating if any function ptr is non empty and it's in init_groups */
- for (i = 0; __itt_ittapi_global.api_list_ptr[i].name != NULL; i++)
- if (*__itt_ittapi_global.api_list_ptr[i].func_ptr != __itt_ittapi_global.api_list_ptr[i].null_func &&
- __itt_ittapi_global.api_list_ptr[i].group & init_groups)
+ for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+ if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr != _N_(_ittapi_global).api_list_ptr[i].null_func &&
+ _N_(_ittapi_global).api_list_ptr[i].group & init_groups)
return 1;
return 0;
}
ITT_EXTERN_C __itt_error_handler_t* _N_(set_error_handler)(__itt_error_handler_t* handler)
{
- __itt_error_handler_t* prev = (__itt_error_handler_t*)__itt_ittapi_global.error_handler;
- __itt_ittapi_global.error_handler = (void*)handler;
+ __itt_error_handler_t* prev = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler;
+ _N_(_ittapi_global).error_handler = (void*)(size_t)handler;
return prev;
}
#if ITT_PLATFORM==ITT_PLATFORM_WIN
#pragma warning(pop)
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.h
index 1e9eb43..fe1fe3c 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.h
@@ -60,6 +60,8 @@
ITT_STUBV(LIBITTAPI, void, thr_ignore, (void), (ITT_NO_PARAMS), thr_ignore, __itt_group_thread | __itt_group_legacy, "no args")
#endif /* __ITT_INTERNAL_BODY */
+ITT_STUBV(ITTAPI, void, enable_attach, (void), (ITT_NO_PARAMS), enable_attach, __itt_group_all, "no args")
+
#else /* __ITT_INTERNAL_INIT */
#if ITT_PLATFORM==ITT_PLATFORM_WIN
@@ -78,6 +80,11 @@
ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr), (ITT_FORMAT addr), sync_acquired, __itt_group_sync, "%p")
ITT_STUBV(ITTAPI, void, sync_releasing, (void* addr), (ITT_FORMAT addr), sync_releasing, __itt_group_sync, "%p")
+ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask), (ITT_FORMAT mask), suppress_push, __itt_group_suppress, "%p")
+ITT_STUBV(ITTAPI, void, suppress_pop, (void), (ITT_NO_PARAMS), suppress_pop, __itt_group_suppress, "no args")
+ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_mark_range, __itt_group_suppress, "%d, %p, %p, %d")
+ITT_STUBV(ITTAPI, void, suppress_clear_range,(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_clear_range,__itt_group_suppress, "%d, %p, %p, %d")
+
ITT_STUBV(ITTAPI, void, fsync_prepare, (void* addr), (ITT_FORMAT addr), sync_prepare, __itt_group_fsync, "%p")
ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr), (ITT_FORMAT addr), sync_cancel, __itt_group_fsync, "%p")
ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr), (ITT_FORMAT addr), sync_acquired, __itt_group_fsync, "%p")
@@ -95,16 +102,26 @@
ITT_STUBV(ITTAPI, void, model_reduction_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_reduction_uses, __itt_group_model, "%p, %d")
ITT_STUBV(ITTAPI, void, model_observe_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_observe_uses, __itt_group_model, "%p, %d")
ITT_STUBV(ITTAPI, void, model_clear_uses, (void* addr), (ITT_FORMAT addr), model_clear_uses, __itt_group_model, "%p")
-ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x), (ITT_FORMAT x), model_disable_push, __itt_group_model, "%p")
-ITT_STUBV(ITTAPI, void, model_disable_pop, (void), (ITT_NO_PARAMS), model_disable_pop, __itt_group_model, "no args")
#ifndef __ITT_INTERNAL_BODY
#if ITT_PLATFORM==ITT_PLATFORM_WIN
ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name), (ITT_FORMAT name), model_site_beginW, __itt_group_model, "\"%s\"")
ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name), (ITT_FORMAT name), model_task_beginW, __itt_group_model, "\"%s\"")
+ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name), (ITT_FORMAT name), model_iteration_taskW, __itt_group_model, "\"%s\"")
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name), (ITT_FORMAT name), model_site_beginA, __itt_group_model, "\"%s\"")
ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_site_beginAL, __itt_group_model, "\"%s\", %d")
+ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name), (ITT_FORMAT name), model_task_beginA, __itt_group_model, "\"%s\"")
ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_task_beginAL, __itt_group_model, "\"%s\", %d")
+ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name), (ITT_FORMAT name), model_iteration_taskA, __itt_group_model, "\"%s\"")
+ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_iteration_taskAL, __itt_group_model, "\"%s\", %d")
+ITT_STUBV(ITTAPI, void, model_site_end_2, (void), (ITT_NO_PARAMS), model_site_end_2, __itt_group_model, "no args")
+ITT_STUBV(ITTAPI, void, model_task_end_2, (void), (ITT_NO_PARAMS), model_task_end_2, __itt_group_model, "no args")
+ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock), (ITT_FORMAT lock), model_lock_acquire_2, __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock), (ITT_FORMAT lock), model_lock_release_2, __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t count), (ITT_FORMAT count), model_aggregate_task, __itt_group_model, "%d")
+ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x), (ITT_FORMAT x), model_disable_push, __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_disable_pop, (void), (ITT_NO_PARAMS), model_disable_pop, __itt_group_model, "no args")
#endif /* __ITT_INTERNAL_BODY */
#ifndef __ITT_INTERNAL_BODY
@@ -123,16 +140,23 @@
ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized), (ITT_FORMAT h, addr, new_addr, new_size, initialized), heap_reallocate_end, __itt_group_heap, "%p, %p, %p, %lu, %d")
ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void), (ITT_NO_PARAMS), heap_internal_access_begin, __itt_group_heap, "no args")
ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void), (ITT_NO_PARAMS), heap_internal_access_end, __itt_group_heap, "no args")
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void), (ITT_NO_PARAMS), heap_record_memory_growth_begin, __itt_group_heap, "no args")
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void), (ITT_NO_PARAMS), heap_record_memory_growth_end, __itt_group_heap, "no args")
+ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask), (ITT_FORMAT reset_mask), heap_reset_detection, __itt_group_heap, "%u")
+ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask), (ITT_FORMAT record_mask), heap_record, __itt_group_heap, "%u")
ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_create, __itt_group_structure, "%p, %lu")
ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_destroy, __itt_group_structure, "%p, %lu")
+ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void), (ITT_NO_PARAMS), get_timestamp, __itt_group_structure, "no args")
+
ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), region_begin, __itt_group_structure, "%p, %lu, %lu, %p")
ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), region_end, __itt_group_structure, "%p, %lu")
#ifndef __ITT_INTERNAL_BODY
ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_begin_v3, __itt_group_structure, "%p, %p")
ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_end_v3, __itt_group_structure, "%p, %p")
+ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end), (ITT_FORMAT domain, id, begin, end), frame_submit_v3, __itt_group_structure, "%p, %p, %lu, %lu")
#endif /* __ITT_INTERNAL_BODY */
ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_group, __itt_group_structure, "%p, %lu, %lu, %p")
@@ -280,4 +304,13 @@
ITT_STUB(ITTAPI, const char*, api_version, (void), (ITT_NO_PARAMS), api_version, __itt_group_all & ~__itt_group_legacy, "no args")
#endif /* __ITT_INTERNAL_BODY */
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveA, __itt_group_arrays, "%p, %d, %p, %d, \"%s\", %d")
+ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveW, __itt_group_arrays, "%p, %d, %p, %d, \"%S\", %d")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_save, __itt_group_arrays, "%p, %d, %p, %d, \"%s\", %d")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* __ITT_INTERNAL_BODY */
+
#endif /* __ITT_INTERNAL_INIT */
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_types.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_types.h
index 2799173..3695a67 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_types.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_types.h
@@ -29,6 +29,8 @@
__itt_group_heap = 1<<11,
__itt_group_splitter_max = 1<<12,
__itt_group_structure = 1<<12,
+ __itt_group_suppress = 1<<13,
+ __itt_group_arrays = 1<<14,
__itt_group_all = -1
} __itt_group_id;
@@ -57,6 +59,8 @@
{ __itt_group_stitch, "stitch" }, \
{ __itt_group_heap, "heap" }, \
{ __itt_group_structure, "structure" }, \
+ { __itt_group_suppress, "suppress" }, \
+ { __itt_group_arrays, "arrays" }, \
{ __itt_group_none, NULL } \
}
diff --git a/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h b/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h
index b10676f..9919294 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h
@@ -47,12 +47,18 @@
# define ITT_PLATFORM_POSIX 2
#endif /* ITT_PLATFORM_POSIX */
+#ifndef ITT_PLATFORM_MAC
+# define ITT_PLATFORM_MAC 3
+#endif /* ITT_PLATFORM_MAC */
+
#ifndef ITT_PLATFORM
# if ITT_OS==ITT_OS_WIN
# define ITT_PLATFORM ITT_PLATFORM_WIN
+# elif ITT_OS==ITT_OS_MAC
+# define ITT_PLATFORM ITT_PLATFORM_MAC
# else
# define ITT_PLATFORM ITT_PLATFORM_POSIX
-# endif /* _WIN32 */
+# endif
#endif /* ITT_PLATFORM */
#if defined(_UNICODE) && !defined(UNICODE)
@@ -73,11 +79,11 @@
# if ITT_PLATFORM==ITT_PLATFORM_WIN
# define CDECL __cdecl
# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
-# define CDECL /* not actual on x86_64 platform */
-# else /* _M_X64 || _M_AMD64 || __x86_64__ */
+# if defined _M_IX86 || defined __i386__
# define CDECL __attribute__ ((cdecl))
-# endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+# else /* _M_IX86 || __i386__ */
+# define CDECL /* actual only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
#endif /* CDECL */
@@ -85,11 +91,11 @@
# if ITT_PLATFORM==ITT_PLATFORM_WIN
# define STDCALL __stdcall
# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__
-# define STDCALL /* not supported on x86_64 platform */
-# else /* _M_X64 || _M_AMD64 || __x86_64__ */
+# if defined _M_IX86 || defined __i386__
# define STDCALL __attribute__ ((stdcall))
-# endif /* _M_X64 || _M_AMD64 || __x86_64__ */
+# else /* _M_IX86 || __i386__ */
+# define STDCALL /* supported only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
#endif /* STDCALL */
@@ -102,8 +108,8 @@
#if ITT_PLATFORM==ITT_PLATFORM_WIN
/* use __forceinline (VC++ specific) */
-#define INLINE __forceinline
-#define INLINE_ATTRIBUTE /* nothing */
+#define ITT_INLINE __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
/*
* Generally, functions are not inlined unless optimization is specified.
@@ -111,11 +117,11 @@
* if no optimization level was specified.
*/
#ifdef __STRICT_ANSI__
-#define INLINE static
+#define ITT_INLINE static
#else /* __STRICT_ANSI__ */
-#define INLINE static inline
+#define ITT_INLINE static inline
#endif /* __STRICT_ANSI__ */
-#define INLINE_ATTRIBUTE __attribute__ ((always_inline))
+#define ITT_INLINE_ATTRIBUTE __attribute__ ((always_inline, unused))
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
/** @endcond */
diff --git a/openmp/runtime/src/z_Linux_asm.s b/openmp/runtime/src/z_Linux_asm.s
index 1bfdc0b..1f1ba1b 100644
--- a/openmp/runtime/src/z_Linux_asm.s
+++ b/openmp/runtime/src/z_Linux_asm.s
@@ -1,7 +1,7 @@
// z_Linux_asm.s: - microtasking routines specifically
// written for Intel platforms running Linux* OS
-// $Revision: 42582 $
-// $Date: 2013-08-09 06:30:22 -0500 (Fri, 09 Aug 2013) $
+// $Revision: 42810 $
+// $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
//
////===----------------------------------------------------------------------===//
@@ -77,7 +77,7 @@
KMP_PREFIX_UNDERSCORE(\proc):
.endm
# endif // defined __APPLE__ && defined __MACH__
-#endif // __i386 || defined __x86_64
+#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
// -----------------------------------------------------------------------
@@ -1573,3 +1573,19 @@
// -----------------------------------------------------------------------
#endif /* KMP_ARCH_X86_64 */
+
+#if KMP_ARCH_ARM
+ .data
+ .comm .gomp_critical_user_,32,8
+ .data
+ .align 4
+ .global __kmp_unnamed_critical_addr
+__kmp_unnamed_critical_addr:
+ .4byte .gomp_critical_user_
+ .size __kmp_unnamed_critical_addr,4
+#endif /* KMP_ARCH_ARM */
+
+
+#if defined(__linux__)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/openmp/runtime/src/z_Linux_util.c b/openmp/runtime/src/z_Linux_util.c
index 4675302..27e394f 100644
--- a/openmp/runtime/src/z_Linux_util.c
+++ b/openmp/runtime/src/z_Linux_util.c
@@ -1,7 +1,7 @@
/*
* z_Linux_util.c -- platform specific routines.
- * $Revision: 42582 $
- * $Date: 2013-08-09 06:30:22 -0500 (Fri, 09 Aug 2013) $
+ * $Revision: 42847 $
+ * $Date: 2013-11-26 09:10:01 -0600 (Tue, 26 Nov 2013) $
*/
@@ -32,7 +32,7 @@
#if KMP_OS_LINUX
# include <sys/sysinfo.h>
-# if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+# if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
// We should really include <futex.h>, but that causes compatibility problems on different
// Linux* OS distributions that either require that you include (or break when you try to include)
// <pci/types.h>.
@@ -55,6 +55,12 @@
#include <ctype.h>
#include <fcntl.h>
+// For non-x86 architecture
+#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
+# include <stdbool.h>
+# include <ffi.h>
+#endif
+
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
@@ -112,7 +118,7 @@
* stone forever.
*/
-# if KMP_ARCH_X86
+# if KMP_ARCH_X86 || KMP_ARCH_ARM
# ifndef __NR_sched_setaffinity
# define __NR_sched_setaffinity 241
# elif __NR_sched_setaffinity != 241
@@ -434,7 +440,7 @@
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
int
__kmp_futex_determine_capable()
@@ -451,7 +457,7 @@
return retval;
}
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
@@ -2004,43 +2010,21 @@
} // __kmp_get_xproc
-/*
- Parse /proc/cpuinfo file for processor frequency, return frequency in Hz, or ~ 0 in case of
- error.
-*/
-static
-kmp_uint64
-__kmp_get_frequency_from_proc(
-) {
+int
+__kmp_read_from_file( char const *path, char const *format, ... )
+{
+ int result;
+ va_list args;
- kmp_uint64 result = ~ 0;
- FILE * file = NULL;
- double freq = HUGE_VAL;
- int rc;
+ va_start(args, format);
+ FILE *f = fopen(path, "rb");
+ if ( f == NULL )
+ return 0;
+ result = vfscanf(f, format, args);
+ fclose(f);
- //
- // FIXME - use KMP_CPUINFO_FILE here if it is set!!!
- //
- file = fopen( "/proc/cpuinfo", "r" );
- if ( file == NULL ) {
- return result;
- }; // if
- for ( ; ; ) {
- rc = fscanf( file, "cpu MHz : %lf\n", & freq ); // Try to scan frequency.
- if ( rc == 1 ) { // Success.
- break;
- }; // if
- fscanf( file, "%*[^\n]\n" ); // Failure -- skip line.
- }; // for
- fclose( file );
- if ( freq == HUGE_VAL || freq <= 0 ) {
- return result;
- }; // if
- result = (kmp_uint64)( freq * 1.0E+6 );
- KA_TRACE( 5, ( "cpu frequency from /proc/cpuinfo: %" KMP_UINT64_SPEC "\n", result ) );
return result;
-} // func __kmp_get_frequency_from_proc
-
+}
void
__kmp_runtime_initialize( void )
@@ -2059,15 +2043,6 @@
}; // if
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
- if ( __kmp_cpu_frequency == 0 ) {
- // First try nominal frequency.
- __kmp_cpu_frequency = __kmp_cpuinfo.frequency;
- if ( __kmp_cpu_frequency == 0 || __kmp_cpu_frequency == ~ 0 ) {
- // Next Try to get CPU frequency from /proc/cpuinfo.
- __kmp_cpu_frequency = __kmp_get_frequency_from_proc();
- }; // if
- }; // if
-
__kmp_xproc = __kmp_get_xproc();
if ( sysconf( _SC_THREADS ) ) {
@@ -2536,5 +2511,42 @@
#endif // USE_LOAD_BALANCE
+
+#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
+
+int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, int argc,
+ void *p_argv[] )
+{
+ int argc_full = argc + 2;
+ int i;
+ ffi_cif cif;
+ ffi_type *types[argc_full];
+ void *args[argc_full];
+ void *idp[2];
+
+ /* We're only passing pointers to the target. */
+ for (i = 0; i < argc_full; i++)
+ types[i] = &ffi_type_pointer;
+
+ /* Ugly double-indirection, but that's how it goes... */
+    idp[0] = &gtid;
+ idp[1] = &tid;
+ args[0] = &idp[0];
+ args[1] = &idp[1];
+
+ for (i = 0; i < argc; i++)
+ args[2 + i] = &p_argv[i];
+
+ if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, argc_full,
+ &ffi_type_void, types) != FFI_OK)
+ abort();
+
+ ffi_call(&cif, (void (*)(void))pkfn, NULL, args);
+
+ return 1;
+}
+
+#endif // KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
+
// end of file //
diff --git a/openmp/runtime/src/z_Windows_NT_util.c b/openmp/runtime/src/z_Windows_NT_util.c
index bd22c25..ba59110 100644
--- a/openmp/runtime/src/z_Windows_NT_util.c
+++ b/openmp/runtime/src/z_Windows_NT_util.c
@@ -1,7 +1,7 @@
/*
* z_Windows_NT_util.c -- platform specific routines.
- * $Revision: 42518 $
- * $Date: 2013-07-15 11:12:26 -0500 (Mon, 15 Jul 2013) $
+ * $Revision: 42816 $
+ * $Date: 2013-11-11 15:33:37 -0600 (Mon, 11 Nov 2013) $
*/
@@ -391,14 +391,14 @@
/* TODO: shouldn't this use release semantics to ensure that __kmp_suspend_initialize_thread
gets called first?
*/
- old_spin = __kmp_test_then_or32( (volatile kmp_int32 *) spinner,
+ old_spin = KMP_TEST_THEN_OR32( (volatile kmp_int32 *) spinner,
KMP_BARRIER_SLEEP_STATE );
KF_TRACE( 5, ( "__kmp_suspend: T#%d set sleep bit for spin(%p)==%d\n",
th_gtid, spinner, *spinner ) );
if ( old_spin == checker ) {
- __kmp_test_then_and32( (volatile kmp_int32 *) spinner, ~(KMP_BARRIER_SLEEP_STATE) );
+ KMP_TEST_THEN_AND32( (volatile kmp_int32 *) spinner, ~(KMP_BARRIER_SLEEP_STATE) );
KF_TRACE( 5, ( "__kmp_suspend: T#%d false alarm, reset sleep bit for spin(%p)\n",
th_gtid, spinner) );
@@ -501,7 +501,7 @@
}
TCW_PTR(th->th.th_sleep_loc, NULL);
- old_spin = __kmp_test_then_and32( (kmp_int32 volatile *) spin, ~( KMP_BARRIER_SLEEP_STATE ) );
+ old_spin = KMP_TEST_THEN_AND32( (kmp_int32 volatile *) spin, ~( KMP_BARRIER_SLEEP_STATE ) );
if ( ( old_spin & KMP_BARRIER_SLEEP_STATE ) == 0 ) {
KF_TRACE( 5, ( "__kmp_resume: T#%d exiting, thread T#%d already awake - spin(%p): "
@@ -874,24 +874,6 @@
}; // if
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
- if ( __kmp_cpu_frequency == 0 ) {
- // __kmp_hardware_timestamp() calls to QueryPerformanceCounter(). If
- // __kmp_hardware_timestamp() rewritten to use RDTSC instruction (or its 64 analog),
- // probably we should try to get frequency from __kmp_cpuinfo.frequency first (see
- // z_Linux_util.c).
- LARGE_INTEGER freq;
- BOOL rc;
- rc = QueryPerformanceFrequency( & freq );
- if ( rc ) {
- KMP_DEBUG_ASSERT( sizeof( __kmp_cpu_frequency ) >= sizeof( freq.QuadPart ) );
- KMP_DEBUG_ASSERT( freq.QuadPart >= 0 );
- __kmp_cpu_frequency = freq.QuadPart;
- KA_TRACE( 5, ( "cpu frequency: %" KMP_UINT64_SPEC "\n", __kmp_cpu_frequency ) );
- } else {
- __kmp_cpu_frequency = ~ 0;
- }; // if
- }; // if
-
/* Set up minimum number of threads to switch to TLS gtid */
#if KMP_OS_WINDOWS && ! defined GUIDEDLL_EXPORTS
// Windows* OS, static library.
diff --git a/openmp/runtime/tools/check-tools.pl b/openmp/runtime/tools/check-tools.pl
index 8140e11..1878ca5 100755
--- a/openmp/runtime/tools/check-tools.pl
+++ b/openmp/runtime/tools/check-tools.pl
@@ -268,6 +268,9 @@
} elsif ( $stdout =~ m{^.*? \(SUSE Linux\) (\d+\.\d+\.\d+)\s+\[.*? (\d+)\]}m ) {
# gcc (SUSE Linux) 4.3.2 [gcc-4_3-branch revision 141291]
( $ver, $bld ) = ( $1, $2 );
+ } elsif ( $stdout =~ m{^.*? \(SUSE Linux\) (\d+\.\d+\.\d+)\s+\d+\s+\[.*? (\d+)\]}m ) {
+ # gcc (SUSE Linux) 4.7.2 20130108 [gcc-4_7-branch revision 195012]
+ ( $ver, $bld ) = ( $1, $2 );
} elsif ( $stdout =~ m{^.*? \((Debian|Ubuntu).*?\) (\d+\.\d+\.\d+)}m ) {
# gcc (Debian 4.7.2-22) 4.7.2
# Debian support from Sylvestre Ledru
@@ -286,6 +289,35 @@
}; # sub get_gnu_compiler_version
+sub get_clang_compiler_version($) {
+ my ( $tool ) = @_;
+ my ( @ret ) = ( $tool );
+ my ( $rc, $stdout, $stderr, $version );
+ $rc = run( [ $tool, "--version" ], $stdout, $stderr );
+ if ( $rc >= 0 ) {
+ my ( $ver, $bld );
+ if ( $target_os eq "mac" ) {
+ # Apple LLVM version 4.2 (clang-425.0.28) (based on LLVM 3.2svn)
+ $stdout =~ m{^.*? (\d+\.\d+) \(.*-(\d+\.\d+\.\d+)\)}m;
+ ( $ver, $bld ) = ( $1, $2 );
+ } else {
+ if ( 0 ) {
+ } elsif ( $stdout =~ m{^.*? (\d+\.\d+) \((.*)\)}m ) {
+ # clang version 3.3 (tags/RELEASE_33/final)
+ ( $ver, $bld ) = ( $1, $2 );
+ }
+ }; # if
+ if ( defined( $ver ) ) {
+ $version = $ver . ( defined( $bld ) ? " ($bld)" : "" );
+ } else {
+ warning( "Cannot parse Clang compiler version:", $stdout, "(eof)" );
+ }; # if
+ }; # if
+ push( @ret, $version );
+ return @ret;
+}; # sub get_clang_compiler_version
+
+
sub get_ms_compiler_version() {
my ( $rc, $stdout, $stderr, $version );
my $tool = "cl";
@@ -349,18 +381,30 @@
my $make;
my $intel = 1; # Check Intel compilers.
-my $gnu_fortran = 0; # Check GNU Fortran.
+my $fortran = 0; # Check for corresponding Fortran compiler, ifort for intel
+ # gfortran for gnu
+ # gfortran for clang
+my $clang = 0; # Check Clang Compilers.
my $intel_compilers = {
"lin" => { c => "icc", cpp => "icpc", f => "ifort" },
"lrb" => { c => "icc", cpp => "icpc", f => "ifort" },
"mac" => { c => "icc", cpp => "icpc", f => "ifort" },
"win" => { c => "icl", cpp => undef, f => "ifort" },
};
+my $gnu_compilers = {
+ "lin" => { c => "gcc", cpp => "g++", f => "gfortran" },
+ "mac" => { c => "gcc", cpp => "g++", f => "gfortran" },
+};
+my $clang_compilers = {
+ "lin" => { c => "clang", cpp => "clang++" },
+ "mac" => { c => "clang", cpp => "clang++" },
+};
get_options(
Platform::target_options(),
"intel!" => \$intel,
- "gnu-fortran!" => \$gnu_fortran,
+ "fortran" => \$fortran,
+ "clang" => \$clang,
"make" => \$make,
"pedantic" => \$pedantic,
);
@@ -375,21 +419,32 @@
# If Intel C++ compiler has a name different from C compiler, check it as well.
push( @versions, [ "Intel C++ Compiler", get_intel_compiler_version( $ic->{ cpp } ) ] );
}; # if
- if ( defined( $ic->{ f } ) ) {
- push( @versions, [ "Intel Fortran Compiler", get_intel_compiler_version( $ic->{ f } ) ] );
- }; # if
+ # fortran check must be explicitly specified on command line with --fortran
+ if ( $fortran ) {
+ if ( defined( $ic->{ f } ) ) {
+ push( @versions, [ "Intel Fortran Compiler", get_intel_compiler_version( $ic->{ f } ) ] );
+ }; # if
+ };
}; # if
if ( $target_os eq "lin" or $target_os eq "mac" ) {
- push( @versions, [ "GNU C Compiler", get_gnu_compiler_version( "gcc" ) ] );
- push( @versions, [ "GNU C++ Compiler", get_gnu_compiler_version( "g++" ) ] );
- if ( $gnu_fortran ) {
- push( @versions, [ "GNU Fortran Compiler", get_gnu_compiler_version( "gfortran" ) ] );
- }; # if
-}; # if
+ # check for gnu tools by default because touch-test.c is compiled with them.
+ push( @versions, [ "GNU C Compiler", get_gnu_compiler_version( $gnu_compilers->{ $target_os }->{ c } ) ] );
+ push( @versions, [ "GNU C++ Compiler", get_gnu_compiler_version( $gnu_compilers->{ $target_os }->{ cpp } ) ] );
+ if ( $clang ) {
+ push( @versions, [ "Clang C Compiler", get_clang_compiler_version( $clang_compilers->{ $target_os }->{ c } ) ] );
+ push( @versions, [ "Clang C++ Compiler", get_clang_compiler_version( $clang_compilers->{ $target_os }->{ cpp } ) ] );
+ };
+ # if intel fortran has been checked then gnu fortran is unnecessary
+ # also, if user specifies clang as build compiler, then gfortran is assumed fortran compiler
+ if ( $fortran and not $intel ) {
+ push( @versions, [ "GNU Fortran Compiler", get_gnu_compiler_version( $gnu_compilers->{ $target_os }->{ f } ) ] );
+ };
+};
if ( $target_os eq "win" ) {
push( @versions, [ "MS C/C++ Compiler", get_ms_compiler_version() ] );
push( @versions, [ "MS Linker", get_ms_linker_version() ] );
}; # if
+
my $count = 0;
foreach my $item ( @versions ) {
my ( $title, $tool, $version ) = @$item;
diff --git a/openmp/runtime/tools/common.inc b/openmp/runtime/tools/common.inc
index 4154b29..8eceb98 100644
--- a/openmp/runtime/tools/common.inc
+++ b/openmp/runtime/tools/common.inc
@@ -56,10 +56,14 @@
# Setting defaults
mode?=release
-ifeq "$(omp_os)" "windows"
- compiler?=icl
+ifeq "$(filter 32 32e 64,$(arch))" ""
+ compiler?=gcc
else
- compiler?=icc
+ ifeq "$(omp_os)" "windows"
+ compiler?=icl
+ else
+ compiler?=icc
+ endif
endif
ifneq "$(mic)" "no"
diff --git a/openmp/runtime/tools/lib/Platform.pm b/openmp/runtime/tools/lib/Platform.pm
index 584eeb7..d723174 100644
--- a/openmp/runtime/tools/lib/Platform.pm
+++ b/openmp/runtime/tools/lib/Platform.pm
@@ -48,6 +48,8 @@
$arch = "32";
} elsif ( $arch =~ m{\A\s*(?:48|(?:ia)?32e|Intel\s*64|Intel\(R\)\s*64|x86[_-]64|x64|AMD64)\s*\z}i ) {
$arch = "32e";
+ } elsif ( $arch =~ m{\Aarm(?:v7\D*)?\z} ) {
+ $arch = "arm";
} else {
$arch = undef;
}; # if
@@ -59,6 +61,7 @@
my %legal = (
"32" => "IA-32 architecture",
"32e" => "Intel(R) 64",
+ "arm" => "ARM",
);
sub legal_arch($) {
@@ -76,6 +79,7 @@
"32" => "ia32",
"32e" => "intel64",
"64" => "ia64",
+ "arm" => "arm",
);
sub arch_opt($) {
@@ -153,6 +157,8 @@
$_host_arch = "64";
} elsif ( $hardware_platform eq "x86_64" ) {
$_host_arch = "32e";
+ } elsif ( $hardware_platform eq "arm" ) {
+ $_host_arch = "arm";
} else {
die "Unsupported host hardware platform: \"$hardware_platform\"; stopped";
}; # if
@@ -178,7 +184,7 @@
# Use arch specified in LIBOMP_ARCH.
$_target_arch = canon_arch( $ENV{ LIBOMP_ARCH } );
if ( not defined( $_target_arch ) ) {
- die "Uknown architecture specified in LIBOMP_ARCH environment variable: \"$ENV{ LIBOMP_ARCH }\"";
+ die "Unknown architecture specified in LIBOMP_ARCH environment variable: \"$ENV{ LIBOMP_ARCH }\"";
}; # if
} else {
# Otherwise use host architecture.
@@ -191,7 +197,7 @@
# Use OS specified in LIBOMP_OS.
$_target_os = canon_os( $ENV{ LIBOMP_OS } );
if ( not defined( $_target_os ) ) {
- die "Uknown OS specified in LIBOMP_OS environment variable: \"$ENV{ LIBOMP_OS }\"";
+ die "Unknown OS specified in LIBOMP_OS environment variable: \"$ENV{ LIBOMP_OS }\"";
}; # if
} else {
# Otherwise use host OS.
diff --git a/openmp/runtime/tools/lib/Uname.pm b/openmp/runtime/tools/lib/Uname.pm
index f978f8b8..9556884 100644
--- a/openmp/runtime/tools/lib/Uname.pm
+++ b/openmp/runtime/tools/lib/Uname.pm
@@ -145,6 +145,8 @@
$values{ hardware_platform } = "i386";
} elsif ( $values{ machine } =~ m{\Ax86_64\z} ) {
$values{ hardware_platform } = "x86_64";
+ } elsif ( $values{ machine } =~ m{\Aarmv7\D*\z} ) {
+ $values{ hardware_platform } = "arm";
} else {
die "Unsupported machine (\"$values{ machine }\") returned by POSIX::uname(); stopped";
}; # if
@@ -276,7 +278,7 @@
or runtime_error( "$release: Cannot find the first line:", $bulk, "(eof)" );
my $first_line = $1;
$values{ operating_system_description } = $first_line;
- $first_line =~ m{\A(.*?)\s+release\s+(.*?)\s+\((.*?)(?:\s+Update\s+(.*?))?\)\s*$}
+ $first_line =~ m{\A(.*?)\s+release\s+(.*?)(?:\s+\((.*?)(?:\s+Update\s+(.*?))?\))?\s*$}
or runtime_error( "$release:1: Cannot parse line:", $first_line );
$values{ operating_system_name } = $1;
$values{ operating_system_release } = $2 . ( defined( $4 ) ? ".$4" : "" );
diff --git a/openmp/runtime/tools/src/common-checks.mk b/openmp/runtime/tools/src/common-checks.mk
index 08c246f..0959fc6 100644
--- a/openmp/runtime/tools/src/common-checks.mk
+++ b/openmp/runtime/tools/src/common-checks.mk
@@ -19,17 +19,27 @@
# Check tools versions.
#
ifeq "$(clean)" "" # Do not check tools if clean goal specified.
- ifeq "$(c)" "gcc"
- curr_tools := $(strip $(shell $(perl) $(tools_dir)check-tools.pl $(oa-opts) --no-intel --gnu-fortran --make))
- ifneq "$(findstring N/A,$(curr_tools))" ""
- curr_tools := $(strip $(shell $(perl) $(tools_dir)check-tools.pl $(oa-opts) --make))
- fort = ifort
- else
- fort = gfortran
- endif
+
+ check_tools_flags = --make
+
+ # determine if fortran check is required from goals
+ # MAKECMDGOALS is like argv for gnu make
+ ifneq "$(filter mod all,$(MAKECMDGOALS))" ""
+ check_tools_flags += --fortran
else
- curr_tools := $(strip $(shell $(perl) $(tools_dir)check-tools.pl $(oa-opts) --make))
+ ifeq "$(MAKECMDGOALS)" "" # will default to all if no goals specified on command line
+ check_tools_flags += --fortran
+ endif
endif
+ ifneq "$(filter gcc clang,$(c))" "" # if build compiler is gcc or clang
+ check_tools_flags += --no-intel
+ endif
+ ifeq "$(c)" "clang"
+ check_tools_flags += --clang
+ endif
+
+ curr_tools := $(strip $(shell $(perl) $(tools_dir)check-tools.pl $(oa-opts) $(check_tools_flags)))
+
ifeq "$(curr_tools)" ""
$(error check-tools.pl failed)
endif
diff --git a/openmp/runtime/tools/src/common-defs.mk b/openmp/runtime/tools/src/common-defs.mk
index 1c164bc..ebd1922 100644
--- a/openmp/runtime/tools/src/common-defs.mk
+++ b/openmp/runtime/tools/src/common-defs.mk
@@ -45,7 +45,7 @@
# Description:
# The function return printable name of specified architecture, IA-32 architecture or Intel(R) 64.
#
-legal_arch = $(if $(filter 32,$(1)),IA-32,$(if $(filter 32e,$(1)),Intel(R) 64,$(if $(filter l1,$(1)),L1OM,$(error Bad architecture specified: $(1))))))
+legal_arch = $(if $(filter 32,$(1)),IA-32,$(if $(filter 32e,$(1)),Intel(R) 64,$(if $(filter l1,$(1)),L1OM,$(if $(filter arm,$(1)),ARM,$(error Bad architecture specified: $(1))))))
# Synopsis:
# var_name = $(call check_variable,var,list)
@@ -128,9 +128,9 @@
# --------------------------------------------------------------------------------------------------
os := $(call check_variable,os,lin lrb mac win)
-arch := $(call check_variable,arch,32 32e 64)
+arch := $(call check_variable,arch,32 32e 64 arm)
platform := $(os)_$(arch)
-platform := $(call check_variable,platform,lin_32 lin_32e lin_64 lrb_32e mac_32 mac_32e win_32 win_32e win_64)
+platform := $(call check_variable,platform,lin_32 lin_32e lin_64 lin_arm lrb_32e mac_32 mac_32e win_32 win_32e win_64)
# oa-opts means "os and arch options". They are passed to almost all perl scripts.
oa-opts := --os=$(os) --arch=$(arch)
diff --git a/openmp/runtime/tools/src/common-tools.mk b/openmp/runtime/tools/src/common-tools.mk
index 65bc92e..a9c9fbc 100644
--- a/openmp/runtime/tools/src/common-tools.mk
+++ b/openmp/runtime/tools/src/common-tools.mk
@@ -33,6 +33,10 @@
# on Windows* OS generates such a dependency: "kmp_runtime.obj: .\kmp_i18n.inc", and make complains
# "No rule to build .\kmp_i18n.inc". Using "./" solves the problem.
cpp-flags += -I ./
+# For non-x86 architecture
+ifeq "$(filter 32 32e 64,$(arch))" ""
+ cpp-flags += $(shell pkg-config --cflags libffi)
+endif
# Add all VPATH directories to path for searching include files.
cpp-flags += $(foreach i,$(VPATH),-I $(i))
@@ -60,6 +64,9 @@
ifeq "$(c)" "gcc"
cxx = g++
endif
+ ifeq "$(c)" "clang"
+ cxx = clang++
+ endif
# Output file flag.
c-out = -o$(space)
cxx-out = -o$(space)
@@ -70,7 +77,9 @@
c-flags-m += -M -MG
cxx-flags-m += -M -MG
# Enable C99 language.
- c-flags += -std=c99
+ ifneq "$(CPLUSPLUS)" "on"
+ c-flags += -std=gnu99
+ endif
# Generate position-independent code (a must for shared objects).
ifeq "$(LINK_TYPE)" "dyna"
c-flags += -fPIC
@@ -118,12 +127,24 @@
ifeq "$(c)" "gcc"
as = gcc
endif
+ ifeq "$(c)" "clang"
+ as = clang
+ endif
as-out = -o$(space)
as-flags += $(cpp-flags)
# Compile only, no link.
as-flags += -c
as-flags += -x assembler-with-cpp
# --- Fortran ---
+ ifeq "$(c)" "icc"
+ fort = ifort
+ endif
+ ifeq "$(c)" "gcc"
+ fort = gfortran
+ endif
+ ifeq "$(c)" "clang"
+ fort = gfortran
+ endif
ifeq "$(fort)" ""
fort = ifort
endif
@@ -148,6 +169,11 @@
cxx-flags += -mia32
endif
endif
+ ifeq "$(c)" "gcc"
+ ifeq "$(arch)" "arm"
+ c-flags += -marm
+ endif
+ endif
# --- Librarian ---
ar = ar
ar-out = $(empty)
@@ -298,7 +324,9 @@
c-flags-m += -QM -QMM -QMG
cxx-flags-m += -QM -QMM -QMG
# Enable C99 language.
- c-flags += -Qstd=c99
+ ifneq "$(CPLUSPLUS)" "on"
+ c-flags += -Qstd=gnu99
+ endif
# Enable C++ exception handling.
# ??? Why we disable it on Linux* OS?
cxx-flags += -EHsc
diff --git a/openmp/www/README.txt b/openmp/www/README.txt
index 3880bf0..6ecca7f 100644
--- a/openmp/www/README.txt
+++ b/openmp/www/README.txt
@@ -74,13 +74,13 @@
Supported Architectures: IA-32 architecture, Intel(R) 64, and
Intel(R) Many Integrated Core Architecture
- -----------------------------------------------------------
- | icc/icl | gcc |
---------------|------------------------------|--------------------------|
-| Linux* OS | Yes(1,5) | Yes(2,4) |
-| OS X* | Yes(1,3,4) | No |
-| Windows* OS | Yes(1,4) | No |
--------------------------------------------------------------------------
+ ---------------------------------------------------------
+              |   icc/icl   |     gcc     |    clang    |
+--------------|-------------|-------------|-------------|
+| Linux* OS   |  Yes(1,5)   |  Yes(2,4)   | Yes(4,6,7)  |
+| OS X*       |  Yes(1,3,4) |  No         | Yes(4,6,7)  |
+| Windows* OS |  Yes(1,4)   |  No         |  No         |
+----------------------------------------------------------
(1) On IA-32 architecture and Intel(R) 64, icc/icl versions 12.x are
supported (12.1 is recommended).
@@ -89,6 +89,14 @@
(4) Intel(R) Many Integrated Core Architecture not supported.
(5) On Intel(R) Many Integrated Core Architecture, icc/icl versions 13.0
or later are required.
+(6) clang version 3.3 is supported.
+(7) clang currently does not offer a software-implemented 128 bit extended
+ precision type. Thus, all entry points reliant on this type are removed
+ from the library and cannot be called in the user program. The following
+ functions are not available:
+ __kmpc_atomic_cmplx16_*
+ __kmpc_atomic_float16_*
+ __kmpc_atomic_*_fp
Front-end Compilers that work with this RTL
===========================================
diff --git a/openmp/www/Reference.pdf b/openmp/www/Reference.pdf
index 60ce400..680f98c 100644
--- a/openmp/www/Reference.pdf
+++ b/openmp/www/Reference.pdf
Binary files differ