Initial commit: Final state of the master project
This commit is contained in:
217
Research/inc/tbb/machine/gcc_armv7.h
Normal file
217
Research/inc/tbb/machine/gcc_armv7.h
Normal file
@@ -0,0 +1,217 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
/*
|
||||
Platform isolation layer for the ARMv7-a architecture.
|
||||
*/
|
||||
|
||||
#ifndef __TBB_machine_H
|
||||
#error Do not include this file directly; include tbb_machine.h instead
|
||||
#endif
|
||||
|
||||
//TODO: is ARMv7 the only version that will ever be supported?
|
||||
#if !(__ARM_ARCH_7A__)
|
||||
#error compilation requires an ARMv7-a architecture.
|
||||
#endif
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define __TBB_WORDSIZE 4
|
||||
|
||||
// Traditionally ARM is little-endian.
|
||||
// Note that, since only the layout of aligned 32-bit words is of interest,
|
||||
// any apparent PDP-endianness of 32-bit words at half-word alignment or
|
||||
// any little-endian ordering of big-endian 32-bit words in 64-bit quantities
|
||||
// may be disregarded for this setting.
|
||||
#if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__)
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
|
||||
#elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__)
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
|
||||
#elif defined(__BYTE_ORDER__)
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
|
||||
#else
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
|
||||
#endif
|
||||
|
||||
|
||||
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
|
||||
#define __TBB_full_memory_fence() __asm__ __volatile__("dmb ish": : :"memory")
|
||||
#define __TBB_control_consistency_helper() __TBB_full_memory_fence()
|
||||
#define __TBB_acquire_consistency_helper() __TBB_full_memory_fence()
|
||||
#define __TBB_release_consistency_helper() __TBB_full_memory_fence()
|
||||
|
||||
//--------------------------------------------------
|
||||
// Compare and swap
|
||||
//--------------------------------------------------
|
||||
|
||||
/**
 * Atomic CAS for 32 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
 * @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
 * @param value value to assign *ptr to if *ptr==comparand
 * @param comparand value to compare with *ptr
 * @return value originally in memory at ptr, regardless of success
 */
static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand )
{
    int32_t oldval, res;

    // Full fence before and after the LL/SC sequence gives the CAS
    // sequentially-consistent semantics.
    __TBB_full_memory_fence();

    do {
        // Load-linked / store-conditional loop: retry until the exclusive
        // store succeeds (res == 0) or the comparison fails (store skipped,
        // res stays 0 from the mov).
        __asm__ __volatile__(
        "ldrex %1, [%3]\n"       // oldval = *ptr, marking exclusive access
        "mov %0, #0\n"           // res = 0 (assume success)
        "cmp %1, %4\n"           // oldval == comparand ?
        "it eq\n"                // Thumb-2 IT block guarding the conditional store
        "strexeq %0, %5, [%3]\n" // if equal: try *ptr = value; res = 1 if exclusivity was lost
        : "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int32_t*)ptr)
        : "r" ((int32_t *)ptr), "Ir" (comparand), "r" (value)
        : "cc");
    } while (res); // res != 0 means strex lost the reservation; retry

    __TBB_full_memory_fence();

    return oldval;
}
|
||||
|
||||
/**
 * Atomic CAS for 64 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
 * @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
 * @param value value to assign *ptr to if *ptr==comparand
 * @param comparand value to compare with *ptr
 * @return value originally in memory at ptr, regardless of success
 */
static inline int64_t __TBB_machine_cmpswp8(volatile void *ptr, int64_t value, int64_t comparand )
{
    int64_t oldval;
    int32_t res;

    // Fences before and after give the operation sequentially-consistent semantics.
    __TBB_full_memory_fence();

    do {
        // 64-bit LL/SC loop using the ldrexd/strexd register-pair forms.
        // %H1/%H4/%H5 name the high word of the corresponding 64-bit operand.
        __asm__ __volatile__(
        "mov %0, #0\n"                // res = 0 (assume success)
        "ldrexd %1, %H1, [%3]\n"      // oldval = *ptr (exclusive 64-bit load)
        "cmp %1, %4\n"                // compare low words
        "it eq\n"
        "cmpeq %H1, %H4\n"            // low words equal: compare high words
        "it eq\n"
        "strexdeq %0, %5, %H5, [%3]"  // fully equal: try the exclusive 64-bit store
        : "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int64_t*)ptr)
        : "r" ((int64_t *)ptr), "r" (comparand), "r" (value)
        : "cc");
    } while (res); // retry if the exclusive store lost its reservation

    __TBB_full_memory_fence();

    return oldval;
}
|
||||
|
||||
/**
 * Atomic fetch-and-add for 32-bit values with full-fence semantics.
 * @param ptr pointer to the 32-bit value to add to
 * @param addend value to add to *ptr
 * @return value originally at ptr (before the addition)
 */
static inline int32_t __TBB_machine_fetchadd4(volatile void* ptr, int32_t addend)
{
    unsigned long tmp;
    int32_t result, tmp2;

    __TBB_full_memory_fence();

    // LL/SC loop: load the old value, add, attempt exclusive store,
    // and branch back to label 1 until strex reports success (tmp == 0).
    __asm__ __volatile__(
"1: ldrex %0, [%4]\n"       // result = *ptr (exclusive load)
" add %3, %0, %5\n"         // tmp2 = result + addend
" strex %1, %3, [%4]\n"     // try *ptr = tmp2; tmp = 1 on failure
" cmp %1, #0\n"
" bne 1b\n"                 // retry if the exclusive store failed
    : "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int32_t*)ptr), "=&r"(tmp2)
    : "r" ((int32_t *)ptr), "Ir" (addend)
    : "cc");

    __TBB_full_memory_fence();

    return result;
}
|
||||
|
||||
/**
 * Atomic fetch-and-add for 64-bit values with full-fence semantics.
 * @param ptr pointer to the 64-bit value to add to
 * @param addend value to add to *ptr
 * @return value originally at ptr (before the addition)
 */
static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend)
{
    unsigned long tmp;
    int64_t result, tmp2;

    __TBB_full_memory_fence();

    // 64-bit LL/SC loop: the addition is done as adds/adc (low word with
    // carry out, high word with carry in); %H0/%H3/%H5 are the high words.
    __asm__ __volatile__(
"1: ldrexd %0, %H0, [%4]\n"     // result = *ptr (exclusive 64-bit load)
" adds %3, %0, %5\n"            // low(tmp2) = low(result) + low(addend), sets carry
" adc %H3, %H0, %H5\n"          // high(tmp2) = high(result) + high(addend) + carry
" strexd %1, %3, %H3, [%4]\n"   // try *ptr = tmp2; tmp = 1 on failure
" cmp %1, #0\n"
" bne 1b"                       // retry until the exclusive store succeeds
    : "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int64_t*)ptr), "=&r"(tmp2)
    : "r" ((int64_t *)ptr), "r" (addend)
    : "cc");

    __TBB_full_memory_fence();

    return result;
}
|
||||
|
||||
inline void __TBB_machine_pause (int32_t delay )
|
||||
{
|
||||
while(delay>0)
|
||||
{
|
||||
__TBB_compiler_fence();
|
||||
delay--;
|
||||
}
|
||||
}
|
||||
|
||||
namespace tbb {
namespace internal {
// Relaxed (unordered) load/store specialization for the ARMv7 port.
// Note: the load side is NOT fully relaxed — it issues an extra barrier to
// work around ARM errata #761319 (see link below).
template <typename T, size_t S>
struct machine_load_store_relaxed {
    static inline T load ( const volatile T& location ) {
        const T value = location;

        /*
        * An extra memory barrier is required for errata #761319
        * Please see http://infocenter.arm.com/help/topic/com.arm.doc.uan0004a
        */
        __TBB_acquire_consistency_helper();
        return value;
    }

    // Plain store; no additional ordering is imposed here.
    static inline void store ( volatile T& location, T value ) {
        location = value;
    }
};
}} // namespaces internal, tbb
|
||||
|
||||
// Machine specific atomic operations
|
||||
|
||||
#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
|
||||
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
|
||||
#define __TBB_Pause(V) __TBB_machine_pause(V)
|
||||
|
||||
// Use generics for some things
|
||||
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
|
||||
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
|
||||
#define __TBB_USE_GENERIC_PART_WORD_FETCH_STORE 1
|
||||
#define __TBB_USE_GENERIC_FETCH_STORE 1
|
||||
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
|
||||
131
Research/inc/tbb/machine/gcc_generic.h
Normal file
131
Research/inc/tbb/machine/gcc_generic.h
Normal file
@@ -0,0 +1,131 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
#if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_generic_H)
|
||||
#error Do not #include this internal file directly; use public TBB headers instead.
|
||||
#endif
|
||||
|
||||
#define __TBB_machine_gcc_generic_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define __TBB_WORDSIZE __SIZEOF_POINTER__
|
||||
|
||||
#if __TBB_GCC_64BIT_ATOMIC_BUILTINS_BROKEN
|
||||
#define __TBB_64BIT_ATOMICS 0
|
||||
#endif
|
||||
|
||||
/** FPU control setting not available for non-Intel architectures on Android **/
|
||||
#if __ANDROID__ && __TBB_generic_arch
|
||||
#define __TBB_CPU_CTL_ENV_PRESENT 0
|
||||
#endif
|
||||
|
||||
// __BYTE_ORDER__ is used in accordance with http://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html,
|
||||
// but __BIG_ENDIAN__ or __LITTLE_ENDIAN__ may be more commonly found instead.
|
||||
#if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__)
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
|
||||
#elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__)
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
|
||||
#elif defined(__BYTE_ORDER__)
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
|
||||
#else
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
|
||||
#endif
|
||||
|
||||
/** As this generic implementation has absolutely no information about underlying
|
||||
hardware, its performance most likely will be sub-optimal because of full memory
|
||||
fence usages where a more lightweight synchronization means (or none at all)
|
||||
could suffice. Thus if you use this header to enable TBB on a new platform,
|
||||
consider forking it and relaxing below helpers as appropriate. **/
|
||||
#define __TBB_acquire_consistency_helper() __sync_synchronize()
|
||||
#define __TBB_release_consistency_helper() __sync_synchronize()
|
||||
#define __TBB_full_memory_fence() __sync_synchronize()
|
||||
#define __TBB_control_consistency_helper() __sync_synchronize()
|
||||
|
||||
// Generates __TBB_machine_cmpswp<S> and __TBB_machine_fetchadd<S> for an
// integer type T of size S bytes, implemented with the GCC __sync builtins.
// Note the __sync CAS takes (ptr, expected, desired), so the TBB (value,
// comparand) arguments are passed in swapped order.
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T) \
inline T __TBB_machine_cmpswp##S( volatile void *ptr, T value, T comparand ) { \
    return __sync_val_compare_and_swap(reinterpret_cast<volatile T *>(ptr),comparand,value); \
} \
 \
inline T __TBB_machine_fetchadd##S( volatile void *ptr, T value ) { \
    return __sync_fetch_and_add(reinterpret_cast<volatile T *>(ptr),value); \
} \

// Instantiate the atomics for 1-, 2-, 4- and 8-byte operands.
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t)
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t)
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t)
__TBB_MACHINE_DEFINE_ATOMICS(8,int64_t)

#undef __TBB_MACHINE_DEFINE_ATOMICS
|
||||
|
||||
namespace tbb{ namespace internal { namespace gcc_builtins {
    // Overload-dispatched wrappers over GCC's count-leading-zeros builtins,
    // so __TBB_machine_lg picks the right width for uintptr_t automatically.
    inline int clz(unsigned int x){ return __builtin_clz(x);};
    inline int clz(unsigned long int x){ return __builtin_clzl(x);};
    inline int clz(unsigned long long int x){ return __builtin_clzll(x);};
}}}
// GCC's __builtin_clz counts the number of leading zero bits, so
// floor(log2(x)) == bit_width(x) - clz(x) - 1.
// NOTE(review): __builtin_clz is undefined for x == 0; callers are expected
// to pass a positive value.
static inline intptr_t __TBB_machine_lg( uintptr_t x ) {
    return sizeof(x)*8 - tbb::internal::gcc_builtins::clz(x) -1 ;
}
|
||||
|
||||
// Atomically ORs `addend` into the machine word at `ptr` (full barrier,
// per the __sync builtin's semantics); the previous value is discarded.
static inline void __TBB_machine_or( volatile void *ptr, uintptr_t addend ) {
    volatile uintptr_t *word = reinterpret_cast<volatile uintptr_t *>(ptr);
    __sync_fetch_and_or(word, addend);
}
|
||||
|
||||
// Atomically ANDs `addend` into the machine word at `ptr` (full barrier,
// per the __sync builtin's semantics); the previous value is discarded.
static inline void __TBB_machine_and( volatile void *ptr, uintptr_t addend ) {
    volatile uintptr_t *word = reinterpret_cast<volatile uintptr_t *>(ptr);
    __sync_fetch_and_and(word, addend);
}
|
||||
|
||||
|
||||
// One-byte spin-lock flag: 0 == unlocked, 1 == locked.
typedef unsigned char __TBB_Flag;

typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag;

// Attempts to acquire the byte lock. __sync_lock_test_and_set atomically
// writes 1 and returns the previous value (acquire barrier); the lock was
// obtained iff the previous value was 0.
inline bool __TBB_machine_try_lock_byte( __TBB_atomic_flag &flag ) {
    return __sync_lock_test_and_set(&flag,1)==0;
}

// Releases the byte lock (release barrier per the __sync builtin).
inline void __TBB_machine_unlock_byte( __TBB_atomic_flag &flag ) {
    __sync_lock_release(&flag);
}
|
||||
|
||||
// Machine specific atomic operations
|
||||
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
|
||||
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
|
||||
|
||||
#define __TBB_TryLockByte __TBB_machine_try_lock_byte
|
||||
#define __TBB_UnlockByte __TBB_machine_unlock_byte
|
||||
|
||||
// Definition of other functions
|
||||
#define __TBB_Log2(V) __TBB_machine_lg(V)
|
||||
|
||||
#define __TBB_USE_GENERIC_FETCH_STORE 1
|
||||
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
|
||||
|
||||
#if __TBB_WORDSIZE==4
|
||||
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
|
||||
#endif
|
||||
|
||||
#if __TBB_x86_32 || __TBB_x86_64
|
||||
#include "gcc_itsx.h"
|
||||
#endif
|
||||
100
Research/inc/tbb/machine/gcc_ia32_common.h
Normal file
100
Research/inc/tbb/machine/gcc_ia32_common.h
Normal file
@@ -0,0 +1,100 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
#ifndef __TBB_machine_gcc_ia32_common_H
|
||||
#define __TBB_machine_gcc_ia32_common_H
|
||||
|
||||
//TODO: Add a higher-level function, e.g. tbb::interal::log2(), into tbb_stddef.h, which
|
||||
//uses __TBB_Log2 and contains the assert and remove the assert from here and all other
|
||||
//platform-specific headers.
|
||||
//TODO: Check if use of gcc intrinsic gives a better chance for cross call optimizations
|
||||
// Computes floor(log2(x)) for a positive integer x using the x86 BSR
// instruction (bit-scan-reverse: index of the highest set bit).
// Behavior is undefined for x <= 0 (guarded by the assertion).
template <typename T>
static inline intptr_t __TBB_machine_lg( T x ) {
    __TBB_ASSERT(x>0, "The logarithm of a non-positive value is undefined.");
    uintptr_t j, i = x;
    __asm__("bsr %1,%0" : "=r"(j) : "r"(i)); // j = index of most-significant set bit
    return j;
}
#define __TBB_Log2(V) __TBB_machine_lg(V)
|
||||
|
||||
#ifndef __TBB_Pause
|
||||
//TODO: check if raising a ratio of pause instructions to loop control instructions
|
||||
//(via e.g. loop unrolling) gives any benefit for HT. E.g, the current implementation
|
||||
//does about 2 CPU-consuming instructions for every pause instruction. Perhaps for
|
||||
//high pause counts it should use an unrolled loop to raise the ratio, and thus free
|
||||
//up more integer cycles for the other hyperthread. On the other hand, if the loop is
|
||||
//unrolled too far, it won't fit in the core's loop cache, and thus take away
|
||||
//instruction decode slots from the other hyperthread.
|
||||
|
||||
//TODO: check if use of gcc __builtin_ia32_pause intrinsic gives a "some how" better performing code
|
||||
// Spin for `delay` iterations, issuing the x86 PAUSE instruction each time.
// PAUSE hints to the CPU that this is a spin-wait loop, improving
// hyper-threading behavior and reducing power versus a plain busy loop.
static inline void __TBB_machine_pause( int32_t delay ) {
    while ( delay-- > 0 ) {
        __asm__ __volatile__("pause;");
    }
}
#define __TBB_Pause(V) __TBB_machine_pause(V)
|
||||
#endif /* !__TBB_Pause */
|
||||
|
||||
// API to retrieve/update FPU control setting
|
||||
#ifndef __TBB_CPU_CTL_ENV_PRESENT
|
||||
#define __TBB_CPU_CTL_ENV_PRESENT 1
|
||||
namespace tbb {
namespace internal {
// Snapshot of the x86 FPU control state: the SSE MXCSR register and the
// x87 control word. Used to save/restore floating-point settings
// (rounding mode, exception masks) across task boundaries.
class cpu_ctl_env {
private:
    int mxcsr;   // SSE control/status register (status bits masked off below)
    short x87cw; // x87 FPU control word
    static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */
public:
    bool operator!=( const cpu_ctl_env& ctl ) const { return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; }
    // Reads the current MXCSR and x87 control word into this object.
    void get_env() {
#if __TBB_ICC_12_0_INL_ASM_FSTCW_BROKEN
        // Workaround: read into a local copy first, then assign, because this
        // ICC version miscompiles fstcw directly into a member (per the macro name).
        cpu_ctl_env loc_ctl;
        __asm__ __volatile__ (
                "stmxcsr %0\n\t"
                "fstcw %1"
                : "=m"(loc_ctl.mxcsr), "=m"(loc_ctl.x87cw)
        );
        *this = loc_ctl;
#else
        __asm__ __volatile__ (
                "stmxcsr %0\n\t"
                "fstcw %1"
                : "=m"(mxcsr), "=m"(x87cw)
        );
#endif
        // Keep only control bits so comparisons ignore transient status flags.
        mxcsr &= MXCSR_CONTROL_MASK;
    }
    // Writes the stored MXCSR and x87 control word back to the FPU.
    void set_env() const {
        __asm__ __volatile__ (
                "ldmxcsr %0\n\t"
                "fldcw %1"
                : : "m"(mxcsr), "m"(x87cw)
        );
    }
};
} // namespace internal
} // namespace tbb
|
||||
#endif /* !__TBB_CPU_CTL_ENV_PRESENT */
|
||||
|
||||
#include "gcc_itsx.h"
|
||||
|
||||
#endif /* __TBB_machine_gcc_ia32_common_H */
|
||||
123
Research/inc/tbb/machine/gcc_itsx.h
Normal file
123
Research/inc/tbb/machine/gcc_itsx.h
Normal file
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
#if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_itsx_H)
|
||||
#error Do not #include this internal file directly; use public TBB headers instead.
|
||||
#endif
|
||||
|
||||
#define __TBB_machine_gcc_itsx_H
|
||||
|
||||
#define __TBB_OP_XACQUIRE 0xF2
|
||||
#define __TBB_OP_XRELEASE 0xF3
|
||||
#define __TBB_OP_LOCK 0xF0
|
||||
|
||||
#define __TBB_STRINGIZE_INTERNAL(arg) #arg
|
||||
#define __TBB_STRINGIZE(arg) __TBB_STRINGIZE_INTERNAL(arg)
|
||||
|
||||
#ifdef __TBB_x86_64
|
||||
#define __TBB_r_out "=r"
|
||||
#else
|
||||
#define __TBB_r_out "=q"
|
||||
#endif
|
||||
|
||||
// Attempts to acquire a byte lock using Intel HLE: the XACQUIRE prefix byte
// is emitted before a locked xchg so capable hardware may elide the lock.
// Returns 1 if the lock was acquired (its previous value was 0), else 0.
inline static uint8_t __TBB_machine_try_lock_elided( volatile uint8_t* lk )
{
    uint8_t value = 1;
    __asm__ volatile (".byte " __TBB_STRINGIZE(__TBB_OP_XACQUIRE)"; lock; xchgb %0, %1;"
                      : __TBB_r_out(value), "=m"(*lk) : "0"(value), "m"(*lk) : "memory" );
    // xchg leaves the lock's previous value in `value`; 0 meant unlocked,
    // so XOR with 1 converts it into a success flag.
    return uint8_t(value^1);
}
|
||||
|
||||
// Deliberately aborts a speculative (HLE/RTM) execution in progress.
inline static void __TBB_machine_try_lock_elided_cancel()
{
    // 'pause' instruction aborts HLE/RTM transactions
    __asm__ volatile ("pause\n" : : : "memory" );
}
|
||||
|
||||
// Releases an HLE-acquired byte lock: the XRELEASE prefix byte before the
// store lets the hardware commit the elided critical section.
inline static void __TBB_machine_unlock_elided( volatile uint8_t* lk )
{
    __asm__ volatile (".byte " __TBB_STRINGIZE(__TBB_OP_XRELEASE)"; movb $0, %0"
                      : "=m"(*lk) : "m"(*lk) : "memory" );
}
|
||||
|
||||
#if __TBB_TSX_INTRINSICS_PRESENT
|
||||
#include <immintrin.h>
|
||||
|
||||
#define __TBB_machine_is_in_transaction _xtest
|
||||
#define __TBB_machine_begin_transaction _xbegin
|
||||
#define __TBB_machine_end_transaction _xend
|
||||
#define __TBB_machine_transaction_conflict_abort() _xabort(0xff)
|
||||
|
||||
#else
|
||||
|
||||
/*!
|
||||
* Check if the instruction is executed in a transaction or not
|
||||
*/
|
||||
/*!
 * Check if the instruction is executed in a transaction or not
 */
inline static bool __TBB_machine_is_in_transaction()
{
    int8_t res = 0;
    // Bytes 0F 01 D6 encode the XTEST instruction (emitted raw so this
    // compiles on assemblers that do not know TSX mnemonics). XTEST sets
    // ZF when NOT in a transaction; setz captures that flag.
#if __TBB_x86_32
    // 32-bit mode restricts setz to the byte-addressable registers ("=q").
    __asm__ volatile (".byte 0x0F; .byte 0x01; .byte 0xD6;\n"
                      "setz %0" : "=q"(res) : : "memory" );
#else
    __asm__ volatile (".byte 0x0F; .byte 0x01; .byte 0xD6;\n"
                      "setz %0" : "=r"(res) : : "memory" );
#endif
    // ZF clear (res==0) means we are inside a transaction.
    return res==0;
}
|
||||
|
||||
/*!
|
||||
* Enter speculative execution mode.
|
||||
* @return -1 on success
|
||||
* abort cause ( or 0 ) on abort
|
||||
*/
|
||||
/*!
 * Enter speculative execution mode.
 * @return -1 on success
 *         abort cause ( or 0 ) on abort
 */
inline static uint32_t __TBB_machine_begin_transaction()
{
    uint32_t res = ~uint32_t(0); // success value
    // XBEGIN is emitted as raw bytes (C7 F8 + rel32) so no TSX-aware
    // assembler is required. On abort, control resumes at the rel32 target
    // (label 2) with the abort status in EAX.
    __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"            // XBEGIN <abort-offset>
                      "   .long 2f-1b-6\n"                      // 2f-1b == difference in addresses of start
                                                                // of XBEGIN and the MOVL
                                                                // 2f - 1b - 6 == that difference minus the size of the
                                                                // XBEGIN instruction. This is the abort offset to
                                                                // 2: below.
                      "   jmp 3f\n"                             // success (leave -1 in res)
                      "2: movl %%eax,%0\n"                      // store failure code in res
                      "3:"
                      :"=r"(res):"0"(res):"memory","%eax");
    return res;
}
|
||||
|
||||
/*!
|
||||
* Attempt to commit/end transaction
|
||||
*/
|
||||
/*!
 * Attempt to commit/end transaction
 */
inline static void __TBB_machine_end_transaction()
{
    // Bytes 0F 01 D5 encode XEND, emitted raw for old assemblers.
    __asm__ volatile (".byte 0x0F; .byte 0x01; .byte 0xD5" :::"memory"); // XEND
}
|
||||
|
||||
/*
|
||||
* aborts with code 0xFF (lock already held)
|
||||
*/
|
||||
/*
 * aborts with code 0xFF (lock already held)
 */
inline static void __TBB_machine_transaction_conflict_abort()
{
    // Bytes C6 F8 FF encode XABORT 0xFF, emitted raw for old assemblers.
    __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte 0xFF" :::"memory");
}
|
||||
|
||||
#endif /* __TBB_TSX_INTRINSICS_PRESENT */
|
||||
70
Research/inc/tbb/machine/ibm_aix51.h
Normal file
70
Research/inc/tbb/machine/ibm_aix51.h
Normal file
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
// TODO: revise by comparing with mac_ppc.h
|
||||
|
||||
#if !defined(__TBB_machine_H) || defined(__TBB_machine_ibm_aix51_H)
|
||||
#error Do not #include this internal file directly; use public TBB headers instead.
|
||||
#endif
|
||||
|
||||
#define __TBB_machine_ibm_aix51_H
|
||||
|
||||
#define __TBB_WORDSIZE 8
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG // assumption based on operating system
|
||||
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
#include <sched.h>
|
||||
|
||||
// Entry points implemented outside this header (in platform-specific
// assembly/runtime code for AIX); declared with C linkage.
extern "C" {
// 32-/64-bit compare-and-swap: if *ptr==comparand then *ptr=value; returns the original *ptr.
int32_t __TBB_machine_cas_32 (volatile void* ptr, int32_t value, int32_t comparand);
int64_t __TBB_machine_cas_64 (volatile void* ptr, int64_t value, int64_t comparand);
// Memory-ordering primitives (used below when inline asm is unavailable).
void __TBB_machine_flush ();
void __TBB_machine_lwsync ();
void __TBB_machine_isync ();
}
|
||||
|
||||
// Mapping of old entry point names retained for the sake of backward binary compatibility
|
||||
#define __TBB_machine_cmpswp4 __TBB_machine_cas_32
|
||||
#define __TBB_machine_cmpswp8 __TBB_machine_cas_64
|
||||
|
||||
#define __TBB_Yield() sched_yield()
|
||||
|
||||
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
|
||||
#define __TBB_USE_GENERIC_FETCH_ADD 1
|
||||
#define __TBB_USE_GENERIC_FETCH_STORE 1
|
||||
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
|
||||
|
||||
#if __GNUC__
|
||||
#define __TBB_control_consistency_helper() __asm__ __volatile__( "isync": : :"memory")
|
||||
#define __TBB_acquire_consistency_helper() __asm__ __volatile__("lwsync": : :"memory")
|
||||
#define __TBB_release_consistency_helper() __asm__ __volatile__("lwsync": : :"memory")
|
||||
#define __TBB_full_memory_fence() __asm__ __volatile__( "sync": : :"memory")
|
||||
#else
|
||||
// IBM C++ Compiler does not support inline assembly
|
||||
// TODO: Since XL 9.0 or earlier GCC syntax is supported. Replace with more
|
||||
// lightweight implementation (like in mac_ppc.h)
|
||||
#define __TBB_control_consistency_helper() __TBB_machine_isync ()
|
||||
#define __TBB_acquire_consistency_helper() __TBB_machine_lwsync ()
|
||||
#define __TBB_release_consistency_helper() __TBB_machine_lwsync ()
|
||||
#define __TBB_full_memory_fence() __TBB_machine_flush ()
|
||||
#endif
|
||||
258
Research/inc/tbb/machine/icc_generic.h
Normal file
258
Research/inc/tbb/machine/icc_generic.h
Normal file
@@ -0,0 +1,258 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
#if !defined(__TBB_machine_H) || defined(__TBB_machine_icc_generic_H)
|
||||
#error Do not #include this internal file directly; use public TBB headers instead.
|
||||
#endif
|
||||
|
||||
#if ! __TBB_ICC_BUILTIN_ATOMICS_PRESENT
|
||||
#error "Intel C++ Compiler of at least 12.0 version is needed to use ICC intrinsics port"
|
||||
#endif
|
||||
|
||||
#define __TBB_machine_icc_generic_H
|
||||
|
||||
//ICC mimics the "native" target compiler
|
||||
#if _MSC_VER
|
||||
#include "msvc_ia32_common.h"
|
||||
#else
|
||||
#include "gcc_ia32_common.h"
|
||||
#endif
|
||||
|
||||
//TODO: Make __TBB_WORDSIZE macro optional for ICC intrinsics port.
|
||||
//As compiler intrinsics are used for all the operations it is possible to do.
|
||||
|
||||
#if __TBB_x86_32
|
||||
#define __TBB_WORDSIZE 4
|
||||
#else
|
||||
#define __TBB_WORDSIZE 8
|
||||
#endif
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
|
||||
|
||||
//__TBB_compiler_fence() defined just in case, as it seems not to be used on its own anywhere else
|
||||
#if _MSC_VER
|
||||
//TODO: any way to use same intrinsics on windows and linux?
|
||||
#pragma intrinsic(_ReadWriteBarrier)
|
||||
#define __TBB_compiler_fence() _ReadWriteBarrier()
|
||||
#else
|
||||
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
|
||||
#endif
|
||||
|
||||
#ifndef __TBB_full_memory_fence
|
||||
#if _MSC_VER
|
||||
//TODO: any way to use same intrinsics on windows and linux?
|
||||
#pragma intrinsic(_mm_mfence)
|
||||
#define __TBB_full_memory_fence() _mm_mfence()
|
||||
#else
|
||||
#define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory")
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
|
||||
|
||||
namespace tbb { namespace internal {
|
||||
//TODO: is there any way to reuse definition of memory_order enum from ICC instead of copy paste.
|
||||
//however it seems unlikely that ICC will silently change exact enum values, as they are defined
|
||||
//in the ISO exactly like this.
|
||||
//TODO: add test that exact values of the enum are same as in the ISO C++11
|
||||
//! Memory-ordering constants mirroring the C++11 std::memory_order enumeration.
/** Enumerator order (and thus values, relaxed==0 .. seq_cst==5) deliberately
    matches the ISO C++11 definition so the constants can be passed straight
    to the ICC __atomic_*_explicit intrinsics. */
typedef enum memory_order {
    memory_order_relaxed, memory_order_consume, memory_order_acquire,
    memory_order_release, memory_order_acq_rel, memory_order_seq_cst
} memory_order;
|
||||
|
||||
namespace icc_intrinsics_port {
    //! Forward a non-pointer value to the atomic intrinsics unchanged.
    template <typename T>
    T convert_argument( T arg ) {
        return arg;
    }

    //! Decay a pointer argument to void* before it reaches the intrinsic.
    /** Without this overload the compiler fails to convert T* to void* in the
        argument list of the __atomic_* intrinsics (suspected compiler bug).
        TODO: add a corresponding "broken" macro and recheck with ICC 13.0
        whether the overload is still needed. */
    template <typename T>
    void* convert_argument( T* arg ) {
        return static_cast<void*>( arg );
    }
}
|
||||
//TODO: code below is a bit repetitive, consider simplifying it
|
||||
//! Acquire-load / release-store primitives for naturally aligned types of size S.
/** Implemented directly on top of the ICC __atomic_*_explicit intrinsics. */
template <typename T, size_t S>
struct machine_load_store {
    static T load_with_acquire ( const volatile T& location ) {
        return __atomic_load_explicit(&location, memory_order_acquire);
    }
    static void store_with_release ( volatile T &location, T value ) {
        // convert_argument() works around the pointer-argument issue in the intrinsics.
        __atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_release);
    }
};
|
||||
|
||||
//! Relaxed (atomic but unordered) load/store primitives for types of size S.
template <typename T, size_t S>
struct machine_load_store_relaxed {
    static inline T load ( const T& location ) {
        return __atomic_load_explicit(&location, memory_order_relaxed);
    }
    static inline void store ( T& location, T value ) {
        // convert_argument() works around the pointer-argument issue in the intrinsics.
        __atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_relaxed);
    }
};
|
||||
|
||||
//! Sequentially consistent load/store primitives for types of size S.
template <typename T, size_t S>
struct machine_load_store_seq_cst {
    static T load ( const volatile T& location ) {
        return __atomic_load_explicit(&location, memory_order_seq_cst);
    }

    // NOTE(review): unlike the release/relaxed stores above, this store does not
    // route `value` through icc_intrinsics_port::convert_argument() — confirm
    // whether the pointer-argument workaround is also needed here.
    static void store ( volatile T &location, T value ) {
        __atomic_store_explicit(&location, value, memory_order_seq_cst);
    }
};
|
||||
|
||||
}} // namespace tbb::internal
|
||||
|
||||
namespace tbb{ namespace internal { namespace icc_intrinsics_port{
//! Maps TBB fence-name suffixes (relaxed/acquire/release/full_fence) onto the
//! memory_order values expected by the ICC __atomic_*_explicit intrinsics.
/** Used as the M argument of __TBB_MACHINE_DEFINE_ATOMICS below. */
typedef enum memory_order_map {
    relaxed = memory_order_relaxed,
    acquire = memory_order_acquire,
    release = memory_order_release,
    full_fence= memory_order_seq_cst
} memory_order_map;
}}} // namespace tbb::internal::icc_intrinsics_port
|
||||
|
||||
//! Generates fenced compare-and-swap, fetch-and-store, and fetch-and-add entry
//! points for operand size S (bytes), operand type T, and memory ordering M
//! (a tbb::internal::icc_intrinsics_port::memory_order_map enumerator).
/** The cmpswp variant returns the previous contents of *ptr: the intrinsic
    stores the observed value into `comparand` on failure and leaves it equal
    to the observed value on success, so returning `comparand` is correct in
    both cases. */
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,M)                                                   \
inline T __TBB_machine_cmpswp##S##M( volatile void *ptr, T value, T comparand ) {             \
    __atomic_compare_exchange_strong_explicit(                                                \
            (T*)ptr                                                                           \
            ,&comparand                                                                       \
            ,value                                                                            \
            , tbb::internal::icc_intrinsics_port::M                                           \
            , tbb::internal::icc_intrinsics_port::M);                                         \
    return comparand;                                                                         \
}                                                                                             \
                                                                                              \
inline T __TBB_machine_fetchstore##S##M(volatile void *ptr, T value) {                        \
    return __atomic_exchange_explicit((T*)ptr, value, tbb::internal::icc_intrinsics_port::M); \
}                                                                                             \
                                                                                              \
inline T __TBB_machine_fetchadd##S##M(volatile void *ptr, T value) {                          \
    return __atomic_fetch_add_explicit((T*)ptr, value, tbb::internal::icc_intrinsics_port::M);\
}                                                                                             \
|
||||
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, full_fence)
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, acquire)
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, release)
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, relaxed)
|
||||
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, full_fence)
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, acquire)
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, release)
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, relaxed)
|
||||
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, full_fence)
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, acquire)
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, release)
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, relaxed)
|
||||
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, full_fence)
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, acquire)
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, release)
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, relaxed)
|
||||
|
||||
|
||||
#undef __TBB_MACHINE_DEFINE_ATOMICS
|
||||
|
||||
#define __TBB_USE_FENCED_ATOMICS 1
|
||||
|
||||
namespace tbb { namespace internal {
|
||||
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
|
||||
__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(full_fence)
|
||||
__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(full_fence)
|
||||
|
||||
__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(acquire)
|
||||
__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(release)
|
||||
|
||||
__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(relaxed)
|
||||
__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(relaxed)
|
||||
|
||||
//! 8-byte acquire/release load/store for targets where the compiler may fail
//! to 8-byte-align a 64-bit location (__TBB_FORCE_64BIT_ALIGNMENT_BROKEN).
/** Aligned locations use the intrinsics directly; misaligned ones fall back to
    the generic fenced helpers instantiated just above. */
template <typename T>
struct machine_load_store<T,8> {
    static T load_with_acquire ( const volatile T& location ) {
        if( tbb::internal::is_aligned(&location,8)) {
            return __atomic_load_explicit(&location, memory_order_acquire);
        } else {
            return __TBB_machine_generic_load8acquire(&location);
        }
    }
    static void store_with_release ( volatile T &location, T value ) {
        if( tbb::internal::is_aligned(&location,8)) {
            __atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_release);
        } else {
            return __TBB_machine_generic_store8release(&location,value);
        }
    }
};
|
||||
|
||||
//! 8-byte relaxed load/store with a fallback for misaligned locations
//! (__TBB_FORCE_64BIT_ALIGNMENT_BROKEN targets only).
template <typename T>
struct machine_load_store_relaxed<T,8> {
    static T load( const volatile T& location ) {
        if( tbb::internal::is_aligned(&location,8)) {
            return __atomic_load_explicit(&location, memory_order_relaxed);
        } else {
            return __TBB_machine_generic_load8relaxed(&location);
        }
    }
    static void store( volatile T &location, T value ) {
        if( tbb::internal::is_aligned(&location,8)) {
            // convert_argument() works around the pointer-argument issue in the intrinsics.
            __atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_relaxed);
        } else {
            return __TBB_machine_generic_store8relaxed(&location,value);
        }
    }
};
|
||||
|
||||
//! 8-byte sequentially consistent load/store with a fallback for misaligned
//! locations (__TBB_FORCE_64BIT_ALIGNMENT_BROKEN targets only).
template <typename T >
struct machine_load_store_seq_cst<T,8> {
    static T load ( const volatile T& location ) {
        if( tbb::internal::is_aligned(&location,8)) {
            return __atomic_load_explicit(&location, memory_order_seq_cst);
        } else {
            return __TBB_machine_generic_load8full_fence(&location);
        }
    }

    // NOTE(review): as in the primary template, this store does not use
    // icc_intrinsics_port::convert_argument() — confirm that is intentional.
    static void store ( volatile T &location, T value ) {
        if( tbb::internal::is_aligned(&location,8)) {
            __atomic_store_explicit(&location, value, memory_order_seq_cst);
        } else {
            return __TBB_machine_generic_store8full_fence(&location,value);
        }
    }
};
|
||||
|
||||
#endif
|
||||
}} // namespace tbb::internal
|
||||
//! Atomically OR `addend` (a bit mask) into *operand with a full fence.
template <typename T>
inline void __TBB_machine_OR( T *operand, T addend ) {
    __atomic_fetch_or_explicit(operand, addend, tbb::internal::memory_order_seq_cst);
}
|
||||
|
||||
//! Atomically AND `addend` (a bit mask) into *operand with a full fence.
template <typename T>
inline void __TBB_machine_AND( T *operand, T addend ) {
    __atomic_fetch_and_explicit(operand, addend, tbb::internal::memory_order_seq_cst);
}
|
||||
|
||||
84
Research/inc/tbb/machine/linux_common.h
Normal file
84
Research/inc/tbb/machine/linux_common.h
Normal file
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
#ifndef __TBB_machine_H
|
||||
#error Do not #include this internal file directly; use public TBB headers instead.
|
||||
#endif
|
||||
|
||||
#include <sched.h>
|
||||
#define __TBB_Yield() sched_yield()
|
||||
|
||||
#include <unistd.h>
|
||||
/* Futex definitions */
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#if defined(SYS_futex)
|
||||
|
||||
#define __TBB_USE_FUTEX 1
|
||||
#include <limits.h>
|
||||
#include <errno.h>
|
||||
// Unfortunately, some versions of Linux do not have a header that defines FUTEX_WAIT and FUTEX_WAKE.
|
||||
|
||||
#ifdef FUTEX_WAIT
|
||||
#define __TBB_FUTEX_WAIT FUTEX_WAIT
|
||||
#else
|
||||
#define __TBB_FUTEX_WAIT 0
|
||||
#endif
|
||||
|
||||
#ifdef FUTEX_WAKE
|
||||
#define __TBB_FUTEX_WAKE FUTEX_WAKE
|
||||
#else
|
||||
#define __TBB_FUTEX_WAKE 1
|
||||
#endif
|
||||
|
||||
#ifndef __TBB_ASSERT
|
||||
#error machine specific headers must be included after tbb_stddef.h
|
||||
#endif
|
||||
|
||||
namespace tbb {
|
||||
|
||||
namespace internal {
|
||||
|
||||
inline int futex_wait( void *futex, int comparand ) {
|
||||
int r = syscall( SYS_futex,futex,__TBB_FUTEX_WAIT,comparand,NULL,NULL,0 );
|
||||
#if TBB_USE_ASSERT
|
||||
int e = errno;
|
||||
__TBB_ASSERT( r==0||r==EWOULDBLOCK||(r==-1&&(e==EAGAIN||e==EINTR)), "futex_wait failed." );
|
||||
#endif /* TBB_USE_ASSERT */
|
||||
return r;
|
||||
}
|
||||
|
||||
//! Wake at most one thread blocked in futex_wait on the given futex word.
/** Returns the number of threads actually woken (0 or 1). */
inline int futex_wakeup_one( void *futex ) {
    int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,1,NULL,NULL,0 );
    __TBB_ASSERT( r==0||r==1, "futex_wakeup_one: more than one thread woken up?" );
    return r;
}
|
||||
|
||||
//! Wake every thread blocked in futex_wait on the given futex word.
/** Returns the number of threads woken (>=0); -1 indicates a syscall error. */
inline int futex_wakeup_all( void *futex ) {
    int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,INT_MAX,NULL,NULL,0 );
    __TBB_ASSERT( r>=0, "futex_wakeup_all: error in waking up threads" );
    return r;
}
|
||||
|
||||
} /* namespace internal */
|
||||
|
||||
} /* namespace tbb */
|
||||
|
||||
#endif /* SYS_futex */
|
||||
232
Research/inc/tbb/machine/linux_ia32.h
Normal file
232
Research/inc/tbb/machine/linux_ia32.h
Normal file
@@ -0,0 +1,232 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
#if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_ia32_H)
|
||||
#error Do not #include this internal file directly; use public TBB headers instead.
|
||||
#endif
|
||||
|
||||
#define __TBB_machine_linux_ia32_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "gcc_ia32_common.h"
|
||||
|
||||
#define __TBB_WORDSIZE 4
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
|
||||
|
||||
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
|
||||
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
|
||||
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
|
||||
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
|
||||
#define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory")
|
||||
|
||||
#if __TBB_ICC_ASM_VOLATILE_BROKEN
|
||||
#define __TBB_VOLATILE
|
||||
#else
|
||||
#define __TBB_VOLATILE volatile
|
||||
#endif
|
||||
|
||||
//! Generates full-fence compare-and-swap, fetch-and-add, and fetch-and-store
//! primitives for operand size S (bytes) and type T using lock-prefixed IA-32
//! instructions. X is the instruction size suffix ("" or "l"); R is the output
//! register constraint ("=q" for byte operands, "=r" otherwise).
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,X,R)                                        \
static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T comparand )  \
{                                                                                    \
    T result;                                                                        \
                                                                                     \
    __asm__ __volatile__("lock\ncmpxchg" X " %2,%1"                                  \
                          : "=a"(result), "=m"(*(__TBB_VOLATILE T*)ptr)              \
                          : "q"(value), "0"(comparand), "m"(*(__TBB_VOLATILE T*)ptr) \
                          : "memory");                                               \
    return result;                                                                   \
}                                                                                    \
                                                                                     \
static inline T __TBB_machine_fetchadd##S(volatile void *ptr, T addend)              \
{                                                                                    \
    T result;                                                                        \
    __asm__ __volatile__("lock\nxadd" X " %0,%1"                                     \
                          : R (result), "=m"(*(__TBB_VOLATILE T*)ptr)                \
                          : "0"(addend), "m"(*(__TBB_VOLATILE T*)ptr)                \
                          : "memory");                                               \
    return result;                                                                   \
}                                                                                    \
                                                                                     \
static inline T __TBB_machine_fetchstore##S(volatile void *ptr, T value)             \
{                                                                                    \
    T result;                                                                        \
    __asm__ __volatile__("lock\nxchg" X " %0,%1"                                     \
                          : R (result), "=m"(*(__TBB_VOLATILE T*)ptr)                \
                          : "0"(value), "m"(*(__TBB_VOLATILE T*)ptr)                 \
                          : "memory");                                               \
    return result;                                                                   \
}                                                                                    \
|
||||
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t,"","=q")
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t,"","=r")
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"l","=r")
|
||||
|
||||
#if __INTEL_COMPILER
|
||||
#pragma warning( push )
|
||||
// reference to EBX in a function requiring stack alignment
|
||||
#pragma warning( disable: 998 )
|
||||
#endif
|
||||
|
||||
#if __TBB_GCC_CAS8_BUILTIN_INLINING_BROKEN
|
||||
#define __TBB_IA32_CAS8_NOINLINE __attribute__ ((noinline))
|
||||
#else
|
||||
#define __TBB_IA32_CAS8_NOINLINE
|
||||
#endif
|
||||
|
||||
//! 8-byte compare-and-swap with full fence for IA-32; returns previous *ptr.
/** Uses the GCC __sync builtin where available; otherwise falls back to inline
    cmpxchg8b, taking care to preserve EBX when compiling position-independent
    code (EBX holds the GOT pointer under the IA-32 PIC ABI). */
static inline __TBB_IA32_CAS8_NOINLINE int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand ) {
//TODO: remove the extra part of condition once __TBB_GCC_BUILTIN_ATOMICS_PRESENT is lowered to gcc version 4.1.2
#if (__TBB_GCC_BUILTIN_ATOMICS_PRESENT || (__TBB_GCC_VERSION >= 40102)) && !__TBB_GCC_64BIT_ATOMIC_BUILTINS_BROKEN
    return __sync_val_compare_and_swap( reinterpret_cast<volatile int64_t*>(ptr), comparand, value );
#else /* !__TBB_GCC_BUILTIN_ATOMICS_PRESENT */
    //TODO: look like ICC 13.0 has some issues with this code, investigate it more deeply
    int64_t result;
    // Anonymous union splits the 64-bit replacement value into the 32-bit
    // halves cmpxchg8b takes in ECX:EBX (i32[0] = low half on little-endian).
    union {
        int64_t i64;
        int32_t i32[2];
    };
    i64 = value;
#if __PIC__
    /* compiling position-independent code */
    // EBX register preserved for compliance with position-independent code rules on IA32
    int32_t tmp;
    __asm__ __volatile__ (
            "movl %%ebx,%2\n\t"      // save EBX
            "movl %5,%%ebx\n\t"      // load low half of the new value
#if __GNUC__==3
            "lock\n\t cmpxchg8b %1\n\t"
#else
            "lock\n\t cmpxchg8b (%3)\n\t"
#endif
            "movl %2,%%ebx"          // restore EBX
             : "=A"(result)
             , "=m"(*(__TBB_VOLATILE int64_t *)ptr)
             , "=m"(tmp)
#if __GNUC__==3
             : "m"(*(__TBB_VOLATILE int64_t *)ptr)
#else
             : "SD"(ptr)
#endif
             , "0"(comparand)
             , "m"(i32[0]), "c"(i32[1])
             : "memory"
#if __INTEL_COMPILER
             ,"ebx"
#endif
    );
#else /* !__PIC__ */
    __asm__ __volatile__ (
            "lock\n\t cmpxchg8b %1\n\t"
             : "=A"(result), "=m"(*(__TBB_VOLATILE int64_t *)ptr)
             : "m"(*(__TBB_VOLATILE int64_t *)ptr)
             , "0"(comparand)
             , "b"(i32[0]), "c"(i32[1])
             : "memory"
    );
#endif /* __PIC__ */
    return result;
#endif /* !__TBB_GCC_BUILTIN_ATOMICS_PRESENT */
}
|
||||
|
||||
#undef __TBB_IA32_CAS8_NOINLINE
|
||||
|
||||
#if __INTEL_COMPILER
|
||||
#pragma warning( pop )
|
||||
#endif // warning 998 is back
|
||||
|
||||
//! Atomically OR `addend` (a bit mask) into the 32-bit word at ptr (lock orl; full fence).
static inline void __TBB_machine_or( volatile void *ptr, uint32_t addend ) {
    __asm__ __volatile__("lock\norl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_t *)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory");
}
|
||||
|
||||
//! Atomically AND `addend` (a bit mask) into the 32-bit word at ptr (lock andl; full fence).
static inline void __TBB_machine_and( volatile void *ptr, uint32_t addend ) {
    __asm__ __volatile__("lock\nandl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_t *)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory");
}
|
||||
|
||||
//TODO: Check if it possible and profitable for IA-32 architecture on (Linux* and Windows*)
|
||||
//to use of 64-bit load/store via floating point registers together with full fence
|
||||
//for sequentially consistent load/store, instead of CAS.
|
||||
|
||||
#if __clang__
|
||||
#define __TBB_fildq "fildll"
|
||||
#define __TBB_fistpq "fistpll"
|
||||
#else
|
||||
#define __TBB_fildq "fildq"
|
||||
#define __TBB_fistpq "fistpq"
|
||||
#endif
|
||||
|
||||
//! Atomic 8-byte load of an 8-byte-aligned location via the x87 FPU.
/** fild/fistp performs a single 64-bit memory access, giving an atomic load on
    IA-32 without a lock prefix; 64-bit integers round-trip exactly through the
    x87 register (64-bit significand), so no value is lost. */
static inline int64_t __TBB_machine_aligned_load8 (const volatile void *ptr) {
    __TBB_ASSERT(tbb::internal::is_aligned(ptr,8),"__TBB_machine_aligned_load8 should be used with 8 byte aligned locations only \n");
    int64_t result;
    __asm__ __volatile__ ( __TBB_fildq " %1\n\t"
                           __TBB_fistpq " %0" : "=m"(result) : "m"(*(const __TBB_VOLATILE uint64_t*)ptr) : "memory" );
    return result;
}
|
||||
|
||||
//! Atomic 8-byte store to an 8-byte-aligned location via the x87 FPU.
/** Mirrors __TBB_machine_aligned_load8: fild reads `value`, fistp writes it to
    *ptr in a single 64-bit memory access. */
static inline void __TBB_machine_aligned_store8 (volatile void *ptr, int64_t value ) {
    __TBB_ASSERT(tbb::internal::is_aligned(ptr,8),"__TBB_machine_aligned_store8 should be used with 8 byte aligned locations only \n");
    // Aligned store
    __asm__ __volatile__ ( __TBB_fildq " %1\n\t"
                           __TBB_fistpq " %0" : "=m"(*(__TBB_VOLATILE int64_t*)ptr) : "m"(value) : "memory" );
}
|
||||
|
||||
//! Atomic 8-byte load; tolerates misaligned locations on ABIs known to
//! under-align 64-bit fields (__TBB_FORCE_64BIT_ALIGNMENT_BROKEN).
static inline int64_t __TBB_machine_load8 (const volatile void *ptr) {
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
    if( tbb::internal::is_aligned(ptr,8)) {
#endif
        return __TBB_machine_aligned_load8(ptr);
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
    } else {
        // Unaligned load: CAS with equal value and comparand leaves *ptr
        // unchanged while returning its current contents.
        return __TBB_machine_cmpswp8(const_cast<void*>(ptr),0,0);
    }
#endif
}
|
||||
|
||||
//! Handles misaligned 8-byte store
|
||||
/** Defined in tbb_misc.cpp */
|
||||
extern "C" void __TBB_machine_store8_slow( volatile void *ptr, int64_t value );
|
||||
extern "C" void __TBB_machine_store8_slow_perf_warning( volatile void *ptr );
|
||||
|
||||
//! Atomic 8-byte store; tolerates misaligned locations on ABIs known to
//! under-align 64-bit fields (__TBB_FORCE_64BIT_ALIGNMENT_BROKEN).
static inline void __TBB_machine_store8(volatile void *ptr, int64_t value) {
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
    if( tbb::internal::is_aligned(ptr,8)) {
#endif
        __TBB_machine_aligned_store8(ptr,value);
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
    } else {
        // Unaligned store: delegate to the out-of-line slow path (tbb_misc.cpp),
        // optionally emitting a one-time performance warning first.
#if TBB_USE_PERFORMANCE_WARNINGS
        __TBB_machine_store8_slow_perf_warning(ptr);
#endif /* TBB_USE_PERFORMANCE_WARNINGS */
        __TBB_machine_store8_slow(ptr,value);
    }
#endif
}
|
||||
|
||||
// Machine specific atomic operations
|
||||
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
|
||||
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
|
||||
|
||||
#define __TBB_USE_GENERIC_DWORD_FETCH_ADD 1
|
||||
#define __TBB_USE_GENERIC_DWORD_FETCH_STORE 1
|
||||
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
|
||||
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
|
||||
|
||||
181
Research/inc/tbb/machine/linux_ia64.h
Normal file
181
Research/inc/tbb/machine/linux_ia64.h
Normal file
@@ -0,0 +1,181 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
#if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_ia64_H)
|
||||
#error Do not #include this internal file directly; use public TBB headers instead.
|
||||
#endif
|
||||
|
||||
#define __TBB_machine_linux_ia64_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <ia64intrin.h>
|
||||
|
||||
#define __TBB_WORDSIZE 8
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
|
||||
|
||||
#if __INTEL_COMPILER
|
||||
#define __TBB_compiler_fence()
|
||||
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
|
||||
#define __TBB_acquire_consistency_helper()
|
||||
#define __TBB_release_consistency_helper()
|
||||
#define __TBB_full_memory_fence() __mf()
|
||||
#else
|
||||
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
|
||||
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
|
||||
// Even though GCC imbues volatile loads with acquire semantics, it sometimes moves
|
||||
// loads over the acquire fence. The following helpers stop such incorrect code motion.
|
||||
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
|
||||
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
|
||||
#define __TBB_full_memory_fence() __asm__ __volatile__("mf": : :"memory")
|
||||
#endif /* !__INTEL_COMPILER */
|
||||
|
||||
// Most of the functions will be in a .s file
|
||||
// TODO: revise dynamic_link, memory pools and etc. if the library dependency is removed.
|
||||
|
||||
extern "C" {
|
||||
int8_t __TBB_machine_fetchadd1__TBB_full_fence (volatile void *ptr, int8_t addend);
|
||||
int8_t __TBB_machine_fetchadd1acquire(volatile void *ptr, int8_t addend);
|
||||
int8_t __TBB_machine_fetchadd1release(volatile void *ptr, int8_t addend);
|
||||
|
||||
int16_t __TBB_machine_fetchadd2__TBB_full_fence (volatile void *ptr, int16_t addend);
|
||||
int16_t __TBB_machine_fetchadd2acquire(volatile void *ptr, int16_t addend);
|
||||
int16_t __TBB_machine_fetchadd2release(volatile void *ptr, int16_t addend);
|
||||
|
||||
int32_t __TBB_machine_fetchadd4__TBB_full_fence (volatile void *ptr, int32_t value);
|
||||
int32_t __TBB_machine_fetchadd4acquire(volatile void *ptr, int32_t addend);
|
||||
int32_t __TBB_machine_fetchadd4release(volatile void *ptr, int32_t addend);
|
||||
|
||||
int64_t __TBB_machine_fetchadd8__TBB_full_fence (volatile void *ptr, int64_t value);
|
||||
int64_t __TBB_machine_fetchadd8acquire(volatile void *ptr, int64_t addend);
|
||||
int64_t __TBB_machine_fetchadd8release(volatile void *ptr, int64_t addend);
|
||||
|
||||
int8_t __TBB_machine_fetchstore1__TBB_full_fence (volatile void *ptr, int8_t value);
|
||||
int8_t __TBB_machine_fetchstore1acquire(volatile void *ptr, int8_t value);
|
||||
int8_t __TBB_machine_fetchstore1release(volatile void *ptr, int8_t value);
|
||||
|
||||
int16_t __TBB_machine_fetchstore2__TBB_full_fence (volatile void *ptr, int16_t value);
|
||||
int16_t __TBB_machine_fetchstore2acquire(volatile void *ptr, int16_t value);
|
||||
int16_t __TBB_machine_fetchstore2release(volatile void *ptr, int16_t value);
|
||||
|
||||
int32_t __TBB_machine_fetchstore4__TBB_full_fence (volatile void *ptr, int32_t value);
|
||||
int32_t __TBB_machine_fetchstore4acquire(volatile void *ptr, int32_t value);
|
||||
int32_t __TBB_machine_fetchstore4release(volatile void *ptr, int32_t value);
|
||||
|
||||
int64_t __TBB_machine_fetchstore8__TBB_full_fence (volatile void *ptr, int64_t value);
|
||||
int64_t __TBB_machine_fetchstore8acquire(volatile void *ptr, int64_t value);
|
||||
int64_t __TBB_machine_fetchstore8release(volatile void *ptr, int64_t value);
|
||||
|
||||
int8_t __TBB_machine_cmpswp1__TBB_full_fence (volatile void *ptr, int8_t value, int8_t comparand);
|
||||
int8_t __TBB_machine_cmpswp1acquire(volatile void *ptr, int8_t value, int8_t comparand);
|
||||
int8_t __TBB_machine_cmpswp1release(volatile void *ptr, int8_t value, int8_t comparand);
|
||||
|
||||
int16_t __TBB_machine_cmpswp2__TBB_full_fence (volatile void *ptr, int16_t value, int16_t comparand);
|
||||
int16_t __TBB_machine_cmpswp2acquire(volatile void *ptr, int16_t value, int16_t comparand);
|
||||
int16_t __TBB_machine_cmpswp2release(volatile void *ptr, int16_t value, int16_t comparand);
|
||||
|
||||
int32_t __TBB_machine_cmpswp4__TBB_full_fence (volatile void *ptr, int32_t value, int32_t comparand);
|
||||
int32_t __TBB_machine_cmpswp4acquire(volatile void *ptr, int32_t value, int32_t comparand);
|
||||
int32_t __TBB_machine_cmpswp4release(volatile void *ptr, int32_t value, int32_t comparand);
|
||||
|
||||
int64_t __TBB_machine_cmpswp8__TBB_full_fence (volatile void *ptr, int64_t value, int64_t comparand);
|
||||
int64_t __TBB_machine_cmpswp8acquire(volatile void *ptr, int64_t value, int64_t comparand);
|
||||
int64_t __TBB_machine_cmpswp8release(volatile void *ptr, int64_t value, int64_t comparand);
|
||||
|
||||
int64_t __TBB_machine_lg(uint64_t value);
|
||||
void __TBB_machine_pause(int32_t delay);
|
||||
bool __TBB_machine_trylockbyte( volatile unsigned char &ptr );
|
||||
int64_t __TBB_machine_lockbyte( volatile unsigned char &ptr );
|
||||
|
||||
//! Retrieves the current RSE backing store pointer. IA64 specific.
|
||||
void* __TBB_get_bsp();
|
||||
|
||||
int32_t __TBB_machine_load1_relaxed(const void *ptr);
|
||||
int32_t __TBB_machine_load2_relaxed(const void *ptr);
|
||||
int32_t __TBB_machine_load4_relaxed(const void *ptr);
|
||||
int64_t __TBB_machine_load8_relaxed(const void *ptr);
|
||||
|
||||
void __TBB_machine_store1_relaxed(void *ptr, int32_t value);
|
||||
void __TBB_machine_store2_relaxed(void *ptr, int32_t value);
|
||||
void __TBB_machine_store4_relaxed(void *ptr, int32_t value);
|
||||
void __TBB_machine_store8_relaxed(void *ptr, int64_t value);
|
||||
} // extern "C"
|
||||
|
||||
// Mapping old entry points to the names corresponding to the new full_fence identifier.
|
||||
#define __TBB_machine_fetchadd1full_fence __TBB_machine_fetchadd1__TBB_full_fence
|
||||
#define __TBB_machine_fetchadd2full_fence __TBB_machine_fetchadd2__TBB_full_fence
|
||||
#define __TBB_machine_fetchadd4full_fence __TBB_machine_fetchadd4__TBB_full_fence
|
||||
#define __TBB_machine_fetchadd8full_fence __TBB_machine_fetchadd8__TBB_full_fence
|
||||
#define __TBB_machine_fetchstore1full_fence __TBB_machine_fetchstore1__TBB_full_fence
|
||||
#define __TBB_machine_fetchstore2full_fence __TBB_machine_fetchstore2__TBB_full_fence
|
||||
#define __TBB_machine_fetchstore4full_fence __TBB_machine_fetchstore4__TBB_full_fence
|
||||
#define __TBB_machine_fetchstore8full_fence __TBB_machine_fetchstore8__TBB_full_fence
|
||||
#define __TBB_machine_cmpswp1full_fence __TBB_machine_cmpswp1__TBB_full_fence
|
||||
#define __TBB_machine_cmpswp2full_fence __TBB_machine_cmpswp2__TBB_full_fence
|
||||
#define __TBB_machine_cmpswp4full_fence __TBB_machine_cmpswp4__TBB_full_fence
|
||||
#define __TBB_machine_cmpswp8full_fence __TBB_machine_cmpswp8__TBB_full_fence
|
||||
|
||||
// Mapping relaxed operations to the entry points implementing them.
|
||||
/** On IA64 RMW operations implicitly have acquire semantics. Thus one cannot
|
||||
actually have completely relaxed RMW operation here. **/
|
||||
#define __TBB_machine_fetchadd1relaxed __TBB_machine_fetchadd1acquire
|
||||
#define __TBB_machine_fetchadd2relaxed __TBB_machine_fetchadd2acquire
|
||||
#define __TBB_machine_fetchadd4relaxed __TBB_machine_fetchadd4acquire
|
||||
#define __TBB_machine_fetchadd8relaxed __TBB_machine_fetchadd8acquire
|
||||
#define __TBB_machine_fetchstore1relaxed __TBB_machine_fetchstore1acquire
|
||||
#define __TBB_machine_fetchstore2relaxed __TBB_machine_fetchstore2acquire
|
||||
#define __TBB_machine_fetchstore4relaxed __TBB_machine_fetchstore4acquire
|
||||
#define __TBB_machine_fetchstore8relaxed __TBB_machine_fetchstore8acquire
|
||||
#define __TBB_machine_cmpswp1relaxed __TBB_machine_cmpswp1acquire
|
||||
#define __TBB_machine_cmpswp2relaxed __TBB_machine_cmpswp2acquire
|
||||
#define __TBB_machine_cmpswp4relaxed __TBB_machine_cmpswp4acquire
|
||||
#define __TBB_machine_cmpswp8relaxed __TBB_machine_cmpswp8acquire
|
||||
|
||||
//! Defines the machine_load_store_relaxed specialization for operand size S
//! (bytes), delegating to the out-of-line assembly entry points declared above;
//! V is the value type those entry points take for the store.
#define __TBB_MACHINE_DEFINE_ATOMICS(S,V)                           \
    template <typename T>                                           \
    struct machine_load_store_relaxed<T,S> {                        \
        static inline T load ( const T& location ) {                \
            return (T)__TBB_machine_load##S##_relaxed(&location);   \
        }                                                           \
        static inline void store ( T& location, T value ) {         \
            __TBB_machine_store##S##_relaxed(&location, (V)value);  \
        }                                                           \
    }
|
||||
|
||||
namespace tbb {
|
||||
namespace internal {
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t);
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t);
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t);
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(8,int64_t);
|
||||
}} // namespaces internal, tbb
|
||||
|
||||
#undef __TBB_MACHINE_DEFINE_ATOMICS
|
||||
|
||||
#define __TBB_USE_FENCED_ATOMICS 1
|
||||
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
|
||||
|
||||
// Definition of Lock functions
|
||||
#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)
|
||||
#define __TBB_LockByte(P) __TBB_machine_lockbyte(P)
|
||||
|
||||
// Definition of other utility functions
|
||||
#define __TBB_Pause(V) __TBB_machine_pause(V)
|
||||
#define __TBB_Log2(V) __TBB_machine_lg(V)
|
||||
96
Research/inc/tbb/machine/linux_intel64.h
Normal file
96
Research/inc/tbb/machine/linux_intel64.h
Normal file
@@ -0,0 +1,96 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
#if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_intel64_H)
|
||||
#error Do not #include this internal file directly; use public TBB headers instead.
|
||||
#endif
|
||||
|
||||
#define __TBB_machine_linux_intel64_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "gcc_ia32_common.h"
|
||||
|
||||
#define __TBB_WORDSIZE 8
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
|
||||
|
||||
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
|
||||
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
|
||||
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
|
||||
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
|
||||
|
||||
#ifndef __TBB_full_memory_fence
|
||||
#define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory")
|
||||
#endif
|
||||
|
||||
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,X) \
|
||||
static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T comparand ) \
|
||||
{ \
|
||||
T result; \
|
||||
\
|
||||
__asm__ __volatile__("lock\ncmpxchg" X " %2,%1" \
|
||||
: "=a"(result), "=m"(*(volatile T*)ptr) \
|
||||
: "q"(value), "0"(comparand), "m"(*(volatile T*)ptr) \
|
||||
: "memory"); \
|
||||
return result; \
|
||||
} \
|
||||
\
|
||||
static inline T __TBB_machine_fetchadd##S(volatile void *ptr, T addend) \
|
||||
{ \
|
||||
T result; \
|
||||
__asm__ __volatile__("lock\nxadd" X " %0,%1" \
|
||||
: "=r"(result),"=m"(*(volatile T*)ptr) \
|
||||
: "0"(addend), "m"(*(volatile T*)ptr) \
|
||||
: "memory"); \
|
||||
return result; \
|
||||
} \
|
||||
\
|
||||
static inline T __TBB_machine_fetchstore##S(volatile void *ptr, T value) \
|
||||
{ \
|
||||
T result; \
|
||||
__asm__ __volatile__("lock\nxchg" X " %0,%1" \
|
||||
: "=r"(result),"=m"(*(volatile T*)ptr) \
|
||||
: "0"(value), "m"(*(volatile T*)ptr) \
|
||||
: "memory"); \
|
||||
return result; \
|
||||
} \
|
||||
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t,"")
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t,"")
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"")
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(8,int64_t,"q")
|
||||
|
||||
#undef __TBB_MACHINE_DEFINE_ATOMICS
|
||||
|
||||
static inline void __TBB_machine_or( volatile void *ptr, uint64_t value ) {
|
||||
__asm__ __volatile__("lock\norq %1,%0" : "=m"(*(volatile uint64_t*)ptr) : "r"(value), "m"(*(volatile uint64_t*)ptr) : "memory");
|
||||
}
|
||||
|
||||
static inline void __TBB_machine_and( volatile void *ptr, uint64_t value ) {
|
||||
__asm__ __volatile__("lock\nandq %1,%0" : "=m"(*(volatile uint64_t*)ptr) : "r"(value), "m"(*(volatile uint64_t*)ptr) : "memory");
|
||||
}
|
||||
|
||||
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
|
||||
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
|
||||
|
||||
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
|
||||
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
|
||||
|
||||
313
Research/inc/tbb/machine/mac_ppc.h
Normal file
313
Research/inc/tbb/machine/mac_ppc.h
Normal file
@@ -0,0 +1,313 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
#if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_power_H)
|
||||
#error Do not #include this internal file directly; use public TBB headers instead.
|
||||
#endif
|
||||
|
||||
#define __TBB_machine_gcc_power_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
|
||||
// TODO: rename to gcc_power.h?
|
||||
// This file is for Power Architecture with compilers supporting GNU inline-assembler syntax (currently GNU g++ and IBM XL).
|
||||
// Note that XL V9.0 (sometimes?) has trouble dealing with empty input and/or clobber lists, so they should be avoided.
|
||||
|
||||
#if __powerpc64__ || __ppc64__
|
||||
// IBM XL documents __powerpc64__ (and __PPC64__).
|
||||
// Apple documents __ppc64__ (with __ppc__ only on 32-bit).
|
||||
#define __TBB_WORDSIZE 8
|
||||
#else
|
||||
#define __TBB_WORDSIZE 4
|
||||
#endif
|
||||
|
||||
// Traditionally Power Architecture is big-endian.
|
||||
// Little-endian could be just an address manipulation (compatibility with TBB not verified),
|
||||
// or normal little-endian (on more recent systems). Embedded PowerPC systems may support
|
||||
// page-specific endianness, but then one endianness must be hidden from TBB so that it still sees only one.
|
||||
#if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__)
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
|
||||
#elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__)
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
|
||||
#elif defined(__BYTE_ORDER__)
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
|
||||
#else
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
|
||||
#endif
|
||||
|
||||
// On Power Architecture, (lock-free) 64-bit atomics require 64-bit hardware:
|
||||
#if __TBB_WORDSIZE==8
|
||||
// Do not change the following definition, because TBB itself will use 64-bit atomics in 64-bit builds.
|
||||
#define __TBB_64BIT_ATOMICS 1
|
||||
#elif __bgp__
|
||||
// Do not change the following definition, because this is known 32-bit hardware.
|
||||
#define __TBB_64BIT_ATOMICS 0
|
||||
#else
|
||||
// To enable 64-bit atomics in 32-bit builds, set the value below to 1 instead of 0.
|
||||
// You must make certain that the program will only use them on actual 64-bit hardware
|
||||
// (which typically means that the entire program is only executed on such hardware),
|
||||
// because their implementation involves machine instructions that are illegal elsewhere.
|
||||
// The setting can be chosen independently per compilation unit,
|
||||
// which also means that TBB itself does not need to be rebuilt.
|
||||
// Alternatively (but only for the current architecture and TBB version),
|
||||
// override the default as a predefined macro when invoking the compiler.
|
||||
#ifndef __TBB_64BIT_ATOMICS
|
||||
#define __TBB_64BIT_ATOMICS 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
inline int32_t __TBB_machine_cmpswp4 (volatile void *ptr, int32_t value, int32_t comparand )
|
||||
{
|
||||
int32_t result;
|
||||
|
||||
__asm__ __volatile__("sync\n"
|
||||
"0:\n\t"
|
||||
"lwarx %[res],0,%[ptr]\n\t" /* load w/ reservation */
|
||||
"cmpw %[res],%[cmp]\n\t" /* compare against comparand */
|
||||
"bne- 1f\n\t" /* exit if not same */
|
||||
"stwcx. %[val],0,%[ptr]\n\t" /* store new value */
|
||||
"bne- 0b\n" /* retry if reservation lost */
|
||||
"1:\n\t" /* the exit */
|
||||
"isync"
|
||||
: [res]"=&r"(result)
|
||||
, "+m"(* (int32_t*) ptr) /* redundant with "memory" */
|
||||
: [ptr]"r"(ptr)
|
||||
, [val]"r"(value)
|
||||
, [cmp]"r"(comparand)
|
||||
: "memory" /* compiler full fence */
|
||||
, "cr0" /* clobbered by cmp and/or stwcx. */
|
||||
);
|
||||
return result;
|
||||
}
|
||||
|
||||
#if __TBB_WORDSIZE==8
|
||||
|
||||
inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
|
||||
{
|
||||
int64_t result;
|
||||
__asm__ __volatile__("sync\n"
|
||||
"0:\n\t"
|
||||
"ldarx %[res],0,%[ptr]\n\t" /* load w/ reservation */
|
||||
"cmpd %[res],%[cmp]\n\t" /* compare against comparand */
|
||||
"bne- 1f\n\t" /* exit if not same */
|
||||
"stdcx. %[val],0,%[ptr]\n\t" /* store new value */
|
||||
"bne- 0b\n" /* retry if reservation lost */
|
||||
"1:\n\t" /* the exit */
|
||||
"isync"
|
||||
: [res]"=&r"(result)
|
||||
, "+m"(* (int64_t*) ptr) /* redundant with "memory" */
|
||||
: [ptr]"r"(ptr)
|
||||
, [val]"r"(value)
|
||||
, [cmp]"r"(comparand)
|
||||
: "memory" /* compiler full fence */
|
||||
, "cr0" /* clobbered by cmp and/or stdcx. */
|
||||
);
|
||||
return result;
|
||||
}
|
||||
|
||||
#elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */
|
||||
|
||||
inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
|
||||
{
|
||||
int64_t result;
|
||||
int64_t value_register, comparand_register, result_register; // dummy variables to allocate registers
|
||||
__asm__ __volatile__("sync\n\t"
|
||||
"ld %[val],%[valm]\n\t"
|
||||
"ld %[cmp],%[cmpm]\n"
|
||||
"0:\n\t"
|
||||
"ldarx %[res],0,%[ptr]\n\t" /* load w/ reservation */
|
||||
"cmpd %[res],%[cmp]\n\t" /* compare against comparand */
|
||||
"bne- 1f\n\t" /* exit if not same */
|
||||
"stdcx. %[val],0,%[ptr]\n\t" /* store new value */
|
||||
"bne- 0b\n" /* retry if reservation lost */
|
||||
"1:\n\t" /* the exit */
|
||||
"std %[res],%[resm]\n\t"
|
||||
"isync"
|
||||
: [resm]"=m"(result)
|
||||
, [res] "=&r"( result_register)
|
||||
, [val] "=&r"( value_register)
|
||||
, [cmp] "=&r"(comparand_register)
|
||||
, "+m"(* (int64_t*) ptr) /* redundant with "memory" */
|
||||
: [ptr] "r"(ptr)
|
||||
, [valm]"m"(value)
|
||||
, [cmpm]"m"(comparand)
|
||||
: "memory" /* compiler full fence */
|
||||
, "cr0" /* clobbered by cmpd and/or stdcx. */
|
||||
);
|
||||
return result;
|
||||
}
|
||||
|
||||
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
|
||||
|
||||
#define __TBB_MACHINE_DEFINE_LOAD_STORE(S,ldx,stx,cmpx) \
|
||||
template <typename T> \
|
||||
struct machine_load_store<T,S> { \
|
||||
static inline T load_with_acquire(const volatile T& location) { \
|
||||
T result; \
|
||||
__asm__ __volatile__(ldx " %[res],0(%[ptr])\n" \
|
||||
"0:\n\t" \
|
||||
cmpx " %[res],%[res]\n\t" \
|
||||
"bne- 0b\n\t" \
|
||||
"isync" \
|
||||
: [res]"=r"(result) \
|
||||
: [ptr]"b"(&location) /* cannot use register 0 here */ \
|
||||
, "m"(location) /* redundant with "memory" */ \
|
||||
: "memory" /* compiler acquire fence */ \
|
||||
, "cr0" /* clobbered by cmpw/cmpd */); \
|
||||
return result; \
|
||||
} \
|
||||
static inline void store_with_release(volatile T &location, T value) { \
|
||||
__asm__ __volatile__("lwsync\n\t" \
|
||||
stx " %[val],0(%[ptr])" \
|
||||
: "=m"(location) /* redundant with "memory" */ \
|
||||
: [ptr]"b"(&location) /* cannot use register 0 here */ \
|
||||
, [val]"r"(value) \
|
||||
: "memory"/*compiler release fence*/ /*(cr0 not affected)*/); \
|
||||
} \
|
||||
}; \
|
||||
\
|
||||
template <typename T> \
|
||||
struct machine_load_store_relaxed<T,S> { \
|
||||
static inline T load (const __TBB_atomic T& location) { \
|
||||
T result; \
|
||||
__asm__ __volatile__(ldx " %[res],0(%[ptr])" \
|
||||
: [res]"=r"(result) \
|
||||
: [ptr]"b"(&location) /* cannot use register 0 here */ \
|
||||
, "m"(location) \
|
||||
); /*(no compiler fence)*/ /*(cr0 not affected)*/ \
|
||||
return result; \
|
||||
} \
|
||||
static inline void store (__TBB_atomic T &location, T value) { \
|
||||
__asm__ __volatile__(stx " %[val],0(%[ptr])" \
|
||||
: "=m"(location) \
|
||||
: [ptr]"b"(&location) /* cannot use register 0 here */ \
|
||||
, [val]"r"(value) \
|
||||
); /*(no compiler fence)*/ /*(cr0 not affected)*/ \
|
||||
} \
|
||||
};
|
||||
|
||||
namespace tbb {
|
||||
namespace internal {
|
||||
__TBB_MACHINE_DEFINE_LOAD_STORE(1,"lbz","stb","cmpw")
|
||||
__TBB_MACHINE_DEFINE_LOAD_STORE(2,"lhz","sth","cmpw")
|
||||
__TBB_MACHINE_DEFINE_LOAD_STORE(4,"lwz","stw","cmpw")
|
||||
|
||||
#if __TBB_WORDSIZE==8
|
||||
|
||||
__TBB_MACHINE_DEFINE_LOAD_STORE(8,"ld" ,"std","cmpd")
|
||||
|
||||
#elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */
|
||||
|
||||
template <typename T>
|
||||
struct machine_load_store<T,8> {
|
||||
static inline T load_with_acquire(const volatile T& location) {
|
||||
T result;
|
||||
T result_register; // dummy variable to allocate a register
|
||||
__asm__ __volatile__("ld %[res],0(%[ptr])\n\t"
|
||||
"std %[res],%[resm]\n"
|
||||
"0:\n\t"
|
||||
"cmpd %[res],%[res]\n\t"
|
||||
"bne- 0b\n\t"
|
||||
"isync"
|
||||
: [resm]"=m"(result)
|
||||
, [res]"=&r"(result_register)
|
||||
: [ptr]"b"(&location) /* cannot use register 0 here */
|
||||
, "m"(location) /* redundant with "memory" */
|
||||
: "memory" /* compiler acquire fence */
|
||||
, "cr0" /* clobbered by cmpd */);
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline void store_with_release(volatile T &location, T value) {
|
||||
T value_register; // dummy variable to allocate a register
|
||||
__asm__ __volatile__("lwsync\n\t"
|
||||
"ld %[val],%[valm]\n\t"
|
||||
"std %[val],0(%[ptr])"
|
||||
: "=m"(location) /* redundant with "memory" */
|
||||
, [val]"=&r"(value_register)
|
||||
: [ptr]"b"(&location) /* cannot use register 0 here */
|
||||
, [valm]"m"(value)
|
||||
: "memory"/*compiler release fence*/ /*(cr0 not affected)*/);
|
||||
}
|
||||
};
|
||||
|
||||
struct machine_load_store_relaxed<T,8> {
|
||||
static inline T load (const volatile T& location) {
|
||||
T result;
|
||||
T result_register; // dummy variable to allocate a register
|
||||
__asm__ __volatile__("ld %[res],0(%[ptr])\n\t"
|
||||
"std %[res],%[resm]"
|
||||
: [resm]"=m"(result)
|
||||
, [res]"=&r"(result_register)
|
||||
: [ptr]"b"(&location) /* cannot use register 0 here */
|
||||
, "m"(location)
|
||||
); /*(no compiler fence)*/ /*(cr0 not affected)*/
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline void store (volatile T &location, T value) {
|
||||
T value_register; // dummy variable to allocate a register
|
||||
__asm__ __volatile__("ld %[val],%[valm]\n\t"
|
||||
"std %[val],0(%[ptr])"
|
||||
: "=m"(location)
|
||||
, [val]"=&r"(value_register)
|
||||
: [ptr]"b"(&location) /* cannot use register 0 here */
|
||||
, [valm]"m"(value)
|
||||
); /*(no compiler fence)*/ /*(cr0 not affected)*/
|
||||
}
|
||||
};
|
||||
#define __TBB_machine_load_store_relaxed_8
|
||||
|
||||
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
|
||||
|
||||
}} // namespaces internal, tbb
|
||||
|
||||
#undef __TBB_MACHINE_DEFINE_LOAD_STORE
|
||||
|
||||
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
|
||||
#define __TBB_USE_GENERIC_FETCH_ADD 1
|
||||
#define __TBB_USE_GENERIC_FETCH_STORE 1
|
||||
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
|
||||
|
||||
#define __TBB_control_consistency_helper() __asm__ __volatile__("isync": : :"memory")
|
||||
#define __TBB_full_memory_fence() __asm__ __volatile__( "sync": : :"memory")
|
||||
|
||||
static inline intptr_t __TBB_machine_lg( uintptr_t x ) {
|
||||
__TBB_ASSERT(x, "__TBB_Log2(0) undefined");
|
||||
// cntlzd/cntlzw starts counting at 2^63/2^31 (ignoring any higher-order bits), and does not affect cr0
|
||||
#if __TBB_WORDSIZE==8
|
||||
__asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x));
|
||||
return 63-static_cast<intptr_t>(x);
|
||||
#else
|
||||
__asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x));
|
||||
return 31-static_cast<intptr_t>(x);
|
||||
#endif
|
||||
}
|
||||
#define __TBB_Log2(V) __TBB_machine_lg(V)
|
||||
|
||||
// Assumes implicit alignment for any 32-bit value
|
||||
typedef uint32_t __TBB_Flag;
|
||||
#define __TBB_Flag __TBB_Flag
|
||||
|
||||
inline bool __TBB_machine_trylockbyte( __TBB_atomic __TBB_Flag &flag ) {
|
||||
return __TBB_machine_cmpswp4(&flag,1,0)==0;
|
||||
}
|
||||
#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)
|
||||
133
Research/inc/tbb/machine/macos_common.h
Normal file
133
Research/inc/tbb/machine/macos_common.h
Normal file
@@ -0,0 +1,133 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
#if !defined(__TBB_machine_H) || defined(__TBB_machine_macos_common_H)
|
||||
#error Do not #include this internal file directly; use public TBB headers instead.
|
||||
#endif
|
||||
|
||||
#define __TBB_machine_macos_common_H
|
||||
|
||||
#include <sched.h>
|
||||
#define __TBB_Yield() sched_yield()
|
||||
|
||||
// __TBB_HardwareConcurrency
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
static inline int __TBB_macos_available_cpu() {
|
||||
int name[2] = {CTL_HW, HW_AVAILCPU};
|
||||
int ncpu;
|
||||
size_t size = sizeof(ncpu);
|
||||
sysctl( name, 2, &ncpu, &size, NULL, 0 );
|
||||
return ncpu;
|
||||
}
|
||||
|
||||
#define __TBB_HardwareConcurrency() __TBB_macos_available_cpu()
|
||||
|
||||
#ifndef __TBB_full_memory_fence
|
||||
// TBB has not recognized the architecture (none of the architecture abstraction
|
||||
// headers was included).
|
||||
#define __TBB_UnknownArchitecture 1
|
||||
#endif
|
||||
|
||||
#if __TBB_UnknownArchitecture
|
||||
// Implementation of atomic operations based on OS provided primitives
|
||||
#include <libkern/OSAtomic.h>
|
||||
|
||||
static inline int64_t __TBB_machine_cmpswp8_OsX(volatile void *ptr, int64_t value, int64_t comparand)
|
||||
{
|
||||
__TBB_ASSERT( tbb::internal::is_aligned(ptr,8), "address not properly aligned for OS X* atomics");
|
||||
int64_t* address = (int64_t*)ptr;
|
||||
while( !OSAtomicCompareAndSwap64Barrier(comparand, value, address) ){
|
||||
#if __TBB_WORDSIZE==8
|
||||
int64_t snapshot = *address;
|
||||
#else
|
||||
int64_t snapshot = OSAtomicAdd64( 0, address );
|
||||
#endif
|
||||
if( snapshot!=comparand ) return snapshot;
|
||||
}
|
||||
return comparand;
|
||||
}
|
||||
|
||||
#define __TBB_machine_cmpswp8 __TBB_machine_cmpswp8_OsX
|
||||
|
||||
#endif /* __TBB_UnknownArchitecture */
|
||||
|
||||
#if __TBB_UnknownArchitecture
|
||||
|
||||
#ifndef __TBB_WORDSIZE
|
||||
#define __TBB_WORDSIZE 4
|
||||
#endif
|
||||
|
||||
#ifdef __TBB_ENDIANNESS
|
||||
// Already determined based on hardware architecture.
|
||||
#elif __BIG_ENDIAN__
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
|
||||
#elif __LITTLE_ENDIAN__
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
|
||||
#else
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
|
||||
#endif
|
||||
|
||||
/** As this generic implementation has absolutely no information about underlying
|
||||
hardware, its performance most likely will be sub-optimal because of full memory
|
||||
fence usages where a more lightweight synchronization means (or none at all)
|
||||
could suffice. Thus if you use this header to enable TBB on a new platform,
|
||||
consider forking it and relaxing below helpers as appropriate. **/
|
||||
#define __TBB_control_consistency_helper() OSMemoryBarrier()
|
||||
#define __TBB_acquire_consistency_helper() OSMemoryBarrier()
|
||||
#define __TBB_release_consistency_helper() OSMemoryBarrier()
|
||||
#define __TBB_full_memory_fence() OSMemoryBarrier()
|
||||
|
||||
static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand)
|
||||
{
|
||||
__TBB_ASSERT( tbb::internal::is_aligned(ptr,4), "address not properly aligned for OS X* atomics");
|
||||
int32_t* address = (int32_t*)ptr;
|
||||
while( !OSAtomicCompareAndSwap32Barrier(comparand, value, address) ){
|
||||
int32_t snapshot = *address;
|
||||
if( snapshot!=comparand ) return snapshot;
|
||||
}
|
||||
return comparand;
|
||||
}
|
||||
|
||||
static inline int32_t __TBB_machine_fetchadd4(volatile void *ptr, int32_t addend)
|
||||
{
|
||||
__TBB_ASSERT( tbb::internal::is_aligned(ptr,4), "address not properly aligned for OS X* atomics");
|
||||
return OSAtomicAdd32Barrier(addend, (int32_t*)ptr) - addend;
|
||||
}
|
||||
|
||||
static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend)
|
||||
{
|
||||
__TBB_ASSERT( tbb::internal::is_aligned(ptr,8), "address not properly aligned for OS X* atomics");
|
||||
return OSAtomicAdd64Barrier(addend, (int64_t*)ptr) - addend;
|
||||
}
|
||||
|
||||
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
|
||||
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
|
||||
#define __TBB_USE_GENERIC_FETCH_STORE 1
|
||||
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
|
||||
#if __TBB_WORDSIZE == 4
|
||||
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
|
||||
#endif
|
||||
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
|
||||
|
||||
#endif /* __TBB_UnknownArchitecture */
|
||||
61
Research/inc/tbb/machine/mic_common.h
Normal file
61
Research/inc/tbb/machine/mic_common.h
Normal file
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
#ifndef __TBB_mic_common_H
|
||||
#define __TBB_mic_common_H
|
||||
|
||||
#ifndef __TBB_machine_H
|
||||
#error Do not #include this internal file directly; use public TBB headers instead.
|
||||
#endif
|
||||
|
||||
#if ! __TBB_DEFINE_MIC
|
||||
#error mic_common.h should be included only when building for Intel(R) Many Integrated Core Architecture
|
||||
#endif
|
||||
|
||||
#ifndef __TBB_PREFETCHING
|
||||
#define __TBB_PREFETCHING 1
|
||||
#endif
|
||||
#if __TBB_PREFETCHING
|
||||
#include <immintrin.h>
|
||||
#define __TBB_cl_prefetch(p) _mm_prefetch((const char*)p, _MM_HINT_T1)
|
||||
#define __TBB_cl_evict(p) _mm_clevict(p, _MM_HINT_T1)
|
||||
#endif
|
||||
|
||||
/** Intel(R) Many Integrated Core Architecture does not support mfence and pause instructions **/
|
||||
#define __TBB_full_memory_fence() __asm__ __volatile__("lock; addl $0,(%%rsp)":::"memory")
|
||||
#define __TBB_Pause(x) _mm_delay_32(16*(x))
|
||||
#define __TBB_STEALING_PAUSE 1500/16
|
||||
#include <sched.h>
|
||||
#define __TBB_Yield() sched_yield()
|
||||
|
||||
// low-level timing intrinsic and its type
|
||||
#define __TBB_machine_time_stamp() _rdtsc()
|
||||
typedef uint64_t machine_tsc_t;
|
||||
|
||||
/** Specifics **/
|
||||
#define __TBB_STEALING_ABORT_ON_CONTENTION 1
|
||||
#define __TBB_YIELD2P 1
|
||||
#define __TBB_HOARD_NONLOCAL_TASKS 1
|
||||
|
||||
#if ! ( __FreeBSD__ || __linux__ )
|
||||
#error Intel(R) Many Integrated Core Compiler does not define __FreeBSD__ or __linux__ anymore. Check for the __TBB_XXX_BROKEN defined under __FreeBSD__ or __linux__.
|
||||
#endif /* ! ( __FreeBSD__ || __linux__ ) */
|
||||
|
||||
#endif /* __TBB_mic_common_H */
|
||||
171
Research/inc/tbb/machine/msvc_armv7.h
Normal file
171
Research/inc/tbb/machine/msvc_armv7.h
Normal file
@@ -0,0 +1,171 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
#if !defined(__TBB_machine_H) || defined(__TBB_msvc_armv7_H)
|
||||
#error Do not #include this internal file directly; use public TBB headers instead.
|
||||
#endif
|
||||
|
||||
#define __TBB_msvc_armv7_H
|
||||
|
||||
#include <intrin.h>
|
||||
#include <float.h>
|
||||
|
||||
#define __TBB_WORDSIZE 4
|
||||
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
|
||||
|
||||
#if defined(TBB_WIN32_USE_CL_BUILTINS)
|
||||
// We can test this on _M_IX86
|
||||
#pragma intrinsic(_ReadWriteBarrier)
|
||||
#pragma intrinsic(_mm_mfence)
|
||||
#define __TBB_compiler_fence() _ReadWriteBarrier()
|
||||
#define __TBB_full_memory_fence() _mm_mfence()
|
||||
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
|
||||
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
|
||||
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
|
||||
#else
|
||||
//Now __dmb(_ARM_BARRIER_SY) is used for both compiler and memory fences
|
||||
//This might be changed later after testing
|
||||
#define __TBB_compiler_fence() __dmb(_ARM_BARRIER_SY)
|
||||
#define __TBB_full_memory_fence() __dmb(_ARM_BARRIER_SY)
|
||||
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
|
||||
#define __TBB_acquire_consistency_helper() __TBB_full_memory_fence()
|
||||
#define __TBB_release_consistency_helper() __TBB_full_memory_fence()
|
||||
#endif
|
||||
|
||||
//--------------------------------------------------
|
||||
// Compare and swap
|
||||
//--------------------------------------------------
|
||||
|
||||
/**
|
||||
* Atomic CAS for 32 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
|
||||
* @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
|
||||
* @param value value to assign *ptr to if *ptr==comparand
|
||||
* @param comparand value to compare with *ptr
|
||||
* @return value originally in memory at ptr, regardless of success
|
||||
*/
|
||||
|
||||
#define __TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(S,T,F) \
|
||||
inline T __TBB_machine_cmpswp##S( volatile void *ptr, T value, T comparand ) { \
|
||||
return _InterlockedCompareExchange##F(reinterpret_cast<volatile T *>(ptr),value,comparand); \
|
||||
} \
|
||||
|
||||
#define __TBB_MACHINE_DEFINE_ATOMICS_FETCHADD(S,T,F) \
|
||||
inline T __TBB_machine_fetchadd##S( volatile void *ptr, T value ) { \
|
||||
return _InterlockedExchangeAdd##F(reinterpret_cast<volatile T *>(ptr),value); \
|
||||
} \
|
||||
|
||||
__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(1,char,8)
|
||||
__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(2,short,16)
|
||||
__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(4,long,)
|
||||
__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(8,__int64,64)
|
||||
__TBB_MACHINE_DEFINE_ATOMICS_FETCHADD(4,long,)
|
||||
#if defined(TBB_WIN32_USE_CL_BUILTINS)
|
||||
// No _InterlockedExchangeAdd64 intrinsic on _M_IX86
|
||||
#define __TBB_64BIT_ATOMICS 0
|
||||
#else
|
||||
__TBB_MACHINE_DEFINE_ATOMICS_FETCHADD(8,__int64,64)
|
||||
#endif
|
||||
|
||||
inline void __TBB_machine_pause (int32_t delay )
|
||||
{
|
||||
while(delay>0)
|
||||
{
|
||||
__TBB_compiler_fence();
|
||||
delay--;
|
||||
}
|
||||
}
|
||||
|
||||
// API to retrieve/update FPU control setting
|
||||
#define __TBB_CPU_CTL_ENV_PRESENT 1
|
||||
|
||||
namespace tbb {
|
||||
namespace internal {
|
||||
|
||||
//! Relaxed (unordered) load/store for a type T of size S.
/** Specialized on this platform so that every relaxed load is followed by an
    acquire barrier, as a workaround for a hardware erratum (see below). **/
template <typename T, size_t S>
struct machine_load_store_relaxed {
    static inline T load ( const volatile T& location ) {
        const T value = location;

        /*
        * An extra memory barrier is required for errata #761319
        * Please see http://infocenter.arm.com/help/topic/com.arm.doc.uan0004a
        */
        __TBB_acquire_consistency_helper();
        return value;
    }

    static inline void store ( volatile T& location, T value ) {
        // Plain store: relaxed semantics impose no ordering on the writer side.
        location = value;
    }
};
|
||||
|
||||
//! Snapshot of the FPU control settings, accessed via the CRT _control87 API.
class cpu_ctl_env {
private:
    unsigned int my_ctl;  // raw control word as reported by _control87
public:
    bool operator!=( const cpu_ctl_env& ctl ) const { return my_ctl != ctl.my_ctl; }
    // Capture the current control word (new-value mask 0 => query only).
    void get_env() { my_ctl = _control87(0, 0); }
    // Restore the saved control word; mask ~0U selects every control bit.
    void set_env() const { _control87( my_ctl, ~0U ); }
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace tbb
|
||||
|
||||
// Machine specific atomic operations
|
||||
#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
|
||||
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
|
||||
#define __TBB_Pause(V) __TBB_machine_pause(V)
|
||||
|
||||
// Use generics for some things
|
||||
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
|
||||
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
|
||||
#define __TBB_USE_GENERIC_PART_WORD_FETCH_STORE 1
|
||||
#define __TBB_USE_GENERIC_FETCH_STORE 1
|
||||
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
|
||||
|
||||
#if defined(TBB_WIN32_USE_CL_BUILTINS)
|
||||
#if !__TBB_WIN8UI_SUPPORT
|
||||
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
|
||||
#define __TBB_Yield() SwitchToThread()
|
||||
#else
|
||||
#include<thread>
|
||||
#define __TBB_Yield() std::this_thread::yield()
|
||||
#endif
|
||||
#else
|
||||
#define __TBB_Yield() __yield()
|
||||
#endif
|
||||
|
||||
// Machine specific atomic operations
|
||||
#define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V)
|
||||
#define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V)
|
||||
|
||||
//! Atomically OR `addend` into the word at `operand` via _InterlockedOr.
// NOTE(review): both arguments are cast to long, so callers are expected to
// pass 32-bit-compatible operands — confirm against call sites.
template <typename T1,typename T2>
inline void __TBB_machine_OR( T1 *operand, T2 addend ) {
    _InterlockedOr((long volatile *)operand, (long)addend);
}
|
||||
|
||||
//! Atomically AND `addend` into the word at `operand` via _InterlockedAnd.
// NOTE(review): both arguments are cast to long, so callers are expected to
// pass 32-bit-compatible operands — confirm against call sites.
template <typename T1,typename T2>
inline void __TBB_machine_AND( T1 *operand, T2 addend ) {
    _InterlockedAnd((long volatile *)operand, (long)addend);
}
|
||||
|
||||
216
Research/inc/tbb/machine/msvc_ia32_common.h
Normal file
216
Research/inc/tbb/machine/msvc_ia32_common.h
Normal file
@@ -0,0 +1,216 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
#ifndef __TBB_machine_msvc_ia32_common_H
|
||||
#define __TBB_machine_msvc_ia32_common_H
|
||||
|
||||
#include <intrin.h>
|
||||
|
||||
//TODO: consider moving this macro to tbb_config.h and used there MSVC asm is used
|
||||
#if !_M_X64 || __INTEL_COMPILER
|
||||
#define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 1
|
||||
|
||||
#if _M_X64
|
||||
#define __TBB_r(reg_name) r##reg_name
|
||||
#else
|
||||
#define __TBB_r(reg_name) e##reg_name
|
||||
#endif
|
||||
#else
|
||||
//MSVC in x64 mode does not accept inline assembler
|
||||
#define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 0
|
||||
#endif
|
||||
|
||||
#define __TBB_NO_X86_MSVC_INLINE_ASM_MSG "The compiler being used is not supported (outdated?)"
|
||||
|
||||
#if (_MSC_VER >= 1300) || (__INTEL_COMPILER) //Use compiler intrinsic when available
|
||||
#define __TBB_PAUSE_USE_INTRINSIC 1
|
||||
#pragma intrinsic(_mm_pause)
|
||||
namespace tbb { namespace internal { namespace intrinsics { namespace msvc {
|
||||
static inline void __TBB_machine_pause (uintptr_t delay ) {
|
||||
for (;delay>0; --delay )
|
||||
_mm_pause();
|
||||
}
|
||||
}}}}
|
||||
#else
|
||||
#if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
|
||||
#error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
|
||||
#endif
|
||||
|
||||
namespace tbb { namespace internal { namespace inline_asm { namespace msvc {
    //! Spin for `delay` iterations of the PAUSE instruction (MSVC inline-asm path).
    static inline void __TBB_machine_pause (uintptr_t delay ) {
        _asm
        {
            mov __TBB_r(ax), delay     // loop counter in (e/r)ax
            __TBB_L1:
            pause                      // spin-wait hint to the processor
            add __TBB_r(ax), -1
            jne __TBB_L1               // repeat until the counter reaches zero
        }
        return;
    }
}}}}
|
||||
#endif
|
||||
|
||||
//! Public spin-pause entry point.
/** Dispatches to either the intrinsic-based or the inline-asm-based
    implementation, selected at preprocessing time. **/
static inline void __TBB_machine_pause (uintptr_t delay ){
#if __TBB_PAUSE_USE_INTRINSIC
    tbb::internal::intrinsics::msvc::__TBB_machine_pause(delay);
#else
    tbb::internal::inline_asm::msvc::__TBB_machine_pause(delay);
#endif
}
|
||||
|
||||
//TODO: move this function to windows_api.h or to place where it is used
|
||||
#if (_MSC_VER<1400) && (!_WIN64) && (__TBB_X86_MSVC_INLINE_ASM_AVAILABLE)
|
||||
//! Return a pointer to the current thread's TEB (32-bit Windows only).
// Reads fs:[0x18], the TIB "Self" field, which holds the linear address of
// the current thread's environment block.
static inline void* __TBB_machine_get_current_teb () {
    void* pteb;
    __asm mov eax, fs:[0x18]
    __asm mov pteb, eax
    return pteb;
}
|
||||
#endif
|
||||
|
||||
#if ( _MSC_VER>=1400 && !defined(__INTEL_COMPILER) ) || (__INTEL_COMPILER>=1200)
|
||||
// MSVC did not have this intrinsic prior to VC8.
|
||||
// ICL 11.1 fails to compile a TBB example if __TBB_Log2 uses the intrinsic.
|
||||
#define __TBB_LOG2_USE_BSR_INTRINSIC 1
|
||||
#if _M_X64
|
||||
#define __TBB_BSR_INTRINSIC _BitScanReverse64
|
||||
#else
|
||||
#define __TBB_BSR_INTRINSIC _BitScanReverse
|
||||
#endif
|
||||
#pragma intrinsic(__TBB_BSR_INTRINSIC)
|
||||
|
||||
namespace tbb { namespace internal { namespace intrinsics { namespace msvc {
    //! Floor of log2(i), i.e. the index of the highest set bit.
    /** Uses the BitScanReverse(64) intrinsic. Precondition: i != 0, otherwise
        the intrinsic leaves j unspecified. **/
    inline uintptr_t __TBB_machine_lg( uintptr_t i ){
        unsigned long j;
        __TBB_BSR_INTRINSIC( &j, i );
        return j;
    }
}}}}
|
||||
#else
|
||||
#if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
|
||||
#error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
|
||||
#endif
|
||||
|
||||
namespace tbb { namespace internal { namespace inline_asm { namespace msvc {
    //! Floor of log2(i) via the BSR instruction (index of the highest set bit).
    /** Precondition: i != 0 — BSR leaves the destination undefined for zero. **/
    inline uintptr_t __TBB_machine_lg( uintptr_t i ){
        uintptr_t j;
        __asm
        {
            bsr __TBB_r(ax), i         // highest-set-bit index into (e/r)ax
            mov j, __TBB_r(ax)
        }
        return j;
    }
}}}}
|
||||
#endif
|
||||
|
||||
//! Public log2 entry point.
/** Dispatches to either the BSR-intrinsic or the inline-asm implementation,
    selected at preprocessing time. Precondition: i != 0. **/
static inline intptr_t __TBB_machine_lg( uintptr_t i ) {
#if __TBB_LOG2_USE_BSR_INTRINSIC
    return tbb::internal::intrinsics::msvc::__TBB_machine_lg(i);
#else
    return tbb::internal::inline_asm::msvc::__TBB_machine_lg(i);
#endif
}
|
||||
|
||||
// API to retrieve/update FPU control setting
|
||||
#define __TBB_CPU_CTL_ENV_PRESENT 1
|
||||
|
||||
namespace tbb { namespace internal { class cpu_ctl_env; } }
|
||||
#if __TBB_X86_MSVC_INLINE_ASM_AVAILABLE
|
||||
//! Store the SSE (MXCSR) and x87 (FCW) control registers into *ctl.
// The asm relies on cpu_ctl_env's layout: int mxcsr at offset 0, short x87cw
// at offset 4 — keep in sync with the class definition below.
inline void __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* ctl ) {
    __asm {
        __asm mov __TBB_r(ax), ctl
        __asm stmxcsr [__TBB_r(ax)]    // SSE control/status word
        __asm fstcw [__TBB_r(ax)+4]    // x87 control word
    }
}
|
||||
//! Load the SSE (MXCSR) and x87 (FCW) control registers from *ctl.
// Field offsets mirror __TBB_get_cpu_ctl_env above (mxcsr at 0, x87cw at 4).
inline void __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* ctl ) {
    __asm {
        __asm mov __TBB_r(ax), ctl
        __asm ldmxcsr [__TBB_r(ax)]    // SSE control/status word
        __asm fldcw [__TBB_r(ax)+4]    // x87 control word
    }
}
|
||||
#else
|
||||
extern "C" {
|
||||
void __TBB_EXPORTED_FUNC __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* );
|
||||
void __TBB_EXPORTED_FUNC __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* );
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace tbb {
|
||||
namespace internal {
|
||||
//! Snapshot of the x86 FPU control state: SSE MXCSR plus the x87 control word.
/** Filled and applied via __TBB_get/set_cpu_ctl_env, whose asm depends on
    this exact field layout (mxcsr at offset 0, x87cw at offset 4). **/
class cpu_ctl_env {
private:
    int     mxcsr;
    short   x87cw;
    static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */
public:
    bool operator!=( const cpu_ctl_env& ctl ) const { return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; }
    void get_env() {
        __TBB_get_cpu_ctl_env( this );
        // Drop the sticky status flags so comparisons see only control bits.
        mxcsr &= MXCSR_CONTROL_MASK;
    }
    void set_env() const { __TBB_set_cpu_ctl_env( this ); }
};
|
||||
} // namespace internal
|
||||
} // namespace tbb
|
||||
|
||||
#if !__TBB_WIN8UI_SUPPORT
|
||||
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
|
||||
#define __TBB_Yield() SwitchToThread()
|
||||
#else
|
||||
#include<thread>
|
||||
#define __TBB_Yield() std::this_thread::yield()
|
||||
#endif
|
||||
|
||||
#define __TBB_Pause(V) __TBB_machine_pause(V)
|
||||
#define __TBB_Log2(V) __TBB_machine_lg(V)
|
||||
|
||||
#undef __TBB_r
|
||||
|
||||
extern "C" {
|
||||
__int8 __TBB_EXPORTED_FUNC __TBB_machine_try_lock_elided (volatile void* ptr);
|
||||
void __TBB_EXPORTED_FUNC __TBB_machine_unlock_elided (volatile void* ptr);
|
||||
|
||||
// 'pause' instruction aborts HLE/RTM transactions
|
||||
#if __TBB_PAUSE_USE_INTRINSIC
|
||||
inline static void __TBB_machine_try_lock_elided_cancel() { _mm_pause(); }
|
||||
#else
|
||||
inline static void __TBB_machine_try_lock_elided_cancel() { _asm pause; }
|
||||
#endif
|
||||
|
||||
#if __TBB_TSX_INTRINSICS_PRESENT
|
||||
#define __TBB_machine_is_in_transaction _xtest
|
||||
#define __TBB_machine_begin_transaction _xbegin
|
||||
#define __TBB_machine_end_transaction _xend
|
||||
// The value (0xFF) below comes from the
|
||||
// Intel(R) 64 and IA-32 Architectures Optimization Reference Manual 12.4.5 lock not free
|
||||
#define __TBB_machine_transaction_conflict_abort() _xabort(0xFF)
|
||||
#else
|
||||
__int8 __TBB_EXPORTED_FUNC __TBB_machine_is_in_transaction();
|
||||
unsigned __int32 __TBB_EXPORTED_FUNC __TBB_machine_begin_transaction();
|
||||
void __TBB_EXPORTED_FUNC __TBB_machine_end_transaction();
|
||||
void __TBB_EXPORTED_FUNC __TBB_machine_transaction_conflict_abort();
|
||||
#endif /* __TBB_TSX_INTRINSICS_PRESENT */
|
||||
}
|
||||
|
||||
#endif /* __TBB_machine_msvc_ia32_common_H */
|
||||
203
Research/inc/tbb/machine/sunos_sparc.h
Normal file
203
Research/inc/tbb/machine/sunos_sparc.h
Normal file
@@ -0,0 +1,203 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
|
||||
#if !defined(__TBB_machine_H) || defined(__TBB_machine_sunos_sparc_H)
|
||||
#error Do not #include this internal file directly; use public TBB headers instead.
|
||||
#endif
|
||||
|
||||
#define __TBB_machine_sunos_sparc_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define __TBB_WORDSIZE 8
|
||||
// Big endian is assumed for SPARC.
|
||||
// While hardware may support page-specific bi-endianness, only big endian pages may be exposed to TBB
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
|
||||
|
||||
/** To those working on SPARC hardware. Consider relaxing acquire and release
|
||||
consistency helpers to no-op (as this port covers TSO mode only). **/
|
||||
#define __TBB_compiler_fence() __asm__ __volatile__ ("": : :"memory")
|
||||
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
|
||||
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
|
||||
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
|
||||
#define __TBB_full_memory_fence() __asm__ __volatile__("membar #LoadLoad|#LoadStore|#StoreStore|#StoreLoad": : : "memory")
|
||||
|
||||
//--------------------------------------------------
|
||||
// Compare and swap
|
||||
//--------------------------------------------------
|
||||
|
||||
/**
|
||||
* Atomic CAS for 32 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
|
||||
* @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
|
||||
* @param value value to assign *ptr to if *ptr==comparand
|
||||
* @param comparand value to compare with *ptr
|
||||
* @return value originally in memory at ptr, regardless of success
|
||||
*/
|
||||
//! 32-bit compare-and-swap (SPARC CAS instruction).
/** If *ptr == comparand, stores value into *ptr; in either case the register
    operand (%1) receives the previous contents of *ptr, which is returned. **/
static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand ){
    int32_t result;
    __asm__ __volatile__(
                "cas\t[%5],%4,%1"
                : "=m"(*(int32_t *)ptr), "=r"(result)
                : "m"(*(int32_t *)ptr), "1"(value), "r"(comparand), "r"(ptr)
                : "memory");
    return result;
}
|
||||
|
||||
/**
|
||||
* Atomic CAS for 64 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
|
||||
* @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
|
||||
* @param value value to assign *ptr to if *ptr==comparand
|
||||
* @param comparand value to compare with *ptr
|
||||
* @return value originally in memory at ptr, regardless of success
|
||||
*/
|
||||
//! 64-bit compare-and-swap (SPARC CASX instruction).
/** If *ptr == comparand, stores value into *ptr; in either case the register
    operand (%1) receives the previous contents of *ptr, which is returned. **/
static inline int64_t __TBB_machine_cmpswp8(volatile void *ptr, int64_t value, int64_t comparand ){
    int64_t result;
    __asm__ __volatile__(
                "casx\t[%5],%4,%1"
                : "=m"(*(int64_t *)ptr), "=r"(result)
                : "m"(*(int64_t *)ptr), "1"(value), "r"(comparand), "r"(ptr)
                : "memory");
    return result;
}
|
||||
|
||||
//---------------------------------------------------
|
||||
// Fetch and add
|
||||
//---------------------------------------------------
|
||||
|
||||
/**
|
||||
* Atomic fetch and add for 32 bit values, in this case implemented by continuously checking success of atomicity
|
||||
* @param ptr pointer to value to add addend to
|
||||
* @param addend value to add to *ptr
|
||||
* @return value at ptr before addend was added
|
||||
*/
|
||||
//! 32-bit atomic fetch-and-add, implemented as a CAS retry loop.
/** Returns the value at *ptr before addend was added. **/
static inline int32_t __TBB_machine_fetchadd4(volatile void *ptr, int32_t addend){
    int32_t result;
    __asm__ __volatile__ (
                "0:\t add\t %3, %4, %0\n"    // do addition
                "\t cas\t [%2], %3, %0\n"    // cas to store result in memory
                "\t cmp\t %3, %0\n"          // check if value from memory is original
                "\t bne,a,pn\t %%icc, 0b\n"  // if not try again
                "\t mov %0, %3\n"            // use branch delay slot to move new value in memory to be added
                : "=&r"(result), "=m"(*(int32_t *)ptr)
                : "r"(ptr), "r"(*(int32_t *)ptr), "r"(addend), "m"(*(int32_t *)ptr)
                : "ccr", "memory");
    return result;
}
|
||||
|
||||
/**
|
||||
* Atomic fetch and add for 64 bit values, in this case implemented by continuously checking success of atomicity
|
||||
* @param ptr pointer to value to add addend to
|
||||
* @param addend value to add to *ptr
|
||||
* @return value at ptr before addend was added
|
||||
*/
|
||||
//! 64-bit atomic fetch-and-add, implemented as a CASX retry loop.
/** Returns the value at *ptr before addend was added. **/
static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend){
    int64_t result;
    __asm__ __volatile__ (
                "0:\t add\t %3, %4, %0\n"    // do addition
                "\t casx\t [%2], %3, %0\n"   // cas to store result in memory
                "\t cmp\t %3, %0\n"          // check if value from memory is original
                "\t bne,a,pn\t %%xcc, 0b\n"  // if not try again
                "\t mov %0, %3\n"            // use branch delay slot to move new value in memory to be added
                : "=&r"(result), "=m"(*(int64_t *)ptr)
                : "r"(ptr), "r"(*(int64_t *)ptr), "r"(addend), "m"(*(int64_t *)ptr)
                : "ccr", "memory");
    return result;
}
|
||||
|
||||
//--------------------------------------------------------
|
||||
// Logarithm (base two, integer)
|
||||
//--------------------------------------------------------
|
||||
|
||||
//! Floor of log2(x).
/** Smears the highest set bit down so x becomes 2^(k+1)-1, counts the ones
    with the SPARC POPC instruction, and subtracts one. Precondition: x != 0. **/
static inline int64_t __TBB_machine_lg( uint64_t x ) {
    __TBB_ASSERT(x, "__TBB_Log2(0) undefined");
    uint64_t count;
    // one hot encode
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);
    x |= (x >> 32);
    // count 1's
    __asm__ ("popc %1, %0" : "=r"(count) : "r"(x) );
    return count-1;
}
|
||||
|
||||
//--------------------------------------------------------
|
||||
|
||||
//! Atomically OR `value` into the 64-bit word at *ptr (CASX retry loop).
// %g1 is used as a scratch register and is declared clobbered.
static inline void __TBB_machine_or( volatile void *ptr, uint64_t value ) {
    __asm__ __volatile__ (
                "0:\t or\t %2, %3, %%g1\n"   // do operation
                "\t casx\t [%1], %2, %%g1\n" // cas to store result in memory
                "\t cmp\t %2, %%g1\n"        // check if value from memory is original
                "\t bne,a,pn\t %%xcc, 0b\n"  // if not try again
                "\t mov %%g1, %2\n"          // use branch delay slot to move new value in memory to be added
                : "=m"(*(int64_t *)ptr)
                : "r"(ptr), "r"(*(int64_t *)ptr), "r"(value), "m"(*(int64_t *)ptr)
                : "ccr", "g1", "memory");
}
|
||||
|
||||
//! Atomically AND `value` into the 64-bit word at *ptr (CASX retry loop).
// %g1 is used as a scratch register and is declared clobbered.
static inline void __TBB_machine_and( volatile void *ptr, uint64_t value ) {
    __asm__ __volatile__ (
                "0:\t and\t %2, %3, %%g1\n"  // do operation
                "\t casx\t [%1], %2, %%g1\n" // cas to store result in memory
                "\t cmp\t %2, %%g1\n"        // check if value from memory is original
                "\t bne,a,pn\t %%xcc, 0b\n"  // if not try again
                "\t mov %%g1, %2\n"          // use branch delay slot to move new value in memory to be added
                : "=m"(*(int64_t *)ptr)
                : "r"(ptr), "r"(*(int64_t *)ptr), "r"(value), "m"(*(int64_t *)ptr)
                : "ccr", "g1", "memory");
}
|
||||
|
||||
|
||||
//! No pause instruction on this port: the delay hint is ignored and spinning
//! callers simply retry immediately.
static inline void __TBB_machine_pause( int32_t delay ) {
    // do nothing, inlined, doesn't matter
}
|
||||
|
||||
// put 0xff in memory location, return memory value,
// generic trylockbyte puts 0x01, however this is fine
// because all that matters is that 0 is unlocked
//! Try to acquire a byte lock; returns true iff the byte was previously 0.
static inline bool __TBB_machine_trylockbyte(unsigned char &flag){
    unsigned char result;
    // ldstub atomically loads the byte and stores 0xFF in its place.
    __asm__ __volatile__ (
                "ldstub\t [%2], %0\n"
                : "=r"(result), "=m"(flag)
                : "r"(&flag), "m"(flag)
                : "memory");
    return result == 0;
}
|
||||
|
||||
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
|
||||
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
|
||||
#define __TBB_USE_GENERIC_FETCH_STORE 1
|
||||
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
|
||||
|
||||
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
|
||||
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
|
||||
|
||||
// Definition of other functions
|
||||
#define __TBB_Pause(V) __TBB_machine_pause(V)
|
||||
#define __TBB_Log2(V) __TBB_machine_lg(V)
|
||||
|
||||
#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)
|
||||
79
Research/inc/tbb/machine/windows_api.h
Normal file
79
Research/inc/tbb/machine/windows_api.h
Normal file
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
#ifndef __TBB_machine_windows_api_H
|
||||
#define __TBB_machine_windows_api_H
|
||||
|
||||
#if _WIN32 || _WIN64
|
||||
|
||||
#if _XBOX
|
||||
|
||||
#define NONET
|
||||
#define NOD3D
|
||||
#include <xtl.h>
|
||||
|
||||
#else // Assume "usual" Windows
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
#endif // _XBOX
|
||||
|
||||
#if _WIN32_WINNT < 0x0600
|
||||
// The following Windows API function is declared explicitly;
|
||||
// otherwise it fails to compile by VS2005.
|
||||
#if !defined(WINBASEAPI) || (_WIN32_WINNT < 0x0501 && _MSC_VER == 1400)
|
||||
#define __TBB_WINBASEAPI extern "C"
|
||||
#else
|
||||
#define __TBB_WINBASEAPI WINBASEAPI
|
||||
#endif
|
||||
__TBB_WINBASEAPI BOOL WINAPI TryEnterCriticalSection( LPCRITICAL_SECTION );
|
||||
__TBB_WINBASEAPI BOOL WINAPI InitializeCriticalSectionAndSpinCount( LPCRITICAL_SECTION, DWORD );
|
||||
// Overloading WINBASEAPI macro and using local functions missing in Windows XP/2003
|
||||
#define InitializeCriticalSectionEx inlineInitializeCriticalSectionEx
|
||||
#define CreateSemaphoreEx inlineCreateSemaphoreEx
|
||||
#define CreateEventEx inlineCreateEventEx
|
||||
//! XP/2003 fallback for InitializeCriticalSectionEx.
/** The unnamed Flags parameter is not supported pre-Vista and is ignored. **/
inline BOOL WINAPI inlineInitializeCriticalSectionEx( LPCRITICAL_SECTION lpCriticalSection, DWORD dwSpinCount, DWORD )
{
    return InitializeCriticalSectionAndSpinCount( lpCriticalSection, dwSpinCount );
}
|
||||
//! XP/2003 fallback for CreateSemaphoreEx.
/** The unnamed desired-access and flags parameters are not supported
    pre-Vista and are ignored. **/
inline HANDLE WINAPI inlineCreateSemaphoreEx( LPSECURITY_ATTRIBUTES lpSemaphoreAttributes, LONG lInitialCount, LONG lMaximumCount, LPCTSTR lpName, DWORD, DWORD )
{
    return CreateSemaphore( lpSemaphoreAttributes, lInitialCount, lMaximumCount, lpName );
}
|
||||
//! XP/2003 fallback for CreateEventEx.
/** Maps the CreateEventEx flag bits onto the corresponding CreateEvent
    boolean parameters; the unnamed desired-access parameter is ignored. **/
inline HANDLE WINAPI inlineCreateEventEx( LPSECURITY_ATTRIBUTES lpEventAttributes, LPCTSTR lpName, DWORD dwFlags, DWORD )
{
    const BOOL manual_reset = ( dwFlags & 0x00000001 ) != 0 ? TRUE : FALSE; // CREATE_EVENT_MANUAL_RESET
    const BOOL initial_set  = ( dwFlags & 0x00000002 ) != 0 ? TRUE : FALSE; // CREATE_EVENT_INITIAL_SET
    return CreateEvent( lpEventAttributes, manual_reset, initial_set, lpName );
}
|
||||
#endif
|
||||
|
||||
#if defined(RTL_SRWLOCK_INIT)
|
||||
#ifndef __TBB_USE_SRWLOCK
|
||||
// TODO: turn it on when bug 1952 will be fixed
|
||||
#define __TBB_USE_SRWLOCK 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#else
|
||||
#error tbb/machine/windows_api.h should only be used for Windows based platforms
|
||||
#endif // _WIN32 || _WIN64
|
||||
|
||||
#endif // __TBB_machine_windows_api_H
|
||||
144
Research/inc/tbb/machine/windows_ia32.h
Normal file
144
Research/inc/tbb/machine/windows_ia32.h
Normal file
@@ -0,0 +1,144 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
#if !defined(__TBB_machine_H) || defined(__TBB_machine_windows_ia32_H)
|
||||
#error Do not #include this internal file directly; use public TBB headers instead.
|
||||
#endif
|
||||
|
||||
#define __TBB_machine_windows_ia32_H
|
||||
|
||||
#include "msvc_ia32_common.h"
|
||||
|
||||
#define __TBB_WORDSIZE 4
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
|
||||
|
||||
#if __INTEL_COMPILER && (__INTEL_COMPILER < 1100)
|
||||
#define __TBB_compiler_fence() __asm { __asm nop }
|
||||
#define __TBB_full_memory_fence() __asm { __asm mfence }
|
||||
#elif _MSC_VER >= 1300 || __INTEL_COMPILER
|
||||
#pragma intrinsic(_ReadWriteBarrier)
|
||||
#pragma intrinsic(_mm_mfence)
|
||||
#define __TBB_compiler_fence() _ReadWriteBarrier()
|
||||
#define __TBB_full_memory_fence() _mm_mfence()
|
||||
#else
|
||||
#error Unsupported compiler - need to define __TBB_{control,acquire,release}_consistency_helper to support it
|
||||
#endif
|
||||
|
||||
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
|
||||
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
|
||||
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
|
||||
// Workaround for overzealous compiler warnings in /Wp64 mode
|
||||
#pragma warning (push)
|
||||
#pragma warning (disable: 4244 4267)
|
||||
#endif
|
||||
|
||||
extern "C" {
|
||||
__int64 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __int64 comparand );
|
||||
__int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd8 (volatile void *ptr, __int64 addend );
|
||||
__int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore8 (volatile void *ptr, __int64 value );
|
||||
void __TBB_EXPORTED_FUNC __TBB_machine_store8 (volatile void *ptr, __int64 value );
|
||||
__int64 __TBB_EXPORTED_FUNC __TBB_machine_load8 (const volatile void *ptr);
|
||||
}
|
||||
|
||||
//TODO: use _InterlockedXXX intrinsics as they available since VC 2005
|
||||
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,U,A,C) \
|
||||
static inline T __TBB_machine_cmpswp##S ( volatile void * ptr, U value, U comparand ) { \
|
||||
T result; \
|
||||
volatile T *p = (T *)ptr; \
|
||||
__asm \
|
||||
{ \
|
||||
__asm mov edx, p \
|
||||
__asm mov C , value \
|
||||
__asm mov A , comparand \
|
||||
__asm lock cmpxchg [edx], C \
|
||||
__asm mov result, A \
|
||||
} \
|
||||
return result; \
|
||||
} \
|
||||
\
|
||||
static inline T __TBB_machine_fetchadd##S ( volatile void * ptr, U addend ) { \
|
||||
T result; \
|
||||
volatile T *p = (T *)ptr; \
|
||||
__asm \
|
||||
{ \
|
||||
__asm mov edx, p \
|
||||
__asm mov A, addend \
|
||||
__asm lock xadd [edx], A \
|
||||
__asm mov result, A \
|
||||
} \
|
||||
return result; \
|
||||
}\
|
||||
\
|
||||
static inline T __TBB_machine_fetchstore##S ( volatile void * ptr, U value ) { \
|
||||
T result; \
|
||||
volatile T *p = (T *)ptr; \
|
||||
__asm \
|
||||
{ \
|
||||
__asm mov edx, p \
|
||||
__asm mov A, value \
|
||||
__asm lock xchg [edx], A \
|
||||
__asm mov result, A \
|
||||
} \
|
||||
return result; \
|
||||
}
|
||||
|
||||
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(1, __int8, __int8, al, cl)
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(2, __int16, __int16, ax, cx)
|
||||
__TBB_MACHINE_DEFINE_ATOMICS(4, ptrdiff_t, ptrdiff_t, eax, ecx)
|
||||
|
||||
#undef __TBB_MACHINE_DEFINE_ATOMICS
|
||||
|
||||
//! Atomically OR `addend` into the 32-bit word at *operand (lock-prefixed OR).
static inline void __TBB_machine_OR( volatile void *operand, __int32 addend ) {
    __asm
    {
        mov eax, addend
        mov edx, [operand]
        lock or [edx], eax
    }
}
|
||||
|
||||
//! Atomically AND `addend` into the 32-bit word at *operand (lock-prefixed AND).
static inline void __TBB_machine_AND( volatile void *operand, __int32 addend ) {
    __asm
    {
        mov eax, addend
        mov edx, [operand]
        lock and [edx], eax
    }
}
|
||||
|
||||
#define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V)
|
||||
#define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V)
|
||||
|
||||
//TODO: Check if it possible and profitable for IA-32 architecture on (Linux and Windows)
|
||||
//to use of 64-bit load/store via floating point registers together with full fence
|
||||
//for sequentially consistent load/store, instead of CAS.
|
||||
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
|
||||
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
|
||||
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
|
||||
#pragma warning (pop)
|
||||
#endif // warnings 4244, 4267 are back
|
||||
|
||||
105
Research/inc/tbb/machine/windows_intel64.h
Normal file
105
Research/inc/tbb/machine/windows_intel64.h
Normal file
@@ -0,0 +1,105 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
#if !defined(__TBB_machine_H) || defined(__TBB_machine_windows_intel64_H)
|
||||
#error Do not #include this internal file directly; use public TBB headers instead.
|
||||
#endif
|
||||
|
||||
#define __TBB_machine_windows_intel64_H
|
||||
|
||||
#define __TBB_WORDSIZE 8
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
|
||||
|
||||
#include <intrin.h>
|
||||
#include "msvc_ia32_common.h"
|
||||
|
||||
//TODO: Use _InterlockedXXX16 intrinsics for 2 byte operations
|
||||
#if !__INTEL_COMPILER
|
||||
#pragma intrinsic(_InterlockedOr64)
|
||||
#pragma intrinsic(_InterlockedAnd64)
|
||||
#pragma intrinsic(_InterlockedCompareExchange)
|
||||
#pragma intrinsic(_InterlockedCompareExchange64)
|
||||
#pragma intrinsic(_InterlockedExchangeAdd)
|
||||
#pragma intrinsic(_InterlockedExchangeAdd64)
|
||||
#pragma intrinsic(_InterlockedExchange)
|
||||
#pragma intrinsic(_InterlockedExchange64)
|
||||
#endif /* !(__INTEL_COMPILER) */
|
||||
|
||||
#if __INTEL_COMPILER && (__INTEL_COMPILER < 1100)
|
||||
#define __TBB_compiler_fence() __asm { __asm nop }
|
||||
#define __TBB_full_memory_fence() __asm { __asm mfence }
|
||||
#elif _MSC_VER >= 1300 || __INTEL_COMPILER
|
||||
#pragma intrinsic(_ReadWriteBarrier)
|
||||
#pragma intrinsic(_mm_mfence)
|
||||
#define __TBB_compiler_fence() _ReadWriteBarrier()
|
||||
#define __TBB_full_memory_fence() _mm_mfence()
|
||||
#endif
|
||||
|
||||
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
|
||||
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
|
||||
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
|
||||
|
||||
// ATTENTION: if you ever change argument types in machine-specific primitives,
|
||||
// please take care of atomic_word<> specializations in tbb/atomic.h
|
||||
extern "C" {
|
||||
__int8 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp1 (volatile void *ptr, __int8 value, __int8 comparand );
|
||||
__int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd1 (volatile void *ptr, __int8 addend );
|
||||
__int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore1 (volatile void *ptr, __int8 value );
|
||||
__int16 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp2 (volatile void *ptr, __int16 value, __int16 comparand );
|
||||
__int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd2 (volatile void *ptr, __int16 addend );
|
||||
__int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore2 (volatile void *ptr, __int16 value );
|
||||
}
|
||||
|
||||
inline long __TBB_machine_cmpswp4 (volatile void *ptr, __int32 value, __int32 comparand ) {
|
||||
return _InterlockedCompareExchange( (long*)ptr, value, comparand );
|
||||
}
|
||||
inline long __TBB_machine_fetchadd4 (volatile void *ptr, __int32 addend ) {
|
||||
return _InterlockedExchangeAdd( (long*)ptr, addend );
|
||||
}
|
||||
inline long __TBB_machine_fetchstore4 (volatile void *ptr, __int32 value ) {
|
||||
return _InterlockedExchange( (long*)ptr, value );
|
||||
}
|
||||
|
||||
// 8-byte compare-and-swap via the 64-bit interlocked intrinsic.
// Returns the value observed at *ptr before the operation (full fence).
inline __int64 __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __int64 comparand ) {
    __int64 *word = (__int64*)ptr;
    return _InterlockedCompareExchange64( word, value, comparand );
}
|
||||
// 8-byte atomic fetch-and-add; returns the value held before the addition.
inline __int64 __TBB_machine_fetchadd8 (volatile void *ptr, __int64 addend ) {
    __int64 *word = (__int64*)ptr;
    return _InterlockedExchangeAdd64( word, addend );
}
|
||||
// 8-byte atomic exchange; returns the value that was replaced.
inline __int64 __TBB_machine_fetchstore8 (volatile void *ptr, __int64 value ) {
    __int64 *word = (__int64*)ptr;
    return _InterlockedExchange64( word, value );
}
|
||||
|
||||
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
|
||||
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
|
||||
|
||||
inline void __TBB_machine_OR( volatile void *operand, intptr_t addend ) {
|
||||
_InterlockedOr64((__int64*)operand, addend);
|
||||
}
|
||||
|
||||
inline void __TBB_machine_AND( volatile void *operand, intptr_t addend ) {
|
||||
_InterlockedAnd64((__int64*)operand, addend);
|
||||
}
|
||||
|
||||
#define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V)
|
||||
#define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V)
|
||||
|
||||
119
Research/inc/tbb/machine/xbox360_ppc.h
Normal file
119
Research/inc/tbb/machine/xbox360_ppc.h
Normal file
@@ -0,0 +1,119 @@
|
||||
/*
|
||||
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
|
||||
you can redistribute it and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation. Threading Building Blocks is
|
||||
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
|
||||
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
See the GNU General Public License for more details. You should have received a copy of
|
||||
the GNU General Public License along with Threading Building Blocks; if not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software library without
|
||||
restriction. Specifically, if other files instantiate templates or use macros or inline
|
||||
functions from this file, or you compile this file and link it with other files to produce
|
||||
an executable, this file does not by itself cause the resulting executable to be covered
|
||||
by the GNU General Public License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General Public License.
|
||||
*/
|
||||
|
||||
// TODO: revise by comparing with mac_ppc.h
|
||||
|
||||
#if !defined(__TBB_machine_H) || defined(__TBB_machine_xbox360_ppc_H)
|
||||
#error Do not #include this internal file directly; use public TBB headers instead.
|
||||
#endif
|
||||
|
||||
#define __TBB_machine_xbox360_ppc_H
|
||||
|
||||
#define NONET
|
||||
#define NOD3D
|
||||
#include "xtl.h"
|
||||
#include "ppcintrinsics.h"
|
||||
|
||||
#if _MSC_VER >= 1300
|
||||
extern "C" void _MemoryBarrier();
|
||||
#pragma intrinsic(_MemoryBarrier)
|
||||
#define __TBB_control_consistency_helper() __isync()
|
||||
#define __TBB_acquire_consistency_helper() _MemoryBarrier()
|
||||
#define __TBB_release_consistency_helper() _MemoryBarrier()
|
||||
#endif
|
||||
|
||||
#define __TBB_full_memory_fence() __sync()
|
||||
|
||||
#define __TBB_WORDSIZE 4
|
||||
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
|
||||
|
||||
//todo: define __TBB_USE_FENCED_ATOMICS and define acquire/release primitives to maximize performance
|
||||
|
||||
// 4-byte compare-and-swap for the PowerPC-based XBOX 360.
// The interlocked intrinsic is bracketed by sync (before) and isync (after)
// to give the operation full-fence semantics; returns the prior value.
inline __int32 __TBB_machine_cmpswp4(volatile void *ptr, __int32 value, __int32 comparand ) {
    __sync();                       // order all prior memory accesses before the RMW
    __int32 previous = InterlockedCompareExchange((volatile LONG*)ptr, value, comparand);
    __isync();                      // prevent subsequent accesses from moving above the RMW
    return previous;
}
|
||||
|
||||
// 8-byte compare-and-swap for the PowerPC-based XBOX 360.
// Same fencing discipline as the 4-byte variant: sync before, isync after.
inline __int64 __TBB_machine_cmpswp8(volatile void *ptr, __int64 value, __int64 comparand )
{
    __sync();                       // order all prior memory accesses before the RMW
    __int64 previous = InterlockedCompareExchange64((volatile LONG64*)ptr, value, comparand);
    __isync();                      // prevent subsequent accesses from moving above the RMW
    return previous;
}
|
||||
|
||||
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
|
||||
#define __TBB_USE_GENERIC_FETCH_ADD 1
|
||||
#define __TBB_USE_GENERIC_FETCH_STORE 1
|
||||
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
|
||||
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
|
||||
|
||||
#pragma optimize( "", off )
|
||||
inline void __TBB_machine_pause (__int32 delay )
|
||||
{
|
||||
for (__int32 i=0; i<delay; i++) {;};
|
||||
}
|
||||
#pragma optimize( "", on )
|
||||
|
||||
#define __TBB_Yield() Sleep(0)
|
||||
#define __TBB_Pause(V) __TBB_machine_pause(V)
|
||||
|
||||
// This port uses only 2 hardware threads for TBB on XBOX 360.
|
||||
// Others are left to sound etc.
|
||||
// Change the following mask to allow TBB use more HW threads.
|
||||
// This port uses only 2 hardware threads for TBB on XBOX 360; the others
// are left to sound etc.  Widen the following mask to let TBB use more
// hardware threads.
static const int __TBB_XBOX360_HARDWARE_THREAD_MASK = 0x0C;

// Number of TBB threads = bits set in the mask (workers) + 1 (master).
static inline int __TBB_XBOX360_DetectNumberOfWorkers()
{
    // Compile-time sanity check: an all-zero mask makes this a zero-length
    // array and the build fails — at least one bit must always be set.
    char a[__TBB_XBOX360_HARDWARE_THREAD_MASK];
    a[0]=0;

    // The XBOX 360 exposes 6 hardware threads, so only bits 0..5 matter.
    int enabled = 0;
    for (int bit = 0; bit < 6; ++bit)
        enabled += (__TBB_XBOX360_HARDWARE_THREAD_MASK >> bit) & 1;
    return enabled + 1;             // +1 accommodates the master thread
}

// Maps a worker-thread index onto the hardware-thread index of the
// (workerThreadIndex-th) set bit of the mask, wrapping around the number
// of available workers.
static inline int __TBB_XBOX360_GetHardwareThreadIndex(int workerThreadIndex)
{
    workerThreadIndex %= __TBB_XBOX360_DetectNumberOfWorkers()-1;
    int remaining = workerThreadIndex;  // set bits still to be skipped
    int hwIndex = 0;
    for (int bits = __TBB_XBOX360_HARDWARE_THREAD_MASK; ; bits >>= 1, ++hwIndex)
    {
        if ((bits & 1) != 0)
        {
            if (remaining == 0) break;  // landed on the requested set bit
            --remaining;
        }
    }
    return hwIndex;
}
|
||||
|
||||
#define __TBB_HardwareConcurrency() __TBB_XBOX360_DetectNumberOfWorkers()
|
||||
Reference in New Issue
Block a user