Initial commit: Final state of the master project

This commit is contained in:
2017-09-16 09:41:37 +02:00
commit 696180d43b
832 changed files with 169717 additions and 0 deletions

View File

@@ -0,0 +1,217 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
/*
Platform isolation layer for the ARMv7-a architecture.
*/
#ifndef __TBB_machine_H
#error Do not include this file directly; include tbb_machine.h instead
#endif
//TODO: is ARMv7 is the only version ever to support?
#if !(__ARM_ARCH_7A__)
#error compilation requires an ARMv7-a architecture.
#endif
#include <sys/param.h>
#include <unistd.h>
#define __TBB_WORDSIZE 4
// Traditionally ARM is little-endian.
// Note that, since only the layout of aligned 32-bit words is of interest,
// any apparent PDP-endianness of 32-bit words at half-word alignment or
// any little-endian ordering of big-endian 32-bit words in 64-bit quantities
// may be disregarded for this setting.
#if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
#elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#elif defined(__BYTE_ORDER__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
#else
#define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
#endif
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#define __TBB_full_memory_fence() __asm__ __volatile__("dmb ish": : :"memory")
#define __TBB_control_consistency_helper() __TBB_full_memory_fence()
#define __TBB_acquire_consistency_helper() __TBB_full_memory_fence()
#define __TBB_release_consistency_helper() __TBB_full_memory_fence()
//--------------------------------------------------
// Compare and swap
//--------------------------------------------------
/**
* Atomic CAS for 32 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
* @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
* @param value value to assign *ptr to if *ptr==comparand
* @param comparand value to compare with *ptr
* @return value originally in memory at ptr, regardless of success
*/
static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand )
{
int32_t oldval, res;
__TBB_full_memory_fence();
do {
__asm__ __volatile__(
"ldrex %1, [%3]\n"
"mov %0, #0\n"
"cmp %1, %4\n"
"it eq\n"
"strexeq %0, %5, [%3]\n"
: "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int32_t*)ptr)
: "r" ((int32_t *)ptr), "Ir" (comparand), "r" (value)
: "cc");
} while (res);
__TBB_full_memory_fence();
return oldval;
}
/**
* Atomic CAS for 64 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
* @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
* @param value value to assign *ptr to if *ptr==comparand
* @param comparand value to compare with *ptr
* @return value originally in memory at ptr, regardless of success
*/
static inline int64_t __TBB_machine_cmpswp8(volatile void *ptr, int64_t value, int64_t comparand )
{
int64_t oldval;
int32_t res;
__TBB_full_memory_fence();
do {
__asm__ __volatile__(
"mov %0, #0\n"
"ldrexd %1, %H1, [%3]\n"
"cmp %1, %4\n"
"it eq\n"
"cmpeq %H1, %H4\n"
"it eq\n"
"strexdeq %0, %5, %H5, [%3]"
: "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int64_t*)ptr)
: "r" ((int64_t *)ptr), "r" (comparand), "r" (value)
: "cc");
} while (res);
__TBB_full_memory_fence();
return oldval;
}
static inline int32_t __TBB_machine_fetchadd4(volatile void* ptr, int32_t addend)
{
unsigned long tmp;
int32_t result, tmp2;
__TBB_full_memory_fence();
__asm__ __volatile__(
"1: ldrex %0, [%4]\n"
" add %3, %0, %5\n"
" strex %1, %3, [%4]\n"
" cmp %1, #0\n"
" bne 1b\n"
: "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int32_t*)ptr), "=&r"(tmp2)
: "r" ((int32_t *)ptr), "Ir" (addend)
: "cc");
__TBB_full_memory_fence();
return result;
}
static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend)
{
unsigned long tmp;
int64_t result, tmp2;
__TBB_full_memory_fence();
__asm__ __volatile__(
"1: ldrexd %0, %H0, [%4]\n"
" adds %3, %0, %5\n"
" adc %H3, %H0, %H5\n"
" strexd %1, %3, %H3, [%4]\n"
" cmp %1, #0\n"
" bne 1b"
: "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int64_t*)ptr), "=&r"(tmp2)
: "r" ((int64_t *)ptr), "r" (addend)
: "cc");
__TBB_full_memory_fence();
return result;
}
inline void __TBB_machine_pause (int32_t delay )
{
while(delay>0)
{
__TBB_compiler_fence();
delay--;
}
}
namespace tbb {
namespace internal {
template <typename T, size_t S>
struct machine_load_store_relaxed {
static inline T load ( const volatile T& location ) {
const T value = location;
/*
* An extra memory barrier is required for errata #761319
* Please see http://infocenter.arm.com/help/topic/com.arm.doc.uan0004a
*/
__TBB_acquire_consistency_helper();
return value;
}
static inline void store ( volatile T& location, T value ) {
location = value;
}
};
}} // namespaces internal, tbb
// Machine specific atomic operations
#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
#define __TBB_Pause(V) __TBB_machine_pause(V)
// Use generics for some things
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_STORE 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1

View File

@@ -0,0 +1,131 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_generic_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_gcc_generic_H
#include <stdint.h>
#include <unistd.h>
#define __TBB_WORDSIZE __SIZEOF_POINTER__
#if __TBB_GCC_64BIT_ATOMIC_BUILTINS_BROKEN
#define __TBB_64BIT_ATOMICS 0
#endif
/** FPU control setting not available for non-Intel architectures on Android **/
#if __ANDROID__ && __TBB_generic_arch
#define __TBB_CPU_CTL_ENV_PRESENT 0
#endif
// __BYTE_ORDER__ is used in accordance with http://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html,
// but __BIG_ENDIAN__ or __LITTLE_ENDIAN__ may be more commonly found instead.
#if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
#elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#elif defined(__BYTE_ORDER__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
#else
#define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
#endif
/** As this generic implementation has absolutely no information about underlying
hardware, its performance most likely will be sub-optimal because of full memory
fence usages where a more lightweight synchronization means (or none at all)
could suffice. Thus if you use this header to enable TBB on a new platform,
consider forking it and relaxing below helpers as appropriate. **/
#define __TBB_acquire_consistency_helper() __sync_synchronize()
#define __TBB_release_consistency_helper() __sync_synchronize()
#define __TBB_full_memory_fence() __sync_synchronize()
#define __TBB_control_consistency_helper() __sync_synchronize()
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T) \
inline T __TBB_machine_cmpswp##S( volatile void *ptr, T value, T comparand ) { \
return __sync_val_compare_and_swap(reinterpret_cast<volatile T *>(ptr),comparand,value); \
} \
\
inline T __TBB_machine_fetchadd##S( volatile void *ptr, T value ) { \
return __sync_fetch_and_add(reinterpret_cast<volatile T *>(ptr),value); \
} \
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t)
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t)
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t)
__TBB_MACHINE_DEFINE_ATOMICS(8,int64_t)
#undef __TBB_MACHINE_DEFINE_ATOMICS
namespace tbb{ namespace internal { namespace gcc_builtins {
inline int clz(unsigned int x){ return __builtin_clz(x);};
inline int clz(unsigned long int x){ return __builtin_clzl(x);};
inline int clz(unsigned long long int x){ return __builtin_clzll(x);};
}}}
//gcc __builtin_clz builtin count _number_ of leading zeroes
static inline intptr_t __TBB_machine_lg( uintptr_t x ) {
return sizeof(x)*8 - tbb::internal::gcc_builtins::clz(x) -1 ;
}
static inline void __TBB_machine_or( volatile void *ptr, uintptr_t addend ) {
__sync_fetch_and_or(reinterpret_cast<volatile uintptr_t *>(ptr),addend);
}
static inline void __TBB_machine_and( volatile void *ptr, uintptr_t addend ) {
__sync_fetch_and_and(reinterpret_cast<volatile uintptr_t *>(ptr),addend);
}
typedef unsigned char __TBB_Flag;
typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag;
inline bool __TBB_machine_try_lock_byte( __TBB_atomic_flag &flag ) {
return __sync_lock_test_and_set(&flag,1)==0;
}
inline void __TBB_machine_unlock_byte( __TBB_atomic_flag &flag ) {
__sync_lock_release(&flag);
}
// Machine specific atomic operations
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
#define __TBB_TryLockByte __TBB_machine_try_lock_byte
#define __TBB_UnlockByte __TBB_machine_unlock_byte
// Definition of other functions
#define __TBB_Log2(V) __TBB_machine_lg(V)
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#if __TBB_WORDSIZE==4
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#endif
#if __TBB_x86_32 || __TBB_x86_64
#include "gcc_itsx.h"
#endif

View File

@@ -0,0 +1,100 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_machine_gcc_ia32_common_H
#define __TBB_machine_gcc_ia32_common_H
//TODO: Add a higher-level function, e.g. tbb::interal::log2(), into tbb_stddef.h, which
//uses __TBB_Log2 and contains the assert and remove the assert from here and all other
//platform-specific headers.
//TODO: Check if use of gcc intrinsic gives a better chance for cross call optimizations
template <typename T>
static inline intptr_t __TBB_machine_lg( T x ) {
__TBB_ASSERT(x>0, "The logarithm of a non-positive value is undefined.");
uintptr_t j, i = x;
__asm__("bsr %1,%0" : "=r"(j) : "r"(i));
return j;
}
#define __TBB_Log2(V) __TBB_machine_lg(V)
#ifndef __TBB_Pause
//TODO: check if raising a ratio of pause instructions to loop control instructions
//(via e.g. loop unrolling) gives any benefit for HT. E.g, the current implementation
//does about 2 CPU-consuming instructions for every pause instruction. Perhaps for
//high pause counts it should use an unrolled loop to raise the ratio, and thus free
//up more integer cycles for the other hyperthread. On the other hand, if the loop is
//unrolled too far, it won't fit in the core's loop cache, and thus take away
//instruction decode slots from the other hyperthread.
//TODO: check if use of gcc __builtin_ia32_pause intrinsic gives a "some how" better performing code
static inline void __TBB_machine_pause( int32_t delay ) {
for (int32_t i = 0; i < delay; i++) {
__asm__ __volatile__("pause;");
}
return;
}
#define __TBB_Pause(V) __TBB_machine_pause(V)
#endif /* !__TBB_Pause */
// API to retrieve/update FPU control setting
#ifndef __TBB_CPU_CTL_ENV_PRESENT
#define __TBB_CPU_CTL_ENV_PRESENT 1
namespace tbb {
namespace internal {
class cpu_ctl_env {
private:
int mxcsr;
short x87cw;
static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */
public:
bool operator!=( const cpu_ctl_env& ctl ) const { return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; }
void get_env() {
#if __TBB_ICC_12_0_INL_ASM_FSTCW_BROKEN
cpu_ctl_env loc_ctl;
__asm__ __volatile__ (
"stmxcsr %0\n\t"
"fstcw %1"
: "=m"(loc_ctl.mxcsr), "=m"(loc_ctl.x87cw)
);
*this = loc_ctl;
#else
__asm__ __volatile__ (
"stmxcsr %0\n\t"
"fstcw %1"
: "=m"(mxcsr), "=m"(x87cw)
);
#endif
mxcsr &= MXCSR_CONTROL_MASK;
}
void set_env() const {
__asm__ __volatile__ (
"ldmxcsr %0\n\t"
"fldcw %1"
: : "m"(mxcsr), "m"(x87cw)
);
}
};
} // namespace internal
} // namespace tbb
#endif /* !__TBB_CPU_CTL_ENV_PRESENT */
#include "gcc_itsx.h"
#endif /* __TBB_machine_gcc_ia32_common_H */

View File

@@ -0,0 +1,123 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_itsx_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_gcc_itsx_H
#define __TBB_OP_XACQUIRE 0xF2
#define __TBB_OP_XRELEASE 0xF3
#define __TBB_OP_LOCK 0xF0
#define __TBB_STRINGIZE_INTERNAL(arg) #arg
#define __TBB_STRINGIZE(arg) __TBB_STRINGIZE_INTERNAL(arg)
#ifdef __TBB_x86_64
#define __TBB_r_out "=r"
#else
#define __TBB_r_out "=q"
#endif
inline static uint8_t __TBB_machine_try_lock_elided( volatile uint8_t* lk )
{
uint8_t value = 1;
__asm__ volatile (".byte " __TBB_STRINGIZE(__TBB_OP_XACQUIRE)"; lock; xchgb %0, %1;"
: __TBB_r_out(value), "=m"(*lk) : "0"(value), "m"(*lk) : "memory" );
return uint8_t(value^1);
}
inline static void __TBB_machine_try_lock_elided_cancel()
{
// 'pause' instruction aborts HLE/RTM transactions
__asm__ volatile ("pause\n" : : : "memory" );
}
inline static void __TBB_machine_unlock_elided( volatile uint8_t* lk )
{
__asm__ volatile (".byte " __TBB_STRINGIZE(__TBB_OP_XRELEASE)"; movb $0, %0"
: "=m"(*lk) : "m"(*lk) : "memory" );
}
#if __TBB_TSX_INTRINSICS_PRESENT
#include <immintrin.h>
#define __TBB_machine_is_in_transaction _xtest
#define __TBB_machine_begin_transaction _xbegin
#define __TBB_machine_end_transaction _xend
#define __TBB_machine_transaction_conflict_abort() _xabort(0xff)
#else
/*!
* Check if the instruction is executed in a transaction or not
*/
inline static bool __TBB_machine_is_in_transaction()
{
int8_t res = 0;
#if __TBB_x86_32
__asm__ volatile (".byte 0x0F; .byte 0x01; .byte 0xD6;\n"
"setz %0" : "=q"(res) : : "memory" );
#else
__asm__ volatile (".byte 0x0F; .byte 0x01; .byte 0xD6;\n"
"setz %0" : "=r"(res) : : "memory" );
#endif
return res==0;
}
/*!
* Enter speculative execution mode.
* @return -1 on success
* abort cause ( or 0 ) on abort
*/
inline static uint32_t __TBB_machine_begin_transaction()
{
uint32_t res = ~uint32_t(0); // success value
__asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n" // XBEGIN <abort-offset>
" .long 2f-1b-6\n" // 2f-1b == difference in addresses of start
// of XBEGIN and the MOVL
// 2f - 1b - 6 == that difference minus the size of the
// XBEGIN instruction. This is the abort offset to
// 2: below.
" jmp 3f\n" // success (leave -1 in res)
"2: movl %%eax,%0\n" // store failure code in res
"3:"
:"=r"(res):"0"(res):"memory","%eax");
return res;
}
/*!
* Attempt to commit/end transaction
*/
inline static void __TBB_machine_end_transaction()
{
__asm__ volatile (".byte 0x0F; .byte 0x01; .byte 0xD5" :::"memory"); // XEND
}
/*
* aborts with code 0xFF (lock already held)
*/
inline static void __TBB_machine_transaction_conflict_abort()
{
__asm__ volatile (".byte 0xC6; .byte 0xF8; .byte 0xFF" :::"memory");
}
#endif /* __TBB_TSX_INTRINSICS_PRESENT */

View File

@@ -0,0 +1,70 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
// TODO: revise by comparing with mac_ppc.h
#if !defined(__TBB_machine_H) || defined(__TBB_machine_ibm_aix51_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_ibm_aix51_H
#define __TBB_WORDSIZE 8
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG // assumption based on operating system
#include <stdint.h>
#include <unistd.h>
#include <sched.h>
extern "C" {
int32_t __TBB_machine_cas_32 (volatile void* ptr, int32_t value, int32_t comparand);
int64_t __TBB_machine_cas_64 (volatile void* ptr, int64_t value, int64_t comparand);
void __TBB_machine_flush ();
void __TBB_machine_lwsync ();
void __TBB_machine_isync ();
}
// Mapping of old entry point names retained for the sake of backward binary compatibility
#define __TBB_machine_cmpswp4 __TBB_machine_cas_32
#define __TBB_machine_cmpswp8 __TBB_machine_cas_64
#define __TBB_Yield() sched_yield()
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#if __GNUC__
#define __TBB_control_consistency_helper() __asm__ __volatile__( "isync": : :"memory")
#define __TBB_acquire_consistency_helper() __asm__ __volatile__("lwsync": : :"memory")
#define __TBB_release_consistency_helper() __asm__ __volatile__("lwsync": : :"memory")
#define __TBB_full_memory_fence() __asm__ __volatile__( "sync": : :"memory")
#else
// IBM C++ Compiler does not support inline assembly
// TODO: Since XL 9.0 or earlier GCC syntax is supported. Replace with more
// lightweight implementation (like in mac_ppc.h)
#define __TBB_control_consistency_helper() __TBB_machine_isync ()
#define __TBB_acquire_consistency_helper() __TBB_machine_lwsync ()
#define __TBB_release_consistency_helper() __TBB_machine_lwsync ()
#define __TBB_full_memory_fence() __TBB_machine_flush ()
#endif

View File

@@ -0,0 +1,258 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_icc_generic_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#if ! __TBB_ICC_BUILTIN_ATOMICS_PRESENT
#error "Intel C++ Compiler of at least 12.0 version is needed to use ICC intrinsics port"
#endif
#define __TBB_machine_icc_generic_H
//ICC mimics the "native" target compiler
#if _MSC_VER
#include "msvc_ia32_common.h"
#else
#include "gcc_ia32_common.h"
#endif
//TODO: Make __TBB_WORDSIZE macro optional for ICC intrinsics port.
//As compiler intrinsics are used for all the operations it is possible to do.
#if __TBB_x86_32
#define __TBB_WORDSIZE 4
#else
#define __TBB_WORDSIZE 8
#endif
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
//__TBB_compiler_fence() defined just in case, as it seems not to be used on its own anywhere else
#if _MSC_VER
//TODO: any way to use same intrinsics on windows and linux?
#pragma intrinsic(_ReadWriteBarrier)
#define __TBB_compiler_fence() _ReadWriteBarrier()
#else
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#endif
#ifndef __TBB_full_memory_fence
#if _MSC_VER
//TODO: any way to use same intrinsics on windows and linux?
#pragma intrinsic(_mm_mfence)
#define __TBB_full_memory_fence() _mm_mfence()
#else
#define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory")
#endif
#endif
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
namespace tbb { namespace internal {
//TODO: is there any way to reuse definition of memory_order enum from ICC instead of copy paste.
//however it seems unlikely that ICC will silently change exact enum values, as they are defined
//in the ISO exactly like this.
//TODO: add test that exact values of the enum are same as in the ISO C++11
typedef enum memory_order {
memory_order_relaxed, memory_order_consume, memory_order_acquire,
memory_order_release, memory_order_acq_rel, memory_order_seq_cst
} memory_order;
namespace icc_intrinsics_port {
template <typename T>
T convert_argument(T value){
return value;
}
//The overload below is needed to have explicit conversion of pointer to void* in argument list.
//compiler bug?
//TODO: add according broken macro and recheck with ICC 13.0 if the overload is still needed
template <typename T>
void* convert_argument(T* value){
return (void*)value;
}
}
//TODO: code below is a bit repetitive, consider simplifying it
template <typename T, size_t S>
struct machine_load_store {
static T load_with_acquire ( const volatile T& location ) {
return __atomic_load_explicit(&location, memory_order_acquire);
}
static void store_with_release ( volatile T &location, T value ) {
__atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_release);
}
};
template <typename T, size_t S>
struct machine_load_store_relaxed {
static inline T load ( const T& location ) {
return __atomic_load_explicit(&location, memory_order_relaxed);
}
static inline void store ( T& location, T value ) {
__atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_relaxed);
}
};
template <typename T, size_t S>
struct machine_load_store_seq_cst {
static T load ( const volatile T& location ) {
return __atomic_load_explicit(&location, memory_order_seq_cst);
}
static void store ( volatile T &location, T value ) {
__atomic_store_explicit(&location, value, memory_order_seq_cst);
}
};
}} // namespace tbb::internal
namespace tbb{ namespace internal { namespace icc_intrinsics_port{
typedef enum memory_order_map {
relaxed = memory_order_relaxed,
acquire = memory_order_acquire,
release = memory_order_release,
full_fence= memory_order_seq_cst
} memory_order_map;
}}}// namespace tbb::internal
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,M) \
inline T __TBB_machine_cmpswp##S##M( volatile void *ptr, T value, T comparand ) { \
__atomic_compare_exchange_strong_explicit( \
(T*)ptr \
,&comparand \
,value \
, tbb::internal::icc_intrinsics_port::M \
, tbb::internal::icc_intrinsics_port::M); \
return comparand; \
} \
\
inline T __TBB_machine_fetchstore##S##M(volatile void *ptr, T value) { \
return __atomic_exchange_explicit((T*)ptr, value, tbb::internal::icc_intrinsics_port::M); \
} \
\
inline T __TBB_machine_fetchadd##S##M(volatile void *ptr, T value) { \
return __atomic_fetch_add_explicit((T*)ptr, value, tbb::internal::icc_intrinsics_port::M); \
} \
__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, full_fence)
__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, acquire)
__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, release)
__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, relaxed)
__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, full_fence)
__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, acquire)
__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, release)
__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, relaxed)
__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, full_fence)
__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, acquire)
__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, release)
__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, relaxed)
__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, full_fence)
__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, acquire)
__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, release)
__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, relaxed)
#undef __TBB_MACHINE_DEFINE_ATOMICS
#define __TBB_USE_FENCED_ATOMICS 1
namespace tbb { namespace internal {
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(full_fence)
__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(full_fence)
__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(acquire)
__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(release)
__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(relaxed)
__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(relaxed)
template <typename T>
struct machine_load_store<T,8> {
static T load_with_acquire ( const volatile T& location ) {
if( tbb::internal::is_aligned(&location,8)) {
return __atomic_load_explicit(&location, memory_order_acquire);
} else {
return __TBB_machine_generic_load8acquire(&location);
}
}
static void store_with_release ( volatile T &location, T value ) {
if( tbb::internal::is_aligned(&location,8)) {
__atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_release);
} else {
return __TBB_machine_generic_store8release(&location,value);
}
}
};
template <typename T>
struct machine_load_store_relaxed<T,8> {
static T load( const volatile T& location ) {
if( tbb::internal::is_aligned(&location,8)) {
return __atomic_load_explicit(&location, memory_order_relaxed);
} else {
return __TBB_machine_generic_load8relaxed(&location);
}
}
static void store( volatile T &location, T value ) {
if( tbb::internal::is_aligned(&location,8)) {
__atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_relaxed);
} else {
return __TBB_machine_generic_store8relaxed(&location,value);
}
}
};
template <typename T >
struct machine_load_store_seq_cst<T,8> {
static T load ( const volatile T& location ) {
if( tbb::internal::is_aligned(&location,8)) {
return __atomic_load_explicit(&location, memory_order_seq_cst);
} else {
return __TBB_machine_generic_load8full_fence(&location);
}
}
static void store ( volatile T &location, T value ) {
if( tbb::internal::is_aligned(&location,8)) {
__atomic_store_explicit(&location, value, memory_order_seq_cst);
} else {
return __TBB_machine_generic_store8full_fence(&location,value);
}
}
};
#endif
}} // namespace tbb::internal
template <typename T>
inline void __TBB_machine_OR( T *operand, T addend ) {
__atomic_fetch_or_explicit(operand, addend, tbb::internal::memory_order_seq_cst);
}
template <typename T>
inline void __TBB_machine_AND( T *operand, T addend ) {
__atomic_fetch_and_explicit(operand, addend, tbb::internal::memory_order_seq_cst);
}

View File

@@ -0,0 +1,84 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_machine_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#include <sched.h>
#define __TBB_Yield() sched_yield()
#include <unistd.h>
/* Futex definitions */
#include <sys/syscall.h>
#if defined(SYS_futex)
#define __TBB_USE_FUTEX 1
#include <limits.h>
#include <errno.h>
// Unfortunately, some versions of Linux do not have a header that defines FUTEX_WAIT and FUTEX_WAKE.
#ifdef FUTEX_WAIT
#define __TBB_FUTEX_WAIT FUTEX_WAIT
#else
#define __TBB_FUTEX_WAIT 0
#endif
#ifdef FUTEX_WAKE
#define __TBB_FUTEX_WAKE FUTEX_WAKE
#else
#define __TBB_FUTEX_WAKE 1
#endif
#ifndef __TBB_ASSERT
#error machine specific headers must be included after tbb_stddef.h
#endif
namespace tbb {
namespace internal {
inline int futex_wait( void *futex, int comparand ) {
int r = syscall( SYS_futex,futex,__TBB_FUTEX_WAIT,comparand,NULL,NULL,0 );
#if TBB_USE_ASSERT
int e = errno;
__TBB_ASSERT( r==0||r==EWOULDBLOCK||(r==-1&&(e==EAGAIN||e==EINTR)), "futex_wait failed." );
#endif /* TBB_USE_ASSERT */
return r;
}
inline int futex_wakeup_one( void *futex ) {
int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,1,NULL,NULL,0 );
__TBB_ASSERT( r==0||r==1, "futex_wakeup_one: more than one thread woken up?" );
return r;
}
inline int futex_wakeup_all( void *futex ) {
int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,INT_MAX,NULL,NULL,0 );
__TBB_ASSERT( r>=0, "futex_wakeup_all: error in waking up threads" );
return r;
}
} /* namespace internal */
} /* namespace tbb */
#endif /* SYS_futex */

View File

@@ -0,0 +1,232 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_ia32_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_linux_ia32_H
#include <stdint.h>
#include "gcc_ia32_common.h"
#define __TBB_WORDSIZE 4
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory")
#if __TBB_ICC_ASM_VOLATILE_BROKEN
#define __TBB_VOLATILE
#else
#define __TBB_VOLATILE volatile
#endif
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,X,R) \
static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T comparand ) \
{ \
T result; \
\
__asm__ __volatile__("lock\ncmpxchg" X " %2,%1" \
: "=a"(result), "=m"(*(__TBB_VOLATILE T*)ptr) \
: "q"(value), "0"(comparand), "m"(*(__TBB_VOLATILE T*)ptr) \
: "memory"); \
return result; \
} \
\
static inline T __TBB_machine_fetchadd##S(volatile void *ptr, T addend) \
{ \
T result; \
__asm__ __volatile__("lock\nxadd" X " %0,%1" \
: R (result), "=m"(*(__TBB_VOLATILE T*)ptr) \
: "0"(addend), "m"(*(__TBB_VOLATILE T*)ptr) \
: "memory"); \
return result; \
} \
\
static inline T __TBB_machine_fetchstore##S(volatile void *ptr, T value) \
{ \
T result; \
__asm__ __volatile__("lock\nxchg" X " %0,%1" \
: R (result), "=m"(*(__TBB_VOLATILE T*)ptr) \
: "0"(value), "m"(*(__TBB_VOLATILE T*)ptr) \
: "memory"); \
return result; \
} \
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t,"","=q")
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t,"","=r")
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"l","=r")
#if __INTEL_COMPILER
#pragma warning( push )
// reference to EBX in a function requiring stack alignment
#pragma warning( disable: 998 )
#endif
#if __TBB_GCC_CAS8_BUILTIN_INLINING_BROKEN
#define __TBB_IA32_CAS8_NOINLINE __attribute__ ((noinline))
#else
#define __TBB_IA32_CAS8_NOINLINE
#endif
static inline __TBB_IA32_CAS8_NOINLINE int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand ) {
//TODO: remove the extra part of condition once __TBB_GCC_BUILTIN_ATOMICS_PRESENT is lowered to gcc version 4.1.2
#if (__TBB_GCC_BUILTIN_ATOMICS_PRESENT || (__TBB_GCC_VERSION >= 40102)) && !__TBB_GCC_64BIT_ATOMIC_BUILTINS_BROKEN
return __sync_val_compare_and_swap( reinterpret_cast<volatile int64_t*>(ptr), comparand, value );
#else /* !__TBB_GCC_BUILTIN_ATOMICS_PRESENT */
//TODO: look like ICC 13.0 has some issues with this code, investigate it more deeply
int64_t result;
union {
int64_t i64;
int32_t i32[2];
};
i64 = value;
#if __PIC__
/* compiling position-independent code */
// EBX register preserved for compliance with position-independent code rules on IA32
int32_t tmp;
__asm__ __volatile__ (
"movl %%ebx,%2\n\t"
"movl %5,%%ebx\n\t"
#if __GNUC__==3
"lock\n\t cmpxchg8b %1\n\t"
#else
"lock\n\t cmpxchg8b (%3)\n\t"
#endif
"movl %2,%%ebx"
: "=A"(result)
, "=m"(*(__TBB_VOLATILE int64_t *)ptr)
, "=m"(tmp)
#if __GNUC__==3
: "m"(*(__TBB_VOLATILE int64_t *)ptr)
#else
: "SD"(ptr)
#endif
, "0"(comparand)
, "m"(i32[0]), "c"(i32[1])
: "memory"
#if __INTEL_COMPILER
,"ebx"
#endif
);
#else /* !__PIC__ */
__asm__ __volatile__ (
"lock\n\t cmpxchg8b %1\n\t"
: "=A"(result), "=m"(*(__TBB_VOLATILE int64_t *)ptr)
: "m"(*(__TBB_VOLATILE int64_t *)ptr)
, "0"(comparand)
, "b"(i32[0]), "c"(i32[1])
: "memory"
);
#endif /* __PIC__ */
return result;
#endif /* !__TBB_GCC_BUILTIN_ATOMICS_PRESENT */
}
#undef __TBB_IA32_CAS8_NOINLINE
#if __INTEL_COMPILER
#pragma warning( pop )
#endif // warning 998 is back
static inline void __TBB_machine_or( volatile void *ptr, uint32_t addend ) {
__asm__ __volatile__("lock\norl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_t *)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory");
}
static inline void __TBB_machine_and( volatile void *ptr, uint32_t addend ) {
__asm__ __volatile__("lock\nandl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_t *)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory");
}
//TODO: Check if it possible and profitable for IA-32 architecture on (Linux* and Windows*)
//to use of 64-bit load/store via floating point registers together with full fence
//for sequentially consistent load/store, instead of CAS.
#if __clang__
#define __TBB_fildq "fildll"
#define __TBB_fistpq "fistpll"
#else
#define __TBB_fildq "fildq"
#define __TBB_fistpq "fistpq"
#endif
static inline int64_t __TBB_machine_aligned_load8 (const volatile void *ptr) {
__TBB_ASSERT(tbb::internal::is_aligned(ptr,8),"__TBB_machine_aligned_load8 should be used with 8 byte aligned locations only \n");
int64_t result;
__asm__ __volatile__ ( __TBB_fildq " %1\n\t"
__TBB_fistpq " %0" : "=m"(result) : "m"(*(const __TBB_VOLATILE uint64_t*)ptr) : "memory" );
return result;
}
static inline void __TBB_machine_aligned_store8 (volatile void *ptr, int64_t value ) {
__TBB_ASSERT(tbb::internal::is_aligned(ptr,8),"__TBB_machine_aligned_store8 should be used with 8 byte aligned locations only \n");
// Aligned store
__asm__ __volatile__ ( __TBB_fildq " %1\n\t"
__TBB_fistpq " %0" : "=m"(*(__TBB_VOLATILE int64_t*)ptr) : "m"(value) : "memory" );
}
static inline int64_t __TBB_machine_load8 (const volatile void *ptr) {
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
if( tbb::internal::is_aligned(ptr,8)) {
#endif
return __TBB_machine_aligned_load8(ptr);
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
} else {
// Unaligned load
return __TBB_machine_cmpswp8(const_cast<void*>(ptr),0,0);
}
#endif
}
//! Handles misaligned 8-byte store
/** Defined in tbb_misc.cpp */
extern "C" void __TBB_machine_store8_slow( volatile void *ptr, int64_t value );
extern "C" void __TBB_machine_store8_slow_perf_warning( volatile void *ptr );
static inline void __TBB_machine_store8(volatile void *ptr, int64_t value) {
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
if( tbb::internal::is_aligned(ptr,8)) {
#endif
__TBB_machine_aligned_store8(ptr,value);
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
} else {
// Unaligned store
#if TBB_USE_PERFORMANCE_WARNINGS
__TBB_machine_store8_slow_perf_warning(ptr);
#endif /* TBB_USE_PERFORMANCE_WARNINGS */
__TBB_machine_store8_slow(ptr,value);
}
#endif
}
// Machine specific atomic operations
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
#define __TBB_USE_GENERIC_DWORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_DWORD_FETCH_STORE 1
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1

View File

@@ -0,0 +1,181 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_ia64_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_linux_ia64_H
#include <stdint.h>
#include <ia64intrin.h>
#define __TBB_WORDSIZE 8
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#if __INTEL_COMPILER
#define __TBB_compiler_fence()
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper()
#define __TBB_release_consistency_helper()
#define __TBB_full_memory_fence() __mf()
#else
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
// Even though GCC imbues volatile loads with acquire semantics, it sometimes moves
// loads over the acquire fence. The following helpers stop such incorrect code motion.
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#define __TBB_full_memory_fence() __asm__ __volatile__("mf": : :"memory")
#endif /* !__INTEL_COMPILER */
// Most of the functions will be in a .s file
// TODO: revise dynamic_link, memory pools and etc. if the library dependency is removed.
extern "C" {
int8_t __TBB_machine_fetchadd1__TBB_full_fence (volatile void *ptr, int8_t addend);
int8_t __TBB_machine_fetchadd1acquire(volatile void *ptr, int8_t addend);
int8_t __TBB_machine_fetchadd1release(volatile void *ptr, int8_t addend);
int16_t __TBB_machine_fetchadd2__TBB_full_fence (volatile void *ptr, int16_t addend);
int16_t __TBB_machine_fetchadd2acquire(volatile void *ptr, int16_t addend);
int16_t __TBB_machine_fetchadd2release(volatile void *ptr, int16_t addend);
int32_t __TBB_machine_fetchadd4__TBB_full_fence (volatile void *ptr, int32_t value);
int32_t __TBB_machine_fetchadd4acquire(volatile void *ptr, int32_t addend);
int32_t __TBB_machine_fetchadd4release(volatile void *ptr, int32_t addend);
int64_t __TBB_machine_fetchadd8__TBB_full_fence (volatile void *ptr, int64_t value);
int64_t __TBB_machine_fetchadd8acquire(volatile void *ptr, int64_t addend);
int64_t __TBB_machine_fetchadd8release(volatile void *ptr, int64_t addend);
int8_t __TBB_machine_fetchstore1__TBB_full_fence (volatile void *ptr, int8_t value);
int8_t __TBB_machine_fetchstore1acquire(volatile void *ptr, int8_t value);
int8_t __TBB_machine_fetchstore1release(volatile void *ptr, int8_t value);
int16_t __TBB_machine_fetchstore2__TBB_full_fence (volatile void *ptr, int16_t value);
int16_t __TBB_machine_fetchstore2acquire(volatile void *ptr, int16_t value);
int16_t __TBB_machine_fetchstore2release(volatile void *ptr, int16_t value);
int32_t __TBB_machine_fetchstore4__TBB_full_fence (volatile void *ptr, int32_t value);
int32_t __TBB_machine_fetchstore4acquire(volatile void *ptr, int32_t value);
int32_t __TBB_machine_fetchstore4release(volatile void *ptr, int32_t value);
int64_t __TBB_machine_fetchstore8__TBB_full_fence (volatile void *ptr, int64_t value);
int64_t __TBB_machine_fetchstore8acquire(volatile void *ptr, int64_t value);
int64_t __TBB_machine_fetchstore8release(volatile void *ptr, int64_t value);
int8_t __TBB_machine_cmpswp1__TBB_full_fence (volatile void *ptr, int8_t value, int8_t comparand);
int8_t __TBB_machine_cmpswp1acquire(volatile void *ptr, int8_t value, int8_t comparand);
int8_t __TBB_machine_cmpswp1release(volatile void *ptr, int8_t value, int8_t comparand);
int16_t __TBB_machine_cmpswp2__TBB_full_fence (volatile void *ptr, int16_t value, int16_t comparand);
int16_t __TBB_machine_cmpswp2acquire(volatile void *ptr, int16_t value, int16_t comparand);
int16_t __TBB_machine_cmpswp2release(volatile void *ptr, int16_t value, int16_t comparand);
int32_t __TBB_machine_cmpswp4__TBB_full_fence (volatile void *ptr, int32_t value, int32_t comparand);
int32_t __TBB_machine_cmpswp4acquire(volatile void *ptr, int32_t value, int32_t comparand);
int32_t __TBB_machine_cmpswp4release(volatile void *ptr, int32_t value, int32_t comparand);
int64_t __TBB_machine_cmpswp8__TBB_full_fence (volatile void *ptr, int64_t value, int64_t comparand);
int64_t __TBB_machine_cmpswp8acquire(volatile void *ptr, int64_t value, int64_t comparand);
int64_t __TBB_machine_cmpswp8release(volatile void *ptr, int64_t value, int64_t comparand);
int64_t __TBB_machine_lg(uint64_t value);
void __TBB_machine_pause(int32_t delay);
bool __TBB_machine_trylockbyte( volatile unsigned char &ptr );
int64_t __TBB_machine_lockbyte( volatile unsigned char &ptr );
//! Retrieves the current RSE backing store pointer. IA64 specific.
void* __TBB_get_bsp();
int32_t __TBB_machine_load1_relaxed(const void *ptr);
int32_t __TBB_machine_load2_relaxed(const void *ptr);
int32_t __TBB_machine_load4_relaxed(const void *ptr);
int64_t __TBB_machine_load8_relaxed(const void *ptr);
void __TBB_machine_store1_relaxed(void *ptr, int32_t value);
void __TBB_machine_store2_relaxed(void *ptr, int32_t value);
void __TBB_machine_store4_relaxed(void *ptr, int32_t value);
void __TBB_machine_store8_relaxed(void *ptr, int64_t value);
} // extern "C"
// Mapping old entry points to the names corresponding to the new full_fence identifier.
#define __TBB_machine_fetchadd1full_fence __TBB_machine_fetchadd1__TBB_full_fence
#define __TBB_machine_fetchadd2full_fence __TBB_machine_fetchadd2__TBB_full_fence
#define __TBB_machine_fetchadd4full_fence __TBB_machine_fetchadd4__TBB_full_fence
#define __TBB_machine_fetchadd8full_fence __TBB_machine_fetchadd8__TBB_full_fence
#define __TBB_machine_fetchstore1full_fence __TBB_machine_fetchstore1__TBB_full_fence
#define __TBB_machine_fetchstore2full_fence __TBB_machine_fetchstore2__TBB_full_fence
#define __TBB_machine_fetchstore4full_fence __TBB_machine_fetchstore4__TBB_full_fence
#define __TBB_machine_fetchstore8full_fence __TBB_machine_fetchstore8__TBB_full_fence
#define __TBB_machine_cmpswp1full_fence __TBB_machine_cmpswp1__TBB_full_fence
#define __TBB_machine_cmpswp2full_fence __TBB_machine_cmpswp2__TBB_full_fence
#define __TBB_machine_cmpswp4full_fence __TBB_machine_cmpswp4__TBB_full_fence
#define __TBB_machine_cmpswp8full_fence __TBB_machine_cmpswp8__TBB_full_fence
// Mapping relaxed operations to the entry points implementing them.
/** On IA64 RMW operations implicitly have acquire semantics. Thus one cannot
actually have completely relaxed RMW operation here. **/
#define __TBB_machine_fetchadd1relaxed __TBB_machine_fetchadd1acquire
#define __TBB_machine_fetchadd2relaxed __TBB_machine_fetchadd2acquire
#define __TBB_machine_fetchadd4relaxed __TBB_machine_fetchadd4acquire
#define __TBB_machine_fetchadd8relaxed __TBB_machine_fetchadd8acquire
#define __TBB_machine_fetchstore1relaxed __TBB_machine_fetchstore1acquire
#define __TBB_machine_fetchstore2relaxed __TBB_machine_fetchstore2acquire
#define __TBB_machine_fetchstore4relaxed __TBB_machine_fetchstore4acquire
#define __TBB_machine_fetchstore8relaxed __TBB_machine_fetchstore8acquire
#define __TBB_machine_cmpswp1relaxed __TBB_machine_cmpswp1acquire
#define __TBB_machine_cmpswp2relaxed __TBB_machine_cmpswp2acquire
#define __TBB_machine_cmpswp4relaxed __TBB_machine_cmpswp4acquire
#define __TBB_machine_cmpswp8relaxed __TBB_machine_cmpswp8acquire
#define __TBB_MACHINE_DEFINE_ATOMICS(S,V) \
template <typename T> \
struct machine_load_store_relaxed<T,S> { \
static inline T load ( const T& location ) { \
return (T)__TBB_machine_load##S##_relaxed(&location); \
} \
static inline void store ( T& location, T value ) { \
__TBB_machine_store##S##_relaxed(&location, (V)value); \
} \
}
namespace tbb {
namespace internal {
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t);
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t);
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t);
__TBB_MACHINE_DEFINE_ATOMICS(8,int64_t);
}} // namespaces internal, tbb
#undef __TBB_MACHINE_DEFINE_ATOMICS
#define __TBB_USE_FENCED_ATOMICS 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
// Definition of Lock functions
#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)
#define __TBB_LockByte(P) __TBB_machine_lockbyte(P)
// Definition of other utility functions
#define __TBB_Pause(V) __TBB_machine_pause(V)
#define __TBB_Log2(V) __TBB_machine_lg(V)

View File

@@ -0,0 +1,96 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_intel64_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_linux_intel64_H
#include <stdint.h>
#include "gcc_ia32_common.h"
#define __TBB_WORDSIZE 8
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#ifndef __TBB_full_memory_fence
#define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory")
#endif
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,X) \
static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T comparand ) \
{ \
T result; \
\
__asm__ __volatile__("lock\ncmpxchg" X " %2,%1" \
: "=a"(result), "=m"(*(volatile T*)ptr) \
: "q"(value), "0"(comparand), "m"(*(volatile T*)ptr) \
: "memory"); \
return result; \
} \
\
static inline T __TBB_machine_fetchadd##S(volatile void *ptr, T addend) \
{ \
T result; \
__asm__ __volatile__("lock\nxadd" X " %0,%1" \
: "=r"(result),"=m"(*(volatile T*)ptr) \
: "0"(addend), "m"(*(volatile T*)ptr) \
: "memory"); \
return result; \
} \
\
static inline T __TBB_machine_fetchstore##S(volatile void *ptr, T value) \
{ \
T result; \
__asm__ __volatile__("lock\nxchg" X " %0,%1" \
: "=r"(result),"=m"(*(volatile T*)ptr) \
: "0"(value), "m"(*(volatile T*)ptr) \
: "memory"); \
return result; \
} \
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t,"")
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t,"")
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"")
__TBB_MACHINE_DEFINE_ATOMICS(8,int64_t,"q")
#undef __TBB_MACHINE_DEFINE_ATOMICS
static inline void __TBB_machine_or( volatile void *ptr, uint64_t value ) {
__asm__ __volatile__("lock\norq %1,%0" : "=m"(*(volatile uint64_t*)ptr) : "r"(value), "m"(*(volatile uint64_t*)ptr) : "memory");
}
static inline void __TBB_machine_and( volatile void *ptr, uint64_t value ) {
__asm__ __volatile__("lock\nandq %1,%0" : "=m"(*(volatile uint64_t*)ptr) : "r"(value), "m"(*(volatile uint64_t*)ptr) : "memory");
}
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1

View File

@@ -0,0 +1,313 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_power_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_gcc_power_H
#include <stdint.h>
#include <unistd.h>
// TODO: rename to gcc_power.h?
// This file is for Power Architecture with compilers supporting GNU inline-assembler syntax (currently GNU g++ and IBM XL).
// Note that XL V9.0 (sometimes?) has trouble dealing with empty input and/or clobber lists, so they should be avoided.
#if __powerpc64__ || __ppc64__
// IBM XL documents __powerpc64__ (and __PPC64__).
// Apple documents __ppc64__ (with __ppc__ only on 32-bit).
#define __TBB_WORDSIZE 8
#else
#define __TBB_WORDSIZE 4
#endif
// Traditionally Power Architecture is big-endian.
// Little-endian could be just an address manipulation (compatibility with TBB not verified),
// or normal little-endian (on more recent systems). Embedded PowerPC systems may support
// page-specific endianness, but then one endianness must be hidden from TBB so that it still sees only one.
#if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
#elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#elif defined(__BYTE_ORDER__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
#else
#define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
#endif
// On Power Architecture, (lock-free) 64-bit atomics require 64-bit hardware:
#if __TBB_WORDSIZE==8
// Do not change the following definition, because TBB itself will use 64-bit atomics in 64-bit builds.
#define __TBB_64BIT_ATOMICS 1
#elif __bgp__
// Do not change the following definition, because this is known 32-bit hardware.
#define __TBB_64BIT_ATOMICS 0
#else
// To enable 64-bit atomics in 32-bit builds, set the value below to 1 instead of 0.
// You must make certain that the program will only use them on actual 64-bit hardware
// (which typically means that the entire program is only executed on such hardware),
// because their implementation involves machine instructions that are illegal elsewhere.
// The setting can be chosen independently per compilation unit,
// which also means that TBB itself does not need to be rebuilt.
// Alternatively (but only for the current architecture and TBB version),
// override the default as a predefined macro when invoking the compiler.
#ifndef __TBB_64BIT_ATOMICS
#define __TBB_64BIT_ATOMICS 0
#endif
#endif
inline int32_t __TBB_machine_cmpswp4 (volatile void *ptr, int32_t value, int32_t comparand )
{
int32_t result;
__asm__ __volatile__("sync\n"
"0:\n\t"
"lwarx %[res],0,%[ptr]\n\t" /* load w/ reservation */
"cmpw %[res],%[cmp]\n\t" /* compare against comparand */
"bne- 1f\n\t" /* exit if not same */
"stwcx. %[val],0,%[ptr]\n\t" /* store new value */
"bne- 0b\n" /* retry if reservation lost */
"1:\n\t" /* the exit */
"isync"
: [res]"=&r"(result)
, "+m"(* (int32_t*) ptr) /* redundant with "memory" */
: [ptr]"r"(ptr)
, [val]"r"(value)
, [cmp]"r"(comparand)
: "memory" /* compiler full fence */
, "cr0" /* clobbered by cmp and/or stwcx. */
);
return result;
}
#if __TBB_WORDSIZE==8
inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
{
int64_t result;
__asm__ __volatile__("sync\n"
"0:\n\t"
"ldarx %[res],0,%[ptr]\n\t" /* load w/ reservation */
"cmpd %[res],%[cmp]\n\t" /* compare against comparand */
"bne- 1f\n\t" /* exit if not same */
"stdcx. %[val],0,%[ptr]\n\t" /* store new value */
"bne- 0b\n" /* retry if reservation lost */
"1:\n\t" /* the exit */
"isync"
: [res]"=&r"(result)
, "+m"(* (int64_t*) ptr) /* redundant with "memory" */
: [ptr]"r"(ptr)
, [val]"r"(value)
, [cmp]"r"(comparand)
: "memory" /* compiler full fence */
, "cr0" /* clobbered by cmp and/or stdcx. */
);
return result;
}
#elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */
inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
{
int64_t result;
int64_t value_register, comparand_register, result_register; // dummy variables to allocate registers
__asm__ __volatile__("sync\n\t"
"ld %[val],%[valm]\n\t"
"ld %[cmp],%[cmpm]\n"
"0:\n\t"
"ldarx %[res],0,%[ptr]\n\t" /* load w/ reservation */
"cmpd %[res],%[cmp]\n\t" /* compare against comparand */
"bne- 1f\n\t" /* exit if not same */
"stdcx. %[val],0,%[ptr]\n\t" /* store new value */
"bne- 0b\n" /* retry if reservation lost */
"1:\n\t" /* the exit */
"std %[res],%[resm]\n\t"
"isync"
: [resm]"=m"(result)
, [res] "=&r"( result_register)
, [val] "=&r"( value_register)
, [cmp] "=&r"(comparand_register)
, "+m"(* (int64_t*) ptr) /* redundant with "memory" */
: [ptr] "r"(ptr)
, [valm]"m"(value)
, [cmpm]"m"(comparand)
: "memory" /* compiler full fence */
, "cr0" /* clobbered by cmpd and/or stdcx. */
);
return result;
}
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
#define __TBB_MACHINE_DEFINE_LOAD_STORE(S,ldx,stx,cmpx) \
template <typename T> \
struct machine_load_store<T,S> { \
static inline T load_with_acquire(const volatile T& location) { \
T result; \
__asm__ __volatile__(ldx " %[res],0(%[ptr])\n" \
"0:\n\t" \
cmpx " %[res],%[res]\n\t" \
"bne- 0b\n\t" \
"isync" \
: [res]"=r"(result) \
: [ptr]"b"(&location) /* cannot use register 0 here */ \
, "m"(location) /* redundant with "memory" */ \
: "memory" /* compiler acquire fence */ \
, "cr0" /* clobbered by cmpw/cmpd */); \
return result; \
} \
static inline void store_with_release(volatile T &location, T value) { \
__asm__ __volatile__("lwsync\n\t" \
stx " %[val],0(%[ptr])" \
: "=m"(location) /* redundant with "memory" */ \
: [ptr]"b"(&location) /* cannot use register 0 here */ \
, [val]"r"(value) \
: "memory"/*compiler release fence*/ /*(cr0 not affected)*/); \
} \
}; \
\
template <typename T> \
struct machine_load_store_relaxed<T,S> { \
static inline T load (const __TBB_atomic T& location) { \
T result; \
__asm__ __volatile__(ldx " %[res],0(%[ptr])" \
: [res]"=r"(result) \
: [ptr]"b"(&location) /* cannot use register 0 here */ \
, "m"(location) \
); /*(no compiler fence)*/ /*(cr0 not affected)*/ \
return result; \
} \
static inline void store (__TBB_atomic T &location, T value) { \
__asm__ __volatile__(stx " %[val],0(%[ptr])" \
: "=m"(location) \
: [ptr]"b"(&location) /* cannot use register 0 here */ \
, [val]"r"(value) \
); /*(no compiler fence)*/ /*(cr0 not affected)*/ \
} \
};
namespace tbb {
namespace internal {
__TBB_MACHINE_DEFINE_LOAD_STORE(1,"lbz","stb","cmpw")
__TBB_MACHINE_DEFINE_LOAD_STORE(2,"lhz","sth","cmpw")
__TBB_MACHINE_DEFINE_LOAD_STORE(4,"lwz","stw","cmpw")
#if __TBB_WORDSIZE==8
__TBB_MACHINE_DEFINE_LOAD_STORE(8,"ld" ,"std","cmpd")
#elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */
template <typename T>
struct machine_load_store<T,8> {
static inline T load_with_acquire(const volatile T& location) {
T result;
T result_register; // dummy variable to allocate a register
__asm__ __volatile__("ld %[res],0(%[ptr])\n\t"
"std %[res],%[resm]\n"
"0:\n\t"
"cmpd %[res],%[res]\n\t"
"bne- 0b\n\t"
"isync"
: [resm]"=m"(result)
, [res]"=&r"(result_register)
: [ptr]"b"(&location) /* cannot use register 0 here */
, "m"(location) /* redundant with "memory" */
: "memory" /* compiler acquire fence */
, "cr0" /* clobbered by cmpd */);
return result;
}
static inline void store_with_release(volatile T &location, T value) {
T value_register; // dummy variable to allocate a register
__asm__ __volatile__("lwsync\n\t"
"ld %[val],%[valm]\n\t"
"std %[val],0(%[ptr])"
: "=m"(location) /* redundant with "memory" */
, [val]"=&r"(value_register)
: [ptr]"b"(&location) /* cannot use register 0 here */
, [valm]"m"(value)
: "memory"/*compiler release fence*/ /*(cr0 not affected)*/);
}
};
struct machine_load_store_relaxed<T,8> {
static inline T load (const volatile T& location) {
T result;
T result_register; // dummy variable to allocate a register
__asm__ __volatile__("ld %[res],0(%[ptr])\n\t"
"std %[res],%[resm]"
: [resm]"=m"(result)
, [res]"=&r"(result_register)
: [ptr]"b"(&location) /* cannot use register 0 here */
, "m"(location)
); /*(no compiler fence)*/ /*(cr0 not affected)*/
return result;
}
static inline void store (volatile T &location, T value) {
T value_register; // dummy variable to allocate a register
__asm__ __volatile__("ld %[val],%[valm]\n\t"
"std %[val],0(%[ptr])"
: "=m"(location)
, [val]"=&r"(value_register)
: [ptr]"b"(&location) /* cannot use register 0 here */
, [valm]"m"(value)
); /*(no compiler fence)*/ /*(cr0 not affected)*/
}
};
#define __TBB_machine_load_store_relaxed_8
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
}} // namespaces internal, tbb
#undef __TBB_MACHINE_DEFINE_LOAD_STORE
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#define __TBB_control_consistency_helper() __asm__ __volatile__("isync": : :"memory")
#define __TBB_full_memory_fence() __asm__ __volatile__( "sync": : :"memory")
static inline intptr_t __TBB_machine_lg( uintptr_t x ) {
__TBB_ASSERT(x, "__TBB_Log2(0) undefined");
// cntlzd/cntlzw starts counting at 2^63/2^31 (ignoring any higher-order bits), and does not affect cr0
#if __TBB_WORDSIZE==8
__asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x));
return 63-static_cast<intptr_t>(x);
#else
__asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x));
return 31-static_cast<intptr_t>(x);
#endif
}
#define __TBB_Log2(V) __TBB_machine_lg(V)
// Assumes implicit alignment for any 32-bit value
typedef uint32_t __TBB_Flag;
#define __TBB_Flag __TBB_Flag
inline bool __TBB_machine_trylockbyte( __TBB_atomic __TBB_Flag &flag ) {
return __TBB_machine_cmpswp4(&flag,1,0)==0;
}
#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)

View File

@@ -0,0 +1,133 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_macos_common_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_macos_common_H
#include <sched.h>
#define __TBB_Yield() sched_yield()
// __TBB_HardwareConcurrency
#include <sys/types.h>
#include <sys/sysctl.h>
static inline int __TBB_macos_available_cpu() {
int name[2] = {CTL_HW, HW_AVAILCPU};
int ncpu;
size_t size = sizeof(ncpu);
sysctl( name, 2, &ncpu, &size, NULL, 0 );
return ncpu;
}
#define __TBB_HardwareConcurrency() __TBB_macos_available_cpu()
#ifndef __TBB_full_memory_fence
// TBB has not recognized the architecture (none of the architecture abstraction
// headers was included).
#define __TBB_UnknownArchitecture 1
#endif
#if __TBB_UnknownArchitecture
// Implementation of atomic operations based on OS provided primitives
#include <libkern/OSAtomic.h>
static inline int64_t __TBB_machine_cmpswp8_OsX(volatile void *ptr, int64_t value, int64_t comparand)
{
__TBB_ASSERT( tbb::internal::is_aligned(ptr,8), "address not properly aligned for OS X* atomics");
int64_t* address = (int64_t*)ptr;
while( !OSAtomicCompareAndSwap64Barrier(comparand, value, address) ){
#if __TBB_WORDSIZE==8
int64_t snapshot = *address;
#else
int64_t snapshot = OSAtomicAdd64( 0, address );
#endif
if( snapshot!=comparand ) return snapshot;
}
return comparand;
}
#define __TBB_machine_cmpswp8 __TBB_machine_cmpswp8_OsX
#endif /* __TBB_UnknownArchitecture */
#if __TBB_UnknownArchitecture
#ifndef __TBB_WORDSIZE
#define __TBB_WORDSIZE 4
#endif
#ifdef __TBB_ENDIANNESS
// Already determined based on hardware architecture.
#elif __BIG_ENDIAN__
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
#elif __LITTLE_ENDIAN__
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#else
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
#endif
/** As this generic implementation has absolutely no information about underlying
hardware, its performance most likely will be sub-optimal because of full memory
fence usages where a more lightweight synchronization means (or none at all)
could suffice. Thus if you use this header to enable TBB on a new platform,
consider forking it and relaxing below helpers as appropriate. **/
#define __TBB_control_consistency_helper() OSMemoryBarrier()
#define __TBB_acquire_consistency_helper() OSMemoryBarrier()
#define __TBB_release_consistency_helper() OSMemoryBarrier()
#define __TBB_full_memory_fence() OSMemoryBarrier()
static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand)
{
__TBB_ASSERT( tbb::internal::is_aligned(ptr,4), "address not properly aligned for OS X* atomics");
int32_t* address = (int32_t*)ptr;
while( !OSAtomicCompareAndSwap32Barrier(comparand, value, address) ){
int32_t snapshot = *address;
if( snapshot!=comparand ) return snapshot;
}
return comparand;
}
static inline int32_t __TBB_machine_fetchadd4(volatile void *ptr, int32_t addend)
{
__TBB_ASSERT( tbb::internal::is_aligned(ptr,4), "address not properly aligned for OS X* atomics");
return OSAtomicAdd32Barrier(addend, (int32_t*)ptr) - addend;
}
static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend)
{
__TBB_ASSERT( tbb::internal::is_aligned(ptr,8), "address not properly aligned for OS X* atomics");
return OSAtomicAdd64Barrier(addend, (int64_t*)ptr) - addend;
}
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#if __TBB_WORDSIZE == 4
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#endif
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#endif /* __TBB_UnknownArchitecture */

View File

@@ -0,0 +1,61 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_mic_common_H
#define __TBB_mic_common_H
#ifndef __TBB_machine_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#if ! __TBB_DEFINE_MIC
#error mic_common.h should be included only when building for Intel(R) Many Integrated Core Architecture
#endif
#ifndef __TBB_PREFETCHING
#define __TBB_PREFETCHING 1
#endif
#if __TBB_PREFETCHING
#include <immintrin.h>
#define __TBB_cl_prefetch(p) _mm_prefetch((const char*)p, _MM_HINT_T1)
#define __TBB_cl_evict(p) _mm_clevict(p, _MM_HINT_T1)
#endif
/** Intel(R) Many Integrated Core Architecture does not support mfence and pause instructions **/
#define __TBB_full_memory_fence() __asm__ __volatile__("lock; addl $0,(%%rsp)":::"memory")
#define __TBB_Pause(x) _mm_delay_32(16*(x))
#define __TBB_STEALING_PAUSE 1500/16
#include <sched.h>
#define __TBB_Yield() sched_yield()
// low-level timing intrinsic and its type
#define __TBB_machine_time_stamp() _rdtsc()
typedef uint64_t machine_tsc_t;
/** Specifics **/
#define __TBB_STEALING_ABORT_ON_CONTENTION 1
#define __TBB_YIELD2P 1
#define __TBB_HOARD_NONLOCAL_TASKS 1
#if ! ( __FreeBSD__ || __linux__ )
#error Intel(R) Many Integrated Core Compiler does not define __FreeBSD__ or __linux__ anymore. Check for the __TBB_XXX_BROKEN defined under __FreeBSD__ or __linux__.
#endif /* ! ( __FreeBSD__ || __linux__ ) */
#endif /* __TBB_mic_common_H */

View File

@@ -0,0 +1,171 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_msvc_armv7_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_msvc_armv7_H
#include <intrin.h>
#include <float.h>
#define __TBB_WORDSIZE 4
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
#if defined(TBB_WIN32_USE_CL_BUILTINS)
// We can test this on _M_IX86
#pragma intrinsic(_ReadWriteBarrier)
#pragma intrinsic(_mm_mfence)
#define __TBB_compiler_fence() _ReadWriteBarrier()
#define __TBB_full_memory_fence() _mm_mfence()
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#else
//Now __dmb(_ARM_BARRIER_SY) is used for both compiler and memory fences
//This might be changed later after testing
#define __TBB_compiler_fence() __dmb(_ARM_BARRIER_SY)
#define __TBB_full_memory_fence() __dmb(_ARM_BARRIER_SY)
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_full_memory_fence()
#define __TBB_release_consistency_helper() __TBB_full_memory_fence()
#endif
//--------------------------------------------------
// Compare and swap
//--------------------------------------------------
/**
* Atomic CAS for 32 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
* @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
* @param value value to assign *ptr to if *ptr==comparand
* @param comparand value to compare with *ptr
* @return value originally in memory at ptr, regardless of success
*/
#define __TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(S,T,F) \
inline T __TBB_machine_cmpswp##S( volatile void *ptr, T value, T comparand ) { \
return _InterlockedCompareExchange##F(reinterpret_cast<volatile T *>(ptr),value,comparand); \
} \
#define __TBB_MACHINE_DEFINE_ATOMICS_FETCHADD(S,T,F) \
inline T __TBB_machine_fetchadd##S( volatile void *ptr, T value ) { \
return _InterlockedExchangeAdd##F(reinterpret_cast<volatile T *>(ptr),value); \
} \
__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(1,char,8)
__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(2,short,16)
__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(4,long,)
__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(8,__int64,64)
__TBB_MACHINE_DEFINE_ATOMICS_FETCHADD(4,long,)
#if defined(TBB_WIN32_USE_CL_BUILTINS)
// No _InterlockedExchangeAdd64 intrinsic on _M_IX86
#define __TBB_64BIT_ATOMICS 0
#else
__TBB_MACHINE_DEFINE_ATOMICS_FETCHADD(8,__int64,64)
#endif
inline void __TBB_machine_pause (int32_t delay )
{
while(delay>0)
{
__TBB_compiler_fence();
delay--;
}
}
// API to retrieve/update FPU control setting
#define __TBB_CPU_CTL_ENV_PRESENT 1
namespace tbb {
namespace internal {
template <typename T, size_t S>
struct machine_load_store_relaxed {
static inline T load ( const volatile T& location ) {
const T value = location;
/*
* An extra memory barrier is required for errata #761319
* Please see http://infocenter.arm.com/help/topic/com.arm.doc.uan0004a
*/
__TBB_acquire_consistency_helper();
return value;
}
static inline void store ( volatile T& location, T value ) {
location = value;
}
};
class cpu_ctl_env {
private:
unsigned int my_ctl;
public:
bool operator!=( const cpu_ctl_env& ctl ) const { return my_ctl != ctl.my_ctl; }
void get_env() { my_ctl = _control87(0, 0); }
void set_env() const { _control87( my_ctl, ~0U ); }
};
} // namespace internal
} // namespaces tbb
// Machine specific atomic operations
#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
#define __TBB_Pause(V) __TBB_machine_pause(V)
// Use generics for some things
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_STORE 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#if defined(TBB_WIN32_USE_CL_BUILTINS)
#if !__TBB_WIN8UI_SUPPORT
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
#define __TBB_Yield() SwitchToThread()
#else
#include<thread>
#define __TBB_Yield() std::this_thread::yield()
#endif
#else
#define __TBB_Yield() __yield()
#endif
// Machine specific atomic operations
#define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V)
template <typename T1,typename T2>
inline void __TBB_machine_OR( T1 *operand, T2 addend ) {
_InterlockedOr((long volatile *)operand, (long)addend);
}
template <typename T1,typename T2>
inline void __TBB_machine_AND( T1 *operand, T2 addend ) {
_InterlockedAnd((long volatile *)operand, (long)addend);
}

View File

@@ -0,0 +1,216 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_machine_msvc_ia32_common_H
#define __TBB_machine_msvc_ia32_common_H
#include <intrin.h>
//TODO: consider moving this macro to tbb_config.h and used there MSVC asm is used
#if !_M_X64 || __INTEL_COMPILER
#define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 1
#if _M_X64
#define __TBB_r(reg_name) r##reg_name
#else
#define __TBB_r(reg_name) e##reg_name
#endif
#else
//MSVC in x64 mode does not accept inline assembler
#define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 0
#endif
#define __TBB_NO_X86_MSVC_INLINE_ASM_MSG "The compiler being used is not supported (outdated?)"
#if (_MSC_VER >= 1300) || (__INTEL_COMPILER) //Use compiler intrinsic when available
#define __TBB_PAUSE_USE_INTRINSIC 1
#pragma intrinsic(_mm_pause)
namespace tbb { namespace internal { namespace intrinsics { namespace msvc {
static inline void __TBB_machine_pause (uintptr_t delay ) {
for (;delay>0; --delay )
_mm_pause();
}
}}}}
#else
#if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
#error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
#endif
namespace tbb { namespace internal { namespace inline_asm { namespace msvc {
static inline void __TBB_machine_pause (uintptr_t delay ) {
_asm
{
mov __TBB_r(ax), delay
__TBB_L1:
pause
add __TBB_r(ax), -1
jne __TBB_L1
}
return;
}
}}}}
#endif
static inline void __TBB_machine_pause (uintptr_t delay ){
#if __TBB_PAUSE_USE_INTRINSIC
tbb::internal::intrinsics::msvc::__TBB_machine_pause(delay);
#else
tbb::internal::inline_asm::msvc::__TBB_machine_pause(delay);
#endif
}
//TODO: move this function to windows_api.h or to place where it is used
#if (_MSC_VER<1400) && (!_WIN64) && (__TBB_X86_MSVC_INLINE_ASM_AVAILABLE)
static inline void* __TBB_machine_get_current_teb () {
void* pteb;
__asm mov eax, fs:[0x18]
__asm mov pteb, eax
return pteb;
}
#endif
#if ( _MSC_VER>=1400 && !defined(__INTEL_COMPILER) ) || (__INTEL_COMPILER>=1200)
// MSVC did not have this intrinsic prior to VC8.
// ICL 11.1 fails to compile a TBB example if __TBB_Log2 uses the intrinsic.
#define __TBB_LOG2_USE_BSR_INTRINSIC 1
#if _M_X64
#define __TBB_BSR_INTRINSIC _BitScanReverse64
#else
#define __TBB_BSR_INTRINSIC _BitScanReverse
#endif
#pragma intrinsic(__TBB_BSR_INTRINSIC)
namespace tbb { namespace internal { namespace intrinsics { namespace msvc {
inline uintptr_t __TBB_machine_lg( uintptr_t i ){
unsigned long j;
__TBB_BSR_INTRINSIC( &j, i );
return j;
}
}}}}
#else
#if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
#error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
#endif
namespace tbb { namespace internal { namespace inline_asm { namespace msvc {
inline uintptr_t __TBB_machine_lg( uintptr_t i ){
uintptr_t j;
__asm
{
bsr __TBB_r(ax), i
mov j, __TBB_r(ax)
}
return j;
}
}}}}
#endif
static inline intptr_t __TBB_machine_lg( uintptr_t i ) {
#if __TBB_LOG2_USE_BSR_INTRINSIC
return tbb::internal::intrinsics::msvc::__TBB_machine_lg(i);
#else
return tbb::internal::inline_asm::msvc::__TBB_machine_lg(i);
#endif
}
// API to retrieve/update FPU control setting
#define __TBB_CPU_CTL_ENV_PRESENT 1
namespace tbb { namespace internal { class cpu_ctl_env; } }
#if __TBB_X86_MSVC_INLINE_ASM_AVAILABLE
inline void __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* ctl ) {
__asm {
__asm mov __TBB_r(ax), ctl
__asm stmxcsr [__TBB_r(ax)]
__asm fstcw [__TBB_r(ax)+4]
}
}
inline void __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* ctl ) {
__asm {
__asm mov __TBB_r(ax), ctl
__asm ldmxcsr [__TBB_r(ax)]
__asm fldcw [__TBB_r(ax)+4]
}
}
#else
extern "C" {
void __TBB_EXPORTED_FUNC __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* );
void __TBB_EXPORTED_FUNC __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* );
}
#endif
namespace tbb {
namespace internal {
class cpu_ctl_env {
private:
int mxcsr;
short x87cw;
static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */
public:
bool operator!=( const cpu_ctl_env& ctl ) const { return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; }
void get_env() {
__TBB_get_cpu_ctl_env( this );
mxcsr &= MXCSR_CONTROL_MASK;
}
void set_env() const { __TBB_set_cpu_ctl_env( this ); }
};
} // namespace internal
} // namespace tbb
#if !__TBB_WIN8UI_SUPPORT
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
#define __TBB_Yield() SwitchToThread()
#else
#include<thread>
#define __TBB_Yield() std::this_thread::yield()
#endif
#define __TBB_Pause(V) __TBB_machine_pause(V)
#define __TBB_Log2(V) __TBB_machine_lg(V)
#undef __TBB_r
extern "C" {
__int8 __TBB_EXPORTED_FUNC __TBB_machine_try_lock_elided (volatile void* ptr);
void __TBB_EXPORTED_FUNC __TBB_machine_unlock_elided (volatile void* ptr);
// 'pause' instruction aborts HLE/RTM transactions
#if __TBB_PAUSE_USE_INTRINSIC
inline static void __TBB_machine_try_lock_elided_cancel() { _mm_pause(); }
#else
inline static void __TBB_machine_try_lock_elided_cancel() { _asm pause; }
#endif
#if __TBB_TSX_INTRINSICS_PRESENT
#define __TBB_machine_is_in_transaction _xtest
#define __TBB_machine_begin_transaction _xbegin
#define __TBB_machine_end_transaction _xend
// The value (0xFF) below comes from the
// Intel(R) 64 and IA-32 Architectures Optimization Reference Manual 12.4.5 lock not free
#define __TBB_machine_transaction_conflict_abort() _xabort(0xFF)
#else
__int8 __TBB_EXPORTED_FUNC __TBB_machine_is_in_transaction();
unsigned __int32 __TBB_EXPORTED_FUNC __TBB_machine_begin_transaction();
void __TBB_EXPORTED_FUNC __TBB_machine_end_transaction();
void __TBB_EXPORTED_FUNC __TBB_machine_transaction_conflict_abort();
#endif /* __TBB_TSX_INTRINSICS_PRESENT */
}
#endif /* __TBB_machine_msvc_ia32_common_H */

View File

@@ -0,0 +1,203 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_sunos_sparc_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_sunos_sparc_H
#include <stdint.h>
#include <unistd.h>
#define __TBB_WORDSIZE 8
// Big endian is assumed for SPARC.
// While hardware may support page-specific bi-endianness, only big endian pages may be exposed to TBB
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
/** To those working on SPARC hardware. Consider relaxing acquire and release
consistency helpers to no-op (as this port covers TSO mode only). **/
#define __TBB_compiler_fence() __asm__ __volatile__ ("": : :"memory")
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#define __TBB_full_memory_fence() __asm__ __volatile__("membar #LoadLoad|#LoadStore|#StoreStore|#StoreLoad": : : "memory")
//--------------------------------------------------
// Compare and swap
//--------------------------------------------------
/**
* Atomic CAS for 32 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
* @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
* @param value value to assign *ptr to if *ptr==comparand
* @param comparand value to compare with *ptr
( @return value originally in memory at ptr, regardless of success
*/
static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand ){
int32_t result;
__asm__ __volatile__(
"cas\t[%5],%4,%1"
: "=m"(*(int32_t *)ptr), "=r"(result)
: "m"(*(int32_t *)ptr), "1"(value), "r"(comparand), "r"(ptr)
: "memory");
return result;
}
/**
* Atomic CAS for 64 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
* @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
* @param value value to assign *ptr to if *ptr==comparand
* @param comparand value to compare with *ptr
( @return value originally in memory at ptr, regardless of success
*/
static inline int64_t __TBB_machine_cmpswp8(volatile void *ptr, int64_t value, int64_t comparand ){
int64_t result;
__asm__ __volatile__(
"casx\t[%5],%4,%1"
: "=m"(*(int64_t *)ptr), "=r"(result)
: "m"(*(int64_t *)ptr), "1"(value), "r"(comparand), "r"(ptr)
: "memory");
return result;
}
//---------------------------------------------------
// Fetch and add
//---------------------------------------------------
/**
* Atomic fetch and add for 32 bit values, in this case implemented by continuously checking success of atomicity
* @param ptr pointer to value to add addend to
* @param addened value to add to *ptr
* @return value at ptr before addened was added
*/
static inline int32_t __TBB_machine_fetchadd4(volatile void *ptr, int32_t addend){
int32_t result;
__asm__ __volatile__ (
"0:\t add\t %3, %4, %0\n" // do addition
"\t cas\t [%2], %3, %0\n" // cas to store result in memory
"\t cmp\t %3, %0\n" // check if value from memory is original
"\t bne,a,pn\t %%icc, 0b\n" // if not try again
"\t mov %0, %3\n" // use branch delay slot to move new value in memory to be added
: "=&r"(result), "=m"(*(int32_t *)ptr)
: "r"(ptr), "r"(*(int32_t *)ptr), "r"(addend), "m"(*(int32_t *)ptr)
: "ccr", "memory");
return result;
}
/**
* Atomic fetch and add for 64 bit values, in this case implemented by continuously checking success of atomicity
* @param ptr pointer to value to add addend to
* @param addened value to add to *ptr
* @return value at ptr before addened was added
*/
static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend){
int64_t result;
__asm__ __volatile__ (
"0:\t add\t %3, %4, %0\n" // do addition
"\t casx\t [%2], %3, %0\n" // cas to store result in memory
"\t cmp\t %3, %0\n" // check if value from memory is original
"\t bne,a,pn\t %%xcc, 0b\n" // if not try again
"\t mov %0, %3\n" // use branch delay slot to move new value in memory to be added
: "=&r"(result), "=m"(*(int64_t *)ptr)
: "r"(ptr), "r"(*(int64_t *)ptr), "r"(addend), "m"(*(int64_t *)ptr)
: "ccr", "memory");
return result;
}
//--------------------------------------------------------
// Logarithm (base two, integer)
//--------------------------------------------------------
static inline int64_t __TBB_machine_lg( uint64_t x ) {
__TBB_ASSERT(x, "__TBB_Log2(0) undefined");
uint64_t count;
// one hot encode
x |= (x >> 1);
x |= (x >> 2);
x |= (x >> 4);
x |= (x >> 8);
x |= (x >> 16);
x |= (x >> 32);
// count 1's
__asm__ ("popc %1, %0" : "=r"(count) : "r"(x) );
return count-1;
}
//--------------------------------------------------------
static inline void __TBB_machine_or( volatile void *ptr, uint64_t value ) {
__asm__ __volatile__ (
"0:\t or\t %2, %3, %%g1\n" // do operation
"\t casx\t [%1], %2, %%g1\n" // cas to store result in memory
"\t cmp\t %2, %%g1\n" // check if value from memory is original
"\t bne,a,pn\t %%xcc, 0b\n" // if not try again
"\t mov %%g1, %2\n" // use branch delay slot to move new value in memory to be added
: "=m"(*(int64_t *)ptr)
: "r"(ptr), "r"(*(int64_t *)ptr), "r"(value), "m"(*(int64_t *)ptr)
: "ccr", "g1", "memory");
}
static inline void __TBB_machine_and( volatile void *ptr, uint64_t value ) {
__asm__ __volatile__ (
"0:\t and\t %2, %3, %%g1\n" // do operation
"\t casx\t [%1], %2, %%g1\n" // cas to store result in memory
"\t cmp\t %2, %%g1\n" // check if value from memory is original
"\t bne,a,pn\t %%xcc, 0b\n" // if not try again
"\t mov %%g1, %2\n" // use branch delay slot to move new value in memory to be added
: "=m"(*(int64_t *)ptr)
: "r"(ptr), "r"(*(int64_t *)ptr), "r"(value), "m"(*(int64_t *)ptr)
: "ccr", "g1", "memory");
}
static inline void __TBB_machine_pause( int32_t delay ) {
// do nothing, inlined, doesn't matter
}
// put 0xff in memory location, return memory value,
// generic trylockbyte puts 0x01, however this is fine
// because all that matters is that 0 is unlocked
static inline bool __TBB_machine_trylockbyte(unsigned char &flag){
unsigned char result;
__asm__ __volatile__ (
"ldstub\t [%2], %0\n"
: "=r"(result), "=m"(flag)
: "r"(&flag), "m"(flag)
: "memory");
return result == 0;
}
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
// Definition of other functions
#define __TBB_Pause(V) __TBB_machine_pause(V)
#define __TBB_Log2(V) __TBB_machine_lg(V)
#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)

View File

@@ -0,0 +1,79 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_machine_windows_api_H
#define __TBB_machine_windows_api_H
#if _WIN32 || _WIN64
#if _XBOX
#define NONET
#define NOD3D
#include <xtl.h>
#else // Assume "usual" Windows
#include <windows.h>
#endif // _XBOX
#if _WIN32_WINNT < 0x0600
// The following Windows API function is declared explicitly;
// otherwise it fails to compile by VS2005.
#if !defined(WINBASEAPI) || (_WIN32_WINNT < 0x0501 && _MSC_VER == 1400)
#define __TBB_WINBASEAPI extern "C"
#else
#define __TBB_WINBASEAPI WINBASEAPI
#endif
__TBB_WINBASEAPI BOOL WINAPI TryEnterCriticalSection( LPCRITICAL_SECTION );
__TBB_WINBASEAPI BOOL WINAPI InitializeCriticalSectionAndSpinCount( LPCRITICAL_SECTION, DWORD );
// Overloading WINBASEAPI macro and using local functions missing in Windows XP/2003
#define InitializeCriticalSectionEx inlineInitializeCriticalSectionEx
#define CreateSemaphoreEx inlineCreateSemaphoreEx
#define CreateEventEx inlineCreateEventEx
inline BOOL WINAPI inlineInitializeCriticalSectionEx( LPCRITICAL_SECTION lpCriticalSection, DWORD dwSpinCount, DWORD )
{
return InitializeCriticalSectionAndSpinCount( lpCriticalSection, dwSpinCount );
}
inline HANDLE WINAPI inlineCreateSemaphoreEx( LPSECURITY_ATTRIBUTES lpSemaphoreAttributes, LONG lInitialCount, LONG lMaximumCount, LPCTSTR lpName, DWORD, DWORD )
{
return CreateSemaphore( lpSemaphoreAttributes, lInitialCount, lMaximumCount, lpName );
}
inline HANDLE WINAPI inlineCreateEventEx( LPSECURITY_ATTRIBUTES lpEventAttributes, LPCTSTR lpName, DWORD dwFlags, DWORD )
{
BOOL manual_reset = dwFlags&0x00000001 ? TRUE : FALSE; // CREATE_EVENT_MANUAL_RESET
BOOL initial_set = dwFlags&0x00000002 ? TRUE : FALSE; // CREATE_EVENT_INITIAL_SET
return CreateEvent( lpEventAttributes, manual_reset, initial_set, lpName );
}
#endif
#if defined(RTL_SRWLOCK_INIT)
#ifndef __TBB_USE_SRWLOCK
// TODO: turn it on when bug 1952 will be fixed
#define __TBB_USE_SRWLOCK 0
#endif
#endif
#else
#error tbb/machine/windows_api.h should only be used for Windows based platforms
#endif // _WIN32 || _WIN64
#endif // __TBB_machine_windows_api_H

View File

@@ -0,0 +1,144 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_windows_ia32_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_windows_ia32_H
#include "msvc_ia32_common.h"
#define __TBB_WORDSIZE 4
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#if __INTEL_COMPILER && (__INTEL_COMPILER < 1100)
#define __TBB_compiler_fence() __asm { __asm nop }
#define __TBB_full_memory_fence() __asm { __asm mfence }
#elif _MSC_VER >= 1300 || __INTEL_COMPILER
#pragma intrinsic(_ReadWriteBarrier)
#pragma intrinsic(_mm_mfence)
#define __TBB_compiler_fence() _ReadWriteBarrier()
#define __TBB_full_memory_fence() _mm_mfence()
#else
#error Unsupported compiler - need to define __TBB_{control,acquire,release}_consistency_helper to support it
#endif
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
// Workaround for overzealous compiler warnings in /Wp64 mode
#pragma warning (push)
#pragma warning (disable: 4244 4267)
#endif
extern "C" {
__int64 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __int64 comparand );
__int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd8 (volatile void *ptr, __int64 addend );
__int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore8 (volatile void *ptr, __int64 value );
void __TBB_EXPORTED_FUNC __TBB_machine_store8 (volatile void *ptr, __int64 value );
__int64 __TBB_EXPORTED_FUNC __TBB_machine_load8 (const volatile void *ptr);
}
//TODO: use _InterlockedXXX intrinsics as they available since VC 2005
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,U,A,C) \
static inline T __TBB_machine_cmpswp##S ( volatile void * ptr, U value, U comparand ) { \
T result; \
volatile T *p = (T *)ptr; \
__asm \
{ \
__asm mov edx, p \
__asm mov C , value \
__asm mov A , comparand \
__asm lock cmpxchg [edx], C \
__asm mov result, A \
} \
return result; \
} \
\
static inline T __TBB_machine_fetchadd##S ( volatile void * ptr, U addend ) { \
T result; \
volatile T *p = (T *)ptr; \
__asm \
{ \
__asm mov edx, p \
__asm mov A, addend \
__asm lock xadd [edx], A \
__asm mov result, A \
} \
return result; \
}\
\
static inline T __TBB_machine_fetchstore##S ( volatile void * ptr, U value ) { \
T result; \
volatile T *p = (T *)ptr; \
__asm \
{ \
__asm mov edx, p \
__asm mov A, value \
__asm lock xchg [edx], A \
__asm mov result, A \
} \
return result; \
}
__TBB_MACHINE_DEFINE_ATOMICS(1, __int8, __int8, al, cl)
__TBB_MACHINE_DEFINE_ATOMICS(2, __int16, __int16, ax, cx)
__TBB_MACHINE_DEFINE_ATOMICS(4, ptrdiff_t, ptrdiff_t, eax, ecx)
#undef __TBB_MACHINE_DEFINE_ATOMICS
static inline void __TBB_machine_OR( volatile void *operand, __int32 addend ) {
__asm
{
mov eax, addend
mov edx, [operand]
lock or [edx], eax
}
}
static inline void __TBB_machine_AND( volatile void *operand, __int32 addend ) {
__asm
{
mov eax, addend
mov edx, [operand]
lock and [edx], eax
}
}
#define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V)
//TODO: Check if it possible and profitable for IA-32 architecture on (Linux and Windows)
//to use of 64-bit load/store via floating point registers together with full fence
//for sequentially consistent load/store, instead of CAS.
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma warning (pop)
#endif // warnings 4244, 4267 are back

View File

@@ -0,0 +1,105 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_windows_intel64_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_windows_intel64_H
#define __TBB_WORDSIZE 8
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#include <intrin.h>
#include "msvc_ia32_common.h"
//TODO: Use _InterlockedXXX16 intrinsics for 2 byte operations
#if !__INTEL_COMPILER
#pragma intrinsic(_InterlockedOr64)
#pragma intrinsic(_InterlockedAnd64)
#pragma intrinsic(_InterlockedCompareExchange)
#pragma intrinsic(_InterlockedCompareExchange64)
#pragma intrinsic(_InterlockedExchangeAdd)
#pragma intrinsic(_InterlockedExchangeAdd64)
#pragma intrinsic(_InterlockedExchange)
#pragma intrinsic(_InterlockedExchange64)
#endif /* !(__INTEL_COMPILER) */
#if __INTEL_COMPILER && (__INTEL_COMPILER < 1100)
#define __TBB_compiler_fence() __asm { __asm nop }
#define __TBB_full_memory_fence() __asm { __asm mfence }
#elif _MSC_VER >= 1300 || __INTEL_COMPILER
#pragma intrinsic(_ReadWriteBarrier)
#pragma intrinsic(_mm_mfence)
#define __TBB_compiler_fence() _ReadWriteBarrier()
#define __TBB_full_memory_fence() _mm_mfence()
#endif
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
// ATTENTION: if you ever change argument types in machine-specific primitives,
// please take care of atomic_word<> specializations in tbb/atomic.h
extern "C" {
__int8 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp1 (volatile void *ptr, __int8 value, __int8 comparand );
__int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd1 (volatile void *ptr, __int8 addend );
__int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore1 (volatile void *ptr, __int8 value );
__int16 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp2 (volatile void *ptr, __int16 value, __int16 comparand );
__int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd2 (volatile void *ptr, __int16 addend );
__int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore2 (volatile void *ptr, __int16 value );
}
inline long __TBB_machine_cmpswp4 (volatile void *ptr, __int32 value, __int32 comparand ) {
return _InterlockedCompareExchange( (long*)ptr, value, comparand );
}
inline long __TBB_machine_fetchadd4 (volatile void *ptr, __int32 addend ) {
return _InterlockedExchangeAdd( (long*)ptr, addend );
}
inline long __TBB_machine_fetchstore4 (volatile void *ptr, __int32 value ) {
return _InterlockedExchange( (long*)ptr, value );
}
inline __int64 __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __int64 comparand ) {
return _InterlockedCompareExchange64( (__int64*)ptr, value, comparand );
}
inline __int64 __TBB_machine_fetchadd8 (volatile void *ptr, __int64 addend ) {
return _InterlockedExchangeAdd64( (__int64*)ptr, addend );
}
inline __int64 __TBB_machine_fetchstore8 (volatile void *ptr, __int64 value ) {
return _InterlockedExchange64( (__int64*)ptr, value );
}
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
inline void __TBB_machine_OR( volatile void *operand, intptr_t addend ) {
_InterlockedOr64((__int64*)operand, addend);
}
inline void __TBB_machine_AND( volatile void *operand, intptr_t addend ) {
_InterlockedAnd64((__int64*)operand, addend);
}
#define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V)

View File

@@ -0,0 +1,119 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
// TODO: revise by comparing with mac_ppc.h
#if !defined(__TBB_machine_H) || defined(__TBB_machine_xbox360_ppc_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_xbox360_ppc_H
#define NONET
#define NOD3D
#include "xtl.h"
#include "ppcintrinsics.h"
#if _MSC_VER >= 1300
extern "C" void _MemoryBarrier();
#pragma intrinsic(_MemoryBarrier)
#define __TBB_control_consistency_helper() __isync()
#define __TBB_acquire_consistency_helper() _MemoryBarrier()
#define __TBB_release_consistency_helper() _MemoryBarrier()
#endif
#define __TBB_full_memory_fence() __sync()
#define __TBB_WORDSIZE 4
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
//todo: define __TBB_USE_FENCED_ATOMICS and define acquire/release primitives to maximize performance
inline __int32 __TBB_machine_cmpswp4(volatile void *ptr, __int32 value, __int32 comparand ) {
__sync();
__int32 result = InterlockedCompareExchange((volatile LONG*)ptr, value, comparand);
__isync();
return result;
}
inline __int64 __TBB_machine_cmpswp8(volatile void *ptr, __int64 value, __int64 comparand )
{
__sync();
__int64 result = InterlockedCompareExchange64((volatile LONG64*)ptr, value, comparand);
__isync();
return result;
}
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#pragma optimize( "", off )
inline void __TBB_machine_pause (__int32 delay )
{
for (__int32 i=0; i<delay; i++) {;};
}
#pragma optimize( "", on )
#define __TBB_Yield() Sleep(0)
#define __TBB_Pause(V) __TBB_machine_pause(V)
// This port uses only 2 hardware threads for TBB on XBOX 360.
// Others are left to sound etc.
// Change the following mask to allow TBB use more HW threads.
static const int __TBB_XBOX360_HARDWARE_THREAD_MASK = 0x0C;
static inline int __TBB_XBOX360_DetectNumberOfWorkers()
{
char a[__TBB_XBOX360_HARDWARE_THREAD_MASK]; //compile time assert - at least one bit should be set always
a[0]=0;
return ((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 0) & 1) +
((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 1) & 1) +
((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 2) & 1) +
((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 3) & 1) +
((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 4) & 1) +
((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 5) & 1) + 1; // +1 accomodates for the master thread
}
static inline int __TBB_XBOX360_GetHardwareThreadIndex(int workerThreadIndex)
{
workerThreadIndex %= __TBB_XBOX360_DetectNumberOfWorkers()-1;
int m = __TBB_XBOX360_HARDWARE_THREAD_MASK;
int index = 0;
int skipcount = workerThreadIndex;
while (true)
{
if ((m & 1)!=0)
{
if (skipcount==0) break;
skipcount--;
}
m >>= 1;
index++;
}
return index;
}
#define __TBB_HardwareConcurrency() __TBB_XBOX360_DetectNumberOfWorkers()