Initial commit: Final state of the master project

2017-09-16 09:41:37 +02:00
commit 696180d43b
832 changed files with 169717 additions and 0 deletions


@@ -0,0 +1,202 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB__aggregator_H
#define __TBB__aggregator_H
#if !TBB_PREVIEW_AGGREGATOR
#error Set TBB_PREVIEW_AGGREGATOR before including aggregator.h
#endif
#include "atomic.h"
#include "tbb_profiling.h"
namespace tbb {
namespace interface6 {
using namespace tbb::internal;
class aggregator_operation {
template<typename handler_type> friend class aggregator_ext;
uintptr_t status;
aggregator_operation* my_next;
public:
enum aggregator_operation_status { agg_waiting=0, agg_finished };
aggregator_operation() : status(agg_waiting), my_next(NULL) {}
/// Call start before handling this operation
void start() { call_itt_notify(acquired, &status); }
/// Call finish when done handling this operation
/** The operation will be released to its originating thread, and possibly deleted. */
void finish() { itt_store_word_with_release(status, uintptr_t(agg_finished)); }
aggregator_operation* next() { return itt_hide_load_word(my_next);}
void set_next(aggregator_operation* n) { itt_hide_store_word(my_next, n); }
};
namespace internal {
class basic_operation_base : public aggregator_operation {
friend class basic_handler;
virtual void apply_body() = 0;
public:
basic_operation_base() : aggregator_operation() {}
virtual ~basic_operation_base() {}
};
template<typename Body>
class basic_operation : public basic_operation_base, no_assign {
const Body& my_body;
/*override*/ void apply_body() { my_body(); }
public:
basic_operation(const Body& b) : basic_operation_base(), my_body(b) {}
};
class basic_handler {
public:
basic_handler() {}
void operator()(aggregator_operation* op_list) const {
while (op_list) {
// ITT note: &(op_list->status) tag is used to cover accesses to the operation data.
// The executing thread "acquires" the tag (see start()) and then performs
// the associated operation w/o triggering a race condition diagnostics.
// A thread that created the operation is waiting for its status (see execute_impl()),
// so when this thread is done with the operation, it will "release" the tag
// and update the status (see finish()) to give control back to the waiting thread.
basic_operation_base& request = static_cast<basic_operation_base&>(*op_list);
// IMPORTANT: need to advance op_list to op_list->next() before calling request.finish()
op_list = op_list->next();
request.start();
request.apply_body();
request.finish();
}
}
};
} // namespace internal
//! Aggregator base class and expert interface
/** An aggregator for collecting operations coming from multiple sources and executing
them serially on a single thread. */
template <typename handler_type>
class aggregator_ext : tbb::internal::no_copy {
public:
aggregator_ext(const handler_type& h) : handler_busy(0), handle_operations(h) { mailbox = NULL; }
//! EXPERT INTERFACE: Enter a user-made operation into the aggregator's mailbox.
/** Details of user-made operations must be handled by user-provided handler */
void process(aggregator_operation *op) { execute_impl(*op); }
protected:
/** Place operation in mailbox, then either handle mailbox or wait for the operation
to be completed by a different thread. */
void execute_impl(aggregator_operation& op) {
aggregator_operation* res;
// ITT note: &(op.status) tag is used to cover accesses to this operation. This
// thread has created the operation, and now releases it so that the handler
// thread may handle the associated operation w/o triggering a race condition;
// thus this tag will be acquired just before the operation is handled in the
// handle_operations functor.
call_itt_notify(releasing, &(op.status));
// insert the operation in the queue
do {
// ITT may flag the following line as a race; it is a false positive:
// This is an atomic read; we don't provide itt_hide_load_word for atomics
op.my_next = res = mailbox; // NOT A RACE
} while (mailbox.compare_and_swap(&op, res) != res);
if (!res) { // first in the list; handle the operations
// ITT note: &mailbox tag covers access to the handler_busy flag, which this
// waiting handler thread will try to set before entering handle_operations.
call_itt_notify(acquired, &mailbox);
start_handle_operations();
__TBB_ASSERT(op.status, NULL);
}
else { // not first; wait for op to be ready
call_itt_notify(prepare, &(op.status));
spin_wait_while_eq(op.status, uintptr_t(aggregator_operation::agg_waiting));
itt_load_word_with_acquire(op.status);
}
}
private:
//! An atomically updated list (aka mailbox) of aggregator_operations
atomic<aggregator_operation *> mailbox;
//! Controls thread access to handle_operations
/** Behaves as boolean flag where 0=false, 1=true */
uintptr_t handler_busy;
handler_type handle_operations;
//! Trigger the handling of operations when the handler is free
void start_handle_operations() {
aggregator_operation *pending_operations;
// ITT note: &handler_busy tag covers access to mailbox as it is passed
// between active and waiting handlers. Below, the waiting handler waits until
// the active handler releases, and the waiting handler acquires &handler_busy as
// it becomes the active_handler. The release point is at the end of this
// function, when all operations in mailbox have been handled by the
// owner of this aggregator.
call_itt_notify(prepare, &handler_busy);
// get handler_busy: only one thread can possibly spin here at a time
spin_wait_until_eq(handler_busy, uintptr_t(0));
call_itt_notify(acquired, &handler_busy);
// acquire fence not necessary here due to causality rule and surrounding atomics
__TBB_store_with_release(handler_busy, uintptr_t(1));
// ITT note: &mailbox tag covers access to the handler_busy flag itself.
// Capturing the state of the mailbox signifies that handler_busy has been
// set and a new active handler will now process that list's operations.
call_itt_notify(releasing, &mailbox);
// grab pending_operations
pending_operations = mailbox.fetch_and_store(NULL);
// handle all the operations
handle_operations(pending_operations);
// release the handler
itt_store_word_with_release(handler_busy, uintptr_t(0));
}
};
//! Basic aggregator interface
class aggregator : private aggregator_ext<internal::basic_handler> {
public:
aggregator() : aggregator_ext<internal::basic_handler>(internal::basic_handler()) {}
//! BASIC INTERFACE: Enter a function for exclusive execution by the aggregator.
/** The calling thread stores the function object in a basic_operation and
places the operation in the aggregator's mailbox */
template<typename Body>
void execute(const Body& b) {
internal::basic_operation<Body> op(b);
this->execute_impl(op);
}
};
} // namespace interface6
using interface6::aggregator;
using interface6::aggregator_ext;
using interface6::aggregator_operation;
} // namespace tbb
#endif // __TBB__aggregator_H
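
A minimal usage sketch for the basic aggregator interface above, not part of the committed file. The include path, the shared_counter variable, and the helper function are illustrative assumptions; the only requirements shown in the header are defining TBB_PREVIEW_AGGREGATOR before inclusion and passing a functor to execute().

#define TBB_PREVIEW_AGGREGATOR 1
#include "tbb/aggregator.h"   // assumed include path for this repository layout

int shared_counter = 0;       // deliberately unsynchronized; serialized by the aggregator
tbb::aggregator agg;

// May be called concurrently from many threads; execute() blocks until the
// functor has been run inside the aggregator's serial handler.
void increment_from_any_thread() {
    agg.execute([] { ++shared_counter; });
}

The first thread to find the mailbox empty becomes the active handler and drains the whole operation list; later arrivals spin until their own operation's status leaves agg_waiting, which is exactly what execute_impl() and start_handle_operations() above implement.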


@@ -0,0 +1,47 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_aligned_space_H
#define __TBB_aligned_space_H
#include "tbb_stddef.h"
#include "tbb_machine.h"
namespace tbb {
//! Block of space aligned sufficiently to construct an array T with N elements.
/** The elements are not constructed or destroyed by this class.
@ingroup memory_allocation */
template<typename T,size_t N=1>
class aligned_space {
private:
typedef __TBB_TypeWithAlignmentAtLeastAsStrict(T) element_type;
element_type array[(sizeof(T)*N+sizeof(element_type)-1)/sizeof(element_type)];
public:
//! Pointer to beginning of array
T* begin() {return internal::punned_cast<T*>(this);}
//! Pointer to one past last element in array.
T* end() {return begin()+N;}
};
} // namespace tbb
#endif /* __TBB_aligned_space_H */
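
A short sketch, added for illustration, of the typical aligned_space pattern: reserve aligned raw storage, then construct and destroy the elements explicitly with placement new, since the class itself never does. The Widget type, function name, and include path are assumptions.

#include <new>
#include "tbb/aligned_space.h"   // assumed include path

struct Widget { int x; };        // illustrative payload

void aligned_space_example() {
    tbb::aligned_space<Widget, 4> storage;            // uninitialized, suitably aligned
    for (Widget* p = storage.begin(); p != storage.end(); ++p)
        new (p) Widget();                             // construct in place
    storage.begin()[0].x = 42;                        // use the elements
    for (Widget* p = storage.begin(); p != storage.end(); ++p)
        p->~Widget();                                 // destroy in place
}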

Research/inc/tbb/atomic.h

@@ -0,0 +1,556 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_atomic_H
#define __TBB_atomic_H
#include <cstddef>
#if _MSC_VER
#define __TBB_LONG_LONG __int64
#else
#define __TBB_LONG_LONG long long
#endif /* _MSC_VER */
#include "tbb_machine.h"
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
// Workaround for overzealous compiler warnings
#pragma warning (push)
#pragma warning (disable: 4244 4267 4512)
#endif
namespace tbb {
//! Specifies memory semantics.
enum memory_semantics {
//! Sequential consistency
full_fence,
//! Acquire
acquire,
//! Release
release,
//! No ordering
relaxed
};
//! @cond INTERNAL
namespace internal {
#if __TBB_ATTRIBUTE_ALIGNED_PRESENT
#define __TBB_DECL_ATOMIC_FIELD(t,f,a) t f __attribute__ ((aligned(a)));
#elif __TBB_DECLSPEC_ALIGN_PRESENT
#define __TBB_DECL_ATOMIC_FIELD(t,f,a) __declspec(align(a)) t f;
#else
#error Do not know syntax for forcing alignment.
#endif
template<size_t S>
struct atomic_rep; // Primary template declared, but never defined.
template<>
struct atomic_rep<1> { // Specialization
typedef int8_t word;
};
template<>
struct atomic_rep<2> { // Specialization
typedef int16_t word;
};
template<>
struct atomic_rep<4> { // Specialization
#if _MSC_VER && !_WIN64
// Work-around that avoids spurious /Wp64 warnings
typedef intptr_t word;
#else
typedef int32_t word;
#endif
};
#if __TBB_64BIT_ATOMICS
template<>
struct atomic_rep<8> { // Specialization
typedef int64_t word;
};
#endif
template<typename value_type, size_t size>
struct aligned_storage;
//the specializations are needed to please MSVC syntax of __declspec(align()) which accept _literal_ constants only
#if __TBB_ATOMIC_CTORS
#define ATOMIC_STORAGE_PARTIAL_SPECIALIZATION(S) \
template<typename value_type> \
struct aligned_storage<value_type,S> { \
__TBB_DECL_ATOMIC_FIELD(value_type,my_value,S) \
aligned_storage() = default ; \
constexpr aligned_storage(value_type value):my_value(value){} \
}; \

#else
#define ATOMIC_STORAGE_PARTIAL_SPECIALIZATION(S) \
template<typename value_type> \
struct aligned_storage<value_type,S> { \
__TBB_DECL_ATOMIC_FIELD(value_type,my_value,S) \
}; \

#endif
template<typename value_type>
struct aligned_storage<value_type,1> {
value_type my_value;
#if __TBB_ATOMIC_CTORS
aligned_storage() = default ;
constexpr aligned_storage(value_type value):my_value(value){}
#endif
};
ATOMIC_STORAGE_PARTIAL_SPECIALIZATION(2)
ATOMIC_STORAGE_PARTIAL_SPECIALIZATION(4)
#if __TBB_64BIT_ATOMICS
ATOMIC_STORAGE_PARTIAL_SPECIALIZATION(8)
#endif
template<size_t Size, memory_semantics M>
struct atomic_traits; // Primary template declared, but not defined.
#define __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(S,M) \
template<> struct atomic_traits<S,M> { \
typedef atomic_rep<S>::word word; \
inline static word compare_and_swap( volatile void* location, word new_value, word comparand ) { \
return __TBB_machine_cmpswp##S##M(location,new_value,comparand); \
} \
inline static word fetch_and_add( volatile void* location, word addend ) { \
return __TBB_machine_fetchadd##S##M(location,addend); \
} \
inline static word fetch_and_store( volatile void* location, word value ) { \
return __TBB_machine_fetchstore##S##M(location,value); \
} \
};
#define __TBB_DECL_ATOMIC_PRIMITIVES(S) \
template<memory_semantics M> \
struct atomic_traits<S,M> { \
typedef atomic_rep<S>::word word; \
inline static word compare_and_swap( volatile void* location, word new_value, word comparand ) { \
return __TBB_machine_cmpswp##S(location,new_value,comparand); \
} \
inline static word fetch_and_add( volatile void* location, word addend ) { \
return __TBB_machine_fetchadd##S(location,addend); \
} \
inline static word fetch_and_store( volatile void* location, word value ) { \
return __TBB_machine_fetchstore##S(location,value); \
} \
};
template<memory_semantics M>
struct atomic_load_store_traits; // Primary template declaration
#define __TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(M) \
template<> struct atomic_load_store_traits<M> { \
template <typename T> \
inline static T load( const volatile T& location ) { \
return __TBB_load_##M( location ); \
} \
template <typename T> \
inline static void store( volatile T& location, T value ) { \
__TBB_store_##M( location, value ); \
} \
}
#if __TBB_USE_FENCED_ATOMICS
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,full_fence)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,full_fence)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,full_fence)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,acquire)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,acquire)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,acquire)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,release)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,release)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,release)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,relaxed)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,relaxed)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,relaxed)
#if __TBB_64BIT_ATOMICS
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,full_fence)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,acquire)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,release)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,relaxed)
#endif
#else /* !__TBB_USE_FENCED_ATOMICS */
__TBB_DECL_ATOMIC_PRIMITIVES(1)
__TBB_DECL_ATOMIC_PRIMITIVES(2)
__TBB_DECL_ATOMIC_PRIMITIVES(4)
#if __TBB_64BIT_ATOMICS
__TBB_DECL_ATOMIC_PRIMITIVES(8)
#endif
#endif /* !__TBB_USE_FENCED_ATOMICS */
__TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(full_fence);
__TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(acquire);
__TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(release);
__TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(relaxed);
//! Additive inverse of 1 for type T.
/** Various compilers issue various warnings if -1 is used with various integer types.
The baroque expression below avoids all the warnings (we hope). */
#define __TBB_MINUS_ONE(T) (T(T(0)-T(1)))
//! Base class that provides basic functionality for atomic<T> without fetch_and_add.
/** Works for any type T that has the same size as an integral type, has a trivial constructor/destructor,
and can be copied/compared by memcpy/memcmp. */
template<typename T>
struct atomic_impl {
protected:
aligned_storage<T,sizeof(T)> my_storage;
private:
//TODO: rechecks on recent versions of gcc if union is still the _only_ way to do a conversion without warnings
//! Union type used to convert type T to underlying integral type.
template<typename value_type>
union converter {
typedef typename atomic_rep<sizeof(value_type)>::word bits_type;
converter(){}
converter(value_type a_value) : value(a_value) {}
value_type value;
bits_type bits;
};
template<typename value_t>
static typename converter<value_t>::bits_type to_bits(value_t value){
return converter<value_t>(value).bits;
}
template<typename value_t>
static value_t to_value(typename converter<value_t>::bits_type bits){
converter<value_t> u;
u.bits = bits;
return u.value;
}
template<typename value_t>
union ptr_converter; //Primary template declared, but never defined.
template<typename value_t>
union ptr_converter<value_t *> {
ptr_converter(){}
ptr_converter(value_t* a_value) : value(a_value) {}
value_t* value;
uintptr_t bits;
};
//TODO: check if making to_bits accepting reference (thus unifying it with to_bits_ref)
//does not hurt performance
template<typename value_t>
static typename converter<value_t>::bits_type & to_bits_ref(value_t& value){
//TODO: this #ifdef is temporary workaround, as union conversion seems to fail
//on suncc for 64 bit types for 32 bit target
#if !__SUNPRO_CC
return *(typename converter<value_t>::bits_type*)ptr_converter<value_t*>(&value).bits;
#else
return *(typename converter<value_t>::bits_type*)(&value);
#endif
}
public:
typedef T value_type;
#if __TBB_ATOMIC_CTORS
atomic_impl() = default ;
constexpr atomic_impl(value_type value):my_storage(value){}
#endif
template<memory_semantics M>
value_type fetch_and_store( value_type value ) {
return to_value<value_type>(
internal::atomic_traits<sizeof(value_type),M>::fetch_and_store( &my_storage.my_value, to_bits(value) )
);
}
value_type fetch_and_store( value_type value ) {
return fetch_and_store<full_fence>(value);
}
template<memory_semantics M>
value_type compare_and_swap( value_type value, value_type comparand ) {
return to_value<value_type>(
internal::atomic_traits<sizeof(value_type),M>::compare_and_swap( &my_storage.my_value, to_bits(value), to_bits(comparand) )
);
}
value_type compare_and_swap( value_type value, value_type comparand ) {
return compare_and_swap<full_fence>(value,comparand);
}
operator value_type() const volatile { // volatile qualifier here for backwards compatibility
return to_value<value_type>(
__TBB_load_with_acquire( to_bits_ref(my_storage.my_value) )
);
}
template<memory_semantics M>
value_type load () const {
return to_value<value_type>(
internal::atomic_load_store_traits<M>::load( to_bits_ref(my_storage.my_value) )
);
}
value_type load () const {
return load<acquire>();
}
template<memory_semantics M>
void store ( value_type value ) {
internal::atomic_load_store_traits<M>::store( to_bits_ref(my_storage.my_value), to_bits(value));
}
void store ( value_type value ) {
store<release>( value );
}
protected:
value_type store_with_release( value_type rhs ) {
//TODO: unify with store<release>
__TBB_store_with_release( to_bits_ref(my_storage.my_value), to_bits(rhs) );
return rhs;
}
};
//! Base class that provides basic functionality for atomic<T> with fetch_and_add.
/** I is the underlying type.
D is the difference type.
StepType should be char if I is an integral type, and T if I is a T*. */
template<typename I, typename D, typename StepType>
struct atomic_impl_with_arithmetic: atomic_impl<I> {
public:
typedef I value_type;
#if __TBB_ATOMIC_CTORS
atomic_impl_with_arithmetic() = default ;
constexpr atomic_impl_with_arithmetic(value_type value): atomic_impl<I>(value){}
#endif
template<memory_semantics M>
value_type fetch_and_add( D addend ) {
return value_type(internal::atomic_traits<sizeof(value_type),M>::fetch_and_add( &this->my_storage.my_value, addend*sizeof(StepType) ));
}
value_type fetch_and_add( D addend ) {
return fetch_and_add<full_fence>(addend);
}
template<memory_semantics M>
value_type fetch_and_increment() {
return fetch_and_add<M>(1);
}
value_type fetch_and_increment() {
return fetch_and_add(1);
}
template<memory_semantics M>
value_type fetch_and_decrement() {
return fetch_and_add<M>(__TBB_MINUS_ONE(D));
}
value_type fetch_and_decrement() {
return fetch_and_add(__TBB_MINUS_ONE(D));
}
public:
value_type operator+=( D value ) {
return fetch_and_add(value)+value;
}
value_type operator-=( D value ) {
// Additive inverse of value computed using binary minus,
// instead of unary minus, for sake of avoiding compiler warnings.
return operator+=(D(0)-value);
}
value_type operator++() {
return fetch_and_add(1)+1;
}
value_type operator--() {
return fetch_and_add(__TBB_MINUS_ONE(D))-1;
}
value_type operator++(int) {
return fetch_and_add(1);
}
value_type operator--(int) {
return fetch_and_add(__TBB_MINUS_ONE(D));
}
};
} /* Internal */
//! @endcond
//! Primary template for atomic.
/** See the Reference for details.
@ingroup synchronization */
template<typename T>
struct atomic: internal::atomic_impl<T> {
#if __TBB_ATOMIC_CTORS
atomic() = default;
constexpr atomic(T arg): internal::atomic_impl<T>(arg) {}
#endif
T operator=( T rhs ) {
// "this" required here in strict ISO C++ because store_with_release is a dependent name
return this->store_with_release(rhs);
}
atomic<T>& operator=( const atomic<T>& rhs ) {this->store_with_release(rhs); return *this;}
};
#if __TBB_ATOMIC_CTORS
#define __TBB_DECL_ATOMIC(T) \
template<> struct atomic<T>: internal::atomic_impl_with_arithmetic<T,T,char> { \
atomic() = default; \
constexpr atomic(T arg): internal::atomic_impl_with_arithmetic<T,T,char>(arg) {} \
\
T operator=( T rhs ) {return store_with_release(rhs);} \
atomic<T>& operator=( const atomic<T>& rhs ) {store_with_release(rhs); return *this;} \
};
#else
#define __TBB_DECL_ATOMIC(T) \
template<> struct atomic<T>: internal::atomic_impl_with_arithmetic<T,T,char> { \
T operator=( T rhs ) {return store_with_release(rhs);} \
atomic<T>& operator=( const atomic<T>& rhs ) {store_with_release(rhs); return *this;} \
};
#endif
#if __TBB_64BIT_ATOMICS
//TODO: consider adding non-default (and atomic) copy constructor for 32bit platform
__TBB_DECL_ATOMIC(__TBB_LONG_LONG)
__TBB_DECL_ATOMIC(unsigned __TBB_LONG_LONG)
#else
// test_atomic will verify that sizeof(long long)==8
#endif
__TBB_DECL_ATOMIC(long)
__TBB_DECL_ATOMIC(unsigned long)
#if _MSC_VER && !_WIN64
#if __TBB_ATOMIC_CTORS
/* Special version of __TBB_DECL_ATOMIC that avoids gratuitous warnings from cl /Wp64 option.
It is identical to __TBB_DECL_ATOMIC(unsigned) except that it replaces operator=(T)
with an operator=(U) that explicitly converts the U to a T. Types T and U should be
type synonyms on the platform. Type U should be the wider variant of T from the
perspective of /Wp64. */
#define __TBB_DECL_ATOMIC_ALT(T,U) \
template<> struct atomic<T>: internal::atomic_impl_with_arithmetic<T,T,char> { \
atomic() = default ; \
constexpr atomic(T arg): internal::atomic_impl_with_arithmetic<T,T,char>(arg) {} \
T operator=( U rhs ) {return store_with_release(T(rhs));} \
atomic<T>& operator=( const atomic<T>& rhs ) {store_with_release(rhs); return *this;} \
};
#else
#define __TBB_DECL_ATOMIC_ALT(T,U) \
template<> struct atomic<T>: internal::atomic_impl_with_arithmetic<T,T,char> { \
T operator=( U rhs ) {return store_with_release(T(rhs));} \
atomic<T>& operator=( const atomic<T>& rhs ) {store_with_release(rhs); return *this;} \
};
#endif
__TBB_DECL_ATOMIC_ALT(unsigned,size_t)
__TBB_DECL_ATOMIC_ALT(int,ptrdiff_t)
#else
__TBB_DECL_ATOMIC(unsigned)
__TBB_DECL_ATOMIC(int)
#endif /* _MSC_VER && !_WIN64 */
__TBB_DECL_ATOMIC(unsigned short)
__TBB_DECL_ATOMIC(short)
__TBB_DECL_ATOMIC(char)
__TBB_DECL_ATOMIC(signed char)
__TBB_DECL_ATOMIC(unsigned char)
#if !_MSC_VER || defined(_NATIVE_WCHAR_T_DEFINED)
__TBB_DECL_ATOMIC(wchar_t)
#endif /* !_MSC_VER || defined(_NATIVE_WCHAR_T_DEFINED) */
//! Specialization for atomic<T*> with arithmetic and operator->.
template<typename T> struct atomic<T*>: internal::atomic_impl_with_arithmetic<T*,ptrdiff_t,T> {
#if __TBB_ATOMIC_CTORS
atomic() = default ;
constexpr atomic(T* arg): internal::atomic_impl_with_arithmetic<T*,ptrdiff_t,T>(arg) {}
#endif
T* operator=( T* rhs ) {
// "this" required here in strict ISO C++ because store_with_release is a dependent name
return this->store_with_release(rhs);
}
atomic<T*>& operator=( const atomic<T*>& rhs ) {
this->store_with_release(rhs); return *this;
}
T* operator->() const {
return (*this);
}
};
//! Specialization for atomic<void*>, for sake of not allowing arithmetic or operator->.
template<> struct atomic<void*>: internal::atomic_impl<void*> {
#if __TBB_ATOMIC_CTORS
atomic() = default ;
constexpr atomic(void* arg): internal::atomic_impl<void*>(arg) {}
#endif
void* operator=( void* rhs ) {
// "this" required here in strict ISO C++ because store_with_release is a dependent name
return this->store_with_release(rhs);
}
atomic<void*>& operator=( const atomic<void*>& rhs ) {
this->store_with_release(rhs); return *this;
}
};
// Helpers to workaround ugly syntax of calling template member function of a
// template class with template argument dependent on template parameters.
template <memory_semantics M, typename T>
T load ( const atomic<T>& a ) { return a.template load<M>(); }
template <memory_semantics M, typename T>
void store ( atomic<T>& a, T value ) { a.template store<M>(value); }
namespace interface6{
//! Make an atomic for use in an initialization (list), as an alternative to zero-initialization or normal assignment.
template<typename T>
atomic<T> make_atomic(T t) {
atomic<T> a;
store<relaxed>(a,t);
return a;
}
}
using interface6::make_atomic;
namespace internal {
template<memory_semantics M, typename T >
void swap(atomic<T> & lhs, atomic<T> & rhs){
T tmp = load<M>(lhs);
store<M>(lhs,load<M>(rhs));
store<M>(rhs,tmp);
}
// only to aid in the gradual conversion of ordinary variables to proper atomics
template<typename T>
inline atomic<T>& as_atomic( T& t ) {
return (atomic<T>&)t;
}
} // namespace tbb::internal
} // namespace tbb
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning (pop)
#endif // warnings 4244, 4267 are back
#endif /* __TBB_atomic_H */
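
A hedged usage sketch for the atomic<T> template above, added for illustration (the variable and function names are assumptions). It shows zero-initialized file-scope atomics, implicit full-fence arithmetic, explicit memory_semantics, compare_and_swap returning the previous value, and the free-function load<> helper.

#include <cstddef>
#include "tbb/atomic.h"          // assumed include path

tbb::atomic<int>   hit_count;    // file-scope atomics are zero-initialized
tbb::atomic<void*> slot;         // pointer specialization without arithmetic

void atomic_example(void* p) {
    ++hit_count;                                  // fetch_and_add(1) with full_fence
    hit_count.fetch_and_add<tbb::relaxed>(1);     // explicit memory semantics
    // compare_and_swap returns the previous value; the exchange happened
    // only if that value equals the comparand (NULL here).
    if (slot.compare_and_swap(p, NULL) == NULL) {
        // this call installed p
    }
    int snapshot = tbb::load<tbb::acquire>(hit_count);
    (void)snapshot;
}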


@@ -0,0 +1,159 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_blocked_range_H
#define __TBB_blocked_range_H
#include "tbb_stddef.h"
namespace tbb {
/** \page range_req Requirements on range concept
Class \c R implementing the concept of range must define:
- \code R::R( const R& ); \endcode Copy constructor
- \code R::~R(); \endcode Destructor
- \code bool R::is_divisible() const; \endcode True if range can be partitioned into two subranges
- \code bool R::empty() const; \endcode True if range is empty
- \code R::R( R& r, split ); \endcode Split range \c r into two subranges.
**/
//! A range over which to iterate.
/** @ingroup algorithms */
template<typename Value>
class blocked_range {
public:
//! Type of a value
/** Called a const_iterator for sake of algorithms that need to treat a blocked_range
as an STL container. */
typedef Value const_iterator;
//! Type for size of a range
typedef std::size_t size_type;
//! Construct range with default-constructed values for begin and end.
/** Requires that Value have a default constructor. */
blocked_range() : my_end(), my_begin() {}
//! Construct range over half-open interval [begin,end), with the given grainsize.
blocked_range( Value begin_, Value end_, size_type grainsize_=1 ) :
my_end(end_), my_begin(begin_), my_grainsize(grainsize_)
{
__TBB_ASSERT( my_grainsize>0, "grainsize must be positive" );
}
//! Beginning of range.
const_iterator begin() const {return my_begin;}
//! One past last value in range.
const_iterator end() const {return my_end;}
//! Size of the range
/** Unspecified if end()<begin(). */
size_type size() const {
__TBB_ASSERT( !(end()<begin()), "size() unspecified if end()<begin()" );
return size_type(my_end-my_begin);
}
//! The grain size for this range.
size_type grainsize() const {return my_grainsize;}
//------------------------------------------------------------------------
// Methods that implement Range concept
//------------------------------------------------------------------------
//! True if range is empty.
bool empty() const {return !(my_begin<my_end);}
//! True if range is divisible.
/** Unspecified if end()<begin(). */
bool is_divisible() const {return my_grainsize<size();}
//! Split range.
/** The new Range *this has the second part, the old range r has the first part.
Unspecified if end()<begin() or !is_divisible(). */
blocked_range( blocked_range& r, split ) :
my_end(r.my_end),
my_begin(do_split(r, split())),
my_grainsize(r.my_grainsize)
{
// only comparison 'less than' is required from values of blocked_range objects
__TBB_ASSERT( !(my_begin < r.my_end) && !(r.my_end < my_begin), "blocked_range has been split incorrectly" );
}
#if __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES
//! Static field to support proportional split
static const bool is_splittable_in_proportion = true;
//! Split range.
/** The new Range *this has the second part split according to specified proportion, the old range r has the first part.
Unspecified if end()<begin() or !is_divisible(). */
blocked_range( blocked_range& r, proportional_split& proportion ) :
my_end(r.my_end),
my_begin(do_split(r, proportion)),
my_grainsize(r.my_grainsize)
{
// only comparison 'less than' is required from values of blocked_range objects
__TBB_ASSERT( !(my_begin < r.my_end) && !(r.my_end < my_begin), "blocked_range has been split incorrectly" );
}
#endif /* __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES */
private:
/** NOTE: my_end MUST be declared before my_begin, otherwise the forking constructor will break. */
Value my_end;
Value my_begin;
size_type my_grainsize;
//! Auxiliary function used by forking constructor.
/** Using this function lets us not require that Value support assignment or default construction. */
static Value do_split( blocked_range& r, split )
{
__TBB_ASSERT( r.is_divisible(), "cannot split blocked_range that is not divisible" );
Value middle = r.my_begin + (r.my_end - r.my_begin) / 2u;
r.my_end = middle;
return middle;
}
#if __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES
static Value do_split( blocked_range& r, proportional_split& proportion )
{
__TBB_ASSERT( r.is_divisible(), "cannot split blocked_range that is not divisible" );
// usage of 32-bit floating point arithmetic is not enough to handle ranges of
// more than 2^24 iterations accurately. However, even on ranges with 2^64
// iterations the computational error approximately equals to 0.000001% which
// makes small impact on uniform distribution of such range's iterations (assuming
// all iterations take equal time to complete). See 'test_partitioner_whitebox'
// for implementation of an exact split algorithm
size_type right_part = size_type(float(r.size()) * float(proportion.right())
/ float(proportion.left() + proportion.right()) + 0.5f);
return r.my_end = Value(r.my_end - right_part);
}
#endif /* __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES */
template<typename RowValue, typename ColValue>
friend class blocked_range2d;
template<typename RowValue, typename ColValue, typename PageValue>
friend class blocked_range3d;
};
} // namespace tbb
#endif /* __TBB_blocked_range_H */
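
An illustrative sketch (not part of the committed file) of the Range concept that blocked_range models: construct a half-open interval with a grainsize, test divisibility, and use the splitting constructor, which is what recursive algorithms such as parallel_for apply repeatedly. Names and bounds below are assumptions.

#include <cstddef>
#include "tbb/blocked_range.h"   // assumed include path

void blocked_range_example() {
    // Half-open interval [0, 1000) with grainsize 16; divisible while size() > grainsize().
    tbb::blocked_range<std::size_t> whole(0, 1000, 16);
    if (whole.is_divisible()) {
        // The splitting constructor leaves the first half in 'whole'
        // and moves the second half into 'right'.
        tbb::blocked_range<std::size_t> right(whole, tbb::split());
        // whole is now roughly [0, 500) and right roughly [500, 1000).
    }
    // Serial traversal of whatever remains in 'whole':
    for (std::size_t i = whole.begin(); i != whole.end(); ++i) {
        // ... process index i ...
    }
}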


@@ -0,0 +1,108 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_blocked_range2d_H
#define __TBB_blocked_range2d_H
#include "tbb_stddef.h"
#include "blocked_range.h"
namespace tbb {
//! A 2-dimensional range that models the Range concept.
/** @ingroup algorithms */
template<typename RowValue, typename ColValue=RowValue>
class blocked_range2d {
public:
//! Type for size of an iteration range
typedef blocked_range<RowValue> row_range_type;
typedef blocked_range<ColValue> col_range_type;
private:
row_range_type my_rows;
col_range_type my_cols;
public:
blocked_range2d( RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize,
ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) :
my_rows(row_begin,row_end,row_grainsize),
my_cols(col_begin,col_end,col_grainsize)
{
}
blocked_range2d( RowValue row_begin, RowValue row_end,
ColValue col_begin, ColValue col_end ) :
my_rows(row_begin,row_end),
my_cols(col_begin,col_end)
{
}
//! True if range is empty
bool empty() const {
// Yes, it is a logical OR here, not AND.
return my_rows.empty() || my_cols.empty();
}
//! True if range is divisible into two pieces.
bool is_divisible() const {
return my_rows.is_divisible() || my_cols.is_divisible();
}
blocked_range2d( blocked_range2d& r, split ) :
my_rows(r.my_rows),
my_cols(r.my_cols)
{
split split_obj;
do_split(r, split_obj);
}
#if __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES
//! Static field to support proportional split
static const bool is_splittable_in_proportion = true;
blocked_range2d( blocked_range2d& r, proportional_split& proportion ) :
my_rows(r.my_rows),
my_cols(r.my_cols)
{
do_split(r, proportion);
}
#endif /* __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES */
template <typename Split>
void do_split( blocked_range2d& r, Split& split_obj )
{
if( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) {
my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj);
} else {
my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj);
}
}
//! The rows of the iteration space
const row_range_type& rows() const {return my_rows;}
//! The columns of the iteration space
const col_range_type& cols() const {return my_cols;}
};
} // namespace tbb
#endif /* __TBB_blocked_range2d_H */
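
A brief illustrative sketch for the 2-D range above; the image buffer, function name, and grainsizes are assumptions. The rows()/cols() loop nest is what a body would execute over each sub-block after do_split() bisects whichever axis is large relative to its grainsize.

#include "tbb/blocked_range2d.h"   // assumed include path

void blocked_range2d_example(float* image, int height, int width) {
    // Rows [0, height) with grainsize 16, columns [0, width) with grainsize 64.
    tbb::blocked_range2d<int> r(0, height, 16, 0, width, 64);
    for (int y = r.rows().begin(); y != r.rows().end(); ++y)
        for (int x = r.cols().begin(); x != r.cols().end(); ++x)
            image[y * width + x] *= 0.5f;    // scale every element in the block
}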


@@ -0,0 +1,128 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_blocked_range3d_H
#define __TBB_blocked_range3d_H
#include "tbb_stddef.h"
#include "blocked_range.h"
namespace tbb {
//! A 3-dimensional range that models the Range concept.
/** @ingroup algorithms */
template<typename PageValue, typename RowValue=PageValue, typename ColValue=RowValue>
class blocked_range3d {
public:
//! Type for size of an iteration range
typedef blocked_range<PageValue> page_range_type;
typedef blocked_range<RowValue> row_range_type;
typedef blocked_range<ColValue> col_range_type;
private:
page_range_type my_pages;
row_range_type my_rows;
col_range_type my_cols;
public:
blocked_range3d( PageValue page_begin, PageValue page_end,
RowValue row_begin, RowValue row_end,
ColValue col_begin, ColValue col_end ) :
my_pages(page_begin,page_end),
my_rows(row_begin,row_end),
my_cols(col_begin,col_end)
{
}
blocked_range3d( PageValue page_begin, PageValue page_end, typename page_range_type::size_type page_grainsize,
RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize,
ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) :
my_pages(page_begin,page_end,page_grainsize),
my_rows(row_begin,row_end,row_grainsize),
my_cols(col_begin,col_end,col_grainsize)
{
}
//! True if range is empty
bool empty() const {
// Yes, it is a logical OR here, not AND.
return my_pages.empty() || my_rows.empty() || my_cols.empty();
}
//! True if range is divisible into two pieces.
bool is_divisible() const {
return my_pages.is_divisible() || my_rows.is_divisible() || my_cols.is_divisible();
}
blocked_range3d( blocked_range3d& r, split ) :
my_pages(r.my_pages),
my_rows(r.my_rows),
my_cols(r.my_cols)
{
split split_obj;
do_split(r, split_obj);
}
#if __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES
//! Static field to support proportional split
static const bool is_splittable_in_proportion = true;
blocked_range3d( blocked_range3d& r, proportional_split& proportion ) :
my_pages(r.my_pages),
my_rows(r.my_rows),
my_cols(r.my_cols)
{
do_split(r, proportion);
}
#endif /* __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES */
template <typename Split>
void do_split( blocked_range3d& r, Split& split_obj)
{
if ( my_pages.size()*double(my_rows.grainsize()) < my_rows.size()*double(my_pages.grainsize()) ) {
if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) {
my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj);
} else {
my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj);
}
} else {
if ( my_pages.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_pages.grainsize()) ) {
my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj);
} else {
my_pages.my_begin = page_range_type::do_split(r.my_pages, split_obj);
}
}
}
//! The pages of the iteration space
const page_range_type& pages() const {return my_pages;}
//! The rows of the iteration space
const row_range_type& rows() const {return my_rows;}
//! The columns of the iteration space
const col_range_type& cols() const {return my_cols;}
};
} // namespace tbb
#endif /* __TBB_blocked_range3d_H */
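
And the 3-D analogue, again only an illustrative sketch with assumed names; note that empty() is an OR across the page, row, and column axes, so a degenerate volume is rejected up front.

#include <cstddef>
#include "tbb/blocked_range3d.h"   // assumed include path

float blocked_range3d_example(const float* volume, int depth, int height, int width) {
    tbb::blocked_range3d<int> r(0, depth, 0, height, 0, width);
    if (r.empty()) return 0.f;     // any empty axis makes the whole range empty
    float sum = 0.f;
    for (int z = r.pages().begin(); z != r.pages().end(); ++z)
        for (int y = r.rows().begin(); y != r.rows().end(); ++y)
            for (int x = r.cols().begin(); x != r.cols().end(); ++x)
                sum += volume[(std::size_t(z) * height + y) * width + x];
    return sum;
}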


@@ -0,0 +1,137 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_cache_aligned_allocator_H
#define __TBB_cache_aligned_allocator_H
#include <new>
#include "tbb_stddef.h"
#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
#include <utility> // std::forward
#endif
namespace tbb {
//! @cond INTERNAL
namespace internal {
//! Cache/sector line size.
/** @ingroup memory_allocation */
size_t __TBB_EXPORTED_FUNC NFS_GetLineSize();
//! Allocate memory on cache/sector line boundary.
/** @ingroup memory_allocation */
void* __TBB_EXPORTED_FUNC NFS_Allocate( size_t n_element, size_t element_size, void* hint );
//! Free memory allocated by NFS_Allocate.
/** Freeing a NULL pointer is allowed, but has no effect.
@ingroup memory_allocation */
void __TBB_EXPORTED_FUNC NFS_Free( void* );
}
//! @endcond
#if _MSC_VER && !defined(__INTEL_COMPILER)
// Workaround for erroneous "unreferenced parameter" warning in method destroy.
#pragma warning (push)
#pragma warning (disable: 4100)
#endif
//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5
/** The members are ordered the same way they are in section 20.4.1
of the ISO C++ standard.
@ingroup memory_allocation */
template<typename T>
class cache_aligned_allocator {
public:
typedef typename internal::allocator_type<T>::value_type value_type;
typedef value_type* pointer;
typedef const value_type* const_pointer;
typedef value_type& reference;
typedef const value_type& const_reference;
typedef size_t size_type;
typedef ptrdiff_t difference_type;
template<typename U> struct rebind {
typedef cache_aligned_allocator<U> other;
};
cache_aligned_allocator() throw() {}
cache_aligned_allocator( const cache_aligned_allocator& ) throw() {}
template<typename U> cache_aligned_allocator(const cache_aligned_allocator<U>&) throw() {}
pointer address(reference x) const {return &x;}
const_pointer address(const_reference x) const {return &x;}
//! Allocate space for n objects, starting on a cache/sector line.
pointer allocate( size_type n, const void* hint=0 ) {
// The "hint" argument is always ignored in NFS_Allocate thus const_cast shouldn't hurt
return pointer(internal::NFS_Allocate( n, sizeof(value_type), const_cast<void*>(hint) ));
}
//! Free block of memory that starts on a cache line
void deallocate( pointer p, size_type ) {
internal::NFS_Free(p);
}
//! Largest value for which method allocate might succeed.
size_type max_size() const throw() {
return (~size_t(0)-internal::NFS_MaxLineSize)/sizeof(value_type);
}
//! Copy-construct value at location pointed to by p.
#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
template<typename U, typename... Args>
void construct(U *p, Args&&... args)
{ ::new((void *)p) U(std::forward<Args>(args)...); }
#else // __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
#if __TBB_CPP11_RVALUE_REF_PRESENT
void construct( pointer p, value_type&& value ) {::new((void*)(p)) value_type(std::move(value));}
#endif
void construct( pointer p, const value_type& value ) {::new((void*)(p)) value_type(value);}
#endif // __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
//! Destroy value at location pointed to by p.
void destroy( pointer p ) {p->~value_type();}
};
#if _MSC_VER && !defined(__INTEL_COMPILER)
#pragma warning (pop)
#endif // warning 4100 is back
//! Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1
/** @ingroup memory_allocation */
template<>
class cache_aligned_allocator<void> {
public:
typedef void* pointer;
typedef const void* const_pointer;
typedef void value_type;
template<typename U> struct rebind {
typedef cache_aligned_allocator<U> other;
};
};
template<typename T, typename U>
inline bool operator==( const cache_aligned_allocator<T>&, const cache_aligned_allocator<U>& ) {return true;}
template<typename T, typename U>
inline bool operator!=( const cache_aligned_allocator<T>&, const cache_aligned_allocator<U>& ) {return false;}
} // namespace tbb
#endif /* __TBB_cache_aligned_allocator_H */
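
An illustrative sketch of cache_aligned_allocator, both plugged into a standard container and used directly; the container choice and sizes are assumptions. The point of the allocator is that every block returned by NFS_Allocate starts on a cache/sector line, so hot data does not share a line with unrelated objects.

#include <vector>
#include "tbb/cache_aligned_allocator.h"   // assumed include path

void cache_aligned_example() {
    // The vector's backing block starts on a cache-line boundary.
    std::vector<int, tbb::cache_aligned_allocator<int> > per_thread_hits(8, 0);
    per_thread_hits[0] += 1;

    // Direct use of the allocator interface:
    tbb::cache_aligned_allocator<double> a;
    double* p = a.allocate(4);     // room for 4 doubles, cache-line aligned
    a.construct(p, 3.14);          // placement-construct the first element
    a.destroy(p);
    a.deallocate(p, 4);
}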


@@ -0,0 +1,72 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_combinable_H
#define __TBB_combinable_H
#include "enumerable_thread_specific.h"
#include "cache_aligned_allocator.h"
namespace tbb {
/** \name combinable
**/
//@{
//! Thread-local storage with optional reduction
/** @ingroup containers */
template <typename T>
class combinable {
private:
typedef typename tbb::cache_aligned_allocator<T> my_alloc;
typedef typename tbb::enumerable_thread_specific<T, my_alloc, ets_no_key> my_ets_type;
my_ets_type my_ets;
public:
combinable() { }
template <typename finit>
combinable( finit _finit) : my_ets(_finit) { }
//! destructor
~combinable() {
}
combinable(const combinable& other) : my_ets(other.my_ets) { }
combinable & operator=( const combinable & other) { my_ets = other.my_ets; return *this; }
void clear() { my_ets.clear(); }
T& local() { return my_ets.local(); }
T& local(bool & exists) { return my_ets.local(exists); }
// combine_func_t has signature T(T,T) or T(const T&, const T&)
template <typename combine_func_t>
T combine(combine_func_t f_combine) { return my_ets.combine(f_combine); }
// combine_func_t has signature void(T) or void(const T&)
template <typename combine_func_t>
void combine_each(combine_func_t f_combine) { my_ets.combine_each(f_combine); }
};
} // namespace tbb
#endif /* __TBB_combinable_H */
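
A short usage sketch for combinable, added for illustration with assumed names: each thread accumulates into its own local() element, and combine() folds the thread-local copies together with a binary functor of signature T(T,T).

#include <functional>
#include "tbb/combinable.h"   // assumed include path

tbb::combinable<long> partial_sums;   // one default-constructed long per thread

void add_from_this_thread(long n) {
    partial_sums.local() += n;        // lazily creates this thread's element
}

long total() {
    return partial_sums.combine(std::plus<long>());
}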


@@ -0,0 +1,476 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_condition_variable_H
#define __TBB_condition_variable_H
#if _WIN32||_WIN64
#include "../machine/windows_api.h"
namespace tbb {
namespace interface5 {
namespace internal {
struct condition_variable_using_event
{
//! Event for blocking waiting threads.
HANDLE event;
//! Protects invariants involving n_waiters, release_count, and epoch.
CRITICAL_SECTION mutex;
//! Number of threads waiting on this condition variable
int n_waiters;
//! Number of threads remaining that should no longer wait on this condition variable.
int release_count;
//! To keep threads from waking up prematurely with earlier signals.
unsigned epoch;
};
}}} // namespace tbb::interface5::internal
#ifndef CONDITION_VARIABLE_INIT
typedef void* CONDITION_VARIABLE;
typedef CONDITION_VARIABLE* PCONDITION_VARIABLE;
#endif
#else /* if not _WIN32||_WIN64 */
#include <errno.h> // some systems need it for ETIMEDOUT
#include <pthread.h>
#if __linux__
#include <ctime>
#else /* generic Unix */
#include <sys/time.h>
#endif
#endif /* _WIN32||_WIN64 */
#include "../tbb_stddef.h"
#include "../mutex.h"
#include "../tbb_thread.h"
#include "../tbb_exception.h"
#include "../tbb_profiling.h"
namespace tbb {
namespace interface5 {
// C++0x standard working draft 30.4.3
// Lock tag types
struct defer_lock_t { }; //! do not acquire ownership of the mutex
struct try_to_lock_t { }; //! try to acquire ownership of the mutex without blocking
struct adopt_lock_t { }; //! assume the calling thread has already obtained mutex ownership and manage it
const defer_lock_t defer_lock = {};
const try_to_lock_t try_to_lock = {};
const adopt_lock_t adopt_lock = {};
// C++0x standard working draft 30.4.3.1
//! lock_guard
template<typename M>
class lock_guard : tbb::internal::no_copy {
public:
//! mutex type
typedef M mutex_type;
//! Constructor
/** precondition: If mutex_type is not a recursive mutex, the calling thread
does not own the mutex m. */
explicit lock_guard(mutex_type& m) : pm(m) {m.lock();}
//! Adopt_lock constructor
/** precondition: the calling thread owns the mutex m. */
lock_guard(mutex_type& m, adopt_lock_t) : pm(m) {}
//! Destructor
~lock_guard() { pm.unlock(); }
private:
mutex_type& pm;
};
// C++0x standard working draft 30.4.3.2
//! unique_lock
template<typename M>
class unique_lock : tbb::internal::no_copy {
friend class condition_variable;
public:
typedef M mutex_type;
// 30.4.3.2.1 construct/copy/destroy
// NB: Without constructors that take an r-value reference to a unique_lock, the following constructor is of little use.
//! Constructor
/** postcondition: pm==0 && owns==false */
unique_lock() : pm(NULL), owns(false) {}
//! Constructor
/** precondition: if mutex_type is not a recursive mutex, the calling thread
does not own the mutex m. If the precondition is not met, a deadlock occurs.
postcondition: pm==&m and owns==true */
explicit unique_lock(mutex_type& m) : pm(&m) {m.lock(); owns=true;}
//! Defer_lock constructor
/** postcondition: pm==&m and owns==false */
unique_lock(mutex_type& m, defer_lock_t) : pm(&m), owns(false) {}
//! Try_to_lock constructor
/** precondition: if mutex_type is not a recursive mutex, the calling thread
does not own the mutex m. If the precondition is not met, a deadlock occurs.
postcondition: pm==&m and owns==res where res is the value returned by
the call to m.try_lock(). */
unique_lock(mutex_type& m, try_to_lock_t) : pm(&m) {owns = m.try_lock();}
//! Adopt_lock constructor
/** precondition: the calling thread owns the mutex. If it does not, mutex->unlock() would fail.
postcondition: pm==&m and owns==true */
unique_lock(mutex_type& m, adopt_lock_t) : pm(&m), owns(true) {}
//! Timed unique_lock acquisition.
/** To avoid requiring support for namespace chrono, this method deviates from the working draft in that
it uses tbb::tick_count::interval_t to specify the time duration. */
unique_lock(mutex_type& m, const tick_count::interval_t &i) : pm(&m) {owns = try_lock_for( i );}
#if __TBB_CPP11_RVALUE_REF_PRESENT
//! Move constructor
/** postconditions: pm == src_p.pm and owns == src_p.owns (where src_p is the state of src just prior to this
construction), src.pm == 0 and src.owns == false. */
unique_lock(unique_lock && src): pm(NULL), owns(false) {this->swap(src);}
//! Move assignment
/** effects: If owns calls pm->unlock().
Postconditions: pm == src_p.pm and owns == src_p.owns (where src_p is the state of src just prior to this
assignment), src.pm == 0 and src.owns == false. */
unique_lock& operator=(unique_lock && src) {
if (owns)
this->unlock();
pm = NULL;
this->swap(src);
return *this;
}
#endif // __TBB_CPP11_RVALUE_REF_PRESENT
//! Destructor
~unique_lock() { if( owns ) pm->unlock(); }
// 30.4.3.2.2 locking
//! Lock the mutex and own it.
void lock() {
if( pm ) {
if( !owns ) {
pm->lock();
owns = true;
} else
throw_exception_v4( tbb::internal::eid_possible_deadlock );
} else
throw_exception_v4( tbb::internal::eid_operation_not_permitted );
__TBB_ASSERT( owns, NULL );
}
//! Try to lock the mutex.
/** If successful, note that this lock owns it. Otherwise, set it false. */
bool try_lock() {
if( pm ) {
if( !owns )
owns = pm->try_lock();
else
throw_exception_v4( tbb::internal::eid_possible_deadlock );
} else
throw_exception_v4( tbb::internal::eid_operation_not_permitted );
return owns;
}
//! Try to lock the mutex.
bool try_lock_for( const tick_count::interval_t &i );
//! Unlock the mutex
/** And note that this lock no longer owns it. */
void unlock() {
if( owns ) {
pm->unlock();
owns = false;
} else
throw_exception_v4( tbb::internal::eid_operation_not_permitted );
__TBB_ASSERT( !owns, NULL );
}
// 30.4.3.2.3 modifiers
//! Swap the two unique locks
void swap(unique_lock& u) {
mutex_type* t_pm = u.pm; u.pm = pm; pm = t_pm;
bool t_owns = u.owns; u.owns = owns; owns = t_owns;
}
//! Disassociate this lock from its mutex without unlocking it.
mutex_type* release() {
mutex_type* o_pm = pm;
pm = NULL;
owns = false;
return o_pm;
}
// 30.4.3.2.4 observers
//! Does this lock own the mutex?
bool owns_lock() const { return owns; }
// TODO: Un-comment 'explicit' when the last non-C++0x compiler support is dropped
//! Does this lock own the mutex?
/*explicit*/ operator bool() const { return owns; }
//! Return the mutex this lock is associated with (NULL if none).
mutex_type* mutex() const { return pm; }
private:
mutex_type* pm;
bool owns;
};
template<typename M>
bool unique_lock<M>::try_lock_for( const tick_count::interval_t &i)
{
const int unique_lock_tick = 100; /* microseconds; 0.1 milliseconds */
// the smallest wait-time is 0.1 milliseconds.
bool res = pm->try_lock();
int duration_in_micro;
if( !res && (duration_in_micro=int(i.seconds()*1e6))>unique_lock_tick ) {
tick_count::interval_t i_100( double(unique_lock_tick)/1e6 /* seconds */); // 100 microseconds = 0.1e-3 seconds
do {
this_tbb_thread::sleep(i_100); // sleep for 100 microseconds
duration_in_micro -= unique_lock_tick;
res = pm->try_lock();
} while( !res && duration_in_micro>unique_lock_tick );
}
return (owns=res);
}
//! Swap two unique_locks whose mutexes are of the same type
template<typename M>
void swap(unique_lock<M>& x, unique_lock<M>& y) { x.swap( y ); }
namespace internal {
#if _WIN32||_WIN64
union condvar_impl_t {
condition_variable_using_event cv_event;
CONDITION_VARIABLE cv_native;
};
void __TBB_EXPORTED_FUNC internal_initialize_condition_variable( condvar_impl_t& cv );
void __TBB_EXPORTED_FUNC internal_destroy_condition_variable( condvar_impl_t& cv );
void __TBB_EXPORTED_FUNC internal_condition_variable_notify_one( condvar_impl_t& cv );
void __TBB_EXPORTED_FUNC internal_condition_variable_notify_all( condvar_impl_t& cv );
bool __TBB_EXPORTED_FUNC internal_condition_variable_wait( condvar_impl_t& cv, mutex* mtx, const tick_count::interval_t* i = NULL );
#else /* if !(_WIN32||_WIN64), i.e., POSIX threads */
typedef pthread_cond_t condvar_impl_t;
#endif
} // namespace internal
//! cv_status
/** C++0x standard working draft 30.5 */
enum cv_status { no_timeout, timeout };
//! condition variable
/** C++0x standard working draft 30.5.1
@ingroup synchronization */
class condition_variable : tbb::internal::no_copy {
public:
//! Constructor
condition_variable() {
#if _WIN32||_WIN64
internal_initialize_condition_variable( my_cv );
#else
pthread_cond_init( &my_cv, NULL );
#endif
}
//! Destructor
~condition_variable() {
//precondition: There shall be no thread blocked on *this.
#if _WIN32||_WIN64
internal_destroy_condition_variable( my_cv );
#else
pthread_cond_destroy( &my_cv );
#endif
}
//! Wake up one thread waiting on this condition variable
void notify_one() {
#if _WIN32||_WIN64
internal_condition_variable_notify_one( my_cv );
#else
pthread_cond_signal( &my_cv );
#endif
}
//! Notify all threads
void notify_all() {
#if _WIN32||_WIN64
internal_condition_variable_notify_all( my_cv );
#else
pthread_cond_broadcast( &my_cv );
#endif
}
//! Release the mutex associated with the lock and wait on this condition variable
void wait(unique_lock<mutex>& lock);
//! Wait on this condition variable while pred is false
template <class Predicate>
void wait(unique_lock<mutex>& lock, Predicate pred) {
while( !pred() )
wait( lock );
}
//! Timed version of wait()
cv_status wait_for(unique_lock<mutex>& lock, const tick_count::interval_t &i );
//! Timed version of the predicated wait
/** The loop terminates when pred() returns true or when the time interval i has elapsed. */
template<typename Predicate>
bool wait_for(unique_lock<mutex>& lock, const tick_count::interval_t &i, Predicate pred)
{
while( !pred() ) {
cv_status st = wait_for( lock, i );
if( st==timeout )
return pred();
}
return true;
}
// C++0x standard working draft. 30.2.3
typedef internal::condvar_impl_t* native_handle_type;
native_handle_type native_handle() { return (native_handle_type) &my_cv; }
private:
internal::condvar_impl_t my_cv;
};
#if _WIN32||_WIN64
inline void condition_variable::wait( unique_lock<mutex>& lock )
{
__TBB_ASSERT( lock.owns, NULL );
lock.owns = false;
if( !internal_condition_variable_wait( my_cv, lock.mutex() ) ) {
int ec = GetLastError();
// on Windows 7, SleepConditionVariableCS() may return ERROR_TIMEOUT while the doc says it returns WAIT_TIMEOUT
__TBB_ASSERT_EX( ec!=WAIT_TIMEOUT&&ec!=ERROR_TIMEOUT, NULL );
lock.owns = true;
throw_exception_v4( tbb::internal::eid_condvar_wait_failed );
}
lock.owns = true;
}
inline cv_status condition_variable::wait_for( unique_lock<mutex>& lock, const tick_count::interval_t& i )
{
cv_status rc = no_timeout;
__TBB_ASSERT( lock.owns, NULL );
lock.owns = false;
// condvar_wait could be SleepConditionVariableCS (or SleepConditionVariableSRW) or our own pre-Vista cond_var_wait()
if( !internal_condition_variable_wait( my_cv, lock.mutex(), &i ) ) {
int ec = GetLastError();
if( ec==WAIT_TIMEOUT || ec==ERROR_TIMEOUT )
rc = timeout;
else {
lock.owns = true;
throw_exception_v4( tbb::internal::eid_condvar_wait_failed );
}
}
lock.owns = true;
return rc;
}
#else /* !(_WIN32||_WIN64) */
inline void condition_variable::wait( unique_lock<mutex>& lock )
{
__TBB_ASSERT( lock.owns, NULL );
lock.owns = false;
if( pthread_cond_wait( &my_cv, lock.mutex()->native_handle() ) ) {
lock.owns = true;
throw_exception_v4( tbb::internal::eid_condvar_wait_failed );
}
// upon successful return, the mutex has been locked and is owned by the calling thread.
lock.owns = true;
}
inline cv_status condition_variable::wait_for( unique_lock<mutex>& lock, const tick_count::interval_t& i )
{
#if __linux__
struct timespec req;
double sec = i.seconds();
clock_gettime( CLOCK_REALTIME, &req );
req.tv_sec += static_cast<long>(sec);
req.tv_nsec += static_cast<long>( (sec - static_cast<long>(sec))*1e9 );
#else /* generic Unix */
struct timeval tv;
struct timespec req;
double sec = i.seconds();
int status = gettimeofday(&tv, NULL);
__TBB_ASSERT_EX( status==0, "gettimeofday failed" );
req.tv_sec = tv.tv_sec + static_cast<long>(sec);
req.tv_nsec = tv.tv_usec*1000 + static_cast<long>( (sec - static_cast<long>(sec))*1e9 );
#endif /*(choice of OS) */
if( req.tv_nsec>=1e9 ) {
req.tv_sec += 1;
req.tv_nsec -= static_cast<long int>(1e9);
}
__TBB_ASSERT( 0<=req.tv_nsec && req.tv_nsec<1e9, NULL );
int ec;
cv_status rc = no_timeout;
__TBB_ASSERT( lock.owns, NULL );
lock.owns = false;
if( ( ec=pthread_cond_timedwait( &my_cv, lock.mutex()->native_handle(), &req ) ) ) {
if( ec==ETIMEDOUT )
rc = timeout;
else {
__TBB_ASSERT( lock.try_lock()==false, NULL );
lock.owns = true;
throw_exception_v4( tbb::internal::eid_condvar_wait_failed );
}
}
lock.owns = true;
return rc;
}
#endif /* !(_WIN32||_WIN64) */
} // namespace interface5
__TBB_DEFINE_PROFILING_SET_NAME(interface5::condition_variable)
} // namespace tbb
#if TBB_IMPLEMENT_CPP0X
namespace std {
using tbb::interface5::defer_lock_t;
using tbb::interface5::try_to_lock_t;
using tbb::interface5::adopt_lock_t;
using tbb::interface5::defer_lock;
using tbb::interface5::try_to_lock;
using tbb::interface5::adopt_lock;
using tbb::interface5::lock_guard;
using tbb::interface5::unique_lock;
using tbb::interface5::swap; /* this is for void std::swap(unique_lock<M>&,unique_lock<M>&) */
using tbb::interface5::condition_variable;
using tbb::interface5::cv_status;
using tbb::interface5::timeout;
using tbb::interface5::no_timeout;
} // namespace std
#endif /* TBB_IMPLEMENT_CPP0X */
#endif /* __TBB_condition_variable_H */
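
As a quick orientation, here is a minimal sketch (not part of the commit) of the wait/notify handshake the classes above provide. The "tbb/compat/condition_variable" include path, the use of tbb::mutex, and the producer()/consumer() routines (which would run on separate threads) are assumptions for illustration only.

#include "tbb/compat/condition_variable"   // assumed include path for this header
#include "tbb/mutex.h"

static tbb::mutex m;
static tbb::interface5::condition_variable cv;
static bool ready = false;

void producer() {                                       // runs on one thread
    {
        tbb::interface5::unique_lock<tbb::mutex> lk(m); // locks m in the constructor
        ready = true;
    }                                                   // destructor unlocks m
    cv.notify_one();
}

void consumer() {                                       // runs on another thread
    tbb::interface5::unique_lock<tbb::mutex> lk(m);
    while( !ready )
        cv.wait( lk );   // releases m while blocked, reacquires it before returning
}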

View File

@@ -0,0 +1,62 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_compat_ppl_H
#define __TBB_compat_ppl_H
#include "../task_group.h"
#include "../parallel_invoke.h"
#include "../parallel_for_each.h"
#include "../parallel_for.h"
#include "../tbb_exception.h"
#include "../critical_section.h"
#include "../reader_writer_lock.h"
#include "../combinable.h"
namespace Concurrency {
#if __TBB_TASK_GROUP_CONTEXT
using tbb::task_handle;
using tbb::task_group_status;
using tbb::task_group;
using tbb::structured_task_group;
using tbb::invalid_multiple_scheduling;
using tbb::missing_wait;
using tbb::make_task;
using tbb::not_complete;
using tbb::complete;
using tbb::canceled;
using tbb::is_current_task_group_canceling;
#endif /* __TBB_TASK_GROUP_CONTEXT */
using tbb::parallel_invoke;
using tbb::strict_ppl::parallel_for;
using tbb::parallel_for_each;
using tbb::critical_section;
using tbb::reader_writer_lock;
using tbb::combinable;
using tbb::improper_lock;
} // namespace Concurrency
#endif /* __TBB_compat_ppl_H */
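
The header above only re-exports existing TBB facilities under PPL-style names. A minimal sketch (not part of the commit), assuming the "tbb/compat/ppl.h" include path, placeholder functions fa()/fb(), and a build with __TBB_TASK_GROUP_CONTEXT enabled:

#include "tbb/compat/ppl.h"   // assumed include path

void fa();   // placeholder work functions, defined elsewhere
void fb();

void run_ppl_style() {
    Concurrency::parallel_invoke( fa, fb );   // same entity as tbb::parallel_invoke

    Concurrency::task_group g;                // same entity as tbb::task_group
    g.run( fa );
    g.run_and_wait( fb );
}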

View File

@@ -0,0 +1,46 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_thread_H
#define __TBB_thread_H
#include "../tbb_thread.h"
#if TBB_IMPLEMENT_CPP0X
namespace std {
typedef tbb::tbb_thread thread;
namespace this_thread {
using tbb::this_tbb_thread::get_id;
using tbb::this_tbb_thread::yield;
inline void sleep_for(const tbb::tick_count::interval_t& rel_time) {
tbb::internal::thread_sleep_v3( rel_time );
}
}
}
#endif /* TBB_IMPLEMENT_CPP0X */
#endif /* __TBB_thread_H */
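
A minimal sketch (not part of the commit) of what the definitions above enable once TBB_IMPLEMENT_CPP0X is defined before inclusion; the include path and the work() routine are assumptions:

#define TBB_IMPLEMENT_CPP0X 1
#include "tbb/compat/thread"     // assumed include path for this header
#include "tbb/tick_count.h"

void work();                     // placeholder, defined elsewhere

void spawn_and_join() {
    std::thread t( work );                                            // really a tbb::tbb_thread
    std::this_thread::sleep_for( tbb::tick_count::interval_t(0.01) ); // ~10 ms, via thread_sleep_v3 above
    t.join();
}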

View File

@@ -0,0 +1,488 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_tuple_H
#define __TBB_tuple_H
#include <utility>
#include "../tbb_stddef.h"
// build preprocessor variables for varying number of arguments
// Need the leading comma so the empty __TBB_T_PACK will not cause a syntax error.
#if __TBB_VARIADIC_MAX <= 5
#define __TBB_T_PACK
#define __TBB_U_PACK
#define __TBB_TYPENAME_T_PACK
#define __TBB_TYPENAME_U_PACK
#define __TBB_NULL_TYPE_PACK
#define __TBB_REF_T_PARAM_PACK
#define __TBB_CONST_REF_T_PARAM_PACK
#define __TBB_T_PARAM_LIST_PACK
#define __TBB_CONST_NULL_REF_PACK
//
#elif __TBB_VARIADIC_MAX == 6
#define __TBB_T_PACK ,__T5
#define __TBB_U_PACK ,__U5
#define __TBB_TYPENAME_T_PACK , typename __T5
#define __TBB_TYPENAME_U_PACK , typename __U5
#define __TBB_NULL_TYPE_PACK , null_type
#define __TBB_REF_T_PARAM_PACK ,__T5& t5
#define __TBB_CONST_REF_T_PARAM_PACK ,const __T5& t5
#define __TBB_T_PARAM_LIST_PACK ,t5
#define __TBB_CONST_NULL_REF_PACK , const null_type&
//
#elif __TBB_VARIADIC_MAX == 7
#define __TBB_T_PACK ,__T5, __T6
#define __TBB_U_PACK ,__U5, __U6
#define __TBB_TYPENAME_T_PACK , typename __T5 , typename __T6
#define __TBB_TYPENAME_U_PACK , typename __U5 , typename __U6
#define __TBB_NULL_TYPE_PACK , null_type, null_type
#define __TBB_REF_T_PARAM_PACK ,__T5& t5, __T6& t6
#define __TBB_CONST_REF_T_PARAM_PACK ,const __T5& t5, const __T6& t6
#define __TBB_T_PARAM_LIST_PACK ,t5 ,t6
#define __TBB_CONST_NULL_REF_PACK , const null_type&, const null_type&
//
#elif __TBB_VARIADIC_MAX == 8
#define __TBB_T_PACK ,__T5, __T6, __T7
#define __TBB_U_PACK ,__U5, __U6, __U7
#define __TBB_TYPENAME_T_PACK , typename __T5 , typename __T6, typename __T7
#define __TBB_TYPENAME_U_PACK , typename __U5 , typename __U6, typename __U7
#define __TBB_NULL_TYPE_PACK , null_type, null_type, null_type
#define __TBB_REF_T_PARAM_PACK ,__T5& t5, __T6& t6, __T7& t7
#define __TBB_CONST_REF_T_PARAM_PACK , const __T5& t5, const __T6& t6, const __T7& t7
#define __TBB_T_PARAM_LIST_PACK ,t5 ,t6 ,t7
#define __TBB_CONST_NULL_REF_PACK , const null_type&, const null_type&, const null_type&
//
#elif __TBB_VARIADIC_MAX == 9
#define __TBB_T_PACK ,__T5, __T6, __T7, __T8
#define __TBB_U_PACK ,__U5, __U6, __U7, __U8
#define __TBB_TYPENAME_T_PACK , typename __T5, typename __T6, typename __T7, typename __T8
#define __TBB_TYPENAME_U_PACK , typename __U5, typename __U6, typename __U7, typename __U8
#define __TBB_NULL_TYPE_PACK , null_type, null_type, null_type, null_type
#define __TBB_REF_T_PARAM_PACK ,__T5& t5, __T6& t6, __T7& t7, __T8& t8
#define __TBB_CONST_REF_T_PARAM_PACK , const __T5& t5, const __T6& t6, const __T7& t7, const __T8& t8
#define __TBB_T_PARAM_LIST_PACK ,t5 ,t6 ,t7 ,t8
#define __TBB_CONST_NULL_REF_PACK , const null_type&, const null_type&, const null_type&, const null_type&
//
#elif __TBB_VARIADIC_MAX >= 10
#define __TBB_T_PACK ,__T5, __T6, __T7, __T8, __T9
#define __TBB_U_PACK ,__U5, __U6, __U7, __U8, __U9
#define __TBB_TYPENAME_T_PACK , typename __T5, typename __T6, typename __T7, typename __T8, typename __T9
#define __TBB_TYPENAME_U_PACK , typename __U5, typename __U6, typename __U7, typename __U8, typename __U9
#define __TBB_NULL_TYPE_PACK , null_type, null_type, null_type, null_type, null_type
#define __TBB_REF_T_PARAM_PACK ,__T5& t5, __T6& t6, __T7& t7, __T8& t8, __T9& t9
#define __TBB_CONST_REF_T_PARAM_PACK , const __T5& t5, const __T6& t6, const __T7& t7, const __T8& t8, const __T9& t9
#define __TBB_T_PARAM_LIST_PACK ,t5 ,t6 ,t7 ,t8 ,t9
#define __TBB_CONST_NULL_REF_PACK , const null_type&, const null_type&, const null_type&, const null_type&, const null_type&
#endif
namespace tbb {
namespace interface5 {
namespace internal {
struct null_type { };
}
using internal::null_type;
// tuple forward declaration
template <typename __T0=null_type, typename __T1=null_type, typename __T2=null_type,
typename __T3=null_type, typename __T4=null_type
#if __TBB_VARIADIC_MAX >= 6
, typename __T5=null_type
#if __TBB_VARIADIC_MAX >= 7
, typename __T6=null_type
#if __TBB_VARIADIC_MAX >= 8
, typename __T7=null_type
#if __TBB_VARIADIC_MAX >= 9
, typename __T8=null_type
#if __TBB_VARIADIC_MAX >= 10
, typename __T9=null_type
#endif
#endif
#endif
#endif
#endif
>
class tuple;
namespace internal {
// const null_type temp
inline const null_type cnull() { return null_type(); }
// cons forward declaration
template <typename __HT, typename __TT> struct cons;
// type of a component of the cons
template<int __N, typename __T>
struct component {
typedef typename __T::tail_type next;
typedef typename component<__N-1,next>::type type;
};
template<typename __T>
struct component<0,__T> {
typedef typename __T::head_type type;
};
template<>
struct component<0,null_type> {
typedef null_type type;
};
// const version of component
template<int __N, typename __T>
struct component<__N, const __T>
{
typedef typename __T::tail_type next;
typedef const typename component<__N-1,next>::type type;
};
template<typename __T>
struct component<0, const __T>
{
typedef const typename __T::head_type type;
};
// helper class for getting components of cons
template< int __N>
struct get_helper {
template<typename __HT, typename __TT>
inline static typename component<__N, cons<__HT,__TT> >::type& get(cons<__HT,__TT>& ti) {
return get_helper<__N-1>::get(ti.tail);
}
template<typename __HT, typename __TT>
inline static typename component<__N, cons<__HT,__TT> >::type const& get(const cons<__HT,__TT>& ti) {
return get_helper<__N-1>::get(ti.tail);
}
};
template<>
struct get_helper<0> {
template<typename __HT, typename __TT>
inline static typename component<0, cons<__HT,__TT> >::type& get(cons<__HT,__TT>& ti) {
return ti.head;
}
template<typename __HT, typename __TT>
inline static typename component<0, cons<__HT,__TT> >::type const& get(const cons<__HT,__TT>& ti) {
return ti.head;
}
};
// traits adaptor
template <typename __T0, typename __T1, typename __T2, typename __T3, typename __T4 __TBB_TYPENAME_T_PACK>
struct tuple_traits {
typedef cons <__T0, typename tuple_traits<__T1, __T2, __T3, __T4 __TBB_T_PACK , null_type>::U > U;
};
template <typename __T0>
struct tuple_traits<__T0, null_type, null_type, null_type, null_type __TBB_NULL_TYPE_PACK > {
typedef cons<__T0, null_type> U;
};
template<>
struct tuple_traits<null_type, null_type, null_type, null_type, null_type __TBB_NULL_TYPE_PACK > {
typedef null_type U;
};
// core cons defs
template <typename __HT, typename __TT>
struct cons{
typedef __HT head_type;
typedef __TT tail_type;
head_type head;
tail_type tail;
static const int length = 1 + tail_type::length;
// default constructors
explicit cons() : head(), tail() { }
// non-default constructors
cons(head_type& h, const tail_type& t) : head(h), tail(t) { }
template <typename __T0, typename __T1, typename __T2, typename __T3, typename __T4 __TBB_TYPENAME_T_PACK >
cons(const __T0& t0, const __T1& t1, const __T2& t2, const __T3& t3, const __T4& t4 __TBB_CONST_REF_T_PARAM_PACK) :
head(t0), tail(t1, t2, t3, t4 __TBB_T_PARAM_LIST_PACK, cnull()) { }
template <typename __T0, typename __T1, typename __T2, typename __T3, typename __T4 __TBB_TYPENAME_T_PACK >
cons(__T0& t0, __T1& t1, __T2& t2, __T3& t3, __T4& t4 __TBB_REF_T_PARAM_PACK) :
head(t0), tail(t1, t2, t3, t4 __TBB_T_PARAM_LIST_PACK , cnull()) { }
template <typename __HT1, typename __TT1>
cons(const cons<__HT1,__TT1>& other) : head(other.head), tail(other.tail) { }
cons& operator=(const cons& other) { head = other.head; tail = other.tail; return *this; }
friend bool operator==(const cons& me, const cons& other) {
return me.head == other.head && me.tail == other.tail;
}
friend bool operator<(const cons& me, const cons& other) {
return me.head < other.head || (!(other.head < me.head) && me.tail < other.tail);
}
friend bool operator>(const cons& me, const cons& other) { return other<me; }
friend bool operator!=(const cons& me, const cons& other) { return !(me==other); }
friend bool operator>=(const cons& me, const cons& other) { return !(me<other); }
friend bool operator<=(const cons& me, const cons& other) { return !(me>other); }
template<typename __HT1, typename __TT1>
friend bool operator==(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) {
return me.head == other.head && me.tail == other.tail;
}
template<typename __HT1, typename __TT1>
friend bool operator<(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) {
return me.head < other.head || (!(other.head < me.head) && me.tail < other.tail);
}
template<typename __HT1, typename __TT1>
friend bool operator>(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) { return other<me; }
template<typename __HT1, typename __TT1>
friend bool operator!=(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) { return !(me==other); }
template<typename __HT1, typename __TT1>
friend bool operator>=(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) { return !(me<other); }
template<typename __HT1, typename __TT1>
friend bool operator<=(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) { return !(me>other); }
}; // cons
template <typename __HT>
struct cons<__HT,null_type> {
typedef __HT head_type;
typedef null_type tail_type;
head_type head;
static const int length = 1;
// default constructor
cons() : head() { }
cons(const null_type&, const null_type&, const null_type&, const null_type&, const null_type& __TBB_CONST_NULL_REF_PACK) : head() { }
// non-default constructor
template<typename __T1>
cons(__T1& t1, const null_type&, const null_type&, const null_type&, const null_type& __TBB_CONST_NULL_REF_PACK) : head(t1) { }
cons(head_type& h, const null_type& = null_type() ) : head(h) { }
cons(const head_type& t0, const null_type&, const null_type&, const null_type&, const null_type& __TBB_CONST_NULL_REF_PACK) : head(t0) { }
// converting constructor
template<typename __HT1>
cons(__HT1 h1, const null_type&, const null_type&, const null_type&, const null_type& __TBB_CONST_NULL_REF_PACK) : head(h1) { }
// copy constructor
template<typename __HT1>
cons( const cons<__HT1, null_type>& other) : head(other.head) { }
// assignment operator
cons& operator=(const cons& other) { head = other.head; return *this; }
friend bool operator==(const cons& me, const cons& other) { return me.head == other.head; }
friend bool operator<(const cons& me, const cons& other) { return me.head < other.head; }
friend bool operator>(const cons& me, const cons& other) { return other<me; }
friend bool operator!=(const cons& me, const cons& other) {return !(me==other); }
friend bool operator<=(const cons& me, const cons& other) {return !(me>other); }
friend bool operator>=(const cons& me, const cons& other) {return !(me<other); }
template<typename __HT1>
friend bool operator==(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) {
return me.head == other.head;
}
template<typename __HT1>
friend bool operator<(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) {
return me.head < other.head;
}
template<typename __HT1>
friend bool operator>(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) { return other<me; }
template<typename __HT1>
friend bool operator!=(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) { return !(me==other); }
template<typename __HT1>
friend bool operator<=(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) { return !(me>other); }
template<typename __HT1>
friend bool operator>=(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) { return !(me<other); }
}; // cons
template <>
struct cons<null_type,null_type> { typedef null_type tail_type; static const int length = 0; };
// wrapper for default constructor
template<typename __T>
inline const __T wrap_dcons(__T*) { return __T(); }
} // namespace internal
// tuple definition
template<typename __T0, typename __T1, typename __T2, typename __T3, typename __T4 __TBB_TYPENAME_T_PACK >
class tuple : public internal::tuple_traits<__T0, __T1, __T2, __T3, __T4 __TBB_T_PACK >::U {
// friends
template <typename __T> friend class tuple_size;
template<int __N, typename __T> friend struct tuple_element;
// stl components
typedef tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK > value_type;
typedef value_type *pointer;
typedef const value_type *const_pointer;
typedef value_type &reference;
typedef const value_type &const_reference;
typedef size_t size_type;
typedef typename internal::tuple_traits<__T0,__T1,__T2,__T3, __T4 __TBB_T_PACK >::U my_cons;
public:
tuple(const __T0& t0=internal::wrap_dcons((__T0*)NULL)
,const __T1& t1=internal::wrap_dcons((__T1*)NULL)
,const __T2& t2=internal::wrap_dcons((__T2*)NULL)
,const __T3& t3=internal::wrap_dcons((__T3*)NULL)
,const __T4& t4=internal::wrap_dcons((__T4*)NULL)
#if __TBB_VARIADIC_MAX >= 6
,const __T5& t5=internal::wrap_dcons((__T5*)NULL)
#if __TBB_VARIADIC_MAX >= 7
,const __T6& t6=internal::wrap_dcons((__T6*)NULL)
#if __TBB_VARIADIC_MAX >= 8
,const __T7& t7=internal::wrap_dcons((__T7*)NULL)
#if __TBB_VARIADIC_MAX >= 9
,const __T8& t8=internal::wrap_dcons((__T8*)NULL)
#if __TBB_VARIADIC_MAX >= 10
,const __T9& t9=internal::wrap_dcons((__T9*)NULL)
#endif
#endif
#endif
#endif
#endif
) :
my_cons(t0,t1,t2,t3,t4 __TBB_T_PARAM_LIST_PACK) { }
template<int __N>
struct internal_tuple_element {
typedef typename internal::component<__N,my_cons>::type type;
};
template<int __N>
typename internal_tuple_element<__N>::type& get() { return internal::get_helper<__N>::get(*this); }
template<int __N>
typename internal_tuple_element<__N>::type const& get() const { return internal::get_helper<__N>::get(*this); }
template<typename __U1, typename __U2>
tuple& operator=(const internal::cons<__U1,__U2>& other) {
my_cons::operator=(other);
return *this;
}
template<typename __U1, typename __U2>
tuple& operator=(const std::pair<__U1,__U2>& other) {
// __TBB_ASSERT(tuple_size<value_type>::value == 2, "Invalid size for pair to tuple assignment");
this->head = other.first;
this->tail.head = other.second;
return *this;
}
friend bool operator==(const tuple& me, const tuple& other) {return static_cast<const my_cons &>(me)==(other);}
friend bool operator<(const tuple& me, const tuple& other) {return static_cast<const my_cons &>(me)<(other);}
friend bool operator>(const tuple& me, const tuple& other) {return static_cast<const my_cons &>(me)>(other);}
friend bool operator!=(const tuple& me, const tuple& other) {return static_cast<const my_cons &>(me)!=(other);}
friend bool operator>=(const tuple& me, const tuple& other) {return static_cast<const my_cons &>(me)>=(other);}
friend bool operator<=(const tuple& me, const tuple& other) {return static_cast<const my_cons &>(me)<=(other);}
}; // tuple
// empty tuple
template<>
class tuple<null_type, null_type, null_type, null_type, null_type __TBB_NULL_TYPE_PACK > : public null_type {
};
// helper classes
template < typename __T>
class tuple_size {
public:
static const size_t value = 1 + tuple_size<typename __T::tail_type>::value;
};
template <>
class tuple_size<tuple<> > {
public:
static const size_t value = 0;
};
template <>
class tuple_size<null_type> {
public:
static const size_t value = 0;
};
template<int __N, typename __T>
struct tuple_element {
typedef typename internal::component<__N, typename __T::my_cons>::type type;
};
template<int __N, typename __T0, typename __T1, typename __T2, typename __T3, typename __T4 __TBB_TYPENAME_T_PACK >
inline static typename tuple_element<__N,tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK > >::type&
get(tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK >& t) { return internal::get_helper<__N>::get(t); }
template<int __N, typename __T0, typename __T1, typename __T2, typename __T3, typename __T4 __TBB_TYPENAME_T_PACK >
inline static typename tuple_element<__N,tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK > >::type const&
get(const tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK >& t) { return internal::get_helper<__N>::get(t); }
} // interface5
} // tbb
#if !__TBB_CPP11_TUPLE_PRESENT
namespace tbb {
namespace flow {
using tbb::interface5::tuple;
using tbb::interface5::tuple_size;
using tbb::interface5::tuple_element;
using tbb::interface5::get;
}
}
#endif
#undef __TBB_T_PACK
#undef __TBB_U_PACK
#undef __TBB_TYPENAME_T_PACK
#undef __TBB_TYPENAME_U_PACK
#undef __TBB_NULL_TYPE_PACK
#undef __TBB_REF_T_PARAM_PACK
#undef __TBB_CONST_REF_T_PARAM_PACK
#undef __TBB_T_PARAM_LIST_PACK
#undef __TBB_CONST_NULL_REF_PACK
#endif /* __TBB_tuple_H */
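
A minimal sketch (not part of the commit) of the emulated tuple defined above; the "tbb/compat/tuple" include path is an assumption:

#include "tbb/compat/tuple"   // assumed include path

void tuple_demo() {
    typedef tbb::interface5::tuple<int, double, const char*> triple_t;
    triple_t t( 1, 2.5, "three" );              // trailing components default to null_type
    int    i = tbb::interface5::get<0>( t );    // free get<>, defined above
    double d = t.get<1>();                      // member get<>
    // When !__TBB_CPP11_TUPLE_PRESENT the same names are also visible as tbb::flow::tuple etc.
    (void)i; (void)d;
}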

File diff suppressed because it is too large

View File

@@ -0,0 +1,235 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_concurrent_lru_cache_H
#define __TBB_concurrent_lru_cache_H
#if ! TBB_PREVIEW_CONCURRENT_LRU_CACHE
#error Set TBB_PREVIEW_CONCURRENT_LRU_CACHE to include concurrent_lru_cache.h
#endif
#include <map>
#include <list>
#include "tbb_stddef.h"
#include "atomic.h"
#include "internal/_aggregator_impl.h"
namespace tbb{
namespace interface6 {
template <typename key_type, typename value_type, typename value_functor_type = value_type (*)(key_type) >
class concurrent_lru_cache : internal::no_assign{
private:
typedef concurrent_lru_cache self_type;
typedef value_functor_type value_function_type;
typedef std::size_t ref_counter_type;
struct map_value_type;
typedef std::map<key_type, map_value_type> map_storage_type;
typedef std::list<typename map_storage_type::iterator> lru_list_type;
struct map_value_type {
value_type my_value;
ref_counter_type my_ref_counter;
typename lru_list_type::iterator my_lru_list_iterator;
bool my_is_ready;
map_value_type (value_type const& a_value, ref_counter_type a_ref_counter, typename lru_list_type::iterator a_lru_list_iterator, bool a_is_ready)
: my_value(a_value), my_ref_counter(a_ref_counter), my_lru_list_iterator (a_lru_list_iterator), my_is_ready(a_is_ready)
{}
};
class handle_object;
struct aggregator_operation;
typedef aggregator_operation aggregated_operation_type;
typedef tbb::internal::aggregating_functor<self_type,aggregated_operation_type> aggregator_function_type;
friend class tbb::internal::aggregating_functor<self_type,aggregated_operation_type>;
typedef tbb::internal::aggregator<aggregator_function_type, aggregated_operation_type> aggregator_type;
private:
value_function_type my_value_function;
std::size_t const my_number_of_lru_history_items;
map_storage_type my_map_storage;
lru_list_type my_lru_list;
aggregator_type my_aggregator;
public:
typedef handle_object handle;
public:
concurrent_lru_cache(value_function_type f, std::size_t number_of_lru_history_items)
: my_value_function(f),my_number_of_lru_history_items(number_of_lru_history_items)
{
my_aggregator.initialize_handler(aggregator_function_type(this));
}
handle_object operator[](key_type k){
retrieve_aggregator_operation op(k);
my_aggregator.execute(&op);
if (op.is_new_value_needed()){
op.result().second.my_value = my_value_function(k);
__TBB_store_with_release(op.result().second.my_is_ready, true);
}else{
tbb::internal::spin_wait_while_eq(op.result().second.my_is_ready,false);
}
return handle_object(*this,op.result());
}
private:
void signal_end_of_usage(typename map_storage_type::reference value_ref){
signal_end_of_usage_aggregator_operation op(value_ref);
my_aggregator.execute(&op);
}
private:
struct handle_move_t:no_assign{
concurrent_lru_cache & my_cache_ref;
typename map_storage_type::reference my_map_record_ref;
handle_move_t(concurrent_lru_cache & cache_ref, typename map_storage_type::reference value_ref):my_cache_ref(cache_ref),my_map_record_ref(value_ref) {};
};
class handle_object {
concurrent_lru_cache * my_cache_pointer;
typename map_storage_type::reference my_map_record_ref;
public:
handle_object(concurrent_lru_cache & cache_ref, typename map_storage_type::reference value_ref):my_cache_pointer(&cache_ref), my_map_record_ref(value_ref) {}
handle_object(handle_move_t m):my_cache_pointer(&m.my_cache_ref), my_map_record_ref(m.my_map_record_ref){}
operator handle_move_t(){ return move(*this);}
value_type& value(){
__TBB_ASSERT(my_cache_pointer,"get value from a moved-from object?");
return my_map_record_ref.second.my_value;
}
~handle_object(){
if (my_cache_pointer){
my_cache_pointer->signal_end_of_usage(my_map_record_ref);
}
}
private:
friend handle_move_t move(handle_object& h){
return handle_object::move(h);
}
static handle_move_t move(handle_object& h){
__TBB_ASSERT(h.my_cache_pointer,"move from the same object twice ?");
concurrent_lru_cache * cache_pointer = NULL;
std::swap(cache_pointer,h.my_cache_pointer);
return handle_move_t(*cache_pointer,h.my_map_record_ref);
}
private:
void operator=(handle_object&);
#if __SUNPRO_CC
// Presumably due to a compiler bug, a private copy constructor
// breaks expressions like handle h = cache[key];
public:
#endif
handle_object(handle_object &);
};
private:
//TODO: looks like aggregator_operation is a perfect match for statically typed variant type
struct aggregator_operation : tbb::internal::aggregated_operation<aggregator_operation>{
enum e_op_type {op_retrieve, op_signal_end_of_usage};
//TODO: try to use pointer to function apply_visitor here
//TODO: try virtual functions and measure the difference
e_op_type my_operation_type;
aggregator_operation(e_op_type operation_type): my_operation_type(operation_type) {}
void cast_and_handle(self_type& container ){
if (my_operation_type==op_retrieve){
static_cast<retrieve_aggregator_operation*>(this)->handle(container);
}else{
static_cast<signal_end_of_usage_aggregator_operation*>(this)->handle(container);
}
}
};
struct retrieve_aggregator_operation : aggregator_operation, private internal::no_assign {
key_type my_key;
typename map_storage_type::pointer my_result_map_record_pointer;
bool my_is_new_value_needed;
retrieve_aggregator_operation(key_type key):aggregator_operation(aggregator_operation::op_retrieve),my_key(key),my_is_new_value_needed(false){}
void handle(self_type& container ){
my_result_map_record_pointer = & container.retrieve_serial(my_key,my_is_new_value_needed);
}
typename map_storage_type::reference result(){ return * my_result_map_record_pointer; }
bool is_new_value_needed(){return my_is_new_value_needed;}
};
struct signal_end_of_usage_aggregator_operation : aggregator_operation, private internal::no_assign {
typename map_storage_type::reference my_map_record_ref;
signal_end_of_usage_aggregator_operation(typename map_storage_type::reference map_record_ref):aggregator_operation(aggregator_operation::op_signal_end_of_usage),my_map_record_ref(map_record_ref){}
void handle(self_type& container ){
container.signal_end_of_usage_serial(my_map_record_ref);
}
};
private:
void handle_operations(aggregator_operation* op_list){
while(op_list){
op_list->cast_and_handle(*this);
aggregator_operation* tmp = op_list;
op_list=op_list->next;
tbb::internal::itt_store_word_with_release(tmp->status, uintptr_t(1));
}
}
private:
typename map_storage_type::reference retrieve_serial(key_type k, bool& is_new_value_needed){
typename map_storage_type::iterator it = my_map_storage.find(k);
if (it == my_map_storage.end()){
it = my_map_storage.insert(it,std::make_pair(k,map_value_type(value_type(),0,my_lru_list.end(),false)));
is_new_value_needed = true;
}else {
typename lru_list_type::iterator list_it = it->second.my_lru_list_iterator;
if (list_it!=my_lru_list.end()) {
__TBB_ASSERT(!it->second.my_ref_counter,"item to be evicted should not have live references");
//the item is about to be used, so it is no longer a candidate for eviction;
//remove it from the LRU history.
my_lru_list.erase(list_it);
it->second.my_lru_list_iterator= my_lru_list.end();
}
}
++(it->second.my_ref_counter);
return *it;
}
void signal_end_of_usage_serial(typename map_storage_type::reference map_record_ref){
typename map_storage_type::iterator it = my_map_storage.find(map_record_ref.first);
__TBB_ASSERT(it!=my_map_storage.end(),"cache should not return past-end iterators to outer world");
__TBB_ASSERT(&(*it) == &map_record_ref,"dangling reference has been returned to outside world? data race ?");
__TBB_ASSERT( my_lru_list.end()== std::find(my_lru_list.begin(),my_lru_list.end(),it),
"object in use should not be in list of unused objects ");
if (! --(it->second.my_ref_counter)){
//it was the last reference, so put the item into the LRU history
if (my_lru_list.size()>=my_number_of_lru_history_items){
//evict items to free up space
size_t number_of_elements_to_evict = 1 + my_lru_list.size() - my_number_of_lru_history_items;
for (size_t i=0; i<number_of_elements_to_evict; ++i){
typename map_storage_type::iterator it_to_evict = my_lru_list.back();
__TBB_ASSERT(!it_to_evict->second.my_ref_counter,"item to be evicted should not have live references");
my_lru_list.pop_back();
my_map_storage.erase(it_to_evict);
}
}
my_lru_list.push_front(it);
it->second.my_lru_list_iterator = my_lru_list.begin();
}
}
};
} // namespace interface6
using interface6::concurrent_lru_cache;
} // namespace tbb
#endif //__TBB_concurrent_lru_cache_H
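
A minimal sketch (not part of the commit) of the intended usage of the cache above; load_record() and the key are placeholders, and TBB_PREVIEW_CONCURRENT_LRU_CACHE must be defined before inclusion, as the #error check above requires:

#define TBB_PREVIEW_CONCURRENT_LRU_CACHE 1
#include "tbb/concurrent_lru_cache.h"
#include <string>

std::string load_record( int id );   // placeholder miss handler, defined elsewhere

void cache_demo() {
    typedef tbb::concurrent_lru_cache<int, std::string> cache_t;
    cache_t cache( &load_record, 16 );   // keep at most 16 unused entries in the LRU history
    cache_t::handle h = cache[42];       // computed via load_record on the first miss
    std::string& value = h.value();      // entry stays pinned until h is destroyed
    (void)value;
}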

View File

@@ -0,0 +1,489 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_concurrent_priority_queue_H
#define __TBB_concurrent_priority_queue_H
#include "atomic.h"
#include "cache_aligned_allocator.h"
#include "tbb_exception.h"
#include "tbb_stddef.h"
#include "tbb_profiling.h"
#include "internal/_aggregator_impl.h"
#include <vector>
#include <iterator>
#include <functional>
#if __TBB_INITIALIZER_LISTS_PRESENT
#include <initializer_list>
#endif
#if __TBB_CPP11_IS_COPY_CONSTRUCTIBLE_PRESENT
#include <type_traits>
#endif
namespace tbb {
namespace interface5 {
namespace internal {
#if __TBB_CPP11_IS_COPY_CONSTRUCTIBLE_PRESENT
template<typename T, bool C = std::is_copy_constructible<T>::value>
struct use_element_copy_constructor {
typedef tbb::internal::true_type type;
};
template<typename T>
struct use_element_copy_constructor <T,false> {
typedef tbb::internal::false_type type;
};
#else
template<typename>
struct use_element_copy_constructor {
typedef tbb::internal::true_type type;
};
#endif
} // namespace internal
using namespace tbb::internal;
//! Concurrent priority queue
template <typename T, typename Compare=std::less<T>, typename A=cache_aligned_allocator<T> >
class concurrent_priority_queue {
public:
//! Element type in the queue.
typedef T value_type;
//! Reference type
typedef T& reference;
//! Const reference type
typedef const T& const_reference;
//! Integral type for representing size of the queue.
typedef size_t size_type;
//! Difference type for iterator
typedef ptrdiff_t difference_type;
//! Allocator type
typedef A allocator_type;
//! Constructs a new concurrent_priority_queue with default capacity
explicit concurrent_priority_queue(const allocator_type& a = allocator_type()) : mark(0), my_size(0), data(a)
{
my_aggregator.initialize_handler(my_functor_t(this));
}
//! Constructs a new concurrent_priority_queue with init_sz capacity
explicit concurrent_priority_queue(size_type init_capacity, const allocator_type& a = allocator_type()) :
mark(0), my_size(0), data(a)
{
data.reserve(init_capacity);
my_aggregator.initialize_handler(my_functor_t(this));
}
//! [begin,end) constructor
template<typename InputIterator>
concurrent_priority_queue(InputIterator begin, InputIterator end, const allocator_type& a = allocator_type()) :
mark(0), data(begin, end, a)
{
my_aggregator.initialize_handler(my_functor_t(this));
heapify();
my_size = data.size();
}
#if __TBB_INITIALIZER_LISTS_PRESENT
//! Constructor from std::initializer_list
concurrent_priority_queue(std::initializer_list<T> init_list, const allocator_type &a = allocator_type()) :
mark(0),data(init_list.begin(), init_list.end(), a)
{
my_aggregator.initialize_handler(my_functor_t(this));
heapify();
my_size = data.size();
}
#endif //# __TBB_INITIALIZER_LISTS_PRESENT
//! Copy constructor
/** This operation is unsafe if there are pending concurrent operations on the src queue. */
explicit concurrent_priority_queue(const concurrent_priority_queue& src) : mark(src.mark),
my_size(src.my_size), data(src.data.begin(), src.data.end(), src.data.get_allocator())
{
my_aggregator.initialize_handler(my_functor_t(this));
heapify();
}
//! Copy constructor with specific allocator
/** This operation is unsafe if there are pending concurrent operations on the src queue. */
concurrent_priority_queue(const concurrent_priority_queue& src, const allocator_type& a) : mark(src.mark),
my_size(src.my_size), data(src.data.begin(), src.data.end(), a)
{
my_aggregator.initialize_handler(my_functor_t(this));
heapify();
}
//! Assignment operator
/** This operation is unsafe if there are pending concurrent operations on the src queue. */
concurrent_priority_queue& operator=(const concurrent_priority_queue& src) {
if (this != &src) {
vector_t(src.data.begin(), src.data.end(), src.data.get_allocator()).swap(data);
mark = src.mark;
my_size = src.my_size;
}
return *this;
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
//! Move constructor
/** This operation is unsafe if there are pending concurrent operations on the src queue. */
concurrent_priority_queue(concurrent_priority_queue&& src) : mark(src.mark),
my_size(src.my_size), data(std::move(src.data))
{
my_aggregator.initialize_handler(my_functor_t(this));
}
//! Move constructor with specific allocator
/** This operation is unsafe if there are pending concurrent operations on the src queue. */
concurrent_priority_queue(concurrent_priority_queue&& src, const allocator_type& a) : mark(src.mark),
my_size(src.my_size),
#if __TBB_ALLOCATOR_TRAITS_PRESENT
data(std::move(src.data), a)
#else
// Some early C++11 standard library implementations lack the vector(vector&&, allocator) constructor,
// apparently because allocator_traits (and thus stateful allocators) are not supported.
data(a)
#endif //__TBB_ALLOCATOR_TRAITS_PRESENT
{
my_aggregator.initialize_handler(my_functor_t(this));
#if !__TBB_ALLOCATOR_TRAITS_PRESENT
if (a != src.data.get_allocator()){
data.reserve(src.data.size());
data.assign(std::make_move_iterator(src.data.begin()), std::make_move_iterator(src.data.end()));
}else{
data = std::move(src.data);
}
#endif //!__TBB_ALLOCATOR_TRAITS_PRESENT
}
//! Move assignment operator
/** This operation is unsafe if there are pending concurrent operations on the src queue. */
concurrent_priority_queue& operator=( concurrent_priority_queue&& src) {
if (this != &src) {
mark = src.mark;
my_size = src.my_size;
#if !__TBB_ALLOCATOR_TRAITS_PRESENT
if (data.get_allocator() != src.data.get_allocator()){
vector_t(std::make_move_iterator(src.data.begin()), std::make_move_iterator(src.data.end()), data.get_allocator()).swap(data);
}else
#endif //!__TBB_ALLOCATOR_TRAITS_PRESENT
{
data = std::move(src.data);
}
}
return *this;
}
#endif //__TBB_CPP11_RVALUE_REF_PRESENT
//! Assign the queue from [begin,end) range, not thread-safe
template<typename InputIterator>
void assign(InputIterator begin, InputIterator end) {
vector_t(begin, end, data.get_allocator()).swap(data);
mark = 0;
my_size = data.size();
heapify();
}
#if __TBB_INITIALIZER_LISTS_PRESENT
//! Assign the queue from std::initializer_list, not thread-safe
void assign(std::initializer_list<T> il) { this->assign(il.begin(), il.end()); }
//! Assign from std::initializer_list, not thread-safe
concurrent_priority_queue& operator=(std::initializer_list<T> il) {
this->assign(il.begin(), il.end());
return *this;
}
#endif //# __TBB_INITIALIZER_LISTS_PRESENT
//! Returns true if empty, false otherwise
/** Returned value may not reflect results of pending operations.
This operation reads shared data and will trigger a race condition. */
bool empty() const { return size()==0; }
//! Returns the current number of elements contained in the queue
/** Returned value may not reflect results of pending operations.
This operation reads shared data and will trigger a race condition. */
size_type size() const { return __TBB_load_with_acquire(my_size); }
//! Pushes elem onto the queue, increasing capacity of queue if necessary
/** This operation can be safely used concurrently with other push, try_pop or emplace operations. */
void push(const_reference elem) {
#if __TBB_CPP11_IS_COPY_CONSTRUCTIBLE_PRESENT
__TBB_STATIC_ASSERT( std::is_copy_constructible<value_type>::value, "The type is not copy constructible. Copying push operation is impossible." );
#endif
cpq_operation op_data(elem, PUSH_OP);
my_aggregator.execute(&op_data);
if (op_data.status == FAILED) // exception thrown
throw_exception(eid_bad_alloc);
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
//! Pushes elem onto the queue, increasing capacity of queue if necessary
/** This operation can be safely used concurrently with other push, try_pop or emplace operations. */
void push(value_type &&elem) {
cpq_operation op_data(elem, PUSH_RVALUE_OP);
my_aggregator.execute(&op_data);
if (op_data.status == FAILED) // exception thrown
throw_exception(eid_bad_alloc);
}
#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT
//! Constructs a new element using args as the arguments for its construction and pushes it onto the queue
/** This operation can be safely used concurrently with other push, try_pop or emplace operations. */
template<typename... Args>
void emplace(Args&&... args) {
push(value_type(std::forward<Args>(args)...));
}
#endif /* __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT */
#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */
//! Gets a reference to and removes highest priority element
/** If a highest priority element was found, sets elem and returns true,
otherwise returns false.
This operation can be safely used concurrently with other push, try_pop or emplace operations. */
bool try_pop(reference elem) {
cpq_operation op_data(POP_OP);
op_data.elem = &elem;
my_aggregator.execute(&op_data);
return op_data.status==SUCCEEDED;
}
//! Clear the queue; not thread-safe
/** This operation is unsafe if there are pending concurrent operations on the queue.
Resets size, effectively emptying queue; does not free space.
May not clear elements added in pending operations. */
void clear() {
data.clear();
mark = 0;
my_size = 0;
}
//! Swap this queue with another; not thread-safe
/** This operation is unsafe if there are pending concurrent operations on the queue. */
void swap(concurrent_priority_queue& q) {
using std::swap;
data.swap(q.data);
swap(mark, q.mark);
swap(my_size, q.my_size);
}
//! Return allocator object
allocator_type get_allocator() const { return data.get_allocator(); }
private:
enum operation_type {INVALID_OP, PUSH_OP, POP_OP, PUSH_RVALUE_OP};
enum operation_status { WAIT=0, SUCCEEDED, FAILED };
class cpq_operation : public aggregated_operation<cpq_operation> {
public:
operation_type type;
union {
value_type *elem;
size_type sz;
};
cpq_operation(const_reference e, operation_type t) :
type(t), elem(const_cast<value_type*>(&e)) {}
cpq_operation(operation_type t) : type(t) {}
};
class my_functor_t {
concurrent_priority_queue<T, Compare, A> *cpq;
public:
my_functor_t() {}
my_functor_t(concurrent_priority_queue<T, Compare, A> *cpq_) : cpq(cpq_) {}
void operator()(cpq_operation* op_list) {
cpq->handle_operations(op_list);
}
};
typedef tbb::internal::aggregator< my_functor_t, cpq_operation > aggregator_t;
aggregator_t my_aggregator;
//! Padding added to avoid false sharing
char padding1[NFS_MaxLineSize - sizeof(aggregator_t)];
//! The point at which unsorted elements begin
size_type mark;
__TBB_atomic size_type my_size;
Compare compare;
//! Padding added to avoid false sharing
char padding2[NFS_MaxLineSize - (2*sizeof(size_type)) - sizeof(Compare)];
//! Storage for the heap of elements in queue, plus unheapified elements
/** data has the following structure:

     binary unheapified
      heap  elements
   ____|_______|____
   |       |       |
   v       v       v
   [_|...|_|_|...|_| |...| ]
    0       ^       ^  ^
            |       |  |__capacity
            |       |__my_size
            |__mark

    Thus, data stores the binary heap starting at position 0 through
    mark-1 (it may be empty). Then there are 0 or more elements
    that have not yet been inserted into the heap, in positions
    mark through my_size-1. */
typedef std::vector<value_type, allocator_type> vector_t;
vector_t data;
void handle_operations(cpq_operation *op_list) {
cpq_operation *tmp, *pop_list=NULL;
__TBB_ASSERT(mark == data.size(), NULL);
// First pass processes all constant (amortized; reallocation may happen) time pushes and pops.
while (op_list) {
// ITT note: &(op_list->status) tag is used to cover accesses to op_list
// node. This thread is going to handle the operation, and so will acquire it
// and perform the associated operation w/o triggering a race condition; the
// thread that created the operation is waiting on the status field, so when
// this thread is done with the operation, it will perform a
// store_with_release to give control back to the waiting thread in
// aggregator::insert_operation.
call_itt_notify(acquired, &(op_list->status));
__TBB_ASSERT(op_list->type != INVALID_OP, NULL);
tmp = op_list;
op_list = itt_hide_load_word(op_list->next);
if (tmp->type == POP_OP) {
if (mark < data.size() &&
compare(data[0], data[data.size()-1])) {
// there are newly pushed elems and the last one
// is higher than top
*(tmp->elem) = move(data[data.size()-1]);
__TBB_store_with_release(my_size, my_size-1);
itt_store_word_with_release(tmp->status, uintptr_t(SUCCEEDED));
data.pop_back();
__TBB_ASSERT(mark<=data.size(), NULL);
}
else { // no convenient item to pop; postpone
itt_hide_store_word(tmp->next, pop_list);
pop_list = tmp;
}
} else { // PUSH_OP or PUSH_RVALUE_OP
__TBB_ASSERT(tmp->type == PUSH_OP || tmp->type == PUSH_RVALUE_OP, "Unknown operation" );
__TBB_TRY{
if (tmp->type == PUSH_OP) {
push_back_helper(*(tmp->elem), typename internal::use_element_copy_constructor<value_type>::type());
} else {
data.push_back(move(*(tmp->elem)));
}
__TBB_store_with_release(my_size, my_size + 1);
itt_store_word_with_release(tmp->status, uintptr_t(SUCCEEDED));
} __TBB_CATCH(...) {
itt_store_word_with_release(tmp->status, uintptr_t(FAILED));
}
}
}
// second pass processes pop operations
while (pop_list) {
tmp = pop_list;
pop_list = itt_hide_load_word(pop_list->next);
__TBB_ASSERT(tmp->type == POP_OP, NULL);
if (data.empty()) {
itt_store_word_with_release(tmp->status, uintptr_t(FAILED));
}
else {
__TBB_ASSERT(mark<=data.size(), NULL);
if (mark < data.size() &&
compare(data[0], data[data.size()-1])) {
// there are newly pushed elems and the last one is
// higher than top
*(tmp->elem) = move(data[data.size()-1]);
__TBB_store_with_release(my_size, my_size-1);
itt_store_word_with_release(tmp->status, uintptr_t(SUCCEEDED));
data.pop_back();
}
else { // extract top and push last element down heap
*(tmp->elem) = move(data[0]);
__TBB_store_with_release(my_size, my_size-1);
itt_store_word_with_release(tmp->status, uintptr_t(SUCCEEDED));
reheap();
}
}
}
// heapify any leftover pushed elements before doing the next
// batch of operations
if (mark<data.size()) heapify();
__TBB_ASSERT(mark == data.size(), NULL);
}
//! Merge unsorted elements into heap
void heapify() {
if (!mark && data.size()>0) mark = 1;
for (; mark<data.size(); ++mark) {
// for each unheapified element under size
size_type cur_pos = mark;
value_type to_place = move(data[mark]);
do { // push to_place up the heap
size_type parent = (cur_pos-1)>>1;
if (!compare(data[parent], to_place)) break;
data[cur_pos] = move(data[parent]);
cur_pos = parent;
} while( cur_pos );
data[cur_pos] = move(to_place);
}
}
//! Re-heapify after an extraction
/** Re-heapify by pushing last element down the heap from the root. */
void reheap() {
size_type cur_pos=0, child=1;
while (child < mark) {
size_type target = child;
if (child+1 < mark && compare(data[child], data[child+1]))
++target;
// target now has the higher priority child
if (compare(data[target], data[data.size()-1])) break;
data[cur_pos] = move(data[target]);
cur_pos = target;
child = (cur_pos<<1)+1;
}
if (cur_pos != data.size()-1)
data[cur_pos] = move(data[data.size()-1]);
data.pop_back();
if (mark > data.size()) mark = data.size();
}
void push_back_helper(const T& t, tbb::internal::true_type) {
data.push_back(t);
}
void push_back_helper(const T&, tbb::internal::false_type) {
__TBB_ASSERT( false, "The type is not copy constructible. Copying push operation is impossible." );
}
};
} // namespace interface5
using interface5::concurrent_priority_queue;
} // namespace tbb
#endif /* __TBB_concurrent_priority_queue_H */
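
A minimal sketch (not part of the commit) exercising the queue above; shown single-threaded for brevity, although push() and try_pop() are designed to be called concurrently:

#include "tbb/concurrent_priority_queue.h"

void cpq_demo() {
    tbb::concurrent_priority_queue<int> q;   // std::less<int>: largest element has highest priority
    q.push( 3 );
    q.push( 7 );
    q.push( 5 );
    int top = 0;
    while( q.try_pop( top ) ) {
        // yields 7, then 5, then 3
    }
}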

View File

@@ -0,0 +1,462 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_concurrent_queue_H
#define __TBB_concurrent_queue_H
#include "internal/_concurrent_queue_impl.h"
namespace tbb {
namespace strict_ppl {
//! A high-performance thread-safe non-blocking concurrent queue.
/** Multiple threads may each push and pop concurrently.
Assignment construction is not allowed.
@ingroup containers */
template<typename T, typename A = cache_aligned_allocator<T> >
class concurrent_queue: public internal::concurrent_queue_base_v3<T> {
template<typename Container, typename Value> friend class internal::concurrent_queue_iterator;
//! Allocator type
typedef typename A::template rebind<char>::other page_allocator_type;
page_allocator_type my_allocator;
//! Allocates a block of size n (bytes)
/*override*/ virtual void *allocate_block( size_t n ) {
void *b = reinterpret_cast<void*>(my_allocator.allocate( n ));
if( !b )
internal::throw_exception(internal::eid_bad_alloc);
return b;
}
//! Deallocates block created by allocate_block.
/*override*/ virtual void deallocate_block( void *b, size_t n ) {
my_allocator.deallocate( reinterpret_cast<char*>(b), n );
}
static void copy_construct_item(T* location, const void* src){
new (location) T(*static_cast<const T*>(src));
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
static void move_construct_item(T* location, const void* src) {
new (location) T( std::move(*static_cast<T*>(const_cast<void*>(src))) );
}
#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */
public:
//! Element type in the queue.
typedef T value_type;
//! Reference type
typedef T& reference;
//! Const reference type
typedef const T& const_reference;
//! Integral type for representing size of the queue.
typedef size_t size_type;
//! Difference type for iterator
typedef ptrdiff_t difference_type;
//! Allocator type
typedef A allocator_type;
//! Construct empty queue
explicit concurrent_queue(const allocator_type& a = allocator_type()) :
my_allocator( a )
{
}
//! [begin,end) constructor
template<typename InputIterator>
concurrent_queue( InputIterator begin, InputIterator end, const allocator_type& a = allocator_type()) :
my_allocator( a )
{
for( ; begin != end; ++begin )
this->push(*begin);
}
//! Copy constructor
concurrent_queue( const concurrent_queue& src, const allocator_type& a = allocator_type()) :
internal::concurrent_queue_base_v3<T>(), my_allocator( a )
{
this->assign( src, copy_construct_item );
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
//! Move constructors
concurrent_queue( concurrent_queue&& src ) :
internal::concurrent_queue_base_v3<T>(), my_allocator( std::move(src.my_allocator) )
{
this->internal_swap( src );
}
concurrent_queue( concurrent_queue&& src, const allocator_type& a ) :
internal::concurrent_queue_base_v3<T>(), my_allocator( a )
{
// checking that memory allocated by one instance of allocator can be deallocated
// with another
if( my_allocator == src.my_allocator) {
this->internal_swap( src );
} else {
// allocators are different => performing per-element move
this->assign( src, move_construct_item );
src.clear();
}
}
#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */
//! Destroy queue
~concurrent_queue();
//! Enqueue an item at tail of queue.
void push( const T& source ) {
this->internal_push( &source, copy_construct_item );
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
void push( T&& source ) {
this->internal_push( &source, move_construct_item );
}
#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT
template<typename... Arguments>
void emplace( Arguments&&... args ) {
push( T(std::forward<Arguments>( args )...) );
}
#endif //__TBB_CPP11_VARIADIC_TEMPLATES_PRESENT
#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */
//! Attempt to dequeue an item from head of queue.
/** Does not wait for item to become available.
Returns true if successful; false otherwise. */
bool try_pop( T& result ) {
return this->internal_try_pop( &result );
}
//! Return the number of items in the queue; thread unsafe
size_type unsafe_size() const {return this->internal_size();}
//! Equivalent to size()==0.
bool empty() const {return this->internal_empty();}
//! Clear the queue. Not thread-safe.
void clear();
//! Return allocator object
allocator_type get_allocator() const { return this->my_allocator; }
typedef internal::concurrent_queue_iterator<concurrent_queue,T> iterator;
typedef internal::concurrent_queue_iterator<concurrent_queue,const T> const_iterator;
//------------------------------------------------------------------------
// The iterators are intended only for debugging. They are slow and not thread safe.
//------------------------------------------------------------------------
iterator unsafe_begin() {return iterator(*this);}
iterator unsafe_end() {return iterator();}
const_iterator unsafe_begin() const {return const_iterator(*this);}
const_iterator unsafe_end() const {return const_iterator();}
} ;
template<typename T, class A>
concurrent_queue<T,A>::~concurrent_queue() {
clear();
this->internal_finish_clear();
}
template<typename T, class A>
void concurrent_queue<T,A>::clear() {
while( !empty() ) {
T value;
this->internal_try_pop(&value);
}
}
} // namespace strict_ppl
//! A high-performance thread-safe blocking concurrent bounded queue.
/** This is the pre-PPL TBB concurrent queue which supports boundedness and blocking semantics.
Note that method names agree with the PPL-style concurrent queue.
Multiple threads may each push and pop concurrently.
Assignment construction is not allowed.
@ingroup containers */
template<typename T, class A = cache_aligned_allocator<T> >
class concurrent_bounded_queue: public internal::concurrent_queue_base_v8 {
template<typename Container, typename Value> friend class internal::concurrent_queue_iterator;
//! Allocator type
typedef typename A::template rebind<char>::other page_allocator_type;
page_allocator_type my_allocator;
typedef typename concurrent_queue_base_v3::padded_page<T> padded_page;
typedef typename concurrent_queue_base_v3::copy_specifics copy_specifics;
//! Class used to ensure exception-safety of method "pop"
class destroyer: internal::no_copy {
T& my_value;
public:
destroyer( T& value ) : my_value(value) {}
~destroyer() {my_value.~T();}
};
T& get_ref( page& p, size_t index ) {
__TBB_ASSERT( index<items_per_page, NULL );
return (&static_cast<padded_page*>(static_cast<void*>(&p))->last)[index];
}
/*override*/ virtual void copy_item( page& dst, size_t index, const void* src ) {
new( &get_ref(dst,index) ) T(*static_cast<const T*>(src));
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
/*override*/ virtual void move_item( page& dst, size_t index, const void* src ) {
new( &get_ref(dst,index) ) T( std::move(*static_cast<T*>(const_cast<void*>(src))) );
}
#else
/*override*/ virtual void move_item( page&, size_t, const void* ) {
__TBB_ASSERT( false, "Unreachable code" );
}
#endif
/*override*/ virtual void copy_page_item( page& dst, size_t dindex, const page& src, size_t sindex ) {
new( &get_ref(dst,dindex) ) T( get_ref( const_cast<page&>(src), sindex ) );
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
/*override*/ virtual void move_page_item( page& dst, size_t dindex, const page& src, size_t sindex ) {
new( &get_ref(dst,dindex) ) T( std::move(get_ref( const_cast<page&>(src), sindex )) );
}
#else
/*override*/ virtual void move_page_item( page&, size_t, const page&, size_t ) {
__TBB_ASSERT( false, "Unreachable code" );
}
#endif
/*override*/ virtual void assign_and_destroy_item( void* dst, page& src, size_t index ) {
T& from = get_ref(src,index);
destroyer d(from);
*static_cast<T*>(dst) = tbb::internal::move( from );
}
/*override*/ virtual page *allocate_page() {
size_t n = sizeof(padded_page) + (items_per_page-1)*sizeof(T);
page *p = reinterpret_cast<page*>(my_allocator.allocate( n ));
if( !p )
internal::throw_exception(internal::eid_bad_alloc);
return p;
}
/*override*/ virtual void deallocate_page( page *p ) {
size_t n = sizeof(padded_page) + (items_per_page-1)*sizeof(T);
my_allocator.deallocate( reinterpret_cast<char*>(p), n );
}
public:
//! Element type in the queue.
typedef T value_type;
//! Allocator type
typedef A allocator_type;
//! Reference type
typedef T& reference;
//! Const reference type
typedef const T& const_reference;
//! Integral type for representing size of the queue.
/** Note that the size_type is a signed integral type.
This is because the size can be negative if there are pending pops without corresponding pushes. */
typedef std::ptrdiff_t size_type;
//! Difference type for iterator
typedef std::ptrdiff_t difference_type;
//! Construct empty queue
explicit concurrent_bounded_queue(const allocator_type& a = allocator_type()) :
concurrent_queue_base_v8( sizeof(T) ), my_allocator( a )
{
}
//! Copy constructor
concurrent_bounded_queue( const concurrent_bounded_queue& src, const allocator_type& a = allocator_type())
: concurrent_queue_base_v8( sizeof(T) ), my_allocator( a )
{
assign( src );
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
//! Move constructors
concurrent_bounded_queue( concurrent_bounded_queue&& src )
: concurrent_queue_base_v8( sizeof(T) ), my_allocator( std::move(src.my_allocator) )
{
internal_swap( src );
}
concurrent_bounded_queue( concurrent_bounded_queue&& src, const allocator_type& a )
: concurrent_queue_base_v8( sizeof(T) ), my_allocator( a )
{
// checking that memory allocated by one instance of allocator can be deallocated
// with another
if( my_allocator == src.my_allocator) {
this->internal_swap( src );
} else {
// allocators are different => performing per-element move
this->move_content( src );
src.clear();
}
}
#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */
//! [begin,end) constructor
template<typename InputIterator>
concurrent_bounded_queue( InputIterator begin, InputIterator end,
const allocator_type& a = allocator_type())
: concurrent_queue_base_v8( sizeof(T) ), my_allocator( a )
{
for( ; begin != end; ++begin )
internal_push_if_not_full(&*begin);
}
//! Destroy queue
~concurrent_bounded_queue();
//! Enqueue an item at tail of queue.
void push( const T& source ) {
internal_push( &source );
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
//! Move an item to the tail of the queue.
void push( T&& source ) {
internal_push_move( &source );
}
#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT
template<typename... Arguments>
void emplace( Arguments&&... args ) {
push( T(std::forward<Arguments>( args )...) );
}
#endif /* __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT */
#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */
//! Dequeue item from head of queue.
/** Block until an item becomes available, and then dequeue it. */
void pop( T& destination ) {
internal_pop( &destination );
}
#if TBB_USE_EXCEPTIONS
//! Abort all pending queue operations
void abort() {
internal_abort();
}
#endif
//! Enqueue an item at tail of queue if queue is not already full.
/** Does not wait for the queue to have room.
Returns true if item is pushed; false if queue was already full. */
bool try_push( const T& source ) {
return internal_push_if_not_full( &source );
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
//! Move an item to the tail of the queue if the queue is not already full.
/** Does not wait for the queue to have room.
Returns true if item is pushed; false if queue was already full. */
bool try_push( T&& source ) {
return internal_push_move_if_not_full( &source );
}
#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT
template<typename... Arguments>
bool try_emplace( Arguments&&... args ) {
return try_push( T(std::forward<Arguments>( args )...) );
}
#endif /* __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT */
#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */
//! Attempt to dequeue an item from head of queue.
/** Does not wait for item to become available.
Returns true if successful; false otherwise. */
bool try_pop( T& destination ) {
return internal_pop_if_present( &destination );
}
//! Return number of pushes minus number of pops.
/** Note that the result can be negative if there are pops waiting for the
corresponding pushes. The result can also exceed capacity() if there
are push operations in flight. */
size_type size() const {return internal_size();}
//! Equivalent to size()<=0.
bool empty() const {return internal_empty();}
//! Maximum number of allowed elements
size_type capacity() const {
return my_capacity;
}
//! Set the capacity
/** Setting the capacity to 0 causes subsequent try_push operations to always fail,
and subsequent push operations to block forever. */
void set_capacity( size_type new_capacity ) {
internal_set_capacity( new_capacity, sizeof(T) );
}
//! Return allocator object
allocator_type get_allocator() const { return this->my_allocator; }
//! Clear the queue. Not thread-safe.
void clear();
typedef internal::concurrent_queue_iterator<concurrent_bounded_queue,T> iterator;
typedef internal::concurrent_queue_iterator<concurrent_bounded_queue,const T> const_iterator;
//------------------------------------------------------------------------
// The iterators are intended only for debugging. They are slow and not thread safe.
//------------------------------------------------------------------------
iterator unsafe_begin() {return iterator(*this);}
iterator unsafe_end() {return iterator();}
const_iterator unsafe_begin() const {return const_iterator(*this);}
const_iterator unsafe_end() const {return const_iterator();}
};
template<typename T, class A>
concurrent_bounded_queue<T,A>::~concurrent_bounded_queue() {
clear();
internal_finish_clear();
}
template<typename T, class A>
void concurrent_bounded_queue<T,A>::clear() {
while( !empty() ) {
T value;
internal_pop_if_present(&value);
}
}
using strict_ppl::concurrent_queue;
} // namespace tbb
#endif /* __TBB_concurrent_queue_H */
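
A short usage sketch contrasting the two queues defined above (editorial; the function name is illustrative, and it assumes the header is included as "tbb/concurrent_queue.h"). The unbounded queue offers only the non-blocking try_pop(), while the bounded variant adds capacity control and a blocking pop():

#include "tbb/concurrent_queue.h"

void queue_example() {
    tbb::concurrent_queue<int> q;              // strict_ppl::concurrent_queue, unbounded
    q.push(1);
    int x;
    if (q.try_pop(x)) { /* x == 1 */ }

    tbb::concurrent_bounded_queue<int> bq;
    bq.set_capacity(2);                        // try_push() fails once two items are queued
    bq.try_push(10);
    bq.push(20);                               // push() would block while the queue is full
    int y;
    bq.pop(y);                                 // blocks until an item is available
}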

View File

@@ -0,0 +1,326 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
/* Container implementations in this header are based on PPL implementations
provided by Microsoft. */
#ifndef __TBB_concurrent_unordered_map_H
#define __TBB_concurrent_unordered_map_H
#include "internal/_concurrent_unordered_impl.h"
namespace tbb
{
namespace interface5 {
// Template class for hash map traits
template<typename Key, typename T, typename Hash_compare, typename Allocator, bool Allow_multimapping>
class concurrent_unordered_map_traits
{
protected:
typedef std::pair<const Key, T> value_type;
typedef Key key_type;
typedef Hash_compare hash_compare;
typedef typename Allocator::template rebind<value_type>::other allocator_type;
enum { allow_multimapping = Allow_multimapping };
concurrent_unordered_map_traits() : my_hash_compare() {}
concurrent_unordered_map_traits(const hash_compare& hc) : my_hash_compare(hc) {}
class value_compare : public std::binary_function<value_type, value_type, bool>
{
friend class concurrent_unordered_map_traits<Key, T, Hash_compare, Allocator, Allow_multimapping>;
public:
bool operator()(const value_type& left, const value_type& right) const
{
return (my_hash_compare(left.first, right.first));
}
value_compare(const hash_compare& comparator) : my_hash_compare(comparator) {}
protected:
hash_compare my_hash_compare; // the comparator predicate for keys
};
template<class Type1, class Type2>
static const Key& get_key(const std::pair<Type1, Type2>& value) {
return (value.first);
}
hash_compare my_hash_compare; // the comparator predicate for keys
};
template <typename Key, typename T, typename Hasher = tbb::tbb_hash<Key>, typename Key_equality = std::equal_to<Key>,
typename Allocator = tbb::tbb_allocator<std::pair<const Key, T> > >
class concurrent_unordered_map :
public internal::concurrent_unordered_base< concurrent_unordered_map_traits<Key, T,
internal::hash_compare<Key, Hasher, Key_equality>, Allocator, false> >
{
// Base type definitions
typedef internal::hash_compare<Key, Hasher, Key_equality> hash_compare;
typedef concurrent_unordered_map_traits<Key, T, hash_compare, Allocator, false> traits_type;
typedef internal::concurrent_unordered_base< traits_type > base_type;
#if __TBB_EXTRA_DEBUG
public:
#endif
using traits_type::allow_multimapping;
public:
using base_type::end;
using base_type::find;
using base_type::insert;
// Type definitions
typedef Key key_type;
typedef typename base_type::value_type value_type;
typedef T mapped_type;
typedef Hasher hasher;
typedef Key_equality key_equal;
typedef hash_compare key_compare;
typedef typename base_type::allocator_type allocator_type;
typedef typename base_type::pointer pointer;
typedef typename base_type::const_pointer const_pointer;
typedef typename base_type::reference reference;
typedef typename base_type::const_reference const_reference;
typedef typename base_type::size_type size_type;
typedef typename base_type::difference_type difference_type;
typedef typename base_type::iterator iterator;
typedef typename base_type::const_iterator const_iterator;
typedef typename base_type::iterator local_iterator;
typedef typename base_type::const_iterator const_local_iterator;
// Construction/destruction/copying
explicit concurrent_unordered_map(size_type n_of_buckets = base_type::initial_bucket_number,
const hasher& _Hasher = hasher(), const key_equal& _Key_equality = key_equal(),
const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(_Hasher, _Key_equality), a)
{
}
concurrent_unordered_map(const Allocator& a) : base_type(base_type::initial_bucket_number, key_compare(), a)
{
}
template <typename Iterator>
concurrent_unordered_map(Iterator first, Iterator last, size_type n_of_buckets = base_type::initial_bucket_number,
const hasher& _Hasher = hasher(), const key_equal& _Key_equality = key_equal(),
const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(_Hasher, _Key_equality), a)
{
insert(first, last);
}
#if __TBB_INITIALIZER_LISTS_PRESENT
//! Constructor from initializer_list
concurrent_unordered_map(std::initializer_list<value_type> il, size_type n_of_buckets = base_type::initial_bucket_number,
const hasher& _Hasher = hasher(), const key_equal& _Key_equality = key_equal(),
const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(_Hasher, _Key_equality), a)
{
this->insert(il.begin(),il.end());
}
#endif //# __TBB_INITIALIZER_LISTS_PRESENT
#if __TBB_CPP11_RVALUE_REF_PRESENT && __TBB_CPP11_IMPLICIT_MOVE_MEMBERS_GENERATION_FOR_DERIVED_BROKEN
concurrent_unordered_map(const concurrent_unordered_map& table)
: base_type(table)
{
}
concurrent_unordered_map& operator=(const concurrent_unordered_map& table)
{
return static_cast<concurrent_unordered_map&>(base_type::operator=(table));
}
concurrent_unordered_map(concurrent_unordered_map&& table)
: base_type(std::move(table))
{
}
concurrent_unordered_map& operator=(concurrent_unordered_map&& table)
{
return static_cast<concurrent_unordered_map&>(base_type::operator=(std::move(table)));
}
#endif //__TBB_CPP11_IMPLICIT_MOVE_MEMBERS_GENERATION_FOR_DERIVED_BROKEN
concurrent_unordered_map(const concurrent_unordered_map& table, const Allocator& a)
: base_type(table, a)
{
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
concurrent_unordered_map(concurrent_unordered_map&& table, const Allocator& a) : base_type(std::move(table), a)
{
}
#endif
// Observers
mapped_type& operator[](const key_type& key)
{
iterator where = find(key);
if (where == end())
{
where = insert(std::pair<key_type, mapped_type>(key, mapped_type())).first;
}
return ((*where).second);
}
mapped_type& at(const key_type& key)
{
iterator where = find(key);
if (where == end())
{
tbb::internal::throw_exception(tbb::internal::eid_invalid_key);
}
return ((*where).second);
}
const mapped_type& at(const key_type& key) const
{
const_iterator where = find(key);
if (where == end())
{
tbb::internal::throw_exception(tbb::internal::eid_invalid_key);
}
return ((*where).second);
}
};
template < typename Key, typename T, typename Hasher = tbb::tbb_hash<Key>, typename Key_equality = std::equal_to<Key>,
typename Allocator = tbb::tbb_allocator<std::pair<const Key, T> > >
class concurrent_unordered_multimap :
public internal::concurrent_unordered_base< concurrent_unordered_map_traits< Key, T,
internal::hash_compare<Key, Hasher, Key_equality>, Allocator, true> >
{
// Base type definitions
typedef internal::hash_compare<Key, Hasher, Key_equality> hash_compare;
typedef concurrent_unordered_map_traits<Key, T, hash_compare, Allocator, true> traits_type;
typedef internal::concurrent_unordered_base<traits_type> base_type;
#if __TBB_EXTRA_DEBUG
public:
#endif
using traits_type::allow_multimapping;
public:
using base_type::insert;
// Type definitions
typedef Key key_type;
typedef typename base_type::value_type value_type;
typedef T mapped_type;
typedef Hasher hasher;
typedef Key_equality key_equal;
typedef hash_compare key_compare;
typedef typename base_type::allocator_type allocator_type;
typedef typename base_type::pointer pointer;
typedef typename base_type::const_pointer const_pointer;
typedef typename base_type::reference reference;
typedef typename base_type::const_reference const_reference;
typedef typename base_type::size_type size_type;
typedef typename base_type::difference_type difference_type;
typedef typename base_type::iterator iterator;
typedef typename base_type::const_iterator const_iterator;
typedef typename base_type::iterator local_iterator;
typedef typename base_type::const_iterator const_local_iterator;
// Construction/destruction/copying
explicit concurrent_unordered_multimap(size_type n_of_buckets = base_type::initial_bucket_number,
const hasher& _Hasher = hasher(), const key_equal& _Key_equality = key_equal(),
const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(_Hasher, _Key_equality), a)
{
}
concurrent_unordered_multimap(const Allocator& a) : base_type(base_type::initial_bucket_number, key_compare(), a)
{
}
template <typename Iterator>
concurrent_unordered_multimap(Iterator first, Iterator last, size_type n_of_buckets = base_type::initial_bucket_number,
const hasher& _Hasher = hasher(), const key_equal& _Key_equality = key_equal(),
const allocator_type& a = allocator_type())
: base_type(n_of_buckets,key_compare(_Hasher,_Key_equality), a)
{
insert(first, last);
}
#if __TBB_INITIALIZER_LISTS_PRESENT
//! Constructor from initializer_list
concurrent_unordered_multimap(std::initializer_list<value_type> il, size_type n_of_buckets = base_type::initial_bucket_number,
const hasher& _Hasher = hasher(), const key_equal& _Key_equality = key_equal(),
const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(_Hasher, _Key_equality), a)
{
this->insert(il.begin(),il.end());
}
#endif //# __TBB_INITIALIZER_LISTS_PRESENT
#if __TBB_CPP11_RVALUE_REF_PRESENT && __TBB_CPP11_IMPLICIT_MOVE_MEMBERS_GENERATION_FOR_DERIVED_BROKEN
concurrent_unordered_multimap(const concurrent_unordered_multimap& table)
: base_type(table)
{
}
concurrent_unordered_multimap& operator=(const concurrent_unordered_multimap& table)
{
return static_cast<concurrent_unordered_multimap&>(base_type::operator=(table));
}
concurrent_unordered_multimap(concurrent_unordered_multimap&& table)
: base_type(std::move(table))
{
}
concurrent_unordered_multimap& operator=(concurrent_unordered_multimap&& table)
{
return static_cast<concurrent_unordered_multimap&>(base_type::operator=(std::move(table)));
}
#endif //__TBB_CPP11_IMPLICIT_MOVE_MEMBERS_GENERATION_FOR_DERIVED_BROKEN
concurrent_unordered_multimap(const concurrent_unordered_multimap& table, const Allocator& a)
: base_type(table, a)
{
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
concurrent_unordered_multimap(concurrent_unordered_multimap&& table, const Allocator& a) : base_type(std::move(table), a)
{
}
#endif
};
} // namespace interface5
using interface5::concurrent_unordered_map;
using interface5::concurrent_unordered_multimap;
} // namespace tbb
#endif// __TBB_concurrent_unordered_map_H
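
A brief usage sketch for the map defined above (editorial; the function name is illustrative, and it assumes "tbb/concurrent_unordered_map.h" as the include path). Insertion, lookup, and traversal may run concurrently; element removal is not a concurrency-safe operation here:

#include "tbb/concurrent_unordered_map.h"
#include <string>

void map_example() {
    tbb::concurrent_unordered_map<std::string, int> counts;
    counts.insert(std::make_pair(std::string("apples"), 3));
    counts["oranges"] = 5;                     // operator[] default-constructs the mapped value if absent
    tbb::concurrent_unordered_map<std::string, int>::iterator it = counts.find("apples");
    if (it != counts.end())
        ++it->second;                          // at() would throw for a missing key (see eid_invalid_key above)
}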

View File

@@ -0,0 +1,269 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
/* Container implementations in this header are based on PPL implementations
provided by Microsoft. */
#ifndef __TBB_concurrent_unordered_set_H
#define __TBB_concurrent_unordered_set_H
#include "internal/_concurrent_unordered_impl.h"
namespace tbb
{
namespace interface5 {
// Template class for hash set traits
template<typename Key, typename Hash_compare, typename Allocator, bool Allow_multimapping>
class concurrent_unordered_set_traits
{
protected:
typedef Key value_type;
typedef Key key_type;
typedef Hash_compare hash_compare;
typedef typename Allocator::template rebind<value_type>::other allocator_type;
enum { allow_multimapping = Allow_multimapping };
concurrent_unordered_set_traits() : my_hash_compare() {}
concurrent_unordered_set_traits(const hash_compare& hc) : my_hash_compare(hc) {}
typedef hash_compare value_compare;
static const Key& get_key(const value_type& value) {
return value;
}
hash_compare my_hash_compare; // the comparator predicate for keys
};
template <typename Key, typename Hasher = tbb::tbb_hash<Key>, typename Key_equality = std::equal_to<Key>, typename Allocator = tbb::tbb_allocator<Key> >
class concurrent_unordered_set : public internal::concurrent_unordered_base< concurrent_unordered_set_traits<Key, internal::hash_compare<Key, Hasher, Key_equality>, Allocator, false> >
{
// Base type definitions
typedef internal::hash_compare<Key, Hasher, Key_equality> hash_compare;
typedef internal::concurrent_unordered_base< concurrent_unordered_set_traits<Key, hash_compare, Allocator, false> > base_type;
typedef concurrent_unordered_set_traits<Key, internal::hash_compare<Key, Hasher, Key_equality>, Allocator, false> traits_type;
#if __TBB_EXTRA_DEBUG
public:
#endif
using traits_type::allow_multimapping;
public:
using base_type::insert;
// Type definitions
typedef Key key_type;
typedef typename base_type::value_type value_type;
typedef Key mapped_type;
typedef Hasher hasher;
typedef Key_equality key_equal;
typedef hash_compare key_compare;
typedef typename base_type::allocator_type allocator_type;
typedef typename base_type::pointer pointer;
typedef typename base_type::const_pointer const_pointer;
typedef typename base_type::reference reference;
typedef typename base_type::const_reference const_reference;
typedef typename base_type::size_type size_type;
typedef typename base_type::difference_type difference_type;
typedef typename base_type::iterator iterator;
typedef typename base_type::const_iterator const_iterator;
typedef typename base_type::iterator local_iterator;
typedef typename base_type::const_iterator const_local_iterator;
// Construction/destruction/copying
explicit concurrent_unordered_set(size_type n_of_buckets = base_type::initial_bucket_number, const hasher& a_hasher = hasher(),
const key_equal& a_keyeq = key_equal(), const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a)
{
}
concurrent_unordered_set(const Allocator& a) : base_type(base_type::initial_bucket_number, key_compare(), a)
{
}
template <typename Iterator>
concurrent_unordered_set(Iterator first, Iterator last, size_type n_of_buckets = base_type::initial_bucket_number, const hasher& a_hasher = hasher(),
const key_equal& a_keyeq = key_equal(), const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a)
{
insert(first, last);
}
#if __TBB_INITIALIZER_LISTS_PRESENT
//! Constructor from initializer_list
concurrent_unordered_set(std::initializer_list<value_type> il, size_type n_of_buckets = base_type::initial_bucket_number, const hasher& a_hasher = hasher(),
const key_equal& a_keyeq = key_equal(), const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a)
{
this->insert(il.begin(),il.end());
}
#endif //# __TBB_INITIALIZER_LISTS_PRESENT
#if __TBB_CPP11_RVALUE_REF_PRESENT && __TBB_CPP11_IMPLICIT_MOVE_MEMBERS_GENERATION_FOR_DERIVED_BROKEN
concurrent_unordered_set(const concurrent_unordered_set& table)
: base_type(table)
{
}
concurrent_unordered_set& operator=(const concurrent_unordered_set& table)
{
return static_cast<concurrent_unordered_set&>(base_type::operator=(table));
}
concurrent_unordered_set(concurrent_unordered_set&& table)
: base_type(std::move(table))
{
}
concurrent_unordered_set& operator=(concurrent_unordered_set&& table)
{
return static_cast<concurrent_unordered_set&>(base_type::operator=(std::move(table)));
}
#endif //__TBB_CPP11_IMPLICIT_MOVE_MEMBERS_GENERATION_FOR_DERIVED_BROKEN
concurrent_unordered_set(const concurrent_unordered_set& table, const Allocator& a)
: base_type(table, a)
{
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
concurrent_unordered_set(concurrent_unordered_set&& table, const Allocator& a)
: base_type(std::move(table), a)
{
}
#endif //__TBB_CPP11_RVALUE_REF_PRESENT
};
template <typename Key, typename Hasher = tbb::tbb_hash<Key>, typename Key_equality = std::equal_to<Key>,
typename Allocator = tbb::tbb_allocator<Key> >
class concurrent_unordered_multiset :
public internal::concurrent_unordered_base< concurrent_unordered_set_traits<Key,
internal::hash_compare<Key, Hasher, Key_equality>, Allocator, true> >
{
// Base type definitions
typedef internal::hash_compare<Key, Hasher, Key_equality> hash_compare;
typedef concurrent_unordered_set_traits<Key, hash_compare, Allocator, true> traits_type;
typedef internal::concurrent_unordered_base< traits_type > base_type;
#if __TBB_EXTRA_DEBUG
public:
#endif
using traits_type::allow_multimapping;
public:
using base_type::insert;
// Type definitions
typedef Key key_type;
typedef typename base_type::value_type value_type;
typedef Key mapped_type;
typedef Hasher hasher;
typedef Key_equality key_equal;
typedef hash_compare key_compare;
typedef typename base_type::allocator_type allocator_type;
typedef typename base_type::pointer pointer;
typedef typename base_type::const_pointer const_pointer;
typedef typename base_type::reference reference;
typedef typename base_type::const_reference const_reference;
typedef typename base_type::size_type size_type;
typedef typename base_type::difference_type difference_type;
typedef typename base_type::iterator iterator;
typedef typename base_type::const_iterator const_iterator;
typedef typename base_type::iterator local_iterator;
typedef typename base_type::const_iterator const_local_iterator;
// Construction/destruction/copying
explicit concurrent_unordered_multiset(size_type n_of_buckets = base_type::initial_bucket_number,
const hasher& _Hasher = hasher(), const key_equal& _Key_equality = key_equal(),
const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(_Hasher, _Key_equality), a)
{
}
concurrent_unordered_multiset(const Allocator& a) : base_type(base_type::initial_bucket_number, key_compare(), a)
{
}
template <typename Iterator>
concurrent_unordered_multiset(Iterator first, Iterator last, size_type n_of_buckets = base_type::initial_bucket_number,
const hasher& _Hasher = hasher(), const key_equal& _Key_equality = key_equal(),
const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(_Hasher, _Key_equality), a)
{
insert(first, last);
}
#if __TBB_INITIALIZER_LISTS_PRESENT
//! Constructor from initializer_list
concurrent_unordered_multiset(std::initializer_list<value_type> il, size_type n_of_buckets = base_type::initial_bucket_number, const hasher& a_hasher = hasher(),
const key_equal& a_keyeq = key_equal(), const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a)
{
this->insert(il.begin(),il.end());
}
#endif //# __TBB_INITIALIZER_LISTS_PRESENT
#if __TBB_CPP11_RVALUE_REF_PRESENT && __TBB_CPP11_IMPLICIT_MOVE_MEMBERS_GENERATION_FOR_DERIVED_BROKEN
concurrent_unordered_multiset(const concurrent_unordered_multiset& table)
: base_type(table)
{
}
concurrent_unordered_multiset& operator=(const concurrent_unordered_multiset& table)
{
return static_cast<concurrent_unordered_multiset&>(base_type::operator=(table));
}
concurrent_unordered_multiset(concurrent_unordered_multiset&& table)
: base_type(std::move(table))
{
}
concurrent_unordered_multiset& operator=(concurrent_unordered_multiset&& table)
{
return static_cast<concurrent_unordered_multiset&>(base_type::operator=(std::move(table)));
}
#endif //__TBB_CPP11_IMPLICIT_MOVE_MEMBERS_GENERATION_FOR_DERIVED_BROKEN
concurrent_unordered_multiset(const concurrent_unordered_multiset& table, const Allocator& a)
: base_type(table, a)
{
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
concurrent_unordered_multiset(concurrent_unordered_multiset&& table, const Allocator& a)
: base_type(std::move(table), a)
{
}
#endif //__TBB_CPP11_RVALUE_REF_PRESENT
};
} // namespace interface5
using interface5::concurrent_unordered_set;
using interface5::concurrent_unordered_multiset;
} // namespace tbb
#endif// __TBB_concurrent_unordered_set_H
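
A brief usage sketch for the set variants above (editorial; the function name is illustrative, and it assumes "tbb/concurrent_unordered_set.h" as the include path). The set rejects duplicate keys while the multiset keeps them, mirroring the Allow_multimapping trait:

#include "tbb/concurrent_unordered_set.h"

void set_example() {
    tbb::concurrent_unordered_set<int> seen;
    bool inserted = seen.insert(42).second;    // insert() returns pair<iterator,bool>
    bool again    = seen.insert(42).second;    // false: duplicates are rejected
    (void)inserted; (void)again;

    tbb::concurrent_unordered_multiset<int> tags;
    tags.insert(7);
    tags.insert(7);                            // both copies kept (allow_multimapping == true)
}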

File diff suppressed because it is too large

View File

@@ -0,0 +1,133 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef _TBB_CRITICAL_SECTION_H_
#define _TBB_CRITICAL_SECTION_H_
#if _WIN32||_WIN64
#include "machine/windows_api.h"
#else
#include <pthread.h>
#include <errno.h>
#endif // _WIN32||WIN64
#include "tbb_stddef.h"
#include "tbb_thread.h"
#include "tbb_exception.h"
#include "tbb_profiling.h"
namespace tbb {
namespace internal {
class critical_section_v4 : internal::no_copy {
#if _WIN32||_WIN64
CRITICAL_SECTION my_impl;
#else
pthread_mutex_t my_impl;
#endif
tbb_thread::id my_tid;
public:
void __TBB_EXPORTED_METHOD internal_construct();
critical_section_v4() {
#if _WIN32||_WIN64
InitializeCriticalSectionEx( &my_impl, 4000, 0 );
#else
pthread_mutex_init(&my_impl, NULL);
#endif
internal_construct();
}
~critical_section_v4() {
__TBB_ASSERT(my_tid == tbb_thread::id(), "Destroying a still-held critical section");
#if _WIN32||_WIN64
DeleteCriticalSection(&my_impl);
#else
pthread_mutex_destroy(&my_impl);
#endif
}
class scoped_lock : internal::no_copy {
private:
critical_section_v4 &my_crit;
public:
scoped_lock( critical_section_v4& lock_me) :my_crit(lock_me) {
my_crit.lock();
}
~scoped_lock() {
my_crit.unlock();
}
};
void lock() {
tbb_thread::id local_tid = this_tbb_thread::get_id();
if(local_tid == my_tid) throw_exception( eid_improper_lock );
#if _WIN32||_WIN64
EnterCriticalSection( &my_impl );
#else
int rval = pthread_mutex_lock(&my_impl);
__TBB_ASSERT_EX(!rval, "critical_section::lock: pthread_mutex_lock failed");
#endif
__TBB_ASSERT(my_tid == tbb_thread::id(), NULL);
my_tid = local_tid;
}
bool try_lock() {
bool gotlock;
tbb_thread::id local_tid = this_tbb_thread::get_id();
if(local_tid == my_tid) return false;
#if _WIN32||_WIN64
gotlock = TryEnterCriticalSection( &my_impl ) != 0;
#else
int rval = pthread_mutex_trylock(&my_impl);
// valid returns are 0 (locked) and [EBUSY]
__TBB_ASSERT(rval == 0 || rval == EBUSY, "critical_section::trylock: pthread_mutex_trylock failed");
gotlock = rval == 0;
#endif
if(gotlock) {
my_tid = local_tid;
}
return gotlock;
}
void unlock() {
__TBB_ASSERT(this_tbb_thread::get_id() == my_tid, "thread unlocking critical_section is not thread that locked it");
my_tid = tbb_thread::id();
#if _WIN32||_WIN64
LeaveCriticalSection( &my_impl );
#else
int rval = pthread_mutex_unlock(&my_impl);
__TBB_ASSERT_EX(!rval, "critical_section::unlock: pthread_mutex_unlock failed");
#endif
}
static const bool is_rw_mutex = false;
static const bool is_recursive_mutex = false;
static const bool is_fair_mutex = true;
}; // critical_section_v4
} // namespace internal
typedef internal::critical_section_v4 critical_section;
__TBB_DEFINE_PROFILING_SET_NAME(critical_section)
} // namespace tbb
#endif // _TBB_CRITICAL_SECTION_H_
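
A minimal usage sketch for the wrapper above (editorial; names are illustrative). scoped_lock provides exception-safe acquire/release, and, as lock() shows, re-acquiring from the owning thread throws rather than deadlocking:

#include "tbb/critical_section.h"

tbb::critical_section cs;
long shared_total = 0;

void add_to_total(long delta) {
    tbb::critical_section::scoped_lock guard(cs);   // unlocks in its destructor
    shared_total += delta;
}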

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,29 @@
<HTML>
<BODY>
<H2>Overview</H2>
Include files for Intel&reg; Threading Building Blocks classes and functions.
<BR><A HREF=".">Click here</A> to see all files in the directory.
<H2>Directories</H2>
<DL>
<DT><A HREF="compat">compat</A>
<DD>Include files for source level compatibility with other frameworks.
<DT><A HREF="internal">internal</A>
<DD>Include files with implementation details; not for direct use.
<DT><A HREF="machine">machine</A>
<DD>Include files for low-level architecture specific functionality; not for direct use.
</DL>
<HR>
<A HREF="../index.html">Up to parent directory</A>
<p></p>
Copyright &copy; 2005-2015 Intel Corporation. All Rights Reserved.
<P></P>
Intel is a registered trademark or trademark of Intel Corporation
or its subsidiaries in the United States and other countries.
<p></p>
* Other names and brands may be claimed as the property of others.
</BODY>
</HTML>

View File

@@ -0,0 +1,180 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB__aggregator_impl_H
#define __TBB__aggregator_impl_H
#include "../atomic.h"
#if !__TBBMALLOC_BUILD
#include "../tbb_profiling.h"
#endif
namespace tbb {
namespace interface6 {
namespace internal {
using namespace tbb::internal;
//! aggregated_operation base class
template <typename Derived>
class aggregated_operation {
public:
uintptr_t status;
Derived *next;
aggregated_operation() : status(0), next(NULL) {}
};
//! Aggregator base class
/** An aggregator for collecting operations coming from multiple sources and executing
them serially on a single thread. operation_type must be derived from
aggregated_operation. The parameter handler_type is a functor that will be passed the
list of operations and is expected to handle each operation appropriately, setting the
status of each operation to non-zero.*/
template < typename operation_type >
class aggregator_generic {
public:
aggregator_generic() : handler_busy(false) { pending_operations = NULL; }
//! Place operation in list
/** Place operation in list and either handle list or wait for operation to
complete.
long_life_time specifies the life time of an operation inserted into the aggregator.
A "long" life time operation (long_life_time == true) may still be accessed
after it has been executed.
A "short" life time operation (long_life_time == false) may be destroyed while
it is being executed, so any access to it after execution is invalid.*/
template < typename handler_type >
void execute(operation_type *op, handler_type &handle_operations, bool long_life_time = true) {
operation_type *res;
// op->status should be read before inserting the operation in the
// aggregator queue since it can become invalid after executing a
// handler (if the operation has 'short' life time.)
const uintptr_t status = op->status;
// ITT note: &(op->status) tag is used to cover accesses to this op node. This
// thread has created the operation, and now releases it so that the handler
// thread may handle the associated operation w/o triggering a race condition;
// thus this tag will be acquired just before the operation is handled in the
// handle_operations functor.
call_itt_notify(releasing, &(op->status));
// insert the operation in the queue.
do {
// ITT may flag the following line as a race; it is a false positive:
// This is an atomic read; we don't provide itt_hide_load_word for atomics
op->next = res = pending_operations; // NOT A RACE
} while (pending_operations.compare_and_swap(op, res) != res);
if (!res) { // first in the list; handle the operations.
// ITT note: &pending_operations tag covers access to the handler_busy flag,
// which this waiting handler thread will try to set before entering
// handle_operations.
call_itt_notify(acquired, &pending_operations);
start_handle_operations(handle_operations);
// The operation with 'short' life time can already be destroyed.
if (long_life_time)
__TBB_ASSERT(op->status, NULL);
}
// not first; wait for op to be ready.
else if (!status) { // operation is blocking here.
__TBB_ASSERT(long_life_time, "A blocking operation cannot have a 'short' life time, since it may already have been destroyed.");
call_itt_notify(prepare, &(op->status));
spin_wait_while_eq(op->status, uintptr_t(0));
itt_load_word_with_acquire(op->status);
}
}
private:
//! An atomically updated list (aka mailbox) of pending operations
atomic<operation_type *> pending_operations;
//! Controls thread access to handle_operations
uintptr_t handler_busy;
//! Trigger the handling of operations when the handler is free
template < typename handler_type >
void start_handle_operations( handler_type &handle_operations ) {
operation_type *op_list;
// ITT note: &handler_busy tag covers access to pending_operations as it is passed
// between active and waiting handlers. Below, the waiting handler waits until
// the active handler releases, and the waiting handler acquires &handler_busy as
// it becomes the active_handler. The release point is at the end of this
// function, when all operations in pending_operations have been handled by the
// owner of this aggregator.
call_itt_notify(prepare, &handler_busy);
// get the handler_busy:
// only one thread can possibly spin here at a time
spin_wait_until_eq(handler_busy, uintptr_t(0));
call_itt_notify(acquired, &handler_busy);
// acquire fence not necessary here due to causality rule and surrounding atomics
__TBB_store_with_release(handler_busy, uintptr_t(1));
// ITT note: &pending_operations tag covers access to the handler_busy flag
// itself. Capturing the state of the pending_operations signifies that
// handler_busy has been set and a new active handler will now process that list's
// operations.
call_itt_notify(releasing, &pending_operations);
// grab pending_operations
op_list = pending_operations.fetch_and_store(NULL);
// handle all the operations
handle_operations(op_list);
// release the handler
itt_store_word_with_release(handler_busy, uintptr_t(0));
}
};
template < typename handler_type, typename operation_type >
class aggregator : public aggregator_generic<operation_type> {
handler_type handle_operations;
public:
aggregator() {}
explicit aggregator(handler_type h) : handle_operations(h) {}
void initialize_handler(handler_type h) { handle_operations = h; }
void execute(operation_type *op) {
aggregator_generic<operation_type>::execute(op, handle_operations);
}
};
// the most-compatible friend declaration (vs, gcc, icc) is
// template<class U, class V> friend class aggregating_functor;
template<typename aggregating_class, typename operation_list>
class aggregating_functor {
aggregating_class *fi;
public:
aggregating_functor() {}
aggregating_functor(aggregating_class *fi_) : fi(fi_) {}
void operator()(operation_list* op_list) { fi->handle_operations(op_list); }
};
} // namespace internal
} // namespace interface6
namespace internal {
using interface6::internal::aggregated_operation;
using interface6::internal::aggregator_generic;
using interface6::internal::aggregator;
using interface6::internal::aggregating_functor;
} // namespace internal
} // namespace tbb
#endif // __TBB__aggregator_impl_H
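
An editorial sketch of the execute/handler contract implemented above, using hypothetical names (real clients are TBB containers such as the concurrent priority queue; this internal header is not intended for direct use). A caller links its operation into the mailbox and waits on the status word, while whichever thread owns handler_busy drains the whole list and marks each operation non-zero to release its originator:

// Hypothetical illustration only -- not part of TBB's public API.
struct add_op : tbb::internal::aggregated_operation<add_op> {
    long delta;                                   // request payload
};

struct add_handler {
    long total;
    add_handler() : total(0) {}
    void operator()(add_op* op_list) {            // runs on exactly one thread at a time
        while (op_list) {
            add_op* next = op_list->next;         // read before releasing the operation
            total += op_list->delta;
            __TBB_store_with_release(op_list->status, uintptr_t(1));  // wakes the waiting thread
            op_list = next;
        }
    }
};

// Caller side: a stack-allocated operation, blocking until the handler has processed it.
//   tbb::internal::aggregator_generic<add_op> agg;
//   add_handler h;
//   add_op op; op.delta = 5;
//   agg.execute(&op, h);        // returns once op.status is non-zero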

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,757 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB__flow_graph_impl_H
#define __TBB__flow_graph_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
namespace internal {
namespace graph_policy_namespace {
enum graph_buffer_policy { rejecting, reserving, queueing, tag_matching };
}
// -------------- function_body containers ----------------------
//! A functor that takes no input and generates a value of type Output
template< typename Output >
class source_body : tbb::internal::no_assign {
public:
virtual ~source_body() {}
virtual bool operator()(Output &output) = 0;
virtual source_body* clone() = 0;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
virtual void reset_body() = 0;
#endif
};
//! The leaf for source_body
template< typename Output, typename Body>
class source_body_leaf : public source_body<Output> {
public:
source_body_leaf( const Body &_body ) : body(_body), init_body(_body) { }
/*override*/ bool operator()(Output &output) { return body( output ); }
/*override*/ source_body_leaf* clone() {
return new source_body_leaf< Output, Body >(init_body);
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
/*override*/ void reset_body() {
body = init_body;
}
#endif
Body get_body() { return body; }
private:
Body body;
Body init_body;
};
//! A functor that takes an Input and generates an Output
template< typename Input, typename Output >
class function_body : tbb::internal::no_assign {
public:
virtual ~function_body() {}
virtual Output operator()(const Input &input) = 0;
virtual function_body* clone() = 0;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
virtual void reset_body() = 0;
#endif
};
//! the leaf for function_body
template <typename Input, typename Output, typename B>
class function_body_leaf : public function_body< Input, Output > {
public:
function_body_leaf( const B &_body ) : body(_body), init_body(_body) { }
Output operator()(const Input &i) { return body(i); }
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
/*override*/ void reset_body() {
body = init_body;
}
#endif
B get_body() { return body; }
/*override*/ function_body_leaf* clone() {
return new function_body_leaf< Input, Output, B >(init_body);
}
private:
B body;
B init_body;
};
//! the leaf for function_body specialized for Input and output of continue_msg
template <typename B>
class function_body_leaf< continue_msg, continue_msg, B> : public function_body< continue_msg, continue_msg > {
public:
function_body_leaf( const B &_body ) : body(_body), init_body(_body) { }
continue_msg operator()( const continue_msg &i ) {
body(i);
return i;
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
/*override*/ void reset_body() {
body = init_body;
}
#endif
B get_body() { return body; }
/*override*/ function_body_leaf* clone() {
return new function_body_leaf< continue_msg, continue_msg, B >(init_body);
}
private:
B body;
B init_body;
};
//! the leaf for function_body specialized for Output of continue_msg
template <typename Input, typename B>
class function_body_leaf< Input, continue_msg, B> : public function_body< Input, continue_msg > {
public:
function_body_leaf( const B &_body ) : body(_body), init_body(_body) { }
continue_msg operator()(const Input &i) {
body(i);
return continue_msg();
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
/*override*/ void reset_body() {
body = init_body;
}
#endif
B get_body() { return body; }
/*override*/ function_body_leaf* clone() {
return new function_body_leaf< Input, continue_msg, B >(init_body);
}
private:
B body;
B init_body;
};
//! the leaf for function_body specialized for Input of continue_msg
template <typename Output, typename B>
class function_body_leaf< continue_msg, Output, B > : public function_body< continue_msg, Output > {
public:
function_body_leaf( const B &_body ) : body(_body), init_body(_body) { }
Output operator()(const continue_msg &i) {
return body(i);
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
/*override*/ void reset_body() {
body = init_body;
}
#endif
B get_body() { return body; }
/*override*/ function_body_leaf* clone() {
return new function_body_leaf< continue_msg, Output, B >(init_body);
}
private:
B body;
B init_body;
};
//! function_body that takes an Input and a set of output ports
template<typename Input, typename OutputSet>
class multifunction_body : tbb::internal::no_assign {
public:
virtual ~multifunction_body () {}
virtual void operator()(const Input &/* input*/, OutputSet &/*oset*/) = 0;
virtual multifunction_body* clone() = 0;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
virtual void reset_body() = 0;
#endif
};
//! leaf for multifunction. OutputSet can be a std::tuple or a vector.
template<typename Input, typename OutputSet, typename B>
class multifunction_body_leaf : public multifunction_body<Input, OutputSet> {
public:
multifunction_body_leaf(const B &_body) : body(_body), init_body(_body) { }
void operator()(const Input &input, OutputSet &oset) {
body(input, oset); // body may explicitly put() to one or more of oset.
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
/*override*/ void reset_body() {
body = init_body;
}
#endif
B get_body() { return body; }
/*override*/ multifunction_body_leaf* clone() {
return new multifunction_body_leaf<Input, OutputSet,B>(init_body);
}
private:
B body;
B init_body;
};
// --------------------------- end of function_body containers ------------------------
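// Editorial illustration (hypothetical, not part of the original header): a public node
// type-erases the user's callable through the wrappers above, e.g.
//
//     struct square { int operator()( const int &v ) const { return v*v; } };
//     function_body<int,int> *fb = new function_body_leaf<int,int,square>( square() );
//     int nine = (*fb)(3);     // virtual dispatch to the stored body
//     delete fb;               // clone()/init_body let a reset graph re-create the body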
// --------------------------- node task bodies ---------------------------------------
//! A task that calls a node's forward_task function
template< typename NodeType >
class forward_task_bypass : public task {
NodeType &my_node;
public:
forward_task_bypass( NodeType &n ) : my_node(n) {}
task *execute() {
task * new_task = my_node.forward_task();
if (new_task == SUCCESSFULLY_ENQUEUED) new_task = NULL;
return new_task;
}
};
//! A task that calls a node's apply_body_bypass function, passing in an input of type Input
// return the task* unless it is SUCCESSFULLY_ENQUEUED, in which case return NULL
template< typename NodeType, typename Input >
class apply_body_task_bypass : public task {
NodeType &my_node;
Input my_input;
public:
apply_body_task_bypass( NodeType &n, const Input &i ) : my_node(n), my_input(i) {}
task *execute() {
task * next_task = my_node.apply_body_bypass( my_input );
if(next_task == SUCCESSFULLY_ENQUEUED) next_task = NULL;
return next_task;
}
};
//! A task that calls a node's apply_body function with no input
template< typename NodeType >
class source_task_bypass : public task {
NodeType &my_node;
public:
source_task_bypass( NodeType &n ) : my_node(n) {}
task *execute() {
task *new_task = my_node.apply_body_bypass( );
if(new_task == SUCCESSFULLY_ENQUEUED) return NULL;
return new_task;
}
};
// ------------------------ end of node task bodies -----------------------------------
//! An empty functor that takes an Input and returns a default constructed Output
template< typename Input, typename Output >
struct empty_body {
Output operator()( const Input & ) const { return Output(); }
};
//! A node_cache maintains a std::queue of elements of type T. Each operation is protected by a lock.
template< typename T, typename M=spin_mutex >
class node_cache {
public:
typedef size_t size_type;
bool empty() {
typename my_mutex_type::scoped_lock lock( my_mutex );
return internal_empty();
}
void add( T &n ) {
typename my_mutex_type::scoped_lock lock( my_mutex );
internal_push(n);
}
void remove( T &n ) {
typename my_mutex_type::scoped_lock lock( my_mutex );
for ( size_t i = internal_size(); i != 0; --i ) {
T &s = internal_pop();
if ( &s == &n ) return; // only remove one predecessor per request
internal_push(s);
}
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
typedef std::vector<T *> predecessor_vector_type;
void internal_add_built_predecessor( T &n ) {
typename my_mutex_type::scoped_lock lock( my_mutex );
my_built_predecessors.add_edge(n);
}
void internal_delete_built_predecessor( T &n ) {
typename my_mutex_type::scoped_lock lock( my_mutex );
my_built_predecessors.delete_edge(n);
}
void copy_predecessors( predecessor_vector_type &v) {
typename my_mutex_type::scoped_lock lock( my_mutex );
my_built_predecessors.copy_edges(v);
}
size_t predecessor_count() {
typename my_mutex_type::scoped_lock lock(my_mutex);
return (size_t)(my_built_predecessors.edge_count());
}
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
protected:
typedef M my_mutex_type;
my_mutex_type my_mutex;
std::queue< T * > my_q;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
edge_container<T> my_built_predecessors;
#endif
// Assumes lock is held
inline bool internal_empty( ) {
return my_q.empty();
}
// Assumes lock is held
inline size_type internal_size( ) {
return my_q.size();
}
// Assumes lock is held
inline void internal_push( T &n ) {
my_q.push(&n);
}
// Assumes lock is held
inline T &internal_pop() {
T *v = my_q.front();
my_q.pop();
return *v;
}
};
//! A cache of predecessors that only supports try_get
template< typename T, typename M=spin_mutex >
class predecessor_cache : public node_cache< sender<T>, M > {
public:
typedef M my_mutex_type;
typedef T output_type;
typedef sender<output_type> predecessor_type;
typedef receiver<output_type> successor_type;
predecessor_cache( ) : my_owner( NULL ) { }
void set_owner( successor_type *owner ) { my_owner = owner; }
bool get_item( output_type &v ) {
bool msg = false;
do {
predecessor_type *src;
{
typename my_mutex_type::scoped_lock lock(this->my_mutex);
if ( this->internal_empty() ) {
break;
}
src = &this->internal_pop();
}
// Try to get from this sender
msg = src->try_get( v );
if (msg == false) {
// Relinquish ownership of the edge
if ( my_owner)
src->register_successor( *my_owner );
} else {
// Retain ownership of the edge
this->add(*src);
}
} while ( msg == false );
return msg;
}
void reset( __TBB_PFG_RESET_ARG(reset_flags f)) {
if(my_owner) {
for(;;) {
predecessor_type *src;
{
if(this->internal_empty()) break;
src = &this->internal_pop();
}
src->register_successor( *my_owner);
}
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
if (f&rf_extract && my_owner)
my_built_predecessors.receiver_extract(*my_owner);
__TBB_ASSERT(!(f&rf_extract) || this->internal_empty(), "predecessor cache not empty");
#endif
}
protected:
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
using node_cache< sender<T>, M >::my_built_predecessors;
#endif
successor_type *my_owner;
};
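// The get_item loop above is the pull side of the push/pull edge protocol: a
// failed try_get hands the edge back to the sender by re-registering the owner,
// while a successful one keeps the sender cached for the next pull. A minimal
// user-level sketch of the behaviour this enables, assuming only the public
// tbb/flow_graph.h interface and C++11 lambdas (guarded out so it does not
// affect this header):
#if 0
#include "tbb/flow_graph.h"
#include <cstdio>
int main() {
    using namespace tbb::flow;
    graph g;
    queue_node<int> q(g);
    // serial + rejecting: items the node cannot accept stay in the queue_node
    // and are pulled back later through the node's predecessor cache.
    function_node<int, int, rejecting> worker(g, serial,
        [](int v) { std::printf("worker got %d\n", v); return v; });
    make_edge(q, worker);
    for (int i = 0; i < 4; ++i) q.try_put(i);
    g.wait_for_all();
    return 0;
}
#endif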
//! A cache of predecessors that supports requests and reservations
template< typename T, typename M=spin_mutex >
class reservable_predecessor_cache : public predecessor_cache< T, M > {
public:
typedef M my_mutex_type;
typedef T output_type;
typedef sender<T> predecessor_type;
typedef receiver<T> successor_type;
reservable_predecessor_cache( ) : reserved_src(NULL) { }
bool
try_reserve( output_type &v ) {
bool msg = false;
do {
{
typename my_mutex_type::scoped_lock lock(this->my_mutex);
if ( reserved_src || this->internal_empty() )
return false;
reserved_src = &this->internal_pop();
}
// Try to get from this sender
msg = reserved_src->try_reserve( v );
if (msg == false) {
typename my_mutex_type::scoped_lock lock(this->my_mutex);
// Relinquish ownership of the edge
reserved_src->register_successor( *this->my_owner );
reserved_src = NULL;
} else {
// Retain ownership of the edge
this->add( *reserved_src );
}
} while ( msg == false );
return msg;
}
bool
try_release( ) {
reserved_src->try_release( );
reserved_src = NULL;
return true;
}
bool
try_consume( ) {
reserved_src->try_consume( );
reserved_src = NULL;
return true;
}
void reset( __TBB_PFG_RESET_ARG(reset_flags f)) {
reserved_src = NULL;
predecessor_cache<T,M>::reset(__TBB_PFG_RESET_ARG(f));
}
private:
predecessor_type *reserved_src;
};
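// try_reserve/try_release/try_consume above are what reserving joins build on:
// a predecessor's item is held under reservation until every input can be
// consumed together, or released if not. A minimal user-level sketch, assuming
// only the public tbb/flow_graph.h interface and C++11 lambdas (guarded out so
// it does not affect this header):
#if 0
#include "tbb/flow_graph.h"
#include <cstdio>
int main() {
    using namespace tbb::flow;
    graph g;
    buffer_node<int>   ints(g);
    buffer_node<float> floats(g);
    join_node< tuple<int, float>, reserving > j(g);   // consumes only when both sides can be reserved
    function_node< tuple<int, float>, continue_msg > sink(g, serial,
        [](const tuple<int, float> &t) -> continue_msg {
            std::printf("%d %f\n", get<0>(t), get<1>(t));
            return continue_msg();
        });
    make_edge(ints,   input_port<0>(j));
    make_edge(floats, input_port<1>(j));
    make_edge(j, sink);
    ints.try_put(1);
    floats.try_put(2.5f);
    g.wait_for_all();
    return 0;
}
#endif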
//! An abstract cache of successors
template<typename T, typename M=spin_rw_mutex >
class successor_cache : tbb::internal::no_copy {
protected:
typedef M my_mutex_type;
my_mutex_type my_mutex;
typedef receiver<T> *pointer_type;
typedef std::list< pointer_type > my_successors_type;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
edge_container<receiver<T> > my_built_successors;
#endif
my_successors_type my_successors;
sender<T> *my_owner;
public:
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
typedef std::vector<pointer_type> successor_vector_type;
void internal_add_built_successor( receiver<T> &r) {
typename my_mutex_type::scoped_lock l(my_mutex, true);
my_built_successors.add_edge( r );
}
void internal_delete_built_successor( receiver<T> &r) {
typename my_mutex_type::scoped_lock l(my_mutex, true);
my_built_successors.delete_edge(r);
}
void copy_successors( successor_vector_type &v) {
typename my_mutex_type::scoped_lock l(my_mutex, false);
my_built_successors.copy_edges(v);
}
size_t successor_count() {
typename my_mutex_type::scoped_lock l(my_mutex,false);
return my_built_successors.edge_count();
}
void reset( __TBB_PFG_RESET_ARG(reset_flags f)) {
if (f&rf_extract && my_owner)
my_built_successors.sender_extract(*my_owner);
}
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
successor_cache( ) : my_owner(NULL) {}
void set_owner( sender<T> *owner ) { my_owner = owner; }
virtual ~successor_cache() {}
void register_successor( receiver<T> &r ) {
typename my_mutex_type::scoped_lock l(my_mutex, true);
my_successors.push_back( &r );
}
void remove_successor( receiver<T> &r ) {
typename my_mutex_type::scoped_lock l(my_mutex, true);
for ( typename my_successors_type::iterator i = my_successors.begin();
i != my_successors.end(); ++i ) {
if ( *i == & r ) {
my_successors.erase(i);
break;
}
}
}
bool empty() {
typename my_mutex_type::scoped_lock l(my_mutex, false);
return my_successors.empty();
}
void clear() {
my_successors.clear();
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
my_built_successors.clear();
#endif
}
virtual task * try_put_task( const T &t ) = 0;
};
//! An abstract cache of successors, specialized to continue_msg
template<>
class successor_cache< continue_msg > : tbb::internal::no_copy {
protected:
typedef spin_rw_mutex my_mutex_type;
my_mutex_type my_mutex;
typedef receiver<continue_msg> *pointer_type;
typedef std::list< pointer_type > my_successors_type;
my_successors_type my_successors;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
edge_container<receiver<continue_msg> > my_built_successors;
#endif
sender<continue_msg> *my_owner;
public:
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
typedef std::vector<pointer_type> successor_vector_type;
void internal_add_built_successor( receiver<continue_msg> &r) {
my_mutex_type::scoped_lock l(my_mutex, true);
my_built_successors.add_edge( r );
}
void internal_delete_built_successor( receiver<continue_msg> &r) {
my_mutex_type::scoped_lock l(my_mutex, true);
my_built_successors.delete_edge(r);
}
void copy_successors( successor_vector_type &v) {
my_mutex_type::scoped_lock l(my_mutex, false);
my_built_successors.copy_edges(v);
}
size_t successor_count() {
my_mutex_type::scoped_lock l(my_mutex,false);
return my_built_successors.edge_count();
}
void reset( __TBB_PFG_RESET_ARG(reset_flags f)) {
if (f&rf_extract && my_owner)
my_built_successors.sender_extract(*my_owner);
}
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
successor_cache( ) : my_owner(NULL) {}
void set_owner( sender<continue_msg> *owner ) { my_owner = owner; }
virtual ~successor_cache() {}
void register_successor( receiver<continue_msg> &r ) {
my_mutex_type::scoped_lock l(my_mutex, true);
my_successors.push_back( &r );
if ( my_owner && r.is_continue_receiver() ) {
r.register_predecessor( *my_owner );
}
}
void remove_successor( receiver<continue_msg> &r ) {
my_mutex_type::scoped_lock l(my_mutex, true);
for ( my_successors_type::iterator i = my_successors.begin();
i != my_successors.end(); ++i ) {
if ( *i == & r ) {
// TODO: Check if we need to test for continue_receiver before
// removing from r.
if ( my_owner )
r.remove_predecessor( *my_owner );
my_successors.erase(i);
break;
}
}
}
bool empty() {
my_mutex_type::scoped_lock l(my_mutex, false);
return my_successors.empty();
}
void clear() {
my_successors.clear();
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
my_built_successors.clear();
#endif
}
virtual task * try_put_task( const continue_msg &t ) = 0;
};
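// Note that register_successor above also registers my_owner as a predecessor
// of any continue_receiver, which is how a continue_node learns how many
// predecessors it must hear from before firing. A minimal user-level sketch,
// assuming only the public tbb/flow_graph.h interface and C++11 lambdas
// (guarded out so it does not affect this header):
#if 0
#include "tbb/flow_graph.h"
#include <cstdio>
int main() {
    using namespace tbb::flow;
    graph g;
    broadcast_node<continue_msg> start(g);
    continue_node<continue_msg> a(g, [](const continue_msg &) { std::printf("a\n"); return continue_msg(); });
    continue_node<continue_msg> b(g, [](const continue_msg &) { std::printf("b\n"); return continue_msg(); });
    continue_node<continue_msg> both(g, [](const continue_msg &) { std::printf("after a and b\n"); return continue_msg(); });
    make_edge(start, a);
    make_edge(start, b);
    make_edge(a, both);   // each make_edge registers `both` here and bumps its predecessor count
    make_edge(b, both);
    start.try_put(continue_msg());
    g.wait_for_all();
    return 0;
}
#endif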
//! A cache of successors that are broadcast to
template<typename T, typename M=spin_rw_mutex>
class broadcast_cache : public successor_cache<T, M> {
typedef M my_mutex_type;
typedef std::list< receiver<T> * > my_successors_type;
public:
broadcast_cache( ) {}
// Calls try_put_task on each successor and returns the last task received (if any)
/*override*/ task * try_put_task( const T &t ) {
task * last_task = NULL;
bool upgraded = true;
typename my_mutex_type::scoped_lock l(this->my_mutex, upgraded);
typename my_successors_type::iterator i = this->my_successors.begin();
while ( i != this->my_successors.end() ) {
task *new_task = (*i)->try_put_task(t);
last_task = combine_tasks(last_task, new_task); // enqueue if necessary
if(new_task) {
++i;
}
else { // failed
if ( (*i)->register_predecessor(*this->my_owner) ) {
if (!upgraded) {
l.upgrade_to_writer();
upgraded = true;
}
i = this->my_successors.erase(i);
} else {
++i;
}
}
}
return last_task;
}
};
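// broadcast_cache::try_put_task is the machinery behind nodes that forward one
// message to every successor, e.g. broadcast_node. A minimal user-level sketch,
// assuming only the public tbb/flow_graph.h interface and C++11 lambdas
// (guarded out so it does not affect this header):
#if 0
#include "tbb/flow_graph.h"
#include <cstdio>
int main() {
    using namespace tbb::flow;
    graph g;
    broadcast_node<int> fanout(g);
    function_node<int, int> f1(g, unlimited, [](int v) { std::printf("f1: %d\n", v); return v; });
    function_node<int, int> f2(g, unlimited, [](int v) { std::printf("f2: %d\n", v); return v; });
    make_edge(fanout, f1);
    make_edge(fanout, f2);
    fanout.try_put(7);        // both successors receive 7
    g.wait_for_all();
    return 0;
}
#endif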
//! A cache of successors that are put in a round-robin fashion
template<typename T, typename M=spin_rw_mutex >
class round_robin_cache : public successor_cache<T, M> {
typedef size_t size_type;
typedef M my_mutex_type;
typedef std::list< receiver<T> * > my_successors_type;
public:
round_robin_cache( ) {}
size_type size() {
typename my_mutex_type::scoped_lock l(this->my_mutex, false);
return this->my_successors.size();
}
/*override*/task *try_put_task( const T &t ) {
bool upgraded = true;
typename my_mutex_type::scoped_lock l(this->my_mutex, upgraded);
typename my_successors_type::iterator i = this->my_successors.begin();
while ( i != this->my_successors.end() ) {
task *new_task = (*i)->try_put_task(t);
if ( new_task ) {
return new_task;
} else {
if ( (*i)->register_predecessor(*this->my_owner) ) {
if (!upgraded) {
l.upgrade_to_writer();
upgraded = true;
}
i = this->my_successors.erase(i);
}
else {
++i;
}
}
}
return NULL;
}
};
template<typename T>
class decrementer : public continue_receiver, tbb::internal::no_copy {
T *my_node;
task *execute() {
return my_node->decrement_counter();
}
public:
typedef continue_msg input_type;
typedef continue_msg output_type;
decrementer( int number_of_predecessors = 0 ) : continue_receiver( number_of_predecessors ) { }
void set_owner( T *node ) { my_node = node; }
};
}
#endif // __TBB__flow_graph_impl_H
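// The decrementer above backs the public `decrement` port of limiter_node: each
// continue_msg it receives admits one more item downstream. A minimal user-level
// sketch of that wiring, assuming only the public tbb/flow_graph.h interface and
// C++11 lambdas (guarded out so it does not affect this header):
#if 0
#include "tbb/flow_graph.h"
#include <cstdio>
int main() {
    using namespace tbb::flow;
    graph g;
    buffer_node<int> input(g);          // buffers items the limiter rejects
    limiter_node<int> limit(g, 2);      // at most 2 items in flight downstream
    function_node<int, continue_msg> work(g, unlimited,
        [](int v) -> continue_msg { std::printf("working on %d\n", v); return continue_msg(); });
    make_edge(input, limit);
    make_edge(limit, work);
    make_edge(work, limit.decrement);   // finished work admits the next buffered item
    for (int i = 0; i < 10; ++i) input.try_put(i);
    g.wait_for_all();
    return 0;
}
#endif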

View File

@@ -0,0 +1,453 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB__flow_graph_indexer_impl_H
#define __TBB__flow_graph_indexer_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#include "tbb/internal/_flow_graph_types_impl.h"
namespace internal {
// The output of the indexer_node is a tbb::flow::tagged_msg of the form
// tagged_msg<tag, result>, where the value of tag indicates which input
// port the result forwarded to the successor came from.
template<typename IndexerNodeBaseType, typename T, size_t K>
task* do_try_put(const T &v, void *p) {
typename IndexerNodeBaseType::output_type o(K, v);
return reinterpret_cast<IndexerNodeBaseType *>(p)->try_put_task(&o);
}
template<typename TupleTypes,int N>
struct indexer_helper {
template<typename IndexerNodeBaseType, typename PortTuple>
static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p) {
typedef typename tuple_element<N-1, TupleTypes>::type T;
task *(*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, N-1>;
tbb::flow::get<N-1>(my_input).set_up(p, indexer_node_put_task);
indexer_helper<TupleTypes,N-1>::template set_indexer_node_pointer<IndexerNodeBaseType,PortTuple>(my_input, p);
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
template<typename InputTuple>
static inline void reset_inputs(InputTuple &my_input, reset_flags f) {
join_helper<N-1>::reset_inputs(my_input, f);
tbb::flow::get<N-1>(my_input).reset_receiver(f);
}
#endif
};
template<typename TupleTypes>
struct indexer_helper<TupleTypes,1> {
template<typename IndexerNodeBaseType, typename PortTuple>
static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p) {
typedef typename tuple_element<0, TupleTypes>::type T;
task *(*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, 0>;
tbb::flow::get<0>(my_input).set_up(p, indexer_node_put_task);
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
template<typename InputTuple>
static inline void reset_inputs(InputTuple &my_input, reset_flags f) {
tbb::flow::get<0>(my_input).reset_receiver(f);
}
#endif
};
template<typename T>
class indexer_input_port : public receiver<T> {
private:
void* my_indexer_ptr;
typedef task* (* forward_function_ptr)(T const &, void* );
forward_function_ptr my_try_put_task;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
spin_mutex my_pred_mutex;
edge_container<sender<T> > my_built_predecessors;
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
public:
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
indexer_input_port() : my_pred_mutex() {}
indexer_input_port( const indexer_input_port & /*other*/ ) : receiver<T>(), my_pred_mutex() {
}
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
void set_up(void *p, forward_function_ptr f) {
my_indexer_ptr = p;
my_try_put_task = f;
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
typedef std::vector<sender<T> *> predecessor_vector_type;
/*override*/size_t predecessor_count() {
spin_mutex::scoped_lock l(my_pred_mutex);
return my_built_predecessors.edge_count();
}
/*override*/void internal_add_built_predecessor(sender<T> &p) {
spin_mutex::scoped_lock l(my_pred_mutex);
my_built_predecessors.add_edge(p);
}
/*override*/void internal_delete_built_predecessor(sender<T> &p) {
spin_mutex::scoped_lock l(my_pred_mutex);
my_built_predecessors.delete_edge(p);
}
/*override*/void copy_predecessors( predecessor_vector_type &v) {
spin_mutex::scoped_lock l(my_pred_mutex);
return my_built_predecessors.copy_edges(v);
}
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
protected:
template< typename R, typename B > friend class run_and_put_task;
template<typename X, typename Y> friend class internal::broadcast_cache;
template<typename X, typename Y> friend class internal::round_robin_cache;
task *try_put_task(const T &v) {
return my_try_put_task(v, my_indexer_ptr);
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
public:
/*override*/void reset_receiver(__TBB_PFG_RESET_ARG(reset_flags f)) {
if(f&rf_extract) my_built_predecessors.receiver_extract(*this);
}
#else
/*override*/void reset_receiver(__TBB_PFG_RESET_ARG(reset_flags /*f*/)) { }
#endif
};
template<typename InputTuple, typename OutputType, typename StructTypes>
class indexer_node_FE {
public:
static const int N = tbb::flow::tuple_size<InputTuple>::value;
typedef OutputType output_type;
typedef InputTuple input_type;
input_type &input_ports() { return my_inputs; }
protected:
input_type my_inputs;
};
//! indexer_node_base
template<typename InputTuple, typename OutputType, typename StructTypes>
class indexer_node_base : public graph_node, public indexer_node_FE<InputTuple, OutputType,StructTypes>,
public sender<OutputType> {
protected:
using graph_node::my_graph;
public:
static const size_t N = tbb::flow::tuple_size<InputTuple>::value;
typedef OutputType output_type;
typedef StructTypes tuple_types;
typedef receiver<output_type> successor_type;
typedef indexer_node_FE<InputTuple, output_type,StructTypes> input_ports_type;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
typedef std::vector<successor_type *> successor_vector_type;
#endif
private:
// ----------- Aggregator ------------
enum op_type { reg_succ, rem_succ, try__put_task
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
, add_blt_succ, del_blt_succ,
blt_succ_cnt, blt_succ_cpy
#endif
};
enum op_stat {WAIT=0, SUCCEEDED, FAILED};
typedef indexer_node_base<InputTuple,output_type,StructTypes> my_class;
class indexer_node_base_operation : public aggregated_operation<indexer_node_base_operation> {
public:
char type;
union {
output_type const *my_arg;
successor_type *my_succ;
task *bypass_t;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
size_t cnt_val;
successor_vector_type *succv;
#endif
};
indexer_node_base_operation(const output_type* e, op_type t) :
type(char(t)), my_arg(e) {}
indexer_node_base_operation(const successor_type &s, op_type t) : type(char(t)),
my_succ(const_cast<successor_type *>(&s)) {}
indexer_node_base_operation(op_type t) : type(char(t)) {}
};
typedef internal::aggregating_functor<my_class, indexer_node_base_operation> my_handler;
friend class internal::aggregating_functor<my_class, indexer_node_base_operation>;
aggregator<my_handler, indexer_node_base_operation> my_aggregator;
void handle_operations(indexer_node_base_operation* op_list) {
indexer_node_base_operation *current;
while(op_list) {
current = op_list;
op_list = op_list->next;
switch(current->type) {
case reg_succ:
my_successors.register_successor(*(current->my_succ));
__TBB_store_with_release(current->status, SUCCEEDED);
break;
case rem_succ:
my_successors.remove_successor(*(current->my_succ));
__TBB_store_with_release(current->status, SUCCEEDED);
break;
case try__put_task: {
current->bypass_t = my_successors.try_put_task(*(current->my_arg));
__TBB_store_with_release(current->status, SUCCEEDED); // the result of try_put_task is handed back to the caller via bypass_t
}
break;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
case add_blt_succ:
my_successors.internal_add_built_successor(*(current->my_succ));
__TBB_store_with_release(current->status, SUCCEEDED);
break;
case del_blt_succ:
my_successors.internal_delete_built_successor(*(current->my_succ));
__TBB_store_with_release(current->status, SUCCEEDED);
break;
case blt_succ_cnt:
current->cnt_val = my_successors.successor_count();
__TBB_store_with_release(current->status, SUCCEEDED);
break;
case blt_succ_cpy:
my_successors.copy_successors(*(current->succv));
__TBB_store_with_release(current->status, SUCCEEDED);
break;
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
}
}
}
// ---------- end aggregator -----------
public:
indexer_node_base(graph& g) : graph_node(g), input_ports_type() {
indexer_helper<StructTypes,N>::set_indexer_node_pointer(this->my_inputs, this);
my_successors.set_owner(this);
my_aggregator.initialize_handler(my_handler(this));
}
indexer_node_base(const indexer_node_base& other) : graph_node(other.my_graph), input_ports_type(), sender<output_type>() {
indexer_helper<StructTypes,N>::set_indexer_node_pointer(this->my_inputs, this);
my_successors.set_owner(this);
my_aggregator.initialize_handler(my_handler(this));
}
bool register_successor(successor_type &r) {
indexer_node_base_operation op_data(r, reg_succ);
my_aggregator.execute(&op_data);
return op_data.status == SUCCEEDED;
}
bool remove_successor( successor_type &r) {
indexer_node_base_operation op_data(r, rem_succ);
my_aggregator.execute(&op_data);
return op_data.status == SUCCEEDED;
}
task * try_put_task(output_type const *v) {
indexer_node_base_operation op_data(v, try__put_task);
my_aggregator.execute(&op_data);
return op_data.bypass_t;
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
void internal_add_built_successor( successor_type &r) {
indexer_node_base_operation op_data(r, add_blt_succ);
my_aggregator.execute(&op_data);
}
void internal_delete_built_successor( successor_type &r) {
indexer_node_base_operation op_data(r, del_blt_succ);
my_aggregator.execute(&op_data);
}
size_t successor_count() {
indexer_node_base_operation op_data(blt_succ_cnt);
my_aggregator.execute(&op_data);
return op_data.cnt_val;
}
void copy_successors( successor_vector_type &v) {
indexer_node_base_operation op_data(blt_succ_cpy);
op_data.succv = &v;
my_aggregator.execute(&op_data);
}
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
protected:
/*override*/void reset(__TBB_PFG_RESET_ARG(reset_flags f)) {
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
my_successors.reset(f);
indexer_helper<StructTypes,N>::reset_inputs(this->my_inputs, f);
#endif
}
private:
broadcast_cache<output_type, null_rw_mutex> my_successors;
}; //indexer_node_base
template<int N, typename InputTuple> struct input_types;
template<typename InputTuple>
struct input_types<1, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename internal::tagged_msg<size_t, first_type > type;
};
template<typename InputTuple>
struct input_types<2, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type> type;
};
template<typename InputTuple>
struct input_types<3, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type> type;
};
template<typename InputTuple>
struct input_types<4, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type> type;
};
template<typename InputTuple>
struct input_types<5, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename tuple_element<4, InputTuple>::type fifth_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type> type;
};
template<typename InputTuple>
struct input_types<6, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename tuple_element<4, InputTuple>::type fifth_type;
typedef typename tuple_element<5, InputTuple>::type sixth_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type> type;
};
template<typename InputTuple>
struct input_types<7, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename tuple_element<4, InputTuple>::type fifth_type;
typedef typename tuple_element<5, InputTuple>::type sixth_type;
typedef typename tuple_element<6, InputTuple>::type seventh_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type,
seventh_type> type;
};
template<typename InputTuple>
struct input_types<8, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename tuple_element<4, InputTuple>::type fifth_type;
typedef typename tuple_element<5, InputTuple>::type sixth_type;
typedef typename tuple_element<6, InputTuple>::type seventh_type;
typedef typename tuple_element<7, InputTuple>::type eighth_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type,
seventh_type, eighth_type> type;
};
template<typename InputTuple>
struct input_types<9, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename tuple_element<4, InputTuple>::type fifth_type;
typedef typename tuple_element<5, InputTuple>::type sixth_type;
typedef typename tuple_element<6, InputTuple>::type seventh_type;
typedef typename tuple_element<7, InputTuple>::type eighth_type;
typedef typename tuple_element<8, InputTuple>::type nineth_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type,
seventh_type, eighth_type, nineth_type> type;
};
template<typename InputTuple>
struct input_types<10, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename tuple_element<4, InputTuple>::type fifth_type;
typedef typename tuple_element<5, InputTuple>::type sixth_type;
typedef typename tuple_element<6, InputTuple>::type seventh_type;
typedef typename tuple_element<7, InputTuple>::type eighth_type;
typedef typename tuple_element<8, InputTuple>::type nineth_type;
typedef typename tuple_element<9, InputTuple>::type tenth_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type,
seventh_type, eighth_type, nineth_type,
tenth_type> type;
};
// type generators
template<typename OutputTuple>
struct indexer_types : public input_types<tuple_size<OutputTuple>::value, OutputTuple> {
static const int N = tbb::flow::tuple_size<OutputTuple>::value;
typedef typename input_types<N, OutputTuple>::type output_type;
typedef typename wrap_tuple_elements<N,indexer_input_port,OutputTuple>::type input_ports_type;
typedef internal::indexer_node_FE<input_ports_type,output_type,OutputTuple> indexer_FE_type;
typedef internal::indexer_node_base<input_ports_type, output_type, OutputTuple> indexer_base_type;
};
template<class OutputTuple>
class unfolded_indexer_node : public indexer_types<OutputTuple>::indexer_base_type {
public:
typedef typename indexer_types<OutputTuple>::input_ports_type input_ports_type;
typedef OutputTuple tuple_types;
typedef typename indexer_types<OutputTuple>::output_type output_type;
private:
typedef typename indexer_types<OutputTuple>::indexer_base_type base_type;
public:
unfolded_indexer_node(graph& g) : base_type(g) {}
unfolded_indexer_node(const unfolded_indexer_node &other) : base_type(other) {}
};
} /* namespace internal */
#endif /* __TBB__flow_graph_indexer_impl_H */
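// A minimal user-level sketch of the indexer machinery defined above, assuming
// only the public tbb/flow_graph.h interface and C++11 lambdas (guarded out so
// it does not affect this header). Each input port wraps its value in a
// tagged_msg whose tag() identifies the originating port:
#if 0
#include "tbb/flow_graph.h"
#include <cstdio>
int main() {
    using namespace tbb::flow;
    graph g;
    typedef indexer_node<int, float> indexer_type;
    indexer_type idx(g);
    function_node<indexer_type::output_type, continue_msg> sink(g, serial,
        [](const indexer_type::output_type &msg) -> continue_msg {
            if (msg.tag() == 0) std::printf("port 0: %d\n", cast_to<int>(msg));
            else                std::printf("port 1: %f\n", cast_to<float>(msg));
            return continue_msg();
        });
    make_edge(idx, sink);
    input_port<0>(idx).try_put(3);
    input_port<1>(idx).try_put(2.5f);
    g.wait_for_all();
    return 0;
}
#endif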

View File

@@ -0,0 +1,279 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB__flow_graph_item_buffer_impl_H
#define __TBB__flow_graph_item_buffer_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#include "tbb/internal/_flow_graph_types_impl.h" // for aligned_pair
// in namespace tbb::flow::interface7 (included in _flow_graph_node_impl.h)
//! Expandable buffer of items. The possible operations are push, pop,
//! tests for empty and so forth. No mutual exclusion is built in.
//! Objects are constructed into slots and explicitly destroyed. get_my_item gives
//! a read-only reference to the item in the buffer. set_my_item may be called
//! with either an empty or occupied slot.
using internal::aligned_pair;
using internal::alignment_of;
namespace internal {
template <typename T, typename A=cache_aligned_allocator<T> >
class item_buffer {
public:
typedef T item_type;
enum buffer_item_state { no_item=0, has_item=1, reserved_item=2 };
protected:
typedef size_t size_type;
typedef typename aligned_pair<item_type, buffer_item_state>::type buffer_item_type;
typedef typename A::template rebind<buffer_item_type>::other allocator_type;
buffer_item_type *my_array;
size_type my_array_size;
static const size_type initial_buffer_size = 4;
size_type my_head;
size_type my_tail;
bool buffer_empty() { return my_head == my_tail; }
buffer_item_type &item(size_type i) {
__TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].second))%alignment_of<buffer_item_state>::value),NULL);
__TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].first))%alignment_of<item_type>::value), NULL);
return my_array[i & (my_array_size - 1) ];
}
bool my_item_valid(size_type i) { return (i < my_tail) && (i >= my_head) && (item(i).second != no_item); }
bool my_item_reserved(size_type i) { return item(i).second == reserved_item; }
// object management in buffer
const item_type &get_my_item(size_t i) {
__TBB_ASSERT(my_item_valid(i),"attempt to get invalid item");
item_type *itm = (tbb::internal::punned_cast<item_type *>(&(item(i).first)));
return *(const item_type *)itm;
}
// may be called with an empty slot or a slot that has already been constructed into.
void set_my_item(size_t i, const item_type &o) {
if(item(i).second != no_item) {
destroy_item(i);
}
new(&(item(i).first)) item_type(o);
item(i).second = has_item;
}
// destructively-fetch an object from the buffer
void fetch_item(size_t i, item_type &o) {
__TBB_ASSERT(my_item_valid(i), "Trying to fetch an empty slot");
o = get_my_item(i); // could have std::move assign semantics
destroy_item(i);
}
// Move an existing item from one slot to another. The moved-to slot must be unoccupied,
// and the moved-from slot must exist and not be reserved. Afterwards, from will be empty
// and to will be occupied but not reserved.
void move_item(size_t to, size_t from) {
__TBB_ASSERT(!my_item_valid(to), "Trying to move to a non-empty slot");
__TBB_ASSERT(my_item_valid(from), "Trying to move from an empty slot");
set_my_item(to, get_my_item(from)); // could have std::move semantics
destroy_item(from);
}
// put an item in an empty slot. Return true if successful, else false
bool place_item(size_t here, const item_type &me) {
#if !TBB_DEPRECATED_SEQUENCER_DUPLICATES
if(my_item_valid(here)) return false;
#endif
set_my_item(here, me);
return true;
}
// could be implemented with std::move semantics
void swap_items(size_t i, size_t j) {
__TBB_ASSERT(my_item_valid(i) && my_item_valid(j), "attempt to swap invalid item(s)");
item_type temp = get_my_item(i);
set_my_item(i, get_my_item(j));
set_my_item(j, temp);
}
void destroy_item(size_type i) {
__TBB_ASSERT(my_item_valid(i), "destruction of invalid item");
(tbb::internal::punned_cast<item_type *>(&(item(i).first)))->~item_type();
item(i).second = no_item;
}
// returns a copy of the front
void copy_front(item_type &v) {
__TBB_ASSERT(my_item_valid(my_head), "attempt to fetch head non-item");
v = get_my_item(my_head);
}
// returns a copy of the back
void copy_back(item_type &v) {
__TBB_ASSERT(my_item_valid(my_tail-1), "attempt to fetch tail non-item");
v = get_my_item(my_tail-1);
}
// The following methods are for reservation of the front of a buffer.
void reserve_item(size_type i) { __TBB_ASSERT(my_item_valid(i) && !my_item_reserved(i), "item cannot be reserved"); item(i).second = reserved_item; }
void release_item(size_type i) { __TBB_ASSERT(my_item_reserved(i), "item is not reserved"); item(i).second = has_item; }
void destroy_front() { destroy_item(my_head); ++my_head; }
void destroy_back() { destroy_item(my_tail-1); --my_tail; }
// we have to be able to test against a new tail value without changing my_tail
// grow_my_array doesn't work if we change my_tail when the old array is too small
size_type size(size_t new_tail = 0) { return (new_tail ? new_tail : my_tail) - my_head; }
size_type capacity() { return my_array_size; }
// sequencer_node does not use this method, so we don't
// need a version that passes in the new_tail value.
bool buffer_full() { return size() >= capacity(); }
//! Grows the internal array.
void grow_my_array( size_t minimum_size ) {
// test that we haven't made the structure inconsistent.
__TBB_ASSERT(capacity() >= my_tail - my_head, "total items exceed capacity");
size_type new_size = my_array_size ? 2*my_array_size : initial_buffer_size;
while( new_size<minimum_size )
new_size*=2;
buffer_item_type* new_array = allocator_type().allocate(new_size);
// initialize validity to "no"
for( size_type i=0; i<new_size; ++i ) { new_array[i].second = no_item; }
for( size_type i=my_head; i<my_tail; ++i) {
if(my_item_valid(i)) { // sequencer_node may have empty slots
// placement-new copy-construct; could be std::move
char *new_space = (char *)&(new_array[i&(new_size-1)].first);
(void)new(new_space) item_type(get_my_item(i));
new_array[i&(new_size-1)].second = item(i).second;
}
}
clean_up_buffer(/*reset_pointers*/false);
my_array = new_array;
my_array_size = new_size;
}
bool push_back(item_type &v) {
if(buffer_full()) {
grow_my_array(size() + 1);
}
set_my_item(my_tail, v);
++my_tail;
return true;
}
bool pop_back(item_type &v) {
if (!my_item_valid(my_tail-1)) {
return false;
}
copy_back(v);
destroy_back();
return true;
}
bool pop_front(item_type &v) {
if(!my_item_valid(my_head)) {
return false;
}
copy_front(v);
destroy_front();
return true;
}
// This is used both for reset and for grow_my_array. In the case of grow_my_array
// we want to retain the values of the head and tail.
void clean_up_buffer(bool reset_pointers) {
if (my_array) {
for( size_type i=my_head; i<my_tail; ++i ) {
if(my_item_valid(i))
destroy_item(i);
}
allocator_type().deallocate(my_array,my_array_size);
}
my_array = NULL;
if(reset_pointers) {
my_head = my_tail = my_array_size = 0;
}
}
public:
//! Constructor
item_buffer( ) : my_array(NULL), my_array_size(0),
my_head(0), my_tail(0) {
grow_my_array(initial_buffer_size);
}
~item_buffer() {
clean_up_buffer(/*reset_pointers*/true);
}
void reset() { clean_up_buffer(/*reset_pointers*/true); grow_my_array(initial_buffer_size); }
};
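// grow_my_array keeps the capacity a power of two, so item(i) maps a
// monotonically growing logical index to a physical slot with a mask rather
// than a modulo: slot = i & (my_array_size - 1). A small standalone check of
// that arithmetic (a sketch only, guarded out so it does not affect this header):
#if 0
#include <cassert>
#include <cstddef>
int main() {
    const std::size_t capacity = 8;               // always a power of two
    std::size_t head = 5, tail = 13;              // logical indices only ever grow
    assert(tail - head <= capacity);              // size() == my_tail - my_head
    assert((head & (capacity - 1)) == 5);         // physical slot of the head
    assert((tail & (capacity - 1)) == 5);         // a full buffer wraps tail onto head's slot
    return 0;
}
#endif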
//! item_buffer with reservable front-end. NOTE: if reserving, do not
//! complete the operation with pop_front(); use consume_front().
//! No synchronization is built in.
template<typename T, typename A=cache_aligned_allocator<T> >
class reservable_item_buffer : public item_buffer<T, A> {
protected:
using item_buffer<T, A>::my_item_valid;
using item_buffer<T, A>::my_head;
public:
reservable_item_buffer() : item_buffer<T, A>(), my_reserved(false) {}
void reset() {my_reserved = false; item_buffer<T,A>::reset(); }
protected:
bool reserve_front(T &v) {
if(my_reserved || !my_item_valid(my_head)) return false;
my_reserved = true;
// reserving the head
this->copy_front(v);
this->reserve_item(this->my_head);
return true;
}
void consume_front() {
__TBB_ASSERT(my_reserved, "Attempt to consume a non-reserved item");
this->destroy_front();
my_reserved = false;
}
void release_front() {
__TBB_ASSERT(my_reserved, "Attempt to release a non-reserved item");
this->release_item(this->my_head);
my_reserved = false;
}
bool my_reserved;
};
} // namespace internal
#endif // __TBB__flow_graph_item_buffer_impl_H

File diff suppressed because it is too large

View File

@@ -0,0 +1,742 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB__flow_graph_node_impl_H
#define __TBB__flow_graph_node_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#include "_flow_graph_item_buffer_impl.h"
//! @cond INTERNAL
namespace internal {
using tbb::internal::aggregated_operation;
using tbb::internal::aggregating_functor;
using tbb::internal::aggregator;
template< typename T, typename A >
class function_input_queue : public item_buffer<T,A> {
public:
bool pop( T& t ) {
return this->pop_front( t );
}
bool push( T& t ) {
return this->push_back( t );
}
};
//! Input and scheduling for a function node that takes a type Input as input
// The only up-ref is apply_body_impl, which should implement the function
// call and any handling of the result.
template< typename Input, typename A, typename ImplType >
class function_input_base : public receiver<Input>, tbb::internal::no_assign {
enum op_stat {WAIT=0, SUCCEEDED, FAILED};
enum op_type {reg_pred, rem_pred, app_body, try_fwd, tryput_bypass, app_body_bypass
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
, add_blt_pred, del_blt_pred,
blt_pred_cnt, blt_pred_cpy // create vector copies of preds and succs
#endif
};
typedef function_input_base<Input, A, ImplType> my_class;
public:
//! The input type of this receiver
typedef Input input_type;
typedef sender<Input> predecessor_type;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
typedef std::vector<predecessor_type *> predecessor_vector_type;
#endif
//! Constructor for function_input_base
function_input_base( graph &g, size_t max_concurrency, function_input_queue<input_type,A> *q = NULL )
: my_graph(g), my_max_concurrency(max_concurrency), my_concurrency(0),
my_queue(q), forwarder_busy(false) {
my_predecessors.set_owner(this);
my_aggregator.initialize_handler(my_handler(this));
}
//! Copy constructor
function_input_base( const function_input_base& src, function_input_queue<input_type,A> *q = NULL ) :
receiver<Input>(), tbb::internal::no_assign(),
my_graph(src.my_graph), my_max_concurrency(src.my_max_concurrency),
my_concurrency(0), my_queue(q), forwarder_busy(false)
{
my_predecessors.set_owner(this);
my_aggregator.initialize_handler(my_handler(this));
}
//! Destructor
virtual ~function_input_base() {
if ( my_queue ) delete my_queue;
}
//! Put to the node, returning a task if available
virtual task * try_put_task( const input_type &t ) {
if ( my_max_concurrency == 0 ) {
return create_body_task( t );
} else {
my_operation op_data(t, tryput_bypass);
my_aggregator.execute(&op_data);
if(op_data.status == SUCCEEDED ) {
return op_data.bypass_t;
}
return NULL;
}
}
//! Adds src to the list of cached predecessors.
/* override */ bool register_predecessor( predecessor_type &src ) {
my_operation op_data(reg_pred);
op_data.r = &src;
my_aggregator.execute(&op_data);
return true;
}
//! Removes src from the list of cached predecessors.
/* override */ bool remove_predecessor( predecessor_type &src ) {
my_operation op_data(rem_pred);
op_data.r = &src;
my_aggregator.execute(&op_data);
return true;
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
//! Adds to list of predecessors added by make_edge
/*override*/ void internal_add_built_predecessor( predecessor_type &src) {
my_operation op_data(add_blt_pred);
op_data.r = &src;
my_aggregator.execute(&op_data);
}
//! removes from to list of predecessors (used by remove_edge)
/*override*/ void internal_delete_built_predecessor( predecessor_type &src) {
my_operation op_data(del_blt_pred);
op_data.r = &src;
my_aggregator.execute(&op_data);
}
/*override*/ size_t predecessor_count() {
my_operation op_data(blt_pred_cnt);
my_aggregator.execute(&op_data);
return op_data.cnt_val;
}
/*override*/ void copy_predecessors(predecessor_vector_type &v) {
my_operation op_data(blt_pred_cpy);
op_data.predv = &v;
my_aggregator.execute(&op_data);
}
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
protected:
void reset_function_input_base( __TBB_PFG_RESET_ARG(reset_flags f)) {
my_concurrency = 0;
if(my_queue) {
my_queue->reset();
}
reset_receiver(__TBB_PFG_RESET_ARG(f));
forwarder_busy = false;
}
graph& my_graph;
const size_t my_max_concurrency;
size_t my_concurrency;
function_input_queue<input_type, A> *my_queue;
predecessor_cache<input_type, null_mutex > my_predecessors;
/*override*/void reset_receiver( __TBB_PFG_RESET_ARG(reset_flags f)) {
my_predecessors.reset(__TBB_PFG_RESET_ARG(f));
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
__TBB_ASSERT(!(f & rf_extract) || my_predecessors.empty(), "function_input_base reset failed");
#endif
}
private:
friend class apply_body_task_bypass< my_class, input_type >;
friend class forward_task_bypass< my_class >;
class my_operation : public aggregated_operation< my_operation > {
public:
char type;
union {
input_type *elem;
predecessor_type *r;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
size_t cnt_val;
predecessor_vector_type *predv;
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
};
tbb::task *bypass_t;
my_operation(const input_type& e, op_type t) :
type(char(t)), elem(const_cast<input_type*>(&e)) {}
my_operation(op_type t) : type(char(t)), r(NULL) {}
};
bool forwarder_busy;
typedef internal::aggregating_functor<my_class, my_operation> my_handler;
friend class internal::aggregating_functor<my_class, my_operation>;
aggregator< my_handler, my_operation > my_aggregator;
void handle_operations(my_operation *op_list) {
my_operation *tmp;
while (op_list) {
tmp = op_list;
op_list = op_list->next;
switch (tmp->type) {
case reg_pred:
my_predecessors.add(*(tmp->r));
__TBB_store_with_release(tmp->status, SUCCEEDED);
if (!forwarder_busy) {
forwarder_busy = true;
spawn_forward_task();
}
break;
case rem_pred:
my_predecessors.remove(*(tmp->r));
__TBB_store_with_release(tmp->status, SUCCEEDED);
break;
case app_body:
__TBB_ASSERT(my_max_concurrency != 0, NULL);
--my_concurrency;
__TBB_store_with_release(tmp->status, SUCCEEDED);
if (my_concurrency<my_max_concurrency) {
input_type i;
bool item_was_retrieved = false;
if ( my_queue )
item_was_retrieved = my_queue->pop(i);
else
item_was_retrieved = my_predecessors.get_item(i);
if (item_was_retrieved) {
++my_concurrency;
spawn_body_task(i);
}
}
break;
case app_body_bypass: {
task * new_task = NULL;
__TBB_ASSERT(my_max_concurrency != 0, NULL);
--my_concurrency;
if (my_concurrency<my_max_concurrency) {
input_type i;
bool item_was_retrieved = false;
if ( my_queue )
item_was_retrieved = my_queue->pop(i);
else
item_was_retrieved = my_predecessors.get_item(i);
if (item_was_retrieved) {
++my_concurrency;
new_task = create_body_task(i);
}
}
tmp->bypass_t = new_task;
__TBB_store_with_release(tmp->status, SUCCEEDED);
}
break;
case tryput_bypass: internal_try_put_task(tmp); break;
case try_fwd: internal_forward(tmp); break;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
case add_blt_pred: {
my_predecessors.internal_add_built_predecessor(*(tmp->r));
__TBB_store_with_release(tmp->status, SUCCEEDED);
}
break;
case del_blt_pred:
my_predecessors.internal_delete_built_predecessor(*(tmp->r));
__TBB_store_with_release(tmp->status, SUCCEEDED);
break;
case blt_pred_cnt:
tmp->cnt_val = my_predecessors.predecessor_count();
__TBB_store_with_release(tmp->status, SUCCEEDED);
break;
case blt_pred_cpy:
my_predecessors.copy_predecessors( *(tmp->predv) );
__TBB_store_with_release(tmp->status, SUCCEEDED);
break;
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
}
}
}
//! Put to the node, but return the task instead of enqueueing it
void internal_try_put_task(my_operation *op) {
__TBB_ASSERT(my_max_concurrency != 0, NULL);
if (my_concurrency < my_max_concurrency) {
++my_concurrency;
task * new_task = create_body_task(*(op->elem));
op->bypass_t = new_task;
__TBB_store_with_release(op->status, SUCCEEDED);
} else if ( my_queue && my_queue->push(*(op->elem)) ) {
op->bypass_t = SUCCESSFULLY_ENQUEUED;
__TBB_store_with_release(op->status, SUCCEEDED);
} else {
op->bypass_t = NULL;
__TBB_store_with_release(op->status, FAILED);
}
}
//! Tries to spawn bodies if available and if concurrency allows
void internal_forward(my_operation *op) {
op->bypass_t = NULL;
if (my_concurrency<my_max_concurrency || !my_max_concurrency) {
input_type i;
bool item_was_retrieved = false;
if ( my_queue )
item_was_retrieved = my_queue->pop(i);
else
item_was_retrieved = my_predecessors.get_item(i);
if (item_was_retrieved) {
++my_concurrency;
op->bypass_t = create_body_task(i);
__TBB_store_with_release(op->status, SUCCEEDED);
return;
}
}
__TBB_store_with_release(op->status, FAILED);
forwarder_busy = false;
}
//! Applies the body to the provided input
// then decides if more work is available
void apply_body( input_type &i ) {
task *new_task = apply_body_bypass(i);
if(!new_task) return;
if(new_task == SUCCESSFULLY_ENQUEUED) return;
FLOW_SPAWN(*new_task);
return;
}
//! Applies the body to the provided input
// then decides if more work is available
task * apply_body_bypass( input_type &i ) {
task * new_task = static_cast<ImplType *>(this)->apply_body_impl_bypass(i);
if ( my_max_concurrency != 0 ) {
my_operation op_data(app_body_bypass); // tries to pop an item or get_item, enqueues another apply_body
my_aggregator.execute(&op_data);
tbb::task *ttask = op_data.bypass_t;
new_task = combine_tasks(new_task, ttask);
}
return new_task;
}
//! allocates a task to call apply_body( input )
inline task * create_body_task( const input_type &input ) {
task* tp = my_graph.root_task();
return (tp) ?
new(task::allocate_additional_child_of(*tp))
apply_body_task_bypass < my_class, input_type >(*this, input) :
NULL;
}
//! Spawns a task that calls apply_body( input )
inline void spawn_body_task( const input_type &input ) {
task* tp = create_body_task(input);
// tp == NULL => g.reset(), which shouldn't occur in concurrent context
if(tp) {
FLOW_SPAWN(*tp);
}
}
//! This is executed by an enqueued task, the "forwarder"
task *forward_task() {
my_operation op_data(try_fwd);
task *rval = NULL;
do {
op_data.status = WAIT;
my_aggregator.execute(&op_data);
if(op_data.status == SUCCEEDED) {
tbb::task *ttask = op_data.bypass_t;
rval = combine_tasks(rval, ttask);
}
} while (op_data.status == SUCCEEDED);
return rval;
}
inline task *create_forward_task() {
task* tp = my_graph.root_task();
return (tp) ?
new(task::allocate_additional_child_of(*tp)) forward_task_bypass< my_class >(*this) :
NULL;
}
//! Spawns a task that calls forward()
inline void spawn_forward_task() {
task* tp = create_forward_task();
if(tp) {
FLOW_SPAWN(*tp);
}
}
}; // function_input_base
//! Implements methods for a function node that takes a type Input as input and sends
// a type Output to its successors.
template< typename Input, typename Output, typename A>
class function_input : public function_input_base<Input, A, function_input<Input,Output,A> > {
public:
typedef Input input_type;
typedef Output output_type;
typedef function_input<Input,Output,A> my_class;
typedef function_input_base<Input, A, my_class> base_type;
typedef function_input_queue<input_type, A> input_queue_type;
// constructor
template<typename Body>
function_input( graph &g, size_t max_concurrency, Body& body, function_input_queue<input_type,A> *q = NULL ) :
base_type(g, max_concurrency, q),
my_body( new internal::function_body_leaf< input_type, output_type, Body>(body) ) {
}
//! Copy constructor
function_input( const function_input& src, input_queue_type *q = NULL ) :
base_type(src, q),
my_body( src.my_body->clone() ) {
}
~function_input() {
delete my_body;
}
template< typename Body >
Body copy_function_object() {
internal::function_body<input_type, output_type> &body_ref = *this->my_body;
return dynamic_cast< internal::function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body();
}
task * apply_body_impl_bypass( const input_type &i) {
#if TBB_PREVIEW_FLOW_GRAPH_TRACE
// There is an extra copy needed to capture the
// body execution without the try_put
tbb::internal::fgt_begin_body( my_body );
output_type v = (*my_body)(i);
tbb::internal::fgt_end_body( my_body );
task * new_task = successors().try_put_task( v );
#else
task * new_task = successors().try_put_task( (*my_body)(i) );
#endif
return new_task;
}
protected:
void reset_function_input(__TBB_PFG_RESET_ARG(reset_flags f)) {
base_type::reset_function_input_base(__TBB_PFG_RESET_ARG(f));
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
if(f & rf_reset_bodies) my_body->reset_body();
#endif
}
function_body<input_type, output_type> *my_body;
virtual broadcast_cache<output_type > &successors() = 0;
}; // function_input
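// A minimal user-level sketch of what function_input and function_input_queue
// provide, assuming only the public tbb/flow_graph.h interface and C++11
// lambdas (guarded out so it does not affect this header). With the default
// queueing policy, inputs beyond the concurrency limit are buffered and applied
// as running bodies complete:
#if 0
#include "tbb/flow_graph.h"
#include <cstdio>
int main() {
    using namespace tbb::flow;
    graph g;
    function_node<int, int> square(g, serial, [](int v) { return v * v; });   // serial => extra inputs are queued
    function_node<int, int> report(g, serial, [](int v) { std::printf("%d\n", v); return v; });
    make_edge(square, report);
    for (int i = 0; i < 5; ++i) square.try_put(i);   // surplus puts are buffered, not dropped
    g.wait_for_all();
    return 0;
}
#endif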
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
// helper templates to reset the successor edges of the output ports of an multifunction_node
template<int N>
struct reset_element {
template<typename P>
static void reset_this(P &p, reset_flags f) {
(void)tbb::flow::get<N-1>(p).successors().reset(f);
reset_element<N-1>::reset_this(p, f);
}
template<typename P>
static bool this_empty(P &p) {
if(tbb::flow::get<N-1>(p).successors().empty())
return reset_element<N-1>::this_empty(p);
return false;
}
};
template<>
struct reset_element<1> {
template<typename P>
static void reset_this(P &p, reset_flags f) {
(void)tbb::flow::get<0>(p).successors().reset(f);
}
template<typename P>
static bool this_empty(P &p) {
return tbb::flow::get<0>(p).successors().empty();
}
};
#endif
//! Implements methods for a function node that takes a type Input as input
// and has a tuple of output ports specified.
template< typename Input, typename OutputPortSet, typename A>
class multifunction_input : public function_input_base<Input, A, multifunction_input<Input,OutputPortSet,A> > {
public:
static const int N = tbb::flow::tuple_size<OutputPortSet>::value;
typedef Input input_type;
typedef OutputPortSet output_ports_type;
typedef multifunction_input<Input,OutputPortSet,A> my_class;
typedef function_input_base<Input, A, my_class> base_type;
typedef function_input_queue<input_type, A> input_queue_type;
// constructor
template<typename Body>
multifunction_input(
graph &g,
size_t max_concurrency,
Body& body,
function_input_queue<input_type,A> *q = NULL ) :
base_type(g, max_concurrency, q),
my_body( new internal::multifunction_body_leaf<input_type, output_ports_type, Body>(body) ) {
}
//! Copy constructor
multifunction_input( const multifunction_input& src, input_queue_type *q = NULL ) :
base_type(src, q),
my_body( src.my_body->clone() ) {
}
~multifunction_input() {
delete my_body;
}
template< typename Body >
Body copy_function_object() {
internal::multifunction_body<input_type, output_ports_type> &body_ref = *this->my_body;
return dynamic_cast< internal::multifunction_body_leaf<input_type, output_ports_type, Body> & >(body_ref).get_body();
}
// for multifunction nodes we do not have a single successor as such. So we just tell
// the task we were successful.
task * apply_body_impl_bypass( const input_type &i) {
tbb::internal::fgt_begin_body( my_body );
(*my_body)(i, my_output_ports);
tbb::internal::fgt_end_body( my_body );
task * new_task = SUCCESSFULLY_ENQUEUED;
return new_task;
}
output_ports_type &output_ports(){ return my_output_ports; }
protected:
/*override*/void reset(__TBB_PFG_RESET_ARG(reset_flags f)) {
base_type::reset_function_input_base(__TBB_PFG_RESET_ARG(f));
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
reset_element<N>::reset_this(my_output_ports, f);
if(f & rf_reset_bodies) my_body->reset_body();
__TBB_ASSERT(!(f & rf_extract) || reset_element<N>::this_empty(my_output_ports), "multifunction_node reset failed");
#endif
}
multifunction_body<input_type, output_ports_type> *my_body;
output_ports_type my_output_ports;
}; // multifunction_input
// template to refer to an output port of a multifunction_node
template<size_t N, typename MOP>
typename tbb::flow::tuple_element<N, typename MOP::output_ports_type>::type &output_port(MOP &op) {
return tbb::flow::get<N>(op.output_ports());
}
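// A minimal user-level sketch of a multifunction_node using the output_port
// helper above, assuming only the public tbb/flow_graph.h interface and C++11
// lambdas (guarded out so it does not affect this header). The body receives
// the whole tuple of output ports and may put to any subset of them:
#if 0
#include "tbb/flow_graph.h"
#include <cstdio>
int main() {
    using namespace tbb::flow;
    graph g;
    typedef multifunction_node<int, tuple<int, int> > splitter_type;
    splitter_type splitter(g, unlimited,
        [](const int &v, splitter_type::output_ports_type &ports) {
            if (v % 2 == 0) get<0>(ports).try_put(v);   // evens to port 0
            else            get<1>(ports).try_put(v);   // odds to port 1
        });
    function_node<int, int> evens(g, serial, [](int v) { std::printf("even %d\n", v); return v; });
    function_node<int, int> odds (g, serial, [](int v) { std::printf("odd  %d\n", v); return v; });
    make_edge(output_port<0>(splitter), evens);
    make_edge(output_port<1>(splitter), odds);
    for (int i = 0; i < 6; ++i) splitter.try_put(i);
    g.wait_for_all();
    return 0;
}
#endif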
// helper structs for split_node
template<int N>
struct emit_element {
template<typename T, typename P>
static void emit_this(const T &t, P &p) {
(void)tbb::flow::get<N-1>(p).try_put(tbb::flow::get<N-1>(t));
emit_element<N-1>::emit_this(t,p);
}
};
template<>
struct emit_element<1> {
template<typename T, typename P>
static void emit_this(const T &t, P &p) {
(void)tbb::flow::get<0>(p).try_put(tbb::flow::get<0>(t));
}
};
//! Implements methods for an executable node that takes continue_msg as input
template< typename Output >
class continue_input : public continue_receiver {
public:
//! The input type of this receiver
typedef continue_msg input_type;
//! The output type of this receiver
typedef Output output_type;
template< typename Body >
continue_input( graph &g, Body& body )
: my_graph_ptr(&g),
my_body( new internal::function_body_leaf< input_type, output_type, Body>(body) ) { }
template< typename Body >
continue_input( graph &g, int number_of_predecessors, Body& body )
: continue_receiver( number_of_predecessors ), my_graph_ptr(&g),
my_body( new internal::function_body_leaf< input_type, output_type, Body>(body) ) { }
continue_input( const continue_input& src ) : continue_receiver(src),
my_graph_ptr(src.my_graph_ptr), my_body( src.my_body->clone() ) {}
~continue_input() {
delete my_body;
}
template< typename Body >
Body copy_function_object() {
internal::function_body<input_type, output_type> &body_ref = *my_body;
return dynamic_cast< internal::function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body();
}
/*override*/void reset_receiver( __TBB_PFG_RESET_ARG(reset_flags f)) {
continue_receiver::reset_receiver(__TBB_PFG_RESET_ARG(f));
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
if(f & rf_reset_bodies) my_body->reset_body();
#endif
}
protected:
graph* my_graph_ptr;
function_body<input_type, output_type> *my_body;
virtual broadcast_cache<output_type > &successors() = 0;
friend class apply_body_task_bypass< continue_input< Output >, continue_msg >;
//! Applies the body to the provided input
/* override */ task *apply_body_bypass( input_type ) {
#if TBB_PREVIEW_FLOW_GRAPH_TRACE
// There is an extra copy needed to capture the
// body execution without the try_put
tbb::internal::fgt_begin_body( my_body );
output_type v = (*my_body)( continue_msg() );
tbb::internal::fgt_end_body( my_body );
return successors().try_put_task( v );
#else
return successors().try_put_task( (*my_body)( continue_msg() ) );
#endif
}
//! Spawns a task that applies the body
/* override */ task *execute( ) {
task* tp = my_graph_ptr->root_task();
return (tp) ?
new ( task::allocate_additional_child_of( *tp ) )
apply_body_task_bypass< continue_input< Output >, continue_msg >( *this, continue_msg() ) :
NULL;
}
}; // continue_input
//! Implements methods for both executable and function nodes that puts Output to its successors
template< typename Output >
class function_output : public sender<Output> {
public:
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
template<int N> friend struct reset_element;
#endif
typedef Output output_type;
typedef receiver<output_type> successor_type;
typedef broadcast_cache<output_type> broadcast_cache_type;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
typedef std::vector<successor_type *> successor_vector_type;
#endif
function_output() { my_successors.set_owner(this); }
function_output(const function_output & /*other*/) : sender<output_type>() {
my_successors.set_owner(this);
}
//! Adds a new successor to this node
/* override */ bool register_successor( receiver<output_type> &r ) {
successors().register_successor( r );
return true;
}
//! Removes a successor from this node
/* override */ bool remove_successor( receiver<output_type> &r ) {
successors().remove_successor( r );
return true;
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
/*override*/ void internal_add_built_successor( receiver<output_type> &r) {
successors().internal_add_built_successor( r );
}
/*override*/ void internal_delete_built_successor( receiver<output_type> &r) {
successors().internal_delete_built_successor( r );
}
/*override*/ size_t successor_count() {
return successors().successor_count();
}
/*override*/ void copy_successors( successor_vector_type &v) {
successors().copy_successors(v);
}
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
// for multifunction_node. The function_body that implements
// the node will have an input and an output tuple of ports. To put
// an item to a successor, the body should call
//
//    get<I>(output_ports).try_put(output_value);
//
// The return value is the bool returned from successors.try_put.
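//
// A minimal body sketch (hypothetical node and names, not part of this header), assuming a
// multifunction_node<int, tbb::flow::tuple<int,int> > that routes even and odd inputs to
// different output ports:
//
//     struct router_body {
//         typedef tbb::flow::multifunction_node<int, tbb::flow::tuple<int,int> > node_t;
//         void operator()(const int &v, node_t::output_ports_type &ports) {
//             if (v % 2 == 0) tbb::flow::get<0>(ports).try_put(v);  // even values to port 0
//             else            tbb::flow::get<1>(ports).try_put(v);  // odd values to port 1
//         }
//     };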
task *try_put_task(const output_type &i) { return my_successors.try_put_task(i); }
protected:
broadcast_cache_type my_successors;
broadcast_cache_type &successors() { return my_successors; }
}; // function_output
template< typename Output >
class multifunction_output : public function_output<Output> {
public:
typedef Output output_type;
typedef function_output<output_type> base_type;
using base_type::my_successors;
multifunction_output() : base_type() {my_successors.set_owner(this);}
multifunction_output( const multifunction_output &/*other*/) : base_type() { my_successors.set_owner(this); }
bool try_put(const output_type &i) {
task *res = my_successors.try_put_task(i);
if(!res) return false;
if(res != SUCCESSFULLY_ENQUEUED) FLOW_SPAWN(*res);
return true;
}
}; // multifunction_output
} // internal
#endif // __TBB__flow_graph_node_impl_H

View File

@@ -0,0 +1,251 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
// A tagged buffer that can expand and supports as many deletions as additions.
// List-based, with the list elements held in an array (for destruction management) and
// multiplicative hashing (as in enumerable_thread_specific). No built-in synchronization.
//
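// A usage sketch (illustrative only; this internal class is used, for example, by
// tag-matching join nodes):
//
//     tagged_buffer<size_t, int> buf;      // tag type size_t, value type int, NO_TAG == 0
//     buf.tagged_insert(size_t(42), 17);   // returns true: tag 42 was not present before
//     int v;
//     if (buf.tagged_find(size_t(42), v))  // v == 17
//         buf.tagged_delete(size_t(42));
//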
#ifndef __TBB__flow_graph_tagged_buffer_impl_H
#define __TBB__flow_graph_tagged_buffer_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
// included in namespace tbb::flow::interface7::internal
template<typename T, typename U, size_t NoTagMark>
struct otherData {
T t;
U next;
otherData() : t(NoTagMark), next(NULL) {}
};
template<typename TagType, typename ValueType, size_t NoTagMark>
struct buffer_element_type {
// the second parameter below is void * because we can't forward-declare the type
// itself, so we just reinterpret_cast below.
typedef typename aligned_pair<ValueType, otherData<TagType, void *, NoTagMark> >::type type;
};
template
<
typename TagType,
typename ValueType,
size_t NoTagMark = 0,
typename Allocator=tbb::cache_aligned_allocator< typename buffer_element_type<TagType, ValueType, NoTagMark>::type >
>
class tagged_buffer {
public:
static const size_t INITIAL_SIZE = 8; // initial size of the hash pointer table
static const TagType NO_TAG = TagType(NoTagMark);
typedef ValueType value_type;
typedef typename buffer_element_type<TagType, ValueType, NO_TAG>::type element_type;
typedef value_type *pointer_type;
typedef element_type *list_array_type; // array we manage manually
typedef list_array_type *pointer_array_type;
typedef typename Allocator::template rebind<list_array_type>::other pointer_array_allocator_type;
typedef typename Allocator::template rebind<element_type>::other elements_array_allocator;
private:
size_t my_size;
size_t nelements;
pointer_array_type pointer_array; // pointer_array[my_size]
list_array_type elements_array; // elements_array[my_size / 2]
element_type* free_list;
size_t mask() { return my_size - 1; }
static size_t hash(TagType t) {
return uintptr_t(t)*tbb::internal::select_size_t_constant<0x9E3779B9,0x9E3779B97F4A7C15ULL>::value;
}
void set_up_free_list( element_type **p_free_list, list_array_type la, size_t sz) {
for(size_t i=0; i < sz - 1; ++i ) { // construct free list
la[i].second.next = &(la[i+1]);
la[i].second.t = NO_TAG;
}
la[sz-1].second.next = NULL;
*p_free_list = &(la[0]);
}
// cleanup for exceptions
struct DoCleanup {
pointer_array_type *my_pa;
list_array_type *my_elements;
size_t my_size;
DoCleanup(pointer_array_type &pa, list_array_type &my_els, size_t sz) :
my_pa(&pa), my_elements(&my_els), my_size(sz) { }
~DoCleanup() {
if(my_pa) {
size_t dont_care = 0;
internal_free_buffer(*my_pa, *my_elements, my_size, dont_care);
}
}
};
// exception-safety requires we do all the potentially-throwing operations first
void grow_array() {
size_t new_size = my_size*2;
size_t new_nelements = nelements; // internal_free_buffer zeroes this
list_array_type new_elements_array = NULL;
pointer_array_type new_pointer_array = NULL;
list_array_type new_free_list = NULL;
{
DoCleanup my_cleanup(new_pointer_array, new_elements_array, new_size);
new_elements_array = elements_array_allocator().allocate(my_size);
new_pointer_array = pointer_array_allocator_type().allocate(new_size);
for(size_t i=0; i < new_size; ++i) new_pointer_array[i] = NULL;
set_up_free_list(&new_free_list, new_elements_array, my_size );
for(size_t i=0; i < my_size; ++i) {
for( element_type* op = pointer_array[i]; op; op = (element_type *)(op->second.next)) {
value_type *ov = reinterpret_cast<value_type *>(&(op->first));
// could have std::move semantics
internal_tagged_insert(new_pointer_array, new_size, new_free_list, op->second.t, *ov);
}
}
my_cleanup.my_pa = NULL;
my_cleanup.my_elements = NULL;
}
internal_free_buffer(pointer_array, elements_array, my_size, nelements);
free_list = new_free_list;
pointer_array = new_pointer_array;
elements_array = new_elements_array;
my_size = new_size;
nelements = new_nelements;
}
// v should be perfect-forwarded once std::move is available.
// We use this method to move elements in grow_array, so it cannot use class fields.
void internal_tagged_insert( element_type **p_pointer_array, size_t p_sz, list_array_type &p_free_list,
const TagType t, const value_type &v) {
size_t l_mask = p_sz-1;
size_t h = hash(t) & l_mask;
__TBB_ASSERT(p_free_list, "Error: free list not set up.");
element_type* my_elem = p_free_list; p_free_list = (element_type *)(p_free_list->second.next);
my_elem->second.t = t;
(void) new(&(my_elem->first)) value_type(v);
my_elem->second.next = p_pointer_array[h];
p_pointer_array[h] = my_elem;
}
void internal_initialize_buffer() {
pointer_array = pointer_array_allocator_type().allocate(my_size);
for(size_t i = 0; i < my_size; ++i) pointer_array[i] = NULL;
elements_array = elements_array_allocator().allocate(my_size / 2);
set_up_free_list(&free_list, elements_array, my_size / 2);
}
// made static so an enclosed class can use it to properly dispose of the internals
static void internal_free_buffer( pointer_array_type &pa, list_array_type &el, size_t &sz, size_t &ne ) {
if(pa) {
for(size_t i = 0; i < sz; ++i ) {
element_type *p_next;
for( element_type *p = pa[i]; p; p = p_next) {
p_next = (element_type *)p->second.next;
value_type *vp = reinterpret_cast<value_type *>(&(p->first));
vp->~value_type();
}
}
pointer_array_allocator_type().deallocate(pa, sz);
pa = NULL;
}
// Separate test: if allocation of pa throws, el may still be allocated,
// but no elements will have been constructed.
if(el) {
elements_array_allocator().deallocate(el, sz / 2);
el = NULL;
}
sz = INITIAL_SIZE;
ne = 0;
}
public:
tagged_buffer() : my_size(INITIAL_SIZE), nelements(0) {
internal_initialize_buffer();
}
~tagged_buffer() {
internal_free_buffer(pointer_array, elements_array, my_size, nelements);
}
void reset() {
internal_free_buffer(pointer_array, elements_array, my_size, nelements);
internal_initialize_buffer();
}
bool tagged_insert(const TagType t, const value_type &v) {
pointer_type p;
if(tagged_find_ref(t, p)) {
p->~value_type();
(void) new(p) value_type(v); // copy-construct into the space
return false;
}
++nelements;
if(nelements*2 > my_size) grow_array();
internal_tagged_insert(pointer_array, my_size, free_list, t, v);
return true;
}
// sets v to point to the stored element with tag t; returns true if the tag was found
bool tagged_find_ref(const TagType t, pointer_type &v) {
size_t i = hash(t) & mask();
for(element_type* p = pointer_array[i]; p; p = (element_type *)(p->second.next)) {
if(p->second.t == t) {
v = reinterpret_cast<pointer_type>(&(p->first));
return true;
}
}
return false;
}
bool tagged_find( const TagType t, value_type &v) {
value_type *p;
if(tagged_find_ref(t, p)) {
v = *p;
return true;
}
else
return false;
}
void tagged_delete(const TagType t) {
size_t h = hash(t) & mask();
element_type* prev = NULL;
for(element_type* p = pointer_array[h]; p; prev = p, p = (element_type *)(p->second.next)) {
if(p->second.t == t) {
value_type *vp = reinterpret_cast<value_type *>(&(p->first));
vp->~value_type();
p->second.t = NO_TAG;
if(prev) prev->second.next = p->second.next;
else pointer_array[h] = (element_type *)(p->second.next);
p->second.next = free_list;
free_list = p;
--nelements;
return;
}
}
__TBB_ASSERT(false, "tag not found for delete");
}
};
#endif // __TBB__flow_graph_tagged_buffer_impl_H

View File

@@ -0,0 +1,205 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef _FGT_GRAPH_TRACE_IMPL_H
#define _FGT_GRAPH_TRACE_IMPL_H
#include "../tbb_profiling.h"
namespace tbb {
namespace internal {
#if TBB_PREVIEW_FLOW_GRAPH_TRACE
static inline void fgt_internal_create_input_port( void *node, void *p, string_index name_index ) {
itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index );
}
static inline void fgt_internal_create_output_port( void *node, void *p, string_index name_index ) {
itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index );
}
template < typename TypesTuple, typename PortsTuple, int N >
struct fgt_internal_input_helper {
static void register_port( void *node, PortsTuple &ports ) {
fgt_internal_create_input_port( node, (void*)static_cast< tbb::flow::interface7::receiver< typename tbb::flow::tuple_element<N-1,TypesTuple>::type > * >(&(tbb::flow::get<N-1>(ports))),
static_cast<tbb::internal::string_index>(FLOW_INPUT_PORT_0 + N - 1) );
fgt_internal_input_helper<TypesTuple, PortsTuple, N-1>::register_port( node, ports );
}
};
template < typename TypesTuple, typename PortsTuple >
struct fgt_internal_input_helper<TypesTuple,PortsTuple,1> {
static void register_port( void *node, PortsTuple &ports ) {
fgt_internal_create_input_port( node, (void*)static_cast< tbb::flow::interface7::receiver< typename tbb::flow::tuple_element<0,TypesTuple>::type > * >(&(tbb::flow::get<0>(ports))),
FLOW_INPUT_PORT_0 );
}
};
template < typename TypesTuple, typename PortsTuple, int N >
struct fgt_internal_output_helper {
static void register_port( void *node, PortsTuple &ports ) {
fgt_internal_create_output_port( node, (void*)static_cast< tbb::flow::interface7::sender< typename tbb::flow::tuple_element<N-1,TypesTuple>::type > * >(&(tbb::flow::get<N-1>(ports))),
static_cast<tbb::internal::string_index>(FLOW_OUTPUT_PORT_0 + N - 1) );
fgt_internal_output_helper<TypesTuple, PortsTuple, N-1>::register_port( node, ports );
}
};
template < typename TypesTuple, typename PortsTuple >
struct fgt_internal_output_helper<TypesTuple,PortsTuple,1> {
static void register_port( void *node, PortsTuple &ports ) {
fgt_internal_create_output_port( node, (void*)static_cast< tbb::flow::interface7::sender< typename tbb::flow::tuple_element<0,TypesTuple>::type > * >(&(tbb::flow::get<0>(ports))),
FLOW_OUTPUT_PORT_0 );
}
};
template< typename NodeType >
void fgt_multioutput_node_desc( const NodeType *node, const char *desc ) {
void *addr = (void *)( static_cast< tbb::flow::interface7::receiver< typename NodeType::input_type > * >(const_cast< NodeType *>(node)) );
itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc );
}
template< typename NodeType >
static inline void fgt_node_desc( const NodeType *node, const char *desc ) {
void *addr = (void *)( static_cast< tbb::flow::interface7::sender< typename NodeType::output_type > * >(const_cast< NodeType *>(node)) );
itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc );
}
static inline void fgt_graph_desc( void *g, const char *desc ) {
itt_metadata_str_add( ITT_DOMAIN_FLOW, g, FLOW_GRAPH, FLOW_OBJECT_NAME, desc );
}
static inline void fgt_body( void *node, void *body ) {
itt_relation_add( ITT_DOMAIN_FLOW, body, FLOW_BODY, __itt_relation_is_child_of, node, FLOW_NODE );
}
template< typename OutputTuple, int N, typename PortsTuple >
static inline void fgt_multioutput_node( string_index t, void *g, void *input_port, PortsTuple &ports ) {
itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 );
fgt_internal_output_helper<OutputTuple, PortsTuple, N>::register_port( input_port, ports );
}
template< typename OutputTuple, int N, typename PortsTuple >
static inline void fgt_multioutput_node_with_body( string_index t, void *g, void *input_port, PortsTuple &ports, void *body ) {
itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 );
fgt_internal_output_helper<OutputTuple, PortsTuple, N>::register_port( input_port, ports );
fgt_body( input_port, body );
}
template< typename InputTuple, int N, typename PortsTuple >
static inline void fgt_multiinput_node( string_index t, void *g, PortsTuple &ports, void *output_port) {
itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_output_port( output_port, output_port, FLOW_OUTPUT_PORT_0 );
fgt_internal_input_helper<InputTuple, PortsTuple, N>::register_port( output_port, ports );
}
static inline void fgt_node( string_index t, void *g, void *output_port ) {
itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_output_port( output_port, output_port, FLOW_OUTPUT_PORT_0 );
}
static inline void fgt_node_with_body( string_index t, void *g, void *output_port, void *body ) {
itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_output_port( output_port, output_port, FLOW_OUTPUT_PORT_0 );
fgt_body( output_port, body );
}
static inline void fgt_node( string_index t, void *g, void *input_port, void *output_port ) {
fgt_node( t, g, output_port );
fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 );
}
static inline void fgt_node_with_body( string_index t, void *g, void *input_port, void *output_port, void *body ) {
fgt_node_with_body( t, g, output_port, body );
fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 );
}
static inline void fgt_node( string_index t, void *g, void *input_port, void *decrement_port, void *output_port ) {
fgt_node( t, g, input_port, output_port );
fgt_internal_create_input_port( output_port, decrement_port, FLOW_INPUT_PORT_1 );
}
static inline void fgt_make_edge( void *output_port, void *input_port ) {
itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_predecessor_to, input_port, FLOW_INPUT_PORT);
}
static inline void fgt_remove_edge( void *output_port, void *input_port ) {
itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_sibling_of, input_port, FLOW_INPUT_PORT);
}
static inline void fgt_graph( void *g ) {
itt_make_task_group( ITT_DOMAIN_FLOW, g, FLOW_GRAPH, NULL, FLOW_NULL, FLOW_GRAPH );
}
static inline void fgt_begin_body( void *body ) {
itt_task_begin( ITT_DOMAIN_FLOW, body, FLOW_BODY, NULL, FLOW_NULL, FLOW_NULL );
}
static inline void fgt_end_body( void * ) {
itt_task_end( ITT_DOMAIN_FLOW );
}
#else // TBB_PREVIEW_FLOW_GRAPH_TRACE
static inline void fgt_graph( void * /*g*/ ) { }
template< typename NodeType >
static inline void fgt_multioutput_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { }
template< typename NodeType >
static inline void fgt_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { }
static inline void fgt_graph_desc( void * /*g*/, const char * /*desc*/ ) { }
static inline void fgt_body( void * /*node*/, void * /*body*/ ) { }
template< typename OutputTuple, int N, typename PortsTuple >
static inline void fgt_multioutput_node( string_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/ ) { }
template< typename OutputTuple, int N, typename PortsTuple >
static inline void fgt_multioutput_node_with_body( string_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/, void * /*body*/ ) { }
template< typename InputTuple, int N, typename PortsTuple >
static inline void fgt_multiinput_node( string_index /*t*/, void * /*g*/, PortsTuple & /*ports*/, void * /*output_port*/ ) { }
static inline void fgt_node( string_index /*t*/, void * /*g*/, void * /*output_port*/ ) { }
static inline void fgt_node( string_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/ ) { }
static inline void fgt_node( string_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*decrement_port*/, void * /*output_port*/ ) { }
static inline void fgt_node_with_body( string_index /*t*/, void * /*g*/, void * /*output_port*/, void * /*body*/ ) { }
static inline void fgt_node_with_body( string_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/, void * /*body*/ ) { }
static inline void fgt_make_edge( void * /*output_port*/, void * /*input_port*/ ) { }
static inline void fgt_remove_edge( void * /*output_port*/, void * /*input_port*/ ) { }
static inline void fgt_begin_body( void * /*body*/ ) { }
static inline void fgt_end_body( void * /*body*/) { }
#endif // TBB_PREVIEW_FLOW_GRAPH_TRACE
} // namespace internal
} // namespace tbb
#endif

View File

@@ -0,0 +1,497 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB__flow_graph_types_impl_H
#define __TBB__flow_graph_types_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
// included in namespace tbb::flow::interface7
namespace internal {
// wrap each element of a tuple in a template, and make a tuple of the result.
template<int N, template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements;
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<1, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type> >
type;
};
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<2, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type> >
type;
};
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<3, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type> >
type;
};
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<4, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type> >
type;
};
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<5, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<4,TypeTuple>::type> >
type;
};
#if __TBB_VARIADIC_MAX >= 6
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<6, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<4,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<5,TypeTuple>::type> >
type;
};
#endif
#if __TBB_VARIADIC_MAX >= 7
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<7, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<4,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<5,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<6,TypeTuple>::type> >
type;
};
#endif
#if __TBB_VARIADIC_MAX >= 8
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<8, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<4,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<5,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<6,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<7,TypeTuple>::type> >
type;
};
#endif
#if __TBB_VARIADIC_MAX >= 9
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<9, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<4,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<5,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<6,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<7,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<8,TypeTuple>::type> >
type;
};
#endif
#if __TBB_VARIADIC_MAX >= 10
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<10, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<4,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<5,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<6,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<7,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<8,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<9,TypeTuple>::type> >
type;
};
#endif
//! Type mimicking std::pair, but with trailing fill to ensure each element of an array
//! will have the correct alignment.
template<typename T1, typename T2, size_t REM>
struct type_plus_align {
char first[sizeof(T1)];
T2 second;
char fill1[REM];
};
template<typename T1, typename T2>
struct type_plus_align<T1,T2,0> {
char first[sizeof(T1)];
T2 second;
};
template<class U> struct alignment_of {
typedef struct { char t; U padded; } test_alignment;
static const size_t value = sizeof(test_alignment) - sizeof(U);
};
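// Worked example (typical LP64 target, illustrative only): for U = double,
// test_alignment is { char t; double padded; }, so sizeof(test_alignment) == 16 and
// value == 16 - 8 == 8, i.e. the alignment requirement of double.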
// T1, T2 are the actual types stored. The space defined for T1 in the type returned
// is a char array of the correct size. Type T2 should be trivially-constructible;
// T1 must be explicitly constructed and destroyed.
template<typename T1, typename T2>
struct aligned_pair {
static const size_t t1_align = alignment_of<T1>::value;
static const size_t t2_align = alignment_of<T2>::value;
typedef type_plus_align<T1, T2, 0 > just_pair;
static const size_t max_align = t1_align < t2_align ? t2_align : t1_align;
static const size_t extra_bytes = sizeof(just_pair) % max_align;
static const size_t remainder = extra_bytes ? max_align - extra_bytes : 0;
public:
typedef type_plus_align<T1,T2,remainder> type;
}; // aligned_pair
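// A minimal sketch of the intended use (hypothetical stored type), pairing explicitly managed
// value storage with a trivially usable bookkeeping field:
//
//     struct payload { double x; double y; };              // hypothetical value type
//     typedef aligned_pair<payload, int>::type slot_type;
//     slot_type s;
//     s.second = 0;                                        // bookkeeping field, used directly
//     payload *p = new (&s.first) payload();               // explicit construction into raw space
//     p->~payload();                                       // explicit destruction when done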
// support for variant type
// type we use when we're not storing a value
struct default_constructed { };
// A type which contains another type, can test which type is contained, and provides read-only
// references to it:
//    internal::Wrapper<T>
//    void CopyTo( void *newSpace ) : builds a Wrapper<T> copy of itself in newSpace
// Base struct that lets us copy and test the type of the contained objects.
struct WrapperBase {
virtual ~WrapperBase() {}
virtual void CopyTo(void* /*newSpace*/) const { }
};
// Wrapper<T> contains a T, with the ability to test what T is. The Wrapper<T> can be
// constructed from a T, can be copy-constructed from another Wrapper<T>, and can be
// examined via value(), but not modified.
template<typename T>
struct Wrapper: public WrapperBase {
typedef T value_type;
typedef T* pointer_type;
private:
T value_space;
public:
const value_type &value() const { return value_space; }
private:
Wrapper();
// on exception will ensure the Wrapper will contain only a trivially-constructed object
struct _unwind_space {
pointer_type space;
_unwind_space(pointer_type p) : space(p) {}
~_unwind_space() {
if(space) (void) new (space) Wrapper<default_constructed>(default_constructed());
}
};
public:
explicit Wrapper( const T& other ) : value_space(other) { }
explicit Wrapper(const Wrapper& other) : value_space(other.value_space) { }
/*override*/void CopyTo(void* newSpace) const {
_unwind_space guard((pointer_type)newSpace);
(void) new(newSpace) Wrapper(value_space);
guard.space = NULL;
}
/*override*/~Wrapper() { }
};
// specialization for array objects
template<typename T, size_t N>
struct Wrapper<T[N]> : public WrapperBase {
typedef T value_type;
typedef T* pointer_type;
// space must be untyped.
typedef T ArrayType[N];
private:
// The space is not of type T[N] because when copy-constructing, it would be
// default-initialized and then copied to in some fashion, resulting in two
// constructions and one destruction per element. If the type is char[ ], we
// placement new into each element, resulting in one construction per element.
static const size_t space_size = sizeof(ArrayType) / sizeof(char);
char value_space[space_size];
// on exception will ensure the already-built objects will be destructed
// (the value_space is a char array, so it is already trivially-destructible.)
struct _unwind_class {
pointer_type space;
int already_built;
_unwind_class(pointer_type p) : space(p), already_built(0) {}
~_unwind_class() {
if(space) {
for(size_t i = already_built; i > 0 ; --i ) space[i-1].~value_type();
(void) new(space) Wrapper<default_constructed>(default_constructed());
}
}
};
public:
const ArrayType &value() const {
char *vp = const_cast<char *>(value_space);
return reinterpret_cast<ArrayType &>(*vp);
}
private:
Wrapper();
public:
// have to explicitly construct because other decays to a const value_type*
explicit Wrapper(const ArrayType& other) {
_unwind_class guard((pointer_type)value_space);
pointer_type vp = reinterpret_cast<pointer_type>(&value_space);
for(size_t i = 0; i < N; ++i ) {
(void) new(vp++) value_type(other[i]);
++(guard.already_built);
}
guard.space = NULL;
}
explicit Wrapper(const Wrapper& other) : WrapperBase() {
// we have to do the heavy lifting to copy contents
_unwind_class guard((pointer_type)value_space);
pointer_type dp = reinterpret_cast<pointer_type>(value_space);
pointer_type sp = reinterpret_cast<pointer_type>(const_cast<char *>(other.value_space));
for(size_t i = 0; i < N; ++i, ++dp, ++sp) {
(void) new(dp) value_type(*sp);
++(guard.already_built);
}
guard.space = NULL;
}
/*override*/void CopyTo(void* newSpace) const {
(void) new(newSpace) Wrapper(*this); // exceptions handled in copy constructor
}
/*override*/~Wrapper() {
// have to destroy explicitly in reverse order
pointer_type vp = reinterpret_cast<pointer_type>(&value_space);
for(size_t i = N; i > 0 ; --i ) vp[i-1].~value_type();
}
};
// Given a tuple, return the type of the element that has the maximum alignment requirement.
// Given a tuple and that type, compute the number of elements of that most-aligned type
// needed to cover at least as many bytes as the largest object in the tuple.
template<bool, class T1, class T2> struct pick_one;
template<class T1, class T2> struct pick_one<true , T1, T2> { typedef T1 type; };
template<class T1, class T2> struct pick_one<false, T1, T2> { typedef T2 type; };
template< template<class> class Selector, typename T1, typename T2 >
struct pick_max {
typedef typename pick_one< (Selector<T1>::value > Selector<T2>::value), T1, T2 >::type type;
};
template<typename T> struct size_of { static const int value = sizeof(T); };
template< size_t N, class Tuple, template<class> class Selector > struct pick_tuple_max {
typedef typename pick_tuple_max<N-1, Tuple, Selector>::type LeftMaxType;
typedef typename tbb::flow::tuple_element<N-1, Tuple>::type ThisType;
typedef typename pick_max<Selector, LeftMaxType, ThisType>::type type;
};
template< class Tuple, template<class> class Selector > struct pick_tuple_max<0, Tuple, Selector> {
typedef typename tbb::flow::tuple_element<0, Tuple>::type type;
};
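// Worked example (illustrative only): with Tuple = tuple<char, double, int>,
// pick_tuple_max<3, Tuple, alignment_of>::type is double (the most strictly aligned element),
// and pick_tuple_max<3, Tuple, size_of>::type is also double (the largest element).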
// is the specified type included in a tuple?
template<class U, class V> struct is_same_type { static const bool value = false; };
template<class W> struct is_same_type<W,W> { static const bool value = true; };
template<class Q, size_t N, class Tuple>
struct is_element_of {
typedef typename tbb::flow::tuple_element<N-1, Tuple>::type T_i;
static const bool value = is_same_type<Q,T_i>::value || is_element_of<Q,N-1,Tuple>::value;
};
template<class Q, class Tuple>
struct is_element_of<Q,0,Tuple> {
typedef typename tbb::flow::tuple_element<0, Tuple>::type T_i;
static const bool value = is_same_type<Q,T_i>::value;
};
// Allow construction only from types that are listed in the tuple. If construction from a
// disallowed type is written, a method involving the error type below is instantiated. That
// type has no definition, so a compile-time error is generated.
template<typename T> struct ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple;
template<typename T, bool BUILD_IT> struct do_if;
template<typename T>
struct do_if<T, true> {
static void construct(void *mySpace, const T& x) {
(void) new(mySpace) Wrapper<T>(x);
}
};
template<typename T>
struct do_if<T, false> {
static void construct(void * /*mySpace*/, const T& x) {
// This method is instantiated when the type T does not match any of the
// element types in the Tuple in variant<Tuple>.
ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple<T>::bad_type(x);
}
};
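// Illustration (hypothetical) of the compile-time check, for a variant over tuple<int, float>,
// where Tuple is the allowed-type tuple and space is the raw storage:
//
//     do_if<int,    is_element_of<int,    2, Tuple>::value>::construct(space, 3);    // ok
//     do_if<double, is_element_of<double, 2, Tuple>::value>::construct(space, 3.0);  // compile error
//
// The second line selects do_if<double, false>, whose body names the undefined error type above.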
// Tuple tells us the allowed types that variant can hold. It determines the alignment of the space in
// Wrapper, and how big Wrapper is.
//
// The object can only be tested for its type, and a read-only reference can be fetched with cast_to<T>().
using tbb::internal::punned_cast;
struct tagged_null_type {};
template<typename TagType, typename T0, typename T1=tagged_null_type, typename T2=tagged_null_type, typename T3=tagged_null_type,
typename T4=tagged_null_type, typename T5=tagged_null_type, typename T6=tagged_null_type,
typename T7=tagged_null_type, typename T8=tagged_null_type, typename T9=tagged_null_type>
class tagged_msg {
typedef tbb::flow::tuple<T0, T1, T2, T3, T4
#if __TBB_VARIADIC_MAX >= 6
, T5
#endif
#if __TBB_VARIADIC_MAX >= 7
, T6
#endif
#if __TBB_VARIADIC_MAX >= 8
, T7
#endif
#if __TBB_VARIADIC_MAX >= 9
, T8
#endif
#if __TBB_VARIADIC_MAX >= 10
, T9
#endif
> Tuple;
private:
class variant {
static const size_t N = tbb::flow::tuple_size<Tuple>::value;
typedef typename pick_tuple_max<N, Tuple, alignment_of>::type AlignType;
typedef typename pick_tuple_max<N, Tuple, size_of>::type MaxSizeType;
static const size_t MaxNBytes = (sizeof(Wrapper<MaxSizeType>)+sizeof(AlignType)-1);
static const size_t MaxNElements = MaxNBytes/sizeof(AlignType);
typedef typename tbb::aligned_space<AlignType, MaxNElements> SpaceType;
SpaceType my_space;
static const size_t MaxSize = sizeof(SpaceType);
public:
variant() { (void) new(&my_space) Wrapper<default_constructed>(default_constructed()); }
template<typename T>
variant( const T& x ) {
do_if<T, is_element_of<T, N, Tuple>::value>::construct(&my_space,x);
}
variant(const variant& other) {
const WrapperBase * h = punned_cast<const WrapperBase *>(&(other.my_space));
h->CopyTo(&my_space);
}
// assignment must destroy and re-create the Wrapper type, as there is no way
// to create a Wrapper-to-Wrapper assign even if we find they agree in type.
void operator=( const variant& rhs ) {
if(&rhs != this) {
WrapperBase *h = punned_cast<WrapperBase *>(&my_space);
h->~WrapperBase();
const WrapperBase *ch = punned_cast<const WrapperBase *>(&(rhs.my_space));
ch->CopyTo(&my_space);
}
}
template<typename U>
const U& variant_cast_to() const {
const Wrapper<U> *h = dynamic_cast<const Wrapper<U>*>(punned_cast<const WrapperBase *>(&my_space));
if(!h) {
tbb::internal::throw_exception(tbb::internal::eid_bad_tagged_msg_cast);
}
return h->value();
}
template<typename U>
bool variant_is_a() const { return dynamic_cast<const Wrapper<U>*>(punned_cast<const WrapperBase *>(&my_space)) != NULL; }
bool variant_is_default_constructed() const {return variant_is_a<default_constructed>();}
~variant() {
WrapperBase *h = punned_cast<WrapperBase *>(&my_space);
h->~WrapperBase();
}
}; //class variant
TagType my_tag;
variant my_msg;
public:
tagged_msg(): my_tag(TagType(~0)), my_msg(){}
template<typename T, typename R>
tagged_msg(T const &index, R const &value) : my_tag(index), my_msg(value) {}
#if __TBB_CONST_REF_TO_ARRAY_TEMPLATE_PARAM_BROKEN
template<typename T, typename R, size_t N>
tagged_msg(T const &index, R (&value)[N]) : my_tag(index), my_msg(value) {}
#endif
void set_tag(TagType const &index) {my_tag = index;}
TagType tag() const {return my_tag;}
template<typename V>
const V& cast_to() const {return my_msg.template variant_cast_to<V>();}
template<typename V>
bool is_a() const {return my_msg.template variant_is_a<V>();}
bool is_default_constructed() const {return my_msg.variant_is_default_constructed();}
}; //class tagged_msg
// template to simplify cast and test for tagged_msg in template contexts
template<typename T, typename V>
const T& cast_to(V const &v) { return v.template cast_to<T>(); }
template<typename T, typename V>
bool is_a(V const &v) { return v.template is_a<T>(); }
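// A usage sketch (illustrative; a tagged_msg is what, for example, an indexer_node emits):
//
//     typedef tagged_msg<size_t, int, float> msg_t;
//     msg_t m(size_t(0), 42);            // stores an int, tagged with index 0
//     if (is_a<int>(m)) {
//         int v = cast_to<int>(m);       // v == 42
//     }
//     // cast_to<float>(m) would throw, since a float is not what is currently stored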
} // namespace internal
#endif /* __TBB__flow_graph_types_impl_H */

View File

@@ -0,0 +1,102 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_mutex_padding_H
#define __TBB_mutex_padding_H
// Wrapper that pads a mutex so it sits alone on a cache line, without requiring that it be
// allocated from a pool. Because padded mutexes may be defined anywhere, they must be two cache lines in size.
namespace tbb {
namespace interface7 {
namespace internal {
static const size_t cache_line_size = 64;
// Pad a mutex to occupy a number of full cache lines sufficient to avoid false sharing
// with other data; space overhead is up to 2*cache_line_size-1.
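// Worked example (illustrative only): with cache_line_size == 64 and sizeof(Mutex) == 1
// (e.g. a one-byte spin mutex), my_pad below occupies ((1+63)/64 + 1)*64 == 128 bytes, and
// impl() advances to the first 64-byte boundary past the object's start, so the mutex never
// shares a cache line with unrelated data.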
template<typename Mutex, bool is_rw> class padded_mutex;
template<typename Mutex>
class padded_mutex<Mutex,false> : tbb::internal::mutex_copy_deprecated_and_disabled {
typedef long pad_type;
pad_type my_pad[((sizeof(Mutex)+cache_line_size-1)/cache_line_size+1)*cache_line_size/sizeof(pad_type)];
Mutex *impl() { return (Mutex *)((uintptr_t(this)|(cache_line_size-1))+1);}
public:
static const bool is_rw_mutex = Mutex::is_rw_mutex;
static const bool is_recursive_mutex = Mutex::is_recursive_mutex;
static const bool is_fair_mutex = Mutex::is_fair_mutex;
padded_mutex() { new(impl()) Mutex(); }
~padded_mutex() { impl()->~Mutex(); }
//! Represents acquisition of a mutex.
class scoped_lock : tbb::internal::no_copy {
typename Mutex::scoped_lock my_scoped_lock;
public:
scoped_lock() : my_scoped_lock() {}
scoped_lock( padded_mutex& m ) : my_scoped_lock(*m.impl()) { }
~scoped_lock() { }
void acquire( padded_mutex& m ) { my_scoped_lock.acquire(*m.impl()); }
bool try_acquire( padded_mutex& m ) { return my_scoped_lock.try_acquire(*m.impl()); }
void release() { my_scoped_lock.release(); }
};
};
template<typename Mutex>
class padded_mutex<Mutex,true> : tbb::internal::mutex_copy_deprecated_and_disabled {
typedef long pad_type;
pad_type my_pad[((sizeof(Mutex)+cache_line_size-1)/cache_line_size+1)*cache_line_size/sizeof(pad_type)];
Mutex *impl() { return (Mutex *)((uintptr_t(this)|(cache_line_size-1))+1);}
public:
static const bool is_rw_mutex = Mutex::is_rw_mutex;
static const bool is_recursive_mutex = Mutex::is_recursive_mutex;
static const bool is_fair_mutex = Mutex::is_fair_mutex;
padded_mutex() { new(impl()) Mutex(); }
~padded_mutex() { impl()->~Mutex(); }
//! Represents acquisition of a mutex.
class scoped_lock : tbb::internal::no_copy {
typename Mutex::scoped_lock my_scoped_lock;
public:
scoped_lock() : my_scoped_lock() {}
scoped_lock( padded_mutex& m, bool write = true ) : my_scoped_lock(*m.impl(),write) { }
~scoped_lock() { }
void acquire( padded_mutex& m, bool write = true ) { my_scoped_lock.acquire(*m.impl(),write); }
bool try_acquire( padded_mutex& m, bool write = true ) { return my_scoped_lock.try_acquire(*m.impl(),write); }
bool upgrade_to_writer() { return my_scoped_lock.upgrade_to_writer(); }
bool downgrade_to_reader() { return my_scoped_lock.downgrade_to_reader(); }
void release() { my_scoped_lock.release(); }
};
};
} // namespace internal
} // namespace interface7
} // namespace tbb
#endif /* __TBB_mutex_padding_H */

View File

@@ -0,0 +1,70 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_range_iterator_H
#define __TBB_range_iterator_H
#include "../tbb_stddef.h"
#if __TBB_CPP11_STD_BEGIN_END_PRESENT && __TBB_CPP11_AUTO_PRESENT && __TBB_CPP11_DECLTYPE_PRESENT
#include <iterator>
#endif
namespace tbb {
// iterators to first and last elements of container
namespace internal {
#if __TBB_CPP11_STD_BEGIN_END_PRESENT && __TBB_CPP11_AUTO_PRESENT && __TBB_CPP11_DECLTYPE_PRESENT
using std::begin;
using std::end;
template<typename Container>
auto first(Container& c)-> decltype(begin(c)) {return begin(c);}
template<typename Container>
auto first(const Container& c)-> decltype(begin(c)) {return begin(c);}
template<typename Container>
auto last(Container& c)-> decltype(begin(c)) {return end(c);}
template<typename Container>
auto last(const Container& c)-> decltype(begin(c)) {return end(c);}
#else
template<typename Container>
typename Container::iterator first(Container& c) {return c.begin();}
template<typename Container>
typename Container::const_iterator first(const Container& c) {return c.begin();}
template<typename Container>
typename Container::iterator last(Container& c) {return c.end();}
template<typename Container>
typename Container::const_iterator last(const Container& c) {return c.end();}
#endif
template<typename T, size_t size>
T* first(T (&arr) [size]) {return arr;}
template<typename T, size_t size>
T* last(T (&arr) [size]) {return arr + size;}
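// A usage sketch (illustrative only); the same helpers work for containers and built-in arrays:
//
//     std::vector<int> v(3, 1);
//     int a[4] = {0, 1, 2, 3};
//     int s = 0;
//     for (std::vector<int>::iterator it = tbb::internal::first(v); it != tbb::internal::last(v); ++it) s += *it;
//     for (int *p = tbb::internal::first(a); p != tbb::internal::last(a); ++p) s += *p;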
} //namespace internal
} //namespace tbb
#endif // __TBB_range_iterator_H

View File

@@ -0,0 +1,65 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
TBB_STRING_RESOURCE(FLOW_BROADCAST_NODE, "broadcast_node")
TBB_STRING_RESOURCE(FLOW_BUFFER_NODE, "buffer_node")
TBB_STRING_RESOURCE(FLOW_CONTINUE_NODE, "continue_node")
TBB_STRING_RESOURCE(FLOW_FUNCTION_NODE, "function_node")
TBB_STRING_RESOURCE(FLOW_JOIN_NODE_QUEUEING, "join_node (queueing)")
TBB_STRING_RESOURCE(FLOW_JOIN_NODE_RESERVING, "join_node (reserving)")
TBB_STRING_RESOURCE(FLOW_JOIN_NODE_TAG_MATCHING, "join_node (tag_matching)")
TBB_STRING_RESOURCE(FLOW_LIMITER_NODE, "limiter_node")
TBB_STRING_RESOURCE(FLOW_MULTIFUNCTION_NODE, "multifunction_node")
TBB_STRING_RESOURCE(FLOW_OR_NODE, "or_node") // no longer in use, kept for backward compatibility
TBB_STRING_RESOURCE(FLOW_OVERWRITE_NODE, "overwrite_node")
TBB_STRING_RESOURCE(FLOW_PRIORITY_QUEUE_NODE, "priority_queue_node")
TBB_STRING_RESOURCE(FLOW_QUEUE_NODE, "queue_node")
TBB_STRING_RESOURCE(FLOW_SEQUENCER_NODE, "sequencer_node")
TBB_STRING_RESOURCE(FLOW_SOURCE_NODE, "source_node")
TBB_STRING_RESOURCE(FLOW_SPLIT_NODE, "split_node")
TBB_STRING_RESOURCE(FLOW_WRITE_ONCE_NODE, "write_once_node")
TBB_STRING_RESOURCE(FLOW_BODY, "body")
TBB_STRING_RESOURCE(FLOW_GRAPH, "graph")
TBB_STRING_RESOURCE(FLOW_NODE, "node")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT, "input_port")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_0, "input_port_0")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_1, "input_port_1")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_2, "input_port_2")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_3, "input_port_3")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_4, "input_port_4")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_5, "input_port_5")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_6, "input_port_6")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_7, "input_port_7")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_8, "input_port_8")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_9, "input_port_9")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT, "output_port")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_0, "output_port_0")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_1, "output_port_1")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_2, "output_port_2")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_3, "output_port_3")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_4, "output_port_4")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_5, "output_port_5")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_6, "output_port_6")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_7, "output_port_7")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_8, "output_port_8")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_9, "output_port_9")
TBB_STRING_RESOURCE(FLOW_OBJECT_NAME, "object_name")
TBB_STRING_RESOURCE(FLOW_NULL, "null")
TBB_STRING_RESOURCE(FLOW_INDEXER_NODE, "indexer_node")

View File

@@ -0,0 +1,73 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_tbb_windef_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif /* __TBB_tbb_windef_H */
// Check that the target Windows version has all API calls required for TBB.
// Do not increase the version in condition beyond 0x0500 without prior discussion!
#if defined(_WIN32_WINNT) && _WIN32_WINNT<0x0501
#error TBB is unable to run on old Windows versions; _WIN32_WINNT must be 0x0501 or greater.
#endif
#if !defined(_MT)
#error TBB requires linkage with multithreaded C/C++ runtime library. \
Choose multithreaded DLL runtime in project settings, or use /MD[d] compiler switch.
#endif
// Workaround for the problem with MSVC headers failing to define namespace std
namespace std {
using ::size_t; using ::ptrdiff_t;
}
#define __TBB_STRING_AUX(x) #x
#define __TBB_STRING(x) __TBB_STRING_AUX(x)
// Default setting of TBB_USE_DEBUG
#ifdef TBB_USE_DEBUG
# if TBB_USE_DEBUG
# if !defined(_DEBUG)
# pragma message(__FILE__ "(" __TBB_STRING(__LINE__) ") : Warning: Recommend using /MDd if compiling with TBB_USE_DEBUG!=0")
# endif
# else
# if defined(_DEBUG)
# pragma message(__FILE__ "(" __TBB_STRING(__LINE__) ") : Warning: Recommend using /MD if compiling with TBB_USE_DEBUG==0")
# endif
# endif
#endif
#if (__TBB_BUILD || __TBBMALLOC_BUILD) && !defined(__TBB_NO_IMPLICIT_LINKAGE)
#define __TBB_NO_IMPLICIT_LINKAGE 1
#endif
#if _MSC_VER
#if !__TBB_NO_IMPLICIT_LINKAGE
#ifdef __TBB_LIB_NAME
#pragma comment(lib, __TBB_STRING(__TBB_LIB_NAME))
#else
#ifdef _DEBUG
#pragma comment(lib, "tbb_debug.lib")
#else
#pragma comment(lib, "tbb.lib")
#endif
#endif
#endif
#endif

View File

@@ -0,0 +1,148 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB__x86_eliding_mutex_impl_H
#define __TBB__x86_eliding_mutex_impl_H
#ifndef __TBB_spin_mutex_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#if ( __TBB_x86_32 || __TBB_x86_64 )
namespace tbb {
namespace interface7 {
namespace internal {
template<typename Mutex, bool is_rw>
class padded_mutex;
//! An eliding lock that occupies a single byte.
/** An x86_eliding_mutex is an HLE-enabled spin mutex. It is recommended to
put the mutex on a cache line that is not shared by the data it protects.
It should be used for locking short critical sections where the lock is
contended but the data it protects are not. If zero-initialized, the
mutex is considered unheld.
@ingroup synchronization */
class x86_eliding_mutex : tbb::internal::mutex_copy_deprecated_and_disabled {
//! 0 if lock is released, 1 if lock is acquired.
__TBB_atomic_flag flag;
friend class padded_mutex<x86_eliding_mutex, false>;
public:
//! Construct unacquired lock.
/** Equivalent to zero-initialization of *this. */
x86_eliding_mutex() : flag(0) {}
// bug in gcc 3.x.x causes syntax error in spite of the friend declaration above.
// Make the scoped_lock public in that case.
#if __TBB_USE_X86_ELIDING_MUTEX || __TBB_GCC_VERSION < 40000
#else
// by default we will not provide the scoped_lock interface. The user
// should use the padded version of the mutex. scoped_lock is used in
// padded_mutex template.
private:
#endif
// scoped_lock in padded_mutex<> is the interface to use.
//! Represents acquisition of a mutex.
class scoped_lock : tbb::internal::no_copy {
private:
//! Points to currently held mutex, or NULL if no lock is held.
x86_eliding_mutex* my_mutex;
public:
//! Construct without acquiring a mutex.
scoped_lock() : my_mutex(NULL) {}
//! Construct and acquire lock on a mutex.
scoped_lock( x86_eliding_mutex& m ) : my_mutex(NULL) { acquire(m); }
//! Acquire lock.
void acquire( x86_eliding_mutex& m ) {
__TBB_ASSERT( !my_mutex, "already holding a lock" );
my_mutex=&m;
my_mutex->lock();
}
//! Try acquiring lock (non-blocking)
/** Return true if lock acquired; false otherwise. */
bool try_acquire( x86_eliding_mutex& m ) {
__TBB_ASSERT( !my_mutex, "already holding a lock" );
bool result = m.try_lock();
if( result ) {
my_mutex = &m;
}
return result;
}
//! Release lock
void release() {
__TBB_ASSERT( my_mutex, "release on scoped_lock that is not holding a lock" );
my_mutex->unlock();
my_mutex = NULL;
}
//! Destroy lock. If holding a lock, releases the lock first.
~scoped_lock() {
if( my_mutex ) {
release();
}
}
};
#if __TBB_USE_X86_ELIDING_MUTEX || __TBB_GCC_VERSION < 40000
#else
public:
#endif /* __TBB_USE_X86_ELIDING_MUTEX */
// Mutex traits
static const bool is_rw_mutex = false;
static const bool is_recursive_mutex = false;
static const bool is_fair_mutex = false;
// ISO C++0x compatibility methods
//! Acquire lock
void lock() {
__TBB_LockByteElided(flag);
}
//! Try acquiring lock (non-blocking)
/** Return true if lock acquired; false otherwise. */
bool try_lock() {
return __TBB_TryLockByteElided(flag);
}
//! Release lock
void unlock() {
__TBB_UnlockByteElided( flag );
}
}; // end of x86_eliding_mutex
} // namespace internal
} // namespace interface7
} // namespace tbb
#endif /* ( __TBB_x86_32 || __TBB_x86_64 ) */
#endif /* __TBB__x86_eliding_mutex_impl_H */
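/* Usage sketch (illustrative only, not part of this header). The lock is normally reached
   through the padded wrapper; the public typedef tbb::speculative_spin_mutex used below is
   an assumption of this example.

       #include "tbb/spin_mutex.h"

       tbb::speculative_spin_mutex m;      // padded_mutex<x86_eliding_mutex,false>
       int counter;

       void bump() {
           tbb::speculative_spin_mutex::scoped_lock lock(m); // acquire() -> __TBB_LockByteElided(flag)
           ++counter;                                        // short, mostly uncontended critical section
       }                                                     // ~scoped_lock() -> __TBB_UnlockByteElided(flag)
*/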

View File

@@ -0,0 +1,225 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB__x86_rtm_rw_mutex_impl_H
#define __TBB__x86_rtm_rw_mutex_impl_H
#ifndef __TBB_spin_rw_mutex_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#if __TBB_TSX_AVAILABLE
#include "../tbb_stddef.h"
#include "../tbb_machine.h"
#include "../tbb_profiling.h"
#include "../spin_rw_mutex.h"
namespace tbb {
namespace interface8 {
namespace internal {
enum RTM_type {
RTM_not_in_mutex,
RTM_transacting_reader,
RTM_transacting_writer,
RTM_real_reader,
RTM_real_writer
};
static const unsigned long speculation_granularity = 64;
//! Fast, unfair, spinning, speculation-enabled reader-writer lock with backoff and writer preference.
/** @ingroup synchronization */
class x86_rtm_rw_mutex: private spin_rw_mutex {
#if __TBB_USE_X86_RTM_RW_MUTEX || __TBB_GCC_VERSION < 40000
// bug in gcc 3.x.x causes syntax error in spite of the friend declaration below.
// Make the scoped_lock public in that case.
public:
#else
private:
#endif
friend class interface7::internal::padded_mutex<x86_rtm_rw_mutex,true>;
class scoped_lock; // should be private
friend class scoped_lock;
private:
//! @cond INTERNAL
//! Internal construct unacquired mutex.
void __TBB_EXPORTED_METHOD internal_construct();
//! Internal acquire write lock.
// only_speculate == true if we're doing a try_lock, else false.
void __TBB_EXPORTED_METHOD internal_acquire_writer(x86_rtm_rw_mutex::scoped_lock&, bool only_speculate=false);
//! Internal acquire read lock.
// only_speculate == true if we're doing a try_lock, else false.
void __TBB_EXPORTED_METHOD internal_acquire_reader(x86_rtm_rw_mutex::scoped_lock&, bool only_speculate=false);
//! Internal upgrade reader to become a writer.
bool __TBB_EXPORTED_METHOD internal_upgrade( x86_rtm_rw_mutex::scoped_lock& );
//! Out of line code for downgrading a writer to a reader.
bool __TBB_EXPORTED_METHOD internal_downgrade( x86_rtm_rw_mutex::scoped_lock& );
//! Internal try_acquire write lock.
bool __TBB_EXPORTED_METHOD internal_try_acquire_writer( x86_rtm_rw_mutex::scoped_lock& );
//! Internal release lock.
void __TBB_EXPORTED_METHOD internal_release( x86_rtm_rw_mutex::scoped_lock& );
static x86_rtm_rw_mutex* internal_get_mutex( const spin_rw_mutex::scoped_lock& lock )
{
return static_cast<x86_rtm_rw_mutex*>( lock.internal_get_mutex() );
}
static void internal_set_mutex( spin_rw_mutex::scoped_lock& lock, spin_rw_mutex* mtx )
{
lock.internal_set_mutex( mtx );
}
//! @endcond
public:
//! Construct unacquired mutex.
x86_rtm_rw_mutex() {
w_flag = false;
#if TBB_USE_THREADING_TOOLS
internal_construct();
#endif
}
#if TBB_USE_ASSERT
//! Empty destructor.
~x86_rtm_rw_mutex() {}
#endif /* TBB_USE_ASSERT */
// Mutex traits
static const bool is_rw_mutex = true;
static const bool is_recursive_mutex = false;
static const bool is_fair_mutex = false;
#if __TBB_USE_X86_RTM_RW_MUTEX || __TBB_GCC_VERSION < 40000
#else
// by default we will not provide the scoped_lock interface. The user
// should use the padded version of the mutex. scoped_lock is used in
// padded_mutex template.
private:
#endif
//! The scoped locking pattern
/** It helps to avoid the common problem of forgetting to release the lock.
It also nicely provides the "node" for queuing locks. */
// Speculation-enabled scoped lock for spin_rw_mutex
// The idea is to be able to reuse the acquire/release methods of spin_rw_mutex
// and its scoped lock wherever possible. The only way to use a speculative lock is to use
// a scoped_lock. (because transaction_state must be local)
class scoped_lock : tbb::internal::no_copy {
friend class x86_rtm_rw_mutex;
spin_rw_mutex::scoped_lock my_scoped_lock;
RTM_type transaction_state;
public:
//! Construct lock that has not acquired a mutex.
/** Equivalent to zero-initialization of *this. */
scoped_lock() : my_scoped_lock(), transaction_state(RTM_not_in_mutex) {
}
//! Acquire lock on given mutex.
scoped_lock( x86_rtm_rw_mutex& m, bool write = true ) : my_scoped_lock(),
transaction_state(RTM_not_in_mutex) {
acquire(m, write);
}
//! Release lock (if lock is held).
~scoped_lock() {
if(transaction_state != RTM_not_in_mutex) release();
}
//! Acquire lock on given mutex.
void acquire( x86_rtm_rw_mutex& m, bool write = true ) {
if( write ) m.internal_acquire_writer(*this);
else m.internal_acquire_reader(*this);
}
//! Release lock
void release() {
x86_rtm_rw_mutex* mutex = x86_rtm_rw_mutex::internal_get_mutex(my_scoped_lock);
__TBB_ASSERT( mutex, "lock is not acquired" );
__TBB_ASSERT( transaction_state!=RTM_not_in_mutex, "lock is not acquired" );
return mutex->internal_release(*this);
}
//! Upgrade reader to become a writer.
/** Returns whether the upgrade happened without releasing and re-acquiring the lock */
bool upgrade_to_writer() {
x86_rtm_rw_mutex* mutex = x86_rtm_rw_mutex::internal_get_mutex(my_scoped_lock);
__TBB_ASSERT( mutex, "lock is not acquired" );
__TBB_ASSERT( transaction_state==RTM_transacting_reader || transaction_state==RTM_real_reader, "Invalid state for upgrade" );
return mutex->internal_upgrade(*this);
}
//! Downgrade writer to become a reader.
/** Returns whether the downgrade happened without releasing and re-acquiring the lock */
bool downgrade_to_reader() {
x86_rtm_rw_mutex* mutex = x86_rtm_rw_mutex::internal_get_mutex(my_scoped_lock);
__TBB_ASSERT( mutex, "lock is not acquired" );
__TBB_ASSERT( transaction_state==RTM_transacting_writer || transaction_state==RTM_real_writer, "Invalid state for downgrade" );
return mutex->internal_downgrade(*this);
}
//! Attempt to acquire mutex.
/** returns true if successful. */
bool try_acquire( x86_rtm_rw_mutex& m, bool write = true ) {
#if TBB_USE_ASSERT
x86_rtm_rw_mutex* mutex = x86_rtm_rw_mutex::internal_get_mutex(my_scoped_lock);
__TBB_ASSERT( !mutex, "lock is already acquired" );
#endif
// have to assign m to our mutex.
// cannot set the mutex, because try_acquire in spin_rw_mutex depends on it being NULL.
if(write) return m.internal_try_acquire_writer(*this);
// speculatively acquire the lock. If this fails, do try_acquire on the spin_rw_mutex.
m.internal_acquire_reader(*this, /*only_speculate=*/true);
if(transaction_state == RTM_transacting_reader) return true;
if( my_scoped_lock.try_acquire(m, false)) {
transaction_state = RTM_real_reader;
return true;
}
return false;
}
}; // class x86_rtm_rw_mutex::scoped_lock
// ISO C++0x compatibility methods not provided because we cannot maintain
// state about whether a thread is in a transaction.
private:
char pad[speculation_granularity-sizeof(spin_rw_mutex)]; // padding
// If true, writer holds the spin_rw_mutex.
tbb::atomic<bool> w_flag; // want this on a separate cache line
}; // x86_rtm_rw_mutex
} // namespace internal
} // namespace interface8
} // namespace tbb
#endif /* __TBB_TSX_AVAILABLE */
#endif /* __TBB__x86_rtm_rw_mutex_impl_H */
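/* Usage sketch (illustrative only, not part of this header). As noted above, the speculative
   lock can only be used through a scoped_lock; the public typedef tbb::speculative_spin_rw_mutex
   used below is an assumption of this example.

       #include "tbb/spin_rw_mutex.h"

       tbb::speculative_spin_rw_mutex rw;  // padded_mutex<x86_rtm_rw_mutex,true>

       int read_or_fix( int& value ) {
           tbb::speculative_spin_rw_mutex::scoped_lock lock( rw, /*write=*/false ); // transactional read if possible
           if( value < 0 ) {
               lock.upgrade_to_writer();   // may abort the transaction and take the real lock
               value = 0;
           }
           return value;                   // released in ~scoped_lock()
       }
*/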

View File

@@ -0,0 +1,217 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
/*
Platform isolation layer for the ARMv7-a architecture.
*/
#ifndef __TBB_machine_H
#error Do not include this file directly; include tbb_machine.h instead
#endif
//TODO: is ARMv7-a the only architecture version that will ever be supported?
#if !(__ARM_ARCH_7A__)
#error compilation requires an ARMv7-a architecture.
#endif
#include <sys/param.h>
#include <unistd.h>
#define __TBB_WORDSIZE 4
// Traditionally ARM is little-endian.
// Note that, since only the layout of aligned 32-bit words is of interest,
// any apparent PDP-endianness of 32-bit words at half-word alignment or
// any little-endian ordering of big-endian 32-bit words in 64-bit quantities
// may be disregarded for this setting.
#if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
#elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#elif defined(__BYTE_ORDER__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
#else
#define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
#endif
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#define __TBB_full_memory_fence() __asm__ __volatile__("dmb ish": : :"memory")
#define __TBB_control_consistency_helper() __TBB_full_memory_fence()
#define __TBB_acquire_consistency_helper() __TBB_full_memory_fence()
#define __TBB_release_consistency_helper() __TBB_full_memory_fence()
//--------------------------------------------------
// Compare and swap
//--------------------------------------------------
/**
* Atomic CAS for 32 bit values: if *ptr==comparand, then *ptr=value; returns the value originally at *ptr
* @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
* @param value value to assign *ptr to if *ptr==comparand
* @param comparand value to compare with *ptr
* @return value originally in memory at ptr, regardless of success
*/
static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand )
{
int32_t oldval, res;
__TBB_full_memory_fence();
do {
__asm__ __volatile__(
"ldrex %1, [%3]\n"
"mov %0, #0\n"
"cmp %1, %4\n"
"it eq\n"
"strexeq %0, %5, [%3]\n"
: "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int32_t*)ptr)
: "r" ((int32_t *)ptr), "Ir" (comparand), "r" (value)
: "cc");
} while (res);
__TBB_full_memory_fence();
return oldval;
}
/**
* Atomic CAS for 64 bit values: if *ptr==comparand, then *ptr=value; returns the value originally at *ptr
* @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
* @param value value to assign *ptr to if *ptr==comparand
* @param comparand value to compare with *ptr
* @return value originally in memory at ptr, regardless of success
*/
static inline int64_t __TBB_machine_cmpswp8(volatile void *ptr, int64_t value, int64_t comparand )
{
int64_t oldval;
int32_t res;
__TBB_full_memory_fence();
do {
__asm__ __volatile__(
"mov %0, #0\n"
"ldrexd %1, %H1, [%3]\n"
"cmp %1, %4\n"
"it eq\n"
"cmpeq %H1, %H4\n"
"it eq\n"
"strexdeq %0, %5, %H5, [%3]"
: "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int64_t*)ptr)
: "r" ((int64_t *)ptr), "r" (comparand), "r" (value)
: "cc");
} while (res);
__TBB_full_memory_fence();
return oldval;
}
static inline int32_t __TBB_machine_fetchadd4(volatile void* ptr, int32_t addend)
{
unsigned long tmp;
int32_t result, tmp2;
__TBB_full_memory_fence();
__asm__ __volatile__(
"1: ldrex %0, [%4]\n"
" add %3, %0, %5\n"
" strex %1, %3, [%4]\n"
" cmp %1, #0\n"
" bne 1b\n"
: "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int32_t*)ptr), "=&r"(tmp2)
: "r" ((int32_t *)ptr), "Ir" (addend)
: "cc");
__TBB_full_memory_fence();
return result;
}
static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend)
{
unsigned long tmp;
int64_t result, tmp2;
__TBB_full_memory_fence();
__asm__ __volatile__(
"1: ldrexd %0, %H0, [%4]\n"
" adds %3, %0, %5\n"
" adc %H3, %H0, %H5\n"
" strexd %1, %3, %H3, [%4]\n"
" cmp %1, #0\n"
" bne 1b"
: "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int64_t*)ptr), "=&r"(tmp2)
: "r" ((int64_t *)ptr), "r" (addend)
: "cc");
__TBB_full_memory_fence();
return result;
}
inline void __TBB_machine_pause (int32_t delay )
{
while(delay>0)
{
__TBB_compiler_fence();
delay--;
}
}
namespace tbb {
namespace internal {
template <typename T, size_t S>
struct machine_load_store_relaxed {
static inline T load ( const volatile T& location ) {
const T value = location;
/*
* An extra memory barrier is required for errata #761319
* Please see http://infocenter.arm.com/help/topic/com.arm.doc.uan0004a
*/
__TBB_acquire_consistency_helper();
return value;
}
static inline void store ( volatile T& location, T value ) {
location = value;
}
};
}} // namespaces internal, tbb
// Machine specific atomic operations
#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
#define __TBB_Pause(V) __TBB_machine_pause(V)
// Use generics for some things
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_STORE 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
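/* Illustrative sketch (not part of this header): the CAS primitive above is typically used
   in a retry loop, e.g. a fetch-and-or built on __TBB_machine_cmpswp4. The helper name is
   hypothetical.

       static inline int32_t example_fetch_or( volatile int32_t* p, int32_t mask ) {
           int32_t expected, observed;
           do {
               expected = *p;
               observed = __TBB_machine_cmpswp4( p, expected | mask, expected );
           } while( observed != expected );    // another thread updated *p; retry with the new value
           return expected;                    // value before the OR took effect
       }
*/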

View File

@@ -0,0 +1,131 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_generic_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_gcc_generic_H
#include <stdint.h>
#include <unistd.h>
#define __TBB_WORDSIZE __SIZEOF_POINTER__
#if __TBB_GCC_64BIT_ATOMIC_BUILTINS_BROKEN
#define __TBB_64BIT_ATOMICS 0
#endif
/** FPU control setting not available for non-Intel architectures on Android **/
#if __ANDROID__ && __TBB_generic_arch
#define __TBB_CPU_CTL_ENV_PRESENT 0
#endif
// __BYTE_ORDER__ is used in accordance with http://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html,
// but __BIG_ENDIAN__ or __LITTLE_ENDIAN__ may be more commonly found instead.
#if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
#elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#elif defined(__BYTE_ORDER__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
#else
#define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
#endif
/** As this generic implementation has absolutely no information about the underlying
hardware, its performance will most likely be sub-optimal, because it uses full memory
fences where a more lightweight synchronization means (or none at all) could suffice.
Thus, if you use this header to enable TBB on a new platform, consider forking it and
relaxing the helpers below as appropriate. **/
#define __TBB_acquire_consistency_helper() __sync_synchronize()
#define __TBB_release_consistency_helper() __sync_synchronize()
#define __TBB_full_memory_fence() __sync_synchronize()
#define __TBB_control_consistency_helper() __sync_synchronize()
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T) \
inline T __TBB_machine_cmpswp##S( volatile void *ptr, T value, T comparand ) { \
return __sync_val_compare_and_swap(reinterpret_cast<volatile T *>(ptr),comparand,value); \
} \
\
inline T __TBB_machine_fetchadd##S( volatile void *ptr, T value ) { \
return __sync_fetch_and_add(reinterpret_cast<volatile T *>(ptr),value); \
}
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t)
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t)
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t)
__TBB_MACHINE_DEFINE_ATOMICS(8,int64_t)
#undef __TBB_MACHINE_DEFINE_ATOMICS
namespace tbb{ namespace internal { namespace gcc_builtins {
inline int clz(unsigned int x){ return __builtin_clz(x);};
inline int clz(unsigned long int x){ return __builtin_clzl(x);};
inline int clz(unsigned long long int x){ return __builtin_clzll(x);};
}}}
// gcc's __builtin_clz builtin counts the _number_ of leading zeroes
static inline intptr_t __TBB_machine_lg( uintptr_t x ) {
return sizeof(x)*8 - tbb::internal::gcc_builtins::clz(x) - 1;
}
static inline void __TBB_machine_or( volatile void *ptr, uintptr_t addend ) {
__sync_fetch_and_or(reinterpret_cast<volatile uintptr_t *>(ptr),addend);
}
static inline void __TBB_machine_and( volatile void *ptr, uintptr_t addend ) {
__sync_fetch_and_and(reinterpret_cast<volatile uintptr_t *>(ptr),addend);
}
typedef unsigned char __TBB_Flag;
typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag;
inline bool __TBB_machine_try_lock_byte( __TBB_atomic_flag &flag ) {
return __sync_lock_test_and_set(&flag,1)==0;
}
inline void __TBB_machine_unlock_byte( __TBB_atomic_flag &flag ) {
__sync_lock_release(&flag);
}
// Machine specific atomic operations
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
#define __TBB_TryLockByte __TBB_machine_try_lock_byte
#define __TBB_UnlockByte __TBB_machine_unlock_byte
// Definition of other functions
#define __TBB_Log2(V) __TBB_machine_lg(V)
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#if __TBB_WORDSIZE==4
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#endif
#if __TBB_x86_32 || __TBB_x86_64
#include "gcc_itsx.h"
#endif
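/* For reference, __TBB_MACHINE_DEFINE_ATOMICS(4,int32_t) above expands (modulo formatting) to:

       inline int32_t __TBB_machine_cmpswp4( volatile void *ptr, int32_t value, int32_t comparand ) {
           return __sync_val_compare_and_swap(reinterpret_cast<volatile int32_t *>(ptr),comparand,value);
       }
       inline int32_t __TBB_machine_fetchadd4( volatile void *ptr, int32_t value ) {
           return __sync_fetch_and_add(reinterpret_cast<volatile int32_t *>(ptr),value);
       }
*/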

View File

@@ -0,0 +1,100 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_machine_gcc_ia32_common_H
#define __TBB_machine_gcc_ia32_common_H
//TODO: Add a higher-level function, e.g. tbb::internal::log2(), into tbb_stddef.h that
//uses __TBB_Log2 and contains the assert, and remove the assert from here and all other
//platform-specific headers.
//TODO: Check if use of gcc intrinsic gives a better chance for cross call optimizations
template <typename T>
static inline intptr_t __TBB_machine_lg( T x ) {
__TBB_ASSERT(x>0, "The logarithm of a non-positive value is undefined.");
uintptr_t j, i = x;
__asm__("bsr %1,%0" : "=r"(j) : "r"(i));
return j;
}
#define __TBB_Log2(V) __TBB_machine_lg(V)
#ifndef __TBB_Pause
//TODO: check if raising a ratio of pause instructions to loop control instructions
//(via e.g. loop unrolling) gives any benefit for HT. E.g, the current implementation
//does about 2 CPU-consuming instructions for every pause instruction. Perhaps for
//high pause counts it should use an unrolled loop to raise the ratio, and thus free
//up more integer cycles for the other hyperthread. On the other hand, if the loop is
//unrolled too far, it won't fit in the core's loop cache, and thus take away
//instruction decode slots from the other hyperthread.
//TODO: check whether using the gcc __builtin_ia32_pause intrinsic gives somewhat better performing code
static inline void __TBB_machine_pause( int32_t delay ) {
for (int32_t i = 0; i < delay; i++) {
__asm__ __volatile__("pause;");
}
return;
}
#define __TBB_Pause(V) __TBB_machine_pause(V)
#endif /* !__TBB_Pause */
// API to retrieve/update FPU control setting
#ifndef __TBB_CPU_CTL_ENV_PRESENT
#define __TBB_CPU_CTL_ENV_PRESENT 1
namespace tbb {
namespace internal {
class cpu_ctl_env {
private:
int mxcsr;
short x87cw;
static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */
public:
bool operator!=( const cpu_ctl_env& ctl ) const { return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; }
void get_env() {
#if __TBB_ICC_12_0_INL_ASM_FSTCW_BROKEN
cpu_ctl_env loc_ctl;
__asm__ __volatile__ (
"stmxcsr %0\n\t"
"fstcw %1"
: "=m"(loc_ctl.mxcsr), "=m"(loc_ctl.x87cw)
);
*this = loc_ctl;
#else
__asm__ __volatile__ (
"stmxcsr %0\n\t"
"fstcw %1"
: "=m"(mxcsr), "=m"(x87cw)
);
#endif
mxcsr &= MXCSR_CONTROL_MASK;
}
void set_env() const {
__asm__ __volatile__ (
"ldmxcsr %0\n\t"
"fldcw %1"
: : "m"(mxcsr), "m"(x87cw)
);
}
};
} // namespace internal
} // namespace tbb
#endif /* !__TBB_CPU_CTL_ENV_PRESENT */
#include "gcc_itsx.h"
#endif /* __TBB_machine_gcc_ia32_common_H */
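/* Illustrative note (not part of this header): __TBB_Log2 returns the index of the highest
   set bit, i.e. floor(log2(x)), computed by the bsr instruction above. For example:

       __TBB_Log2(1)  == 0
       __TBB_Log2(3)  == 1
       __TBB_Log2(64) == 6
       // __TBB_Log2(0) is undefined; the assert above catches it in debug builds.
*/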

View File

@@ -0,0 +1,123 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_itsx_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_gcc_itsx_H
#define __TBB_OP_XACQUIRE 0xF2
#define __TBB_OP_XRELEASE 0xF3
#define __TBB_OP_LOCK 0xF0
#define __TBB_STRINGIZE_INTERNAL(arg) #arg
#define __TBB_STRINGIZE(arg) __TBB_STRINGIZE_INTERNAL(arg)
#ifdef __TBB_x86_64
#define __TBB_r_out "=r"
#else
#define __TBB_r_out "=q"
#endif
inline static uint8_t __TBB_machine_try_lock_elided( volatile uint8_t* lk )
{
uint8_t value = 1;
__asm__ volatile (".byte " __TBB_STRINGIZE(__TBB_OP_XACQUIRE)"; lock; xchgb %0, %1;"
: __TBB_r_out(value), "=m"(*lk) : "0"(value), "m"(*lk) : "memory" );
return uint8_t(value^1);
}
inline static void __TBB_machine_try_lock_elided_cancel()
{
// 'pause' instruction aborts HLE/RTM transactions
__asm__ volatile ("pause\n" : : : "memory" );
}
inline static void __TBB_machine_unlock_elided( volatile uint8_t* lk )
{
__asm__ volatile (".byte " __TBB_STRINGIZE(__TBB_OP_XRELEASE)"; movb $0, %0"
: "=m"(*lk) : "m"(*lk) : "memory" );
}
#if __TBB_TSX_INTRINSICS_PRESENT
#include <immintrin.h>
#define __TBB_machine_is_in_transaction _xtest
#define __TBB_machine_begin_transaction _xbegin
#define __TBB_machine_end_transaction _xend
#define __TBB_machine_transaction_conflict_abort() _xabort(0xff)
#else
/*!
* Check if the instruction is executed in a transaction or not
*/
inline static bool __TBB_machine_is_in_transaction()
{
int8_t res = 0;
#if __TBB_x86_32
__asm__ volatile (".byte 0x0F; .byte 0x01; .byte 0xD6;\n"
"setz %0" : "=q"(res) : : "memory" );
#else
__asm__ volatile (".byte 0x0F; .byte 0x01; .byte 0xD6;\n"
"setz %0" : "=r"(res) : : "memory" );
#endif
return res==0;
}
/*!
* Enter speculative execution mode.
* @return -1 on success
* abort cause ( or 0 ) on abort
*/
inline static uint32_t __TBB_machine_begin_transaction()
{
uint32_t res = ~uint32_t(0); // success value
__asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n" // XBEGIN <abort-offset>
" .long 2f-1b-6\n" // 2f-1b == difference in addresses of start
// of XBEGIN and the MOVL
// 2f - 1b - 6 == that difference minus the size of the
// XBEGIN instruction. This is the abort offset to
// 2: below.
" jmp 3f\n" // success (leave -1 in res)
"2: movl %%eax,%0\n" // store failure code in res
"3:"
:"=r"(res):"0"(res):"memory","%eax");
return res;
}
/*!
* Attempt to commit/end transaction
*/
inline static void __TBB_machine_end_transaction()
{
__asm__ volatile (".byte 0x0F; .byte 0x01; .byte 0xD5" :::"memory"); // XEND
}
/*
* aborts with code 0xFF (lock already held)
*/
inline static void __TBB_machine_transaction_conflict_abort()
{
__asm__ volatile (".byte 0xC6; .byte 0xF8; .byte 0xFF" :::"memory");
}
#endif /* __TBB_TSX_INTRINSICS_PRESENT */
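/* Illustrative sketch (not part of this header): the elided-lock primitives above pair up as
   an HLE spin lock; the helper name below is hypothetical.

       static inline void example_lock_byte_elided( volatile uint8_t& flag ) {
           for(;;) {
               if( __TBB_machine_try_lock_elided(&flag) ) return; // XACQUIRE-prefixed xchg
               do {
                   __TBB_machine_try_lock_elided_cancel();        // pause: aborts a doomed HLE/RTM transaction
               } while( flag );                                   // spin while the lock still looks held
           }
       }
       // ... critical section ..., then:
       //     __TBB_machine_unlock_elided(&flag);                 // XRELEASE-prefixed store of 0
*/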

View File

@@ -0,0 +1,70 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
// TODO: revise by comparing with mac_ppc.h
#if !defined(__TBB_machine_H) || defined(__TBB_machine_ibm_aix51_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_ibm_aix51_H
#define __TBB_WORDSIZE 8
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG // assumption based on operating system
#include <stdint.h>
#include <unistd.h>
#include <sched.h>
extern "C" {
int32_t __TBB_machine_cas_32 (volatile void* ptr, int32_t value, int32_t comparand);
int64_t __TBB_machine_cas_64 (volatile void* ptr, int64_t value, int64_t comparand);
void __TBB_machine_flush ();
void __TBB_machine_lwsync ();
void __TBB_machine_isync ();
}
// Mapping of old entry point names retained for the sake of backward binary compatibility
#define __TBB_machine_cmpswp4 __TBB_machine_cas_32
#define __TBB_machine_cmpswp8 __TBB_machine_cas_64
#define __TBB_Yield() sched_yield()
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#if __GNUC__
#define __TBB_control_consistency_helper() __asm__ __volatile__( "isync": : :"memory")
#define __TBB_acquire_consistency_helper() __asm__ __volatile__("lwsync": : :"memory")
#define __TBB_release_consistency_helper() __asm__ __volatile__("lwsync": : :"memory")
#define __TBB_full_memory_fence() __asm__ __volatile__( "sync": : :"memory")
#else
// The IBM C++ compiler does not support inline assembly.
// TODO: GCC-style inline assembly has been supported since XL 9.0 or earlier; replace this
// with a more lightweight implementation (as in mac_ppc.h).
#define __TBB_control_consistency_helper() __TBB_machine_isync ()
#define __TBB_acquire_consistency_helper() __TBB_machine_lwsync ()
#define __TBB_release_consistency_helper() __TBB_machine_lwsync ()
#define __TBB_full_memory_fence() __TBB_machine_flush ()
#endif
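/* Illustrative sketch (not part of this header): one way the out-of-line CAS entry point and
   __TBB_Yield compose into a simple spin-acquire on this port; the helper name is hypothetical.

       static inline void example_spin_acquire( volatile int32_t& flag ) {
           // try to change 0 -> 1; __TBB_machine_cmpswp4 returns the value it observed
           while( __TBB_machine_cmpswp4( &flag, 1, 0 ) != 0 )
               __TBB_Yield();                       // sched_yield() between attempts
           __TBB_acquire_consistency_helper();      // lwsync: order the critical section after the acquire
       }
*/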

View File

@@ -0,0 +1,258 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_icc_generic_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#if ! __TBB_ICC_BUILTIN_ATOMICS_PRESENT
#error "Intel C++ Compiler of at least 12.0 version is needed to use ICC intrinsics port"
#endif
#define __TBB_machine_icc_generic_H
//ICC mimics the "native" target compiler
#if _MSC_VER
#include "msvc_ia32_common.h"
#else
#include "gcc_ia32_common.h"
#endif
//TODO: Make the __TBB_WORDSIZE macro optional for the ICC intrinsics port.
//Since compiler intrinsics are used for all operations, this should be possible.
#if __TBB_x86_32
#define __TBB_WORDSIZE 4
#else
#define __TBB_WORDSIZE 8
#endif
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
//__TBB_compiler_fence() defined just in case, as it seems not to be used on its own anywhere else
#if _MSC_VER
//TODO: any way to use the same intrinsics on Windows and Linux?
#pragma intrinsic(_ReadWriteBarrier)
#define __TBB_compiler_fence() _ReadWriteBarrier()
#else
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#endif
#ifndef __TBB_full_memory_fence
#if _MSC_VER
//TODO: any way to use the same intrinsics on Windows and Linux?
#pragma intrinsic(_mm_mfence)
#define __TBB_full_memory_fence() _mm_mfence()
#else
#define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory")
#endif
#endif
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
namespace tbb { namespace internal {
//TODO: is there any way to reuse the definition of the memory_order enum from ICC instead of copy-pasting it?
//However, it seems unlikely that ICC will silently change the exact enum values, as they are defined
//in the ISO standard exactly like this.
//TODO: add a test that the exact values of the enum are the same as in ISO C++11
typedef enum memory_order {
memory_order_relaxed, memory_order_consume, memory_order_acquire,
memory_order_release, memory_order_acq_rel, memory_order_seq_cst
} memory_order;
namespace icc_intrinsics_port {
template <typename T>
T convert_argument(T value){
return value;
}
//The overload below is needed to get an explicit conversion of a pointer to void* in the argument list.
//Compiler bug?
//TODO: add a corresponding *_BROKEN macro and recheck with ICC 13.0 whether the overload is still needed
template <typename T>
void* convert_argument(T* value){
return (void*)value;
}
}
//TODO: code below is a bit repetitive, consider simplifying it
template <typename T, size_t S>
struct machine_load_store {
static T load_with_acquire ( const volatile T& location ) {
return __atomic_load_explicit(&location, memory_order_acquire);
}
static void store_with_release ( volatile T &location, T value ) {
__atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_release);
}
};
template <typename T, size_t S>
struct machine_load_store_relaxed {
static inline T load ( const T& location ) {
return __atomic_load_explicit(&location, memory_order_relaxed);
}
static inline void store ( T& location, T value ) {
__atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_relaxed);
}
};
template <typename T, size_t S>
struct machine_load_store_seq_cst {
static T load ( const volatile T& location ) {
return __atomic_load_explicit(&location, memory_order_seq_cst);
}
static void store ( volatile T &location, T value ) {
__atomic_store_explicit(&location, value, memory_order_seq_cst);
}
};
}} // namespace tbb::internal
namespace tbb{ namespace internal { namespace icc_intrinsics_port{
typedef enum memory_order_map {
relaxed = memory_order_relaxed,
acquire = memory_order_acquire,
release = memory_order_release,
full_fence= memory_order_seq_cst
} memory_order_map;
}}}// namespace tbb::internal
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,M) \
inline T __TBB_machine_cmpswp##S##M( volatile void *ptr, T value, T comparand ) { \
__atomic_compare_exchange_strong_explicit( \
(T*)ptr \
,&comparand \
,value \
, tbb::internal::icc_intrinsics_port::M \
, tbb::internal::icc_intrinsics_port::M); \
return comparand; \
} \
\
inline T __TBB_machine_fetchstore##S##M(volatile void *ptr, T value) { \
return __atomic_exchange_explicit((T*)ptr, value, tbb::internal::icc_intrinsics_port::M); \
} \
\
inline T __TBB_machine_fetchadd##S##M(volatile void *ptr, T value) { \
return __atomic_fetch_add_explicit((T*)ptr, value, tbb::internal::icc_intrinsics_port::M); \
}
__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, full_fence)
__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, acquire)
__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, release)
__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, relaxed)
__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, full_fence)
__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, acquire)
__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, release)
__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, relaxed)
__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, full_fence)
__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, acquire)
__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, release)
__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, relaxed)
__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, full_fence)
__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, acquire)
__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, release)
__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, relaxed)
#undef __TBB_MACHINE_DEFINE_ATOMICS
#define __TBB_USE_FENCED_ATOMICS 1
namespace tbb { namespace internal {
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(full_fence)
__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(full_fence)
__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(acquire)
__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(release)
__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(relaxed)
__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(relaxed)
template <typename T>
struct machine_load_store<T,8> {
static T load_with_acquire ( const volatile T& location ) {
if( tbb::internal::is_aligned(&location,8)) {
return __atomic_load_explicit(&location, memory_order_acquire);
} else {
return __TBB_machine_generic_load8acquire(&location);
}
}
static void store_with_release ( volatile T &location, T value ) {
if( tbb::internal::is_aligned(&location,8)) {
__atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_release);
} else {
return __TBB_machine_generic_store8release(&location,value);
}
}
};
template <typename T>
struct machine_load_store_relaxed<T,8> {
static T load( const volatile T& location ) {
if( tbb::internal::is_aligned(&location,8)) {
return __atomic_load_explicit(&location, memory_order_relaxed);
} else {
return __TBB_machine_generic_load8relaxed(&location);
}
}
static void store( volatile T &location, T value ) {
if( tbb::internal::is_aligned(&location,8)) {
__atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_relaxed);
} else {
return __TBB_machine_generic_store8relaxed(&location,value);
}
}
};
template <typename T >
struct machine_load_store_seq_cst<T,8> {
static T load ( const volatile T& location ) {
if( tbb::internal::is_aligned(&location,8)) {
return __atomic_load_explicit(&location, memory_order_seq_cst);
} else {
return __TBB_machine_generic_load8full_fence(&location);
}
}
static void store ( volatile T &location, T value ) {
if( tbb::internal::is_aligned(&location,8)) {
__atomic_store_explicit(&location, value, memory_order_seq_cst);
} else {
return __TBB_machine_generic_store8full_fence(&location,value);
}
}
};
#endif
}} // namespace tbb::internal
template <typename T>
inline void __TBB_machine_OR( T *operand, T addend ) {
__atomic_fetch_or_explicit(operand, addend, tbb::internal::memory_order_seq_cst);
}
template <typename T>
inline void __TBB_machine_AND( T *operand, T addend ) {
__atomic_fetch_and_explicit(operand, addend, tbb::internal::memory_order_seq_cst);
}
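/* For reference, each __TBB_MACHINE_DEFINE_ATOMICS(S,T,M) instantiation above generates one
   function per size and memory ordering; e.g. the (4, int32_t, acquire) case yields (modulo
   formatting):

       inline tbb::internal::int32_t __TBB_machine_cmpswp4acquire( volatile void *ptr,
               tbb::internal::int32_t value, tbb::internal::int32_t comparand ) {
           __atomic_compare_exchange_strong_explicit( (tbb::internal::int32_t*)ptr, &comparand, value,
               tbb::internal::icc_intrinsics_port::acquire, tbb::internal::icc_intrinsics_port::acquire );
           return comparand;   // the intrinsic leaves the previously stored value in comparand
       }
*/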

View File

@@ -0,0 +1,84 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_machine_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#include <sched.h>
#define __TBB_Yield() sched_yield()
#include <unistd.h>
/* Futex definitions */
#include <sys/syscall.h>
#if defined(SYS_futex)
#define __TBB_USE_FUTEX 1
#include <limits.h>
#include <errno.h>
// Unfortunately, some versions of Linux do not have a header that defines FUTEX_WAIT and FUTEX_WAKE.
#ifdef FUTEX_WAIT
#define __TBB_FUTEX_WAIT FUTEX_WAIT
#else
#define __TBB_FUTEX_WAIT 0
#endif
#ifdef FUTEX_WAKE
#define __TBB_FUTEX_WAKE FUTEX_WAKE
#else
#define __TBB_FUTEX_WAKE 1
#endif
#ifndef __TBB_ASSERT
#error machine specific headers must be included after tbb_stddef.h
#endif
namespace tbb {
namespace internal {
inline int futex_wait( void *futex, int comparand ) {
int r = syscall( SYS_futex,futex,__TBB_FUTEX_WAIT,comparand,NULL,NULL,0 );
#if TBB_USE_ASSERT
int e = errno;
__TBB_ASSERT( r==0||r==EWOULDBLOCK||(r==-1&&(e==EAGAIN||e==EINTR)), "futex_wait failed." );
#endif /* TBB_USE_ASSERT */
return r;
}
inline int futex_wakeup_one( void *futex ) {
int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,1,NULL,NULL,0 );
__TBB_ASSERT( r==0||r==1, "futex_wakeup_one: more than one thread woken up?" );
return r;
}
inline int futex_wakeup_all( void *futex ) {
int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,INT_MAX,NULL,NULL,0 );
__TBB_ASSERT( r>=0, "futex_wakeup_all: error in waking up threads" );
return r;
}
} /* namespace internal */
} /* namespace tbb */
#endif /* SYS_futex */
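/* Illustrative sketch (not part of this header): the intended pairing of the wrappers above.
   The waiter sleeps only while the futex word still holds the comparand it passed; the
   signaller updates the word first and then wakes a sleeper. The helper names are hypothetical.

       static int flag = 0;

       void example_wait() {
           while( flag == 0 )
               tbb::internal::futex_wait( (void*)&flag, 0 );   // returns immediately if flag != 0
       }
       void example_signal() {
           flag = 1;
           tbb::internal::futex_wakeup_one( (void*)&flag );
       }
*/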

View File

@@ -0,0 +1,232 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_ia32_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_linux_ia32_H
#include <stdint.h>
#include "gcc_ia32_common.h"
#define __TBB_WORDSIZE 4
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory")
#if __TBB_ICC_ASM_VOLATILE_BROKEN
#define __TBB_VOLATILE
#else
#define __TBB_VOLATILE volatile
#endif
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,X,R) \
static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T comparand ) \
{ \
T result; \
\
__asm__ __volatile__("lock\ncmpxchg" X " %2,%1" \
: "=a"(result), "=m"(*(__TBB_VOLATILE T*)ptr) \
: "q"(value), "0"(comparand), "m"(*(__TBB_VOLATILE T*)ptr) \
: "memory"); \
return result; \
} \
\
static inline T __TBB_machine_fetchadd##S(volatile void *ptr, T addend) \
{ \
T result; \
__asm__ __volatile__("lock\nxadd" X " %0,%1" \
: R (result), "=m"(*(__TBB_VOLATILE T*)ptr) \
: "0"(addend), "m"(*(__TBB_VOLATILE T*)ptr) \
: "memory"); \
return result; \
} \
\
static inline T __TBB_machine_fetchstore##S(volatile void *ptr, T value) \
{ \
T result; \
__asm__ __volatile__("lock\nxchg" X " %0,%1" \
: R (result), "=m"(*(__TBB_VOLATILE T*)ptr) \
: "0"(value), "m"(*(__TBB_VOLATILE T*)ptr) \
: "memory"); \
return result; \
}
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t,"","=q")
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t,"","=r")
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"l","=r")
#if __INTEL_COMPILER
#pragma warning( push )
// reference to EBX in a function requiring stack alignment
#pragma warning( disable: 998 )
#endif
#if __TBB_GCC_CAS8_BUILTIN_INLINING_BROKEN
#define __TBB_IA32_CAS8_NOINLINE __attribute__ ((noinline))
#else
#define __TBB_IA32_CAS8_NOINLINE
#endif
static inline __TBB_IA32_CAS8_NOINLINE int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand ) {
//TODO: remove the extra part of condition once __TBB_GCC_BUILTIN_ATOMICS_PRESENT is lowered to gcc version 4.1.2
#if (__TBB_GCC_BUILTIN_ATOMICS_PRESENT || (__TBB_GCC_VERSION >= 40102)) && !__TBB_GCC_64BIT_ATOMIC_BUILTINS_BROKEN
return __sync_val_compare_and_swap( reinterpret_cast<volatile int64_t*>(ptr), comparand, value );
#else /* !__TBB_GCC_BUILTIN_ATOMICS_PRESENT */
//TODO: it looks like ICC 13.0 has some issues with this code; investigate it more deeply
int64_t result;
union {
int64_t i64;
int32_t i32[2];
};
i64 = value;
#if __PIC__
/* compiling position-independent code */
// EBX register preserved for compliance with position-independent code rules on IA32
int32_t tmp;
__asm__ __volatile__ (
"movl %%ebx,%2\n\t"
"movl %5,%%ebx\n\t"
#if __GNUC__==3
"lock\n\t cmpxchg8b %1\n\t"
#else
"lock\n\t cmpxchg8b (%3)\n\t"
#endif
"movl %2,%%ebx"
: "=A"(result)
, "=m"(*(__TBB_VOLATILE int64_t *)ptr)
, "=m"(tmp)
#if __GNUC__==3
: "m"(*(__TBB_VOLATILE int64_t *)ptr)
#else
: "SD"(ptr)
#endif
, "0"(comparand)
, "m"(i32[0]), "c"(i32[1])
: "memory"
#if __INTEL_COMPILER
,"ebx"
#endif
);
#else /* !__PIC__ */
__asm__ __volatile__ (
"lock\n\t cmpxchg8b %1\n\t"
: "=A"(result), "=m"(*(__TBB_VOLATILE int64_t *)ptr)
: "m"(*(__TBB_VOLATILE int64_t *)ptr)
, "0"(comparand)
, "b"(i32[0]), "c"(i32[1])
: "memory"
);
#endif /* __PIC__ */
return result;
#endif /* !__TBB_GCC_BUILTIN_ATOMICS_PRESENT */
}
#undef __TBB_IA32_CAS8_NOINLINE
#if __INTEL_COMPILER
#pragma warning( pop )
#endif // warning 998 is back
static inline void __TBB_machine_or( volatile void *ptr, uint32_t addend ) {
__asm__ __volatile__("lock\norl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_t *)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory");
}
static inline void __TBB_machine_and( volatile void *ptr, uint32_t addend ) {
__asm__ __volatile__("lock\nandl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_t *)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory");
}
//TODO: Check whether it is possible and profitable for the IA-32 architecture (on Linux* and Windows*)
//to use 64-bit load/store via floating point registers together with a full fence
//for sequentially consistent load/store, instead of CAS.
#if __clang__
#define __TBB_fildq "fildll"
#define __TBB_fistpq "fistpll"
#else
#define __TBB_fildq "fildq"
#define __TBB_fistpq "fistpq"
#endif
static inline int64_t __TBB_machine_aligned_load8 (const volatile void *ptr) {
__TBB_ASSERT(tbb::internal::is_aligned(ptr,8),"__TBB_machine_aligned_load8 should be used with 8 byte aligned locations only \n");
int64_t result;
__asm__ __volatile__ ( __TBB_fildq " %1\n\t"
__TBB_fistpq " %0" : "=m"(result) : "m"(*(const __TBB_VOLATILE uint64_t*)ptr) : "memory" );
return result;
}
static inline void __TBB_machine_aligned_store8 (volatile void *ptr, int64_t value ) {
__TBB_ASSERT(tbb::internal::is_aligned(ptr,8),"__TBB_machine_aligned_store8 should be used with 8 byte aligned locations only \n");
// Aligned store
__asm__ __volatile__ ( __TBB_fildq " %1\n\t"
__TBB_fistpq " %0" : "=m"(*(__TBB_VOLATILE int64_t*)ptr) : "m"(value) : "memory" );
}
static inline int64_t __TBB_machine_load8 (const volatile void *ptr) {
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
if( tbb::internal::is_aligned(ptr,8)) {
#endif
return __TBB_machine_aligned_load8(ptr);
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
} else {
// Unaligned load
return __TBB_machine_cmpswp8(const_cast<void*>(ptr),0,0);
}
#endif
}
//! Handles misaligned 8-byte store
/** Defined in tbb_misc.cpp */
extern "C" void __TBB_machine_store8_slow( volatile void *ptr, int64_t value );
extern "C" void __TBB_machine_store8_slow_perf_warning( volatile void *ptr );
static inline void __TBB_machine_store8(volatile void *ptr, int64_t value) {
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
if( tbb::internal::is_aligned(ptr,8)) {
#endif
__TBB_machine_aligned_store8(ptr,value);
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
} else {
// Unaligned store
#if TBB_USE_PERFORMANCE_WARNINGS
__TBB_machine_store8_slow_perf_warning(ptr);
#endif /* TBB_USE_PERFORMANCE_WARNINGS */
__TBB_machine_store8_slow(ptr,value);
}
#endif
}
// Machine specific atomic operations
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
#define __TBB_USE_GENERIC_DWORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_DWORD_FETCH_STORE 1
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
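/* Illustrative sketch (not part of this header): on this 32-bit port an 8-byte location is read
   and written atomically through the x87 fild/fistp path when 8-byte aligned; the misaligned
   fallbacks are only compiled in when __TBB_FORCE_64BIT_ALIGNMENT_BROKEN is set. The helper
   names are hypothetical.

       int64_t example_snapshot( const volatile int64_t& counter ) {
           return __TBB_machine_load8( &counter );     // aligned: __TBB_machine_aligned_load8
       }
       void example_publish( volatile int64_t& counter, int64_t v ) {
           __TBB_machine_store8( &counter, v );        // misaligned: __TBB_machine_store8_slow
       }
*/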

View File

@@ -0,0 +1,181 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_ia64_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_linux_ia64_H
#include <stdint.h>
#include <ia64intrin.h>
#define __TBB_WORDSIZE 8
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#if __INTEL_COMPILER
#define __TBB_compiler_fence()
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper()
#define __TBB_release_consistency_helper()
#define __TBB_full_memory_fence() __mf()
#else
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
// Even though GCC imbues volatile loads with acquire semantics, it sometimes moves
// loads over the acquire fence. The following helpers stop such incorrect code motion.
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#define __TBB_full_memory_fence() __asm__ __volatile__("mf": : :"memory")
#endif /* !__INTEL_COMPILER */
// Most of the functions will be in a .s file
// TODO: revise dynamic_link, memory pools, etc. if the library dependency is removed.
extern "C" {
int8_t __TBB_machine_fetchadd1__TBB_full_fence (volatile void *ptr, int8_t addend);
int8_t __TBB_machine_fetchadd1acquire(volatile void *ptr, int8_t addend);
int8_t __TBB_machine_fetchadd1release(volatile void *ptr, int8_t addend);
int16_t __TBB_machine_fetchadd2__TBB_full_fence (volatile void *ptr, int16_t addend);
int16_t __TBB_machine_fetchadd2acquire(volatile void *ptr, int16_t addend);
int16_t __TBB_machine_fetchadd2release(volatile void *ptr, int16_t addend);
int32_t __TBB_machine_fetchadd4__TBB_full_fence (volatile void *ptr, int32_t value);
int32_t __TBB_machine_fetchadd4acquire(volatile void *ptr, int32_t addend);
int32_t __TBB_machine_fetchadd4release(volatile void *ptr, int32_t addend);
int64_t __TBB_machine_fetchadd8__TBB_full_fence (volatile void *ptr, int64_t value);
int64_t __TBB_machine_fetchadd8acquire(volatile void *ptr, int64_t addend);
int64_t __TBB_machine_fetchadd8release(volatile void *ptr, int64_t addend);
int8_t __TBB_machine_fetchstore1__TBB_full_fence (volatile void *ptr, int8_t value);
int8_t __TBB_machine_fetchstore1acquire(volatile void *ptr, int8_t value);
int8_t __TBB_machine_fetchstore1release(volatile void *ptr, int8_t value);
int16_t __TBB_machine_fetchstore2__TBB_full_fence (volatile void *ptr, int16_t value);
int16_t __TBB_machine_fetchstore2acquire(volatile void *ptr, int16_t value);
int16_t __TBB_machine_fetchstore2release(volatile void *ptr, int16_t value);
int32_t __TBB_machine_fetchstore4__TBB_full_fence (volatile void *ptr, int32_t value);
int32_t __TBB_machine_fetchstore4acquire(volatile void *ptr, int32_t value);
int32_t __TBB_machine_fetchstore4release(volatile void *ptr, int32_t value);
int64_t __TBB_machine_fetchstore8__TBB_full_fence (volatile void *ptr, int64_t value);
int64_t __TBB_machine_fetchstore8acquire(volatile void *ptr, int64_t value);
int64_t __TBB_machine_fetchstore8release(volatile void *ptr, int64_t value);
int8_t __TBB_machine_cmpswp1__TBB_full_fence (volatile void *ptr, int8_t value, int8_t comparand);
int8_t __TBB_machine_cmpswp1acquire(volatile void *ptr, int8_t value, int8_t comparand);
int8_t __TBB_machine_cmpswp1release(volatile void *ptr, int8_t value, int8_t comparand);
int16_t __TBB_machine_cmpswp2__TBB_full_fence (volatile void *ptr, int16_t value, int16_t comparand);
int16_t __TBB_machine_cmpswp2acquire(volatile void *ptr, int16_t value, int16_t comparand);
int16_t __TBB_machine_cmpswp2release(volatile void *ptr, int16_t value, int16_t comparand);
int32_t __TBB_machine_cmpswp4__TBB_full_fence (volatile void *ptr, int32_t value, int32_t comparand);
int32_t __TBB_machine_cmpswp4acquire(volatile void *ptr, int32_t value, int32_t comparand);
int32_t __TBB_machine_cmpswp4release(volatile void *ptr, int32_t value, int32_t comparand);
int64_t __TBB_machine_cmpswp8__TBB_full_fence (volatile void *ptr, int64_t value, int64_t comparand);
int64_t __TBB_machine_cmpswp8acquire(volatile void *ptr, int64_t value, int64_t comparand);
int64_t __TBB_machine_cmpswp8release(volatile void *ptr, int64_t value, int64_t comparand);
int64_t __TBB_machine_lg(uint64_t value);
void __TBB_machine_pause(int32_t delay);
bool __TBB_machine_trylockbyte( volatile unsigned char &ptr );
int64_t __TBB_machine_lockbyte( volatile unsigned char &ptr );
//! Retrieves the current RSE backing store pointer. IA64 specific.
void* __TBB_get_bsp();
int32_t __TBB_machine_load1_relaxed(const void *ptr);
int32_t __TBB_machine_load2_relaxed(const void *ptr);
int32_t __TBB_machine_load4_relaxed(const void *ptr);
int64_t __TBB_machine_load8_relaxed(const void *ptr);
void __TBB_machine_store1_relaxed(void *ptr, int32_t value);
void __TBB_machine_store2_relaxed(void *ptr, int32_t value);
void __TBB_machine_store4_relaxed(void *ptr, int32_t value);
void __TBB_machine_store8_relaxed(void *ptr, int64_t value);
} // extern "C"
// Mapping old entry points to the names corresponding to the new full_fence identifier.
#define __TBB_machine_fetchadd1full_fence __TBB_machine_fetchadd1__TBB_full_fence
#define __TBB_machine_fetchadd2full_fence __TBB_machine_fetchadd2__TBB_full_fence
#define __TBB_machine_fetchadd4full_fence __TBB_machine_fetchadd4__TBB_full_fence
#define __TBB_machine_fetchadd8full_fence __TBB_machine_fetchadd8__TBB_full_fence
#define __TBB_machine_fetchstore1full_fence __TBB_machine_fetchstore1__TBB_full_fence
#define __TBB_machine_fetchstore2full_fence __TBB_machine_fetchstore2__TBB_full_fence
#define __TBB_machine_fetchstore4full_fence __TBB_machine_fetchstore4__TBB_full_fence
#define __TBB_machine_fetchstore8full_fence __TBB_machine_fetchstore8__TBB_full_fence
#define __TBB_machine_cmpswp1full_fence __TBB_machine_cmpswp1__TBB_full_fence
#define __TBB_machine_cmpswp2full_fence __TBB_machine_cmpswp2__TBB_full_fence
#define __TBB_machine_cmpswp4full_fence __TBB_machine_cmpswp4__TBB_full_fence
#define __TBB_machine_cmpswp8full_fence __TBB_machine_cmpswp8__TBB_full_fence
// Mapping relaxed operations to the entry points implementing them.
/** On IA64 RMW operations implicitly have acquire semantics. Thus one cannot
actually have completely relaxed RMW operation here. **/
#define __TBB_machine_fetchadd1relaxed __TBB_machine_fetchadd1acquire
#define __TBB_machine_fetchadd2relaxed __TBB_machine_fetchadd2acquire
#define __TBB_machine_fetchadd4relaxed __TBB_machine_fetchadd4acquire
#define __TBB_machine_fetchadd8relaxed __TBB_machine_fetchadd8acquire
#define __TBB_machine_fetchstore1relaxed __TBB_machine_fetchstore1acquire
#define __TBB_machine_fetchstore2relaxed __TBB_machine_fetchstore2acquire
#define __TBB_machine_fetchstore4relaxed __TBB_machine_fetchstore4acquire
#define __TBB_machine_fetchstore8relaxed __TBB_machine_fetchstore8acquire
#define __TBB_machine_cmpswp1relaxed __TBB_machine_cmpswp1acquire
#define __TBB_machine_cmpswp2relaxed __TBB_machine_cmpswp2acquire
#define __TBB_machine_cmpswp4relaxed __TBB_machine_cmpswp4acquire
#define __TBB_machine_cmpswp8relaxed __TBB_machine_cmpswp8acquire
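// Editorial sketch (not part of TBB): because of the mappings above, a nominally relaxed
// RMW call still resolves to an acquire entry point implemented in the architecture's .s file.
// The helper name below is hypothetical and only illustrates that resolution.
static inline int32_t __TBB_example_relaxed_increment( volatile int32_t& counter ) {
    // Expands to __TBB_machine_fetchadd4acquire(&counter, 1) and returns the prior value.
    return __TBB_machine_fetchadd4relaxed( &counter, 1 );
}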
#define __TBB_MACHINE_DEFINE_ATOMICS(S,V) \
template <typename T> \
struct machine_load_store_relaxed<T,S> { \
static inline T load ( const T& location ) { \
return (T)__TBB_machine_load##S##_relaxed(&location); \
} \
static inline void store ( T& location, T value ) { \
__TBB_machine_store##S##_relaxed(&location, (V)value); \
} \
}
namespace tbb {
namespace internal {
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t);
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t);
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t);
__TBB_MACHINE_DEFINE_ATOMICS(8,int64_t);
}} // namespaces internal, tbb
#undef __TBB_MACHINE_DEFINE_ATOMICS
#define __TBB_USE_FENCED_ATOMICS 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
// Definition of Lock functions
#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)
#define __TBB_LockByte(P) __TBB_machine_lockbyte(P)
// Definition of other utility functions
#define __TBB_Pause(V) __TBB_machine_pause(V)
#define __TBB_Log2(V) __TBB_machine_lg(V)

View File

@@ -0,0 +1,96 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_intel64_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_linux_intel64_H
#include <stdint.h>
#include "gcc_ia32_common.h"
#define __TBB_WORDSIZE 8
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#ifndef __TBB_full_memory_fence
#define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory")
#endif
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,X) \
static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T comparand ) \
{ \
T result; \
\
__asm__ __volatile__("lock\ncmpxchg" X " %2,%1" \
: "=a"(result), "=m"(*(volatile T*)ptr) \
: "q"(value), "0"(comparand), "m"(*(volatile T*)ptr) \
: "memory"); \
return result; \
} \
\
static inline T __TBB_machine_fetchadd##S(volatile void *ptr, T addend) \
{ \
T result; \
__asm__ __volatile__("lock\nxadd" X " %0,%1" \
: "=r"(result),"=m"(*(volatile T*)ptr) \
: "0"(addend), "m"(*(volatile T*)ptr) \
: "memory"); \
return result; \
} \
\
static inline T __TBB_machine_fetchstore##S(volatile void *ptr, T value) \
{ \
T result; \
__asm__ __volatile__("lock\nxchg" X " %0,%1" \
: "=r"(result),"=m"(*(volatile T*)ptr) \
: "0"(value), "m"(*(volatile T*)ptr) \
: "memory"); \
return result; \
} \
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t,"")
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t,"")
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"")
__TBB_MACHINE_DEFINE_ATOMICS(8,int64_t,"q")
#undef __TBB_MACHINE_DEFINE_ATOMICS
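// Editorial sketch (not part of TBB): the S=4 expansion above defines __TBB_machine_fetchadd4
// as a full-fence lock xadd; a hypothetical helper built on it could look like this.
static inline int32_t __TBB_example_fetch_and_increment( volatile int32_t& counter ) {
    return __TBB_machine_fetchadd4( &counter, 1 ); // returns the value observed before the add
}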
static inline void __TBB_machine_or( volatile void *ptr, uint64_t value ) {
__asm__ __volatile__("lock\norq %1,%0" : "=m"(*(volatile uint64_t*)ptr) : "r"(value), "m"(*(volatile uint64_t*)ptr) : "memory");
}
static inline void __TBB_machine_and( volatile void *ptr, uint64_t value ) {
__asm__ __volatile__("lock\nandq %1,%0" : "=m"(*(volatile uint64_t*)ptr) : "r"(value), "m"(*(volatile uint64_t*)ptr) : "memory");
}
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1

View File

@@ -0,0 +1,313 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_power_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_gcc_power_H
#include <stdint.h>
#include <unistd.h>
// TODO: rename to gcc_power.h?
// This file is for Power Architecture with compilers supporting GNU inline-assembler syntax (currently GNU g++ and IBM XL).
// Note that XL V9.0 (sometimes?) has trouble dealing with empty input and/or clobber lists, so they should be avoided.
#if __powerpc64__ || __ppc64__
// IBM XL documents __powerpc64__ (and __PPC64__).
// Apple documents __ppc64__ (with __ppc__ only on 32-bit).
#define __TBB_WORDSIZE 8
#else
#define __TBB_WORDSIZE 4
#endif
// Traditionally Power Architecture is big-endian.
// Little-endian could be just an address manipulation (compatibility with TBB not verified),
// or normal little-endian (on more recent systems). Embedded PowerPC systems may support
// page-specific endianness, but then one endianness must be hidden from TBB so that it still sees only one.
#if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
#elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#elif defined(__BYTE_ORDER__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
#else
#define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
#endif
// On Power Architecture, (lock-free) 64-bit atomics require 64-bit hardware:
#if __TBB_WORDSIZE==8
// Do not change the following definition, because TBB itself will use 64-bit atomics in 64-bit builds.
#define __TBB_64BIT_ATOMICS 1
#elif __bgp__
// Do not change the following definition, because this is known 32-bit hardware.
#define __TBB_64BIT_ATOMICS 0
#else
// To enable 64-bit atomics in 32-bit builds, set the value below to 1 instead of 0.
// You must make certain that the program will only use them on actual 64-bit hardware
// (which typically means that the entire program is only executed on such hardware),
// because their implementation involves machine instructions that are illegal elsewhere.
// The setting can be chosen independently per compilation unit,
// which also means that TBB itself does not need to be rebuilt.
// Alternatively (but only for the current architecture and TBB version),
// override the default as a predefined macro when invoking the compiler.
#ifndef __TBB_64BIT_ATOMICS
#define __TBB_64BIT_ATOMICS 0
#endif
#endif
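// Editorial sketch (not part of TBB): a 32-bit compilation unit known to run only on 64-bit
// Power hardware can opt in as described above, e.g.
//     #define __TBB_64BIT_ATOMICS 1   // before including any TBB header
//     #include "tbb/atomic.h"
// or, equivalently, by passing -D__TBB_64BIT_ATOMICS=1 on the compiler command line.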
inline int32_t __TBB_machine_cmpswp4 (volatile void *ptr, int32_t value, int32_t comparand )
{
int32_t result;
__asm__ __volatile__("sync\n"
"0:\n\t"
"lwarx %[res],0,%[ptr]\n\t" /* load w/ reservation */
"cmpw %[res],%[cmp]\n\t" /* compare against comparand */
"bne- 1f\n\t" /* exit if not same */
"stwcx. %[val],0,%[ptr]\n\t" /* store new value */
"bne- 0b\n" /* retry if reservation lost */
"1:\n\t" /* the exit */
"isync"
: [res]"=&r"(result)
, "+m"(* (int32_t*) ptr) /* redundant with "memory" */
: [ptr]"r"(ptr)
, [val]"r"(value)
, [cmp]"r"(comparand)
: "memory" /* compiler full fence */
, "cr0" /* clobbered by cmp and/or stwcx. */
);
return result;
}
#if __TBB_WORDSIZE==8
inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
{
int64_t result;
__asm__ __volatile__("sync\n"
"0:\n\t"
"ldarx %[res],0,%[ptr]\n\t" /* load w/ reservation */
"cmpd %[res],%[cmp]\n\t" /* compare against comparand */
"bne- 1f\n\t" /* exit if not same */
"stdcx. %[val],0,%[ptr]\n\t" /* store new value */
"bne- 0b\n" /* retry if reservation lost */
"1:\n\t" /* the exit */
"isync"
: [res]"=&r"(result)
, "+m"(* (int64_t*) ptr) /* redundant with "memory" */
: [ptr]"r"(ptr)
, [val]"r"(value)
, [cmp]"r"(comparand)
: "memory" /* compiler full fence */
, "cr0" /* clobbered by cmp and/or stdcx. */
);
return result;
}
#elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */
inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
{
int64_t result;
int64_t value_register, comparand_register, result_register; // dummy variables to allocate registers
__asm__ __volatile__("sync\n\t"
"ld %[val],%[valm]\n\t"
"ld %[cmp],%[cmpm]\n"
"0:\n\t"
"ldarx %[res],0,%[ptr]\n\t" /* load w/ reservation */
"cmpd %[res],%[cmp]\n\t" /* compare against comparand */
"bne- 1f\n\t" /* exit if not same */
"stdcx. %[val],0,%[ptr]\n\t" /* store new value */
"bne- 0b\n" /* retry if reservation lost */
"1:\n\t" /* the exit */
"std %[res],%[resm]\n\t"
"isync"
: [resm]"=m"(result)
, [res] "=&r"( result_register)
, [val] "=&r"( value_register)
, [cmp] "=&r"(comparand_register)
, "+m"(* (int64_t*) ptr) /* redundant with "memory" */
: [ptr] "r"(ptr)
, [valm]"m"(value)
, [cmpm]"m"(comparand)
: "memory" /* compiler full fence */
, "cr0" /* clobbered by cmpd and/or stdcx. */
);
return result;
}
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
#define __TBB_MACHINE_DEFINE_LOAD_STORE(S,ldx,stx,cmpx) \
template <typename T> \
struct machine_load_store<T,S> { \
static inline T load_with_acquire(const volatile T& location) { \
T result; \
__asm__ __volatile__(ldx " %[res],0(%[ptr])\n" \
"0:\n\t" \
cmpx " %[res],%[res]\n\t" \
"bne- 0b\n\t" \
"isync" \
: [res]"=r"(result) \
: [ptr]"b"(&location) /* cannot use register 0 here */ \
, "m"(location) /* redundant with "memory" */ \
: "memory" /* compiler acquire fence */ \
, "cr0" /* clobbered by cmpw/cmpd */); \
return result; \
} \
static inline void store_with_release(volatile T &location, T value) { \
__asm__ __volatile__("lwsync\n\t" \
stx " %[val],0(%[ptr])" \
: "=m"(location) /* redundant with "memory" */ \
: [ptr]"b"(&location) /* cannot use register 0 here */ \
, [val]"r"(value) \
: "memory"/*compiler release fence*/ /*(cr0 not affected)*/); \
} \
}; \
\
template <typename T> \
struct machine_load_store_relaxed<T,S> { \
static inline T load (const __TBB_atomic T& location) { \
T result; \
__asm__ __volatile__(ldx " %[res],0(%[ptr])" \
: [res]"=r"(result) \
: [ptr]"b"(&location) /* cannot use register 0 here */ \
, "m"(location) \
); /*(no compiler fence)*/ /*(cr0 not affected)*/ \
return result; \
} \
static inline void store (__TBB_atomic T &location, T value) { \
__asm__ __volatile__(stx " %[val],0(%[ptr])" \
: "=m"(location) \
: [ptr]"b"(&location) /* cannot use register 0 here */ \
, [val]"r"(value) \
); /*(no compiler fence)*/ /*(cr0 not affected)*/ \
} \
};
namespace tbb {
namespace internal {
__TBB_MACHINE_DEFINE_LOAD_STORE(1,"lbz","stb","cmpw")
__TBB_MACHINE_DEFINE_LOAD_STORE(2,"lhz","sth","cmpw")
__TBB_MACHINE_DEFINE_LOAD_STORE(4,"lwz","stw","cmpw")
#if __TBB_WORDSIZE==8
__TBB_MACHINE_DEFINE_LOAD_STORE(8,"ld" ,"std","cmpd")
#elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */
template <typename T>
struct machine_load_store<T,8> {
static inline T load_with_acquire(const volatile T& location) {
T result;
T result_register; // dummy variable to allocate a register
__asm__ __volatile__("ld %[res],0(%[ptr])\n\t"
"std %[res],%[resm]\n"
"0:\n\t"
"cmpd %[res],%[res]\n\t"
"bne- 0b\n\t"
"isync"
: [resm]"=m"(result)
, [res]"=&r"(result_register)
: [ptr]"b"(&location) /* cannot use register 0 here */
, "m"(location) /* redundant with "memory" */
: "memory" /* compiler acquire fence */
, "cr0" /* clobbered by cmpd */);
return result;
}
static inline void store_with_release(volatile T &location, T value) {
T value_register; // dummy variable to allocate a register
__asm__ __volatile__("lwsync\n\t"
"ld %[val],%[valm]\n\t"
"std %[val],0(%[ptr])"
: "=m"(location) /* redundant with "memory" */
, [val]"=&r"(value_register)
: [ptr]"b"(&location) /* cannot use register 0 here */
, [valm]"m"(value)
: "memory"/*compiler release fence*/ /*(cr0 not affected)*/);
}
};
template <typename T>
struct machine_load_store_relaxed<T,8> {
static inline T load (const volatile T& location) {
T result;
T result_register; // dummy variable to allocate a register
__asm__ __volatile__("ld %[res],0(%[ptr])\n\t"
"std %[res],%[resm]"
: [resm]"=m"(result)
, [res]"=&r"(result_register)
: [ptr]"b"(&location) /* cannot use register 0 here */
, "m"(location)
); /*(no compiler fence)*/ /*(cr0 not affected)*/
return result;
}
static inline void store (volatile T &location, T value) {
T value_register; // dummy variable to allocate a register
__asm__ __volatile__("ld %[val],%[valm]\n\t"
"std %[val],0(%[ptr])"
: "=m"(location)
, [val]"=&r"(value_register)
: [ptr]"b"(&location) /* cannot use register 0 here */
, [valm]"m"(value)
); /*(no compiler fence)*/ /*(cr0 not affected)*/
}
};
#define __TBB_machine_load_store_relaxed_8
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
}} // namespaces internal, tbb
#undef __TBB_MACHINE_DEFINE_LOAD_STORE
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#define __TBB_control_consistency_helper() __asm__ __volatile__("isync": : :"memory")
#define __TBB_full_memory_fence() __asm__ __volatile__( "sync": : :"memory")
static inline intptr_t __TBB_machine_lg( uintptr_t x ) {
__TBB_ASSERT(x, "__TBB_Log2(0) undefined");
// cntlzd/cntlzw count leading zero bits starting from 2^63/2^31 (ignoring any higher-order bits), and do not affect cr0
#if __TBB_WORDSIZE==8
__asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x));
return 63-static_cast<intptr_t>(x);
#else
__asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x));
return 31-static_cast<intptr_t>(x);
#endif
}
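// Editorial note (not part of TBB): for x == 0x20, cntlzd yields 58 leading zeros, so the
// function returns 63 - 58 == 5, i.e. floor(log2(32)); the 32-bit cntlzw branch behaves
// analogously with 31 as the base.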
#define __TBB_Log2(V) __TBB_machine_lg(V)
// Assumes implicit alignment for any 32-bit value
typedef uint32_t __TBB_Flag;
#define __TBB_Flag __TBB_Flag
inline bool __TBB_machine_trylockbyte( __TBB_atomic __TBB_Flag &flag ) {
return __TBB_machine_cmpswp4(&flag,1,0)==0;
}
#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)

View File

@@ -0,0 +1,133 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_macos_common_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_macos_common_H
#include <sched.h>
#define __TBB_Yield() sched_yield()
// __TBB_HardwareConcurrency
#include <sys/types.h>
#include <sys/sysctl.h>
static inline int __TBB_macos_available_cpu() {
int name[2] = {CTL_HW, HW_AVAILCPU};
int ncpu;
size_t size = sizeof(ncpu);
sysctl( name, 2, &ncpu, &size, NULL, 0 );
return ncpu;
}
#define __TBB_HardwareConcurrency() __TBB_macos_available_cpu()
#ifndef __TBB_full_memory_fence
// TBB has not recognized the architecture (none of the architecture abstraction
// headers was included).
#define __TBB_UnknownArchitecture 1
#endif
#if __TBB_UnknownArchitecture
// Implementation of atomic operations based on OS provided primitives
#include <libkern/OSAtomic.h>
static inline int64_t __TBB_machine_cmpswp8_OsX(volatile void *ptr, int64_t value, int64_t comparand)
{
__TBB_ASSERT( tbb::internal::is_aligned(ptr,8), "address not properly aligned for OS X* atomics");
int64_t* address = (int64_t*)ptr;
while( !OSAtomicCompareAndSwap64Barrier(comparand, value, address) ){
#if __TBB_WORDSIZE==8
int64_t snapshot = *address;
#else
int64_t snapshot = OSAtomicAdd64( 0, address );
#endif
if( snapshot!=comparand ) return snapshot;
}
return comparand;
}
#define __TBB_machine_cmpswp8 __TBB_machine_cmpswp8_OsX
#endif /* __TBB_UnknownArchitecture */
#if __TBB_UnknownArchitecture
#ifndef __TBB_WORDSIZE
#define __TBB_WORDSIZE 4
#endif
#ifdef __TBB_ENDIANNESS
// Already determined based on hardware architecture.
#elif __BIG_ENDIAN__
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
#elif __LITTLE_ENDIAN__
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#else
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
#endif
/** As this generic implementation has absolutely no information about underlying
hardware, its performance most likely will be sub-optimal because of full memory
fence usages where a more lightweight synchronization means (or none at all)
could suffice. Thus if you use this header to enable TBB on a new platform,
consider forking it and relaxing below helpers as appropriate. **/
#define __TBB_control_consistency_helper() OSMemoryBarrier()
#define __TBB_acquire_consistency_helper() OSMemoryBarrier()
#define __TBB_release_consistency_helper() OSMemoryBarrier()
#define __TBB_full_memory_fence() OSMemoryBarrier()
static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand)
{
__TBB_ASSERT( tbb::internal::is_aligned(ptr,4), "address not properly aligned for OS X* atomics");
int32_t* address = (int32_t*)ptr;
while( !OSAtomicCompareAndSwap32Barrier(comparand, value, address) ){
int32_t snapshot = *address;
if( snapshot!=comparand ) return snapshot;
}
return comparand;
}
static inline int32_t __TBB_machine_fetchadd4(volatile void *ptr, int32_t addend)
{
__TBB_ASSERT( tbb::internal::is_aligned(ptr,4), "address not properly aligned for OS X* atomics");
return OSAtomicAdd32Barrier(addend, (int32_t*)ptr) - addend;
}
static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend)
{
__TBB_ASSERT( tbb::internal::is_aligned(ptr,8), "address not properly aligned for OS X* atomics");
return OSAtomicAdd64Barrier(addend, (int64_t*)ptr) - addend;
}
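// Editorial note (not part of TBB): OSAtomicAdd32Barrier/OSAtomicAdd64Barrier return the
// updated value, so subtracting the addend above restores the usual fetch-and-add convention;
// e.g. with *ptr == 10, __TBB_machine_fetchadd4(ptr, 5) stores 15 and returns 10.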
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#if __TBB_WORDSIZE == 4
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#endif
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#endif /* __TBB_UnknownArchitecture */

View File

@@ -0,0 +1,61 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_mic_common_H
#define __TBB_mic_common_H
#ifndef __TBB_machine_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#if ! __TBB_DEFINE_MIC
#error mic_common.h should be included only when building for Intel(R) Many Integrated Core Architecture
#endif
#ifndef __TBB_PREFETCHING
#define __TBB_PREFETCHING 1
#endif
#if __TBB_PREFETCHING
#include <immintrin.h>
#define __TBB_cl_prefetch(p) _mm_prefetch((const char*)p, _MM_HINT_T1)
#define __TBB_cl_evict(p) _mm_clevict(p, _MM_HINT_T1)
#endif
/** Intel(R) Many Integrated Core Architecture does not support mfence and pause instructions **/
#define __TBB_full_memory_fence() __asm__ __volatile__("lock; addl $0,(%%rsp)":::"memory")
#define __TBB_Pause(x) _mm_delay_32(16*(x))
#define __TBB_STEALING_PAUSE 1500/16
#include <sched.h>
#define __TBB_Yield() sched_yield()
// low-level timing intrinsic and its type
#define __TBB_machine_time_stamp() _rdtsc()
typedef uint64_t machine_tsc_t;
/** Specifics **/
#define __TBB_STEALING_ABORT_ON_CONTENTION 1
#define __TBB_YIELD2P 1
#define __TBB_HOARD_NONLOCAL_TASKS 1
#if ! ( __FreeBSD__ || __linux__ )
#error Intel(R) Many Integrated Core Compiler does not define __FreeBSD__ or __linux__ anymore. Check for the __TBB_XXX_BROKEN defined under __FreeBSD__ or __linux__.
#endif /* ! ( __FreeBSD__ || __linux__ ) */
#endif /* __TBB_mic_common_H */

View File

@@ -0,0 +1,171 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_msvc_armv7_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_msvc_armv7_H
#include <intrin.h>
#include <float.h>
#define __TBB_WORDSIZE 4
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
#if defined(TBB_WIN32_USE_CL_BUILTINS)
// We can test this on _M_IX86
#pragma intrinsic(_ReadWriteBarrier)
#pragma intrinsic(_mm_mfence)
#define __TBB_compiler_fence() _ReadWriteBarrier()
#define __TBB_full_memory_fence() _mm_mfence()
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#else
// Currently __dmb(_ARM_BARRIER_SY) is used for both the compiler and the memory fences.
// This may be revisited after further testing.
#define __TBB_compiler_fence() __dmb(_ARM_BARRIER_SY)
#define __TBB_full_memory_fence() __dmb(_ARM_BARRIER_SY)
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_full_memory_fence()
#define __TBB_release_consistency_helper() __TBB_full_memory_fence()
#endif
//--------------------------------------------------
// Compare and swap
//--------------------------------------------------
/**
* Atomic CAS for 32 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
* @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
* @param value value to assign *ptr to if *ptr==comparand
* @param comparand value to compare with *ptr
* @return value originally in memory at ptr, regardless of success
*/
#define __TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(S,T,F) \
inline T __TBB_machine_cmpswp##S( volatile void *ptr, T value, T comparand ) { \
    return _InterlockedCompareExchange##F(reinterpret_cast<volatile T *>(ptr),value,comparand); \
}
#define __TBB_MACHINE_DEFINE_ATOMICS_FETCHADD(S,T,F) \
inline T __TBB_machine_fetchadd##S( volatile void *ptr, T value ) { \
    return _InterlockedExchangeAdd##F(reinterpret_cast<volatile T *>(ptr),value); \
}
__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(1,char,8)
__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(2,short,16)
__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(4,long,)
__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(8,__int64,64)
__TBB_MACHINE_DEFINE_ATOMICS_FETCHADD(4,long,)
#if defined(TBB_WIN32_USE_CL_BUILTINS)
// No _InterlockedExchangeAdd64 intrinsic on _M_IX86
#define __TBB_64BIT_ATOMICS 0
#else
__TBB_MACHINE_DEFINE_ATOMICS_FETCHADD(8,__int64,64)
#endif
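// Editorial sketch (not part of TBB): a retry loop built on the 32-bit CAS defined above;
// the helper name is hypothetical.
static inline long __TBB_example_fetch_and_increment( volatile long* counter ) {
    long snapshot;
    do {
        snapshot = *counter;                 // read the current value
    } while ( __TBB_machine_cmpswp4( counter, snapshot + 1, snapshot ) != snapshot ); // retry if it changed
    return snapshot;                         // value observed before the increment
}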
inline void __TBB_machine_pause (int32_t delay )
{
while(delay>0)
{
__TBB_compiler_fence();
delay--;
}
}
// API to retrieve/update FPU control setting
#define __TBB_CPU_CTL_ENV_PRESENT 1
namespace tbb {
namespace internal {
template <typename T, size_t S>
struct machine_load_store_relaxed {
static inline T load ( const volatile T& location ) {
const T value = location;
/*
* An extra memory barrier is required for errata #761319
* Please see http://infocenter.arm.com/help/topic/com.arm.doc.uan0004a
*/
__TBB_acquire_consistency_helper();
return value;
}
static inline void store ( volatile T& location, T value ) {
location = value;
}
};
class cpu_ctl_env {
private:
unsigned int my_ctl;
public:
bool operator!=( const cpu_ctl_env& ctl ) const { return my_ctl != ctl.my_ctl; }
void get_env() { my_ctl = _control87(0, 0); }
void set_env() const { _control87( my_ctl, ~0U ); }
};
} // namespace internal
} // namespaces tbb
// Machine specific atomic operations
#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
#define __TBB_Pause(V) __TBB_machine_pause(V)
// Use generics for some things
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_STORE 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#if defined(TBB_WIN32_USE_CL_BUILTINS)
#if !__TBB_WIN8UI_SUPPORT
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
#define __TBB_Yield() SwitchToThread()
#else
#include<thread>
#define __TBB_Yield() std::this_thread::yield()
#endif
#else
#define __TBB_Yield() __yield()
#endif
// Machine specific atomic operations
#define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V)
template <typename T1,typename T2>
inline void __TBB_machine_OR( T1 *operand, T2 addend ) {
_InterlockedOr((long volatile *)operand, (long)addend);
}
template <typename T1,typename T2>
inline void __TBB_machine_AND( T1 *operand, T2 addend ) {
_InterlockedAnd((long volatile *)operand, (long)addend);
}

View File

@@ -0,0 +1,216 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_machine_msvc_ia32_common_H
#define __TBB_machine_msvc_ia32_common_H
#include <intrin.h>
//TODO: consider moving this macro to tbb_config.h and using it where MSVC inline asm is used
#if !_M_X64 || __INTEL_COMPILER
#define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 1
#if _M_X64
#define __TBB_r(reg_name) r##reg_name
#else
#define __TBB_r(reg_name) e##reg_name
#endif
#else
//MSVC in x64 mode does not accept inline assembler
#define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 0
#endif
#define __TBB_NO_X86_MSVC_INLINE_ASM_MSG "The compiler being used is not supported (outdated?)"
#if (_MSC_VER >= 1300) || (__INTEL_COMPILER) //Use compiler intrinsic when available
#define __TBB_PAUSE_USE_INTRINSIC 1
#pragma intrinsic(_mm_pause)
namespace tbb { namespace internal { namespace intrinsics { namespace msvc {
static inline void __TBB_machine_pause (uintptr_t delay ) {
for (;delay>0; --delay )
_mm_pause();
}
}}}}
#else
#if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
#error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
#endif
namespace tbb { namespace internal { namespace inline_asm { namespace msvc {
static inline void __TBB_machine_pause (uintptr_t delay ) {
_asm
{
mov __TBB_r(ax), delay
__TBB_L1:
pause
add __TBB_r(ax), -1
jne __TBB_L1
}
return;
}
}}}}
#endif
static inline void __TBB_machine_pause (uintptr_t delay ){
#if __TBB_PAUSE_USE_INTRINSIC
tbb::internal::intrinsics::msvc::__TBB_machine_pause(delay);
#else
tbb::internal::inline_asm::msvc::__TBB_machine_pause(delay);
#endif
}
//TODO: move this function to windows_api.h or to the place where it is used
#if (_MSC_VER<1400) && (!_WIN64) && (__TBB_X86_MSVC_INLINE_ASM_AVAILABLE)
static inline void* __TBB_machine_get_current_teb () {
void* pteb;
__asm mov eax, fs:[0x18]
__asm mov pteb, eax
return pteb;
}
#endif
#if ( _MSC_VER>=1400 && !defined(__INTEL_COMPILER) ) || (__INTEL_COMPILER>=1200)
// MSVC did not have this intrinsic prior to VC8.
// ICL 11.1 fails to compile a TBB example if __TBB_Log2 uses the intrinsic.
#define __TBB_LOG2_USE_BSR_INTRINSIC 1
#if _M_X64
#define __TBB_BSR_INTRINSIC _BitScanReverse64
#else
#define __TBB_BSR_INTRINSIC _BitScanReverse
#endif
#pragma intrinsic(__TBB_BSR_INTRINSIC)
namespace tbb { namespace internal { namespace intrinsics { namespace msvc {
inline uintptr_t __TBB_machine_lg( uintptr_t i ){
unsigned long j;
__TBB_BSR_INTRINSIC( &j, i );
return j;
}
}}}}
#else
#if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
#error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
#endif
namespace tbb { namespace internal { namespace inline_asm { namespace msvc {
inline uintptr_t __TBB_machine_lg( uintptr_t i ){
uintptr_t j;
__asm
{
bsr __TBB_r(ax), i
mov j, __TBB_r(ax)
}
return j;
}
}}}}
#endif
static inline intptr_t __TBB_machine_lg( uintptr_t i ) {
#if __TBB_LOG2_USE_BSR_INTRINSIC
return tbb::internal::intrinsics::msvc::__TBB_machine_lg(i);
#else
return tbb::internal::inline_asm::msvc::__TBB_machine_lg(i);
#endif
}
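// Editorial note (not part of TBB): both paths return the zero-based index of the most
// significant set bit, e.g. __TBB_machine_lg(1) == 0 and __TBB_machine_lg(1000) == 9,
// which is the contract __TBB_Log2 below relies on.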
// API to retrieve/update FPU control setting
#define __TBB_CPU_CTL_ENV_PRESENT 1
namespace tbb { namespace internal { class cpu_ctl_env; } }
#if __TBB_X86_MSVC_INLINE_ASM_AVAILABLE
inline void __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* ctl ) {
__asm {
__asm mov __TBB_r(ax), ctl
__asm stmxcsr [__TBB_r(ax)]
__asm fstcw [__TBB_r(ax)+4]
}
}
inline void __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* ctl ) {
__asm {
__asm mov __TBB_r(ax), ctl
__asm ldmxcsr [__TBB_r(ax)]
__asm fldcw [__TBB_r(ax)+4]
}
}
#else
extern "C" {
void __TBB_EXPORTED_FUNC __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* );
void __TBB_EXPORTED_FUNC __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* );
}
#endif
namespace tbb {
namespace internal {
class cpu_ctl_env {
private:
int mxcsr;
short x87cw;
static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */
public:
bool operator!=( const cpu_ctl_env& ctl ) const { return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; }
void get_env() {
__TBB_get_cpu_ctl_env( this );
mxcsr &= MXCSR_CONTROL_MASK;
}
void set_env() const { __TBB_set_cpu_ctl_env( this ); }
};
} // namespace internal
} // namespace tbb
#if !__TBB_WIN8UI_SUPPORT
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
#define __TBB_Yield() SwitchToThread()
#else
#include<thread>
#define __TBB_Yield() std::this_thread::yield()
#endif
#define __TBB_Pause(V) __TBB_machine_pause(V)
#define __TBB_Log2(V) __TBB_machine_lg(V)
#undef __TBB_r
extern "C" {
__int8 __TBB_EXPORTED_FUNC __TBB_machine_try_lock_elided (volatile void* ptr);
void __TBB_EXPORTED_FUNC __TBB_machine_unlock_elided (volatile void* ptr);
// 'pause' instruction aborts HLE/RTM transactions
#if __TBB_PAUSE_USE_INTRINSIC
inline static void __TBB_machine_try_lock_elided_cancel() { _mm_pause(); }
#else
inline static void __TBB_machine_try_lock_elided_cancel() { _asm pause; }
#endif
#if __TBB_TSX_INTRINSICS_PRESENT
#define __TBB_machine_is_in_transaction _xtest
#define __TBB_machine_begin_transaction _xbegin
#define __TBB_machine_end_transaction _xend
// The value (0xFF) below comes from the
// Intel(R) 64 and IA-32 Architectures Optimization Reference Manual 12.4.5 lock not free
#define __TBB_machine_transaction_conflict_abort() _xabort(0xFF)
#else
__int8 __TBB_EXPORTED_FUNC __TBB_machine_is_in_transaction();
unsigned __int32 __TBB_EXPORTED_FUNC __TBB_machine_begin_transaction();
void __TBB_EXPORTED_FUNC __TBB_machine_end_transaction();
void __TBB_EXPORTED_FUNC __TBB_machine_transaction_conflict_abort();
#endif /* __TBB_TSX_INTRINSICS_PRESENT */
}
#endif /* __TBB_machine_msvc_ia32_common_H */

View File

@@ -0,0 +1,203 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_sunos_sparc_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_sunos_sparc_H
#include <stdint.h>
#include <unistd.h>
#define __TBB_WORDSIZE 8
// Big endian is assumed for SPARC.
// While hardware may support page-specific bi-endianness, only big endian pages may be exposed to TBB
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
/** To those working on SPARC hardware. Consider relaxing acquire and release
consistency helpers to no-op (as this port covers TSO mode only). **/
#define __TBB_compiler_fence() __asm__ __volatile__ ("": : :"memory")
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#define __TBB_full_memory_fence() __asm__ __volatile__("membar #LoadLoad|#LoadStore|#StoreStore|#StoreLoad": : : "memory")
//--------------------------------------------------
// Compare and swap
//--------------------------------------------------
/**
* Atomic CAS for 32 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
* @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
* @param value value to assign *ptr to if *ptr==comparand
* @param comparand value to compare with *ptr
* @return value originally in memory at ptr, regardless of success
*/
static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand ){
int32_t result;
__asm__ __volatile__(
"cas\t[%5],%4,%1"
: "=m"(*(int32_t *)ptr), "=r"(result)
: "m"(*(int32_t *)ptr), "1"(value), "r"(comparand), "r"(ptr)
: "memory");
return result;
}
/**
* Atomic CAS for 64 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
* @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
* @param value value to assign *ptr to if *ptr==comparand
* @param comparand value to compare with *ptr
* @return value originally in memory at ptr, regardless of success
*/
static inline int64_t __TBB_machine_cmpswp8(volatile void *ptr, int64_t value, int64_t comparand ){
int64_t result;
__asm__ __volatile__(
"casx\t[%5],%4,%1"
: "=m"(*(int64_t *)ptr), "=r"(result)
: "m"(*(int64_t *)ptr), "1"(value), "r"(comparand), "r"(ptr)
: "memory");
return result;
}
//---------------------------------------------------
// Fetch and add
//---------------------------------------------------
/**
* Atomic fetch and add for 32 bit values, in this case implemented by continuously checking success of atomicity
* @param ptr pointer to value to add addend to
* @param addend value to add to *ptr
* @return value at ptr before the addend was added
*/
static inline int32_t __TBB_machine_fetchadd4(volatile void *ptr, int32_t addend){
int32_t result;
__asm__ __volatile__ (
"0:\t add\t %3, %4, %0\n" // do addition
"\t cas\t [%2], %3, %0\n" // cas to store result in memory
"\t cmp\t %3, %0\n" // check if value from memory is original
"\t bne,a,pn\t %%icc, 0b\n" // if not try again
"\t mov %0, %3\n" // use branch delay slot to move new value in memory to be added
: "=&r"(result), "=m"(*(int32_t *)ptr)
: "r"(ptr), "r"(*(int32_t *)ptr), "r"(addend), "m"(*(int32_t *)ptr)
: "ccr", "memory");
return result;
}
/**
* Atomic fetch and add for 64 bit values, in this case implemented by continuously checking success of atomicity
* @param ptr pointer to value to add addend to
* @param addend value to add to *ptr
* @return value at ptr before the addend was added
*/
static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend){
int64_t result;
__asm__ __volatile__ (
"0:\t add\t %3, %4, %0\n" // do addition
"\t casx\t [%2], %3, %0\n" // cas to store result in memory
"\t cmp\t %3, %0\n" // check if value from memory is original
"\t bne,a,pn\t %%xcc, 0b\n" // if not try again
"\t mov %0, %3\n" // use branch delay slot to move new value in memory to be added
: "=&r"(result), "=m"(*(int64_t *)ptr)
: "r"(ptr), "r"(*(int64_t *)ptr), "r"(addend), "m"(*(int64_t *)ptr)
: "ccr", "memory");
return result;
}
//--------------------------------------------------------
// Logarithm (base two, integer)
//--------------------------------------------------------
static inline int64_t __TBB_machine_lg( uint64_t x ) {
__TBB_ASSERT(x, "__TBB_Log2(0) undefined");
uint64_t count;
// smear the highest set bit into all lower positions
x |= (x >> 1);
x |= (x >> 2);
x |= (x >> 4);
x |= (x >> 8);
x |= (x >> 16);
x |= (x >> 32);
// count 1's
__asm__ ("popc %1, %0" : "=r"(count) : "r"(x) );
return count-1;
}
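// Editorial note (not part of TBB): the shift/or cascade smears the highest set bit into every
// lower position, so for x == 0x20 the register holds 0x3F, popc returns 6, and the function
// yields 5 == floor(log2(32)).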
//--------------------------------------------------------
static inline void __TBB_machine_or( volatile void *ptr, uint64_t value ) {
__asm__ __volatile__ (
"0:\t or\t %2, %3, %%g1\n" // do operation
"\t casx\t [%1], %2, %%g1\n" // cas to store result in memory
"\t cmp\t %2, %%g1\n" // check if value from memory is original
"\t bne,a,pn\t %%xcc, 0b\n" // if not try again
"\t mov %%g1, %2\n" // use branch delay slot to move new value in memory to be added
: "=m"(*(int64_t *)ptr)
: "r"(ptr), "r"(*(int64_t *)ptr), "r"(value), "m"(*(int64_t *)ptr)
: "ccr", "g1", "memory");
}
static inline void __TBB_machine_and( volatile void *ptr, uint64_t value ) {
__asm__ __volatile__ (
"0:\t and\t %2, %3, %%g1\n" // do operation
"\t casx\t [%1], %2, %%g1\n" // cas to store result in memory
"\t cmp\t %2, %%g1\n" // check if value from memory is original
"\t bne,a,pn\t %%xcc, 0b\n" // if not try again
"\t mov %%g1, %2\n" // use branch delay slot to move new value in memory to be added
: "=m"(*(int64_t *)ptr)
: "r"(ptr), "r"(*(int64_t *)ptr), "r"(value), "m"(*(int64_t *)ptr)
: "ccr", "g1", "memory");
}
static inline void __TBB_machine_pause( int32_t delay ) {
// Intentionally a no-op: no pause instruction is issued on this port, and the inlined empty call costs nothing.
}
// ldstub puts 0xff in the memory location and returns the previous value.
// The generic trylockbyte stores 0x01 instead, but that is fine
// because all that matters is that 0 means unlocked.
static inline bool __TBB_machine_trylockbyte(unsigned char &flag){
unsigned char result;
__asm__ __volatile__ (
"ldstub\t [%2], %0\n"
: "=r"(result), "=m"(flag)
: "r"(&flag), "m"(flag)
: "memory");
return result == 0;
}
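// Editorial sketch (not part of TBB): a minimal spin built on the ldstub-based try-lock above;
// the helper name is hypothetical, and a real lock would add back-off or yielding.
static inline void __TBB_example_spin_acquire( unsigned char& flag ) {
    while( !__TBB_machine_trylockbyte(flag) ) {} // 0 means unlocked; ldstub publishes 0xff
}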
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
// Definition of other functions
#define __TBB_Pause(V) __TBB_machine_pause(V)
#define __TBB_Log2(V) __TBB_machine_lg(V)
#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)

View File

@@ -0,0 +1,79 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_machine_windows_api_H
#define __TBB_machine_windows_api_H
#if _WIN32 || _WIN64
#if _XBOX
#define NONET
#define NOD3D
#include <xtl.h>
#else // Assume "usual" Windows
#include <windows.h>
#endif // _XBOX
#if _WIN32_WINNT < 0x0600
// The following Windows API function is declared explicitly;
// otherwise it fails to compile by VS2005.
#if !defined(WINBASEAPI) || (_WIN32_WINNT < 0x0501 && _MSC_VER == 1400)
#define __TBB_WINBASEAPI extern "C"
#else
#define __TBB_WINBASEAPI WINBASEAPI
#endif
__TBB_WINBASEAPI BOOL WINAPI TryEnterCriticalSection( LPCRITICAL_SECTION );
__TBB_WINBASEAPI BOOL WINAPI InitializeCriticalSectionAndSpinCount( LPCRITICAL_SECTION, DWORD );
// Overloading WINBASEAPI macro and using local functions missing in Windows XP/2003
#define InitializeCriticalSectionEx inlineInitializeCriticalSectionEx
#define CreateSemaphoreEx inlineCreateSemaphoreEx
#define CreateEventEx inlineCreateEventEx
inline BOOL WINAPI inlineInitializeCriticalSectionEx( LPCRITICAL_SECTION lpCriticalSection, DWORD dwSpinCount, DWORD )
{
return InitializeCriticalSectionAndSpinCount( lpCriticalSection, dwSpinCount );
}
inline HANDLE WINAPI inlineCreateSemaphoreEx( LPSECURITY_ATTRIBUTES lpSemaphoreAttributes, LONG lInitialCount, LONG lMaximumCount, LPCTSTR lpName, DWORD, DWORD )
{
return CreateSemaphore( lpSemaphoreAttributes, lInitialCount, lMaximumCount, lpName );
}
inline HANDLE WINAPI inlineCreateEventEx( LPSECURITY_ATTRIBUTES lpEventAttributes, LPCTSTR lpName, DWORD dwFlags, DWORD )
{
BOOL manual_reset = dwFlags&0x00000001 ? TRUE : FALSE; // CREATE_EVENT_MANUAL_RESET
BOOL initial_set = dwFlags&0x00000002 ? TRUE : FALSE; // CREATE_EVENT_INITIAL_SET
return CreateEvent( lpEventAttributes, manual_reset, initial_set, lpName );
}
#endif
#if defined(RTL_SRWLOCK_INIT)
#ifndef __TBB_USE_SRWLOCK
// TODO: turn it on when bug 1952 will be fixed
#define __TBB_USE_SRWLOCK 0
#endif
#endif
#else
#error tbb/machine/windows_api.h should only be used for Windows based platforms
#endif // _WIN32 || _WIN64
#endif // __TBB_machine_windows_api_H

View File

@@ -0,0 +1,144 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_windows_ia32_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_windows_ia32_H
#include "msvc_ia32_common.h"
#define __TBB_WORDSIZE 4
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#if __INTEL_COMPILER && (__INTEL_COMPILER < 1100)
#define __TBB_compiler_fence() __asm { __asm nop }
#define __TBB_full_memory_fence() __asm { __asm mfence }
#elif _MSC_VER >= 1300 || __INTEL_COMPILER
#pragma intrinsic(_ReadWriteBarrier)
#pragma intrinsic(_mm_mfence)
#define __TBB_compiler_fence() _ReadWriteBarrier()
#define __TBB_full_memory_fence() _mm_mfence()
#else
#error Unsupported compiler - need to define __TBB_{control,acquire,release}_consistency_helper to support it
#endif
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
// Workaround for overzealous compiler warnings in /Wp64 mode
#pragma warning (push)
#pragma warning (disable: 4244 4267)
#endif
extern "C" {
__int64 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __int64 comparand );
__int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd8 (volatile void *ptr, __int64 addend );
__int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore8 (volatile void *ptr, __int64 value );
void __TBB_EXPORTED_FUNC __TBB_machine_store8 (volatile void *ptr, __int64 value );
__int64 __TBB_EXPORTED_FUNC __TBB_machine_load8 (const volatile void *ptr);
}
//TODO: use _InterlockedXXX intrinsics as they available since VC 2005
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,U,A,C) \
static inline T __TBB_machine_cmpswp##S ( volatile void * ptr, U value, U comparand ) { \
T result; \
volatile T *p = (T *)ptr; \
__asm \
{ \
__asm mov edx, p \
__asm mov C , value \
__asm mov A , comparand \
__asm lock cmpxchg [edx], C \
__asm mov result, A \
} \
return result; \
} \
\
static inline T __TBB_machine_fetchadd##S ( volatile void * ptr, U addend ) { \
T result; \
volatile T *p = (T *)ptr; \
__asm \
{ \
__asm mov edx, p \
__asm mov A, addend \
__asm lock xadd [edx], A \
__asm mov result, A \
} \
return result; \
}\
\
static inline T __TBB_machine_fetchstore##S ( volatile void * ptr, U value ) { \
T result; \
volatile T *p = (T *)ptr; \
__asm \
{ \
__asm mov edx, p \
__asm mov A, value \
__asm lock xchg [edx], A \
__asm mov result, A \
} \
return result; \
}
__TBB_MACHINE_DEFINE_ATOMICS(1, __int8, __int8, al, cl)
__TBB_MACHINE_DEFINE_ATOMICS(2, __int16, __int16, ax, cx)
__TBB_MACHINE_DEFINE_ATOMICS(4, ptrdiff_t, ptrdiff_t, eax, ecx)
#undef __TBB_MACHINE_DEFINE_ATOMICS
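// Editorial sketch (not part of TBB): the S=4 expansion above defines __TBB_machine_cmpswp4
// with ptrdiff_t operands; a hypothetical one-shot claim helper could be written as:
static inline bool __TBB_example_try_claim( volatile void* flag ) {
    return __TBB_machine_cmpswp4( flag, 1, 0 ) == 0; // true only for the caller that flips 0 -> 1
}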
static inline void __TBB_machine_OR( volatile void *operand, __int32 addend ) {
__asm
{
mov eax, addend
mov edx, [operand]
lock or [edx], eax
}
}
static inline void __TBB_machine_AND( volatile void *operand, __int32 addend ) {
__asm
{
mov eax, addend
mov edx, [operand]
lock and [edx], eax
}
}
#define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V)
//TODO: Check if it possible and profitable for IA-32 architecture on (Linux and Windows)
//to use of 64-bit load/store via floating point registers together with full fence
//for sequentially consistent load/store, instead of CAS.
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma warning (pop)
#endif // warnings 4244, 4267 are back

View File

@@ -0,0 +1,105 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_windows_intel64_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_windows_intel64_H
#define __TBB_WORDSIZE 8
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#include <intrin.h>
#include "msvc_ia32_common.h"
//TODO: Use _InterlockedXXX16 intrinsics for 2 byte operations
#if !__INTEL_COMPILER
#pragma intrinsic(_InterlockedOr64)
#pragma intrinsic(_InterlockedAnd64)
#pragma intrinsic(_InterlockedCompareExchange)
#pragma intrinsic(_InterlockedCompareExchange64)
#pragma intrinsic(_InterlockedExchangeAdd)
#pragma intrinsic(_InterlockedExchangeAdd64)
#pragma intrinsic(_InterlockedExchange)
#pragma intrinsic(_InterlockedExchange64)
#endif /* !(__INTEL_COMPILER) */
#if __INTEL_COMPILER && (__INTEL_COMPILER < 1100)
#define __TBB_compiler_fence() __asm { __asm nop }
#define __TBB_full_memory_fence() __asm { __asm mfence }
#elif _MSC_VER >= 1300 || __INTEL_COMPILER
#pragma intrinsic(_ReadWriteBarrier)
#pragma intrinsic(_mm_mfence)
#define __TBB_compiler_fence() _ReadWriteBarrier()
#define __TBB_full_memory_fence() _mm_mfence()
#endif
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
// ATTENTION: if you ever change argument types in machine-specific primitives,
// please take care of atomic_word<> specializations in tbb/atomic.h
extern "C" {
__int8 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp1 (volatile void *ptr, __int8 value, __int8 comparand );
__int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd1 (volatile void *ptr, __int8 addend );
__int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore1 (volatile void *ptr, __int8 value );
__int16 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp2 (volatile void *ptr, __int16 value, __int16 comparand );
__int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd2 (volatile void *ptr, __int16 addend );
__int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore2 (volatile void *ptr, __int16 value );
}
inline long __TBB_machine_cmpswp4 (volatile void *ptr, __int32 value, __int32 comparand ) {
return _InterlockedCompareExchange( (long*)ptr, value, comparand );
}
inline long __TBB_machine_fetchadd4 (volatile void *ptr, __int32 addend ) {
return _InterlockedExchangeAdd( (long*)ptr, addend );
}
inline long __TBB_machine_fetchstore4 (volatile void *ptr, __int32 value ) {
return _InterlockedExchange( (long*)ptr, value );
}
inline __int64 __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __int64 comparand ) {
return _InterlockedCompareExchange64( (__int64*)ptr, value, comparand );
}
inline __int64 __TBB_machine_fetchadd8 (volatile void *ptr, __int64 addend ) {
return _InterlockedExchangeAdd64( (__int64*)ptr, addend );
}
inline __int64 __TBB_machine_fetchstore8 (volatile void *ptr, __int64 value ) {
return _InterlockedExchange64( (__int64*)ptr, value );
}
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
inline void __TBB_machine_OR( volatile void *operand, intptr_t addend ) {
_InterlockedOr64((__int64*)operand, addend);
}
inline void __TBB_machine_AND( volatile void *operand, intptr_t addend ) {
_InterlockedAnd64((__int64*)operand, addend);
}
#define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V)
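// As a quick illustration of the mapping above, a minimal sketch of a 64-bit counter built
// directly on the same intrinsic that __TBB_machine_fetchadd8 forwards to; <intrin.h> is
// already included above, and the names below are illustrative, not part of TBB.
static __int64 sketch_counter = 0;
inline __int64 sketch_bump_counter() {
    // _InterlockedExchangeAdd64 returns the value observed before the addition,
    // matching TBB's fetch-and-add convention.
    return _InterlockedExchangeAdd64(&sketch_counter, 1);
}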

View File

@@ -0,0 +1,119 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
// TODO: revise by comparing with mac_ppc.h
#if !defined(__TBB_machine_H) || defined(__TBB_machine_xbox360_ppc_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_xbox360_ppc_H
#define NONET
#define NOD3D
#include "xtl.h"
#include "ppcintrinsics.h"
#if _MSC_VER >= 1300
extern "C" void _MemoryBarrier();
#pragma intrinsic(_MemoryBarrier)
#define __TBB_control_consistency_helper() __isync()
#define __TBB_acquire_consistency_helper() _MemoryBarrier()
#define __TBB_release_consistency_helper() _MemoryBarrier()
#endif
#define __TBB_full_memory_fence() __sync()
#define __TBB_WORDSIZE 4
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
//todo: define __TBB_USE_FENCED_ATOMICS and define acquire/release primitives to maximize performance
inline __int32 __TBB_machine_cmpswp4(volatile void *ptr, __int32 value, __int32 comparand ) {
__sync();
__int32 result = InterlockedCompareExchange((volatile LONG*)ptr, value, comparand);
__isync();
return result;
}
inline __int64 __TBB_machine_cmpswp8(volatile void *ptr, __int64 value, __int64 comparand )
{
__sync();
__int64 result = InterlockedCompareExchange64((volatile LONG64*)ptr, value, comparand);
__isync();
return result;
}
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#pragma optimize( "", off )
inline void __TBB_machine_pause (__int32 delay )
{
for (__int32 i=0; i<delay; i++) {;};
}
#pragma optimize( "", on )
#define __TBB_Yield() Sleep(0)
#define __TBB_Pause(V) __TBB_machine_pause(V)
// This port uses only 2 hardware threads for TBB on XBOX 360.
// The others are left for sound processing, etc.
// Change the following mask to allow TBB to use more HW threads.
static const int __TBB_XBOX360_HARDWARE_THREAD_MASK = 0x0C;
static inline int __TBB_XBOX360_DetectNumberOfWorkers()
{
char a[__TBB_XBOX360_HARDWARE_THREAD_MASK]; //compile-time assert - at least one bit must always be set
a[0]=0;
return ((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 0) & 1) +
((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 1) & 1) +
((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 2) & 1) +
((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 3) & 1) +
((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 4) & 1) +
((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 5) & 1) + 1; // +1 accounts for the master thread
}
static inline int __TBB_XBOX360_GetHardwareThreadIndex(int workerThreadIndex)
{
workerThreadIndex %= __TBB_XBOX360_DetectNumberOfWorkers()-1;
int m = __TBB_XBOX360_HARDWARE_THREAD_MASK;
int index = 0;
int skipcount = workerThreadIndex;
while (true)
{
if ((m & 1)!=0)
{
if (skipcount==0) break;
skipcount--;
}
m >>= 1;
index++;
}
return index;
}
#define __TBB_HardwareConcurrency() __TBB_XBOX360_DetectNumberOfWorkers()
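// A small self-check sketch of the mask logic above, assuming the default mask 0x0C
// (hardware threads 2 and 3); the function name is illustrative only.
#include <assert.h>
static inline void __TBB_XBOX360_CheckDefaultMapping_sketch()
{
    assert( __TBB_XBOX360_DetectNumberOfWorkers() == 3 );   // 2 set bits + 1 for the master thread
    assert( __TBB_XBOX360_GetHardwareThreadIndex(0) == 2 ); // first set bit of the mask
    assert( __TBB_XBOX360_GetHardwareThreadIndex(1) == 3 ); // second set bit of the mask
    assert( __TBB_XBOX360_GetHardwareThreadIndex(2) == 2 ); // worker indices wrap modulo (workers - 1)
}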

View File

@@ -0,0 +1,269 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_memory_pool_H
#define __TBB_memory_pool_H
#if !TBB_PREVIEW_MEMORY_POOL
#error Set TBB_PREVIEW_MEMORY_POOL to include memory_pool.h
#endif
/** @file */
#include "scalable_allocator.h"
#include <new> // std::bad_alloc
#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
#include <utility> // std::forward
#endif
#if __TBB_EXTRA_DEBUG
#define __TBBMALLOC_ASSERT ASSERT
#else
#define __TBBMALLOC_ASSERT(a,b) ((void)0)
#endif
namespace tbb {
namespace interface6 {
//! @cond INTERNAL
namespace internal {
//! Base of thread-safe pool allocator for variable-size requests
class pool_base : tbb::internal::no_copy {
// The pool interface is separate from the standard allocator classes because it has
// to maintain internal state; copy and assignment are not allowed. Move and swap are possible.
public:
//! Reset pool to reuse its memory (free all objects at once)
void recycle() { rml::pool_reset(my_pool); }
//! The "malloc" analogue to allocate block of memory of size bytes
void *malloc(size_t size) { return rml::pool_malloc(my_pool, size); }
//! The "free" analogue to discard a previously allocated piece of memory.
void free(void* ptr) { rml::pool_free(my_pool, ptr); }
//! The "realloc" analogue complementing pool_malloc.
// Enables some low-level optimization possibilities
void *realloc(void* ptr, size_t size) {
return rml::pool_realloc(my_pool, ptr, size);
}
protected:
//! destroy pool - must be called in a child class
void destroy() { rml::pool_destroy(my_pool); }
rml::MemoryPool *my_pool;
};
} // namespace internal
//! @endcond
#if _MSC_VER && !defined(__INTEL_COMPILER)
// Workaround for erroneous "unreferenced parameter" warning in method destroy.
#pragma warning (push)
#pragma warning (disable: 4100)
#endif
//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5
/** @ingroup memory_allocation */
template<typename T, typename P = internal::pool_base>
class memory_pool_allocator {
protected:
typedef P pool_type;
pool_type *my_pool;
template<typename U, typename R>
friend class memory_pool_allocator;
template<typename V, typename U, typename R>
friend bool operator==( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b);
template<typename V, typename U, typename R>
friend bool operator!=( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b);
public:
typedef typename tbb::internal::allocator_type<T>::value_type value_type;
typedef value_type* pointer;
typedef const value_type* const_pointer;
typedef value_type& reference;
typedef const value_type& const_reference;
typedef size_t size_type;
typedef ptrdiff_t difference_type;
template<typename U> struct rebind {
typedef memory_pool_allocator<U, P> other;
};
memory_pool_allocator(pool_type &pool) throw() : my_pool(&pool) {}
memory_pool_allocator(const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {}
template<typename U>
memory_pool_allocator(const memory_pool_allocator<U,P>& src) throw() : my_pool(src.my_pool) {}
pointer address(reference x) const { return &x; }
const_pointer address(const_reference x) const { return &x; }
//! Allocate space for n objects.
pointer allocate( size_type n, const void* /*hint*/ = 0) {
return static_cast<pointer>( my_pool->malloc( n*sizeof(value_type) ) );
}
//! Free previously allocated block of memory.
void deallocate( pointer p, size_type ) {
my_pool->free(p);
}
//! Largest value for which method allocate might succeed.
size_type max_size() const throw() {
size_type max = static_cast<size_type>(-1) / sizeof (value_type);
return (max > 0 ? max : 1);
}
//! Copy-construct value at location pointed to by p.
#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
template<typename U, typename... Args>
void construct(U *p, Args&&... args)
{ ::new((void *)p) U(std::forward<Args>(args)...); }
#else // __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
#if __TBB_CPP11_RVALUE_REF_PRESENT
void construct( pointer p, value_type&& value ) {::new((void*)(p)) value_type(std::move(value));}
#endif
void construct( pointer p, const value_type& value ) { ::new((void*)(p)) value_type(value); }
#endif // __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
//! Destroy value at location pointed to by p.
void destroy( pointer p ) { p->~value_type(); }
};
#if _MSC_VER && !defined(__INTEL_COMPILER)
#pragma warning (pop)
#endif // warning 4100 is back
//! Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1
/** @ingroup memory_allocation */
template<typename P>
class memory_pool_allocator<void, P> {
public:
typedef P pool_type;
typedef void* pointer;
typedef const void* const_pointer;
typedef void value_type;
template<typename U> struct rebind {
typedef memory_pool_allocator<U, P> other;
};
memory_pool_allocator( pool_type &pool) throw() : my_pool(&pool) {}
memory_pool_allocator( const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {}
template<typename U>
memory_pool_allocator(const memory_pool_allocator<U,P>& src) throw() : my_pool(src.my_pool) {}
protected:
pool_type *my_pool;
template<typename U, typename R>
friend class memory_pool_allocator;
template<typename V, typename U, typename R>
friend bool operator==( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b);
template<typename V, typename U, typename R>
friend bool operator!=( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b);
};
template<typename T, typename U, typename P>
inline bool operator==( const memory_pool_allocator<T,P>& a, const memory_pool_allocator<U,P>& b) {return a.my_pool==b.my_pool;}
template<typename T, typename U, typename P>
inline bool operator!=( const memory_pool_allocator<T,P>& a, const memory_pool_allocator<U,P>& b) {return a.my_pool!=b.my_pool;}
//! Thread-safe growable pool allocator for variable-size requests
template <typename Alloc>
class memory_pool : public internal::pool_base {
Alloc my_alloc; // TODO: base-class optimization
static void *allocate_request(intptr_t pool_id, size_t & bytes);
static int deallocate_request(intptr_t pool_id, void*, size_t raw_bytes);
public:
//! construct pool with underlying allocator
memory_pool(const Alloc &src = Alloc());
//! destroy pool
~memory_pool() { destroy(); } // call the callbacks first and destroy my_alloc later
};
class fixed_pool : public internal::pool_base {
void *my_buffer;
size_t my_size;
inline static void *allocate_request(intptr_t pool_id, size_t & bytes);
public:
//! construct pool from a user-supplied buffer
inline fixed_pool(void *buf, size_t size);
//! destroy pool
~fixed_pool() { destroy(); }
};
//////////////// Implementation ///////////////
template <typename Alloc>
memory_pool<Alloc>::memory_pool(const Alloc &src) : my_alloc(src) {
rml::MemPoolPolicy args(allocate_request, deallocate_request,
sizeof(typename Alloc::value_type));
rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool);
if( res!=rml::POOL_OK ) __TBB_THROW(std::bad_alloc());
}
template <typename Alloc>
void *memory_pool<Alloc>::allocate_request(intptr_t pool_id, size_t & bytes) {
memory_pool<Alloc> &self = *reinterpret_cast<memory_pool<Alloc>*>(pool_id);
const size_t unit_size = sizeof(typename Alloc::value_type);
__TBBMALLOC_ASSERT( 0 == bytes%unit_size, NULL);
void *ptr;
__TBB_TRY { ptr = self.my_alloc.allocate( bytes/unit_size ); }
__TBB_CATCH(...) { return 0; }
return ptr;
}
#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED
// Workaround for erroneous "unreachable code" warning in the template below.
// Specific for VC++ 17-18 compiler
#pragma warning (push)
#pragma warning (disable: 4702)
#endif
template <typename Alloc>
int memory_pool<Alloc>::deallocate_request(intptr_t pool_id, void* raw_ptr, size_t raw_bytes) {
memory_pool<Alloc> &self = *reinterpret_cast<memory_pool<Alloc>*>(pool_id);
const size_t unit_size = sizeof(typename Alloc::value_type);
__TBBMALLOC_ASSERT( 0 == raw_bytes%unit_size, NULL);
self.my_alloc.deallocate( static_cast<typename Alloc::value_type*>(raw_ptr), raw_bytes/unit_size );
return 0;
}
#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED
#pragma warning (pop)
#endif
inline fixed_pool::fixed_pool(void *buf, size_t size) : my_buffer(buf), my_size(size) {
if( !buf || !size ) __TBB_THROW(std::bad_alloc());
rml::MemPoolPolicy args(allocate_request, 0, size, /*fixedPool=*/true);
rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool);
if( res!=rml::POOL_OK ) __TBB_THROW(std::bad_alloc());
}
inline void *fixed_pool::allocate_request(intptr_t pool_id, size_t & bytes) {
fixed_pool &self = *reinterpret_cast<fixed_pool*>(pool_id);
__TBBMALLOC_ASSERT(0 != self.my_size, "The buffer must not be used twice.");
bytes = self.my_size;
self.my_size = 0; // remember that buffer has been used
return self.my_buffer;
}
} //namespace interface6
using interface6::memory_pool_allocator;
using interface6::memory_pool;
using interface6::fixed_pool;
} //namespace tbb
#undef __TBBMALLOC_ASSERT
#endif// __TBB_memory_pool_H
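// A hedged usage sketch, as it might appear in user code (type, variable, and function names
// below are illustrative): a growable pool backed by std::allocator, an STL container drawing
// its nodes from that pool via memory_pool_allocator, and a fixed_pool serving one buffer.
#define TBB_PREVIEW_MEMORY_POOL 1
#include "tbb/memory_pool.h"
#include <list>
#include <memory>   // std::allocator

inline void memory_pool_usage_sketch() {
    tbb::memory_pool< std::allocator<char> > pool;   // thread-safe, growable pool
    void *raw = pool.malloc(16);                     // "malloc" analogue
    raw = pool.realloc(raw, 32);                     // "realloc" analogue
    pool.free(raw);                                  // "free" analogue

    typedef tbb::memory_pool_allocator<int> pool_alloc_t;
    std::list<int, pool_alloc_t> numbers( (pool_alloc_t(pool)) );
    numbers.push_back(42);                           // list nodes are carved from the pool

    static char arena[4096];
    tbb::fixed_pool fixed(arena, sizeof(arena));     // pool over a single fixed buffer
    void *chunk = fixed.malloc(100);
    fixed.free(chunk);
}                                                    // pool destructors release all remaining memory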

Research/inc/tbb/mutex.h
View File

@@ -0,0 +1,234 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_mutex_H
#define __TBB_mutex_H
#if _WIN32||_WIN64
#include "machine/windows_api.h"
#else
#include <pthread.h>
#endif /* _WIN32||_WIN64 */
#include <new>
#include "aligned_space.h"
#include "tbb_stddef.h"
#include "tbb_profiling.h"
namespace tbb {
//! Wrapper around the platform's native lock.
/** For testing purposes only.
@ingroup synchronization */
class mutex : internal::mutex_copy_deprecated_and_disabled {
public:
//! Construct unacquired mutex.
mutex() {
#if TBB_USE_ASSERT || TBB_USE_THREADING_TOOLS
internal_construct();
#else
#if _WIN32||_WIN64
InitializeCriticalSectionEx(&impl, 4000, 0);
#else
int error_code = pthread_mutex_init(&impl,NULL);
if( error_code )
tbb::internal::handle_perror(error_code,"mutex: pthread_mutex_init failed");
#endif /* _WIN32||_WIN64*/
#endif /* TBB_USE_ASSERT */
};
~mutex() {
#if TBB_USE_ASSERT
internal_destroy();
#else
#if _WIN32||_WIN64
DeleteCriticalSection(&impl);
#else
pthread_mutex_destroy(&impl);
#endif /* _WIN32||_WIN64 */
#endif /* TBB_USE_ASSERT */
};
class scoped_lock;
friend class scoped_lock;
//! The scoped locking pattern
/** It helps to avoid the common problem of forgetting to release the lock.
It also nicely provides the "node" for queuing locks. */
class scoped_lock : internal::no_copy {
public:
//! Construct lock that has not acquired a mutex.
scoped_lock() : my_mutex(NULL) {};
//! Acquire lock on given mutex.
scoped_lock( mutex& mutex ) {
acquire( mutex );
}
//! Release lock (if lock is held).
~scoped_lock() {
if( my_mutex )
release();
}
//! Acquire lock on given mutex.
void acquire( mutex& mutex ) {
#if TBB_USE_ASSERT
internal_acquire(mutex);
#else
mutex.lock();
my_mutex = &mutex;
#endif /* TBB_USE_ASSERT */
}
//! Try acquire lock on given mutex.
bool try_acquire( mutex& mutex ) {
#if TBB_USE_ASSERT
return internal_try_acquire (mutex);
#else
bool result = mutex.try_lock();
if( result )
my_mutex = &mutex;
return result;
#endif /* TBB_USE_ASSERT */
}
//! Release lock
void release() {
#if TBB_USE_ASSERT
internal_release ();
#else
my_mutex->unlock();
my_mutex = NULL;
#endif /* TBB_USE_ASSERT */
}
private:
//! Pointer to the mutex this lock currently holds (NULL if none)
mutex* my_mutex;
//! All checks from acquire using mutex.state were moved here
void __TBB_EXPORTED_METHOD internal_acquire( mutex& m );
//! All checks from try_acquire using mutex.state were moved here
bool __TBB_EXPORTED_METHOD internal_try_acquire( mutex& m );
//! All checks from release using mutex.state were moved here
void __TBB_EXPORTED_METHOD internal_release();
friend class mutex;
};
// Mutex traits
static const bool is_rw_mutex = false;
static const bool is_recursive_mutex = false;
static const bool is_fair_mutex = false;
// ISO C++0x compatibility methods
//! Acquire lock
void lock() {
#if TBB_USE_ASSERT
aligned_space<scoped_lock> tmp;
new(tmp.begin()) scoped_lock(*this);
#else
#if _WIN32||_WIN64
EnterCriticalSection(&impl);
#else
int error_code = pthread_mutex_lock(&impl);
if( error_code )
tbb::internal::handle_perror(error_code,"mutex: pthread_mutex_lock failed");
#endif /* _WIN32||_WIN64 */
#endif /* TBB_USE_ASSERT */
}
//! Try acquiring lock (non-blocking)
/** Return true if lock acquired; false otherwise. */
bool try_lock() {
#if TBB_USE_ASSERT
aligned_space<scoped_lock> tmp;
scoped_lock& s = *tmp.begin();
s.my_mutex = NULL;
return s.internal_try_acquire(*this);
#else
#if _WIN32||_WIN64
return TryEnterCriticalSection(&impl)!=0;
#else
return pthread_mutex_trylock(&impl)==0;
#endif /* _WIN32||_WIN64 */
#endif /* TBB_USE_ASSERT */
}
//! Release lock
void unlock() {
#if TBB_USE_ASSERT
aligned_space<scoped_lock> tmp;
scoped_lock& s = *tmp.begin();
s.my_mutex = this;
s.internal_release();
#else
#if _WIN32||_WIN64
LeaveCriticalSection(&impl);
#else
pthread_mutex_unlock(&impl);
#endif /* _WIN32||_WIN64 */
#endif /* TBB_USE_ASSERT */
}
//! Return native_handle
#if _WIN32||_WIN64
typedef LPCRITICAL_SECTION native_handle_type;
#else
typedef pthread_mutex_t* native_handle_type;
#endif
native_handle_type native_handle() { return (native_handle_type) &impl; }
enum state_t {
INITIALIZED=0x1234,
DESTROYED=0x789A,
HELD=0x56CD
};
private:
#if _WIN32||_WIN64
CRITICAL_SECTION impl;
enum state_t state;
#else
pthread_mutex_t impl;
#endif /* _WIN32||_WIN64 */
//! All checks from mutex constructor using mutex.state were moved here
void __TBB_EXPORTED_METHOD internal_construct();
//! All checks from mutex destructor using mutex.state were moved here
void __TBB_EXPORTED_METHOD internal_destroy();
#if _WIN32||_WIN64
public:
//! Set the internal state
void set_state( state_t to ) { state = to; }
#endif
};
__TBB_DEFINE_PROFILING_SET_NAME(mutex)
} // namespace tbb
#endif /* __TBB_mutex_H */
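// A hedged usage sketch of the scoped locking pattern and the C++0x-style methods above,
// as it might appear in user code (the struct name is illustrative only):
#include "tbb/mutex.h"

struct guarded_counter_sketch {
    tbb::mutex my_mutex;
    long my_value;

    guarded_counter_sketch() : my_value(0) {}

    void increment() {
        // The lock is released when the scoped_lock leaves scope,
        // even if the protected code throws.
        tbb::mutex::scoped_lock lock(my_mutex);
        ++my_value;
    }

    bool try_increment() {
        tbb::mutex::scoped_lock lock;
        if( !lock.try_acquire(my_mutex) )
            return false;            // lock is busy; do not block
        ++my_value;
        return true;                 // released by ~scoped_lock
    }
};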

View File

@@ -0,0 +1,54 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_null_mutex_H
#define __TBB_null_mutex_H
#include "tbb_stddef.h"
namespace tbb {
//! A mutex which does nothing
/** A null_mutex does no operation and simulates success.
@ingroup synchronization */
class null_mutex : internal::mutex_copy_deprecated_and_disabled {
public:
//! Represents acquisition of a mutex.
class scoped_lock : internal::no_copy {
public:
scoped_lock() {}
scoped_lock( null_mutex& ) {}
~scoped_lock() {}
void acquire( null_mutex& ) {}
bool try_acquire( null_mutex& ) { return true; }
void release() {}
};
null_mutex() {}
// Mutex traits
static const bool is_rw_mutex = false;
static const bool is_recursive_mutex = true;
static const bool is_fair_mutex = true;
};
}
#endif /* __TBB_null_mutex_H */
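// A hedged sketch of the intended use, as it might appear in user code: the same component
// templated on its mutex type, so one instantiation is thread safe while the other pays no
// locking cost in a single-threaded build. (Names below are illustrative, not TBB APIs.)
#include "tbb/null_mutex.h"
#include "tbb/mutex.h"

template<typename Mutex>
struct guarded_value_sketch {
    Mutex my_mutex;
    int   my_value;

    guarded_value_sketch() : my_value(0) {}

    void set( int v ) {
        typename Mutex::scoped_lock lock(my_mutex);  // a no-op for null_mutex
        my_value = v;
    }
};

typedef guarded_value_sketch<tbb::mutex>      thread_safe_value_sketch;
typedef guarded_value_sketch<tbb::null_mutex> single_threaded_value_sketch;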

View File

@@ -0,0 +1,56 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_null_rw_mutex_H
#define __TBB_null_rw_mutex_H
#include "tbb_stddef.h"
namespace tbb {
//! A rw mutex which does nothing
/** A null_rw_mutex is a rw mutex that does nothing and simulates successful operation.
@ingroup synchronization */
class null_rw_mutex : internal::mutex_copy_deprecated_and_disabled {
public:
//! Represents acquisition of a mutex.
class scoped_lock : internal::no_copy {
public:
scoped_lock() {}
scoped_lock( null_rw_mutex& , bool = true ) {}
~scoped_lock() {}
void acquire( null_rw_mutex& , bool = true ) {}
bool upgrade_to_writer() { return true; }
bool downgrade_to_reader() { return true; }
bool try_acquire( null_rw_mutex& , bool = true ) { return true; }
void release() {}
};
null_rw_mutex() {}
// Mutex traits
static const bool is_rw_mutex = true;
static const bool is_recursive_mutex = true;
static const bool is_fair_mutex = true;
};
}
#endif /* __TBB_null_rw_mutex_H */
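// A hedged sketch of code written against the generic reader-writer mutex interface; with
// tbb::null_rw_mutex every operation is a no-op, which is convenient for single-threaded
// builds or for measuring locking overhead. (The template parameter and function name are
// illustrative, not TBB APIs.)
#include "tbb/null_rw_mutex.h"

template<typename RWMutex>
int read_then_maybe_reset_sketch( RWMutex& m, int& shared_value ) {
    typename RWMutex::scoped_lock lock( m, /*write=*/false );  // reader lock
    int observed = shared_value;
    if( observed < 0 ) {
        if( lock.upgrade_to_writer() )   // always reports success for null_rw_mutex
            shared_value = 0;
    }
    return observed;                     // lock released by ~scoped_lock
}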

View File

@@ -0,0 +1,522 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_parallel_do_H
#define __TBB_parallel_do_H
#include "internal/_range_iterator.h"
#include "task.h"
#include "aligned_space.h"
#include <iterator>
namespace tbb {
//! @cond INTERNAL
namespace internal {
template<typename Body, typename Item> class parallel_do_feeder_impl;
template<typename Body> class do_group_task;
//! Strips 'cv' and '&' qualifiers from its template type argument
template<typename T>
struct strip { typedef T type; };
template<typename T>
struct strip<T&> { typedef T type; };
template<typename T>
struct strip<const T&> { typedef T type; };
template<typename T>
struct strip<volatile T&> { typedef T type; };
template<typename T>
struct strip<const volatile T&> { typedef T type; };
// Most compilers remove cv-qualifiers from non-reference function argument types,
// but unfortunately some do not.
template<typename T>
struct strip<const T> { typedef T type; };
template<typename T>
struct strip<volatile T> { typedef T type; };
template<typename T>
struct strip<const volatile T> { typedef T type; };
} // namespace internal
//! @endcond
//! Class that the user-supplied algorithm body uses to add new work items
/** \param Item Work item type **/
template<typename Item>
class parallel_do_feeder: internal::no_copy
{
parallel_do_feeder() {}
virtual ~parallel_do_feeder () {}
virtual void internal_add( const Item& item ) = 0;
template<typename Body_, typename Item_> friend class internal::parallel_do_feeder_impl;
public:
//! Add a work item to a running parallel_do.
void add( const Item& item ) {internal_add(item);}
};
//! @cond INTERNAL
namespace internal {
//! For internal use only.
/** Selects one of the two possible forms of function call member operator.
@ingroup algorithms **/
template<class Body, typename Item>
class parallel_do_operator_selector
{
typedef parallel_do_feeder<Item> Feeder;
template<typename A1, typename A2, typename CvItem >
static void internal_call( const Body& obj, A1& arg1, A2&, void (Body::*)(CvItem) const ) {
obj(arg1);
}
template<typename A1, typename A2, typename CvItem >
static void internal_call( const Body& obj, A1& arg1, A2& arg2, void (Body::*)(CvItem, parallel_do_feeder<Item>&) const ) {
obj(arg1, arg2);
}
public:
template<typename A1, typename A2 >
static void call( const Body& obj, A1& arg1, A2& arg2 )
{
internal_call( obj, arg1, arg2, &Body::operator() );
}
};
//! For internal use only.
/** Executes one iteration of a do.
@ingroup algorithms */
template<typename Body, typename Item>
class do_iteration_task: public task
{
typedef parallel_do_feeder_impl<Body, Item> feeder_type;
Item my_value;
feeder_type& my_feeder;
do_iteration_task( const Item& value, feeder_type& feeder ) :
my_value(value), my_feeder(feeder)
{}
/*override*/
task* execute()
{
parallel_do_operator_selector<Body, Item>::call(*my_feeder.my_body, my_value, my_feeder);
return NULL;
}
template<typename Body_, typename Item_> friend class parallel_do_feeder_impl;
}; // class do_iteration_task
template<typename Iterator, typename Body, typename Item>
class do_iteration_task_iter: public task
{
typedef parallel_do_feeder_impl<Body, Item> feeder_type;
Iterator my_iter;
feeder_type& my_feeder;
do_iteration_task_iter( const Iterator& iter, feeder_type& feeder ) :
my_iter(iter), my_feeder(feeder)
{}
/*override*/
task* execute()
{
parallel_do_operator_selector<Body, Item>::call(*my_feeder.my_body, *my_iter, my_feeder);
return NULL;
}
template<typename Iterator_, typename Body_, typename Item_> friend class do_group_task_forward;
template<typename Body_, typename Item_> friend class do_group_task_input;
template<typename Iterator_, typename Body_, typename Item_> friend class do_task_iter;
}; // class do_iteration_task_iter
//! For internal use only.
/** Implements new task adding procedure.
@ingroup algorithms **/
template<class Body, typename Item>
class parallel_do_feeder_impl : public parallel_do_feeder<Item>
{
/*override*/
void internal_add( const Item& item )
{
typedef do_iteration_task<Body, Item> iteration_type;
iteration_type& t = *new (task::allocate_additional_child_of(*my_barrier)) iteration_type(item, *this);
t.spawn( t );
}
public:
const Body* my_body;
empty_task* my_barrier;
parallel_do_feeder_impl()
{
my_barrier = new( task::allocate_root() ) empty_task();
__TBB_ASSERT(my_barrier, "root task allocation failed");
}
#if __TBB_TASK_GROUP_CONTEXT
parallel_do_feeder_impl(tbb::task_group_context &context)
{
my_barrier = new( task::allocate_root(context) ) empty_task();
__TBB_ASSERT(my_barrier, "root task allocation failed");
}
#endif
~parallel_do_feeder_impl()
{
my_barrier->destroy(*my_barrier);
}
}; // class parallel_do_feeder_impl
//! For internal use only
/** Unpacks a block of iterations.
@ingroup algorithms */
template<typename Iterator, typename Body, typename Item>
class do_group_task_forward: public task
{
static const size_t max_arg_size = 4;
typedef parallel_do_feeder_impl<Body, Item> feeder_type;
feeder_type& my_feeder;
Iterator my_first;
size_t my_size;
do_group_task_forward( Iterator first, size_t size, feeder_type& feeder )
: my_feeder(feeder), my_first(first), my_size(size)
{}
/*override*/ task* execute()
{
typedef do_iteration_task_iter<Iterator, Body, Item> iteration_type;
__TBB_ASSERT( my_size>0, NULL );
task_list list;
task* t;
size_t k=0;
for(;;) {
t = new( allocate_child() ) iteration_type( my_first, my_feeder );
++my_first;
if( ++k==my_size ) break;
list.push_back(*t);
}
set_ref_count(int(k+1));
spawn(list);
spawn_and_wait_for_all(*t);
return NULL;
}
template<typename Iterator_, typename Body_, typename _Item> friend class do_task_iter;
}; // class do_group_task_forward
template<typename Body, typename Item>
class do_group_task_input: public task
{
static const size_t max_arg_size = 4;
typedef parallel_do_feeder_impl<Body, Item> feeder_type;
feeder_type& my_feeder;
size_t my_size;
aligned_space<Item, max_arg_size> my_arg;
do_group_task_input( feeder_type& feeder )
: my_feeder(feeder), my_size(0)
{}
/*override*/ task* execute()
{
typedef do_iteration_task_iter<Item*, Body, Item> iteration_type;
__TBB_ASSERT( my_size>0, NULL );
task_list list;
task* t;
size_t k=0;
for(;;) {
t = new( allocate_child() ) iteration_type( my_arg.begin() + k, my_feeder );
if( ++k==my_size ) break;
list.push_back(*t);
}
set_ref_count(int(k+1));
spawn(list);
spawn_and_wait_for_all(*t);
return NULL;
}
~do_group_task_input(){
for( size_t k=0; k<my_size; ++k)
(my_arg.begin() + k)->~Item();
}
template<typename Iterator_, typename Body_, typename Item_> friend class do_task_iter;
}; // class do_group_task_input
//! For internal use only.
/** Gets a block of iterations and packages it into a do_group_task.
@ingroup algorithms */
template<typename Iterator, typename Body, typename Item>
class do_task_iter: public task
{
typedef parallel_do_feeder_impl<Body, Item> feeder_type;
public:
do_task_iter( Iterator first, Iterator last , feeder_type& feeder ) :
my_first(first), my_last(last), my_feeder(feeder)
{}
private:
Iterator my_first;
Iterator my_last;
feeder_type& my_feeder;
/* Do not merge the run(xxx) and run_xxx() methods. They are separated in order
to make sure that compilers eliminate the unused argument of type xxx
(that is, do not put it on the stack). The sole purpose of this argument
is overload resolution.
An alternative could be template functions, but explicit specialization
of member function templates is not supported for non-specialized class
templates. Besides, template functions would always fall back to the least
efficient variant (the one for input iterators) in the case of iterators
having custom tags derived from the basic ones. */
/*override*/ task* execute()
{
typedef typename std::iterator_traits<Iterator>::iterator_category iterator_tag;
return run( (iterator_tag*)NULL );
}
/** This is the most restricted variant that operates on input iterators or
iterators with unknown tags (tags not derived from the standard ones). **/
inline task* run( void* ) { return run_for_input_iterator(); }
task* run_for_input_iterator() {
typedef do_group_task_input<Body, Item> block_type;
block_type& t = *new( allocate_additional_child_of(*my_feeder.my_barrier) ) block_type(my_feeder);
size_t k=0;
while( !(my_first == my_last) ) {
new (t.my_arg.begin() + k) Item(*my_first);
++my_first;
if( ++k==block_type::max_arg_size ) {
if ( !(my_first == my_last) )
recycle_to_reexecute();
break;
}
}
if( k==0 ) {
destroy(t);
return NULL;
} else {
t.my_size = k;
return &t;
}
}
inline task* run( std::forward_iterator_tag* ) { return run_for_forward_iterator(); }
task* run_for_forward_iterator() {
typedef do_group_task_forward<Iterator, Body, Item> block_type;
Iterator first = my_first;
size_t k=0;
while( !(my_first==my_last) ) {
++my_first;
if( ++k==block_type::max_arg_size ) {
if ( !(my_first==my_last) )
recycle_to_reexecute();
break;
}
}
return k==0 ? NULL : new( allocate_additional_child_of(*my_feeder.my_barrier) ) block_type(first, k, my_feeder);
}
inline task* run( std::random_access_iterator_tag* ) { return run_for_random_access_iterator(); }
task* run_for_random_access_iterator() {
typedef do_group_task_forward<Iterator, Body, Item> block_type;
typedef do_iteration_task_iter<Iterator, Body, Item> iteration_type;
size_t k = static_cast<size_t>(my_last-my_first);
if( k > block_type::max_arg_size ) {
Iterator middle = my_first + k/2;
empty_task& c = *new( allocate_continuation() ) empty_task;
do_task_iter& b = *new( c.allocate_child() ) do_task_iter(middle, my_last, my_feeder);
recycle_as_child_of(c);
my_last = middle;
c.set_ref_count(2);
c.spawn(b);
return this;
}else if( k != 0 ) {
task_list list;
task* t;
size_t k1=0;
for(;;) {
t = new( allocate_child() ) iteration_type(my_first, my_feeder);
++my_first;
if( ++k1==k ) break;
list.push_back(*t);
}
set_ref_count(int(k+1));
spawn(list);
spawn_and_wait_for_all(*t);
}
return NULL;
}
}; // class do_task_iter
//! For internal use only.
/** Implements parallel iteration over a range.
@ingroup algorithms */
template<typename Iterator, typename Body, typename Item>
void run_parallel_do( Iterator first, Iterator last, const Body& body
#if __TBB_TASK_GROUP_CONTEXT
, task_group_context& context
#endif
)
{
typedef do_task_iter<Iterator, Body, Item> root_iteration_task;
#if __TBB_TASK_GROUP_CONTEXT
parallel_do_feeder_impl<Body, Item> feeder(context);
#else
parallel_do_feeder_impl<Body, Item> feeder;
#endif
feeder.my_body = &body;
root_iteration_task &t = *new( feeder.my_barrier->allocate_child() ) root_iteration_task(first, last, feeder);
feeder.my_barrier->set_ref_count(2);
feeder.my_barrier->spawn_and_wait_for_all(t);
}
//! For internal use only.
/** Detects types of Body's operator function arguments.
@ingroup algorithms **/
template<typename Iterator, typename Body, typename Item>
void select_parallel_do( Iterator first, Iterator last, const Body& body, void (Body::*)(Item) const
#if __TBB_TASK_GROUP_CONTEXT
, task_group_context& context
#endif // __TBB_TASK_GROUP_CONTEXT
)
{
run_parallel_do<Iterator, Body, typename strip<Item>::type>( first, last, body
#if __TBB_TASK_GROUP_CONTEXT
, context
#endif // __TBB_TASK_GROUP_CONTEXT
);
}
//! For internal use only.
/** Detects types of Body's operator function arguments.
@ingroup algorithms **/
template<typename Iterator, typename Body, typename Item, typename _Item>
void select_parallel_do( Iterator first, Iterator last, const Body& body, void (Body::*)(Item, parallel_do_feeder<_Item>&) const
#if __TBB_TASK_GROUP_CONTEXT
, task_group_context& context
#endif // __TBB_TASK_GROUP_CONTEXT
)
{
run_parallel_do<Iterator, Body, typename strip<Item>::type>( first, last, body
#if __TBB_TASK_GROUP_CONTEXT
, context
#endif // __TBB_TASK_GROUP_CONTEXT
);
}
} // namespace internal
//! @endcond
/** \page parallel_do_body_req Requirements on parallel_do body
Class \c Body implementing the concept of parallel_do body must define:
- \code
B::operator()(
cv_item_type item,
parallel_do_feeder<item_type>& feeder
) const
OR
B::operator()( cv_item_type& item ) const
\endcode Process item.
May be invoked concurrently for the same \c this but different \c item.
- \code item_type( const item_type& ) \endcode
Copy a work item.
- \code ~item_type() \endcode Destroy a work item
**/
/** \name parallel_do
See also requirements on \ref parallel_do_body_req "parallel_do Body". **/
//@{
//! Parallel iteration over a range, with optional addition of more work.
/** @ingroup algorithms */
template<typename Iterator, typename Body>
void parallel_do( Iterator first, Iterator last, const Body& body )
{
if ( first == last )
return;
#if __TBB_TASK_GROUP_CONTEXT
task_group_context context;
#endif // __TBB_TASK_GROUP_CONTEXT
internal::select_parallel_do( first, last, body, &Body::operator()
#if __TBB_TASK_GROUP_CONTEXT
, context
#endif // __TBB_TASK_GROUP_CONTEXT
);
}
template<typename Range, typename Body>
void parallel_do(Range& rng, const Body& body) {
parallel_do(tbb::internal::first(rng), tbb::internal::last(rng), body);
}
template<typename Range, typename Body>
void parallel_do(const Range& rng, const Body& body) {
parallel_do(tbb::internal::first(rng), tbb::internal::last(rng), body);
}
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration over a range, with optional addition of more work and user-supplied context
/** @ingroup algorithms */
template<typename Iterator, typename Body>
void parallel_do( Iterator first, Iterator last, const Body& body, task_group_context& context )
{
if ( first == last )
return;
internal::select_parallel_do( first, last, body, &Body::operator(), context );
}
template<typename Range, typename Body>
void parallel_do(Range& rng, const Body& body, task_group_context& context) {
parallel_do(tbb::internal::first(rng), tbb::internal::last(rng), body, context);
}
template<typename Range, typename Body>
void parallel_do(const Range& rng, const Body& body, task_group_context& context) {
parallel_do(tbb::internal::first(rng), tbb::internal::last(rng), body, context);
}
#endif // __TBB_TASK_GROUP_CONTEXT
//@}
} // namespace
#endif /* __TBB_parallel_do_H */
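// A hedged usage sketch, as it might appear in user code (type and function names are
// illustrative): a body matching the two-argument operator() form documented above, which
// both processes an item and feeds new work back into the same parallel_do via the feeder.
#include "tbb/parallel_do.h"
#include <vector>

struct collatz_step_sketch {
    void operator()( long n, tbb::parallel_do_feeder<long>& feeder ) const {
        if( n <= 1 ) return;                          // this chain is finished
        long next = (n % 2 == 0) ? n / 2 : 3 * n + 1;
        feeder.add( next );                           // dynamically adds one more work item
    }
};

inline void parallel_do_usage_sketch() {
    std::vector<long> seeds;
    for( long i = 2; i < 100; ++i )
        seeds.push_back(i);
    // Iterates over [begin, end); items added through the feeder are processed as well.
    tbb::parallel_do( seeds.begin(), seeds.end(), collatz_step_sketch() );
}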

View File

@@ -0,0 +1,373 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_parallel_for_H
#define __TBB_parallel_for_H
#include <new>
#include "task.h"
#include "partitioner.h"
#include "blocked_range.h"
#include "tbb_exception.h"
namespace tbb {
namespace interface7 {
//! @cond INTERNAL
namespace internal {
//! allocate right task with new parent
void* allocate_sibling(task* start_for_task, size_t bytes);
//! Task type used in parallel_for
/** @ingroup algorithms */
template<typename Range, typename Body, typename Partitioner>
class start_for: public task {
Range my_range;
const Body my_body;
typename Partitioner::task_partition_type my_partition;
/*override*/ task* execute();
//! Update affinity info, if any.
/*override*/ void note_affinity( affinity_id id ) {
my_partition.note_affinity( id );
}
public:
//! Constructor for root task.
start_for( const Range& range, const Body& body, Partitioner& partitioner ) :
my_range(range),
my_body(body),
my_partition(partitioner)
{
}
//! Splitting constructor used to generate children.
/** parent_ becomes left child. Newly constructed object is right child. */
start_for( start_for& parent_, typename Partitioner::split_type& split_obj) :
my_range(parent_.my_range, split_obj),
my_body(parent_.my_body),
my_partition(parent_.my_partition, split_obj)
{
my_partition.set_affinity(*this);
}
//! Construct right child from the given range as response to the demand.
/** parent_ remains left child. Newly constructed object is right child. */
start_for( start_for& parent_, const Range& r, depth_t d ) :
my_range(r),
my_body(parent_.my_body),
my_partition(parent_.my_partition, split())
{
my_partition.set_affinity(*this);
my_partition.align_depth( d );
}
static void run( const Range& range, const Body& body, Partitioner& partitioner ) {
if( !range.empty() ) {
#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
start_for& a = *new(task::allocate_root()) start_for(range,body,partitioner);
#else
// The bound context prevents exceptions thrown from the body from affecting nesting or sibling algorithms,
// and allows users to handle exceptions safely by wrapping parallel_for in a try-block.
task_group_context context;
start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner);
#endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
task::spawn_root_and_wait(a);
}
}
#if __TBB_TASK_GROUP_CONTEXT
static void run( const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context ) {
if( !range.empty() ) {
start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner);
task::spawn_root_and_wait(a);
}
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
//! Run body for range, serves as callback for partitioner
void run_body( Range &r ) { my_body( r ); }
//! spawn right task, serves as callback for partitioner
void offer_work(typename Partitioner::split_type& split_obj) {
spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, split_obj) );
}
//! spawn right task, serves as callback for partitioner
void offer_work(const Range& r, depth_t d = 0) {
spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, r, d) );
}
};
//! allocate right task with new parent
// TODO: 'inline' here is to avoid a multiple definition error, but for the sake of code size this should not be inlined
inline void* allocate_sibling(task* start_for_task, size_t bytes) {
task* parent_ptr = new( start_for_task->allocate_continuation() ) flag_task();
start_for_task->set_parent(parent_ptr);
parent_ptr->set_ref_count(2);
return &parent_ptr->allocate_child().allocate(bytes);
}
//! execute task for parallel_for
template<typename Range, typename Body, typename Partitioner>
task* start_for<Range,Body,Partitioner>::execute() {
my_partition.check_being_stolen( *this );
my_partition.execute(*this, my_range);
return NULL;
}
} // namespace internal
//! @endcond
} // namespace interface7
//! @cond INTERNAL
namespace internal {
using interface7::internal::start_for;
//! Calls the function with values from range [begin, end) with a step provided
template<typename Function, typename Index>
class parallel_for_body : internal::no_assign {
const Function &my_func;
const Index my_begin;
const Index my_step;
public:
parallel_for_body( const Function& _func, Index& _begin, Index& _step )
: my_func(_func), my_begin(_begin), my_step(_step) {}
void operator()( const tbb::blocked_range<Index>& r ) const {
// A set of local variables to help the compiler with vectorization of the following loop.
Index b = r.begin();
Index e = r.end();
Index ms = my_step;
Index k = my_begin + b*ms;
#if __INTEL_COMPILER
#pragma ivdep
#if __TBB_ASSERT_ON_VECTORIZATION_FAILURE
#pragma vector always assert
#endif
#endif
for ( Index i = b; i < e; ++i, k += ms ) {
my_func( k );
}
}
};
} // namespace internal
//! @endcond
// Requirements on Range concept are documented in blocked_range.h
/** \page parallel_for_body_req Requirements on parallel_for body
Class \c Body implementing the concept of parallel_for body must define:
- \code Body::Body( const Body& ); \endcode Copy constructor
- \code Body::~Body(); \endcode Destructor
- \code void Body::operator()( Range& r ) const; \endcode Function call operator applying the body to range \c r.
**/
/** \name parallel_for
See also requirements on \ref range_req "Range" and \ref parallel_for_body_req "parallel_for Body". **/
//@{
//! Parallel iteration over range with default partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_for( const Range& range, const Body& body ) {
internal::start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
}
//! Parallel iteration over range with simple partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) {
internal::start_for<Range,Body,const simple_partitioner>::run(range,body,partitioner);
}
//! Parallel iteration over range with auto_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) {
internal::start_for<Range,Body,const auto_partitioner>::run(range,body,partitioner);
}
//! Parallel iteration over range with affinity_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) {
internal::start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner);
}
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration over range with default partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_for( const Range& range, const Body& body, task_group_context& context ) {
internal::start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range, body, __TBB_DEFAULT_PARTITIONER(), context);
}
//! Parallel iteration over range with simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
internal::start_for<Range,Body,const simple_partitioner>::run(range, body, partitioner, context);
}
//! Parallel iteration over range with auto_partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
internal::start_for<Range,Body,const auto_partitioner>::run(range, body, partitioner, context);
}
//! Parallel iteration over range with affinity_partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
internal::start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context);
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
//@}
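/** \par Usage sketch for the range form
    A minimal sketch (names are illustrative only): a Body functor meeting the
    copy-constructor / destructor / operator() requirements above, applied over a
    blocked_range with the default partitioner.
    \code
    struct scale_body_sketch {
        float* my_data;
        float  my_factor;
        scale_body_sketch( float* data, float factor ) : my_data(data), my_factor(factor) {}
        void operator()( const tbb::blocked_range<size_t>& r ) const {
            for( size_t i = r.begin(); i != r.end(); ++i )
                my_data[i] *= my_factor;    // each sub-range is executed as a separate task
        }
    };

    void scale_in_parallel_sketch( float* data, size_t n ) {
        tbb::parallel_for( tbb::blocked_range<size_t>(0, n),
                           scale_body_sketch(data, 2.0f) );
    }
    \endcode **/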
namespace strict_ppl {
//@{
//! Implementation of parallel iteration over stepped range of integers with explicit step and partitioner
template <typename Index, typename Function, typename Partitioner>
void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) {
if (step <= 0 )
internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
else if (last > first) {
// Above "else" avoids "potential divide by zero" warning on some platforms
Index end = (last - first - Index(1)) / step + Index(1);
tbb::blocked_range<Index> range(static_cast<Index>(0), end);
internal::parallel_for_body<Function, Index> body(f, first, step);
tbb::parallel_for(range, body, partitioner);
}
}
//! Parallel iteration over a range of integers with a step provided and default partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner());
}
//! Parallel iteration over a range of integers with a step provided and simple partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) {
parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner);
}
//! Parallel iteration over a range of integers with a step provided and auto partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner);
}
//! Parallel iteration over a range of integers with a step provided and affinity partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) {
parallel_for_impl(first, last, step, f, partitioner);
}
//! Parallel iteration over a range of integers with a default step value and default partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner());
}
//! Parallel iteration over a range of integers with a default step value and simple partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) {
parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
}
//! Parallel iteration over a range of integers with a default step value and auto partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
}
//! Parallel iteration over a range of integers with a default step value and affinity partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) {
parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner);
}
#if __TBB_TASK_GROUP_CONTEXT
//! Implementation of parallel iteration over stepped range of integers with explicit step, task group context, and partitioner
template <typename Index, typename Function, typename Partitioner>
void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, tbb::task_group_context &context) {
if (step <= 0 )
internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
else if (last > first) {
// Above "else" avoids "potential divide by zero" warning on some platforms
Index end = (last - first - Index(1)) / step + Index(1);
tbb::blocked_range<Index> range(static_cast<Index>(0), end);
internal::parallel_for_body<Function, Index> body(f, first, step);
tbb::parallel_for(range, body, partitioner, context);
}
}
//! Parallel iteration over a range of integers with explicit step, task group context, and default partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f, tbb::task_group_context &context) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context);
}
//! Parallel iteration over a range of integers with explicit step, task group context, and simple partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) {
parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner, context);
}
//! Parallel iteration over a range of integers with explicit step, task group context, and auto partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner, context);
}
//! Parallel iteration over a range of integers with explicit step, task group context, and affinity partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) {
parallel_for_impl(first, last, step, f, partitioner, context);
}
//! Parallel iteration over a range of integers with a default step value, explicit task group context, and default partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f, tbb::task_group_context &context) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context);
}
//! Parallel iteration over a range of integers with a default step value, explicit task group context, and simple partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) {
parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
}
//! Parallel iteration over a range of integers with a default step value, explicit task group context, and auto partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
}
//! Parallel iteration over a range of integers with a default step value, explicit task group context, and affinity_partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) {
parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner, context);
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
//@}
} // namespace strict_ppl
using strict_ppl::parallel_for;
} // namespace tbb
#if TBB_PREVIEW_SERIAL_SUBSET
#define __TBB_NORMAL_EXECUTION
#include "../serial/tbb/parallel_for.h"
#undef __TBB_NORMAL_EXECUTION
#endif
#endif /* __TBB_parallel_for_H */

View File

@@ -0,0 +1,95 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_parallel_for_each_H
#define __TBB_parallel_for_each_H
#include "parallel_do.h"
namespace tbb {
//! @cond INTERNAL
namespace internal {
// The class calls user function in operator()
template <typename Function, typename Iterator>
class parallel_for_each_body : internal::no_assign {
const Function &my_func;
public:
parallel_for_each_body(const Function &_func) : my_func(_func) {}
parallel_for_each_body(const parallel_for_each_body<Function, Iterator> &_caller) : my_func(_caller.my_func) {}
void operator() ( typename std::iterator_traits<Iterator>::reference value ) const {
my_func(value);
}
};
} // namespace internal
//! @endcond
/** \name parallel_for_each
**/
//@{
//! Calls function f for all items from [first, last) interval using user-supplied context
/** @ingroup algorithms */
#if __TBB_TASK_GROUP_CONTEXT
template<typename InputIterator, typename Function>
void parallel_for_each(InputIterator first, InputIterator last, const Function& f, task_group_context &context) {
internal::parallel_for_each_body<Function, InputIterator> body(f);
tbb::parallel_do (first, last, body, context);
}
//! Calls function f for all items from rng using user-supplied context
/** @ingroup algorithms */
template<typename Range, typename Function>
void parallel_for_each(Range& rng, const Function& f, task_group_context& context) {
parallel_for_each(tbb::internal::first(rng), tbb::internal::last(rng), f, context);
}
//! Calls function f for all items from const rng using user-supplied context
/** @ingroup algorithms */
template<typename Range, typename Function>
void parallel_for_each(const Range& rng, const Function& f, task_group_context& context) {
parallel_for_each(tbb::internal::first(rng), tbb::internal::last(rng), f, context);
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
//! Uses default context
template<typename InputIterator, typename Function>
void parallel_for_each(InputIterator first, InputIterator last, const Function& f) {
internal::parallel_for_each_body<Function, InputIterator> body(f);
tbb::parallel_do (first, last, body);
}
//! Uses default context
template<typename Range, typename Function>
void parallel_for_each(Range& rng, const Function& f) {
parallel_for_each(tbb::internal::first(rng), tbb::internal::last(rng), f);
}
//! Uses default context
template<typename Range, typename Function>
void parallel_for_each(const Range& rng, const Function& f) {
parallel_for_each(tbb::internal::first(rng), tbb::internal::last(rng), f);
}
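//! Illustrative usage sketch (editor's addition, not part of the original header): applying a
//! functor to every element of a std::vector through the iterator-based overload above. The
//! functor Doubler and function double_all are hypothetical names used only for illustration.
/** \code
#include "tbb/parallel_for_each.h"
#include <vector>

struct Doubler {
    void operator()( int& x ) const { x *= 2; }
};

void double_all( std::vector<int>& v ) {
    tbb::parallel_for_each( v.begin(), v.end(), Doubler() );
}
\endcode **/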
//@}
} // namespace tbb
#endif /* __TBB_parallel_for_each_H */

View File

@@ -0,0 +1,456 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_parallel_invoke_H
#define __TBB_parallel_invoke_H
#include "task.h"
#if __TBB_VARIADIC_PARALLEL_INVOKE
#include <utility>
#endif
namespace tbb {
#if !__TBB_TASK_GROUP_CONTEXT
/** Dummy to avoid cluttering the bulk of the header with an enormous number of ifdefs. **/
struct task_group_context {};
#endif /* __TBB_TASK_GROUP_CONTEXT */
//! @cond INTERNAL
namespace internal {
// Simple task object, executing user method
template<typename function>
class function_invoker : public task{
public:
function_invoker(const function& _function) : my_function(_function) {}
private:
const function &my_function;
/*override*/
task* execute()
{
my_function();
return NULL;
}
};
// The class spawns two or three child tasks
template <size_t N, typename function1, typename function2, typename function3>
class spawner : public task {
private:
const function1& my_func1;
const function2& my_func2;
const function3& my_func3;
bool is_recycled;
task* execute (){
if(is_recycled){
return NULL;
}else{
__TBB_ASSERT(N==2 || N==3, "Number of arguments passed to spawner is wrong");
set_ref_count(N);
recycle_as_safe_continuation();
internal::function_invoker<function2>* invoker2 = new (allocate_child()) internal::function_invoker<function2>(my_func2);
__TBB_ASSERT(invoker2, "Child task allocation failed");
spawn(*invoker2);
size_t n = N; // To prevent compiler warnings
if (n>2) {
internal::function_invoker<function3>* invoker3 = new (allocate_child()) internal::function_invoker<function3>(my_func3);
__TBB_ASSERT(invoker3, "Child task allocation failed");
spawn(*invoker3);
}
my_func1();
is_recycled = true;
return NULL;
}
} // execute
public:
spawner(const function1& _func1, const function2& _func2, const function3& _func3) : my_func1(_func1), my_func2(_func2), my_func3(_func3), is_recycled(false) {}
};
// Creates and spawns child tasks
class parallel_invoke_helper : public empty_task {
public:
// Dummy functor class
class parallel_invoke_noop {
public:
void operator() () const {}
};
// Creates a helper object that expects the specified number of children
parallel_invoke_helper(int number_of_children)
{
set_ref_count(number_of_children + 1);
}
#if __TBB_VARIADIC_PARALLEL_INVOKE
void add_children() {}
void add_children(tbb::task_group_context&) {}
template <typename function>
void add_children(function&& _func)
{
internal::function_invoker<function>* invoker = new (allocate_child()) internal::function_invoker<function>(std::forward<function>(_func));
__TBB_ASSERT(invoker, "Child task allocation failed");
spawn(*invoker);
}
template<typename function>
void add_children(function&& _func, tbb::task_group_context&)
{
add_children(std::forward<function>(_func));
}
// Adds child(ren) task(s) and spawns them
template <typename function1, typename function2, typename... function>
void add_children(function1&& _func1, function2&& _func2, function&&... _func)
{
// The third argument is a dummy; it is actually ignored.
parallel_invoke_noop noop;
typedef internal::spawner<2, function1, function2, parallel_invoke_noop> spawner_type;
spawner_type & sub_root = *new(allocate_child()) spawner_type(std::forward<function1>(_func1), std::forward<function2>(_func2), noop);
spawn(sub_root);
add_children(std::forward<function>(_func)...);
}
#else
// Adds child task and spawns it
template <typename function>
void add_children (const function &_func)
{
internal::function_invoker<function>* invoker = new (allocate_child()) internal::function_invoker<function>(_func);
__TBB_ASSERT(invoker, "Child task allocation failed");
spawn(*invoker);
}
// Adds a task with multiple child tasks and spawns it
// two arguments
template <typename function1, typename function2>
void add_children (const function1& _func1, const function2& _func2)
{
// The third argument is a dummy; it is actually ignored.
parallel_invoke_noop noop;
internal::spawner<2, function1, function2, parallel_invoke_noop>& sub_root = *new(allocate_child())internal::spawner<2, function1, function2, parallel_invoke_noop>(_func1, _func2, noop);
spawn(sub_root);
}
// three arguments
template <typename function1, typename function2, typename function3>
void add_children (const function1& _func1, const function2& _func2, const function3& _func3)
{
internal::spawner<3, function1, function2, function3>& sub_root = *new(allocate_child())internal::spawner<3, function1, function2, function3>(_func1, _func2, _func3);
spawn(sub_root);
}
#endif // __TBB_VARIADIC_PARALLEL_INVOKE
// Waits for all child tasks
template <typename F0>
void run_and_finish(const F0& f0)
{
internal::function_invoker<F0>* invoker = new (allocate_child()) internal::function_invoker<F0>(f0);
__TBB_ASSERT(invoker, "Child task allocation failed");
spawn_and_wait_for_all(*invoker);
}
};
// The class destroys the root task both when an exception occurs and in the normal case
class parallel_invoke_cleaner: internal::no_copy {
public:
#if __TBB_TASK_GROUP_CONTEXT
parallel_invoke_cleaner(int number_of_children, tbb::task_group_context& context)
: root(*new(task::allocate_root(context)) internal::parallel_invoke_helper(number_of_children))
#else
parallel_invoke_cleaner(int number_of_children, tbb::task_group_context&)
: root(*new(task::allocate_root()) internal::parallel_invoke_helper(number_of_children))
#endif /* !__TBB_TASK_GROUP_CONTEXT */
{}
~parallel_invoke_cleaner(){
root.destroy(root);
}
internal::parallel_invoke_helper& root;
};
#if __TBB_VARIADIC_PARALLEL_INVOKE
// Determine whether the last parameter in a pack is task_group_context
template<typename... T> struct impl_selector; // to work around a GCC bug
template<typename T1, typename... T> struct impl_selector<T1, T...> {
typedef typename impl_selector<T...>::type type;
};
template<typename T> struct impl_selector<T> {
typedef false_type type;
};
template<> struct impl_selector<task_group_context&> {
typedef true_type type;
};
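// Illustrative trace (editor's note): for parallel_invoke(f0, f1, f2, ctx) with an lvalue
// task_group_context ctx, the pack deduced below is roughly {F2, task_group_context&}, so
// impl_selector<false_type, F2, task_group_context&> recurses down to
// impl_selector<task_group_context&>, i.e. true_type, and the context-aware path is taken.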
// Select task_group_context parameter from the back of a pack
inline task_group_context& get_context( task_group_context& tgc ) { return tgc; }
template<typename T1, typename... T>
task_group_context& get_context( T1&& /*ignored*/, T&&... t )
{ return get_context( std::forward<T>(t)... ); }
// task_group_context is known to be at the back of the parameter pack
template<typename F0, typename F1, typename... F>
void parallel_invoke_impl(true_type, F0&& f0, F1&& f1, F&&... f) {
__TBB_STATIC_ASSERT(sizeof...(F)>0, "Variadic parallel_invoke implementation broken?");
// Number of child tasks: one for f0, one for f1, plus sizeof...(F)/2 for the pack, whose
// last element is the context (functors are paired into spawners; a leftover one gets its
// own invoker; the context spawns nothing). E.g. five functors: 2 + 4/2 = 4 children.
const size_t number_of_children = 2 + sizeof...(F)/2;
parallel_invoke_cleaner cleaner(number_of_children, get_context(std::forward<F>(f)...));
parallel_invoke_helper& root = cleaner.root;
root.add_children(std::forward<F>(f)...);
root.add_children(std::forward<F1>(f1));
root.run_and_finish(std::forward<F0>(f0));
}
// task_group_context is not in the pack, needs to be added
template<typename F0, typename F1, typename... F>
void parallel_invoke_impl(false_type, F0&& f0, F1&& f1, F&&... f) {
tbb::task_group_context context;
// Add context to the arguments, and redirect to the other overload
parallel_invoke_impl(true_type(), std::forward<F0>(f0), std::forward<F1>(f1), std::forward<F>(f)..., context);
}
#endif
} // namespace internal
//! @endcond
/** \name parallel_invoke
**/
//@{
//! Executes a list of tasks in parallel and waits for all tasks to complete.
/** @ingroup algorithms */
#if __TBB_VARIADIC_PARALLEL_INVOKE
// parallel_invoke for two or more arguments via variadic templates;
// the presence of a trailing task_group_context argument is detected automatically
template<typename F0, typename F1, typename... F>
void parallel_invoke(F0&& f0, F1&& f1, F&&... f) {
typedef typename internal::impl_selector<internal::false_type, F...>::type selector_type;
internal::parallel_invoke_impl(selector_type(), std::forward<F0>(f0), std::forward<F1>(f1), std::forward<F>(f)...);
}
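//! Illustrative usage sketch (editor's addition, not part of the original header), assuming a
//! C++11 compiler so that the variadic overload above is available: running three independent
//! lambdas concurrently. An lvalue tbb::task_group_context may optionally be appended as the
//! last argument; the names below are hypothetical.
/** \code
#include "tbb/parallel_invoke.h"

void run_three( int& a, int& b, int& c ) {
    tbb::parallel_invoke(
        [&]{ a = 1; },
        [&]{ b = 2; },
        [&]{ c = 3; } );
}
\endcode **/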
#else
// parallel_invoke with user-defined context
// two arguments
template<typename F0, typename F1 >
void parallel_invoke(const F0& f0, const F1& f1, tbb::task_group_context& context) {
internal::parallel_invoke_cleaner cleaner(2, context);
internal::parallel_invoke_helper& root = cleaner.root;
root.add_children(f1);
root.run_and_finish(f0);
}
// three arguments
template<typename F0, typename F1, typename F2 >
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, tbb::task_group_context& context) {
internal::parallel_invoke_cleaner cleaner(3, context);
internal::parallel_invoke_helper& root = cleaner.root;
root.add_children(f2);
root.add_children(f1);
root.run_and_finish(f0);
}
// four arguments
template<typename F0, typename F1, typename F2, typename F3>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3,
tbb::task_group_context& context)
{
internal::parallel_invoke_cleaner cleaner(4, context);
internal::parallel_invoke_helper& root = cleaner.root;
root.add_children(f3);
root.add_children(f2);
root.add_children(f1);
root.run_and_finish(f0);
}
// five arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4 >
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
tbb::task_group_context& context)
{
internal::parallel_invoke_cleaner cleaner(3, context);
internal::parallel_invoke_helper& root = cleaner.root;
root.add_children(f4, f3);
root.add_children(f2, f1);
root.run_and_finish(f0);
}
// six arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4, const F5& f5,
tbb::task_group_context& context)
{
internal::parallel_invoke_cleaner cleaner(3, context);
internal::parallel_invoke_helper& root = cleaner.root;
root.add_children(f5, f4, f3);
root.add_children(f2, f1);
root.run_and_finish(f0);
}
// seven arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5, typename F6>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
const F5& f5, const F6& f6,
tbb::task_group_context& context)
{
internal::parallel_invoke_cleaner cleaner(3, context);
internal::parallel_invoke_helper& root = cleaner.root;
root.add_children(f6, f5, f4);
root.add_children(f3, f2, f1);
root.run_and_finish(f0);
}
// eight arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4,
typename F5, typename F6, typename F7>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
const F5& f5, const F6& f6, const F7& f7,
tbb::task_group_context& context)
{
internal::parallel_invoke_cleaner cleaner(4, context);
internal::parallel_invoke_helper& root = cleaner.root;
root.add_children(f7, f6, f5);
root.add_children(f4, f3);
root.add_children(f2, f1);
root.run_and_finish(f0);
}
// nine arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4,
typename F5, typename F6, typename F7, typename F8>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
const F5& f5, const F6& f6, const F7& f7, const F8& f8,
tbb::task_group_context& context)
{
internal::parallel_invoke_cleaner cleaner(4, context);
internal::parallel_invoke_helper& root = cleaner.root;
root.add_children(f8, f7, f6);
root.add_children(f5, f4, f3);
root.add_children(f2, f1);
root.run_and_finish(f0);
}
// ten arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4,
typename F5, typename F6, typename F7, typename F8, typename F9>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
const F5& f5, const F6& f6, const F7& f7, const F8& f8, const F9& f9,
tbb::task_group_context& context)
{
internal::parallel_invoke_cleaner cleaner(4, context);
internal::parallel_invoke_helper& root = cleaner.root;
root.add_children(f9, f8, f7);
root.add_children(f6, f5, f4);
root.add_children(f3, f2, f1);
root.run_and_finish(f0);
}
// two arguments
template<typename F0, typename F1>
void parallel_invoke(const F0& f0, const F1& f1) {
task_group_context context;
parallel_invoke<F0, F1>(f0, f1, context);
}
// three arguments
template<typename F0, typename F1, typename F2>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2) {
task_group_context context;
parallel_invoke<F0, F1, F2>(f0, f1, f2, context);
}
// four arguments
template<typename F0, typename F1, typename F2, typename F3 >
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3) {
task_group_context context;
parallel_invoke<F0, F1, F2, F3>(f0, f1, f2, f3, context);
}
// five arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4) {
task_group_context context;
parallel_invoke<F0, F1, F2, F3, F4>(f0, f1, f2, f3, f4, context);
}
// six arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4, const F5& f5) {
task_group_context context;
parallel_invoke<F0, F1, F2, F3, F4, F5>(f0, f1, f2, f3, f4, f5, context);
}
// seven arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5, typename F6>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
const F5& f5, const F6& f6)
{
task_group_context context;
parallel_invoke<F0, F1, F2, F3, F4, F5, F6>(f0, f1, f2, f3, f4, f5, f6, context);
}
// eight arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4,
typename F5, typename F6, typename F7>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
const F5& f5, const F6& f6, const F7& f7)
{
task_group_context context;
parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7>(f0, f1, f2, f3, f4, f5, f6, f7, context);
}
// nine arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4,
typename F5, typename F6, typename F7, typename F8>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
const F5& f5, const F6& f6, const F7& f7, const F8& f8)
{
task_group_context context;
parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7, F8>(f0, f1, f2, f3, f4, f5, f6, f7, f8, context);
}
// ten arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4,
typename F5, typename F6, typename F7, typename F8, typename F9>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
const F5& f5, const F6& f6, const F7& f7, const F8& f8, const F9& f9)
{
task_group_context context;
parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7, F8, F9>(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, context);
}
#endif // __TBB_VARIADIC_PARALLEL_INVOKE
//@}
} // namespace tbb
#endif /* __TBB_parallel_invoke_H */

View File

@@ -0,0 +1,533 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_parallel_reduce_H
#define __TBB_parallel_reduce_H
#include <new>
#include "task.h"
#include "aligned_space.h"
#include "partitioner.h"
#include "tbb_profiling.h"
namespace tbb {
namespace interface7 {
//! @cond INTERNAL
namespace internal {
using namespace tbb::internal;
/** Values for reduction_context. */
enum {
root_task, left_child, right_child
};
/** Represented as a char, not enum, for compactness. */
typedef char reduction_context;
//! Task type used to combine the partial results of parallel_reduce.
/** @ingroup algorithms */
template<typename Body>
class finish_reduce: public flag_task {
//! True if the right child was stolen and its Body copy was constructed in zombie_space.
bool has_right_zombie;
const reduction_context my_context;
//! Pointer to the body, or NULL if the left child has not yet finished.
Body* my_body;
aligned_space<Body> zombie_space;
finish_reduce( reduction_context context_ ) :
has_right_zombie(false), // TODO: substitute by flag_task::child_stolen?
my_context(context_),
my_body(NULL)
{
}
~finish_reduce() {
if( has_right_zombie )
zombie_space.begin()->~Body();
}
task* execute() {
if( has_right_zombie ) {
// Right child was stolen.
Body* s = zombie_space.begin();
my_body->join( *s );
// Body::join() won't be called if canceled. Defer destruction to destructor
}
if( my_context==left_child )
itt_store_word_with_release( static_cast<finish_reduce*>(parent())->my_body, my_body );
return NULL;
}
template<typename Range,typename Body_, typename Partitioner>
friend class start_reduce;
};
//! allocate right task with new parent
void allocate_sibling(task* start_reduce_task, task *tasks[], size_t start_bytes, size_t finish_bytes);
//! Task type used to split the work of parallel_reduce.
/** @ingroup algorithms */
template<typename Range, typename Body, typename Partitioner>
class start_reduce: public task {
typedef finish_reduce<Body> finish_type;
Body* my_body;
Range my_range;
typename Partitioner::task_partition_type my_partition;
reduction_context my_context;
/*override*/ task* execute();
//! Update affinity info, if any
/*override*/ void note_affinity( affinity_id id ) {
my_partition.note_affinity( id );
}
template<typename Body_>
friend class finish_reduce;
public:
//! Constructor used for root task
start_reduce( const Range& range, Body* body, Partitioner& partitioner ) :
my_body(body),
my_range(range),
my_partition(partitioner),
my_context(root_task)
{
}
//! Splitting constructor used to generate children.
/** parent_ becomes left child. Newly constructed object is right child. */
start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj ) :
my_body(parent_.my_body),
my_range(parent_.my_range, split_obj),
my_partition(parent_.my_partition, split_obj),
my_context(right_child)
{
my_partition.set_affinity(*this);
parent_.my_context = left_child;
}
//! Construct right child from the given range as response to the demand.
/** parent_ remains left child. Newly constructed object is right child. */
start_reduce( start_reduce& parent_, const Range& r, depth_t d ) :
my_body(parent_.my_body),
my_range(r),
my_partition(parent_.my_partition, split()),
my_context(right_child)
{
my_partition.set_affinity(*this);
my_partition.align_depth( d ); // TODO: move into constructor of partitioner
parent_.my_context = left_child;
}
static void run( const Range& range, Body& body, Partitioner& partitioner ) {
if( !range.empty() ) {
#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
task::spawn_root_and_wait( *new(task::allocate_root()) start_reduce(range,&body,partitioner) );
#else
// A bound context prevents exceptions thrown by the body from affecting nesting or sibling algorithms,
// and allows users to handle exceptions safely by wrapping parallel_reduce in a try-block.
task_group_context context;
task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
#endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
}
}
#if __TBB_TASK_GROUP_CONTEXT
static void run( const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) {
if( !range.empty() )
task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
//! Run body for range
void run_body( Range &r ) { (*my_body)( r ); }
//! spawn right task, serves as callback for partitioner
// TODO: remove code duplication from 'offer_work' methods
void offer_work(typename Partitioner::split_type& split_obj) {
task *tasks[2];
allocate_sibling(static_cast<task*>(this), tasks, sizeof(start_reduce), sizeof(finish_type));
new((void*)tasks[0]) finish_type(my_context);
new((void*)tasks[1]) start_reduce(*this, split_obj);
spawn(*tasks[1]);
}
//! spawn right task, serves as callback for partitioner
void offer_work(const Range& r, depth_t d = 0) {
task *tasks[2];
allocate_sibling(static_cast<task*>(this), tasks, sizeof(start_reduce), sizeof(finish_type));
new((void*)tasks[0]) finish_type(my_context);
new((void*)tasks[1]) start_reduce(*this, r, d);
spawn(*tasks[1]);
}
};
//! allocate right task with new parent
// TODO: 'inline' here is only to avoid a multiple-definition error; for the sake of code size it should not actually be inlined
inline void allocate_sibling(task* start_reduce_task, task *tasks[], size_t start_bytes, size_t finish_bytes) {
tasks[0] = &start_reduce_task->allocate_continuation().allocate(finish_bytes);
start_reduce_task->set_parent(tasks[0]);
tasks[0]->set_ref_count(2);
tasks[1] = &tasks[0]->allocate_child().allocate(start_bytes);
}
template<typename Range, typename Body, typename Partitioner>
task* start_reduce<Range,Body,Partitioner>::execute() {
my_partition.check_being_stolen( *this );
if( my_context==right_child ) {
finish_type* parent_ptr = static_cast<finish_type*>(parent());
if( !itt_load_word_with_acquire(parent_ptr->my_body) ) { // TODO: replace by is_stolen_task() or by parent_ptr->ref_count() == 2???
my_body = new( parent_ptr->zombie_space.begin() ) Body(*my_body,split());
parent_ptr->has_right_zombie = true;
}
} else __TBB_ASSERT(my_context==root_task,NULL);// because the left leaf spawns right leaves without recycling
my_partition.execute(*this, my_range);
if( my_context==left_child ) {
finish_type* parent_ptr = static_cast<finish_type*>(parent());
__TBB_ASSERT(my_body!=parent_ptr->zombie_space.begin(),NULL);
itt_store_word_with_release(parent_ptr->my_body, my_body );
}
return NULL;
}
//! Task type used to combine the partial results of parallel_deterministic_reduce.
/** @ingroup algorithms */
template<typename Body>
class finish_deterministic_reduce: public task {
Body &my_left_body;
Body my_right_body;
finish_deterministic_reduce( Body &body ) :
my_left_body( body ),
my_right_body( body, split() )
{
}
task* execute() {
my_left_body.join( my_right_body );
return NULL;
}
template<typename Range,typename Body_>
friend class start_deterministic_reduce;
};
//! Task type used to split the work of parallel_deterministic_reduce.
/** @ingroup algorithms */
template<typename Range, typename Body>
class start_deterministic_reduce: public task {
typedef finish_deterministic_reduce<Body> finish_type;
Body &my_body;
Range my_range;
/*override*/ task* execute();
//! Constructor used for root task
start_deterministic_reduce( const Range& range, Body& body ) :
my_body( body ),
my_range( range )
{
}
//! Splitting constructor used to generate children.
/** parent_ becomes left child. Newly constructed object is right child. */
start_deterministic_reduce( start_deterministic_reduce& parent_, finish_type& c ) :
my_body( c.my_right_body ),
my_range( parent_.my_range, split() )
{
}
public:
static void run( const Range& range, Body& body ) {
if( !range.empty() ) {
#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
task::spawn_root_and_wait( *new(task::allocate_root()) start_deterministic_reduce(range,body) );
#else
// A bound context prevents exceptions thrown by the body from affecting nesting or sibling algorithms,
// and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce in a try-block.
task_group_context context;
task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body) );
#endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
}
}
#if __TBB_TASK_GROUP_CONTEXT
static void run( const Range& range, Body& body, task_group_context& context ) {
if( !range.empty() )
task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body) );
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
};
template<typename Range, typename Body>
task* start_deterministic_reduce<Range,Body>::execute() {
if( !my_range.is_divisible() ) {
my_body( my_range );
return NULL;
} else {
finish_type& c = *new( allocate_continuation() ) finish_type( my_body );
recycle_as_child_of(c);
c.set_ref_count(2);
start_deterministic_reduce& b = *new( c.allocate_child() ) start_deterministic_reduce( *this, c );
task::spawn(b);
return this;
}
}
} // namespace internal
//! @endcond
} // namespace interface7
//! @cond INTERNAL
namespace internal {
using interface7::internal::start_reduce;
using interface7::internal::start_deterministic_reduce;
//! Auxiliary class for parallel_reduce; for internal use only.
/** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body"
using given \ref parallel_reduce_lambda_req "anonymous function objects".
**/
/** @ingroup algorithms */
template<typename Range, typename Value, typename RealBody, typename Reduction>
class lambda_reduce_body {
//FIXME: decide if my_real_body, my_reduction, and identity_element should be copied or referenced
// (might require some performance measurements)
const Value& identity_element;
const RealBody& my_real_body;
const Reduction& my_reduction;
Value my_value;
lambda_reduce_body& operator= ( const lambda_reduce_body& other );
public:
lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction )
: identity_element(identity)
, my_real_body(body)
, my_reduction(reduction)
, my_value(identity)
{ }
lambda_reduce_body( const lambda_reduce_body& other )
: identity_element(other.identity_element)
, my_real_body(other.my_real_body)
, my_reduction(other.my_reduction)
, my_value(other.my_value)
{ }
lambda_reduce_body( lambda_reduce_body& other, tbb::split )
: identity_element(other.identity_element)
, my_real_body(other.my_real_body)
, my_reduction(other.my_reduction)
, my_value(other.identity_element)
{ }
void operator()(Range& range) {
my_value = my_real_body(range, const_cast<const Value&>(my_value));
}
void join( lambda_reduce_body& rhs ) {
my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value));
}
Value result() const {
return my_value;
}
};
} // namespace internal
//! @endcond
// Requirements on Range concept are documented in blocked_range.h
/** \page parallel_reduce_body_req Requirements on parallel_reduce body
Class \c Body implementing the concept of parallel_reduce body must define:
- \code Body::Body( Body&, split ); \endcode Splitting constructor.
Must be able to run concurrently with operator() and method \c join
- \code Body::~Body(); \endcode Destructor
- \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r
and accumulating the result
- \code void Body::join( Body& b ); \endcode Join results.
The result in \c b should be merged into the result of \c this
**/
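/** Illustrative Body (editor's addition, not part of the original header) that satisfies the
requirements above by summing an array of floats; SumBody, sum and my_sum are hypothetical names.
\code
#include "tbb/parallel_reduce.h"
#include "tbb/blocked_range.h"

struct SumBody {
    float my_sum;
    const float* my_data;
    SumBody( const float* data ) : my_sum(0), my_data(data) {}
    SumBody( SumBody& other, tbb::split ) : my_sum(0), my_data(other.my_data) {} // splitting constructor
    void operator()( const tbb::blocked_range<size_t>& r ) {
        for( size_t i=r.begin(); i!=r.end(); ++i )
            my_sum += my_data[i];                                                // accumulate over subrange
    }
    void join( SumBody& rhs ) { my_sum += rhs.my_sum; }                          // merge partial result
};

float sum( const float* data, size_t n ) {
    SumBody body(data);
    tbb::parallel_reduce( tbb::blocked_range<size_t>(0, n), body );
    return body.my_sum;
}
\endcode
**/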
/** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions)
The functional form takes the identity value of the reduction, a real body
\code Value RealBody::operator()( const Range& r, const Value& x ) const; \endcode
that accumulates the result over subrange \c r starting from the value \c x, and a reduction
\code Value Reduction::operator()( const Value& x, const Value& y ) const; \endcode
that combines two partial results.
**/
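/** Illustrative use of the functional form (editor's addition, not part of the original header),
assuming a C++11 compiler for the lambdas; the name sum is hypothetical.
\code
#include "tbb/parallel_reduce.h"
#include "tbb/blocked_range.h"
#include <vector>

float sum( const std::vector<float>& v ) {
    return tbb::parallel_reduce(
        tbb::blocked_range<size_t>(0, v.size()),
        0.0f,                                                       // identity value
        [&]( const tbb::blocked_range<size_t>& r, float running ) { // RealBody: accumulate over r
            for( size_t i=r.begin(); i!=r.end(); ++i )
                running += v[i];
            return running;
        },
        []( float x, float y ) { return x + y; } );                 // Reduction: combine partials
}
\endcode
**/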
/** \name parallel_reduce
See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/
//@{
//! Parallel iteration with reduction and default partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body ) {
internal::start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
}
//! Parallel iteration with reduction and simple_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
}
//! Parallel iteration with reduction and auto_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
}
//! Parallel iteration with reduction and affinity_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
}
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
}
//! Parallel iteration with reduction, auto_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
}
//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
/** parallel_reduce overloads that work with anonymous function objects
(see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
//! Parallel iteration with reduction and default partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
::run(range, body, __TBB_DEFAULT_PARTITIONER() );
return body.result();
}
//! Parallel iteration with reduction and simple_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
const simple_partitioner& partitioner ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
::run(range, body, partitioner );
return body.result();
}
//! Parallel iteration with reduction and auto_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
const auto_partitioner& partitioner ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
::run( range, body, partitioner );
return body.result();
}
//! Parallel iteration with reduction and affinity_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
affinity_partitioner& partitioner ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
::run( range, body, partitioner );
return body.result();
}
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
const simple_partitioner& partitioner, task_group_context& context ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
::run( range, body, partitioner, context );
return body.result();
}
//! Parallel iteration with reduction, auto_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
const auto_partitioner& partitioner, task_group_context& context ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
::run( range, body, partitioner, context );
return body.result();
}
//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
affinity_partitioner& partitioner, task_group_context& context ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
::run( range, body, partitioner, context );
return body.result();
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
//! Parallel iteration with deterministic reduction and default partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body ) {
internal::start_deterministic_reduce<Range,Body>::run( range, body );
}
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with deterministic reduction and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) {
internal::start_deterministic_reduce<Range,Body>::run( range, body, context );
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
/** parallel_reduce overloads that work with anonymous function objects
(see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
//! Parallel iteration with deterministic reduction and default partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction> >
::run(range, body);
return body.result();
}
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with deterministic reduction and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
task_group_context& context ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction> >
::run( range, body, context );
return body.result();
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
//@}
} // namespace tbb
#endif /* __TBB_parallel_reduce_H */

View File

@@ -0,0 +1,346 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_parallel_scan_H
#define __TBB_parallel_scan_H
#include "task.h"
#include "aligned_space.h"
#include <new>
#include "partitioner.h"
namespace tbb {
//! Used to indicate that the initial scan is being performed.
/** @ingroup algorithms */
struct pre_scan_tag {
static bool is_final_scan() {return false;}
};
//! Used to indicate that the final scan is being performed.
/** @ingroup algorithms */
struct final_scan_tag {
static bool is_final_scan() {return true;}
};
//! @cond INTERNAL
namespace internal {
//! Performs final scan for a leaf
/** @ingroup algorithms */
template<typename Range, typename Body>
class final_sum: public task {
public:
Body my_body;
private:
aligned_space<Range> my_range;
//! Where to put result of last subrange, or NULL if not last subrange.
Body* my_stuff_last;
public:
final_sum( Body& body_ ) :
my_body(body_,split())
{
poison_pointer(my_stuff_last);
}
~final_sum() {
my_range.begin()->~Range();
}
void finish_construction( const Range& range_, Body* stuff_last_ ) {
new( my_range.begin() ) Range(range_);
my_stuff_last = stuff_last_;
}
private:
/*override*/ task* execute() {
my_body( *my_range.begin(), final_scan_tag() );
if( my_stuff_last )
my_stuff_last->assign(my_body);
return NULL;
}
};
//! Split work to be done in the scan.
/** @ingroup algorithms */
template<typename Range, typename Body>
class sum_node: public task {
typedef final_sum<Range,Body> final_sum_type;
public:
final_sum_type *my_incoming;
final_sum_type *my_body;
Body *my_stuff_last;
private:
final_sum_type *my_left_sum;
sum_node *my_left;
sum_node *my_right;
bool my_left_is_final;
Range my_range;
sum_node( const Range range_, bool left_is_final_ ) :
my_left_sum(NULL),
my_left(NULL),
my_right(NULL),
my_left_is_final(left_is_final_),
my_range(range_)
{
// Poison fields that will be set by second pass.
poison_pointer(my_body);
poison_pointer(my_incoming);
}
task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
if( !n ) {
f.recycle_as_child_of( *this );
f.finish_construction( range_, stuff_last_ );
return &f;
} else {
n->my_body = &f;
n->my_incoming = incoming_;
n->my_stuff_last = stuff_last_;
return n;
}
}
/*override*/ task* execute() {
if( my_body ) {
if( my_incoming )
my_left_sum->my_body.reverse_join( my_incoming->my_body );
recycle_as_continuation();
sum_node& c = *this;
task* b = c.create_child(Range(my_range,split()),*my_left_sum,my_right,my_left_sum,my_stuff_last);
task* a = my_left_is_final ? NULL : c.create_child(my_range,*my_body,my_left,my_incoming,NULL);
set_ref_count( (a!=NULL)+(b!=NULL) );
my_body = NULL;
if( a ) spawn(*b);
else a = b;
return a;
} else {
return NULL;
}
}
template<typename Range_,typename Body_,typename Partitioner_>
friend class start_scan;
template<typename Range_,typename Body_>
friend class finish_scan;
};
//! Combine partial results
/** @ingroup algorithms */
template<typename Range, typename Body>
class finish_scan: public task {
typedef sum_node<Range,Body> sum_node_type;
typedef final_sum<Range,Body> final_sum_type;
final_sum_type** const my_sum;
sum_node_type*& my_return_slot;
public:
final_sum_type* my_right_zombie;
sum_node_type& my_result;
/*override*/ task* execute() {
__TBB_ASSERT( my_result.ref_count()==(my_result.my_left!=NULL)+(my_result.my_right!=NULL), NULL );
if( my_result.my_left )
my_result.my_left_is_final = false;
if( my_right_zombie && my_sum )
((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body);
__TBB_ASSERT( !my_return_slot, NULL );
if( my_right_zombie || my_result.my_right ) {
my_return_slot = &my_result;
} else {
destroy( my_result );
}
if( my_right_zombie && !my_sum && !my_result.my_right ) {
destroy(*my_right_zombie);
my_right_zombie = NULL;
}
return NULL;
}
finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :
my_sum(sum_),
my_return_slot(return_slot_),
my_right_zombie(NULL),
my_result(result_)
{
__TBB_ASSERT( !my_return_slot, NULL );
}
};
//! Initial task to split the work
/** @ingroup algorithms */
template<typename Range, typename Body, typename Partitioner=simple_partitioner>
class start_scan: public task {
typedef sum_node<Range,Body> sum_node_type;
typedef final_sum<Range,Body> final_sum_type;
final_sum_type* my_body;
/** Non-null if caller is requesting total. */
final_sum_type** my_sum;
sum_node_type** my_return_slot;
/** Null if computing root. */
sum_node_type* my_parent_sum;
bool my_is_final;
bool my_is_right_child;
Range my_range;
typename Partitioner::partition_type my_partition;
/*override*/ task* execute();
public:
start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
my_body(parent_.my_body),
my_sum(parent_.my_sum),
my_return_slot(&return_slot_),
my_parent_sum(parent_sum_),
my_is_final(parent_.my_is_final),
my_is_right_child(false),
my_range(parent_.my_range,split()),
my_partition(parent_.my_partition,split())
{
__TBB_ASSERT( !*my_return_slot, NULL );
}
start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
my_body(&body_),
my_sum(NULL),
my_return_slot(&return_slot_),
my_parent_sum(NULL),
my_is_final(true),
my_is_right_child(false),
my_range(range_),
my_partition(partitioner_)
{
__TBB_ASSERT( !*my_return_slot, NULL );
}
static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) {
if( !range_.empty() ) {
typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
internal::sum_node<Range,Body>* root = NULL;
typedef internal::final_sum<Range,Body> final_sum_type;
final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ );
start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
/*my_return_slot=*/root,
range_,
*temp_body,
partitioner_ );
task::spawn_root_and_wait( pass1 );
if( root ) {
root->my_body = temp_body;
root->my_incoming = NULL;
root->my_stuff_last = &body_;
task::spawn_root_and_wait( *root );
} else {
body_.assign(temp_body->my_body);
temp_body->finish_construction( range_, NULL );
temp_body->destroy(*temp_body);
}
}
}
};
template<typename Range, typename Body, typename Partitioner>
task* start_scan<Range,Body,Partitioner>::execute() {
typedef internal::finish_scan<Range,Body> finish_pass1_type;
finish_pass1_type* p = my_parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
// Inspecting p->my_result.my_left_sum would ordinarily be a race condition.
// But we inspect it only if we are not a stolen task, in which case we know
// that the task assigning to p->my_result.my_left_sum has completed.
bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum);
if( treat_as_stolen ) {
// Invocation is for right child that has been really stolen or needs to be virtually stolen
p->my_right_zombie = my_body = new( allocate_root() ) final_sum_type(my_body->my_body);
my_is_final = false;
}
task* next_task = NULL;
if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
if( my_is_final )
(my_body->my_body)( my_range, final_scan_tag() );
else if( my_sum )
(my_body->my_body)( my_range, pre_scan_tag() );
if( my_sum )
*my_sum = my_body;
__TBB_ASSERT( !*my_return_slot, NULL );
} else {
sum_node_type* result;
if( my_parent_sum )
result = new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
else
result = new(task::allocate_root()) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result);
// Split off right child
start_scan& b = *new( c.allocate_child() ) start_scan( /*my_return_slot=*/result->my_right, *this, result );
b.my_is_right_child = true;
// Left child is recycling of *this. Must recycle this before spawning b,
// otherwise b might complete and decrement c.ref_count() to zero, which
// would cause c.execute() to run prematurely.
recycle_as_child_of(c);
c.set_ref_count(2);
c.spawn(b);
my_sum = &result->my_left_sum;
my_return_slot = &result->my_left;
my_is_right_child = false;
next_task = this;
my_parent_sum = result;
__TBB_ASSERT( !*my_return_slot, NULL );
}
return next_task;
}
} // namespace internal
//! @endcond
// Requirements on Range concept are documented in blocked_range.h
/** \page parallel_scan_body_req Requirements on parallel_scan body
Class \c Body implementing the concept of parallel_scan body must define:
- \code Body::Body( Body&, split ); \endcode Splitting constructor.
Split \c b so that \c this and \c b can accumulate separately
- \code Body::~Body(); \endcode Destructor
- \code void Body::operator()( const Range& r, pre_scan_tag ); \endcode
Preprocess iterations for range \c r
- \code void Body::operator()( const Range& r, final_scan_tag ); \endcode
Do final processing for iterations of range \c r
- \code void Body::reverse_join( Body& a ); \endcode
Merge preprocessing state of \c a into \c this, where \c a was
created earlier from \c b by b's splitting constructor
**/
/** \name parallel_scan
See also requirements on \ref range_req "Range" and \ref parallel_scan_body_req "parallel_scan Body". **/
//@{
//! Parallel prefix with default partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_scan( const Range& range, Body& body ) {
internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
}
//! Parallel prefix with simple_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner);
}
//! Parallel prefix with auto_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner);
}
//@}
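/** Usage sketch: a minimal \c Body that computes a running prefix sum, matching the
requirements listed above. The names \c SumBody and \c prefix_sum are illustrative
assumptions, not part of the library API.
\code
#include "tbb/parallel_scan.h"
#include "tbb/blocked_range.h"

struct SumBody {                      // illustrative user-defined Body
    float my_sum;                     // partial sum carried across subranges
    const float* my_in;
    float* my_out;
    SumBody( const float* in, float* out ) : my_sum(0), my_in(in), my_out(out) {}
    SumBody( SumBody& b, tbb::split ) : my_sum(0), my_in(b.my_in), my_out(b.my_out) {}
    template<typename Tag>
    void operator()( const tbb::blocked_range<size_t>& r, Tag ) {
        float temp = my_sum;
        for( size_t i=r.begin(); i!=r.end(); ++i ) {
            temp += my_in[i];
            if( Tag::is_final_scan() )    // only the final pass writes the output
                my_out[i] = temp;
        }
        my_sum = temp;
    }
    void reverse_join( SumBody& a ) { my_sum = a.my_sum + my_sum; }
    void assign( SumBody& b ) { my_sum = b.my_sum; }
};

void prefix_sum( const float* in, float* out, size_t n ) {
    SumBody body(in, out);
    tbb::parallel_scan( tbb::blocked_range<size_t>(0,n), body );
}
\endcode
The same body also works with the simple_partitioner and auto_partitioner overloads above. **/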
} // namespace tbb
#endif /* __TBB_parallel_scan_H */

@@ -0,0 +1,253 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_parallel_sort_H
#define __TBB_parallel_sort_H
#include "parallel_for.h"
#include "blocked_range.h"
#include "internal/_range_iterator.h"
#include <algorithm>
#include <iterator>
#include <functional>
namespace tbb {
//! @cond INTERNAL
namespace internal {
//! Range used in quicksort to split elements into subranges based on a value.
/** The split operation selects a splitter and places all elements less than or equal
to the value in the first range and the remaining elements in the second range.
@ingroup algorithms */
template<typename RandomAccessIterator, typename Compare>
class quick_sort_range: private no_assign {
inline size_t median_of_three(const RandomAccessIterator &array, size_t l, size_t m, size_t r) const {
return comp(array[l], array[m]) ? ( comp(array[m], array[r]) ? m : ( comp( array[l], array[r]) ? r : l ) )
: ( comp(array[r], array[m]) ? m : ( comp( array[r], array[l] ) ? r : l ) );
}
inline size_t pseudo_median_of_nine( const RandomAccessIterator &array, const quick_sort_range &range ) const {
size_t offset = range.size/8u;
return median_of_three(array,
median_of_three(array, 0, offset, offset*2),
median_of_three(array, offset*3, offset*4, offset*5),
median_of_three(array, offset*6, offset*7, range.size - 1) );
}
public:
static const size_t grainsize = 500;
const Compare &comp;
RandomAccessIterator begin;
size_t size;
quick_sort_range( RandomAccessIterator begin_, size_t size_, const Compare &comp_ ) :
comp(comp_), begin(begin_), size(size_) {}
bool empty() const {return size==0;}
bool is_divisible() const {return size>=grainsize;}
quick_sort_range( quick_sort_range& range, split ) : comp(range.comp) {
using std::swap;
RandomAccessIterator array = range.begin;
RandomAccessIterator key0 = range.begin;
size_t m = pseudo_median_of_nine(array, range);
if (m) swap ( array[0], array[m] );
size_t i=0;
size_t j=range.size;
// Partition interval [i+1,j-1] with key *key0.
for(;;) {
__TBB_ASSERT( i<j, NULL );
// Loop must terminate since array[l]==*key0.
do {
--j;
__TBB_ASSERT( i<=j, "bad ordering relation?" );
} while( comp( *key0, array[j] ));
do {
__TBB_ASSERT( i<=j, NULL );
if( i==j ) goto partition;
++i;
} while( comp( array[i],*key0 ));
if( i==j ) goto partition;
swap( array[i], array[j] );
}
partition:
// Put the partition key where it belongs
swap( array[j], *key0 );
// array[l..j) is less or equal to key.
// array(j..r) is greater or equal to key.
// array[j] is equal to key
i=j+1;
begin = array+i;
size = range.size-i;
range.size = j;
}
};
#if __TBB_TASK_GROUP_CONTEXT
//! Body class used to test if elements in a range are presorted
/** @ingroup algorithms */
template<typename RandomAccessIterator, typename Compare>
class quick_sort_pretest_body : internal::no_assign {
const Compare &comp;
public:
quick_sort_pretest_body(const Compare &_comp) : comp(_comp) {}
void operator()( const blocked_range<RandomAccessIterator>& range ) const {
task &my_task = task::self();
RandomAccessIterator my_end = range.end();
int i = 0;
for (RandomAccessIterator k = range.begin(); k != my_end; ++k, ++i) {
if ( i%64 == 0 && my_task.is_cancelled() ) break;
// The k-1 is never out-of-range because the first chunk starts at begin+serial_cutoff+1
if ( comp( *(k), *(k-1) ) ) {
my_task.cancel_group_execution();
break;
}
}
}
};
#endif /* __TBB_TASK_GROUP_CONTEXT */
//! Body class used to sort elements in a range that is smaller than the grainsize.
/** @ingroup algorithms */
template<typename RandomAccessIterator, typename Compare>
struct quick_sort_body {
void operator()( const quick_sort_range<RandomAccessIterator,Compare>& range ) const {
//SerialQuickSort( range.begin, range.size, range.comp );
std::sort( range.begin, range.begin + range.size, range.comp );
}
};
//! Wrapper method to initiate the sort by calling parallel_for.
/** @ingroup algorithms */
template<typename RandomAccessIterator, typename Compare>
void parallel_quick_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) {
#if __TBB_TASK_GROUP_CONTEXT
task_group_context my_context;
const int serial_cutoff = 9;
__TBB_ASSERT( begin + serial_cutoff < end, "min_parallel_size is smaller than serial cutoff?" );
RandomAccessIterator k;
for ( k = begin ; k != begin + serial_cutoff; ++k ) {
if ( comp( *(k+1), *k ) ) {
goto do_parallel_quick_sort;
}
}
parallel_for( blocked_range<RandomAccessIterator>(k+1, end),
quick_sort_pretest_body<RandomAccessIterator,Compare>(comp),
auto_partitioner(),
my_context);
if (my_context.is_group_execution_cancelled())
do_parallel_quick_sort:
#endif /* __TBB_TASK_GROUP_CONTEXT */
parallel_for( quick_sort_range<RandomAccessIterator,Compare>(begin, end-begin, comp ),
quick_sort_body<RandomAccessIterator,Compare>(),
auto_partitioner() );
}
} // namespace internal
//! @endcond
/** \page parallel_sort_iter_req Requirements on iterators for parallel_sort
Requirements on value type \c T of \c RandomAccessIterator for \c parallel_sort:
- \code void swap( T& x, T& y ) \endcode Swaps \c x and \c y
- \code bool Compare::operator()( const T& x, const T& y ) \endcode
True if \c x comes before \c y.
**/
/** \name parallel_sort
See also requirements on \ref parallel_sort_iter_req "iterators for parallel_sort". **/
//@{
//! Sorts the data in [begin,end) using the given comparator
/** The compare function object is used for all comparisons between elements during sorting.
The compare object must define a bool operator() function.
@ingroup algorithms **/
template<typename RandomAccessIterator, typename Compare>
void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp) {
const int min_parallel_size = 500;
if( end > begin ) {
if (end - begin < min_parallel_size) {
std::sort(begin, end, comp);
} else {
internal::parallel_quick_sort(begin, end, comp);
}
}
}
//! Sorts the data in [begin,end) with a default comparator \c std::less<RandomAccessIterator>
/** @ingroup algorithms **/
template<typename RandomAccessIterator>
inline void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end ) {
parallel_sort( begin, end, std::less< typename std::iterator_traits<RandomAccessIterator>::value_type >() );
}
//! Sorts the data in rng using the given comparator
/** @ingroup algorithms **/
template<typename Range, typename Compare>
void parallel_sort(Range& rng, const Compare& comp) {
parallel_sort(tbb::internal::first(rng), tbb::internal::last(rng), comp);
}
//! Sorts the data in const rng using the given comparator
/** @ingroup algorithms **/
template<typename Range, typename Compare>
void parallel_sort(const Range& rng, const Compare& comp) {
parallel_sort(tbb::internal::first(rng), tbb::internal::last(rng), comp);
}
//! Sorts the data in rng with a default comparator \c std::less<RandomAccessIterator>
/** @ingroup algorithms **/
template<typename Range>
void parallel_sort(Range& rng) {
parallel_sort(tbb::internal::first(rng), tbb::internal::last(rng));
}
//! Sorts the data in const rng with a default comparator \c std::less<RandomAccessIterator>
/** @ingroup algorithms **/
template<typename Range>
void parallel_sort(const Range& rng) {
parallel_sort(tbb::internal::first(rng), tbb::internal::last(rng));
}
//! Sorts the data in the range \c [begin,end) with a default comparator \c std::less<T>
/** @ingroup algorithms **/
template<typename T>
inline void parallel_sort( T * begin, T * end ) {
parallel_sort( begin, end, std::less< T >() );
}
//@}
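/** Usage sketch: sorting with the overloads above. The container, array, and the
function name \c sort_examples are illustrative assumptions, not part of the API.
\code
#include "tbb/parallel_sort.h"
#include <vector>
#include <functional>

void sort_examples( std::vector<float>& v, float* a, size_t n ) {
    tbb::parallel_sort( v.begin(), v.end() );                 // ascending order
    tbb::parallel_sort( v );                                   // whole-container overload
    tbb::parallel_sort( a, a + n, std::greater<float>() );     // descending order, raw pointers
}
\endcode
**/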
} // namespace tbb
#endif

@@ -0,0 +1,186 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_parallel_while
#define __TBB_parallel_while
#include "task.h"
#include <new>
namespace tbb {
template<typename Body>
class parallel_while;
//! @cond INTERNAL
namespace internal {
template<typename Stream, typename Body> class while_task;
//! For internal use only.
/** Executes one iteration of a while.
@ingroup algorithms */
template<typename Body>
class while_iteration_task: public task {
const Body& my_body;
typename Body::argument_type my_value;
/*override*/ task* execute() {
my_body(my_value);
return NULL;
}
while_iteration_task( const typename Body::argument_type& value, const Body& body ) :
my_body(body), my_value(value)
{}
template<typename Body_> friend class while_group_task;
friend class tbb::parallel_while<Body>;
};
//! For internal use only
/** Unpacks a block of iterations.
@ingroup algorithms */
template<typename Body>
class while_group_task: public task {
static const size_t max_arg_size = 4;
const Body& my_body;
size_t size;
typename Body::argument_type my_arg[max_arg_size];
while_group_task( const Body& body ) : my_body(body), size(0) {}
/*override*/ task* execute() {
typedef while_iteration_task<Body> iteration_type;
__TBB_ASSERT( size>0, NULL );
task_list list;
task* t;
size_t k=0;
for(;;) {
t = new( allocate_child() ) iteration_type(my_arg[k],my_body);
if( ++k==size ) break;
list.push_back(*t);
}
set_ref_count(int(k+1));
spawn(list);
spawn_and_wait_for_all(*t);
return NULL;
}
template<typename Stream, typename Body_> friend class while_task;
};
//! For internal use only.
/** Gets block of iterations from a stream and packages them into a while_group_task.
@ingroup algorithms */
template<typename Stream, typename Body>
class while_task: public task {
Stream& my_stream;
const Body& my_body;
empty_task& my_barrier;
/*override*/ task* execute() {
typedef while_group_task<Body> block_type;
block_type& t = *new( allocate_additional_child_of(my_barrier) ) block_type(my_body);
size_t k=0;
while( my_stream.pop_if_present(t.my_arg[k]) ) {
if( ++k==block_type::max_arg_size ) {
// There might be more iterations.
recycle_to_reexecute();
break;
}
}
if( k==0 ) {
destroy(t);
return NULL;
} else {
t.size = k;
return &t;
}
}
while_task( Stream& stream, const Body& body, empty_task& barrier ) :
my_stream(stream),
my_body(body),
my_barrier(barrier)
{}
friend class tbb::parallel_while<Body>;
};
} // namespace internal
//! @endcond
//! Parallel iteration over a stream, with optional addition of more work.
/** The Body b of type B has the requirements: \n
"b(v)" \n
"B::argument_type" \n
where v is of type B::argument_type
@ingroup algorithms */
template<typename Body>
class parallel_while: internal::no_copy {
public:
//! Construct empty non-running parallel while.
parallel_while() : my_body(NULL), my_barrier(NULL) {}
//! Destructor cleans up data members before returning.
~parallel_while() {
if( my_barrier ) {
my_barrier->destroy(*my_barrier);
my_barrier = NULL;
}
}
//! Type of items
typedef typename Body::argument_type value_type;
//! Apply the body to each item in the stream.
/** A Stream s of type S has the requirements: \n
"S::value_type" \n
"s.pop_if_present(value)" is convertible to bool, \n
where value is of type S::value_type. */
template<typename Stream>
void run( Stream& stream, const Body& body );
//! Add a work item while running.
/** Should be called only from within the body's operator() or a thread spawned therefrom. */
void add( const value_type& item );
private:
const Body* my_body;
empty_task* my_barrier;
};
template<typename Body>
template<typename Stream>
void parallel_while<Body>::run( Stream& stream, const Body& body ) {
using namespace internal;
empty_task& barrier = *new( task::allocate_root() ) empty_task();
my_body = &body;
my_barrier = &barrier;
my_barrier->set_ref_count(2);
while_task<Stream,Body>& w = *new( my_barrier->allocate_child() ) while_task<Stream,Body>( stream, body, barrier );
my_barrier->spawn_and_wait_for_all(w);
my_barrier->destroy(*my_barrier);
my_barrier = NULL;
my_body = NULL;
}
template<typename Body>
void parallel_while<Body>::add( const value_type& item ) {
__TBB_ASSERT(my_barrier,"attempt to add to parallel_while that is not running");
typedef internal::while_iteration_task<Body> iteration_type;
iteration_type& i = *new( task::allocate_additional_child_of(*my_barrier) ) iteration_type(item,*my_body);
task::self().spawn( i );
}
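/** Usage sketch: applying a body to every node of a linked list via parallel_while.
The types \c Item, \c ItemStream and \c DoubleValue are illustrative assumptions,
not part of the library API.
\code
#include "tbb/parallel_while.h"

struct Item {
    Item* next;
    float value;
};

// Stream of work items: walks a linked list and hands out one node at a time.
class ItemStream {
    Item* my_head;
public:
    ItemStream( Item* head ) : my_head(head) {}
    bool pop_if_present( Item*& item ) {
        if( !my_head ) return false;
        item = my_head;
        my_head = my_head->next;
        return true;
    }
};

// Body applied to each item; it could also inject extra items via parallel_while::add().
class DoubleValue {
public:
    typedef Item* argument_type;
    void operator()( Item* item ) const { item->value *= 2; }
};

void double_all( Item* head ) {
    ItemStream stream(head);
    tbb::parallel_while<DoubleValue> w;
    w.run( stream, DoubleValue() );
}
\endcode
**/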
} // namespace
#endif /* __TBB_parallel_while */

@@ -0,0 +1,633 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_partitioner_H
#define __TBB_partitioner_H
#ifndef __TBB_INITIAL_CHUNKS
// initial task divisions per thread
#define __TBB_INITIAL_CHUNKS 2
#endif
#ifndef __TBB_RANGE_POOL_CAPACITY
// maximum number of elements in range pool
#define __TBB_RANGE_POOL_CAPACITY 8
#endif
#ifndef __TBB_INIT_DEPTH
// initial value for depth of range pool
#define __TBB_INIT_DEPTH 5
#endif
#ifndef __TBB_DEMAND_DEPTH_ADD
// when imbalance is found range splits this value times more
#define __TBB_DEMAND_DEPTH_ADD 2
#endif
#ifndef __TBB_STATIC_THRESHOLD
// necessary number of clocks for the work to be distributed among all tasks
#define __TBB_STATIC_THRESHOLD 40000
#endif
#if __TBB_DEFINE_MIC
#define __TBB_NONUNIFORM_TASK_CREATION 1
#ifdef __TBB_machine_time_stamp
#define __TBB_USE_MACHINE_TIME_STAMPS 1
#define __TBB_task_duration() __TBB_STATIC_THRESHOLD
#endif // __TBB_machine_time_stamp
#endif // __TBB_DEFINE_MIC
#include "task.h"
#include "aligned_space.h"
#include "atomic.h"
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
// Workaround for overzealous compiler warnings
#pragma warning (push)
#pragma warning (disable: 4244)
#endif
namespace tbb {
class auto_partitioner;
class simple_partitioner;
class affinity_partitioner;
namespace interface7 {
namespace internal {
class affinity_partition_type;
}
}
namespace internal { //< @cond INTERNAL
size_t __TBB_EXPORTED_FUNC get_initial_auto_partitioner_divisor();
//! Defines entry point for affinity partitioner into tbb run-time library.
class affinity_partitioner_base_v3: no_copy {
friend class tbb::affinity_partitioner;
friend class tbb::interface7::internal::affinity_partition_type;
//! Array that remembers affinities of tree positions to affinity_id.
/** NULL if my_size==0. */
affinity_id* my_array;
//! Number of elements in my_array.
size_t my_size;
//! Zeros the fields.
affinity_partitioner_base_v3() : my_array(NULL), my_size(0) {}
//! Deallocates my_array.
~affinity_partitioner_base_v3() {resize(0);}
//! Resize my_array.
/** Retains values if resulting size is the same. */
void __TBB_EXPORTED_METHOD resize( unsigned factor );
};
//! Provides backward-compatible methods for partition objects without affinity.
class partition_type_base {
public:
void set_affinity( task & ) {}
void note_affinity( task::affinity_id ) {}
task* continue_after_execute_range() {return NULL;}
bool decide_whether_to_delay() {return false;}
void spawn_or_delay( bool, task& b ) {
task::spawn(b);
}
};
template<typename Range, typename Body, typename Partitioner> class start_scan;
} //< namespace internal @endcond
namespace serial {
namespace interface7 {
template<typename Range, typename Body, typename Partitioner> class start_for;
}
}
namespace interface7 {
//! @cond INTERNAL
namespace internal {
using namespace tbb::internal;
template<typename Range, typename Body, typename Partitioner> class start_for;
template<typename Range, typename Body, typename Partitioner> class start_reduce;
//! Join task node that contains shared flag for stealing feedback
class flag_task: public task {
public:
tbb::atomic<bool> my_child_stolen;
flag_task() { my_child_stolen = false; }
task* execute() { return NULL; }
static void mark_task_stolen(task &t) {
tbb::atomic<bool> &flag = static_cast<flag_task*>(t.parent())->my_child_stolen;
#if TBB_USE_THREADING_TOOLS
// Threading tools respect lock prefix but report false-positive data-race via plain store
flag.fetch_and_store<release>(true);
#else
flag = true;
#endif //TBB_USE_THREADING_TOOLS
}
static bool is_peer_stolen(task &t) {
return static_cast<flag_task*>(t.parent())->my_child_stolen;
}
};
//! Depth is the relative depth of recursive division inside a range pool. Relative depth allows
//! infinite absolute depth of the recursion for heavily unbalanced workloads whose range is represented
//! by a number that cannot fit into a machine word.
typedef unsigned char depth_t;
//! Range pool stores ranges of type T in a circular buffer with MaxCapacity
template <typename T, depth_t MaxCapacity>
class range_vector {
depth_t my_head;
depth_t my_tail;
depth_t my_size;
depth_t my_depth[MaxCapacity]; // relative depths of stored ranges
tbb::aligned_space<T, MaxCapacity> my_pool;
public:
//! initialize via first range in pool
range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) {
my_depth[0] = 0;
new( static_cast<void *>(my_pool.begin()) ) T(elem);//TODO: std::move?
}
~range_vector() {
while( !empty() ) pop_back();
}
bool empty() const { return my_size == 0; }
depth_t size() const { return my_size; }
//! Populates the range pool with ranges up to max depth or while they are divisible.
//! max_depth starts from 0; e.g. value 2 makes 3 ranges in the pool, up to two 1/4 pieces.
void split_to_fill(depth_t max_depth) {
while( my_size < MaxCapacity && is_divisible(max_depth) ) {
depth_t prev = my_head;
my_head = (my_head + 1) % MaxCapacity;
new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy TODO: std::move?
my_pool.begin()[prev].~T(); // instead of assignment
new(my_pool.begin()+prev) T(my_pool.begin()[my_head], split()); // do 'inverse' split
my_depth[my_head] = ++my_depth[prev];
my_size++;
}
}
void pop_back() {
__TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size");
my_pool.begin()[my_head].~T();
my_size--;
my_head = (my_head + MaxCapacity - 1) % MaxCapacity;
}
void pop_front() {
__TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size");
my_pool.begin()[my_tail].~T();
my_size--;
my_tail = (my_tail + 1) % MaxCapacity;
}
T& back() {
__TBB_ASSERT(my_size > 0, "range_vector::back() with empty size");
return my_pool.begin()[my_head];
}
T& front() {
__TBB_ASSERT(my_size > 0, "range_vector::front() with empty size");
return my_pool.begin()[my_tail];
}
//! similarly to front(), returns depth of the first range in the pool
depth_t front_depth() {
__TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size");
return my_depth[my_tail];
}
depth_t back_depth() {
__TBB_ASSERT(my_size > 0, "range_vector::back_depth() with empty size");
return my_depth[my_head];
}
bool is_divisible(depth_t max_depth) {
return back_depth() < max_depth && back().is_divisible();
}
};
//! Provides default methods for partition objects and common algorithm blocks.
template <typename Partition>
struct partition_type_base {
typedef split split_type;
// decision makers
void set_affinity( task & ) {}
void note_affinity( task::affinity_id ) {}
bool check_being_stolen(task &) { return false; } // part of old should_execute_range()
bool check_for_demand(task &) { return false; }
bool is_divisible() { return true; } // part of old should_execute_range()
depth_t max_depth() { return 0; }
void align_depth(depth_t) { }
template <typename Range> split_type get_split() { return split(); }
// common function blocks
Partition& self() { return *static_cast<Partition*>(this); } // CRTP helper
template<typename StartType, typename Range>
void execute(StartType &start, Range &range) {
// The algorithm in a few words ([]-denotes calls to decision methods of partitioner):
// [If this task is stolen, adjust depth and divisions if necessary, set flag].
// If range is divisible {
// Spread the work while [initial divisions left];
// Create trap task [if necessary];
// }
// If not divisible or [max depth is reached], execute, else do the range pool part
if ( range.is_divisible() ) {
if ( self().is_divisible() ) {
do { // split while both the range and the partition are divisible
typename Partition::split_type split_obj = self().template get_split<Range>();
start.offer_work( split_obj );
} while ( range.is_divisible() && self().is_divisible() );
}
}
if( !range.is_divisible() || !self().max_depth() )
start.run_body( range ); // simple partitioner goes always here
else { // do range pool
internal::range_vector<Range, Partition::range_pool_size> range_pool(range);
do {
range_pool.split_to_fill(self().max_depth()); // fill range pool
if( self().check_for_demand( start ) ) {
if( range_pool.size() > 1 ) {
start.offer_work( range_pool.front(), range_pool.front_depth() );
range_pool.pop_front();
continue;
}
if( range_pool.is_divisible(self().max_depth()) ) // was not enough depth to fork a task
continue; // note: next split_to_fill() should split range at least once
}
start.run_body( range_pool.back() );
range_pool.pop_back();
} while( !range_pool.empty() && !start.is_cancelled() );
}
}
};
//! Provides default methods for auto (adaptive) partition objects.
template <typename Partition>
struct adaptive_partition_type_base : partition_type_base<Partition> {
size_t my_divisor;
depth_t my_max_depth;
static const unsigned factor = 1;
adaptive_partition_type_base() : my_max_depth(__TBB_INIT_DEPTH) {
my_divisor = tbb::internal::get_initial_auto_partitioner_divisor() / 4 * Partition::factor;
__TBB_ASSERT(my_divisor, "initial value of get_initial_auto_partitioner_divisor() is not valid");
}
adaptive_partition_type_base(adaptive_partition_type_base &src, split) {
my_max_depth = src.my_max_depth;
#if TBB_USE_ASSERT
size_t old_divisor = src.my_divisor;
#endif
#if __TBB_INITIAL_TASK_IMBALANCE
if( src.my_divisor <= 1 ) my_divisor = 0;
else my_divisor = src.my_divisor = (src.my_divisor + 1u) / 2u;
#else
my_divisor = src.my_divisor / 2u;
src.my_divisor = src.my_divisor - my_divisor; // TODO: check the effect separately
if (my_divisor) src.my_max_depth += static_cast<depth_t>(__TBB_Log2(src.my_divisor / my_divisor));
#endif
// For affinity_partitioner, my_divisor indicates the number of affinity array indices the task reserves.
// A task that has only one index must produce its right split without a reserved index, to avoid
// the index being overwritten in note_affinity() of the created (right) task.
// I.e. a task created deeper than the affinity array can remember must not save its affinity (LIFO order)
__TBB_ASSERT( (old_divisor <= 1 && my_divisor == 0) ||
(old_divisor > 1 && my_divisor != 0), NULL);
}
adaptive_partition_type_base(adaptive_partition_type_base &src, const proportional_split& split_obj) {
my_max_depth = src.my_max_depth;
#if __TBB_ENABLE_RANGE_FEEDBACK
my_divisor = size_t(float(src.my_divisor) * float(split_obj.right())
/ float(split_obj.left() + split_obj.right()));
#else
my_divisor = split_obj.right() * Partition::factor;
#endif
src.my_divisor -= my_divisor;
}
bool check_being_stolen( task &t) { // part of old should_execute_range()
if( !my_divisor ) { // if not from the top P tasks of binary tree
my_divisor = 1; // TODO: replace by on-stack flag (partition_state's member)?
if( t.is_stolen_task() && t.parent()->ref_count() >= 2 ) { // runs concurrently with the left task
#if TBB_USE_EXCEPTIONS
// RTTI is available, check whether the cast is valid
__TBB_ASSERT(dynamic_cast<flag_task*>(t.parent()), 0);
// correctness of the cast relies on avoiding the root task for which:
// - initial value of my_divisor != 0 (protected by separate assertion)
// - is_stolen_task() always returns false for the root task.
#endif
flag_task::mark_task_stolen(t);
if( !my_max_depth ) my_max_depth++;
my_max_depth += __TBB_DEMAND_DEPTH_ADD;
return true;
}
}
return false;
}
void align_depth(depth_t base) {
__TBB_ASSERT(base <= my_max_depth, 0);
my_max_depth -= base;
}
depth_t max_depth() { return my_max_depth; }
};
//! Helper that enables one or the other code branch (see example in is_splittable_in_proportion)
template<bool C, typename T = void> struct enable_if { typedef T type; };
template<typename T> struct enable_if<false, T> { };
//! Determines whether the template parameter has a static boolean constant
//! 'is_splittable_in_proportion' that is initialized with 'true'.
/** If the template parameter has such a field and it was initialized with a non-zero
* value, then the 'value' member of this class is set to 'true'; otherwise it is 'false'.
*/
template <typename Range>
class is_splittable_in_proportion {
private:
typedef char yes[1];
typedef char no [2];
template <typename range_type> static yes& decide(typename enable_if<range_type::is_splittable_in_proportion>::type *);
template <typename range_type> static no& decide(...);
public:
// equals 'true' if and only if the static const member 'is_splittable_in_proportion' of the template
// parameter is initialized with the value 'true'
static const bool value = (sizeof(decide<Range>(0)) == sizeof(yes));
};
//! Provides default methods for affinity (adaptive) partition objects.
class affinity_partition_type : public adaptive_partition_type_base<affinity_partition_type> {
static const unsigned factor_power = 4;
enum {
start = 0,
run,
pass
} my_delay;
#ifdef __TBB_USE_MACHINE_TIME_STAMPS
machine_tsc_t my_dst_tsc;
#endif
size_t my_begin;
tbb::internal::affinity_id* my_array;
public:
static const unsigned factor = 1 << factor_power; // number of slots in affinity array per task
typedef proportional_split split_type;
affinity_partition_type( tbb::internal::affinity_partitioner_base_v3& ap )
: adaptive_partition_type_base<affinity_partition_type>(), my_delay(start)
#ifdef __TBB_USE_MACHINE_TIME_STAMPS
, my_dst_tsc(0)
#endif
{
__TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" );
ap.resize(factor);
my_array = ap.my_array;
my_begin = 0;
my_max_depth = factor_power + 1; // the first factor_power ranges will be spawned, and >=1 ranges should be left
__TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 );
}
affinity_partition_type(affinity_partition_type& p, split)
: adaptive_partition_type_base<affinity_partition_type>(p, split()),
my_delay(pass),
#ifdef __TBB_USE_MACHINE_TIME_STAMPS
my_dst_tsc(0),
#endif
my_array(p.my_array) {
// the sum of the divisors represents original value of p.my_divisor before split
__TBB_ASSERT(my_divisor + p.my_divisor <= factor, NULL);
my_begin = p.my_begin + p.my_divisor;
}
affinity_partition_type(affinity_partition_type& p, const proportional_split& split_obj)
: adaptive_partition_type_base<affinity_partition_type>(p, split_obj),
my_delay(start),
#ifdef __TBB_USE_MACHINE_TIME_STAMPS
my_dst_tsc(0),
#endif
my_array(p.my_array) {
size_t total_divisor = my_divisor + p.my_divisor;
__TBB_ASSERT(total_divisor % factor == 0, NULL);
my_divisor = (my_divisor + factor/2) & (0u - factor);
#if __TBB_ENABLE_RANGE_FEEDBACK
if (!my_divisor)
my_divisor = factor;
else if (my_divisor == total_divisor)
my_divisor = total_divisor - factor;
#endif
p.my_divisor = total_divisor - my_divisor;
__TBB_ASSERT(my_divisor && p.my_divisor, NULL);
my_begin = p.my_begin + p.my_divisor;
}
void set_affinity( task &t ) {
if( my_divisor ) {
if( !my_array[my_begin] )
// TODO: consider code reuse for static_partitioner
t.set_affinity( affinity_id(my_begin / factor + 1) );
else
t.set_affinity( my_array[my_begin] );
}
}
void note_affinity( task::affinity_id id ) {
if( my_divisor )
my_array[my_begin] = id;
}
bool check_for_demand( task &t ) {
if( pass == my_delay ) {
if( my_divisor > 1 ) // produce affinitized tasks while they have a slot in the array
return true; // do not do my_max_depth++ here, but be sure range_pool is splittable once more
else if( my_divisor && my_max_depth ) { // make balancing task
my_divisor = 0; // once for each task; depth will be decreased in align_depth()
return true;
}
else if( flag_task::is_peer_stolen(t) ) {
my_max_depth += __TBB_DEMAND_DEPTH_ADD;
return true;
}
} else if( start == my_delay ) {
#ifndef __TBB_USE_MACHINE_TIME_STAMPS
my_delay = pass;
#else
my_dst_tsc = __TBB_machine_time_stamp() + __TBB_task_duration();
my_delay = run;
} else if( run == my_delay ) {
if( __TBB_machine_time_stamp() < my_dst_tsc ) {
__TBB_ASSERT(my_max_depth > 0, NULL);
return false;
}
my_delay = pass;
return true;
#endif // __TBB_USE_MACHINE_TIME_STAMPS
}
return false;
}
bool is_divisible() { // part of old should_execute_range()
return my_divisor > factor;
}
#if _MSC_VER && !defined(__INTEL_COMPILER)
// Suppress "conditional expression is constant" warning.
#pragma warning( push )
#pragma warning( disable: 4127 )
#endif
template <typename Range>
split_type get_split() {
if (is_splittable_in_proportion<Range>::value) {
size_t size = my_divisor / factor;
#if __TBB_NONUNIFORM_TASK_CREATION
size_t right = (size + 2) / 3;
#else
size_t right = size / 2;
#endif
size_t left = size - right;
return split_type(left, right);
} else {
return split_type(1, 1);
}
}
#if _MSC_VER && !defined(__INTEL_COMPILER)
#pragma warning( pop )
#endif // warning 4127 is back
static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
};
class auto_partition_type: public adaptive_partition_type_base<auto_partition_type> {
public:
auto_partition_type( const auto_partitioner& ) {
my_divisor *= __TBB_INITIAL_CHUNKS;
}
auto_partition_type( auto_partition_type& src, split)
: adaptive_partition_type_base<auto_partition_type>(src, split()) {}
bool is_divisible() { // part of old should_execute_range()
if( my_divisor > 1 ) return true;
if( my_divisor && my_max_depth ) { // can split the task. TODO: on-stack flag instead
// keep same fragmentation while splitting for the local task pool
my_max_depth--;
my_divisor = 0; // decrease max_depth once per task
return true;
} else return false;
}
bool check_for_demand(task &t) {
if( flag_task::is_peer_stolen(t) ) {
my_max_depth += __TBB_DEMAND_DEPTH_ADD;
return true;
} else return false;
}
static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
};
class simple_partition_type: public partition_type_base<simple_partition_type> {
public:
simple_partition_type( const simple_partitioner& ) {}
simple_partition_type( const simple_partition_type&, split ) {}
//! simplified algorithm
template<typename StartType, typename Range>
void execute(StartType &start, Range &range) {
split_type split_obj = split(); // start.offer_work accepts split_type as reference
while( range.is_divisible() )
start.offer_work( split_obj );
start.run_body( range );
}
//static const unsigned range_pool_size = 1; - not necessary because execute() is overridden
};
//! Backward-compatible partition for auto and affinity partition objects.
class old_auto_partition_type: public tbb::internal::partition_type_base {
size_t num_chunks;
static const size_t VICTIM_CHUNKS = 4;
public:
bool should_execute_range(const task &t) {
if( num_chunks<VICTIM_CHUNKS && t.is_stolen_task() )
num_chunks = VICTIM_CHUNKS;
return num_chunks==1;
}
old_auto_partition_type( const auto_partitioner& )
: num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
old_auto_partition_type( const affinity_partitioner& )
: num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
old_auto_partition_type( old_auto_partition_type& pt, split ) {
num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u;
}
};
} // namespace interfaceX::internal
//! @endcond
} // namespace interfaceX
//! A simple partitioner
/** Divides the range until the range is not divisible.
@ingroup algorithms */
class simple_partitioner {
public:
simple_partitioner() {}
private:
template<typename Range, typename Body, typename Partitioner> friend class serial::interface7::start_for;
template<typename Range, typename Body, typename Partitioner> friend class interface7::internal::start_for;
template<typename Range, typename Body, typename Partitioner> friend class interface7::internal::start_reduce;
template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
// backward compatibility
class partition_type: public internal::partition_type_base {
public:
bool should_execute_range(const task& ) {return false;}
partition_type( const simple_partitioner& ) {}
partition_type( const partition_type&, split ) {}
};
// new implementation just extends existing interface
typedef interface7::internal::simple_partition_type task_partition_type;
// TODO: consider to make split_type public
typedef interface7::internal::simple_partition_type::split_type split_type;
};
//! An auto partitioner
/** The range is initially divided into several large chunks.
Chunks are further subdivided into smaller pieces if demand is detected and they are divisible.
@ingroup algorithms */
class auto_partitioner {
public:
auto_partitioner() {}
private:
template<typename Range, typename Body, typename Partitioner> friend class serial::interface7::start_for;
template<typename Range, typename Body, typename Partitioner> friend class interface7::internal::start_for;
template<typename Range, typename Body, typename Partitioner> friend class interface7::internal::start_reduce;
template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
// backward compatibility
typedef interface7::internal::old_auto_partition_type partition_type;
// new implementation just extends existing interface
typedef interface7::internal::auto_partition_type task_partition_type;
// TODO: consider to make split_type public
typedef interface7::internal::auto_partition_type::split_type split_type;
};
//! An affinity partitioner
class affinity_partitioner: internal::affinity_partitioner_base_v3 {
public:
affinity_partitioner() {}
private:
template<typename Range, typename Body, typename Partitioner> friend class serial::interface7::start_for;
template<typename Range, typename Body, typename Partitioner> friend class interface7::internal::start_for;
template<typename Range, typename Body, typename Partitioner> friend class interface7::internal::start_reduce;
template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
// backward compatibility - for parallel_scan only
typedef interface7::internal::old_auto_partition_type partition_type;
// new implementation just extends existing interface
typedef interface7::internal::affinity_partition_type task_partition_type;
// TODO: consider to make split_type public
typedef interface7::internal::affinity_partition_type::split_type split_type;
};
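/** Usage sketch: passing the three partitioners to parallel_for. It assumes
"tbb/parallel_for.h" and a C++11 compiler for the lambdas; the function \c scale
and its array argument are illustrative, not part of the API.
\code
#include "tbb/parallel_for.h"
#include "tbb/blocked_range.h"

void scale( float* a, size_t n ) {
    // simple_partitioner: chunk size is controlled solely by the grainsize (1000 here).
    tbb::parallel_for( tbb::blocked_range<size_t>(0, n, 1000),
        [=]( const tbb::blocked_range<size_t>& r ) {
            for( size_t i=r.begin(); i!=r.end(); ++i ) a[i] *= 2.0f;
        },
        tbb::simple_partitioner() );

    // auto_partitioner (the default): chunk size adapts to the observed load.
    tbb::parallel_for( tbb::blocked_range<size_t>(0, n),
        [=]( const tbb::blocked_range<size_t>& r ) {
            for( size_t i=r.begin(); i!=r.end(); ++i ) a[i] += 1.0f;
        },
        tbb::auto_partitioner() );

    // affinity_partitioner: reuse the same object across calls so that cache
    // affinity learned in one pass can be replayed in the next.
    static tbb::affinity_partitioner ap;
    tbb::parallel_for( tbb::blocked_range<size_t>(0, n),
        [=]( const tbb::blocked_range<size_t>& r ) {
            for( size_t i=r.begin(); i!=r.end(); ++i ) a[i] -= 1.0f;
        },
        ap );
}
\endcode
**/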
} // namespace tbb
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma warning (pop)
#endif // warning 4244 is back
#undef __TBB_INITIAL_CHUNKS
#undef __TBB_RANGE_POOL_CAPACITY
#undef __TBB_INIT_DEPTH
#endif /* __TBB_partitioner_H */

Research/inc/tbb/pipeline.h Normal file

@@ -0,0 +1,664 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_pipeline_H
#define __TBB_pipeline_H
#include "atomic.h"
#include "task.h"
#include "tbb_allocator.h"
#include <cstddef>
#if __TBB_CPP11_TYPE_PROPERTIES_PRESENT || __TBB_TR1_TYPE_PROPERTIES_IN_STD_PRESENT
#include <type_traits>
#endif
namespace tbb {
class pipeline;
class filter;
//! @cond INTERNAL
namespace internal {
// The argument for PIPELINE_VERSION should be an integer between 2 and 9
#define __TBB_PIPELINE_VERSION(x) ((unsigned char)(x-2)<<1)
typedef unsigned long Token;
typedef long tokendiff_t;
class stage_task;
class input_buffer;
class pipeline_root_task;
class pipeline_cleaner;
} // namespace internal
namespace interface6 {
template<typename T, typename U> class filter_t;
namespace internal {
class pipeline_proxy;
}
}
//! @endcond
//! A stage in a pipeline.
/** @ingroup algorithms */
class filter: internal::no_copy {
private:
//! Value used to mark "not in pipeline"
static filter* not_in_pipeline() {return reinterpret_cast<filter*>(intptr_t(-1));}
protected:
//! The lowest bit 0 is for parallel vs. serial
static const unsigned char filter_is_serial = 0x1;
//! 4th bit distinguishes ordered vs unordered filters.
/** The bit was not set for parallel filters in TBB 2.1 and earlier,
but the is_ordered() function always treats parallel filters as out of order. */
static const unsigned char filter_is_out_of_order = 0x1<<4;
//! 5th bit distinguishes thread-bound and regular filters.
static const unsigned char filter_is_bound = 0x1<<5;
//! 6th bit marks input filters emitting small objects
static const unsigned char filter_may_emit_null = 0x1<<6;
//! 7th bit defines exception propagation mode expected by the application.
static const unsigned char exact_exception_propagation =
#if TBB_USE_CAPTURED_EXCEPTION
0x0;
#else
0x1<<7;
#endif /* TBB_USE_CAPTURED_EXCEPTION */
static const unsigned char current_version = __TBB_PIPELINE_VERSION(5);
static const unsigned char version_mask = 0x7<<1; // bits 1-3 are for version
public:
enum mode {
//! processes multiple items in parallel and in no particular order
parallel = current_version | filter_is_out_of_order,
//! processes items one at a time; all such filters process items in the same order
serial_in_order = current_version | filter_is_serial,
//! processes items one at a time and in no particular order
serial_out_of_order = current_version | filter_is_serial | filter_is_out_of_order,
//! @deprecated use serial_in_order instead
serial = serial_in_order
};
protected:
filter( bool is_serial_ ) :
next_filter_in_pipeline(not_in_pipeline()),
my_input_buffer(NULL),
my_filter_mode(static_cast<unsigned char>((is_serial_ ? serial : parallel) | exact_exception_propagation)),
prev_filter_in_pipeline(not_in_pipeline()),
my_pipeline(NULL),
next_segment(NULL)
{}
filter( mode filter_mode ) :
next_filter_in_pipeline(not_in_pipeline()),
my_input_buffer(NULL),
my_filter_mode(static_cast<unsigned char>(filter_mode | exact_exception_propagation)),
prev_filter_in_pipeline(not_in_pipeline()),
my_pipeline(NULL),
next_segment(NULL)
{}
// signal end-of-input for concrete_filters
void __TBB_EXPORTED_METHOD set_end_of_input();
public:
//! True if filter is serial.
bool is_serial() const {
return bool( my_filter_mode & filter_is_serial );
}
//! True if filter must receive stream in order.
bool is_ordered() const {
return (my_filter_mode & (filter_is_out_of_order|filter_is_serial))==filter_is_serial;
}
//! True if filter is thread-bound.
bool is_bound() const {
return ( my_filter_mode & filter_is_bound )==filter_is_bound;
}
//! true if an input filter can emit null
bool object_may_be_null() {
return ( my_filter_mode & filter_may_emit_null ) == filter_may_emit_null;
}
//! Operate on an item from the input stream, and return item for output stream.
/** Returns NULL if filter is a sink. */
virtual void* operator()( void* item ) = 0;
//! Destroy filter.
/** If the filter was added to a pipeline, the pipeline must be destroyed first. */
virtual __TBB_EXPORTED_METHOD ~filter();
#if __TBB_TASK_GROUP_CONTEXT
//! Destroys item if pipeline was cancelled.
/** Required to prevent memory leaks.
Note it can be called concurrently even for serial filters.*/
virtual void finalize( void* /*item*/ ) {};
#endif
private:
//! Pointer to next filter in the pipeline.
filter* next_filter_in_pipeline;
//! has the filter not yet processed all the tokens it will ever see?
// (pipeline has not yet reached end_of_input or this filter has not yet
// seen the last token produced by input_filter)
bool has_more_work();
//! Buffer for incoming tokens, or NULL if not required.
/** The buffer is required if the filter is serial or follows a thread-bound one. */
internal::input_buffer* my_input_buffer;
friend class internal::stage_task;
friend class internal::pipeline_root_task;
friend class pipeline;
friend class thread_bound_filter;
//! Storage for filter mode and dynamically checked implementation version.
const unsigned char my_filter_mode;
//! Pointer to previous filter in the pipeline.
filter* prev_filter_in_pipeline;
//! Pointer to the pipeline.
pipeline* my_pipeline;
//! Pointer to the next "segment" of filters, or NULL if not required.
/** In each segment, the first filter is not thread-bound but follows a thread-bound one. */
filter* next_segment;
};
//! A stage in a pipeline served by a user thread.
/** @ingroup algorithms */
class thread_bound_filter: public filter {
public:
enum result_type {
// item was processed
success,
// item is currently not available
item_not_available,
// there are no more items to process
end_of_stream
};
protected:
thread_bound_filter(mode filter_mode):
filter(static_cast<mode>(filter_mode | filter::filter_is_bound))
{
__TBB_ASSERT(filter_mode & filter::filter_is_serial, "thread-bound filters must be serial");
}
public:
//! If a data item is available, invoke operator() on that item.
/** This interface is non-blocking.
Returns 'success' if an item was processed.
Returns 'item_not_available' if no item can be processed now
but more may arrive in the future, or if token limit is reached.
Returns 'end_of_stream' if there are no more items to process. */
result_type __TBB_EXPORTED_METHOD try_process_item();
//! Wait until a data item becomes available, and invoke operator() on that item.
/** This interface is blocking.
Returns 'success' if an item was processed.
Returns 'end_of_stream' if there are no more items to process.
Never returns 'item_not_available', as it blocks until another return condition applies. */
result_type __TBB_EXPORTED_METHOD process_item();
private:
//! Internal routine for item processing
result_type internal_process_item(bool is_blocking);
};
//! A processing pipeline that applies filters to items.
/** @ingroup algorithms */
class pipeline {
public:
//! Construct empty pipeline.
__TBB_EXPORTED_METHOD pipeline();
/** Though the current implementation declares the destructor virtual, do not rely on this
detail. The virtualness is deprecated and may disappear in future versions of TBB. */
virtual __TBB_EXPORTED_METHOD ~pipeline();
//! Add filter to end of pipeline.
void __TBB_EXPORTED_METHOD add_filter( filter& filter_ );
//! Run the pipeline to completion.
void __TBB_EXPORTED_METHOD run( size_t max_number_of_live_tokens );
#if __TBB_TASK_GROUP_CONTEXT
//! Run the pipeline to completion with user-supplied context.
void __TBB_EXPORTED_METHOD run( size_t max_number_of_live_tokens, tbb::task_group_context& context );
#endif
//! Remove all filters from the pipeline.
void __TBB_EXPORTED_METHOD clear();
private:
friend class internal::stage_task;
friend class internal::pipeline_root_task;
friend class filter;
friend class thread_bound_filter;
friend class internal::pipeline_cleaner;
friend class tbb::interface6::internal::pipeline_proxy;
//! Pointer to first filter in the pipeline.
filter* filter_list;
//! Pointer to location where address of next filter to be added should be stored.
filter* filter_end;
//! Task whose reference count is used to determine when all stages are done.
task* end_counter;
//! Number of idle tokens waiting for input stage.
atomic<internal::Token> input_tokens;
//! Global counter of tokens
atomic<internal::Token> token_counter;
//! False until fetch_input returns NULL.
bool end_of_input;
//! True if the pipeline contains a thread-bound filter; false otherwise.
bool has_thread_bound_filters;
//! Remove filter from pipeline.
void remove_filter( filter& filter_ );
//! Not used, but retained to satisfy old export files.
void __TBB_EXPORTED_METHOD inject_token( task& self );
#if __TBB_TASK_GROUP_CONTEXT
//! Does clean up if pipeline is cancelled or exception occurred
void clear_filters();
#endif
};
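/** Usage sketch: a three-stage pipeline whose last stage is a thread_bound_filter
serviced by the calling thread while the pipeline runs on a separate tbb_thread.
The classes \c IntSource, \c SquareFilter and \c PrintSink are illustrative
assumptions, not part of the library.
\code
#include "tbb/pipeline.h"
#include "tbb/tbb_thread.h"
#include <cstdio>

// Serial source: emits the integers 1..10 as heap-allocated tokens.
class IntSource: public tbb::filter {
    int my_next;
public:
    IntSource() : tbb::filter(tbb::filter::serial_in_order), my_next(1) {}
    /*override*/ void* operator()( void* ) {
        return my_next <= 10 ? new int(my_next++) : NULL;
    }
};

// Parallel transform: squares each token; several tokens may be in flight at once.
class SquareFilter: public tbb::filter {
public:
    SquareFilter() : tbb::filter(tbb::filter::parallel) {}
    /*override*/ void* operator()( void* item ) {
        int* p = static_cast<int*>(item);
        *p *= *p;
        return p;
    }
};

// Thread-bound sink: items are handled only when its owner calls process_item().
class PrintSink: public tbb::thread_bound_filter {
public:
    PrintSink() : tbb::thread_bound_filter(tbb::filter::serial_in_order) {}
    /*override*/ void* operator()( void* item ) {
        int* p = static_cast<int*>(item);
        std::printf("%d\n", *p);
        delete p;
        return NULL;
    }
};

void run_pipeline( tbb::pipeline* p ) { p->run( /*max_number_of_live_tokens=*/8 ); }

int main() {
    IntSource src;
    SquareFilter square;
    PrintSink sink;
    tbb::pipeline p;
    p.add_filter(src);
    p.add_filter(square);
    p.add_filter(sink);
    // A pipeline containing a thread-bound filter must be run from a thread other
    // than the one servicing that filter.
    tbb::tbb_thread t(run_pipeline, &p);
    while( sink.process_item() != tbb::thread_bound_filter::end_of_stream )
        continue;
    t.join();
    p.clear();
    return 0;
}
\endcode
**/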
//------------------------------------------------------------------------
// Support for lambda-friendly parallel_pipeline interface
//------------------------------------------------------------------------
namespace interface6 {
namespace internal {
template<typename T, typename U, typename Body> class concrete_filter;
}
//! input_filter control to signal end-of-input for parallel_pipeline
class flow_control {
bool is_pipeline_stopped;
flow_control() { is_pipeline_stopped = false; }
template<typename T, typename U, typename Body> friend class internal::concrete_filter;
public:
void stop() { is_pipeline_stopped = true; }
};
//! @cond INTERNAL
namespace internal {
template<typename T> struct tbb_large_object {enum { value = sizeof(T) > sizeof(void *) }; };
// Obtain type properties in one or another way
#if __TBB_CPP11_TYPE_PROPERTIES_PRESENT
template<typename T> struct tbb_trivially_copyable { enum { value = std::is_trivially_copyable<T>::value }; };
#elif __TBB_TR1_TYPE_PROPERTIES_IN_STD_PRESENT
template<typename T> struct tbb_trivially_copyable { enum { value = std::has_trivial_copy_constructor<T>::value }; };
#else
// Explicitly list the types we wish to be placed as-is in the pipeline input_buffers.
template<typename T> struct tbb_trivially_copyable { enum { value = false }; };
template<typename T> struct tbb_trivially_copyable <T*> { enum { value = true }; };
template<> struct tbb_trivially_copyable <short> { enum { value = true }; };
template<> struct tbb_trivially_copyable <unsigned short> { enum { value = true }; };
template<> struct tbb_trivially_copyable <int> { enum { value = !tbb_large_object<int>::value }; };
template<> struct tbb_trivially_copyable <unsigned int> { enum { value = !tbb_large_object<int>::value }; };
template<> struct tbb_trivially_copyable <long> { enum { value = !tbb_large_object<long>::value }; };
template<> struct tbb_trivially_copyable <unsigned long> { enum { value = !tbb_large_object<long>::value }; };
template<> struct tbb_trivially_copyable <float> { enum { value = !tbb_large_object<float>::value }; };
template<> struct tbb_trivially_copyable <double> { enum { value = !tbb_large_object<double>::value }; };
#endif // Obtaining type properties
template<typename T> struct is_large_object {enum { value = tbb_large_object<T>::value || !tbb_trivially_copyable<T>::value }; };
template<typename T, bool> class token_helper;
// large object helper (uses tbb_allocator)
template<typename T>
class token_helper<T, true> {
public:
typedef typename tbb::tbb_allocator<T> allocator;
typedef T* pointer;
typedef T value_type;
static pointer create_token(const value_type & source) {
pointer output_t = allocator().allocate(1);
return new (output_t) T(source);
}
static value_type & token(pointer & t) { return *t;}
static void * cast_to_void_ptr(pointer ref) { return (void *) ref; }
static pointer cast_from_void_ptr(void * ref) { return (pointer)ref; }
static void destroy_token(pointer token) {
allocator().destroy(token);
allocator().deallocate(token,1);
}
};
// pointer specialization
template<typename T>
class token_helper<T*, false > {
public:
typedef T* pointer;
typedef T* value_type;
static pointer create_token(const value_type & source) { return source; }
static value_type & token(pointer & t) { return t;}
static void * cast_to_void_ptr(pointer ref) { return (void *)ref; }
static pointer cast_from_void_ptr(void * ref) { return (pointer)ref; }
static void destroy_token( pointer /*token*/) {}
};
// small object specialization (converts void* to the correct type, passes objects directly.)
template<typename T>
class token_helper<T, false> {
typedef union {
T actual_value;
void * void_overlay;
} type_to_void_ptr_map;
public:
typedef T pointer; // not really a pointer in this case.
typedef T value_type;
static pointer create_token(const value_type & source) {
return source; }
static value_type & token(pointer & t) { return t;}
static void * cast_to_void_ptr(pointer ref) {
type_to_void_ptr_map mymap;
mymap.void_overlay = NULL;
mymap.actual_value = ref;
return mymap.void_overlay;
}
static pointer cast_from_void_ptr(void * ref) {
type_to_void_ptr_map mymap;
mymap.void_overlay = ref;
return mymap.actual_value;
}
static void destroy_token( pointer /*token*/) {}
};
template<typename T, typename U, typename Body>
class concrete_filter: public tbb::filter {
const Body& my_body;
typedef token_helper<T,is_large_object<T>::value > t_helper;
typedef typename t_helper::pointer t_pointer;
typedef token_helper<U,is_large_object<U>::value > u_helper;
typedef typename u_helper::pointer u_pointer;
/*override*/ void* operator()(void* input) {
t_pointer temp_input = t_helper::cast_from_void_ptr(input);
u_pointer output_u = u_helper::create_token(my_body(t_helper::token(temp_input)));
t_helper::destroy_token(temp_input);
return u_helper::cast_to_void_ptr(output_u);
}
/*override*/ void finalize(void * input) {
t_pointer temp_input = t_helper::cast_from_void_ptr(input);
t_helper::destroy_token(temp_input);
}
public:
concrete_filter(tbb::filter::mode filter_mode, const Body& body) : filter(filter_mode), my_body(body) {}
};
// input
template<typename U, typename Body>
class concrete_filter<void,U,Body>: public filter {
const Body& my_body;
typedef token_helper<U, is_large_object<U>::value > u_helper;
typedef typename u_helper::pointer u_pointer;
/*override*/void* operator()(void*) {
flow_control control;
u_pointer output_u = u_helper::create_token(my_body(control));
if(control.is_pipeline_stopped) {
u_helper::destroy_token(output_u);
set_end_of_input();
return NULL;
}
return u_helper::cast_to_void_ptr(output_u);
}
public:
concrete_filter(tbb::filter::mode filter_mode, const Body& body) :
filter(static_cast<tbb::filter::mode>(filter_mode | filter_may_emit_null)),
my_body(body)
{}
};
template<typename T, typename Body>
class concrete_filter<T,void,Body>: public filter {
const Body& my_body;
typedef token_helper<T, is_large_object<T>::value > t_helper;
typedef typename t_helper::pointer t_pointer;
/*override*/ void* operator()(void* input) {
t_pointer temp_input = t_helper::cast_from_void_ptr(input);
my_body(t_helper::token(temp_input));
t_helper::destroy_token(temp_input);
return NULL;
}
/*override*/ void finalize(void* input) {
t_pointer temp_input = t_helper::cast_from_void_ptr(input);
t_helper::destroy_token(temp_input);
}
public:
concrete_filter(tbb::filter::mode filter_mode, const Body& body) : filter(filter_mode), my_body(body) {}
};
template<typename Body>
class concrete_filter<void,void,Body>: public filter {
const Body& my_body;
/** Override privately because it is always called virtually */
/*override*/ void* operator()(void*) {
flow_control control;
my_body(control);
void* output = control.is_pipeline_stopped ? NULL : (void*)(intptr_t)-1;
return output;
}
public:
concrete_filter(filter::mode filter_mode, const Body& body) : filter(filter_mode), my_body(body) {}
};
//! The class that represents an object of the pipeline for parallel_pipeline().
/** It primarily serves as an RAII class that deletes the heap-allocated filter instances. */
class pipeline_proxy {
tbb::pipeline my_pipe;
public:
pipeline_proxy( const filter_t<void,void>& filter_chain );
~pipeline_proxy() {
while( filter* f = my_pipe.filter_list )
delete f; // filter destructor removes it from the pipeline
}
tbb::pipeline* operator->() { return &my_pipe; }
};
//! Abstract base class that represents a node in a parse tree underlying a filter_t.
/** These nodes are always heap-allocated and can be shared by filter_t objects. */
class filter_node: tbb::internal::no_copy {
/** Count must be atomic because it is hidden state for user, but might be shared by threads. */
tbb::atomic<intptr_t> ref_count;
protected:
filter_node() {
ref_count = 0;
#ifdef __TBB_TEST_FILTER_NODE_COUNT
++(__TBB_TEST_FILTER_NODE_COUNT);
#endif
}
public:
//! Add concrete_filter to pipeline
virtual void add_to( pipeline& ) = 0;
//! Increment reference count
void add_ref() {++ref_count;}
//! Decrement reference count and delete if it becomes zero.
void remove_ref() {
__TBB_ASSERT(ref_count>0,"ref_count underflow");
if( --ref_count==0 )
delete this;
}
virtual ~filter_node() {
#ifdef __TBB_TEST_FILTER_NODE_COUNT
--(__TBB_TEST_FILTER_NODE_COUNT);
#endif
}
};
//! Node in parse tree representing result of make_filter.
template<typename T, typename U, typename Body>
class filter_node_leaf: public filter_node {
const tbb::filter::mode mode;
const Body body;
/*override*/void add_to( pipeline& p ) {
concrete_filter<T,U,Body>* f = new concrete_filter<T,U,Body>(mode,body);
p.add_filter( *f );
}
public:
filter_node_leaf( tbb::filter::mode m, const Body& b ) : mode(m), body(b) {}
};
//! Node in parse tree representing join of two filters.
class filter_node_join: public filter_node {
friend class filter_node; // to suppress GCC 3.2 warnings
filter_node& left;
filter_node& right;
/*override*/~filter_node_join() {
left.remove_ref();
right.remove_ref();
}
/*override*/void add_to( pipeline& p ) {
left.add_to(p);
right.add_to(p);
}
public:
filter_node_join( filter_node& x, filter_node& y ) : left(x), right(y) {
left.add_ref();
right.add_ref();
}
};
} // namespace internal
//! @endcond
//! Create a filter to participate in parallel_pipeline
template<typename T, typename U, typename Body>
filter_t<T,U> make_filter(tbb::filter::mode mode, const Body& body) {
return new internal::filter_node_leaf<T,U,Body>(mode, body);
}
template<typename T, typename V, typename U>
filter_t<T,U> operator& (const filter_t<T,V>& left, const filter_t<V,U>& right) {
__TBB_ASSERT(left.root,"cannot use default-constructed filter_t as left argument of '&'");
__TBB_ASSERT(right.root,"cannot use default-constructed filter_t as right argument of '&'");
return new internal::filter_node_join(*left.root,*right.root);
}
//! Class representing a chain of type-safe pipeline filters
template<typename T, typename U>
class filter_t {
typedef internal::filter_node filter_node;
filter_node* root;
filter_t( filter_node* root_ ) : root(root_) {
root->add_ref();
}
friend class internal::pipeline_proxy;
template<typename T_, typename U_, typename Body>
friend filter_t<T_,U_> make_filter(tbb::filter::mode, const Body& );
template<typename T_, typename V_, typename U_>
friend filter_t<T_,U_> operator& (const filter_t<T_,V_>& , const filter_t<V_,U_>& );
public:
filter_t() : root(NULL) {}
filter_t( const filter_t<T,U>& rhs ) : root(rhs.root) {
if( root ) root->add_ref();
}
template<typename Body>
filter_t( tbb::filter::mode mode, const Body& body ) :
root( new internal::filter_node_leaf<T,U,Body>(mode, body) ) {
root->add_ref();
}
void operator=( const filter_t<T,U>& rhs ) {
// The order of operations below is carefully chosen so that reference counts remain correct
// in the unlikely event that remove_ref throws an exception.
filter_node* old = root;
root = rhs.root;
if( root ) root->add_ref();
if( old ) old->remove_ref();
}
~filter_t() {
if( root ) root->remove_ref();
}
void clear() {
// Like operator= with filter_t() on right side.
if( root ) {
filter_node* old = root;
root = NULL;
old->remove_ref();
}
}
};
inline internal::pipeline_proxy::pipeline_proxy( const filter_t<void,void>& filter_chain ) : my_pipe() {
__TBB_ASSERT( filter_chain.root, "cannot apply parallel_pipeline to default-constructed filter_t" );
filter_chain.root->add_to(my_pipe);
}
inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter_t<void,void>& filter_chain
#if __TBB_TASK_GROUP_CONTEXT
, tbb::task_group_context& context
#endif
) {
internal::pipeline_proxy pipe(filter_chain);
// tbb::pipeline::run() is called via the proxy
pipe->run(max_number_of_live_tokens
#if __TBB_TASK_GROUP_CONTEXT
, context
#endif
);
}
#if __TBB_TASK_GROUP_CONTEXT
inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter_t<void,void>& filter_chain) {
tbb::task_group_context context;
parallel_pipeline(max_number_of_live_tokens, filter_chain, context);
}
#endif // __TBB_TASK_GROUP_CONTEXT
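//! Usage sketch (illustrative only; assumes a C++11 compiler for the lambdas, and the stream handling is hypothetical).
/** A three-stage pipeline: a serial input filter reads integers, a parallel middle
    filter squares them, and a serial output filter prints them.
    \code
    #include <fstream>
    #include <iostream>
    #include "tbb/pipeline.h"

    void square_stream( std::ifstream& input ) {
        tbb::parallel_pipeline( 8,   // max_number_of_live_tokens
            tbb::make_filter<void,int>( tbb::filter::serial_in_order,
                [&input]( tbb::flow_control& fc ) -> int {
                    int value;
                    if( input >> value ) return value;
                    fc.stop();       // no more input: stop the pipeline
                    return 0;
                } )
          & tbb::make_filter<int,int>( tbb::filter::parallel,
                []( int value ) { return value * value; } )
          & tbb::make_filter<int,void>( tbb::filter::serial_in_order,
                []( int value ) { std::cout << value << '\n'; } ) );
    }
    \endcode
    The '&' operator joins filters whose intermediate types match, producing the
    filter_t<void,void> chain that parallel_pipeline expects. */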
} // interface6
using interface6::flow_control;
using interface6::filter_t;
using interface6::make_filter;
using interface6::parallel_pipeline;
} // tbb
#endif /* __TBB_pipeline_H */


@@ -0,0 +1,123 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_queuing_mutex_H
#define __TBB_queuing_mutex_H
#include "tbb_config.h"
#if !TBB_USE_EXCEPTIONS && _MSC_VER
// Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers
#pragma warning (push)
#pragma warning (disable: 4530)
#endif
#include <cstring>
#if !TBB_USE_EXCEPTIONS && _MSC_VER
#pragma warning (pop)
#endif
#include "atomic.h"
#include "tbb_profiling.h"
namespace tbb {
//! Queuing mutex with local-only spinning.
/** @ingroup synchronization */
class queuing_mutex : internal::mutex_copy_deprecated_and_disabled {
public:
//! Construct unacquired mutex.
queuing_mutex() {
q_tail = NULL;
#if TBB_USE_THREADING_TOOLS
internal_construct();
#endif
}
//! The scoped locking pattern
/** It helps avoid the common problem of forgetting to release the lock.
It also conveniently provides the "node" for queuing locks. */
class scoped_lock: internal::no_copy {
//! Initialize fields to mean "no lock held".
void initialize() {
mutex = NULL;
#if TBB_USE_ASSERT
internal::poison_pointer(next);
#endif /* TBB_USE_ASSERT */
}
public:
//! Construct lock that has not acquired a mutex.
/** Equivalent to zero-initialization of *this. */
scoped_lock() {initialize();}
//! Acquire lock on given mutex.
scoped_lock( queuing_mutex& m ) {
initialize();
acquire(m);
}
//! Release lock (if lock is held).
~scoped_lock() {
if( mutex ) release();
}
//! Acquire lock on given mutex.
void __TBB_EXPORTED_METHOD acquire( queuing_mutex& m );
//! Acquire lock on given mutex if free (i.e. non-blocking)
bool __TBB_EXPORTED_METHOD try_acquire( queuing_mutex& m );
//! Release lock.
void __TBB_EXPORTED_METHOD release();
private:
//! The pointer to the mutex owned, or NULL if not holding a mutex.
queuing_mutex* mutex;
//! The pointer to the next competitor for a mutex
scoped_lock *next;
//! The local spin-wait variable
/** Inverted (0 - blocked, 1 - acquired the mutex) for the sake of
zero-initialization. Defining it as an entire word instead of
a byte seems to help performance slightly. */
uintptr_t going;
};
void __TBB_EXPORTED_METHOD internal_construct();
// Mutex traits
static const bool is_rw_mutex = false;
static const bool is_recursive_mutex = false;
static const bool is_fair_mutex = true;
private:
//! The last competitor requesting the lock
atomic<scoped_lock*> q_tail;
};
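//! Usage sketch (illustrative only; the shared counter is a hypothetical example).
/** \code
    #include "tbb/queuing_mutex.h"

    tbb::queuing_mutex counter_mutex;
    long counter = 0;

    void increment() {
        // Requests are granted in FIFO order; the lock is released automatically
        // when 'lock' goes out of scope.
        tbb::queuing_mutex::scoped_lock lock( counter_mutex );
        ++counter;
    }
    \endcode */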
__TBB_DEFINE_PROFILING_SET_NAME(queuing_mutex)
} // namespace tbb
#endif /* __TBB_queuing_mutex_H */


@@ -0,0 +1,163 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_queuing_rw_mutex_H
#define __TBB_queuing_rw_mutex_H
#include "tbb_config.h"
#if !TBB_USE_EXCEPTIONS && _MSC_VER
// Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers
#pragma warning (push)
#pragma warning (disable: 4530)
#endif
#include <cstring>
#if !TBB_USE_EXCEPTIONS && _MSC_VER
#pragma warning (pop)
#endif
#include "atomic.h"
#include "tbb_profiling.h"
namespace tbb {
//! Queuing reader-writer mutex with local-only spinning.
/** Adapted from Krieger, Stumm, et al. pseudocode at
http://www.eecg.toronto.edu/parallel/pubs_abs.html#Krieger_etal_ICPP93
@ingroup synchronization */
class queuing_rw_mutex : internal::mutex_copy_deprecated_and_disabled {
public:
//! Construct unacquired mutex.
queuing_rw_mutex() {
q_tail = NULL;
#if TBB_USE_THREADING_TOOLS
internal_construct();
#endif
}
//! Destructor asserts if the mutex is acquired, i.e. q_tail is non-NULL
~queuing_rw_mutex() {
#if TBB_USE_ASSERT
__TBB_ASSERT( !q_tail, "destruction of an acquired mutex");
#endif
}
//! The scoped locking pattern
/** It helps avoid the common problem of forgetting to release the lock.
It also conveniently provides the "node" for queuing locks. */
class scoped_lock: internal::no_copy {
//! Initialize fields to mean "no lock held".
void initialize() {
my_mutex = NULL;
#if TBB_USE_ASSERT
my_state = 0xFF; // Set to invalid state
internal::poison_pointer(my_next);
internal::poison_pointer(my_prev);
#endif /* TBB_USE_ASSERT */
}
public:
//! Construct lock that has not acquired a mutex.
/** Equivalent to zero-initialization of *this. */
scoped_lock() {initialize();}
//! Acquire lock on given mutex.
scoped_lock( queuing_rw_mutex& m, bool write=true ) {
initialize();
acquire(m,write);
}
//! Release lock (if lock is held).
~scoped_lock() {
if( my_mutex ) release();
}
//! Acquire lock on given mutex.
void acquire( queuing_rw_mutex& m, bool write=true );
//! Acquire lock on given mutex if free (i.e. non-blocking)
bool try_acquire( queuing_rw_mutex& m, bool write=true );
//! Release lock.
void release();
//! Upgrade reader to become a writer.
/** Returns whether the upgrade happened without releasing and re-acquiring the lock */
bool upgrade_to_writer();
//! Downgrade writer to become a reader.
bool downgrade_to_reader();
private:
//! The pointer to the mutex owned, or NULL if not holding a mutex.
queuing_rw_mutex* my_mutex;
//! The pointer to the previous and next competitors for a mutex
scoped_lock *__TBB_atomic my_prev, *__TBB_atomic my_next;
typedef unsigned char state_t;
//! State of the request: reader, writer, active reader, other service states
atomic<state_t> my_state;
//! The local spin-wait variable
/** Corresponds to "spin" in the pseudocode but inverted for the sake of zero-initialization */
unsigned char __TBB_atomic my_going;
//! A tiny internal lock
unsigned char my_internal_lock;
//! Acquire the internal lock
void acquire_internal_lock();
//! Try to acquire the internal lock
/** Returns true if lock was successfully acquired. */
bool try_acquire_internal_lock();
//! Release the internal lock
void release_internal_lock();
//! Wait for internal lock to be released
void wait_for_release_of_internal_lock();
//! A helper function
void unblock_or_wait_on_internal_lock( uintptr_t );
};
void __TBB_EXPORTED_METHOD internal_construct();
// Mutex traits
static const bool is_rw_mutex = true;
static const bool is_recursive_mutex = false;
static const bool is_fair_mutex = true;
private:
//! The last competitor requesting the lock
atomic<scoped_lock*> q_tail;
};
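//! Usage sketch (illustrative only; the shared table and 'needs_update' flag are hypothetical).
/** Acquire a reader lock first and upgrade to a writer only when a modification is needed.
    \code
    #include "tbb/queuing_rw_mutex.h"

    tbb::queuing_rw_mutex table_mutex;

    void lookup_and_maybe_update( bool needs_update ) {
        tbb::queuing_rw_mutex::scoped_lock lock( table_mutex, false );  // false: acquire as a reader
        // ... read the shared table ...
        if( needs_update ) {
            if( !lock.upgrade_to_writer() ) {
                // The lock was released and re-acquired during the upgrade;
                // re-validate whatever was read before writing.
            }
            // ... modify the shared table while holding the writer lock ...
        }
    }   // lock released here
    \endcode */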
__TBB_DEFINE_PROFILING_SET_NAME(queuing_rw_mutex)
} // namespace tbb
#endif /* __TBB_queuing_rw_mutex_H */


@@ -0,0 +1,232 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_reader_writer_lock_H
#define __TBB_reader_writer_lock_H
#include "tbb_thread.h"
#include "tbb_allocator.h"
#include "atomic.h"
namespace tbb {
namespace interface5 {
//! Writer-preference reader-writer lock with local-only spinning on readers.
/** Loosely adapted from Mellor-Crummey and Scott pseudocode at
http://www.cs.rochester.edu/research/synchronization/pseudocode/rw.html#s_wp
@ingroup synchronization */
class reader_writer_lock : tbb::internal::no_copy {
public:
friend class scoped_lock;
friend class scoped_lock_read;
//! Status type for nodes associated with lock instances
/** waiting_nonblocking: the wait state for nonblocking lock
instances; for writes, these transition straight to active
states; for reads, these are unused.
waiting: the start and spin state for all lock instances; these will
transition to active state when appropriate. Non-blocking write locks
transition from this state to waiting_nonblocking immediately.
active: the active state means that the lock instance holds
the lock; it will transition to invalid state during node deletion
invalid: the end state for all nodes; this is set in the
destructor so if we encounter this state, we are looking at
memory that has already been freed
The state diagrams below describe the status transitions.
Single arrows indicate that the thread that owns the node is
responsible for the transition; double arrows indicate that
any thread could make the transition.
State diagram for scoped_lock status:

    waiting ----------> waiting_nonblocking
       |       ________________/   |
       V      V                    V
     active ------------------> invalid

State diagram for scoped_lock_read status:

    waiting
       |
       V
     active ------------------> invalid
*/
enum status_t { waiting_nonblocking, waiting, active, invalid };
//! Constructs a new reader_writer_lock
reader_writer_lock() {
internal_construct();
}
//! Destructs a reader_writer_lock object
~reader_writer_lock() {
internal_destroy();
}
//! The scoped lock pattern for write locks
/** Scoped locks help avoid the common problem of forgetting to release the lock.
This type also serves as the node for queuing locks. */
class scoped_lock : tbb::internal::no_copy {
public:
friend class reader_writer_lock;
//! Construct with blocking attempt to acquire write lock on the passed-in lock
scoped_lock(reader_writer_lock& lock) {
internal_construct(lock);
}
//! Destructor, releases the write lock
~scoped_lock() {
internal_destroy();
}
void* operator new(size_t s) {
return tbb::internal::allocate_via_handler_v3(s);
}
void operator delete(void* p) {
tbb::internal::deallocate_via_handler_v3(p);
}
private:
//! The pointer to the mutex to lock
reader_writer_lock *mutex;
//! The next queued competitor for the mutex
scoped_lock* next;
//! Status flag of the thread associated with this node
atomic<status_t> status;
//! Construct scoped_lock that is not holding lock
scoped_lock();
void __TBB_EXPORTED_METHOD internal_construct(reader_writer_lock&);
void __TBB_EXPORTED_METHOD internal_destroy();
};
//! The scoped lock pattern for read locks
class scoped_lock_read : tbb::internal::no_copy {
public:
friend class reader_writer_lock;
//! Construct with blocking attempt to acquire read lock on the passed-in lock
scoped_lock_read(reader_writer_lock& lock) {
internal_construct(lock);
}
//! Destructor, releases the read lock
~scoped_lock_read() {
internal_destroy();
}
void* operator new(size_t s) {
return tbb::internal::allocate_via_handler_v3(s);
}
void operator delete(void* p) {
tbb::internal::deallocate_via_handler_v3(p);
}
private:
//! The pointer to the mutex to lock
reader_writer_lock *mutex;
//! The next queued competitor for the mutex
scoped_lock_read *next;
//! Status flag of the thread associated with this node
atomic<status_t> status;
//! Construct scoped_lock_read that is not holding lock
scoped_lock_read();
void __TBB_EXPORTED_METHOD internal_construct(reader_writer_lock&);
void __TBB_EXPORTED_METHOD internal_destroy();
};
//! Acquires the reader_writer_lock for write.
/** If the lock is currently held in write mode by another
context, the writer will block by spinning on a local
variable. Exceptions thrown: improper_lock The context tries
to acquire a reader_writer_lock that it already has write
ownership of.*/
void __TBB_EXPORTED_METHOD lock();
//! Tries to acquire the reader_writer_lock for write.
/** This function does not block. Return Value: True or false,
depending on whether the lock is acquired or not. If the lock
is already held by this acquiring context, try_lock() returns
false. */
bool __TBB_EXPORTED_METHOD try_lock();
//! Acquires the reader_writer_lock for read.
/** If the lock is currently held by a writer, this reader will
block and wait until the writers are done. Exceptions thrown:
improper_lock The context tries to acquire a
reader_writer_lock that it already has write ownership of. */
void __TBB_EXPORTED_METHOD lock_read();
//! Tries to acquire the reader_writer_lock for read.
/** This function does not block. Return Value: True or false,
depending on whether the lock is acquired or not. */
bool __TBB_EXPORTED_METHOD try_lock_read();
//! Releases the reader_writer_lock
void __TBB_EXPORTED_METHOD unlock();
private:
void __TBB_EXPORTED_METHOD internal_construct();
void __TBB_EXPORTED_METHOD internal_destroy();
//! Attempts to acquire write lock
/** If unavailable, spins in blocking case, returns false in non-blocking case. */
bool start_write(scoped_lock *);
//! Sets writer_head to w and attempts to unblock
void set_next_writer(scoped_lock *w);
//! Relinquishes write lock to next waiting writer or group of readers
void end_write(scoped_lock *);
//! Checks if current thread holds write lock
bool is_current_writer();
//! Attempts to acquire read lock
/** If unavailable, spins in blocking case, returns false in non-blocking case. */
void start_read(scoped_lock_read *);
//! Unblocks pending readers
void unblock_readers();
//! Relinquishes read lock by decrementing counter; last reader wakes pending writer
void end_read();
//! The list of pending readers
atomic<scoped_lock_read*> reader_head;
//! The list of pending writers
atomic<scoped_lock*> writer_head;
//! The last node in the list of pending writers
atomic<scoped_lock*> writer_tail;
//! The id of the writer that owns the mutex, or a default-constructed tbb_thread::id() if no writer holds it.
tbb_thread::id my_current_writer;
//! Status of mutex
atomic<uintptr_t> rdr_count_and_flags; // used with __TBB_AtomicOR, which assumes uintptr_t
};
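//! Usage sketch (illustrative only; the shared configuration object is hypothetical).
/** Writers use scoped_lock and readers use scoped_lock_read; because the lock is
    writer-preferring, pending writers are served before newly arriving readers.
    \code
    #include "tbb/reader_writer_lock.h"

    tbb::reader_writer_lock config_lock;

    void update_config() {
        tbb::reader_writer_lock::scoped_lock writer( config_lock );
        // ... modify the shared configuration ...
    }

    void read_config() {
        tbb::reader_writer_lock::scoped_lock_read reader( config_lock );
        // ... read the shared configuration ...
    }
    \endcode */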
} // namespace interface5
using interface5::reader_writer_lock;
} // namespace tbb
#endif /* __TBB_reader_writer_lock_H */


@@ -0,0 +1,234 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_recursive_mutex_H
#define __TBB_recursive_mutex_H
#if _WIN32||_WIN64
#include "machine/windows_api.h"
#else
#include <pthread.h>
#endif /* _WIN32||_WIN64 */
#include <new>
#include "aligned_space.h"
#include "tbb_stddef.h"
#include "tbb_profiling.h"
namespace tbb {
//! Mutex that allows recursive mutex acquisition.
/** A thread that already holds the mutex may acquire it again; it must release the mutex as many times as it acquired it.
@ingroup synchronization */
class recursive_mutex : internal::mutex_copy_deprecated_and_disabled {
public:
//! Construct unacquired recursive_mutex.
recursive_mutex() {
#if TBB_USE_ASSERT || TBB_USE_THREADING_TOOLS
internal_construct();
#else
#if _WIN32||_WIN64
InitializeCriticalSectionEx(&impl, 4000, 0);
#else
pthread_mutexattr_t mtx_attr;
int error_code = pthread_mutexattr_init( &mtx_attr );
if( error_code )
tbb::internal::handle_perror(error_code,"recursive_mutex: pthread_mutexattr_init failed");
pthread_mutexattr_settype( &mtx_attr, PTHREAD_MUTEX_RECURSIVE );
error_code = pthread_mutex_init( &impl, &mtx_attr );
if( error_code )
tbb::internal::handle_perror(error_code,"recursive_mutex: pthread_mutex_init failed");
pthread_mutexattr_destroy( &mtx_attr );
#endif /* _WIN32||_WIN64*/
#endif /* TBB_USE_ASSERT */
};
~recursive_mutex() {
#if TBB_USE_ASSERT
internal_destroy();
#else
#if _WIN32||_WIN64
DeleteCriticalSection(&impl);
#else
pthread_mutex_destroy(&impl);
#endif /* _WIN32||_WIN64 */
#endif /* TBB_USE_ASSERT */
};
class scoped_lock;
friend class scoped_lock;
//! The scoped locking pattern
/** It helps avoid the common problem of forgetting to release the lock.
It also conveniently provides the "node" for queuing locks. */
class scoped_lock: internal::no_copy {
public:
//! Construct lock that has not acquired a recursive_mutex.
scoped_lock() : my_mutex(NULL) {};
//! Acquire lock on given mutex.
scoped_lock( recursive_mutex& mutex ) {
#if TBB_USE_ASSERT
my_mutex = &mutex;
#endif /* TBB_USE_ASSERT */
acquire( mutex );
}
//! Release lock (if lock is held).
~scoped_lock() {
if( my_mutex )
release();
}
//! Acquire lock on given mutex.
void acquire( recursive_mutex& mutex ) {
#if TBB_USE_ASSERT
internal_acquire( mutex );
#else
my_mutex = &mutex;
mutex.lock();
#endif /* TBB_USE_ASSERT */
}
//! Try acquire lock on given recursive_mutex.
bool try_acquire( recursive_mutex& mutex ) {
#if TBB_USE_ASSERT
return internal_try_acquire( mutex );
#else
bool result = mutex.try_lock();
if( result )
my_mutex = &mutex;
return result;
#endif /* TBB_USE_ASSERT */
}
//! Release lock
void release() {
#if TBB_USE_ASSERT
internal_release();
#else
my_mutex->unlock();
my_mutex = NULL;
#endif /* TBB_USE_ASSERT */
}
private:
//! The pointer to the recursive_mutex currently held, or NULL if no mutex is held
recursive_mutex* my_mutex;
//! All checks from acquire using mutex.state were moved here
void __TBB_EXPORTED_METHOD internal_acquire( recursive_mutex& m );
//! All checks from try_acquire using mutex.state were moved here
bool __TBB_EXPORTED_METHOD internal_try_acquire( recursive_mutex& m );
//! All checks from release using mutex.state were moved here
void __TBB_EXPORTED_METHOD internal_release();
friend class recursive_mutex;
};
// Mutex traits
static const bool is_rw_mutex = false;
static const bool is_recursive_mutex = true;
static const bool is_fair_mutex = false;
// C++0x compatibility interface
//! Acquire lock
void lock() {
#if TBB_USE_ASSERT
aligned_space<scoped_lock> tmp;
new(tmp.begin()) scoped_lock(*this);
#else
#if _WIN32||_WIN64
EnterCriticalSection(&impl);
#else
int error_code = pthread_mutex_lock(&impl);
if( error_code )
tbb::internal::handle_perror(error_code,"recursive_mutex: pthread_mutex_lock failed");
#endif /* _WIN32||_WIN64 */
#endif /* TBB_USE_ASSERT */
}
//! Try acquiring lock (non-blocking)
/** Return true if lock acquired; false otherwise. */
bool try_lock() {
#if TBB_USE_ASSERT
aligned_space<scoped_lock> tmp;
return (new(tmp.begin()) scoped_lock)->internal_try_acquire(*this);
#else
#if _WIN32||_WIN64
return TryEnterCriticalSection(&impl)!=0;
#else
return pthread_mutex_trylock(&impl)==0;
#endif /* _WIN32||_WIN64 */
#endif /* TBB_USE_ASSERT */
}
//! Release lock
void unlock() {
#if TBB_USE_ASSERT
aligned_space<scoped_lock> tmp;
scoped_lock& s = *tmp.begin();
s.my_mutex = this;
s.internal_release();
#else
#if _WIN32||_WIN64
LeaveCriticalSection(&impl);
#else
pthread_mutex_unlock(&impl);
#endif /* _WIN32||_WIN64 */
#endif /* TBB_USE_ASSERT */
}
//! Return native_handle
#if _WIN32||_WIN64
typedef LPCRITICAL_SECTION native_handle_type;
#else
typedef pthread_mutex_t* native_handle_type;
#endif
native_handle_type native_handle() { return (native_handle_type) &impl; }
private:
#if _WIN32||_WIN64
CRITICAL_SECTION impl;
enum state_t {
INITIALIZED=0x1234,
DESTROYED=0x789A,
} state;
#else
pthread_mutex_t impl;
#endif /* _WIN32||_WIN64 */
//! All checks from mutex constructor using mutex.state were moved here
void __TBB_EXPORTED_METHOD internal_construct();
//! All checks from mutex destructor using mutex.state were moved here
void __TBB_EXPORTED_METHOD internal_destroy();
};
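//! Usage sketch (illustrative only; the list-manipulating functions are hypothetical).
/** A thread may re-acquire a recursive_mutex it already holds, so a locked public
    function can call a locked helper without deadlocking.
    \code
    #include "tbb/recursive_mutex.h"

    tbb::recursive_mutex list_mutex;

    void append_one() {
        tbb::recursive_mutex::scoped_lock lock( list_mutex );
        // ... append a single element ...
    }

    void append_many( int n ) {
        tbb::recursive_mutex::scoped_lock lock( list_mutex );
        for( int i = 0; i < n; ++i )
            append_one();   // re-acquires list_mutex recursively on the same thread
    }
    \endcode */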
__TBB_DEFINE_PROFILING_SET_NAME(recursive_mutex)
} // namespace tbb
#endif /* __TBB_recursive_mutex_H */


@@ -0,0 +1,180 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_runtime_loader_H
#define __TBB_runtime_loader_H
#if ! TBB_PREVIEW_RUNTIME_LOADER
#error Set TBB_PREVIEW_RUNTIME_LOADER to include runtime_loader.h
#endif
#include "tbb_stddef.h"
#include <climits>
#if _MSC_VER
#if ! __TBB_NO_IMPLICIT_LINKAGE
#ifdef _DEBUG
#pragma comment( linker, "/nodefaultlib:tbb_debug.lib" )
#pragma comment( linker, "/defaultlib:tbbproxy_debug.lib" )
#else
#pragma comment( linker, "/nodefaultlib:tbb.lib" )
#pragma comment( linker, "/defaultlib:tbbproxy.lib" )
#endif
#endif
#endif
namespace tbb {
namespace interface6 {
//! Load TBB at runtime.
/*!
\b Usage:
In source code:
\code
#include "tbb/runtime_loader.h"
char const * path[] = { "<install dir>/lib/ia32", NULL };
tbb::runtime_loader loader( path );
// Now use TBB.
\endcode
Link with \c tbbproxy.lib (or \c libtbbproxy.a) instead of \c tbb.lib (\c libtbb.dylib,
\c libtbb.so).
The TBB library will be loaded at runtime from the \c <install dir>/lib/ia32 directory.
\b Attention:
All \c runtime_loader objects (in the same module, i.e. exe or dll) share some global state.
The most noticeable piece of global state is the loaded TBB library.
There are some implications:
- Only one TBB library can be loaded per module.
- If one object has already loaded the TBB library, another object will not load TBB.
If the loaded TBB library is suitable for the second object, both will use TBB
cooperatively; otherwise the second object will report an error.
- \c runtime_loader objects will not work (correctly) in parallel due to absence of
synchronization.
*/
class runtime_loader : tbb::internal::no_copy {
public:
//! Error mode constants.
enum error_mode {
em_status, //!< Save status of operation and continue.
em_throw, //!< Throw an exception of tbb::runtime_loader::error_code type.
em_abort //!< Print message to \c stderr and call \c abort().
}; // error_mode
//! Error codes.
enum error_code {
ec_ok, //!< No errors.
ec_bad_call, //!< Invalid function call (e. g. load() called when TBB is already loaded).
ec_bad_arg, //!< Invalid argument passed.
ec_bad_lib, //!< Invalid library found (e. g. \c TBB_runtime_version symbol not found).
ec_bad_ver, //!< TBB found but version is not suitable.
ec_no_lib //!< No suitable TBB library found.
}; // error_code
//! Initialize object but do not load TBB.
runtime_loader( error_mode mode = em_abort );
//! Initialize object and load TBB.
/*!
See load() for details.
If error mode is \c em_status, call status() to check whether TBB was loaded or not.
*/
runtime_loader(
char const * path[], //!< List of directories to search TBB in.
int min_ver = TBB_INTERFACE_VERSION, //!< Minimal suitable version of TBB.
int max_ver = INT_MAX, //!< Maximal suitable version of TBB.
error_mode mode = em_abort //!< Error mode for this object.
);
//! Destroy object.
~runtime_loader();
//! Load TBB.
/*!
The method searches the directories specified in \c path[] array for the TBB library.
When the library is found, it is loaded and its version is checked. If the version is
not suitable, the library is unloaded, and the search continues.
\b Note:
For security reasons, avoid using relative directory names. For example, never load
TBB from current (\c "."), parent (\c "..") or any other relative directory (like
\c "lib" ). Use only absolute directory names (e. g. "/usr/local/lib").
For the same security reasons, avoid using system default directories (\c "") on
Windows. (See http://www.microsoft.com/technet/security/advisory/2269637.mspx for
details.)
Neglecting these rules may cause your program to execute malicious third-party code.
\b Errors:
- \c ec_bad_call - TBB already loaded by this object.
- \c ec_bad_arg - \p min_ver and/or \p max_ver negative or zero,
or \p min_ver > \p max_ver.
- \c ec_bad_ver - TBB of unsuitable version already loaded by another object.
- \c ec_no_lib - No suitable library found.
*/
error_code
load(
char const * path[], //!< List of directories to search TBB in.
int min_ver = TBB_INTERFACE_VERSION, //!< Minimal suitable version of TBB.
int max_ver = INT_MAX //!< Maximal suitable version of TBB.
);
//! Report status.
/*!
If error mode is \c em_status, the function returns status of the last operation.
*/
error_code status();
private:
error_mode const my_mode;
error_code my_status;
bool my_loaded;
}; // class runtime_loader
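//! Usage sketch for the \c em_status error mode (illustrative only; the search path is hypothetical).
/** \code
    #define TBB_PREVIEW_RUNTIME_LOADER 1
    #include "tbb/runtime_loader.h"
    #include <cstdio>

    void load_tbb_or_report() {
        char const * path[] = { "/opt/intel/tbb/lib/intel64", NULL };
        tbb::runtime_loader loader( path, TBB_INTERFACE_VERSION, INT_MAX,
                                    tbb::runtime_loader::em_status );
        if( loader.status() != tbb::runtime_loader::ec_ok )
            std::fprintf( stderr, "TBB could not be loaded\n" );
        // Otherwise TBB has been loaded and can be used while 'loader' exists.
    }
    \endcode */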
} // namespace interface6
using interface6::runtime_loader;
} // namespace tbb
#endif /* __TBB_runtime_loader_H */


@@ -0,0 +1,319 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_scalable_allocator_H
#define __TBB_scalable_allocator_H
/** @file */
#include <stddef.h> /* Need ptrdiff_t and size_t from here. */
#if !_MSC_VER
#include <stdint.h> /* Need intptr_t from here. */
#endif
#if !defined(__cplusplus) && __ICC==1100
#pragma warning (push)
#pragma warning (disable: 991)
#endif
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
#if _MSC_VER >= 1400
#define __TBB_EXPORTED_FUNC __cdecl
#else
#define __TBB_EXPORTED_FUNC
#endif
/** The "malloc" analogue to allocate block of memory of size bytes.
* @ingroup memory_allocation */
void * __TBB_EXPORTED_FUNC scalable_malloc (size_t size);
/** The "free" analogue to discard a previously allocated piece of memory.
@ingroup memory_allocation */
void __TBB_EXPORTED_FUNC scalable_free (void* ptr);
/** The "realloc" analogue complementing scalable_malloc.
@ingroup memory_allocation */
void * __TBB_EXPORTED_FUNC scalable_realloc (void* ptr, size_t size);
/** The "calloc" analogue complementing scalable_malloc.
@ingroup memory_allocation */
void * __TBB_EXPORTED_FUNC scalable_calloc (size_t nobj, size_t size);
/** The "posix_memalign" analogue.
@ingroup memory_allocation */
int __TBB_EXPORTED_FUNC scalable_posix_memalign (void** memptr, size_t alignment, size_t size);
/** The "_aligned_malloc" analogue.
@ingroup memory_allocation */
void * __TBB_EXPORTED_FUNC scalable_aligned_malloc (size_t size, size_t alignment);
/** The "_aligned_realloc" analogue.
@ingroup memory_allocation */
void * __TBB_EXPORTED_FUNC scalable_aligned_realloc (void* ptr, size_t size, size_t alignment);
/** The "_aligned_free" analogue.
@ingroup memory_allocation */
void __TBB_EXPORTED_FUNC scalable_aligned_free (void* ptr);
/** The analogue of _msize/malloc_size/malloc_usable_size.
Returns the usable size of a memory block previously allocated by scalable_*,
or 0 (zero) if ptr does not point to such a block.
@ingroup memory_allocation */
size_t __TBB_EXPORTED_FUNC scalable_msize (void* ptr);
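/** Usage sketch for the C interface above (illustrative only).
    \code
    #include <stdio.h>
    #include "tbb/scalable_allocator.h"

    void demo(void) {
        double * data = (double*)scalable_malloc( 100 * sizeof(double) );
        if( data ) {
            printf( "usable block size: %u bytes\n", (unsigned)scalable_msize( data ) );
            scalable_free( data );
        }
    }
    \endcode */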
/* Results for scalable_allocation_* functions */
typedef enum {
TBBMALLOC_OK,
TBBMALLOC_INVALID_PARAM,
TBBMALLOC_UNSUPPORTED,
TBBMALLOC_NO_MEMORY,
TBBMALLOC_NO_EFFECT
} ScalableAllocationResult;
/* Setting the TBB_MALLOC_USE_HUGE_PAGES environment variable to 1 enables huge pages;
a scalable_allocation_mode call takes priority over the environment variable. */
typedef enum {
TBBMALLOC_USE_HUGE_PAGES, /* the value turns the use of huge pages on or off */
/* deprecated, kept for backward compatibility only */
USE_HUGE_PAGES = TBBMALLOC_USE_HUGE_PAGES,
/* try to limit memory consumption to 'value' bytes; clean internal buffers
if the limit is exceeded, but do not prevent further memory requests to the OS */
TBBMALLOC_SET_SOFT_HEAP_LIMIT
} AllocationModeParam;
/** Set TBB allocator-specific allocation modes.
@ingroup memory_allocation */
int __TBB_EXPORTED_FUNC scalable_allocation_mode(int param, intptr_t value);
typedef enum {
/* Clean internal allocator buffers for all threads.
Returns TBBMALLOC_NO_EFFECT if no buffers were cleaned,
TBBMALLOC_OK if some memory was released from the buffers. */
TBBMALLOC_CLEAN_ALL_BUFFERS,
/* Clean internal allocator buffer for current thread only.
Return values same as for TBBMALLOC_CLEAN_ALL_BUFFERS. */
TBBMALLOC_CLEAN_THREAD_BUFFERS
} ScalableAllocationCmd;
/** Call TBB allocator-specific commands.
@ingroup memory_allocation */
int __TBB_EXPORTED_FUNC scalable_allocation_command(int cmd, void *param);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#ifdef __cplusplus
//! The namespace rml contains components of low-level memory pool interface.
namespace rml {
class MemoryPool;
typedef void *(*rawAllocType)(intptr_t pool_id, size_t &bytes);
typedef int (*rawFreeType)(intptr_t pool_id, void* raw_ptr, size_t raw_bytes);
/*
Any MemPoolPolicy extension must remain compatible with the following structure field layout:
struct MemPoolPolicy {
rawAllocType pAlloc;
rawFreeType pFree;
size_t granularity; // granularity of pAlloc allocations
};
*/
struct MemPoolPolicy {
enum {
TBBMALLOC_POOL_VERSION = 1
};
rawAllocType pAlloc;
rawFreeType pFree;
// granularity of pAlloc allocations; 0 means the default is used
size_t granularity;
int version;
// all memory is consumed by the first pAlloc call and never returned;
// no further pAlloc calls are made
unsigned fixedPool : 1,
// memory consumed but returned only at pool termination
keepAllMemory : 1,
reserved : 30;
MemPoolPolicy(rawAllocType pAlloc_, rawFreeType pFree_,
size_t granularity_ = 0, bool fixedPool_ = false,
bool keepAllMemory_ = false) :
pAlloc(pAlloc_), pFree(pFree_), granularity(granularity_), version(TBBMALLOC_POOL_VERSION),
fixedPool(fixedPool_), keepAllMemory(keepAllMemory_),
reserved(0) {}
};
// these enumerators have the same values as the corresponding ScalableAllocationResult enumerators
// TODO: use ScalableAllocationResult in pool_create directly
enum MemPoolError {
// pool created successfully
POOL_OK = TBBMALLOC_OK,
// invalid policy parameters found
INVALID_POLICY = TBBMALLOC_INVALID_PARAM,
// requested pool policy is not supported by allocator library
UNSUPPORTED_POLICY = TBBMALLOC_UNSUPPORTED,
// lack of memory during pool creation
NO_MEMORY = TBBMALLOC_NO_MEMORY,
// action takes no effect
NO_EFFECT = TBBMALLOC_NO_EFFECT
};
MemPoolError pool_create_v1(intptr_t pool_id, const MemPoolPolicy *policy,
rml::MemoryPool **pool);
bool pool_destroy(MemoryPool* memPool);
void *pool_malloc(MemoryPool* memPool, size_t size);
void *pool_realloc(MemoryPool* memPool, void *object, size_t size);
void *pool_aligned_malloc(MemoryPool* mPool, size_t size, size_t alignment);
void *pool_aligned_realloc(MemoryPool* mPool, void *ptr, size_t size, size_t alignment);
bool pool_reset(MemoryPool* memPool);
bool pool_free(MemoryPool *memPool, void *object);
}
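// Usage sketch for the low-level pool interface above (illustrative only; malloc/free
// stand in for a real backing provider, and error handling is omitted).
/** \code
    #include <cstdlib>
    #include "tbb/scalable_allocator.h"

    static void* raw_alloc( intptr_t pool_id, size_t& bytes ) {
        (void)pool_id;                 // unused in this sketch
        return std::malloc( bytes );   // provide at least 'bytes' of raw memory
    }
    static int raw_free( intptr_t pool_id, void* raw_ptr, size_t raw_bytes ) {
        (void)pool_id; (void)raw_bytes;
        std::free( raw_ptr );
        return 0;
    }

    void pool_demo() {
        rml::MemPoolPolicy policy( &raw_alloc, &raw_free );
        rml::MemoryPool* pool = NULL;
        if( rml::pool_create_v1( 0, &policy, &pool ) == rml::POOL_OK ) {   // 0 is an arbitrary pool_id
            void* p = rml::pool_malloc( pool, 1024 );
            rml::pool_free( pool, p );
            rml::pool_destroy( pool );
        }
    }
    \endcode */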
#include <new> /* To use new with the placement argument */
/* Ensure that including this header does not cause implicit linkage with TBB */
#ifndef __TBB_NO_IMPLICIT_LINKAGE
#define __TBB_NO_IMPLICIT_LINKAGE 1
#include "tbb_stddef.h"
#undef __TBB_NO_IMPLICIT_LINKAGE
#else
#include "tbb_stddef.h"
#endif
#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
#include <utility> // std::forward
#endif
namespace tbb {
#if _MSC_VER && !defined(__INTEL_COMPILER)
// Workaround for erroneous "unreferenced parameter" warning in method destroy.
#pragma warning (push)
#pragma warning (disable: 4100)
#endif
//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5
/** The members are ordered the same way they are in section 20.4.1
of the ISO C++ standard.
@ingroup memory_allocation */
template<typename T>
class scalable_allocator {
public:
typedef typename internal::allocator_type<T>::value_type value_type;
typedef value_type* pointer;
typedef const value_type* const_pointer;
typedef value_type& reference;
typedef const value_type& const_reference;
typedef size_t size_type;
typedef ptrdiff_t difference_type;
template<class U> struct rebind {
typedef scalable_allocator<U> other;
};
scalable_allocator() throw() {}
scalable_allocator( const scalable_allocator& ) throw() {}
template<typename U> scalable_allocator(const scalable_allocator<U>&) throw() {}
pointer address(reference x) const {return &x;}
const_pointer address(const_reference x) const {return &x;}
//! Allocate space for n objects.
pointer allocate( size_type n, const void* /*hint*/ =0 ) {
return static_cast<pointer>( scalable_malloc( n * sizeof(value_type) ) );
}
//! Free previously allocated block of memory
void deallocate( pointer p, size_type ) {
scalable_free( p );
}
//! Largest value for which method allocate might succeed.
size_type max_size() const throw() {
size_type absolutemax = static_cast<size_type>(-1) / sizeof (value_type);
return (absolutemax > 0 ? absolutemax : 1);
}
#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
template<typename U, typename... Args>
void construct(U *p, Args&&... args)
{ ::new((void *)p) U(std::forward<Args>(args)...); }
#else // __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
#if __TBB_CPP11_RVALUE_REF_PRESENT
void construct( pointer p, value_type&& value ) { ::new((void*)(p)) value_type( std::move( value ) ); }
#endif
void construct( pointer p, const value_type& value ) {::new((void*)(p)) value_type(value);}
#endif // __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
void destroy( pointer p ) {p->~value_type();}
};
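//! Usage sketch (illustrative only).
/** The allocator plugs into standard containers through the usual allocator template parameter.
    \code
    #include <vector>
    #include "tbb/scalable_allocator.h"

    // A vector whose storage comes from the TBB scalable allocator.
    typedef std::vector<int, tbb::scalable_allocator<int> > scalable_int_vector;

    void fill( scalable_int_vector& v ) {
        for( int i = 0; i < 1000; ++i )
            v.push_back( i );
    }
    \endcode */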
#if _MSC_VER && !defined(__INTEL_COMPILER)
#pragma warning (pop)
#endif // warning 4100 is back
//! Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1
/** @ingroup memory_allocation */
template<>
class scalable_allocator<void> {
public:
typedef void* pointer;
typedef const void* const_pointer;
typedef void value_type;
template<class U> struct rebind {
typedef scalable_allocator<U> other;
};
};
template<typename T, typename U>
inline bool operator==( const scalable_allocator<T>&, const scalable_allocator<U>& ) {return true;}
template<typename T, typename U>
inline bool operator!=( const scalable_allocator<T>&, const scalable_allocator<U>& ) {return false;}
} // namespace tbb
#if _MSC_VER
#if (__TBB_BUILD || __TBBMALLOC_BUILD) && !defined(__TBBMALLOC_NO_IMPLICIT_LINKAGE)
#define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1
#endif
#if !__TBBMALLOC_NO_IMPLICIT_LINKAGE
#ifdef _DEBUG
#pragma comment(lib, "tbbmalloc_debug.lib")
#else
#pragma comment(lib, "tbbmalloc.lib")
#endif
#endif
#endif
#endif /* __cplusplus */
#if !defined(__cplusplus) && __ICC==1100
#pragma warning (pop)
#endif // ICC 11.0 warning 991 is back
#endif /* __TBB_scalable_allocator_H */


@@ -0,0 +1,212 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_spin_mutex_H
#define __TBB_spin_mutex_H
#include <cstddef>
#include <new>
#include "aligned_space.h"
#include "tbb_stddef.h"
#include "tbb_machine.h"
#include "tbb_profiling.h"
#include "internal/_mutex_padding.h"
namespace tbb {
//! A lock that occupies a single byte.
/** A spin_mutex is a busy-waiting (spinning) mutex that fits in a single byte.
It should be used only for locking short critical sections
(typically less than 20 instructions) when fairness is not an issue.
If zero-initialized, the mutex is considered unheld.
@ingroup synchronization */
class spin_mutex : internal::mutex_copy_deprecated_and_disabled {
//! 0 if lock is released, 1 if lock is acquired.
__TBB_atomic_flag flag;
public:
//! Construct unacquired lock.
/** Equivalent to zero-initialization of *this. */
spin_mutex() : flag(0) {
#if TBB_USE_THREADING_TOOLS
internal_construct();
#endif
}
//! Represents acquisition of a mutex.
class scoped_lock : internal::no_copy {
private:
//! Points to currently held mutex, or NULL if no lock is held.
spin_mutex* my_mutex;
//! Value to store into spin_mutex::flag to unlock the mutex.
/** This variable is no longer used. Instead, 0 and 1 are used to
represent that the lock is free and acquired, respectively.
We keep the member variable here to ensure backward compatibility */
__TBB_Flag my_unlock_value;
//! Like acquire, but with ITT instrumentation.
void __TBB_EXPORTED_METHOD internal_acquire( spin_mutex& m );
//! Like try_acquire, but with ITT instrumentation.
bool __TBB_EXPORTED_METHOD internal_try_acquire( spin_mutex& m );
//! Like release, but with ITT instrumentation.
void __TBB_EXPORTED_METHOD internal_release();
friend class spin_mutex;
public:
//! Construct without acquiring a mutex.
scoped_lock() : my_mutex(NULL), my_unlock_value(0) {}
//! Construct and acquire lock on a mutex.
scoped_lock( spin_mutex& m ) : my_unlock_value(0) {
internal::suppress_unused_warning(my_unlock_value);
#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
my_mutex=NULL;
internal_acquire(m);
#else
my_mutex=&m;
__TBB_LockByte(m.flag);
#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT*/
}
//! Acquire lock.
void acquire( spin_mutex& m ) {
#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
internal_acquire(m);
#else
my_mutex = &m;
__TBB_LockByte(m.flag);
#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT*/
}
//! Try acquiring lock (non-blocking)
/** Return true if lock acquired; false otherwise. */
bool try_acquire( spin_mutex& m ) {
#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
return internal_try_acquire(m);
#else
bool result = __TBB_TryLockByte(m.flag);
if( result )
my_mutex = &m;
return result;
#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT*/
}
//! Release lock
void release() {
#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
internal_release();
#else
__TBB_UnlockByte(my_mutex->flag);
my_mutex = NULL;
#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
}
//! Destroy lock. If holding a lock, releases the lock first.
~scoped_lock() {
if( my_mutex ) {
#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
internal_release();
#else
__TBB_UnlockByte(my_mutex->flag);
#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
}
}
};
//! Internal constructor with ITT instrumentation.
void __TBB_EXPORTED_METHOD internal_construct();
// Mutex traits
static const bool is_rw_mutex = false;
static const bool is_recursive_mutex = false;
static const bool is_fair_mutex = false;
// ISO C++0x compatibility methods
//! Acquire lock
void lock() {
#if TBB_USE_THREADING_TOOLS
aligned_space<scoped_lock> tmp;
new(tmp.begin()) scoped_lock(*this);
#else
__TBB_LockByte(flag);
#endif /* TBB_USE_THREADING_TOOLS*/
}
//! Try acquiring lock (non-blocking)
/** Return true if lock acquired; false otherwise. */
bool try_lock() {
#if TBB_USE_THREADING_TOOLS
aligned_space<scoped_lock> tmp;
return (new(tmp.begin()) scoped_lock)->internal_try_acquire(*this);
#else
return __TBB_TryLockByte(flag);
#endif /* TBB_USE_THREADING_TOOLS*/
}
//! Release lock
void unlock() {
#if TBB_USE_THREADING_TOOLS
aligned_space<scoped_lock> tmp;
scoped_lock& s = *tmp.begin();
s.my_mutex = this;
s.internal_release();
#else
__TBB_store_with_release(flag, 0);
#endif /* TBB_USE_THREADING_TOOLS */
}
friend class scoped_lock;
}; // end of spin_mutex
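//! Usage sketch (illustrative only; the shared counter is hypothetical).
/** \code
    #include "tbb/spin_mutex.h"

    tbb::spin_mutex counter_mutex;
    long counter = 0;

    void add_one() {
        // The critical section is only a few instructions long, so a spin_mutex is appropriate.
        tbb::spin_mutex::scoped_lock lock( counter_mutex );
        ++counter;
    }
    \endcode */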
__TBB_DEFINE_PROFILING_SET_NAME(spin_mutex)
} // namespace tbb
#if ( __TBB_x86_32 || __TBB_x86_64 )
#include "internal/_x86_eliding_mutex_impl.h"
#endif
namespace tbb {
//! A cross-platform spin mutex with speculative lock acquisition.
/** On platforms with proper HW support, this lock may speculatively execute
its critical sections, using HW mechanisms to detect real data races and
ensure atomicity of the critical sections. In particular, it uses
Intel(R) Transactional Synchronization Extensions (Intel(R) TSX).
Without such HW support, it behaves like a spin_mutex.
It should be used for locking short critical sections where the lock is
contended but the data it protects are not. If zero-initialized, the
mutex is considered unheld.
@ingroup synchronization */
#if ( __TBB_x86_32 || __TBB_x86_64 )
typedef interface7::internal::padded_mutex<interface7::internal::x86_eliding_mutex,false> speculative_spin_mutex;
#else
typedef interface7::internal::padded_mutex<spin_mutex,false> speculative_spin_mutex;
#endif
__TBB_DEFINE_PROFILING_SET_NAME(speculative_spin_mutex)
} // namespace tbb
#endif /* __TBB_spin_mutex_H */


@@ -0,0 +1,259 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_spin_rw_mutex_H
#define __TBB_spin_rw_mutex_H
#include "tbb_stddef.h"
#include "tbb_machine.h"
#include "tbb_profiling.h"
#include "internal/_mutex_padding.h"
namespace tbb {
#if __TBB_TSX_AVAILABLE
namespace interface8 { namespace internal {
class x86_rtm_rw_mutex;
}}
#endif
class spin_rw_mutex_v3;
typedef spin_rw_mutex_v3 spin_rw_mutex;
//! Fast, unfair, spinning reader-writer lock with backoff and writer-preference
/** @ingroup synchronization */
class spin_rw_mutex_v3 : internal::mutex_copy_deprecated_and_disabled {
//! @cond INTERNAL
//! Internal acquire write lock.
bool __TBB_EXPORTED_METHOD internal_acquire_writer();
//! Out of line code for releasing a write lock.
/** This code has debug checking and instrumentation for Intel(R) Thread Checker and Intel(R) Thread Profiler. */
void __TBB_EXPORTED_METHOD internal_release_writer();
//! Internal acquire read lock.
void __TBB_EXPORTED_METHOD internal_acquire_reader();
//! Internal upgrade reader to become a writer.
bool __TBB_EXPORTED_METHOD internal_upgrade();
//! Out of line code for downgrading a writer to a reader.
/** This code has debug checking and instrumentation for Intel(R) Thread Checker and Intel(R) Thread Profiler. */
void __TBB_EXPORTED_METHOD internal_downgrade();
//! Internal release read lock.
void __TBB_EXPORTED_METHOD internal_release_reader();
//! Internal try_acquire write lock.
bool __TBB_EXPORTED_METHOD internal_try_acquire_writer();
//! Internal try_acquire read lock.
bool __TBB_EXPORTED_METHOD internal_try_acquire_reader();
//! @endcond
public:
//! Construct unacquired mutex.
spin_rw_mutex_v3() : state(0) {
#if TBB_USE_THREADING_TOOLS
internal_construct();
#endif
}
#if TBB_USE_ASSERT
//! Destructor asserts if the mutex is acquired, i.e. state is zero.
~spin_rw_mutex_v3() {
__TBB_ASSERT( !state, "destruction of an acquired mutex");
};
#endif /* TBB_USE_ASSERT */
//! The scoped locking pattern
/** It helps avoid the common problem of forgetting to release the lock.
It also conveniently provides the "node" for queuing locks. */
class scoped_lock : internal::no_copy {
#if __TBB_TSX_AVAILABLE
friend class tbb::interface8::internal::x86_rtm_rw_mutex;
// helper methods for x86_rtm_rw_mutex
spin_rw_mutex *internal_get_mutex() const { return mutex; }
void internal_set_mutex(spin_rw_mutex* m) { mutex = m; }
#endif
public:
//! Construct lock that has not acquired a mutex.
/** Equivalent to zero-initialization of *this. */
scoped_lock() : mutex(NULL), is_writer(false) {}
//! Acquire lock on given mutex.
scoped_lock( spin_rw_mutex& m, bool write = true ) : mutex(NULL) {
acquire(m, write);
}
//! Release lock (if lock is held).
~scoped_lock() {
if( mutex ) release();
}
//! Acquire lock on given mutex.
void acquire( spin_rw_mutex& m, bool write = true ) {
__TBB_ASSERT( !mutex, "holding mutex already" );
is_writer = write;
mutex = &m;
if( write ) mutex->internal_acquire_writer();
else mutex->internal_acquire_reader();
}
//! Upgrade reader to become a writer.
/** Returns whether the upgrade happened without releasing and re-acquiring the lock */
bool upgrade_to_writer() {
__TBB_ASSERT( mutex, "lock is not acquired" );
__TBB_ASSERT( !is_writer, "not a reader" );
is_writer = true;
return mutex->internal_upgrade();
}
//! Release lock.
void release() {
__TBB_ASSERT( mutex, "lock is not acquired" );
spin_rw_mutex *m = mutex;
mutex = NULL;
#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
if( is_writer ) m->internal_release_writer();
else m->internal_release_reader();
#else
if( is_writer ) __TBB_AtomicAND( &m->state, READERS );
else __TBB_FetchAndAddWrelease( &m->state, -(intptr_t)ONE_READER);
#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
}
//! Downgrade writer to become a reader.
bool downgrade_to_reader() {
__TBB_ASSERT( mutex, "lock is not acquired" );
__TBB_ASSERT( is_writer, "not a writer" );
#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
mutex->internal_downgrade();
#else
__TBB_FetchAndAddW( &mutex->state, ((intptr_t)ONE_READER-WRITER));
#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
is_writer = false;
return true;
}
//! Try acquire lock on given mutex.
bool try_acquire( spin_rw_mutex& m, bool write = true ) {
__TBB_ASSERT( !mutex, "holding mutex already" );
bool result;
is_writer = write;
result = write? m.internal_try_acquire_writer()
: m.internal_try_acquire_reader();
if( result )
mutex = &m;
return result;
}
protected:
//! The pointer to the current mutex that is held, or NULL if no mutex is held.
spin_rw_mutex* mutex;
//! If mutex!=NULL, then is_writer is true if holding a writer lock, false if holding a reader lock.
/** Not defined if not holding a lock. */
bool is_writer;
};
// Mutex traits
static const bool is_rw_mutex = true;
static const bool is_recursive_mutex = false;
static const bool is_fair_mutex = false;
// ISO C++0x compatibility methods
//! Acquire writer lock
void lock() {internal_acquire_writer();}
//! Try acquiring writer lock (non-blocking)
/** Return true if lock acquired; false otherwise. */
bool try_lock() {return internal_try_acquire_writer();}
//! Release lock
void unlock() {
#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
if( state&WRITER ) internal_release_writer();
else internal_release_reader();
#else
if( state&WRITER ) __TBB_AtomicAND( &state, READERS );
else __TBB_FetchAndAddWrelease( &state, -(intptr_t)ONE_READER);
#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
}
// Methods for reader locks that resemble ISO C++0x compatibility methods.
//! Acquire reader lock
void lock_read() {internal_acquire_reader();}
//! Try acquiring reader lock (non-blocking)
/** Return true if reader lock acquired; false otherwise. */
bool try_lock_read() {return internal_try_acquire_reader();}
protected:
typedef intptr_t state_t;
static const state_t WRITER = 1;
static const state_t WRITER_PENDING = 2;
static const state_t READERS = ~(WRITER | WRITER_PENDING);
static const state_t ONE_READER = 4;
static const state_t BUSY = WRITER | READERS;
//! State of lock
/** Bit 0 = writer is holding lock
Bit 1 = request by a writer to acquire lock (hint to readers to wait)
Bit 2..N = number of readers holding lock */
state_t state;
private:
void __TBB_EXPORTED_METHOD internal_construct();
};
__TBB_DEFINE_PROFILING_SET_NAME(spin_rw_mutex)
} // namespace tbb
#if __TBB_TSX_AVAILABLE
#include "internal/_x86_rtm_rw_mutex_impl.h"
#endif
namespace tbb {
namespace interface8 {
//! A cross-platform spin reader/writer mutex with speculative lock acquisition.
/** On platforms with proper HW support, this lock may speculatively execute
its critical sections, using HW mechanisms to detect real data races and
ensure atomicity of the critical sections. In particular, it uses
Intel(R) Transactional Synchronization Extensions (Intel(R) TSX).
Without such HW support, it behaves like a spin_rw_mutex.
It should be used for locking short critical sections where the lock is
contended but the data it protects are not.
@ingroup synchronization */
#if __TBB_TSX_AVAILABLE
typedef interface7::internal::padded_mutex<tbb::interface8::internal::x86_rtm_rw_mutex,true> speculative_spin_rw_mutex;
#else
typedef interface7::internal::padded_mutex<tbb::spin_rw_mutex,true> speculative_spin_rw_mutex;
#endif
} // namespace interface8
using interface8::speculative_spin_rw_mutex;
__TBB_DEFINE_PROFILING_SET_NAME(speculative_spin_rw_mutex)
} // namespace tbb
#endif /* __TBB_spin_rw_mutex_H */
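// Illustrative usage sketch (not part of the original TBB headers): the scoped_lock
// pattern declared above, taken as a reader lock and upgraded to a writer on demand.
// The include path and the protected variable are assumptions made for the example;
// a C++03 compiler is sufficient.
#include "tbb/spin_rw_mutex.h"

static tbb::spin_rw_mutex value_mutex;   // protects shared_value
static int shared_value = 0;

int read_and_maybe_bump( bool bump ) {
    tbb::spin_rw_mutex::scoped_lock lock( value_mutex, /*write=*/false ); // reader lock
    int v = shared_value;
    if( bump ) {
        if( !lock.upgrade_to_writer() )  // may release and re-acquire the lock,
            v = shared_value;            // so re-read the value under the writer lock
        shared_value = v + 1;
    }
    return v;                            // lock released by ~scoped_lock()
}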

1007
Research/inc/tbb/task.h Normal file

File diff suppressed because it is too large


@@ -0,0 +1,257 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_task_arena_H
#define __TBB_task_arena_H
#include "task.h"
#include "tbb_exception.h"
#if TBB_USE_THREADING_TOOLS
#include "atomic.h" // for as_atomic
#endif
#if __TBB_TASK_ARENA
namespace tbb {
//! @cond INTERNAL
namespace internal {
//! Internal to library. Should not be used by clients.
/** @ingroup task_scheduling */
class arena;
class task_scheduler_observer_v3;
} // namespace internal
//! @endcond
namespace interface7 {
//! @cond INTERNAL
namespace internal {
using namespace tbb::internal; //e.g. function_task from task.h
class delegate_base : no_assign {
public:
virtual void operator()() const = 0;
virtual ~delegate_base() {}
};
template<typename F>
class delegated_function : public delegate_base {
F &my_func;
/*override*/ void operator()() const {
my_func();
}
public:
delegated_function ( F& f ) : my_func(f) {}
};
class task_arena_base {
protected:
//! NULL if not currently initialized.
internal::arena* my_arena;
#if __TBB_TASK_GROUP_CONTEXT
//! default context of the arena
task_group_context *my_context;
#endif
//! Concurrency level for deferred initialization
int my_max_concurrency;
//! Reserved master slots
unsigned my_master_slots;
//! Special settings
intptr_t my_version_and_traits;
enum {
default_flags = 0
#if __TBB_TASK_GROUP_CONTEXT
| (task_group_context::default_traits & task_group_context::exact_exception) // 0 or 1 << 16
, exact_exception_flag = task_group_context::exact_exception // used to specify flag for context directly
#endif
};
task_arena_base(int max_concurrency, unsigned reserved_for_masters)
: my_arena(0)
#if __TBB_TASK_GROUP_CONTEXT
, my_context(0)
#endif
, my_max_concurrency(max_concurrency)
, my_master_slots(reserved_for_masters)
, my_version_and_traits(default_flags)
{}
void __TBB_EXPORTED_METHOD internal_initialize( );
void __TBB_EXPORTED_METHOD internal_terminate( );
void __TBB_EXPORTED_METHOD internal_enqueue( task&, intptr_t ) const;
void __TBB_EXPORTED_METHOD internal_execute( delegate_base& ) const;
void __TBB_EXPORTED_METHOD internal_wait() const;
static int __TBB_EXPORTED_FUNC internal_current_slot();
public:
//! Typedef for number of threads that is automatic.
static const int automatic = -1; // any value < 1 means 'automatic'
};
} // namespace internal
//! @endcond
/** A 1-to-1 proxy representation class of a scheduler arena.
* Constructors set up the settings only; real construction is deferred until the first method invocation.
* The destructor only removes one of the references to the inner arena representation.
* Final destruction happens when all the references (and the work) are gone.
*/
class task_arena : public internal::task_arena_base {
friend class tbb::internal::task_scheduler_observer_v3;
bool my_initialized;
public:
//! Creates task_arena with certain concurrency limits
/** Sets up the settings only; real construction is deferred until the first method invocation.
* @arg max_concurrency specifies the total number of slots in the arena where threads work
* @arg reserved_for_masters specifies the number of slots to be used by master threads only.
* A value of 1 is the default and reflects the behavior of implicit arenas.
**/
task_arena(int max_concurrency = automatic, unsigned reserved_for_masters = 1)
: task_arena_base(max_concurrency, reserved_for_masters)
, my_initialized(false)
{}
//! Copies settings from another task_arena
task_arena(const task_arena &s) // copy settings but not the reference or instance
: task_arena_base(s.my_max_concurrency, s.my_master_slots)
, my_initialized(false)
{}
//! Forces allocation of the resources for the task_arena as specified in constructor arguments
inline void initialize() {
if( !my_initialized ) {
internal_initialize();
#if TBB_USE_THREADING_TOOLS
// Threading tools respect lock prefix but report false-positive data-race via plain store
internal::as_atomic(my_initialized).fetch_and_store<release>(true);
#else
my_initialized = true;
#endif //TBB_USE_THREADING_TOOLS
}
}
//! Overrides concurrency level and forces initialization of internal representation
inline void initialize(int max_concurrency, unsigned reserved_for_masters = 1) {
__TBB_ASSERT( !my_arena, "Impossible to modify settings of an already initialized task_arena");
if( !my_initialized ) {
my_max_concurrency = max_concurrency;
my_master_slots = reserved_for_masters;
initialize();
}
}
//! Removes the reference to the internal arena representation.
//! Not thread safe wrt concurrent invocations of other methods.
inline void terminate() {
if( my_initialized ) {
internal_terminate();
my_initialized = false;
}
}
//! Removes the reference to the internal arena representation, and destroys the external object.
//! Not thread safe wrt concurrent invocations of other methods.
~task_arena() {
terminate();
}
//! Returns true if the arena is active (initialized); false otherwise.
//! The name was chosen to match a task_scheduler_init method with the same semantics.
bool is_active() const { return my_initialized; }
//! Enqueues a task into the arena to process a functor, and immediately returns.
//! Does not require the calling thread to join the arena
template<typename F>
void enqueue( const F& f ) {
initialize();
#if __TBB_TASK_GROUP_CONTEXT
__TBB_ASSERT(my_context, NULL);
internal_enqueue( *new( task::allocate_root(*my_context) ) internal::function_task<F>(f), 0 );
#else
internal_enqueue( *new( task::allocate_root() ) internal::function_task<F>(f), 0 );
#endif
}
#if __TBB_TASK_PRIORITY
//! Enqueues a task with priority p into the arena to process a functor f, and immediately returns.
//! Does not require the calling thread to join the arena
template<typename F>
void enqueue( const F& f, priority_t p ) {
__TBB_ASSERT( p == priority_low || p == priority_normal || p == priority_high, "Invalid priority level value" );
initialize();
#if __TBB_TASK_GROUP_CONTEXT
internal_enqueue( *new( task::allocate_root(*my_context) ) internal::function_task<F>(f), (intptr_t)p );
#else
internal_enqueue( *new( task::allocate_root() ) internal::function_task<F>(f), (intptr_t)p );
#endif
}
#endif// __TBB_TASK_PRIORITY
//! Joins the arena and executes a functor, then returns
//! If not possible to join, wraps the functor into a task, enqueues it and waits for task completion
//! Can decrement the arena demand for workers, causing a worker to leave and free a slot to the calling thread
template<typename F>
void execute(F& f) {
initialize();
internal::delegated_function<F> d(f);
internal_execute( d );
}
//! Joins the arena and executes a functor, then returns
//! If not possible to join, wraps the functor into a task, enqueues it and waits for task completion
//! Can decrement the arena demand for workers, causing a worker to leave and free a slot to the calling thread
template<typename F>
void execute(const F& f) {
initialize();
internal::delegated_function<const F> d(f);
internal_execute( d );
}
#if __TBB_EXTRA_DEBUG
//! Wait for all work in the arena to be completed
//! Even submitted by other application threads
//! Joins arena if/when possible (in the same way as execute())
void debug_wait_until_empty() {
initialize();
internal_wait();
}
#endif //__TBB_EXTRA_DEBUG
//! Returns the index, aka slot number, of the calling thread in its current arena
inline static int current_thread_index() {
return internal_current_slot();
}
};
} // namespace interface7
using interface7::task_arena;
} // namespace tbb
#endif /* __TBB_TASK_ARENA */
#endif /* __TBB_task_arena_H */
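// Illustrative usage sketch (not part of the original TBB headers): confining parallel
// work to a 4-thread arena via task_arena::execute. Assumes a C++11 compiler for the
// lambdas; the loop body is a placeholder.
#include "tbb/task_arena.h"
#include "tbb/parallel_for.h"

void process_in_small_arena( int n ) {
    tbb::task_arena arena( 4 );          // at most 4 threads; 1 slot reserved for the master
    arena.execute( [n] {
        tbb::parallel_for( 0, n, []( int i ) {
            (void)i;                     // process element i
        } );
    } );
}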


@@ -0,0 +1,222 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_task_group_H
#define __TBB_task_group_H
#include "task.h"
#include "tbb_exception.h"
#if __TBB_TASK_GROUP_CONTEXT
namespace tbb {
namespace internal {
template<typename F> class task_handle_task;
}
class task_group;
class structured_task_group;
template<typename F>
class task_handle : internal::no_assign {
template<typename _F> friend class internal::task_handle_task;
friend class task_group;
friend class structured_task_group;
static const intptr_t scheduled = 0x1;
F my_func;
intptr_t my_state;
void mark_scheduled () {
// The check here is intentionally lax to avoid the impact of interlocked operation
if ( my_state & scheduled )
internal::throw_exception( internal::eid_invalid_multiple_scheduling );
my_state |= scheduled;
}
public:
task_handle( const F& f ) : my_func(f), my_state(0) {}
void operator() () const { my_func(); }
};
enum task_group_status {
not_complete,
complete,
canceled
};
namespace internal {
template<typename F>
class task_handle_task : public task {
task_handle<F>& my_handle;
/*override*/ task* execute() {
my_handle();
return NULL;
}
public:
task_handle_task( task_handle<F>& h ) : my_handle(h) { h.mark_scheduled(); }
};
class task_group_base : internal::no_copy {
protected:
empty_task* my_root;
task_group_context my_context;
task& owner () { return *my_root; }
template<typename F>
task_group_status internal_run_and_wait( F& f ) {
__TBB_TRY {
if ( !my_context.is_group_execution_cancelled() )
f();
} __TBB_CATCH( ... ) {
my_context.register_pending_exception();
}
return wait();
}
template<typename F, typename Task>
void internal_run( F& f ) {
owner().spawn( *new( owner().allocate_additional_child_of(*my_root) ) Task(f) );
}
public:
task_group_base( uintptr_t traits = 0 )
: my_context(task_group_context::bound, task_group_context::default_traits | traits)
{
my_root = new( task::allocate_root(my_context) ) empty_task;
my_root->set_ref_count(1);
}
~task_group_base() __TBB_NOEXCEPT(false) {
if( my_root->ref_count() > 1 ) {
bool stack_unwinding_in_progress = std::uncaught_exception();
// Always attempt to do proper cleanup to avoid inevitable memory corruption
// in case of missing wait (for the sake of better testability & debuggability)
if ( !is_canceling() )
cancel();
__TBB_TRY {
my_root->wait_for_all();
} __TBB_CATCH (...) {
task::destroy(*my_root);
__TBB_RETHROW();
}
task::destroy(*my_root);
if ( !stack_unwinding_in_progress )
internal::throw_exception( internal::eid_missing_wait );
}
else {
task::destroy(*my_root);
}
}
template<typename F>
void run( task_handle<F>& h ) {
internal_run< task_handle<F>, internal::task_handle_task<F> >( h );
}
task_group_status wait() {
__TBB_TRY {
my_root->wait_for_all();
} __TBB_CATCH( ... ) {
my_context.reset();
__TBB_RETHROW();
}
if ( my_context.is_group_execution_cancelled() ) {
my_context.reset();
return canceled;
}
return complete;
}
bool is_canceling() {
return my_context.is_group_execution_cancelled();
}
void cancel() {
my_context.cancel_group_execution();
}
}; // class task_group_base
} // namespace internal
class task_group : public internal::task_group_base {
public:
task_group () : task_group_base( task_group_context::concurrent_wait ) {}
#if __SUNPRO_CC
template<typename F>
void run( task_handle<F>& h ) {
internal_run< task_handle<F>, internal::task_handle_task<F> >( h );
}
#else
using task_group_base::run;
#endif
template<typename F>
void run( const F& f ) {
internal_run< const F, internal::function_task<F> >( f );
}
template<typename F>
task_group_status run_and_wait( const F& f ) {
return internal_run_and_wait<const F>( f );
}
template<typename F>
task_group_status run_and_wait( task_handle<F>& h ) {
h.mark_scheduled();
return internal_run_and_wait< task_handle<F> >( h );
}
}; // class task_group
class structured_task_group : public internal::task_group_base {
public:
template<typename F>
task_group_status run_and_wait ( task_handle<F>& h ) {
h.mark_scheduled();
return internal_run_and_wait< task_handle<F> >( h );
}
task_group_status wait() {
task_group_status res = task_group_base::wait();
my_root->set_ref_count(1);
return res;
}
}; // class structured_task_group
inline
bool is_current_task_group_canceling() {
return task::self().is_cancelled();
}
template<class F>
task_handle<F> make_task( const F& f ) {
return task_handle<F>( f );
}
} // namespace tbb
#endif /* __TBB_TASK_GROUP_CONTEXT */
#endif /* __TBB_task_group_H */
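// Illustrative usage sketch (not part of the original TBB headers): the classic
// recursive Fibonacci split expressed with task_group. Assumes a C++11 compiler
// for the lambda; a serial cutoff is omitted for brevity.
#include "tbb/task_group.h"

long parallel_fib( long n ) {
    if( n < 2 ) return n;
    long x = 0, y = 0;
    tbb::task_group g;
    g.run( [&] { x = parallel_fib( n - 1 ); } );  // spawned for concurrent execution
    y = parallel_fib( n - 2 );                    // computed by the calling thread
    g.wait();                                     // blocks until the spawned task finishes
    return x + y;
}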


@@ -0,0 +1,153 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_task_scheduler_init_H
#define __TBB_task_scheduler_init_H
#include "tbb_stddef.h"
#include "limits.h"
namespace tbb {
typedef std::size_t stack_size_type;
//! @cond INTERNAL
namespace internal {
//! Internal to library. Should not be used by clients.
/** @ingroup task_scheduling */
class scheduler;
} // namespace internal
//! @endcond
//! Class delimiting the scope of task scheduler activity.
/** A thread can construct a task_scheduler_init object and keep it alive
while it uses TBB's tasking subsystem (including parallel algorithms).
This class allows the properties of the TBB task pool to be customized to some extent.
For example, it can limit the concurrency level of parallel work initiated by the
given thread. It can also be used to specify the stack size of the TBB worker threads,
though this setting is not effective if the thread pool has already been created.
If a parallel construct is used without a task_scheduler_init object having been
created previously, the scheduler is initialized automatically with default settings
and persists until this thread exits. The default concurrency level is defined
as described in task_scheduler_init::initialize().
@ingroup task_scheduling */
class task_scheduler_init: internal::no_copy {
enum ExceptionPropagationMode {
propagation_mode_exact = 1u,
propagation_mode_captured = 2u,
propagation_mode_mask = propagation_mode_exact | propagation_mode_captured
};
#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
enum {
wait_workers_in_terminate_flag = 128u
};
#endif
/** NULL if not currently initialized. */
internal::scheduler* my_scheduler;
public:
//! Typedef for number of threads that is automatic.
static const int automatic = -1;
//! Argument to initialize() or constructor that causes initialization to be deferred.
static const int deferred = -2;
//! Ensure that scheduler exists for this thread
/** A value of -1 lets TBB decide on the number of threads, which is usually
the maximal hardware concurrency for this process, that is, the number of logical
CPUs on the machine (possibly limited by the processor affinity mask of this
process on Windows, or of this thread on Linux and FreeBSD). This is the preferable
option for production code because it helps to avoid nasty surprises when several
TBB-based components run side-by-side or in a nested fashion inside the same
process.
The number_of_threads is ignored if any other task_scheduler_inits
currently exist. A thread may construct multiple task_scheduler_inits.
Doing so does no harm because the underlying scheduler is reference counted. */
void __TBB_EXPORTED_METHOD initialize( int number_of_threads=automatic );
//! The overloaded method with stack size parameter
/** Overloading is necessary to preserve ABI compatibility */
void __TBB_EXPORTED_METHOD initialize( int number_of_threads, stack_size_type thread_stack_size );
//! Inverse of method initialize.
void __TBB_EXPORTED_METHOD terminate();
//! Shorthand for default constructor followed by call to initialize(number_of_threads).
#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
task_scheduler_init( int number_of_threads=automatic, stack_size_type thread_stack_size=0, bool wait_workers_in_terminate = false ) : my_scheduler(NULL)
#else
task_scheduler_init( int number_of_threads=automatic, stack_size_type thread_stack_size=0 ) : my_scheduler(NULL)
#endif
{
// Two lowest order bits of the stack size argument may be taken to communicate
// default exception propagation mode of the client to be used when the
// client manually creates tasks in the master thread and does not use
// explicit task group context object. This is necessary because newer
// TBB binaries with exact propagation enabled by default may be used
// by older clients that expect tbb::captured_exception wrapper.
// All zeros mean old client - no preference.
__TBB_ASSERT( !(thread_stack_size & propagation_mode_mask), "Requested stack size is not aligned" );
#if TBB_USE_EXCEPTIONS
thread_stack_size |= TBB_USE_CAPTURED_EXCEPTION ? propagation_mode_captured : propagation_mode_exact;
#endif /* TBB_USE_EXCEPTIONS */
#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
if (wait_workers_in_terminate)
my_scheduler = (internal::scheduler*)wait_workers_in_terminate_flag;
#endif
initialize( number_of_threads, thread_stack_size );
}
//! Destroy scheduler for this thread if thread has no other live task_scheduler_inits.
~task_scheduler_init() {
if( my_scheduler )
terminate();
internal::poison_pointer( my_scheduler );
}
//! Returns the number of threads TBB scheduler would create if initialized by default.
/** Result returned by this method does not depend on whether the scheduler
has already been initialized.
Because TBB 2.0 does not support blocking tasks yet, you may use this method
to boost the number of threads in TBB's internal pool if your tasks are
doing I/O operations. The optimal number of additional threads depends on how
much time your tasks spend in the blocked state.
Before TBB 3.0 U4 this method returned the number of logical CPUs in the
system. Currently on Windows, Linux and FreeBSD it returns the number of
logical CPUs available to the current process in accordance with its affinity
mask.
NOTE: The return value of this method never changes after its first invocation.
This means that changes in the process affinity mask that take place after
this method was first invoked will not affect the number of worker threads
in the TBB worker thread pool. */
static int __TBB_EXPORTED_FUNC default_num_threads ();
//! Returns true if scheduler is active (initialized); false otherwise
bool is_active() const { return my_scheduler != NULL; }
};
} // namespace tbb
#endif /* __TBB_task_scheduler_init_H */
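// Illustrative usage sketch (not part of the original TBB headers): limiting the
// scheduler to two threads for the lifetime of the init object. Assumes a C++11
// compiler for the lambda; without such an object, TBB auto-initializes itself
// with default_num_threads() threads.
#include "tbb/task_scheduler_init.h"
#include "tbb/parallel_for.h"

int main() {
    tbb::task_scheduler_init init( 2 );              // at most 2 threads in this scope
    tbb::parallel_for( 0, 100, []( int i ) { (void)i; /* work on element i */ } );
    return 0;                                        // ~task_scheduler_init() releases the scheduler
}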


@@ -0,0 +1,167 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_task_scheduler_observer_H
#define __TBB_task_scheduler_observer_H
#include "atomic.h"
#if __TBB_TASK_ARENA
#include "task_arena.h"
#endif //__TBB_TASK_ARENA
#if __TBB_SCHEDULER_OBSERVER
namespace tbb {
namespace interface6 {
class task_scheduler_observer;
}
namespace internal {
class observer_proxy;
class observer_list;
class task_scheduler_observer_v3 {
friend class observer_proxy;
friend class observer_list;
friend class interface6::task_scheduler_observer;
//! Pointer to the proxy holding this observer.
/** Observers are proxied by the scheduler to maintain persistent lists of them. **/
observer_proxy* my_proxy;
//! Counter preventing the observer from being destroyed while in use by the scheduler.
/** Valid only when observation is on. **/
atomic<intptr_t> my_busy_count;
public:
//! Enable or disable observation
/** For local observers the method can be used only when the current thread
has the task scheduler initialized or is attached to an arena.
Repeated calls with the same state are no-ops. **/
void __TBB_EXPORTED_METHOD observe( bool state=true );
//! Returns true if observation is enabled, false otherwise.
bool is_observing() const {return my_proxy!=NULL;}
//! Construct observer with observation disabled.
task_scheduler_observer_v3() : my_proxy(NULL) { my_busy_count.store<relaxed>(0); }
//! Entry notification
/** Invoked from inside observe(true) call and whenever a worker enters the arena
this observer is associated with. If a thread is already in the arena when
the observer is activated, the entry notification is called before it
executes the first stolen task.
Obsolete semantics. For global observers it is called by a thread before
the first steal since observation became enabled. **/
virtual void on_scheduler_entry( bool /*is_worker*/ ) {}
//! Exit notification
/** Invoked from inside observe(false) call and whenever a worker leaves the
arena this observer is associated with.
Obsolete semantics. For global observers it is called by a thread before
the first steal since observation became enabled. **/
virtual void on_scheduler_exit( bool /*is_worker*/ ) {}
//! Destructor automatically switches observation off if it is enabled.
virtual ~task_scheduler_observer_v3() { if(my_proxy) observe(false);}
};
} // namespace internal
#if __TBB_ARENA_OBSERVER
namespace interface6 {
class task_scheduler_observer : public internal::task_scheduler_observer_v3 {
friend class internal::task_scheduler_observer_v3;
friend class internal::observer_proxy;
friend class internal::observer_list;
/** Negative numbers with the largest absolute value to minimize probability
of coincidence in case of a bug in busy count usage. **/
// TODO: take more high bits for version number
static const intptr_t v6_trait = (intptr_t)((~(uintptr_t)0 >> 1) + 1);
//! contains task_arena pointer or tag indicating local or global semantics of the observer
intptr_t my_context_tag;
enum { global_tag = 0, implicit_tag = 1 };
public:
//! Construct local or global observer in inactive state (observation disabled).
/** For a local observer entry/exit notifications are invoked whenever a worker
thread joins/leaves the arena of the observer's owner thread. If a thread is
already in the arena when the observer is activated, the entry notification is
called before it executes the first stolen task. **/
/** TODO: Obsolete.
Global observer semantics is obsolete as it violates master thread isolation
guarantees and is not composable. Thus the current default behavior of the
constructor is obsolete too and will be changed in one of the future versions
of the library. **/
task_scheduler_observer( bool local = false ) {
my_context_tag = local? implicit_tag : global_tag;
}
#if __TBB_TASK_ARENA
//! Construct local observer for a given arena in inactive state (observation disabled).
/** entry/exit notifications are invoked whenever a thread joins/leaves arena.
If a thread is already in the arena when the observer is activated, the entry notification
is called before it executes the first stolen task. **/
task_scheduler_observer( task_arena & a) {
my_context_tag = (intptr_t)&a;
}
#endif //__TBB_TASK_ARENA
/** The destructor protects the observer instance from concurrent notification.
It is recommended to disable observation before the destructor of a derived class starts;
otherwise a notification callback may run concurrently on a partly destroyed object. **/
virtual ~task_scheduler_observer() { if(my_proxy) observe(false); }
//! Enable or disable observation
/** Warning: concurrent invocations of this method are not safe.
Repeated calls with the same state are no-ops. **/
void observe( bool state=true ) {
if( state && !my_proxy ) {
__TBB_ASSERT( !my_busy_count, "Inconsistent state of task_scheduler_observer instance");
my_busy_count.store<relaxed>(v6_trait);
}
internal::task_scheduler_observer_v3::observe(state);
}
//! Return commands for may_sleep()
enum { keep_awake = false, allow_sleep = true };
//! The callback can be invoked by a worker thread before it goes to sleep.
/** If it returns false ('keep_awake'), the thread will keep spinning and looking for work.
It will not be called for master threads. **/
virtual bool may_sleep() { return allow_sleep; }
};
} //namespace interface6
using interface6::task_scheduler_observer;
#else /*__TBB_ARENA_OBSERVER*/
typedef tbb::internal::task_scheduler_observer_v3 task_scheduler_observer;
#endif /*__TBB_ARENA_OBSERVER*/
} // namespace tbb
#endif /* __TBB_SCHEDULER_OBSERVER */
#endif /* __TBB_task_scheduler_observer_H */
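// Illustrative usage sketch (not part of the original TBB headers): an observer that
// reports threads entering and leaving the scheduler; a real observer would typically
// set thread affinity or thread-local state here instead of printing.
#include "tbb/task_scheduler_observer.h"
#include <cstdio>

class entry_logger : public tbb::task_scheduler_observer {
public:
    entry_logger() { observe( true ); }               // switch observation on
    /*override*/ void on_scheduler_entry( bool is_worker ) {
        std::printf( "%s thread joined the scheduler\n", is_worker ? "worker" : "master" );
    }
    /*override*/ void on_scheduler_exit( bool is_worker ) {
        std::printf( "%s thread left the scheduler\n", is_worker ? "worker" : "master" );
    }
    ~entry_logger() { observe( false ); }             // disable before destruction completes
};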

82
Research/inc/tbb/tbb.h Normal file

@@ -0,0 +1,82 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_tbb_H
#define __TBB_tbb_H
/**
This header bulk-includes declarations or definitions of all the functionality
provided by TBB (save for malloc dependent headers).
If you use only a few TBB constructs, consider including specific headers only.
Any header listed below can be included independently of others.
**/
#if TBB_PREVIEW_AGGREGATOR
#include "aggregator.h"
#endif
#include "aligned_space.h"
#include "atomic.h"
#include "blocked_range.h"
#include "blocked_range2d.h"
#include "blocked_range3d.h"
#include "cache_aligned_allocator.h"
#include "combinable.h"
#include "concurrent_hash_map.h"
#if TBB_PREVIEW_CONCURRENT_LRU_CACHE
#include "concurrent_lru_cache.h"
#endif
#include "concurrent_priority_queue.h"
#include "concurrent_queue.h"
#include "concurrent_unordered_map.h"
#include "concurrent_unordered_set.h"
#include "concurrent_vector.h"
#include "critical_section.h"
#include "enumerable_thread_specific.h"
#include "flow_graph.h"
#include "mutex.h"
#include "null_mutex.h"
#include "null_rw_mutex.h"
#include "parallel_do.h"
#include "parallel_for.h"
#include "parallel_for_each.h"
#include "parallel_invoke.h"
#include "parallel_reduce.h"
#include "parallel_scan.h"
#include "parallel_sort.h"
#include "partitioner.h"
#include "pipeline.h"
#include "queuing_mutex.h"
#include "queuing_rw_mutex.h"
#include "reader_writer_lock.h"
#include "recursive_mutex.h"
#include "spin_mutex.h"
#include "spin_rw_mutex.h"
#include "task.h"
#include "task_arena.h"
#include "task_group.h"
#include "task_scheduler_init.h"
#include "task_scheduler_observer.h"
#include "tbb_allocator.h"
#include "tbb_exception.h"
#include "tbb_thread.h"
#include "tick_count.h"
#endif /* __TBB_tbb_H */
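// Illustrative sketch (not part of the original TBB headers): as the comment above
// suggests, a translation unit that only needs one algorithm can include just that
// header instead of the bulk "tbb/tbb.h". Assumes a C++11 compiler for the lambdas.
#include "tbb/parallel_reduce.h"
#include "tbb/blocked_range.h"
#include <cstddef>

double parallel_sum( const double* a, std::size_t n ) {
    return tbb::parallel_reduce(
        tbb::blocked_range<std::size_t>( 0, n ), 0.0,
        [a]( const tbb::blocked_range<std::size_t>& r, double acc ) {
            for( std::size_t i = r.begin(); i != r.end(); ++i ) acc += a[i];
            return acc;
        },
        []( double x, double y ) { return x + y; } );
}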


@@ -0,0 +1,218 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_tbb_allocator_H
#define __TBB_tbb_allocator_H
#include "tbb_stddef.h"
#include <new>
#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
#include <utility> // std::forward
#endif
#if !TBB_USE_EXCEPTIONS && _MSC_VER
// Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers
#pragma warning (push)
#pragma warning (disable: 4530)
#endif
#include <cstring>
#if !TBB_USE_EXCEPTIONS && _MSC_VER
#pragma warning (pop)
#endif
namespace tbb {
//! @cond INTERNAL
namespace internal {
//! Deallocates memory using FreeHandler
/** The function uses scalable_free if the scalable allocator is available, and free if not. */
void __TBB_EXPORTED_FUNC deallocate_via_handler_v3( void *p );
//! Allocates memory using MallocHandler
/** The function uses scalable_malloc if the scalable allocator is available, and malloc if not. */
void* __TBB_EXPORTED_FUNC allocate_via_handler_v3( size_t n );
//! Returns true if standard malloc/free are used to work with memory.
bool __TBB_EXPORTED_FUNC is_malloc_used_v3();
}
//! @endcond
#if _MSC_VER && !defined(__INTEL_COMPILER)
// Workaround for erroneous "unreferenced parameter" warning in method destroy.
#pragma warning (push)
#pragma warning (disable: 4100)
#endif
//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5
/** The class selects the best memory allocation mechanism available
from scalable_malloc and standard malloc.
The members are ordered the same way they are in section 20.4.1
of the ISO C++ standard.
@ingroup memory_allocation */
template<typename T>
class tbb_allocator {
public:
typedef typename internal::allocator_type<T>::value_type value_type;
typedef value_type* pointer;
typedef const value_type* const_pointer;
typedef value_type& reference;
typedef const value_type& const_reference;
typedef size_t size_type;
typedef ptrdiff_t difference_type;
template<typename U> struct rebind {
typedef tbb_allocator<U> other;
};
//! Specifies current allocator
enum malloc_type {
scalable,
standard
};
tbb_allocator() throw() {}
tbb_allocator( const tbb_allocator& ) throw() {}
template<typename U> tbb_allocator(const tbb_allocator<U>&) throw() {}
pointer address(reference x) const {return &x;}
const_pointer address(const_reference x) const {return &x;}
//! Allocate space for n objects.
pointer allocate( size_type n, const void* /*hint*/ = 0) {
return pointer(internal::allocate_via_handler_v3( n * sizeof(value_type) ));
}
//! Free previously allocated block of memory.
void deallocate( pointer p, size_type ) {
internal::deallocate_via_handler_v3(p);
}
//! Largest value for which method allocate might succeed.
size_type max_size() const throw() {
size_type max = static_cast<size_type>(-1) / sizeof (value_type);
return (max > 0 ? max : 1);
}
//! Copy-construct value at location pointed to by p.
#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
template<typename U, typename... Args>
void construct(U *p, Args&&... args)
{ ::new((void *)p) U(std::forward<Args>(args)...); }
#else // __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
#if __TBB_CPP11_RVALUE_REF_PRESENT
void construct( pointer p, value_type&& value ) {::new((void*)(p)) value_type(std::move(value));}
#endif
void construct( pointer p, const value_type& value ) {::new((void*)(p)) value_type(value);}
#endif // __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
//! Destroy value at location pointed to by p.
void destroy( pointer p ) {p->~value_type();}
//! Returns current allocator
static malloc_type allocator_type() {
return internal::is_malloc_used_v3() ? standard : scalable;
}
};
#if _MSC_VER && !defined(__INTEL_COMPILER)
#pragma warning (pop)
#endif // warning 4100 is back
//! Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1
/** @ingroup memory_allocation */
template<>
class tbb_allocator<void> {
public:
typedef void* pointer;
typedef const void* const_pointer;
typedef void value_type;
template<typename U> struct rebind {
typedef tbb_allocator<U> other;
};
};
template<typename T, typename U>
inline bool operator==( const tbb_allocator<T>&, const tbb_allocator<U>& ) {return true;}
template<typename T, typename U>
inline bool operator!=( const tbb_allocator<T>&, const tbb_allocator<U>& ) {return false;}
//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5
/** The class is an adapter over an actual allocator; it zero-fills each allocation
using memset.
The members are ordered the same way they are in section 20.4.1
of the ISO C++ standard.
@ingroup memory_allocation */
template <typename T, template<typename X> class Allocator = tbb_allocator>
class zero_allocator : public Allocator<T>
{
public:
typedef Allocator<T> base_allocator_type;
typedef typename base_allocator_type::value_type value_type;
typedef typename base_allocator_type::pointer pointer;
typedef typename base_allocator_type::const_pointer const_pointer;
typedef typename base_allocator_type::reference reference;
typedef typename base_allocator_type::const_reference const_reference;
typedef typename base_allocator_type::size_type size_type;
typedef typename base_allocator_type::difference_type difference_type;
template<typename U> struct rebind {
typedef zero_allocator<U, Allocator> other;
};
zero_allocator() throw() { }
zero_allocator(const zero_allocator &a) throw() : base_allocator_type( a ) { }
template<typename U>
zero_allocator(const zero_allocator<U> &a) throw() : base_allocator_type( Allocator<U>( a ) ) { }
pointer allocate(const size_type n, const void *hint = 0 ) {
pointer ptr = base_allocator_type::allocate( n, hint );
std::memset( ptr, 0, n * sizeof(value_type) );
return ptr;
}
};
//! Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1
/** @ingroup memory_allocation */
template<template<typename T> class Allocator>
class zero_allocator<void, Allocator> : public Allocator<void> {
public:
typedef Allocator<void> base_allocator_type;
typedef typename base_allocator_type::value_type value_type;
typedef typename base_allocator_type::pointer pointer;
typedef typename base_allocator_type::const_pointer const_pointer;
template<typename U> struct rebind {
typedef zero_allocator<U, Allocator> other;
};
};
template<typename T1, template<typename X1> class B1, typename T2, template<typename X2> class B2>
inline bool operator==( const zero_allocator<T1,B1> &a, const zero_allocator<T2,B2> &b) {
return static_cast< B1<T1> >(a) == static_cast< B2<T2> >(b);
}
template<typename T1, template<typename X1> class B1, typename T2, template<typename X2> class B2>
inline bool operator!=( const zero_allocator<T1,B1> &a, const zero_allocator<T2,B2> &b) {
return static_cast< B1<T1> >(a) != static_cast< B2<T2> >(b);
}
} // namespace tbb
#endif /* __TBB_tbb_allocator_H */
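// Illustrative usage sketch (not part of the original TBB headers): tbb_allocator as a
// drop-in STL allocator; memory comes from the scalable allocator when the TBB malloc
// library is available and from plain malloc/free otherwise. C++03 is sufficient.
#include "tbb/tbb_allocator.h"
#include <vector>

typedef std::vector<int, tbb::tbb_allocator<int> > int_vector;

int sum_of_squares( int n ) {
    int_vector v;
    for( int i = 0; i < n; ++i )
        v.push_back( i * i );
    int s = 0;
    for( int_vector::const_iterator it = v.begin(); it != v.end(); ++it )
        s += *it;
    return s;
}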


@@ -0,0 +1,646 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_tbb_config_H
#define __TBB_tbb_config_H
/** This header is supposed to contain macro definitions and C style comments only.
The macros defined here are intended to control such aspects of TBB build as
- presence of compiler features
- compilation modes
- feature sets
- known compiler/platform issues
**/
/*Check which standard library we use on OS X.*/
/*__TBB_SYMBOL is defined only while processing exported symbols list where C++ is not allowed.*/
#if !defined(__TBB_SYMBOL) && __APPLE__
#include <cstddef>
#endif
// note that when ICC is in use __TBB_GCC_VERSION might not closely match GCC version on the machine
#define __TBB_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#if __clang__
/**according to clang documentation version can be vendor specific **/
#define __TBB_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
#endif
/** Preprocessor symbols to determine HW architecture **/
#if _WIN32||_WIN64
# if defined(_M_X64)||defined(__x86_64__) // the latter for MinGW support
# define __TBB_x86_64 1
# elif defined(_M_IA64)
# define __TBB_ipf 1
# elif defined(_M_IX86)||defined(__i386__) // the latter for MinGW support
# define __TBB_x86_32 1
# else
# define __TBB_generic_arch 1
# endif
#else /* Assume generic Unix */
# if !__linux__ && !__APPLE__
# define __TBB_generic_os 1
# endif
# if __x86_64__
# define __TBB_x86_64 1
# elif __ia64__
# define __TBB_ipf 1
# elif __i386__||__i386 // __i386 is for Sun OS
# define __TBB_x86_32 1
# else
# define __TBB_generic_arch 1
# endif
#endif
#if __MIC__ || __MIC2__
#define __TBB_DEFINE_MIC 1
#endif
#define __TBB_TSX_AVAILABLE (__TBB_x86_32 || __TBB_x86_64) && !__TBB_DEFINE_MIC
/** Presence of compiler features **/
#if __INTEL_COMPILER == 9999 && __INTEL_COMPILER_BUILD_DATE == 20110811
/* Intel(R) Composer XE 2011 Update 6 incorrectly sets __INTEL_COMPILER. Fix it. */
#undef __INTEL_COMPILER
#define __INTEL_COMPILER 1210
#endif
#if __TBB_GCC_VERSION >= 40400 && !defined(__INTEL_COMPILER)
/** warning suppression pragmas available in GCC since 4.4 **/
#define __TBB_GCC_WARNING_SUPPRESSION_PRESENT 1
#endif
/* Select particular features of C++11 based on compiler version.
ICC 12.1 (Linux), GCC 4.3 and higher, clang 2.9 and higher
set __GXX_EXPERIMENTAL_CXX0X__ in c++11 mode.
Compilers that mimics other compilers (ICC, clang) must be processed before
compilers they mimic (GCC, MSVC).
TODO: The following conditions should be extended when new compilers/runtimes
support added.
*/
#if __INTEL_COMPILER
/** C++11 mode detection macros for Intel C++ compiler (enabled by -std=c++0x option):
__INTEL_CXX11_MODE__ for version >=13.0
__STDC_HOSTED__ for version >=12.0 on Windows,
__GXX_EXPERIMENTAL_CXX0X__ for version >=12.0 on Linux and OS X. **/
// On Windows, C++11 features supported by Visual Studio 2010 and higher are enabled by default
#ifndef __INTEL_CXX11_MODE__
#define __INTEL_CXX11_MODE__ ((_MSC_VER && __STDC_HOSTED__) || __GXX_EXPERIMENTAL_CXX0X__)
// TODO: check if more conditions can be simplified with the above macro
#endif
#define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT (__INTEL_CXX11_MODE__ && __VARIADIC_TEMPLATES)
// Both r-value reference support in compiler and std::move/std::forward
// presence in C++ standard library is checked.
#define __TBB_CPP11_RVALUE_REF_PRESENT ((__GXX_EXPERIMENTAL_CXX0X__ && (__TBB_GCC_VERSION >= 40300 || _LIBCPP_VERSION) || _MSC_VER >= 1600) && __INTEL_COMPILER >= 1200)
#if _MSC_VER >= 1600
#define __TBB_EXCEPTION_PTR_PRESENT ( __INTEL_COMPILER > 1300 \
/*ICC 12.1 Upd 10 and 13 beta Upd 2 fixed exception_ptr linking issue*/ \
|| (__INTEL_COMPILER == 1300 && __INTEL_COMPILER_BUILD_DATE >= 20120530) \
|| (__INTEL_COMPILER == 1210 && __INTEL_COMPILER_BUILD_DATE >= 20120410) )
/** The libstdc++ that comes with GCC 4.6 uses C++11 features not supported by ICC 12.1.
* Because of that, ICC 12.1 does not support C++11 mode with gcc 4.6 (or higher),
* and therefore does not define __GXX_EXPERIMENTAL_CXX0X__ macro **/
#elif __TBB_GCC_VERSION >= 40404 && __TBB_GCC_VERSION < 40600
#define __TBB_EXCEPTION_PTR_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __INTEL_COMPILER >= 1200)
#elif __TBB_GCC_VERSION >= 40600
#define __TBB_EXCEPTION_PTR_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __INTEL_COMPILER >= 1300)
#elif _LIBCPP_VERSION
#define __TBB_EXCEPTION_PTR_PRESENT __GXX_EXPERIMENTAL_CXX0X__
#else
#define __TBB_EXCEPTION_PTR_PRESENT 0
#endif
#define __TBB_STATIC_ASSERT_PRESENT (__INTEL_CXX11_MODE__ || _MSC_VER >= 1600)
#define __TBB_CPP11_TUPLE_PRESENT (_MSC_VER >= 1600 || (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40300))
/** Intel C++ compiler 14.0 crashes on using __has_include. When it is fixed, the condition will need to be updated. **/
#if (__clang__ && __INTEL_COMPILER > 1400)
#if (__has_feature(__cxx_generalized_initializers__) && __has_include(<initializer_list>))
#define __TBB_INITIALIZER_LISTS_PRESENT 1
#endif
#else
/** TODO: when MSVC2013 is supported by the Intel C++ compiler, it will be enabled silently by the compiler, so the rule will need to be updated. **/
#define __TBB_INITIALIZER_LISTS_PRESENT __INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1400 && (_MSC_VER >= 1800 || __TBB_GCC_VERSION >= 40400 || _LIBCPP_VERSION)
#endif
#define __TBB_CONSTEXPR_PRESENT __INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1400
#define __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT __INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1200
/** ICC seems to disable support of noexcept even in C++11 when compiling in compatibility mode for gcc <4.6 **/
#define __TBB_NOEXCEPT_PRESENT __INTEL_CXX11_MODE__ && __INTEL_COMPILER >= 1300 && (__TBB_GCC_VERSION >= 40600 || _LIBCPP_VERSION || _MSC_VER)
#define __TBB_CPP11_STD_BEGIN_END_PRESENT (_MSC_VER >= 1700 || __GXX_EXPERIMENTAL_CXX0X__ && __INTEL_COMPILER >= 1310 && (__TBB_GCC_VERSION >= 40600 || _LIBCPP_VERSION))
#define __TBB_CPP11_AUTO_PRESENT (_MSC_VER >= 1600 || __GXX_EXPERIMENTAL_CXX0X__ && __INTEL_COMPILER >= 1210)
#define __TBB_CPP11_DECLTYPE_PRESENT (_MSC_VER >= 1600 || __GXX_EXPERIMENTAL_CXX0X__ && __INTEL_COMPILER >= 1210)
#elif __clang__
//TODO: these options need to be rechecked
/** on OS X* the only way to get C++11 is to use clang. For library features (e.g. exception_ptr) libc++ is also
* required. So there is no need to check GCC version for clang**/
#define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT (__has_feature(__cxx_variadic_templates__))
#define __TBB_CPP11_RVALUE_REF_PRESENT (__has_feature(__cxx_rvalue_references__) && (__TBB_GCC_VERSION >= 40300 || _LIBCPP_VERSION))
/** TODO: extend exception_ptr related conditions to cover libstdc++ **/
#define __TBB_EXCEPTION_PTR_PRESENT (__cplusplus >= 201103L && _LIBCPP_VERSION)
#define __TBB_STATIC_ASSERT_PRESENT __has_feature(__cxx_static_assert__)
/** The Clang preprocessor has problems dealing with expressions containing __has_include in #ifs
* used inside C++ code (at least the version that comes with OS X 10.8: Apple LLVM version 4.2 (clang-425.0.28), based on LLVM 3.2svn). **/
#if (__GXX_EXPERIMENTAL_CXX0X__ && __has_include(<tuple>))
#define __TBB_CPP11_TUPLE_PRESENT 1
#endif
#if (__has_feature(__cxx_generalized_initializers__) && __has_include(<initializer_list>))
#define __TBB_INITIALIZER_LISTS_PRESENT 1
#endif
#define __TBB_CONSTEXPR_PRESENT __has_feature(__cxx_constexpr__)
#define __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT (__has_feature(__cxx_defaulted_functions__) && __has_feature(__cxx_deleted_functions__))
/**For some unknown reason __has_feature(__cxx_noexcept) does not yield true for all cases. Compiler bug ? **/
#define __TBB_NOEXCEPT_PRESENT (__cplusplus >= 201103L)
#define __TBB_CPP11_STD_BEGIN_END_PRESENT (__has_feature(__cxx_range_for__) && _LIBCPP_VERSION)
#define __TBB_CPP11_AUTO_PRESENT __has_feature(__cxx_auto_type__)
#define __TBB_CPP11_DECLTYPE_PRESENT __has_feature(__cxx_decltype__)
#elif __GNUC__
#define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT __GXX_EXPERIMENTAL_CXX0X__
#define __TBB_CPP11_RVALUE_REF_PRESENT __GXX_EXPERIMENTAL_CXX0X__
/** __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 here is a substitution for _GLIBCXX_ATOMIC_BUILTINS_4, which is a prerequisite
for exception_ptr but cannot be used in this file because it is defined in a header, not by the compiler.
If the compiler has no atomic intrinsics, the C++ library should not expect those as well. **/
#define __TBB_EXCEPTION_PTR_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40404 && __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4)
#define __TBB_STATIC_ASSERT_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40300)
#define __TBB_CPP11_TUPLE_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40300)
#define __TBB_INITIALIZER_LISTS_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40400)
/** gcc seems to support constexpr from 4.4, but seemingly reasonable tests (in test_atomic) fail to compile prior to 4.6 **/
#define __TBB_CONSTEXPR_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40400)
#define __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40400)
#define __TBB_NOEXCEPT_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40600)
#define __TBB_CPP11_STD_BEGIN_END_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40600)
#define __TBB_CPP11_AUTO_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40400)
#define __TBB_CPP11_DECLTYPE_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40400)
#elif _MSC_VER
#define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT (_MSC_VER >= 1800)
#define __TBB_CPP11_RVALUE_REF_PRESENT (_MSC_VER >= 1600)
#define __TBB_EXCEPTION_PTR_PRESENT (_MSC_VER >= 1600)
#define __TBB_STATIC_ASSERT_PRESENT (_MSC_VER >= 1600)
#define __TBB_CPP11_TUPLE_PRESENT (_MSC_VER >= 1600)
#define __TBB_INITIALIZER_LISTS_PRESENT (_MSC_VER >= 1800)
#define __TBB_CONSTEXPR_PRESENT 0
#define __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT (_MSC_VER >= 1800)
#define __TBB_NOEXCEPT_PRESENT 0 /*for _MSC_VER == 1800*/
#define __TBB_CPP11_STD_BEGIN_END_PRESENT (_MSC_VER >= 1700)
#define __TBB_CPP11_AUTO_PRESENT (_MSC_VER >= 1600)
#define __TBB_CPP11_DECLTYPE_PRESENT (_MSC_VER >= 1600)
#else
#define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT 0
#define __TBB_CPP11_RVALUE_REF_PRESENT 0
#define __TBB_EXCEPTION_PTR_PRESENT 0
#define __TBB_STATIC_ASSERT_PRESENT 0
#define __TBB_CPP11_TUPLE_PRESENT 0
#define __TBB_INITIALIZER_LISTS_PRESENT 0
#define __TBB_CONSTEXPR_PRESENT 0
#define __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT 0
#define __TBB_NOEXCEPT_PRESENT 0
#define __TBB_CPP11_STD_BEGIN_END_PRESENT 0
#define __TBB_CPP11_AUTO_PRESENT 0
#define __TBB_CPP11_DECLTYPE_PRESENT 0
#endif
// C++11 standard library features
#define __TBB_CPP11_TYPE_PROPERTIES_PRESENT (_LIBCPP_VERSION || _MSC_VER >= 1700)
#define __TBB_TR1_TYPE_PROPERTIES_IN_STD_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40300 || _MSC_VER >= 1600)
// GCC has partial support for type properties
#define __TBB_CPP11_IS_COPY_CONSTRUCTIBLE_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40700 || __TBB_CPP11_TYPE_PROPERTIES_PRESENT)
// In GCC and MSVC, implementation of std::move_if_noexcept is not aligned with noexcept
#define __TBB_MOVE_IF_NOEXCEPT_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40700 || _MSC_VER >= 1800 || __clang__ && _LIBCPP_VERSION && __TBB_NOEXCEPT_PRESENT)
//TODO: Probably a more accurate way is to analyze the libstdc++ version via __GLIBCXX__ instead of __TBB_GCC_VERSION
#define __TBB_ALLOCATOR_TRAITS_PRESENT (__cplusplus >= 201103L && _LIBCPP_VERSION || _MSC_VER >= 1700 || \
__GXX_EXPERIMENTAL_CXX0X__ && __TBB_GCC_VERSION >= 40700 && !(__TBB_GCC_VERSION == 40700 && __TBB_DEFINE_MIC) \
)
#define __TBB_MAKE_EXCEPTION_PTR_PRESENT (__TBB_EXCEPTION_PTR_PRESENT && (_MSC_VER >= 1700 || __TBB_GCC_VERSION >= 40600 || _LIBCPP_VERSION))
//TODO: not clear how exactly this macro affects exception_ptr - investigate
// On linux ICC fails to find existing std::exception_ptr in libstdc++ without this define
#if __INTEL_COMPILER && __GNUC__ && __TBB_EXCEPTION_PTR_PRESENT && !defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4)
#define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1
#endif
// Work around a bug in MinGW32
#if __MINGW32__ && __TBB_EXCEPTION_PTR_PRESENT && !defined(_GLIBCXX_ATOMIC_BUILTINS_4)
#define _GLIBCXX_ATOMIC_BUILTINS_4
#endif
#if __GNUC__ || __SUNPRO_CC || __IBMCPP__
/* ICC defines __GNUC__ and so is covered */
#define __TBB_ATTRIBUTE_ALIGNED_PRESENT 1
#elif _MSC_VER && (_MSC_VER >= 1300 || __INTEL_COMPILER)
#define __TBB_DECLSPEC_ALIGN_PRESENT 1
#endif
/* Actually ICC supports gcc __sync_* intrinsics starting with 11.1,
* but 64-bit support for 32-bit targets comes in later versions. */
/* TODO: change the version back to 4.1.2 once macro __TBB_WORD_SIZE become optional */
#if __TBB_GCC_VERSION >= 40306 || __INTEL_COMPILER >= 1200
/** built-in atomics available in GCC since 4.1.2 **/
#define __TBB_GCC_BUILTIN_ATOMICS_PRESENT 1
#endif
#if __INTEL_COMPILER >= 1200
/** built-in C++11 style atomics available in ICC since 12.0 **/
#define __TBB_ICC_BUILTIN_ATOMICS_PRESENT 1
#endif
#define __TBB_TSX_INTRINSICS_PRESENT ((__RTM__ || _MSC_VER>=1700 || __INTEL_COMPILER>=1300) && !__TBB_DEFINE_MIC && !__ANDROID__)
/** User controlled TBB features & modes **/
#ifndef TBB_USE_DEBUG
#ifdef _DEBUG
#define TBB_USE_DEBUG _DEBUG
#else
#define TBB_USE_DEBUG 0
#endif
#endif /* TBB_USE_DEBUG */
#ifndef TBB_USE_ASSERT
#define TBB_USE_ASSERT TBB_USE_DEBUG
#endif /* TBB_USE_ASSERT */
#ifndef TBB_USE_THREADING_TOOLS
#define TBB_USE_THREADING_TOOLS TBB_USE_DEBUG
#endif /* TBB_USE_THREADING_TOOLS */
#ifndef TBB_USE_PERFORMANCE_WARNINGS
#ifdef TBB_PERFORMANCE_WARNINGS
#define TBB_USE_PERFORMANCE_WARNINGS TBB_PERFORMANCE_WARNINGS
#else
#define TBB_USE_PERFORMANCE_WARNINGS TBB_USE_DEBUG
#endif /* TBB_USE_PERFORMANCE_WARNINGS */
#endif /* TBB_USE_PERFORMANCE_WARNINGS */
#if !defined(__EXCEPTIONS) && !defined(_CPPUNWIND) && !defined(__SUNPRO_CC) || defined(_XBOX)
#if TBB_USE_EXCEPTIONS
#error Compilation settings do not support exception handling. Please do not set TBB_USE_EXCEPTIONS macro or set it to 0.
#elif !defined(TBB_USE_EXCEPTIONS)
#define TBB_USE_EXCEPTIONS 0
#endif
#elif !defined(TBB_USE_EXCEPTIONS)
#if __TBB_DEFINE_MIC
#define TBB_USE_EXCEPTIONS 0
#else
#define TBB_USE_EXCEPTIONS 1
#endif
#elif TBB_USE_EXCEPTIONS && __TBB_DEFINE_MIC
#error Please do not set TBB_USE_EXCEPTIONS macro or set it to 0.
#endif
#ifndef TBB_IMPLEMENT_CPP0X
/** By default, use C++11 classes if available **/
#if __GNUC__==4 && __GNUC_MINOR__>=4 && __GXX_EXPERIMENTAL_CXX0X__
#define TBB_IMPLEMENT_CPP0X 0
#elif __clang__ && __cplusplus >= 201103L
//TODO: consider introducing separate macros for each file?
//prevent injection of corresponding tbb names into std:: namespace if native headers are present
#if __has_include(<thread>) || __has_include(<condition_variable>)
#define TBB_IMPLEMENT_CPP0X 0
#else
#define TBB_IMPLEMENT_CPP0X 1
#endif
#elif _MSC_VER>=1700
#define TBB_IMPLEMENT_CPP0X 0
#elif __STDCPP_THREADS__
#define TBB_IMPLEMENT_CPP0X 0
#else
#define TBB_IMPLEMENT_CPP0X 1
#endif
#endif /* TBB_IMPLEMENT_CPP0X */
/* TBB_USE_CAPTURED_EXCEPTION should be explicitly set to either 0 or 1, as it is used as C++ const */
#ifndef TBB_USE_CAPTURED_EXCEPTION
/** IA-64 architecture pre-built TBB binaries do not support exception_ptr. **/
#if __TBB_EXCEPTION_PTR_PRESENT && !defined(__ia64__)
#define TBB_USE_CAPTURED_EXCEPTION 0
#else
#define TBB_USE_CAPTURED_EXCEPTION 1
#endif
#else /* defined TBB_USE_CAPTURED_EXCEPTION */
#if !TBB_USE_CAPTURED_EXCEPTION && !__TBB_EXCEPTION_PTR_PRESENT
#error Current runtime does not support std::exception_ptr. Set TBB_USE_CAPTURED_EXCEPTION and make sure that your code is ready to catch tbb::captured_exception.
#endif
#endif /* defined TBB_USE_CAPTURED_EXCEPTION */
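/* Editor's sketch (an assumption, not part of the original header): how a client translation
   unit typically pins the user-controlled modes above, by defining them before its first TBB
   include. The chosen values and the included header name are illustrative only. */
#if 0   /* illustrative only; never compiled here */
#define TBB_USE_ASSERT 1               /* keep internal assertions even in a release build */
#define TBB_USE_CAPTURED_EXCEPTION 0   /* requires a runtime with std::exception_ptr */
#include "tbb/tbb_stddef.h"            /* any TBB header picks up the settings above */
#endif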
/** Check whether the request to use GCC atomics can be satisfied **/
#if TBB_USE_GCC_BUILTINS && !__TBB_GCC_BUILTIN_ATOMICS_PRESENT
#error "GCC atomic built-ins are not supported."
#endif
/** Internal TBB features & modes **/
/** __TBB_WEAK_SYMBOLS_PRESENT denotes that the system supports the weak symbol mechanism **/
#ifndef __TBB_WEAK_SYMBOLS_PRESENT
#define __TBB_WEAK_SYMBOLS_PRESENT ( !_WIN32 && !__APPLE__ && !__sun && (__TBB_GCC_VERSION >= 40000 || __INTEL_COMPILER ) )
#endif
/** __TBB_DYNAMIC_LOAD_ENABLED describes whether the system can load shared libraries at run time **/
#ifndef __TBB_DYNAMIC_LOAD_ENABLED
#define __TBB_DYNAMIC_LOAD_ENABLED 1
#endif
/** __TBB_SOURCE_DIRECTLY_INCLUDED is a mode used in whitebox testing when
it's necessary to test internal functions not exported from TBB DLLs
**/
#if (_WIN32||_WIN64) && (__TBB_SOURCE_DIRECTLY_INCLUDED || TBB_USE_PREVIEW_BINARY)
#define __TBB_NO_IMPLICIT_LINKAGE 1
#define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1
#endif
#ifndef __TBB_COUNT_TASK_NODES
#define __TBB_COUNT_TASK_NODES TBB_USE_ASSERT
#endif
#ifndef __TBB_TASK_GROUP_CONTEXT
#define __TBB_TASK_GROUP_CONTEXT 1
#endif /* __TBB_TASK_GROUP_CONTEXT */
#ifndef __TBB_SCHEDULER_OBSERVER
#define __TBB_SCHEDULER_OBSERVER 1
#endif /* __TBB_SCHEDULER_OBSERVER */
#ifndef __TBB_FP_CONTEXT
#define __TBB_FP_CONTEXT __TBB_TASK_GROUP_CONTEXT
#endif /* __TBB_FP_CONTEXT */
#if __TBB_FP_CONTEXT && !__TBB_TASK_GROUP_CONTEXT
#error __TBB_FP_CONTEXT requires __TBB_TASK_GROUP_CONTEXT to be enabled
#endif
#ifndef __TBB_TASK_ARENA
#define __TBB_TASK_ARENA 1
#endif /* __TBB_TASK_ARENA */
#if __TBB_TASK_ARENA
#define __TBB_RECYCLE_TO_ENQUEUE __TBB_BUILD // keep non-official
#if !__TBB_SCHEDULER_OBSERVER
#error __TBB_TASK_ARENA requires __TBB_SCHEDULER_OBSERVER to be enabled
#endif
#endif /* __TBB_TASK_ARENA */
#ifndef __TBB_ARENA_OBSERVER
#define __TBB_ARENA_OBSERVER ((__TBB_BUILD||TBB_PREVIEW_LOCAL_OBSERVER)&& __TBB_SCHEDULER_OBSERVER)
#endif /* __TBB_ARENA_OBSERVER */
#ifndef __TBB_SLEEP_PERMISSION
#define __TBB_SLEEP_PERMISSION ((__TBB_CPF_BUILD||TBB_PREVIEW_LOCAL_OBSERVER)&& __TBB_SCHEDULER_OBSERVER)
#endif /* __TBB_SLEEP_PERMISSION */
#if TBB_PREVIEW_FLOW_GRAPH_TRACE
#define __TBB_NO_IMPLICIT_LINKAGE 1
#endif /* TBB_PREVIEW_FLOW_GRAPH_TRACE */
#ifndef __TBB_ITT_STRUCTURE_API
#define __TBB_ITT_STRUCTURE_API ( !__TBB_DEFINE_MIC && (__TBB_CPF_BUILD || TBB_PREVIEW_FLOW_GRAPH_TRACE) )
#endif
#if TBB_USE_EXCEPTIONS && !__TBB_TASK_GROUP_CONTEXT
#error TBB_USE_EXCEPTIONS requires __TBB_TASK_GROUP_CONTEXT to be enabled
#endif
#ifndef __TBB_TASK_PRIORITY
#define __TBB_TASK_PRIORITY (__TBB_TASK_GROUP_CONTEXT)
#endif /* __TBB_TASK_PRIORITY */
#if __TBB_TASK_PRIORITY && !__TBB_TASK_GROUP_CONTEXT
#error __TBB_TASK_PRIORITY requires __TBB_TASK_GROUP_CONTEXT to be enabled
#endif
#if TBB_PREVIEW_WAITING_FOR_WORKERS || __TBB_BUILD
#define __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE 1
#endif
#if !defined(__TBB_SURVIVE_THREAD_SWITCH) && \
(_WIN32 || _WIN64 || __APPLE__ || (__linux__ && !__ANDROID__))
#define __TBB_SURVIVE_THREAD_SWITCH 1
#endif /* __TBB_SURVIVE_THREAD_SWITCH */
#ifndef __TBB_DEFAULT_PARTITIONER
#if TBB_DEPRECATED
/** Default partitioner for parallel loop templates in TBB 1.0-2.1 */
#define __TBB_DEFAULT_PARTITIONER tbb::simple_partitioner
#else
/** Default partitioner for parallel loop templates since TBB 2.2 */
#define __TBB_DEFAULT_PARTITIONER tbb::auto_partitioner
#endif /* TBB_DEPRECATED */
#endif /* !defined(__TBB_DEFAULT_PARTITIONER) */
#ifndef __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES
#define __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES 1
#endif
#ifndef __TBB_ENABLE_RANGE_FEEDBACK
#define __TBB_ENABLE_RANGE_FEEDBACK 0
#endif
#ifdef _VARIADIC_MAX
#define __TBB_VARIADIC_MAX _VARIADIC_MAX
#else
#if _MSC_VER >= 1700
#define __TBB_VARIADIC_MAX 5 /* current VS11 setting, may change. */
#else
#define __TBB_VARIADIC_MAX 10
#endif
#endif
/** __TBB_WIN8UI_SUPPORT enables support for new Windows* 8 Store Apps and limits the ability to load
shared libraries at run time to the application container only **/
#if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_FAMILY_APP
#define __TBB_WIN8UI_SUPPORT 1
#else
#define __TBB_WIN8UI_SUPPORT 0
#endif
/** Macros of the form __TBB_XXX_BROKEN denote known issues caused by
bugs in compilers or in standard or OS-specific libraries. They should be
removed as soon as the corresponding bugs are fixed or the buggy OS/compiler
versions drop out of the support list.
**/
#if __ANDROID__ && __TBB_GCC_VERSION <= 40403 && !__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
/** Necessary because on Android, 8-byte CAS and F&A are not available for some processor architectures,
but GCC 4.4.3 emits no mandatory warning. Instead, only a linkage error occurs when
these atomic operations are used (such as in the unit test test_atomic.exe). **/
#define __TBB_GCC_64BIT_ATOMIC_BUILTINS_BROKEN 1
#elif __TBB_x86_32 && __TBB_GCC_VERSION == 40102 && ! __GNUC_RH_RELEASE__
/** GCC 4.1.2 erroneously emits calls to external functions for 64-bit __sync_ intrinsics.
However, these functions are not defined anywhere. It seems that this problem was fixed later on,
and RHEL got an updated version of gcc 4.1.2. **/
#define __TBB_GCC_64BIT_ATOMIC_BUILTINS_BROKEN 1
#endif
#if __GNUC__ && __TBB_x86_64 && __INTEL_COMPILER == 1200
#define __TBB_ICC_12_0_INL_ASM_FSTCW_BROKEN 1
#endif
#if _MSC_VER && __INTEL_COMPILER && (__INTEL_COMPILER<1110 || __INTEL_COMPILER==1110 && __INTEL_COMPILER_BUILD_DATE < 20091012)
/** Necessary to avoid ICL error (or warning in non-strict mode):
"exception specification for implicitly declared virtual destructor is
incompatible with that of overridden one". **/
#define __TBB_DEFAULT_DTOR_THROW_SPEC_BROKEN 1
#endif
#if !__INTEL_COMPILER && (_MSC_VER && _MSC_VER < 1500 || __TBB_GCC_VERSION && __TBB_GCC_VERSION < 40102)
/** gcc 3.4.6 (and earlier) and VS2005 (and earlier) do not allow declaring a template class as a friend
of classes defined in other namespaces. **/
#define __TBB_TEMPLATE_FRIENDS_BROKEN 1
#endif
//TODO: recheck for different clang versions
#if __GLIBC__==2 && __GLIBC_MINOR__==3 || (__APPLE__ && ( __INTEL_COMPILER==1200 && !TBB_USE_DEBUG))
/** Macro controlling EH usages in TBB tests.
Some older versions of glibc crash when exception handling happens concurrently. **/
#define __TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN 1
#else
#define __TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN 0
#endif
#if (_WIN32||_WIN64) && __INTEL_COMPILER == 1110
/** A bug in Intel compiler 11.1.044 for IA-32 on Windows leads to a worker thread crash on the thread's startup. **/
#define __TBB_ICL_11_1_CODE_GEN_BROKEN 1
#endif
#if __clang__ || (__GNUC__==3 && __GNUC_MINOR__==3 && !defined(__INTEL_COMPILER))
/** Bugs with access to nested classes declared in a protected section */
#define __TBB_PROTECTED_NESTED_CLASS_BROKEN 1
#endif
#if __MINGW32__ && __TBB_GCC_VERSION < 40200
/** MinGW has a bug with stack alignment for routines invoked from MS RTLs.
Since GCC 4.2, the bug can be worked around via a special attribute. **/
#define __TBB_SSE_STACK_ALIGNMENT_BROKEN 1
#else
#define __TBB_SSE_STACK_ALIGNMENT_BROKEN 0
#endif
#if __GNUC__==4 && __GNUC_MINOR__==3 && __GNUC_PATCHLEVEL__==0
/* This version of GCC may incorrectly ignore control dependencies */
#define __TBB_GCC_OPTIMIZER_ORDERING_BROKEN 1
#endif
#if __FreeBSD__
/** A bug in FreeBSD 8.0 results in kernel panic when there is contention
on a mutex created with this attribute. **/
#define __TBB_PRIO_INHERIT_BROKEN 1
/** A bug in FreeBSD 8.0 results in test hanging when an exception occurs
during (concurrent?) object construction by means of placement new operator. **/
#define __TBB_PLACEMENT_NEW_EXCEPTION_SAFETY_BROKEN 1
#endif /* __FreeBSD__ */
#if (__linux__ || __APPLE__) && __i386__ && defined(__INTEL_COMPILER)
/** The Intel compiler for IA-32 (Linux|OS X) crashes or generates
incorrect code when __asm__ arguments have a cast to volatile. **/
#define __TBB_ICC_ASM_VOLATILE_BROKEN 1
#endif
#if !__INTEL_COMPILER && (_MSC_VER || __GNUC__==3 && __GNUC_MINOR__<=2)
/** Bug in GCC 3.2 and MSVC compilers that sometimes return 0 for __alignof(T)
when T has not yet been instantiated. **/
#define __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN 1
#endif
#if __TBB_DEFINE_MIC
/** Main thread and user's thread have different default thread affinity masks. **/
#define __TBB_MAIN_THREAD_AFFINITY_BROKEN 1
#endif
#if __GXX_EXPERIMENTAL_CXX0X__ && !defined(__EXCEPTIONS) && \
((!__INTEL_COMPILER && !__clang__ && (__TBB_GCC_VERSION>=40400 && __TBB_GCC_VERSION<40600)) || \
(__INTEL_COMPILER<=1400 && (__TBB_GCC_VERSION>=40400 && __TBB_GCC_VERSION<=40801)))
/* There is an issue for specific GCC toolchains when C++11 is enabled
and exceptions are disabled:
exception_ptr.h/nested_exception.h use throw unconditionally.
GCC can ignore 'throw' since 4.6, but with ICC the issue still exists.
*/
#define __TBB_LIBSTDCPP_EXCEPTION_HEADERS_BROKEN 1
#else
#define __TBB_LIBSTDCPP_EXCEPTION_HEADERS_BROKEN 0
#endif
#if __INTEL_COMPILER==1300 && __TBB_GCC_VERSION>=40700 && defined(__GXX_EXPERIMENTAL_CXX0X__)
/* Some C++11 features used inside libstdc++ are not supported by the Intel compiler.
* The version of gcc is checked instead of libstdc++ because
* - they are directly connected,
* - for now it is not possible to check the version of any standard library in this file
*/
#define __TBB_ICC_13_0_CPP11_STDLIB_SUPPORT_BROKEN 1
#else
#define __TBB_ICC_13_0_CPP11_STDLIB_SUPPORT_BROKEN 0
#endif
#if (__GNUC__==4 && __GNUC_MINOR__==4 ) && !defined(__INTEL_COMPILER) && !defined(__clang__)
/** excessive warnings related to strict aliasing rules in GCC 4.4 **/
#define __TBB_GCC_STRICT_ALIASING_BROKEN 1
/* topical remedy: #pragma GCC diagnostic ignored "-Wstrict-aliasing" */
#if !__TBB_GCC_WARNING_SUPPRESSION_PRESENT
#error Warning suppression is not supported, while it should be.
#endif
#endif
/* In PIC mode, some versions of GCC 4.1.2 generate incorrect inlined code for the 8-byte __sync_val_compare_and_swap intrinsic */
#if __TBB_GCC_VERSION == 40102 && __PIC__ && !defined(__INTEL_COMPILER) && !defined(__clang__)
#define __TBB_GCC_CAS8_BUILTIN_INLINING_BROKEN 1
#endif
#if __TBB_x86_32 && (__linux__ || __APPLE__ || _WIN32 || __sun || __ANDROID__) && (__INTEL_COMPILER || (__GNUC__==3 && __GNUC_MINOR__==3 ) || __SUNPRO_CC)
// Some compilers for IA-32 fail to provide 8-byte alignment of objects on the stack,
// even if the object specifies 8-byte alignment. On such platforms, the IA-32 implementation
// of 64-bit atomics (e.g. atomic<long long>) uses different tactics depending upon
// whether the object is properly aligned or not.
#define __TBB_FORCE_64BIT_ALIGNMENT_BROKEN 1
#else
#define __TBB_FORCE_64BIT_ALIGNMENT_BROKEN 0
#endif
#if __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT && __TBB_GCC_VERSION < 40700 && !defined(__INTEL_COMPILER) && !defined (__clang__)
#define __TBB_ZERO_INIT_WITH_DEFAULTED_CTOR_BROKEN 1
#endif
#if _MSC_VER && _MSC_VER <= 1800 && !__INTEL_COMPILER
// With MSVC, when an array is passed by const reference to a template function,
// constness from the function parameter may get propagated to the template parameter.
#define __TBB_CONST_REF_TO_ARRAY_TEMPLATE_PARAM_BROKEN 1
#endif
// A compiler bug: a disabled copy constructor prevents use of the move constructor
#define __TBB_IF_NO_COPY_CTOR_MOVE_SEMANTICS_BROKEN (_MSC_VER && (__INTEL_COMPILER >= 1300 && __INTEL_COMPILER <= 1310) && !__INTEL_CXX11_MODE__)
// MSVC 2013 and ICC 15 seem not to generate an implicit move constructor for an empty derived class, while they should
#define __TBB_CPP11_IMPLICIT_MOVE_MEMBERS_GENERATION_FOR_DERIVED_BROKEN (__TBB_CPP11_RVALUE_REF_PRESENT && \
( !__INTEL_COMPILER && _MSC_VER && _MSC_VER <=1800 || __INTEL_COMPILER && __INTEL_COMPILER <= 1500 ))
/** End of __TBB_XXX_BROKEN macro section **/
#if defined(_MSC_VER) && _MSC_VER>=1500 && !defined(__INTEL_COMPILER)
// A macro to suppress erroneous or benign "unreachable code" MSVC warning (4702)
#define __TBB_MSVC_UNREACHABLE_CODE_IGNORED 1
#endif
#define __TBB_ATOMIC_CTORS (__TBB_CONSTEXPR_PRESENT && __TBB_DEFAULTED_AND_DELETED_FUNC_PRESENT && (!__TBB_ZERO_INIT_WITH_DEFAULTED_CTOR_BROKEN))
#define __TBB_ALLOCATOR_CONSTRUCT_VARIADIC (__TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_RVALUE_REF_PRESENT)
#define __TBB_VARIADIC_PARALLEL_INVOKE (TBB_PREVIEW_VARIADIC_PARALLEL_INVOKE && __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_RVALUE_REF_PRESENT)
#endif /* __TBB_tbb_config_H */

View File

@@ -0,0 +1,379 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_exception_H
#define __TBB_exception_H
#include "tbb_stddef.h"
#if !TBB_USE_EXCEPTIONS && _MSC_VER
// Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers
#pragma warning (push)
#pragma warning (disable: 4530)
#endif
#include <exception>
#include <new> // required for the bad_alloc definition and operator new
#include <string> // required to construct std exception classes
#if !TBB_USE_EXCEPTIONS && _MSC_VER
#pragma warning (pop)
#endif
namespace tbb {
//! Exception for concurrent containers
class bad_last_alloc : public std::bad_alloc {
public:
/*override*/ const char* what() const throw();
#if __TBB_DEFAULT_DTOR_THROW_SPEC_BROKEN
/*override*/ ~bad_last_alloc() throw() {}
#endif
};
//! Exception for PPL locks
class improper_lock : public std::exception {
public:
/*override*/ const char* what() const throw();
};
//! Exception for user-initiated abort
class user_abort : public std::exception {
public:
/*override*/ const char* what() const throw();
};
//! Exception for missing wait on structured_task_group
class missing_wait : public std::exception {
public:
/*override*/ const char* what() const throw();
};
//! Exception for repeated scheduling of the same task_handle
class invalid_multiple_scheduling : public std::exception {
public:
/*override*/ const char* what() const throw();
};
namespace internal {
//! Obsolete
void __TBB_EXPORTED_FUNC throw_bad_last_alloc_exception_v4();
enum exception_id {
eid_bad_alloc = 1,
eid_bad_last_alloc,
eid_nonpositive_step,
eid_out_of_range,
eid_segment_range_error,
eid_index_range_error,
eid_missing_wait,
eid_invalid_multiple_scheduling,
eid_improper_lock,
eid_possible_deadlock,
eid_operation_not_permitted,
eid_condvar_wait_failed,
eid_invalid_load_factor,
eid_reserved, // free slot for backward compatibility, can be reused.
eid_invalid_swap,
eid_reservation_length_error,
eid_invalid_key,
eid_user_abort,
eid_reserved1,
#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
// This id is used only inside the library, and only to support CPF functionality.
// So, if we drop the functionality, eid_reserved1 can be safely renamed and reused.
eid_blocking_sch_init = eid_reserved1,
#endif
eid_bad_tagged_msg_cast,
//! The last enumerator tracks the number of defined IDs. It must remain the last one.
/** When adding new IDs, place them immediately _before_ this comment (that is,
_after_ all the existing IDs). NEVER insert new IDs between the existing ones. **/
eid_max
};
//! Gathers all throw operators in one place.
/** Its purpose is to minimize code bloat that can be caused by throw operators
scattered in multiple places, especially in templates. **/
void __TBB_EXPORTED_FUNC throw_exception_v4 ( exception_id );
//! Versionless convenience wrapper for throw_exception_v4()
inline void throw_exception ( exception_id eid ) { throw_exception_v4(eid); }
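/* Editor's sketch (an assumption, not in the original header): how library code typically
   funnels an error through throw_exception() instead of an inline throw, keeping the throw
   site in one exported function. range_check() is a hypothetical helper. */
#if 0   /* illustrative only */
inline void range_check( size_t i, size_t n ) {
    if ( i >= n )
        throw_exception( eid_out_of_range );   /* one call, no per-site throw code */
}
#endif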
} // namespace internal
} // namespace tbb
#if __TBB_TASK_GROUP_CONTEXT
#include "tbb_allocator.h"
#include <typeinfo> //for typeid
namespace tbb {
//! Interface to be implemented by all exceptions TBB recognizes and propagates across the threads.
/** If an unhandled exception of the type derived from tbb::tbb_exception is intercepted
by the TBB scheduler in one of the worker threads, it is delivered to and re-thrown in
the root thread. The root thread is the thread that has started the outermost algorithm
or root task sharing the same task_group_context with the guilty algorithm/task (the one
that threw the exception first).
Note: when documentation mentions workers with respect to exception handling,
masters are implied as well, because they are completely equivalent in this context.
Consequently, a root thread can be a master or a worker thread.
NOTE: In case of nested algorithms or complex task hierarchies when the nested
levels share (explicitly or by means of implicit inheritance) the task group
context of the outermost level, the exception may be (re-)thrown multiple times
(ultimately - in each worker on each nesting level) before reaching the root
thread at the outermost level. IMPORTANT: if you intercept an exception derived
from this class on a nested level, you must re-throw it in the catch block by means
of the "throw;" operator.
TBB provides two implementations of this interface: tbb::captured_exception and
template class tbb::movable_exception. See their declarations for more info. **/
class tbb_exception : public std::exception
{
/** No operator new is provided because the TBB usage model assumes dynamic
creation of the TBB exception objects only by means of applying move()
operation on an exception thrown out of TBB scheduler. **/
void* operator new ( size_t );
public:
#if __clang__
// At -O3 or even -O2 optimization level, Clang may fully throw away an empty destructor
// of tbb_exception from destructors of derived classes. As a result, it does not create
// vtable for tbb_exception, which is a required part of TBB binary interface.
// Making the destructor non-empty (with just a semicolon) prevents that optimization.
~tbb_exception() throw() { /* keep the semicolon! */ ; }
#endif
//! Creates and returns pointer to the deep copy of this exception object.
/** Move semantics is allowed. **/
virtual tbb_exception* move () throw() = 0;
//! Destroys objects created by the move() method.
/** Frees memory and calls destructor for this exception object.
Can and must be used only on objects created by the move method. **/
virtual void destroy () throw() = 0;
//! Throws this exception object.
/** Make sure that if you have several levels of derivation from this interface
you implement or override this method on the most derived level. The implementation
is as simple as "throw *this;". Failure to do this will result in exception
of a base class type being thrown. **/
virtual void throw_self () = 0;
//! Returns RTTI name of the originally intercepted exception
virtual const char* name() const throw() = 0;
//! Returns the result of originally intercepted exception's what() method.
virtual const char* what() const throw() = 0;
/** Operator delete is provided only to allow using existing smart pointers
with TBB exception objects obtained as the result of applying move()
operation on an exception thrown out of TBB scheduler.
When overriding method move() make sure to override operator delete as well
if memory is allocated not by TBB's scalable allocator. **/
void operator delete ( void* p ) {
internal::deallocate_via_handler_v3(p);
}
};
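/* Editor's sketch (an assumption, not in the original header): the re-throw rule described
   in the interface comment above, applied on a nested level. run_nested_tbb_algorithm() and
   log() are hypothetical. */
#if 0   /* illustrative only */
void nested_level() {
    try {
        run_nested_tbb_algorithm();        /* some nested TBB call sharing the context */
    } catch ( tbb::tbb_exception& e ) {
        log( e.name(), e.what() );         /* inspect, but do not consume */
        throw;                             /* mandatory: re-throw the same exception object */
    }
}
#endif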
//! This class is used by TBB to propagate information about unhandled exceptions into the root thread.
/** Exception of this type is thrown by TBB in the root thread (thread that started a parallel
algorithm) if an unhandled exception was intercepted during the algorithm execution in one
of the workers.
\sa tbb::tbb_exception **/
class captured_exception : public tbb_exception
{
public:
captured_exception ( const captured_exception& src )
: tbb_exception(src), my_dynamic(false)
{
set(src.my_exception_name, src.my_exception_info);
}
captured_exception ( const char* name_, const char* info )
: my_dynamic(false)
{
set(name_, info);
}
__TBB_EXPORTED_METHOD ~captured_exception () throw();
captured_exception& operator= ( const captured_exception& src ) {
if ( this != &src ) {
clear();
set(src.my_exception_name, src.my_exception_info);
}
return *this;
}
/*override*/
captured_exception* __TBB_EXPORTED_METHOD move () throw();
/*override*/
void __TBB_EXPORTED_METHOD destroy () throw();
/*override*/
void throw_self () { __TBB_THROW(*this); }
/*override*/
const char* __TBB_EXPORTED_METHOD name() const throw();
/*override*/
const char* __TBB_EXPORTED_METHOD what() const throw();
void __TBB_EXPORTED_METHOD set ( const char* name, const char* info ) throw();
void __TBB_EXPORTED_METHOD clear () throw();
private:
//! Used only by method clone().
captured_exception() {}
//! Functionally equivalent to {captured_exception e(name,info); return e.clone();}
static captured_exception* allocate ( const char* name, const char* info );
bool my_dynamic;
const char* my_exception_name;
const char* my_exception_info;
};
//! Template that can be used to implement exception that transfers arbitrary ExceptionData to the root thread
/** Code using TBB can instantiate this template with an arbitrary ExceptionData type
and throw this exception object. Such exceptions are intercepted by the TBB scheduler
and delivered to the root thread.
\sa tbb::tbb_exception **/
template<typename ExceptionData>
class movable_exception : public tbb_exception
{
typedef movable_exception<ExceptionData> self_type;
public:
movable_exception ( const ExceptionData& data_ )
: my_exception_data(data_)
, my_dynamic(false)
, my_exception_name(
#if TBB_USE_EXCEPTIONS
typeid(self_type).name()
#else /* !TBB_USE_EXCEPTIONS */
"movable_exception"
#endif /* !TBB_USE_EXCEPTIONS */
)
{}
movable_exception ( const movable_exception& src ) throw ()
: tbb_exception(src)
, my_exception_data(src.my_exception_data)
, my_dynamic(false)
, my_exception_name(src.my_exception_name)
{}
~movable_exception () throw() {}
const movable_exception& operator= ( const movable_exception& src ) {
if ( this != &src ) {
my_exception_data = src.my_exception_data;
my_exception_name = src.my_exception_name;
}
return *this;
}
ExceptionData& data () throw() { return my_exception_data; }
const ExceptionData& data () const throw() { return my_exception_data; }
/*override*/ const char* name () const throw() { return my_exception_name; }
/*override*/ const char* what () const throw() { return "tbb::movable_exception"; }
/*override*/
movable_exception* move () throw() {
void* e = internal::allocate_via_handler_v3(sizeof(movable_exception));
if ( e ) {
::new (e) movable_exception(*this);
((movable_exception*)e)->my_dynamic = true;
}
return (movable_exception*)e;
}
/*override*/
void destroy () throw() {
__TBB_ASSERT ( my_dynamic, "Method destroy can be called only on dynamically allocated movable_exceptions" );
if ( my_dynamic ) {
this->~movable_exception();
internal::deallocate_via_handler_v3(this);
}
}
/*override*/
void throw_self () { __TBB_THROW( *this ); }
protected:
//! User data
ExceptionData my_exception_data;
private:
//! Flag specifying whether this object has been dynamically allocated (by the move method)
bool my_dynamic;
//! RTTI name of this class
/** We rely on the fact that RTTI names are static string constants. **/
const char* my_exception_name;
};
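/* Editor's sketch (an assumption, not in the original header): user code transferring its own
   data to the root thread as described above. The payload type my_error_data is made up. */
#if 0   /* illustrative only */
struct my_error_data { int code; const char* detail; };
void worker_body() {
    my_error_data d = { 42, "stage 3 failed" };
    throw tbb::movable_exception<my_error_data>( d );   /* intercepted and moved by the scheduler */
}
#endif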
#if !TBB_USE_CAPTURED_EXCEPTION
namespace internal {
//! Exception container that preserves the exact copy of the original exception
/** This class can be used only when the appropriate runtime support (mandated
by C++0x) is present **/
class tbb_exception_ptr {
std::exception_ptr my_ptr;
public:
static tbb_exception_ptr* allocate ();
static tbb_exception_ptr* allocate ( const tbb_exception& tag );
//! This overload uses move semantics (i.e. it empties src)
static tbb_exception_ptr* allocate ( captured_exception& src );
//! Destroys this object
/** Note that objects of this type can be created only by the allocate() method. **/
void destroy () throw();
//! Throws the contained exception.
void throw_self () { std::rethrow_exception(my_ptr); }
private:
tbb_exception_ptr ( const std::exception_ptr& src ) : my_ptr(src) {}
tbb_exception_ptr ( const captured_exception& src ) :
#if __TBB_MAKE_EXCEPTION_PTR_PRESENT
my_ptr(std::make_exception_ptr(src)) // the final function name in C++11
#else
my_ptr(std::copy_exception(src)) // early C++0x drafts name
#endif
{}
}; // class tbb::internal::tbb_exception_ptr
} // namespace internal
#endif /* !TBB_USE_CAPTURED_EXCEPTION */
} // namespace tbb
#endif /* __TBB_TASK_GROUP_CONTEXT */
#endif /* __TBB_exception_H */

View File

@@ -0,0 +1,967 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_machine_H
#define __TBB_machine_H
/** This header provides a basic platform abstraction layer by hooking up appropriate
architecture/OS/compiler specific headers from the /include/tbb/machine directory.
If a plug-in header does not implement all the required APIs, it must specify
the missing ones by setting one or more of the following macros:
__TBB_USE_GENERIC_PART_WORD_CAS
__TBB_USE_GENERIC_PART_WORD_FETCH_ADD
__TBB_USE_GENERIC_PART_WORD_FETCH_STORE
__TBB_USE_GENERIC_FETCH_ADD
__TBB_USE_GENERIC_FETCH_STORE
__TBB_USE_GENERIC_DWORD_FETCH_ADD
__TBB_USE_GENERIC_DWORD_FETCH_STORE
__TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE
__TBB_USE_GENERIC_FULL_FENCED_LOAD_STORE
__TBB_USE_GENERIC_RELAXED_LOAD_STORE
__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
In this case tbb_machine.h will add missing functionality based on a minimal set
of APIs that are required to be implemented by all plug-in headers as described
further.
Note that these generic implementations may be sub-optimal for a particular
architecture, and thus should be relied upon only after careful evaluation
or as the last resort.
Additionally __TBB_64BIT_ATOMICS can be set to 0 on a 32-bit architecture to
indicate that the port is not going to support double word atomics. It may also
be set to 1 explicitly, though normally this is not necessary as tbb_machine.h
will set it automatically.
__TBB_ENDIANNESS macro can be defined by the implementation as well.
It is used only if __TBB_USE_GENERIC_PART_WORD_CAS is set (or for testing),
and must specify the layout of aligned 16-bit and 32-bit data anywhere within a process
(while the details of unaligned 16-bit or 32-bit data or of 64-bit data are irrelevant).
The layout must be the same at all relevant memory locations within the current process;
in case of page-specific endianness, one endianness must be kept "out of sight".
Possible settings, reflecting hardware and possibly O.S. convention, are:
- __TBB_ENDIAN_BIG for big-endian data,
- __TBB_ENDIAN_LITTLE for little-endian data,
- __TBB_ENDIAN_DETECT for run-time detection iff exactly one of the above,
- __TBB_ENDIAN_UNSUPPORTED to prevent undefined behavior if none of the above.
Prerequisites for each architecture port
----------------------------------------
The following functions and macros have no generic implementation. Therefore they must be
implemented in each machine architecture specific header either as a conventional
function or as a functional macro.
__TBB_WORDSIZE
This is the size of machine word in bytes, i.e. for 32 bit systems it
should be defined to 4.
__TBB_Yield()
Signals OS that the current thread is willing to relinquish the remainder
of its time quantum.
__TBB_full_memory_fence()
Must prevent all memory operations from being reordered across it (both
by hardware and compiler). All such fences must be totally ordered (or
sequentially consistent).
__TBB_machine_cmpswp4( volatile void *ptr, int32_t value, int32_t comparand )
Must be provided if __TBB_USE_FENCED_ATOMICS is not set.
__TBB_machine_cmpswp8( volatile void *ptr, int64_t value, int64_t comparand )
Must be provided for 64-bit architectures if __TBB_USE_FENCED_ATOMICS is not set,
and for 32-bit architectures if __TBB_64BIT_ATOMICS is set
__TBB_machine_<op><S><fence>(...), where
<op> = {cmpswp, fetchadd, fetchstore}
<S> = {1, 2, 4, 8}
<fence> = {full_fence, acquire, release, relaxed}
Must be provided if __TBB_USE_FENCED_ATOMICS is set.
__TBB_control_consistency_helper()
Bridges the memory-semantics gap between architectures providing only
implicit C++0x "consume" semantics (like Power Architecture) and those
also implicitly obeying control dependencies (like IA-64 architecture).
It must be used only in conditional code where the condition is itself
data-dependent, and will then make subsequent code behave as if the
original data dependency were acquired.
It needs only a compiler fence where implied by the architecture
either specifically (like IA-64 architecture) or because generally stronger
"acquire" semantics are enforced (like x86).
It is always valid, though potentially suboptimal, to replace
control with acquire on the load and then remove the helper.
__TBB_acquire_consistency_helper(), __TBB_release_consistency_helper()
Must be provided if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE is set.
Enforce acquire and release semantics in generic implementations of fenced
store and load operations. Depending on the particular architecture/compiler
combination they may be a hardware fence, a compiler fence, both or nothing.
**/
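/* Editor's sketch (an assumption, not in the original header): the shape of a minimal
   hypothetical 32-bit port header satisfying the prerequisites listed above. The bodies
   are placeholders, not a real port. */
#if 0   /* illustrative only */
#define __TBB_WORDSIZE 4
#define __TBB_Yield()  sched_yield()
#define __TBB_full_memory_fence() __sync_synchronize()
extern "C" int32_t my_port_cmpswp4( volatile void* ptr, int32_t value, int32_t comparand );
#define __TBB_machine_cmpswp4(P,V,C) my_port_cmpswp4(P,V,C)
#define __TBB_USE_GENERIC_PART_WORD_CAS 1   /* let tbb_machine.h synthesize 1- and 2-byte CAS */
#endif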
#include "tbb_stddef.h"
namespace tbb {
namespace internal { //< @cond INTERNAL
////////////////////////////////////////////////////////////////////////////////
// Overridable helpers declarations
//
// A machine/*.h file may choose to define these templates, otherwise it must
// request default implementation by setting appropriate __TBB_USE_GENERIC_XXX macro(s).
//
template <typename T, std::size_t S>
struct machine_load_store;
template <typename T, std::size_t S>
struct machine_load_store_relaxed;
template <typename T, std::size_t S>
struct machine_load_store_seq_cst;
//
// End of overridable helpers declarations
////////////////////////////////////////////////////////////////////////////////
template<size_t S> struct atomic_selector;
template<> struct atomic_selector<1> {
typedef int8_t word;
inline static word fetch_store ( volatile void* location, word value );
};
template<> struct atomic_selector<2> {
typedef int16_t word;
inline static word fetch_store ( volatile void* location, word value );
};
template<> struct atomic_selector<4> {
#if _MSC_VER && !_WIN64
// Work-around that avoids spurious /Wp64 warnings
typedef intptr_t word;
#else
typedef int32_t word;
#endif
inline static word fetch_store ( volatile void* location, word value );
};
template<> struct atomic_selector<8> {
typedef int64_t word;
inline static word fetch_store ( volatile void* location, word value );
};
}} //< namespaces internal @endcond, tbb
#define __TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(M) \
inline void __TBB_machine_generic_store8##M(volatile void *ptr, int64_t value) { \
for(;;) { \
int64_t result = *(volatile int64_t *)ptr; \
if( __TBB_machine_cmpswp8##M(ptr,value,result)==result ) break; \
} \
} \
#define __TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(M) \
inline int64_t __TBB_machine_generic_load8##M(const volatile void *ptr) { \
/* Comparand and new value may be anything, they only must be equal, and */ \
/* the value should have a low probability to be actually found in 'location'.*/ \
const int64_t anyvalue = 2305843009213693951LL; \
return __TBB_machine_cmpswp8##M(const_cast<volatile void *>(ptr),anyvalue,anyvalue); \
} \
// The set of allowed values for __TBB_ENDIANNESS (see above for details)
#define __TBB_ENDIAN_UNSUPPORTED -1
#define __TBB_ENDIAN_LITTLE 0
#define __TBB_ENDIAN_BIG 1
#define __TBB_ENDIAN_DETECT 2
#if _WIN32||_WIN64
#ifdef _MANAGED
#pragma managed(push, off)
#endif
#if __MINGW64__ || __MINGW32__
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
#define __TBB_Yield() SwitchToThread()
#if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
#include "machine/gcc_generic.h"
#elif __MINGW64__
#include "machine/linux_intel64.h"
#elif __MINGW32__
#include "machine/linux_ia32.h"
#endif
#elif (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
#include "machine/icc_generic.h"
#elif defined(_M_IX86) && !defined(__TBB_WIN32_USE_CL_BUILTINS)
#include "machine/windows_ia32.h"
#elif defined(_M_X64)
#include "machine/windows_intel64.h"
#elif defined(_XBOX)
#include "machine/xbox360_ppc.h"
#elif defined(_M_ARM) || defined(__TBB_WIN32_USE_CL_BUILTINS)
#include "machine/msvc_armv7.h"
#endif
#ifdef _MANAGED
#pragma managed(pop)
#endif
#elif __TBB_DEFINE_MIC
#include "machine/mic_common.h"
#if (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
#include "machine/icc_generic.h"
#else
#include "machine/linux_intel64.h"
#endif
#elif __linux__ || __FreeBSD__ || __NetBSD__
#if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
#include "machine/gcc_generic.h"
#elif (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
#include "machine/icc_generic.h"
#elif __i386__
#include "machine/linux_ia32.h"
#elif __x86_64__
#include "machine/linux_intel64.h"
#elif __ia64__
#include "machine/linux_ia64.h"
#elif __powerpc__
#include "machine/mac_ppc.h"
#elif __arm__
#include "machine/gcc_armv7.h"
#elif __TBB_GCC_BUILTIN_ATOMICS_PRESENT
#include "machine/gcc_generic.h"
#endif
#include "machine/linux_common.h"
#elif __APPLE__
//TODO: TBB_USE_GCC_BUILTINS is not used for Mac, Sun, Aix
#if (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
#include "machine/icc_generic.h"
#elif __i386__
#include "machine/linux_ia32.h"
#elif __x86_64__
#include "machine/linux_intel64.h"
#elif __POWERPC__
#include "machine/mac_ppc.h"
#endif
#include "machine/macos_common.h"
#elif _AIX
#include "machine/ibm_aix51.h"
#elif __sun || __SUNPRO_CC
#define __asm__ asm
#define __volatile__ volatile
#if __i386 || __i386__
#include "machine/linux_ia32.h"
#elif __x86_64__
#include "machine/linux_intel64.h"
#elif __sparc
#include "machine/sunos_sparc.h"
#endif
#include <sched.h>
#define __TBB_Yield() sched_yield()
#endif /* OS selection */
#ifndef __TBB_64BIT_ATOMICS
#define __TBB_64BIT_ATOMICS 1
#endif
//TODO: replace usage of these functions with usage of tbb::atomic, and then remove them
//TODO: map functions with W suffix to use cast to tbb::atomic and according op, i.e. as_atomic().op()
// Special atomic functions
#if __TBB_USE_FENCED_ATOMICS
#define __TBB_machine_cmpswp1 __TBB_machine_cmpswp1full_fence
#define __TBB_machine_cmpswp2 __TBB_machine_cmpswp2full_fence
#define __TBB_machine_cmpswp4 __TBB_machine_cmpswp4full_fence
#define __TBB_machine_cmpswp8 __TBB_machine_cmpswp8full_fence
#if __TBB_WORDSIZE==8
#define __TBB_machine_fetchadd8 __TBB_machine_fetchadd8full_fence
#define __TBB_machine_fetchstore8 __TBB_machine_fetchstore8full_fence
#define __TBB_FetchAndAddWrelease(P,V) __TBB_machine_fetchadd8release(P,V)
#define __TBB_FetchAndIncrementWacquire(P) __TBB_machine_fetchadd8acquire(P,1)
#define __TBB_FetchAndDecrementWrelease(P) __TBB_machine_fetchadd8release(P,(-1))
#else
#define __TBB_machine_fetchadd4 __TBB_machine_fetchadd4full_fence
#define __TBB_machine_fetchstore4 __TBB_machine_fetchstore4full_fence
#define __TBB_FetchAndAddWrelease(P,V) __TBB_machine_fetchadd4release(P,V)
#define __TBB_FetchAndIncrementWacquire(P) __TBB_machine_fetchadd4acquire(P,1)
#define __TBB_FetchAndDecrementWrelease(P) __TBB_machine_fetchadd4release(P,(-1))
#endif /* __TBB_WORDSIZE==8 */
#else /* !__TBB_USE_FENCED_ATOMICS */
#define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAddW(P,V)
#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1)
#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1))
#endif /* !__TBB_USE_FENCED_ATOMICS */
#if __TBB_WORDSIZE==4
#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd4(P,V)
#define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore4(P,V)
#elif __TBB_WORDSIZE==8
#if __TBB_USE_GENERIC_DWORD_LOAD_STORE || __TBB_USE_GENERIC_DWORD_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_STORE
#error These macros should only be used on 32-bit platforms.
#endif
#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp8(P,V,C)
#define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd8(P,V)
#define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore8(P,V)
#else /* __TBB_WORDSIZE != 8 */
#error Unsupported machine word size.
#endif /* __TBB_WORDSIZE */
#ifndef __TBB_Pause
inline void __TBB_Pause(int32_t) {
__TBB_Yield();
}
#endif
namespace tbb {
//! Sequentially consistent full memory fence.
inline void atomic_fence () { __TBB_full_memory_fence(); }
namespace internal { //< @cond INTERNAL
//! Class that implements exponential backoff.
/** See implementation of spin_wait_while_eq for an example. */
class atomic_backoff : no_copy {
//! Time delay, in units of "pause" instructions.
/** Should be equal to approximately the number of "pause" instructions
that take the same time as a context switch. */
static const int32_t LOOPS_BEFORE_YIELD = 16;
int32_t count;
public:
// In many cases, an object of this type is initialized eagerly on hot path,
// as in for(atomic_backoff b; ; b.pause()) { /*loop body*/ }
// For this reason, the construction cost must be very small!
atomic_backoff() : count(1) {}
// This constructor pauses immediately; do not use on hot paths!
atomic_backoff( bool ) : count(1) { pause(); }
//! Pause for a while.
void pause() {
if( count<=LOOPS_BEFORE_YIELD ) {
__TBB_Pause(count);
// Pause twice as long the next time.
count*=2;
} else {
// Pause is so long that we might as well yield CPU to scheduler.
__TBB_Yield();
}
}
// Pause a bounded number of times, then return false without pausing.
bool bounded_pause() {
if( count<=LOOPS_BEFORE_YIELD ) {
__TBB_Pause(count);
// Pause twice as long the next time.
count*=2;
return true;
} else {
return false;
}
}
void reset() {
count = 1;
}
};
//! Spin WHILE the value of the variable is equal to a given value
/** T and U should be comparable types. */
template<typename T, typename U>
void spin_wait_while_eq( const volatile T& location, U value ) {
atomic_backoff backoff;
while( location==value ) backoff.pause();
}
//! Spin UNTIL the value of the variable is equal to a given value
/** T and U should be comparable types. */
template<typename T, typename U>
void spin_wait_until_eq( const volatile T& location, const U value ) {
atomic_backoff backoff;
while( location!=value ) backoff.pause();
}
template <typename predicate_type>
void spin_wait_while(predicate_type condition){
atomic_backoff backoff;
while( condition() ) backoff.pause();
}
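/* Editor's sketch (an assumption, not in the original header): a typical consumer-side spin
   on a flag published by another thread, using the helpers defined above. */
#if 0   /* illustrative only */
volatile int ready_flag = 0;                 /* written to 1 by a producer thread */
void wait_for_producer() {
    spin_wait_until_eq( ready_flag, 1 );     /* exponential backoff, then yields */
}
#endif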
////////////////////////////////////////////////////////////////////////////////
// Generic compare-and-swap applied to only a part of a machine word.
//
#ifndef __TBB_ENDIANNESS
#define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
#endif
#if __TBB_USE_GENERIC_PART_WORD_CAS && __TBB_ENDIANNESS==__TBB_ENDIAN_UNSUPPORTED
#error Generic implementation of part-word CAS may not be used with __TBB_ENDIAN_UNSUPPORTED
#endif
#if __TBB_ENDIANNESS!=__TBB_ENDIAN_UNSUPPORTED
//
// This function is the only use of __TBB_ENDIANNESS.
// The following restrictions/limitations apply for this operation:
// - T must be an integer type of at most 4 bytes for the casts and calculations to work
// - T must also be less than 4 bytes to avoid compiler warnings when computing mask
// (and for the operation to be useful at all, so no workaround is applied)
// - the architecture must consistently use either little-endian or big-endian (same for all locations)
//
// TODO: static_assert for the type requirements stated above
template<typename T>
inline T __TBB_MaskedCompareAndSwap (volatile T * const ptr, const T value, const T comparand ) {
struct endianness{ static bool is_big_endian(){
#if __TBB_ENDIANNESS==__TBB_ENDIAN_DETECT
const uint32_t probe = 0x03020100;
return (((const char*)(&probe))[0]==0x03);
#elif __TBB_ENDIANNESS==__TBB_ENDIAN_BIG || __TBB_ENDIANNESS==__TBB_ENDIAN_LITTLE
return __TBB_ENDIANNESS==__TBB_ENDIAN_BIG;
#else
#error Unexpected value of __TBB_ENDIANNESS
#endif
}};
const uint32_t byte_offset = (uint32_t) ((uintptr_t)ptr & 0x3);
volatile uint32_t * const aligned_ptr = (uint32_t*)((uintptr_t)ptr - byte_offset );
// location of T within uint32_t for a C++ shift operation
const uint32_t bits_to_shift = 8*(endianness::is_big_endian() ? (4 - sizeof(T) - (byte_offset)) : byte_offset);
const uint32_t mask = (((uint32_t)1<<(sizeof(T)*8)) - 1 )<<bits_to_shift;
// for signed T, any sign extension bits in cast value/comparand are immediately clipped by mask
const uint32_t shifted_comparand = ((uint32_t)comparand << bits_to_shift)&mask;
const uint32_t shifted_value = ((uint32_t)value << bits_to_shift)&mask;
for( atomic_backoff b;;b.pause() ) {
const uint32_t surroundings = *aligned_ptr & ~mask ; // may have changed during the pause
const uint32_t big_comparand = surroundings | shifted_comparand ;
const uint32_t big_value = surroundings | shifted_value ;
// __TBB_machine_cmpswp4 presumed to have full fence.
// Cast shuts up /Wp64 warning
const uint32_t big_result = (uint32_t)__TBB_machine_cmpswp4( aligned_ptr, big_value, big_comparand );
if( big_result == big_comparand // CAS succeeded
|| ((big_result ^ big_comparand) & mask) != 0) // CAS failed and the bits of interest have changed
{
return T((big_result & mask) >> bits_to_shift);
}
else continue; // CAS failed but the bits of interest were not changed
}
}
#endif // __TBB_ENDIANNESS!=__TBB_ENDIAN_UNSUPPORTED
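/* Editor's worked example (an assumption, not in the original header): for a 1-byte CAS at an
   address whose byte_offset within its aligned 32-bit word is 2, on a little-endian machine,
   bits_to_shift = 8*2 = 16 and mask = 0x00FF0000; the byte of interest is thus isolated in
   bits 16..23 of the enclosing word before __TBB_machine_cmpswp4 is attempted on the whole word. */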
////////////////////////////////////////////////////////////////////////////////
template<size_t S, typename T>
inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand );
template<>
inline int8_t __TBB_CompareAndSwapGeneric <1,int8_t> (volatile void *ptr, int8_t value, int8_t comparand ) {
#if __TBB_USE_GENERIC_PART_WORD_CAS
return __TBB_MaskedCompareAndSwap<int8_t>((volatile int8_t *)ptr,value,comparand);
#else
return __TBB_machine_cmpswp1(ptr,value,comparand);
#endif
}
template<>
inline int16_t __TBB_CompareAndSwapGeneric <2,int16_t> (volatile void *ptr, int16_t value, int16_t comparand ) {
#if __TBB_USE_GENERIC_PART_WORD_CAS
return __TBB_MaskedCompareAndSwap<int16_t>((volatile int16_t *)ptr,value,comparand);
#else
return __TBB_machine_cmpswp2(ptr,value,comparand);
#endif
}
template<>
inline int32_t __TBB_CompareAndSwapGeneric <4,int32_t> (volatile void *ptr, int32_t value, int32_t comparand ) {
// Cast shuts up /Wp64 warning
return (int32_t)__TBB_machine_cmpswp4(ptr,value,comparand);
}
#if __TBB_64BIT_ATOMICS
template<>
inline int64_t __TBB_CompareAndSwapGeneric <8,int64_t> (volatile void *ptr, int64_t value, int64_t comparand ) {
return __TBB_machine_cmpswp8(ptr,value,comparand);
}
#endif
template<size_t S, typename T>
inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
T result;
for( atomic_backoff b;;b.pause() ) {
result = *reinterpret_cast<volatile T *>(ptr);
// __TBB_CompareAndSwapGeneric presumed to have full fence.
if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
break;
}
return result;
}
template<size_t S, typename T>
inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
T result;
for( atomic_backoff b;;b.pause() ) {
result = *reinterpret_cast<volatile T *>(ptr);
// __TBB_CompareAndSwapGeneric presumed to have full fence.
if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
break;
}
return result;
}
#if __TBB_USE_GENERIC_PART_WORD_CAS
#define __TBB_machine_cmpswp1 tbb::internal::__TBB_CompareAndSwapGeneric<1,int8_t>
#define __TBB_machine_cmpswp2 tbb::internal::__TBB_CompareAndSwapGeneric<2,int16_t>
#endif
#if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_PART_WORD_FETCH_ADD
#define __TBB_machine_fetchadd1 tbb::internal::__TBB_FetchAndAddGeneric<1,int8_t>
#define __TBB_machine_fetchadd2 tbb::internal::__TBB_FetchAndAddGeneric<2,int16_t>
#endif
#if __TBB_USE_GENERIC_FETCH_ADD
#define __TBB_machine_fetchadd4 tbb::internal::__TBB_FetchAndAddGeneric<4,int32_t>
#endif
#if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_ADD
#define __TBB_machine_fetchadd8 tbb::internal::__TBB_FetchAndAddGeneric<8,int64_t>
#endif
#if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_PART_WORD_FETCH_STORE
#define __TBB_machine_fetchstore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,int8_t>
#define __TBB_machine_fetchstore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,int16_t>
#endif
#if __TBB_USE_GENERIC_FETCH_STORE
#define __TBB_machine_fetchstore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,int32_t>
#endif
#if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_DWORD_FETCH_STORE
#define __TBB_machine_fetchstore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,int64_t>
#endif
#if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
#define __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(S) \
atomic_selector<S>::word atomic_selector<S>::fetch_store ( volatile void* location, word value ) { \
return __TBB_machine_fetchstore##S( location, value ); \
}
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(1)
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(2)
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(4)
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(8)
#undef __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE
#endif /* __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
#if __TBB_USE_GENERIC_DWORD_LOAD_STORE
/* TODO: find a more elegant way to handle the function name difference */
#if ! __TBB_USE_FENCED_ATOMICS
/* This name forwarding is needed for generic implementation of
* load8/store8 defined below (via macro) to pick the right CAS function*/
#define __TBB_machine_cmpswp8full_fence __TBB_machine_cmpswp8
#endif
__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(full_fence)
__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(full_fence)
#if ! __TBB_USE_FENCED_ATOMICS
#undef __TBB_machine_cmpswp8full_fence
#endif
#define __TBB_machine_store8 tbb::internal::__TBB_machine_generic_store8full_fence
#define __TBB_machine_load8 tbb::internal::__TBB_machine_generic_load8full_fence
#endif /* __TBB_USE_GENERIC_DWORD_LOAD_STORE */
#if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE
/** Fenced operations use the volatile qualifier to prevent the compiler from optimizing
them out, and on architectures with weak memory ordering to induce the compiler
to generate code with appropriate acquire/release semantics.
On architectures like IA32, Intel64 (and likely Sparc TSO) volatile has
no effect on code gen, and consistency helpers serve as a compiler fence (the
latter being true for IA64/gcc as well to fix a bug in some gcc versions).
This code assumes that the generated instructions will operate atomically,
which typically requires a type that can be moved in a single instruction,
cooperation from the compiler for effective use of such an instruction,
and appropriate alignment of the data. **/
template <typename T, size_t S>
struct machine_load_store {
static T load_with_acquire ( const volatile T& location ) {
T to_return = location;
__TBB_acquire_consistency_helper();
return to_return;
}
static void store_with_release ( volatile T &location, T value ) {
__TBB_release_consistency_helper();
location = value;
}
};
// In general, plain loads and stores generated by a 32-bit compiler are not atomic for 64-bit types
#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
template <typename T>
struct machine_load_store<T,8> {
static T load_with_acquire ( const volatile T& location ) {
return (T)__TBB_machine_load8( (const volatile void*)&location );
}
static void store_with_release ( volatile T& location, T value ) {
__TBB_machine_store8( (volatile void*)&location, (int64_t)value );
}
};
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
#endif /* __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE */
#if __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE
template <typename T, size_t S>
struct machine_load_store_seq_cst {
static T load ( const volatile T& location ) {
__TBB_full_memory_fence();
return machine_load_store<T,S>::load_with_acquire( location );
}
#if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
static void store ( volatile T &location, T value ) {
atomic_selector<S>::fetch_store( (volatile void*)&location, (typename atomic_selector<S>::word)value );
}
#else /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
static void store ( volatile T &location, T value ) {
machine_load_store<T,S>::store_with_release( location, value );
__TBB_full_memory_fence();
}
#endif /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
};
#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
/** The implementation does not use functions __TBB_machine_load8/store8 as they
are not required to be sequentially consistent. **/
template <typename T>
struct machine_load_store_seq_cst<T,8> {
static T load ( const volatile T& location ) {
// Comparand and new value may be anything; they only need to be equal, and
// the value should have a low probability of actually being found in 'location'.
const int64_t anyvalue = 2305843009213693951LL;
return __TBB_machine_cmpswp8( (volatile void*)const_cast<volatile T*>(&location), anyvalue, anyvalue );
}
static void store ( volatile T &location, T value ) {
int64_t result = (volatile int64_t&)location;
while ( __TBB_machine_cmpswp8((volatile void*)&location, (int64_t)value, result) != result )
result = (volatile int64_t&)location;
}
};
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
#endif /*__TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE */
#if __TBB_USE_GENERIC_RELAXED_LOAD_STORE
// Relaxed operations add the volatile qualifier to prevent the compiler from optimizing them out.
/** Volatile should not incur any additional cost on IA32, Intel64, and Sparc TSO
architectures. However on architectures with weak memory ordering compiler may
generate code with acquire/release semantics for operations on volatile data. **/
template <typename T, size_t S>
struct machine_load_store_relaxed {
static inline T load ( const volatile T& location ) {
return location;
}
static inline void store ( volatile T& location, T value ) {
location = value;
}
};
#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
template <typename T>
struct machine_load_store_relaxed<T,8> {
static inline T load ( const volatile T& location ) {
return (T)__TBB_machine_load8( (const volatile void*)&location );
}
static inline void store ( volatile T& location, T value ) {
__TBB_machine_store8( (volatile void*)&location, (int64_t)value );
}
};
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
#endif /* __TBB_USE_GENERIC_RELAXED_LOAD_STORE */
#undef __TBB_WORDSIZE //this macro is forbidden to use outside of atomic machinery
template<typename T>
inline T __TBB_load_with_acquire(const volatile T &location) {
return machine_load_store<T,sizeof(T)>::load_with_acquire( location );
}
template<typename T, typename V>
inline void __TBB_store_with_release(volatile T& location, V value) {
machine_load_store<T,sizeof(T)>::store_with_release( location, T(value) );
}
//! Overload that exists solely to avoid /Wp64 warnings.
inline void __TBB_store_with_release(volatile size_t& location, size_t value) {
machine_load_store<size_t,sizeof(size_t)>::store_with_release( location, value );
}
template<typename T>
inline T __TBB_load_full_fence(const volatile T &location) {
return machine_load_store_seq_cst<T,sizeof(T)>::load( location );
}
template<typename T, typename V>
inline void __TBB_store_full_fence(volatile T& location, V value) {
machine_load_store_seq_cst<T,sizeof(T)>::store( location, T(value) );
}
//! Overload that exists solely to avoid /Wp64 warnings.
inline void __TBB_store_full_fence(volatile size_t& location, size_t value) {
machine_load_store_seq_cst<size_t,sizeof(size_t)>::store( location, value );
}
template<typename T>
inline T __TBB_load_relaxed (const volatile T& location) {
return machine_load_store_relaxed<T,sizeof(T)>::load( const_cast<T&>(location) );
}
template<typename T, typename V>
inline void __TBB_store_relaxed ( volatile T& location, V value ) {
machine_load_store_relaxed<T,sizeof(T)>::store( const_cast<T&>(location), T(value) );
}
//! Overload that exists solely to avoid /Wp64 warnings.
inline void __TBB_store_relaxed ( volatile size_t& location, size_t value ) {
machine_load_store_relaxed<size_t,sizeof(size_t)>::store( const_cast<size_t&>(location), value );
}
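/* Editor's sketch (an assumption, not in the original header): the classic data/flag handoff
   expressed with the load/store helpers above. */
#if 0   /* illustrative only */
int payload;                  /* ordinary data, written before the flag is released */
volatile int flag = 0;        /* synchronization variable */
void producer() {
    payload = 17;
    __TBB_store_with_release( flag, 1 );       /* release: publishes payload */
}
void consumer() {
    while ( !__TBB_load_with_acquire( flag ) ) /* acquire: sees payload once flag!=0 */
        __TBB_Yield();
    int v = payload; (void)v;
}
#endif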
// Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should be a type with alignment at least as
// strict as type T. The type should have a trivial default constructor and destructor, so that
// arrays of that type can be declared without initializers.
// It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T) expands
// to a type bigger than T.
// The default definition here works on machines where integers are naturally aligned and the
// strictest alignment is 64.
#ifndef __TBB_TypeWithAlignmentAtLeastAsStrict
#if __TBB_ATTRIBUTE_ALIGNED_PRESENT
#define __TBB_DefineTypeWithAlignment(PowerOf2) \
struct __TBB_machine_type_with_alignment_##PowerOf2 { \
uint32_t member[PowerOf2/sizeof(uint32_t)]; \
} __attribute__((aligned(PowerOf2)));
#define __TBB_alignof(T) __alignof__(T)
#elif __TBB_DECLSPEC_ALIGN_PRESENT
#define __TBB_DefineTypeWithAlignment(PowerOf2) \
__declspec(align(PowerOf2)) \
struct __TBB_machine_type_with_alignment_##PowerOf2 { \
uint32_t member[PowerOf2/sizeof(uint32_t)]; \
};
#define __TBB_alignof(T) __alignof(T)
#else /* A compiler with unknown syntax for data alignment */
#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T)
#endif
/* Now declare types aligned to useful powers of two */
// TODO: Is __TBB_DefineTypeWithAlignment(8) needed on 32 bit platforms?
__TBB_DefineTypeWithAlignment(16)
__TBB_DefineTypeWithAlignment(32)
__TBB_DefineTypeWithAlignment(64)
typedef __TBB_machine_type_with_alignment_64 __TBB_machine_type_with_strictest_alignment;
// Primary template is a declaration of incomplete type so that it fails with unknown alignments
template<size_t N> struct type_with_alignment;
// Specializations for allowed alignments
template<> struct type_with_alignment<1> { char member; };
template<> struct type_with_alignment<2> { uint16_t member; };
template<> struct type_with_alignment<4> { uint32_t member; };
template<> struct type_with_alignment<8> { uint64_t member; };
template<> struct type_with_alignment<16> {__TBB_machine_type_with_alignment_16 member; };
template<> struct type_with_alignment<32> {__TBB_machine_type_with_alignment_32 member; };
template<> struct type_with_alignment<64> {__TBB_machine_type_with_alignment_64 member; };
#if __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN
//! Work around for bug in GNU 3.2 and MSVC compilers.
/** Bug is that compiler sometimes returns 0 for __alignof(T) when T has not yet been instantiated.
The work-around forces instantiation by forcing computation of sizeof(T) before __alignof(T). */
template<size_t Size, typename T>
struct work_around_alignment_bug {
static const size_t alignment = __TBB_alignof(T);
};
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
#else
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__TBB_alignof(T)>
#endif /* __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN */
#endif /* __TBB_TypeWithAlignmentAtLeastAsStrict */
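// Illustrative sketch (hypothetical template, not part of the original header): the macro above is
// typically used to reserve raw, suitably aligned, uninitialized storage for an object of type U,
// which the caller later constructs and destroys in place.
template<typename U>
union __TBB_example_aligned_storage_for_one {
__TBB_TypeWithAlignmentAtLeastAsStrict(U) aligner; // forces alignment at least as strict as U's
char bytes[sizeof(U)]; // uninitialized bytes large enough for one U
};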
// Template class here is to avoid instantiation of the static data for modules that don't use it
template<typename T>
struct reverse {
static const T byte_table[256];
};
// An efficient implementation of the reverse function utilizes a 2^8 lookup table holding the bit-reversed
// values of [0..2^8 - 1]. Those values can also be computed on the fly at a slightly higher cost.
template<typename T>
const T reverse<T>::byte_table[256] = {
0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
};
} // namespace internal @endcond
} // namespace tbb
// Preserving access to legacy APIs
using tbb::internal::__TBB_load_with_acquire;
using tbb::internal::__TBB_store_with_release;
// Mapping historically used names to the ones expected by atomic_load_store_traits
#define __TBB_load_acquire __TBB_load_with_acquire
#define __TBB_store_release __TBB_store_with_release
#ifndef __TBB_Log2
inline intptr_t __TBB_Log2( uintptr_t x ) {
if( x==0 ) return -1;
intptr_t result = 0;
#if !defined(_M_ARM)
uintptr_t tmp;
if( sizeof(x)>4 && (tmp = ((uint64_t)x)>>32) ) { x=tmp; result += 32; }
#endif
if( uintptr_t tmp = x>>16 ) { x=tmp; result += 16; }
if( uintptr_t tmp = x>>8 ) { x=tmp; result += 8; }
if( uintptr_t tmp = x>>4 ) { x=tmp; result += 4; }
if( uintptr_t tmp = x>>2 ) { x=tmp; result += 2; }
return (x&2)? result+1: result;
}
#endif
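// Illustrative sketch (hypothetical helper, not part of the original header): rounding a positive
// value up to the next power of two with __TBB_Log2. For instance __TBB_Log2(1)==0, __TBB_Log2(5)==2
// and __TBB_Log2(8)==3, so the helper maps 5 to 8 and leaves 8 unchanged.
inline uintptr_t __TBB_example_round_up_to_power_of_two( uintptr_t x ) {
uintptr_t lower = uintptr_t(1) << __TBB_Log2(x); // largest power of two not exceeding x
return lower < x ? lower << 1 : lower; // double it unless x already was a power of two
}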
#ifndef __TBB_AtomicOR
inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) {
for( tbb::internal::atomic_backoff b;;b.pause() ) {
uintptr_t tmp = *(volatile uintptr_t *)operand;
uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp);
if( result==tmp ) break;
}
}
#endif
#ifndef __TBB_AtomicAND
inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) {
for( tbb::internal::atomic_backoff b;;b.pause() ) {
uintptr_t tmp = *(volatile uintptr_t *)operand;
uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
if( result==tmp ) break;
}
}
#endif
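// Illustrative sketch (hypothetical helpers, not part of TBB): the primitives above are used to set
// and clear bits of a word shared between threads; the mask argument is assumed to have the desired
// bits set.
inline void __TBB_example_set_flag_bits( volatile uintptr_t& flags, uintptr_t mask ) {
__TBB_AtomicOR( &flags, mask ); // atomically turn the masked bits on
}
inline void __TBB_example_clear_flag_bits( volatile uintptr_t& flags, uintptr_t mask ) {
__TBB_AtomicAND( &flags, ~mask ); // atomically turn the masked bits off
}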
#if __TBB_PREFETCHING
#ifndef __TBB_cl_prefetch
#error This platform does not define cache management primitives required for __TBB_PREFETCHING
#endif
#ifndef __TBB_cl_evict
#define __TBB_cl_evict(p)
#endif
#endif
#ifndef __TBB_Flag
typedef unsigned char __TBB_Flag;
#endif
typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag;
#ifndef __TBB_TryLockByte
inline bool __TBB_TryLockByte( __TBB_atomic_flag &flag ) {
return __TBB_machine_cmpswp1(&flag,1,0)==0;
}
#endif
#ifndef __TBB_LockByte
inline __TBB_Flag __TBB_LockByte( __TBB_atomic_flag& flag ) {
tbb::internal::atomic_backoff backoff;
while( !__TBB_TryLockByte(flag) ) backoff.pause();
return 0;
}
#endif
#ifndef __TBB_UnlockByte
#define __TBB_UnlockByte(addr) __TBB_store_with_release((addr),0)
#endif
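// Illustrative sketch (hypothetical class, not part of TBB): a minimal scoped spin lock built on the
// byte-lock primitives above; TBB itself provides full-featured mutexes, this only shows the intended
// acquire/release pairing of __TBB_LockByte and __TBB_UnlockByte.
class __TBB_example_scoped_byte_lock {
__TBB_atomic_flag& my_flag;
public:
__TBB_example_scoped_byte_lock( __TBB_atomic_flag& f ) : my_flag(f) { __TBB_LockByte( my_flag ); }
~__TBB_example_scoped_byte_lock() { __TBB_UnlockByte( my_flag ); }
};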
// lock primitives with TSX
#if ( __TBB_x86_32 || __TBB_x86_64 ) /* only on ia32/intel64 */
inline void __TBB_TryLockByteElidedCancel() { __TBB_machine_try_lock_elided_cancel(); }
inline bool __TBB_TryLockByteElided( __TBB_atomic_flag& flag ) {
bool res = __TBB_machine_try_lock_elided( &flag )!=0;
// to avoid the "lemming" effect, we need to abort the transaction
// if __TBB_machine_try_lock_elided returns false (i.e., someone else
// has acquired the mutex non-speculatively).
if( !res ) __TBB_TryLockByteElidedCancel();
return res;
}
inline void __TBB_LockByteElided( __TBB_atomic_flag& flag )
{
for(;;) {
tbb::internal::spin_wait_while_eq( flag, 1 );
if( __TBB_machine_try_lock_elided( &flag ) )
return;
// Another thread acquired the lock "for real".
// To avoid the "lemming" effect, we abort the transaction.
__TBB_TryLockByteElidedCancel();
}
}
inline void __TBB_UnlockByteElided( __TBB_atomic_flag& flag ) {
__TBB_machine_unlock_elided( &flag );
}
#endif
#ifndef __TBB_ReverseByte
inline unsigned char __TBB_ReverseByte(unsigned char src) {
return tbb::internal::reverse<unsigned char>::byte_table[src];
}
#endif
template<typename T>
T __TBB_ReverseBits(T src) {
T dst;
unsigned char *original = (unsigned char *) &src;
unsigned char *reversed = (unsigned char *) &dst;
for( int i = sizeof(T)-1; i >= 0; i-- )
reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] );
return dst;
}
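// Worked example (illustration only, not part of the original header): the table maps 0x01 to 0x80
// and 0xFF to 0xFF, so applying the template to a 16-bit integer reverses the full bit pattern, e.g.
// uint16_t v = __TBB_ReverseBits( uint16_t(0x00FF) ); // v == 0xFF00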
#endif /* __TBB_machine_H */

View File

@@ -0,0 +1,271 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_profiling_H
#define __TBB_profiling_H
namespace tbb {
namespace internal {
//
// This is not under __TBB_ITT_STRUCTURE_API because these values are used directly in flow_graph.h.
//
// include list of index names
#define TBB_STRING_RESOURCE(index_name,str) index_name,
enum string_index {
#include "internal/_tbb_strings.h"
NUM_STRINGS
};
#undef TBB_STRING_RESOURCE
enum itt_relation
{
__itt_relation_is_unknown = 0,
__itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes */
__itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */
__itt_relation_is_parent_of, /**< "A is parent of B" means that A created B */
__itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B */
__itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */
__itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */
__itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */
};
}
}
// Check if the tools support is enabled
#if (_WIN32||_WIN64||__linux__) && !__MINGW32__ && TBB_USE_THREADING_TOOLS
#if _WIN32||_WIN64
#include <stdlib.h> /* mbstowcs_s */
#endif
#include "tbb_stddef.h"
namespace tbb {
namespace internal {
#if _WIN32||_WIN64
void __TBB_EXPORTED_FUNC itt_set_sync_name_v3( void *obj, const wchar_t* name );
inline size_t multibyte_to_widechar( wchar_t* wcs, const char* mbs, size_t bufsize) {
#if _MSC_VER>=1400
size_t len;
mbstowcs_s( &len, wcs, bufsize, mbs, _TRUNCATE );
return len; // mbstowcs_s counts null terminator
#else
size_t len = mbstowcs( wcs, mbs, bufsize );
if(wcs && len!=size_t(-1) )
wcs[len<bufsize-1? len: bufsize-1] = wchar_t('\0');
return len+1; // mbstowcs does not count null terminator
#endif
}
#else
void __TBB_EXPORTED_FUNC itt_set_sync_name_v3( void *obj, const char* name );
#endif
} // namespace internal
} // namespace tbb
//! Macro __TBB_DEFINE_PROFILING_SET_NAME(T) defines "set_name" methods for sync objects of type T
/** Should be used in the "tbb" namespace only.
Don't place semicolon after it to avoid compiler warnings. **/
#if _WIN32||_WIN64
#define __TBB_DEFINE_PROFILING_SET_NAME(sync_object_type) \
namespace profiling { \
inline void set_name( sync_object_type& obj, const wchar_t* name ) { \
tbb::internal::itt_set_sync_name_v3( &obj, name ); \
} \
inline void set_name( sync_object_type& obj, const char* name ) { \
size_t len = tbb::internal::multibyte_to_widechar(NULL, name, 0); \
wchar_t *wname = new wchar_t[len]; \
tbb::internal::multibyte_to_widechar(wname, name, len); \
set_name( obj, wname ); \
delete[] wname; \
} \
}
#else /* !WIN */
#define __TBB_DEFINE_PROFILING_SET_NAME(sync_object_type) \
namespace profiling { \
inline void set_name( sync_object_type& obj, const char* name ) { \
tbb::internal::itt_set_sync_name_v3( &obj, name ); \
} \
}
#endif /* !WIN */
#else /* no tools support */
#if _WIN32||_WIN64
#define __TBB_DEFINE_PROFILING_SET_NAME(sync_object_type) \
namespace profiling { \
inline void set_name( sync_object_type&, const wchar_t* ) {} \
inline void set_name( sync_object_type&, const char* ) {} \
}
#else /* !WIN */
#define __TBB_DEFINE_PROFILING_SET_NAME(sync_object_type) \
namespace profiling { \
inline void set_name( sync_object_type&, const char* ) {} \
}
#endif /* !WIN */
#endif /* no tools support */
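// Usage sketch (hypothetical type name, illustration only): a synchronization class, say
// tbb::example_mutex, would typically get named-object support by expanding the macro once
// in the tbb namespace:
// namespace tbb { __TBB_DEFINE_PROFILING_SET_NAME(example_mutex) }
// after which client code can call tbb::profiling::set_name(m, "my mutex") to label the object for
// threading tools; when tools support is disabled the call compiles down to a no-op.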
#include "atomic.h"
// Need these to work regardless of tools support
namespace tbb {
namespace internal {
enum notify_type {prepare=0, cancel, acquired, releasing};
const uintptr_t NUM_NOTIFY_TYPES = 4; // set to # elements in enum above
void __TBB_EXPORTED_FUNC call_itt_notify_v5(int t, void *ptr);
void __TBB_EXPORTED_FUNC itt_store_pointer_with_release_v3(void *dst, void *src);
void* __TBB_EXPORTED_FUNC itt_load_pointer_with_acquire_v3(const void *src);
void* __TBB_EXPORTED_FUNC itt_load_pointer_v3( const void* src );
#if __TBB_ITT_STRUCTURE_API
enum itt_domain_enum { ITT_DOMAIN_FLOW=0 };
void __TBB_EXPORTED_FUNC itt_make_task_group_v7( itt_domain_enum domain, void *group, unsigned long long group_extra,
void *parent, unsigned long long parent_extra, string_index name_index );
void __TBB_EXPORTED_FUNC itt_metadata_str_add_v7( itt_domain_enum domain, void *addr, unsigned long long addr_extra,
string_index key, const char *value );
void __TBB_EXPORTED_FUNC itt_relation_add_v7( itt_domain_enum domain, void *addr0, unsigned long long addr0_extra,
itt_relation relation, void *addr1, unsigned long long addr1_extra );
void __TBB_EXPORTED_FUNC itt_task_begin_v7( itt_domain_enum domain, void *task, unsigned long long task_extra,
void *parent, unsigned long long parent_extra, string_index name_index );
void __TBB_EXPORTED_FUNC itt_task_end_v7( itt_domain_enum domain );
#endif // __TBB_ITT_STRUCTURE_API
// two template arguments are to workaround /Wp64 warning with tbb::atomic specialized for unsigned type
template <typename T, typename U>
inline void itt_store_word_with_release(tbb::atomic<T>& dst, U src) {
#if TBB_USE_THREADING_TOOLS
// This assertion should be replaced with static_assert
__TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized.");
itt_store_pointer_with_release_v3(&dst, (void *)uintptr_t(src));
#else
dst = src;
#endif // TBB_USE_THREADING_TOOLS
}
template <typename T>
inline T itt_load_word_with_acquire(const tbb::atomic<T>& src) {
#if TBB_USE_THREADING_TOOLS
// This assertion should be replaced with static_assert
__TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized.");
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
// Workaround for overzealous compiler warnings
#pragma warning (push)
#pragma warning (disable: 4311)
#endif
T result = (T)itt_load_pointer_with_acquire_v3(&src);
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma warning (pop)
#endif
return result;
#else
return src;
#endif // TBB_USE_THREADING_TOOLS
}
template <typename T>
inline void itt_store_word_with_release(T& dst, T src) {
#if TBB_USE_THREADING_TOOLS
// This assertion should be replaced with static_assert
__TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized.");
itt_store_pointer_with_release_v3(&dst, (void *)src);
#else
__TBB_store_with_release(dst, src);
#endif // TBB_USE_THREADING_TOOLS
}
template <typename T>
inline T itt_load_word_with_acquire(const T& src) {
#if TBB_USE_THREADING_TOOLS
// This assertion should be replaced with static_assert
__TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized");
return (T)itt_load_pointer_with_acquire_v3(&src);
#else
return __TBB_load_with_acquire(src);
#endif // TBB_USE_THREADING_TOOLS
}
template <typename T>
inline void itt_hide_store_word(T& dst, T src) {
#if TBB_USE_THREADING_TOOLS
//TODO: This assertion should be replaced with static_assert
__TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized");
itt_store_pointer_with_release_v3(&dst, (void *)src);
#else
dst = src;
#endif
}
//TODO: rename to itt_hide_load_word_relaxed
template <typename T>
inline T itt_hide_load_word(const T& src) {
#if TBB_USE_THREADING_TOOLS
//TODO: This assertion should be replaced with static_assert
__TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized.");
return (T)itt_load_pointer_v3(&src);
#else
return src;
#endif
}
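// Illustrative note (hypothetical fields, not part of TBB): these helpers support the usual
// publish/consume pattern for a pointer-sized "ready" word guarding a payload, e.g.
// producer: payload = 42; itt_store_word_with_release( ready, uintptr_t(1) );
// consumer: while( !itt_load_word_with_acquire( ready ) ) ; use( payload );
// With tools support enabled the accesses are routed through the ITT notification layer so race
// detectors understand the hand-off; otherwise they fall back to the plain TBB acquire/release helpers.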
#if TBB_USE_THREADING_TOOLS
inline void call_itt_notify(notify_type t, void *ptr) {
call_itt_notify_v5((int)t, ptr);
}
#else
inline void call_itt_notify(notify_type /*t*/, void * /*ptr*/) {}
#endif // TBB_USE_THREADING_TOOLS
#if __TBB_ITT_STRUCTURE_API
inline void itt_make_task_group( itt_domain_enum domain, void *group, unsigned long long group_extra,
void *parent, unsigned long long parent_extra, string_index name_index ) {
itt_make_task_group_v7( domain, group, group_extra, parent, parent_extra, name_index );
}
inline void itt_metadata_str_add( itt_domain_enum domain, void *addr, unsigned long long addr_extra,
string_index key, const char *value ) {
itt_metadata_str_add_v7( domain, addr, addr_extra, key, value );
}
inline void itt_relation_add( itt_domain_enum domain, void *addr0, unsigned long long addr0_extra,
itt_relation relation, void *addr1, unsigned long long addr1_extra ) {
itt_relation_add_v7( domain, addr0, addr0_extra, relation, addr1, addr1_extra );
}
inline void itt_task_begin( itt_domain_enum domain, void *task, unsigned long long task_extra,
void *parent, unsigned long long parent_extra, string_index name_index ) {
itt_task_begin_v7( domain, task, task_extra, parent, parent_extra, name_index );
}
inline void itt_task_end( itt_domain_enum domain ) {
itt_task_end_v7( domain );
}
#endif // __TBB_ITT_STRUCTURE_API
} // namespace internal
} // namespace tbb
#endif /* __TBB_profiling_H */

View File

@@ -0,0 +1,508 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_tbb_stddef_H
#define __TBB_tbb_stddef_H
// Marketing-driven product version
#define TBB_VERSION_MAJOR 4
#define TBB_VERSION_MINOR 3
// Engineering-focused interface version
#define TBB_INTERFACE_VERSION 8003
#define TBB_INTERFACE_VERSION_MAJOR TBB_INTERFACE_VERSION/1000
// The oldest major interface version still supported
// To be used in SONAME, manifests, etc.
#define TBB_COMPATIBLE_INTERFACE_VERSION 2
#define __TBB_STRING_AUX(x) #x
#define __TBB_STRING(x) __TBB_STRING_AUX(x)
// We do not need defines below for resource processing on windows
#if !defined RC_INVOKED
// Define groups for Doxygen documentation
/**
* @defgroup algorithms Algorithms
* @defgroup containers Containers
* @defgroup memory_allocation Memory Allocation
* @defgroup synchronization Synchronization
* @defgroup timing Timing
* @defgroup task_scheduling Task Scheduling
*/
// Simple text that is displayed on the main page of Doxygen documentation.
/**
* \mainpage Main Page
*
* Click the tabs above for information about the
* - <a href="./modules.html">Modules</a> (groups of functionality) implemented by the library
* - <a href="./annotated.html">Classes</a> provided by the library
* - <a href="./files.html">Files</a> constituting the library.
* .
* Please note that a significant part of TBB functionality is implemented in the form of
* template functions, descriptions of which are not accessible on the <a href="./annotated.html">Classes</a>
* tab. Use <a href="./modules.html">Modules</a> or <a href="./namespacemembers.html">Namespace/Namespace Members</a>
* tabs to find them.
*
* Additional pieces of information can be found here
* - \subpage concepts
* .
*/
/** \page concepts TBB concepts
A concept is a set of requirements on a type, which are necessary and sufficient
for the type to model a particular behavior or a set of behaviors. Some concepts
are specific to a particular algorithm (e.g. algorithm body), while others
are common to several algorithms (e.g. range concept).
All TBB algorithms make use of different classes implementing various concepts.
Implementation classes are supplied by the user as type arguments of template
parameters and/or as objects passed as function call arguments. The library
provides predefined implementations of some concepts (e.g. several kinds of
\ref range_req "ranges"), while other ones must always be implemented by the user.
TBB defines a set of minimal requirements each concept must conform to. Here is
the list of different concepts hyperlinked to the corresponding requirements specifications:
- \subpage range_req
- \subpage parallel_do_body_req
- \subpage parallel_for_body_req
- \subpage parallel_reduce_body_req
- \subpage parallel_scan_body_req
- \subpage parallel_sort_iter_req
**/
// tbb_config.h should be included the first since it contains macro definitions used in other headers
#include "tbb_config.h"
#if _MSC_VER >=1400
#define __TBB_EXPORTED_FUNC __cdecl
#define __TBB_EXPORTED_METHOD __thiscall
#else
#define __TBB_EXPORTED_FUNC
#define __TBB_EXPORTED_METHOD
#endif
#if __INTEL_COMPILER || _MSC_VER
#define __TBB_NOINLINE(decl) __declspec(noinline) decl
#elif __GNUC__
#define __TBB_NOINLINE(decl) decl __attribute__ ((noinline))
#else
#define __TBB_NOINLINE(decl) decl
#endif
#if __TBB_NOEXCEPT_PRESENT
#define __TBB_NOEXCEPT(expression) noexcept(expression)
#else
#define __TBB_NOEXCEPT(expression)
#endif
#include <cstddef> /* Need size_t and ptrdiff_t */
#if _MSC_VER
#define __TBB_tbb_windef_H
#include "internal/_tbb_windef.h"
#undef __TBB_tbb_windef_H
#endif
#if !defined(_MSC_VER) || _MSC_VER>=1600
#include <stdint.h>
#endif
//! Type for an assertion handler
typedef void(*assertion_handler_type)( const char* filename, int line, const char* expression, const char * comment );
#if TBB_USE_ASSERT
#define __TBB_ASSERT_NS(predicate,message,ns) ((predicate)?((void)0) : ns::assertion_failure(__FILE__,__LINE__,#predicate,message))
//! Assert that x is true.
/** If x is false, print assertion failure message.
If the comment argument is not NULL, it is printed as part of the failure message.
The comment argument has no other effect. */
#if __TBBMALLOC_BUILD
namespace rml { namespace internal {
#define __TBB_ASSERT(predicate,message) __TBB_ASSERT_NS(predicate,message,rml::internal)
#else
namespace tbb {
#define __TBB_ASSERT(predicate,message) __TBB_ASSERT_NS(predicate,message,tbb)
#endif
#define __TBB_ASSERT_EX __TBB_ASSERT
//! Set assertion handler and return previous value of it.
assertion_handler_type __TBB_EXPORTED_FUNC set_assertion_handler( assertion_handler_type new_handler );
//! Process an assertion failure.
/** Normally called from __TBB_ASSERT macro.
If assertion handler is null, print message for assertion failure and abort.
Otherwise call the assertion handler. */
void __TBB_EXPORTED_FUNC assertion_failure( const char* filename, int line, const char* expression, const char* comment );
#if __TBBMALLOC_BUILD
}} // namespace rml::internal
#else
} // namespace tbb
#endif
#else /* !TBB_USE_ASSERT */
//! No-op version of __TBB_ASSERT.
#define __TBB_ASSERT(predicate,comment) ((void)0)
//! "Extended" version is useful to suppress warnings if a variable is only used with an assert
#define __TBB_ASSERT_EX(predicate,comment) ((void)(1 && (predicate)))
#endif /* !TBB_USE_ASSERT */
//! The namespace tbb contains all components of the library.
namespace tbb {
#if _MSC_VER && _MSC_VER<1600
namespace internal {
typedef __int8 int8_t;
typedef __int16 int16_t;
typedef __int32 int32_t;
typedef __int64 int64_t;
typedef unsigned __int8 uint8_t;
typedef unsigned __int16 uint16_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
} // namespace internal
#else /* Posix */
namespace internal {
using ::int8_t;
using ::int16_t;
using ::int32_t;
using ::int64_t;
using ::uint8_t;
using ::uint16_t;
using ::uint32_t;
using ::uint64_t;
} // namespace internal
#endif /* Posix */
using std::size_t;
using std::ptrdiff_t;
//! The function returns the interface version of the TBB shared library being used.
/**
* The version it returns is determined at runtime, not at compile/link time.
* So it can differ from the value of TBB_INTERFACE_VERSION obtained at compile time.
*/
extern "C" int __TBB_EXPORTED_FUNC TBB_runtime_interface_version();
/**
* @cond INTERNAL
* @brief Identifiers declared inside namespace internal should never be used directly by client code.
*/
namespace internal {
//! Compile-time constant that is upper bound on cache line/sector size.
/** It should be used only in situations where having a compile-time upper
bound is more useful than a run-time exact answer.
@ingroup memory_allocation */
const size_t NFS_MaxLineSize = 128;
/** Label for data that may be accessed from different threads, and that may eventually become wrapped
in a formal atomic type.
Note that no problems have yet been observed relating to the definition currently being empty,
even if at least "volatile" would seem to be in order to avoid data sometimes temporarily hiding
in a register (although "volatile" as a "poor man's atomic" lacks several other features of a proper
atomic, some of which are now provided instead through specialized functions).
Note that usage is intentionally compatible with a definition as qualifier "volatile",
both as a way to have the compiler help enforce use of the label and to quickly rule out
one potential issue.
Note however that, with some architecture/compiler combinations, e.g. on IA-64 architecture, "volatile"
also has non-portable memory semantics that are needlessly expensive for "relaxed" operations.
Note that this must only be applied to data that will not change bit patterns when cast to/from
an integral type of the same length; tbb::atomic must be used instead for, e.g., floating-point types.
TODO: apply wherever relevant **/
#define __TBB_atomic // intentionally empty, see above
template<class T, size_t S, size_t R>
struct padded_base : T {
char pad[S - R];
};
template<class T, size_t S> struct padded_base<T, S, 0> : T {};
//! Pads type T to fill out to a multiple of cache line size.
template<class T, size_t S = NFS_MaxLineSize>
struct padded : padded_base<T, S, sizeof(T) % S> {};
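// Illustrative sketch (hypothetical type, not part of TBB): padding a small per-thread counter to a
// full cache line so that counters owned by different threads never share a line (avoids false sharing).
struct __TBB_example_counter { long hits; };
typedef padded<__TBB_example_counter> __TBB_example_padded_counter; // sizeof is a multiple of NFS_MaxLineSize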
//! Extended variant of the standard offsetof macro
/** The standard offsetof macro is not sufficient for TBB as it can be used for
POD-types only. The constant 0x1000 (not NULL) is necessary to appease GCC. **/
#define __TBB_offsetof(class_name, member_name) \
((ptrdiff_t)&(reinterpret_cast<class_name*>(0x1000)->member_name) - 0x1000)
//! Returns address of the object containing a member with the given name and address
#define __TBB_get_object_ref(class_name, member_name, member_addr) \
(*reinterpret_cast<class_name*>((char*)member_addr - __TBB_offsetof(class_name, member_name)))
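// Usage sketch (hypothetical type, illustration only): given
// struct example_node { int key; int payload; };
// __TBB_offsetof(example_node, payload) yields the byte offset of 'payload' within 'example_node',
// and __TBB_get_object_ref(example_node, payload, p) recovers an example_node& from a pointer p
// to that member, which is useful when only the address of a member is at hand.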
//! Throws std::runtime_error with what() returning error_code description prefixed with aux_info
void __TBB_EXPORTED_FUNC handle_perror( int error_code, const char* aux_info );
#if TBB_USE_EXCEPTIONS
#define __TBB_TRY try
#define __TBB_CATCH(e) catch(e)
#define __TBB_THROW(e) throw e
#define __TBB_RETHROW() throw
#else /* !TBB_USE_EXCEPTIONS */
inline bool __TBB_false() { return false; }
#define __TBB_TRY
#define __TBB_CATCH(e) if ( tbb::internal::__TBB_false() )
#define __TBB_THROW(e) ((void)0)
#define __TBB_RETHROW() ((void)0)
#endif /* !TBB_USE_EXCEPTIONS */
//! Report a runtime warning.
void __TBB_EXPORTED_FUNC runtime_warning( const char* format, ... );
#if TBB_USE_ASSERT
static void* const poisoned_ptr = reinterpret_cast<void*>(-1);
//! Set p to invalid pointer value.
// Also works for regular (non-__TBB_atomic) pointers.
template<typename T>
inline void poison_pointer( T* __TBB_atomic & p ) { p = reinterpret_cast<T*>(poisoned_ptr); }
/** Expected to be used in assertions only, thus no empty form is defined. **/
template<typename T>
inline bool is_poisoned( T* p ) { return p == reinterpret_cast<T*>(poisoned_ptr); }
#else
template<typename T>
inline void poison_pointer( T* __TBB_atomic & ) {/*do nothing*/}
#endif /* !TBB_USE_ASSERT */
//! Cast between unrelated pointer types.
/** This method should be used sparingly as a last resort for dealing with
situations that inherently break strict ISO C++ aliasing rules. */
// T is a pointer type because it will be explicitly provided by the programmer as a template argument;
// U is a referent type to enable the compiler to check that "ptr" is a pointer, deducing U in the process.
template<typename T, typename U>
inline T punned_cast( U* ptr ) {
uintptr_t x = reinterpret_cast<uintptr_t>(ptr);
return reinterpret_cast<T>(x);
}
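// Usage sketch (hypothetical types, illustration only): punned_cast goes through uintptr_t so the
// conversion is explicit about breaking aliasing rules, e.g.
// example_header* h = punned_cast<example_header*>( raw_ptr ); // raw_ptr is some unrelated U*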
//! Base class for types that should not be assigned.
class no_assign {
// Deny assignment
void operator=( const no_assign& );
public:
#if __GNUC__
//! Explicitly define default construction, because otherwise gcc issues gratuitous warning.
no_assign() {}
#endif /* __GNUC__ */
};
//! Base class for types that should not be copied or assigned.
class no_copy: no_assign {
//! Deny copy construction
no_copy( const no_copy& );
public:
//! Allow default construction
no_copy() {}
};
#if TBB_DEPRECATED_MUTEX_COPYING
class mutex_copy_deprecated_and_disabled {};
#else
// By default various implementations of mutexes are not copy constructible
// and not copy assignable.
class mutex_copy_deprecated_and_disabled : no_copy {};
#endif
//! A function to check if passed in pointer is aligned on a specific border
template<typename T>
inline bool is_aligned(T* pointer, uintptr_t alignment) {
return 0==((uintptr_t)pointer & (alignment-1));
}
//! A function to check if passed integer is a power of 2
template<typename integer_type>
inline bool is_power_of_two(integer_type arg) {
return arg && (0 == (arg & (arg - 1)));
}
//! A function to compute arg modulo divisor where divisor is a power of 2.
template<typename argument_integer_type, typename divisor_integer_type>
inline argument_integer_type modulo_power_of_two(argument_integer_type arg, divisor_integer_type divisor) {
// Divisor is assumed to be a power of two (which is valid for current uses).
__TBB_ASSERT( is_power_of_two(divisor), "Divisor should be a power of two" );
return (arg & (divisor - 1));
}
//! A function to determine if "arg is a multiplication of a number and a power of 2".
// i.e. for strictly positive i and j, with j a power of 2,
// determines whether i==j<<k for some nonnegative k (so i==j yields true).
template<typename argument_integer_type, typename divisor_integer_type>
inline bool is_power_of_two_factor(argument_integer_type arg, divisor_integer_type divisor) {
// Divisor is assumed to be a power of two (which is valid for current uses).
__TBB_ASSERT( is_power_of_two(divisor), "Divisor should be a power of two" );
return 0 == (arg & (arg - divisor));
}
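// Worked example (illustration only): with divisor==4, modulo_power_of_two(12,4) computes 12&3==0,
// while is_power_of_two_factor(12,4) computes 12&(12-4)==8, i.e. non-zero, because 12 is a multiple
// of 4 but not of the form 4<<k; for arg==16 it computes 16&12==0, so 16 qualifies.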
//! Utility template function to prevent "unused" warnings by various compilers.
template<typename T1> void suppress_unused_warning( const T1& ) {}
template<typename T1, typename T2> void suppress_unused_warning( const T1&, const T2& ) {}
template<typename T1, typename T2, typename T3> void suppress_unused_warning( const T1&, const T2&, const T3& ) {}
// Struct to be used as a version tag for inline functions.
/** A version tag can be necessary to prevent the loader on Linux from using the wrong
symbol in debug builds (when inline functions are compiled as out-of-line). **/
struct version_tag_v3 {};
typedef version_tag_v3 version_tag;
} // internal
//! Dummy type that distinguishes splitting constructor from copy constructor.
/**
* See description of parallel_for and parallel_reduce for example usages.
* @ingroup algorithms
*/
class split {
};
//! Type enables transmission of splitting proportion from partitioners to range objects
/**
* In order to make use of this facility, Range objects must implement a
* splitting constructor that accepts this type and must initialize the static
* constant boolean field 'is_splittable_in_proportion' with the value
* of 'true'
*/
class proportional_split: internal::no_assign {
public:
proportional_split(size_t _left = 1, size_t _right = 1) : my_left(_left), my_right(_right) { }
size_t left() const { return my_left; }
size_t right() const { return my_right; }
// used when range does not support proportional split
operator split() const { return split(); }
#if __TBB_ENABLE_RANGE_FEEDBACK
void set_proportion(size_t _left, size_t _right) {
my_left = _left;
my_right = _right;
}
#endif
private:
size_t my_left, my_right;
};
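// Usage sketch (hypothetical range, illustration only): a user range opting into proportional
// splitting typically declares
// static const bool is_splittable_in_proportion = true;
// example_range( example_range& r, proportional_split& p ); // splits r in p.left() : p.right() ratio
// ranges without such a constructor keep working because proportional_split converts to plain split.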
} // tbb
// Following is a set of classes and functions typically used in compile-time "metaprogramming".
// TODO: move all that to a separate header
#if __TBB_ALLOCATOR_TRAITS_PRESENT
#include <memory> //for allocator_traits
#endif
#if __TBB_CPP11_RVALUE_REF_PRESENT || _LIBCPP_VERSION
#include <utility> // for std::move
#endif
namespace tbb {
namespace internal {
//! Class for determining type of std::allocator<T>::value_type.
template<typename T>
struct allocator_type {
typedef T value_type;
};
#if _MSC_VER
//! Microsoft std::allocator has non-standard extension that strips const from a type.
template<typename T>
struct allocator_type<const T> {
typedef T value_type;
};
#endif
// Ad-hoc implementation of true_type & false_type
// Intended strictly for internal use! For public APIs (traits etc), use C++11 analogues.
template <bool v>
struct bool_constant {
static /*constexpr*/ const bool value = v;
};
typedef bool_constant<true> true_type;
typedef bool_constant<false> false_type;
#if __TBB_ALLOCATOR_TRAITS_PRESENT
using std::allocator_traits;
#else
template<typename allocator>
struct allocator_traits{
typedef tbb::internal::false_type propagate_on_container_move_assignment;
};
#endif
//! A template to select either 32-bit or 64-bit constant as compile time, depending on machine word size.
template <unsigned u, unsigned long long ull >
struct select_size_t_constant {
//Explicit cast is needed to avoid compiler warnings about possible truncation.
//The value of the right size, which is selected by ?:, is anyway not truncated or promoted.
static const size_t value = (size_t)((sizeof(size_t)==sizeof(u)) ? u : ull);
};
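// Illustrative sketch (hypothetical constant, not part of TBB): selecting a word-size-dependent
// constant, such as a hash multiplier, without preprocessor branching:
static const size_t __TBB_example_hash_multiplier = select_size_t_constant<0x9E3779B9U,0x9E3779B97F4A7C15ULL>::value;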
#if __TBB_CPP11_RVALUE_REF_PRESENT
using std::move;
#elif defined(_LIBCPP_NAMESPACE)
// libc++ defines a "pre-C++11 move" similarly to ours; use it to avoid name conflicts in some cases.
using std::_LIBCPP_NAMESPACE::move;
#else
template <typename T>
T& move( T& x ) { return x; }
#endif
template <bool condition>
struct STATIC_ASSERTION_FAILED;
template <>
struct STATIC_ASSERTION_FAILED<false> { enum {value=1};};
template<>
struct STATIC_ASSERTION_FAILED<true>; //intentionally left undefined to cause compile time error
//! @endcond
}} // namespace tbb::internal
#if __TBB_STATIC_ASSERT_PRESENT
#define __TBB_STATIC_ASSERT(condition,msg) static_assert(condition,msg)
#else
// please note: the condition is intentionally inverted to get a more understandable error message
#define __TBB_STATIC_ASSERT_IMPL1(condition,msg,line) \
enum {static_assert_on_line_##line = tbb::internal::STATIC_ASSERTION_FAILED<!(condition)>::value}
#define __TBB_STATIC_ASSERT_IMPL(condition,msg,line) __TBB_STATIC_ASSERT_IMPL1(condition,msg,line)
//! Verify at compile time that the passed-in condition holds
#define __TBB_STATIC_ASSERT(condition,msg) __TBB_STATIC_ASSERT_IMPL(condition,msg,__LINE__)
#endif
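// Usage sketch (illustration only): a compile-time check written with the macro above; it maps to
// C++11 static_assert when available and to the enum-based emulation otherwise.
__TBB_STATIC_ASSERT( sizeof(tbb::internal::uint64_t)==8, "uint64_t is expected to occupy exactly 8 bytes" );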
#endif /* RC_INVOKED */
#endif /* __TBB_tbb_stddef_H */

View File

@@ -0,0 +1,330 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_tbb_thread_H
#define __TBB_tbb_thread_H
#include "tbb_stddef.h"
#if _WIN32||_WIN64
#include "machine/windows_api.h"
#define __TBB_NATIVE_THREAD_ROUTINE unsigned WINAPI
#define __TBB_NATIVE_THREAD_ROUTINE_PTR(r) unsigned (WINAPI* r)( void* )
#if __TBB_WIN8UI_SUPPORT
typedef size_t thread_id_type;
#else // __TBB_WIN8UI_SUPPORT
typedef DWORD thread_id_type;
#endif // __TBB_WIN8UI_SUPPORT
#else
#define __TBB_NATIVE_THREAD_ROUTINE void*
#define __TBB_NATIVE_THREAD_ROUTINE_PTR(r) void* (*r)( void* )
#include <pthread.h>
#endif // _WIN32||_WIN64
#include "tick_count.h"
#if !TBB_USE_EXCEPTIONS && _MSC_VER
// Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers
#pragma warning (push)
#pragma warning (disable: 4530)
#endif
#include <iosfwd>
#if !TBB_USE_EXCEPTIONS && _MSC_VER
#pragma warning (pop)
#endif
namespace tbb {
namespace internal {
class tbb_thread_v3;
}
inline void swap( internal::tbb_thread_v3& t1, internal::tbb_thread_v3& t2 ) __TBB_NOEXCEPT(true);
namespace internal {
//! Allocate a closure
void* __TBB_EXPORTED_FUNC allocate_closure_v3( size_t size );
//! Free a closure allocated by allocate_closure_v3
void __TBB_EXPORTED_FUNC free_closure_v3( void* );
struct thread_closure_base {
void* operator new( size_t size ) {return allocate_closure_v3(size);}
void operator delete( void* ptr ) {free_closure_v3(ptr);}
};
template<class F> struct thread_closure_0: thread_closure_base {
F function;
static __TBB_NATIVE_THREAD_ROUTINE start_routine( void* c ) {
thread_closure_0 *self = static_cast<thread_closure_0*>(c);
self->function();
delete self;
return 0;
}
thread_closure_0( const F& f ) : function(f) {}
};
//! Structure used to pass user function with 1 argument to thread.
template<class F, class X> struct thread_closure_1: thread_closure_base {
F function;
X arg1;
//! Routine passed to Windows's _beginthreadex by thread::internal_start() inside tbb.dll
static __TBB_NATIVE_THREAD_ROUTINE start_routine( void* c ) {
thread_closure_1 *self = static_cast<thread_closure_1*>(c);
self->function(self->arg1);
delete self;
return 0;
}
thread_closure_1( const F& f, const X& x ) : function(f), arg1(x) {}
};
template<class F, class X, class Y> struct thread_closure_2: thread_closure_base {
F function;
X arg1;
Y arg2;
//! Routine passed to Windows's _beginthreadex by thread::internal_start() inside tbb.dll
static __TBB_NATIVE_THREAD_ROUTINE start_routine( void* c ) {
thread_closure_2 *self = static_cast<thread_closure_2*>(c);
self->function(self->arg1, self->arg2);
delete self;
return 0;
}
thread_closure_2( const F& f, const X& x, const Y& y ) : function(f), arg1(x), arg2(y) {}
};
//! Versioned thread class.
class tbb_thread_v3 {
#if __TBB_IF_NO_COPY_CTOR_MOVE_SEMANTICS_BROKEN
// Workaround for a compiler bug: declaring the copy constructor as public
// enables use of the move constructor.
// The definition is not provided in order to prohibit copying.
public:
#endif
tbb_thread_v3(const tbb_thread_v3&); // = delete; // Deny access
public:
#if _WIN32||_WIN64
typedef HANDLE native_handle_type;
#else
typedef pthread_t native_handle_type;
#endif // _WIN32||_WIN64
class id;
//! Constructs a thread object that does not represent a thread of execution.
tbb_thread_v3() __TBB_NOEXCEPT(true) : my_handle(0)
#if _WIN32||_WIN64
, my_thread_id(0)
#endif // _WIN32||_WIN64
{}
//! Constructs an object and executes f() in a new thread
template <class F> explicit tbb_thread_v3(F f) {
typedef internal::thread_closure_0<F> closure_type;
internal_start(closure_type::start_routine, new closure_type(f));
}
//! Constructs an object and executes f(x) in a new thread
template <class F, class X> tbb_thread_v3(F f, X x) {
typedef internal::thread_closure_1<F,X> closure_type;
internal_start(closure_type::start_routine, new closure_type(f,x));
}
//! Constructs an object and executes f(x,y) in a new thread
template <class F, class X, class Y> tbb_thread_v3(F f, X x, Y y) {
typedef internal::thread_closure_2<F,X,Y> closure_type;
internal_start(closure_type::start_routine, new closure_type(f,x,y));
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
tbb_thread_v3(tbb_thread_v3&& x) __TBB_NOEXCEPT(true)
: my_handle(x.my_handle)
#if _WIN32||_WIN64
, my_thread_id(x.my_thread_id)
#endif
{
x.internal_wipe();
}
tbb_thread_v3& operator=(tbb_thread_v3&& x) __TBB_NOEXCEPT(true) {
internal_move(x);
return *this;
}
private:
tbb_thread_v3& operator=(const tbb_thread_v3& x); // = delete;
public:
#else // __TBB_CPP11_RVALUE_REF_PRESENT
tbb_thread_v3& operator=(tbb_thread_v3& x) {
internal_move(x);
return *this;
}
#endif // __TBB_CPP11_RVALUE_REF_PRESENT
void swap( tbb_thread_v3& t ) __TBB_NOEXCEPT(true) {tbb::swap( *this, t );}
bool joinable() const __TBB_NOEXCEPT(true) {return my_handle!=0; }
//! The completion of the thread represented by *this happens before join() returns.
void __TBB_EXPORTED_METHOD join();
//! When detach() returns, *this no longer represents the possibly continuing thread of execution.
void __TBB_EXPORTED_METHOD detach();
~tbb_thread_v3() {if( joinable() ) detach();}
inline id get_id() const __TBB_NOEXCEPT(true);
native_handle_type native_handle() { return my_handle; }
//! The number of hardware thread contexts.
/** Before TBB 3.0 U4 this method returned the number of logical CPUs in
the system. Currently on Windows, Linux and FreeBSD it returns the
number of logical CPUs available to the current process in accordance
with its affinity mask.
NOTE: The return value of this method never changes after its first
invocation. This means that changes in the process affinity mask that
took place after this method was first invoked will not affect the
number of worker threads in the TBB worker threads pool. **/
static unsigned __TBB_EXPORTED_FUNC hardware_concurrency() __TBB_NOEXCEPT(true);
private:
native_handle_type my_handle;
#if _WIN32||_WIN64
thread_id_type my_thread_id;
#endif // _WIN32||_WIN64
void internal_wipe() __TBB_NOEXCEPT(true) {
my_handle = 0;
#if _WIN32||_WIN64
my_thread_id = 0;
#endif
}
void internal_move(tbb_thread_v3& x) __TBB_NOEXCEPT(true) {
if (joinable()) detach();
my_handle = x.my_handle;
#if _WIN32||_WIN64
my_thread_id = x.my_thread_id;
#endif // _WIN32||_WIN64
x.internal_wipe();
}
/** Runs start_routine(closure) on another thread and sets my_handle to the handle of the created thread. */
void __TBB_EXPORTED_METHOD internal_start( __TBB_NATIVE_THREAD_ROUTINE_PTR(start_routine),
void* closure );
friend void __TBB_EXPORTED_FUNC move_v3( tbb_thread_v3& t1, tbb_thread_v3& t2 );
friend void tbb::swap( tbb_thread_v3& t1, tbb_thread_v3& t2 ) __TBB_NOEXCEPT(true);
};
class tbb_thread_v3::id {
#if _WIN32||_WIN64
thread_id_type my_id;
id( thread_id_type id_ ) : my_id(id_) {}
#else
pthread_t my_id;
id( pthread_t id_ ) : my_id(id_) {}
#endif // _WIN32||_WIN64
friend class tbb_thread_v3;
public:
id() __TBB_NOEXCEPT(true) : my_id(0) {}
friend bool operator==( tbb_thread_v3::id x, tbb_thread_v3::id y ) __TBB_NOEXCEPT(true);
friend bool operator!=( tbb_thread_v3::id x, tbb_thread_v3::id y ) __TBB_NOEXCEPT(true);
friend bool operator<( tbb_thread_v3::id x, tbb_thread_v3::id y ) __TBB_NOEXCEPT(true);
friend bool operator<=( tbb_thread_v3::id x, tbb_thread_v3::id y ) __TBB_NOEXCEPT(true);
friend bool operator>( tbb_thread_v3::id x, tbb_thread_v3::id y ) __TBB_NOEXCEPT(true);
friend bool operator>=( tbb_thread_v3::id x, tbb_thread_v3::id y ) __TBB_NOEXCEPT(true);
template<class charT, class traits>
friend std::basic_ostream<charT, traits>&
operator<< (std::basic_ostream<charT, traits> &out,
tbb_thread_v3::id id)
{
out << id.my_id;
return out;
}
friend tbb_thread_v3::id __TBB_EXPORTED_FUNC thread_get_id_v3();
}; // tbb_thread_v3::id
tbb_thread_v3::id tbb_thread_v3::get_id() const __TBB_NOEXCEPT(true) {
#if _WIN32||_WIN64
return id(my_thread_id);
#else
return id(my_handle);
#endif // _WIN32||_WIN64
}
void __TBB_EXPORTED_FUNC move_v3( tbb_thread_v3& t1, tbb_thread_v3& t2 );
tbb_thread_v3::id __TBB_EXPORTED_FUNC thread_get_id_v3();
void __TBB_EXPORTED_FUNC thread_yield_v3();
void __TBB_EXPORTED_FUNC thread_sleep_v3(const tick_count::interval_t &i);
inline bool operator==(tbb_thread_v3::id x, tbb_thread_v3::id y) __TBB_NOEXCEPT(true)
{
return x.my_id == y.my_id;
}
inline bool operator!=(tbb_thread_v3::id x, tbb_thread_v3::id y) __TBB_NOEXCEPT(true)
{
return x.my_id != y.my_id;
}
inline bool operator<(tbb_thread_v3::id x, tbb_thread_v3::id y) __TBB_NOEXCEPT(true)
{
return x.my_id < y.my_id;
}
inline bool operator<=(tbb_thread_v3::id x, tbb_thread_v3::id y) __TBB_NOEXCEPT(true)
{
return x.my_id <= y.my_id;
}
inline bool operator>(tbb_thread_v3::id x, tbb_thread_v3::id y) __TBB_NOEXCEPT(true)
{
return x.my_id > y.my_id;
}
inline bool operator>=(tbb_thread_v3::id x, tbb_thread_v3::id y) __TBB_NOEXCEPT(true)
{
return x.my_id >= y.my_id;
}
} // namespace internal;
//! Users reference thread class by name tbb_thread
typedef internal::tbb_thread_v3 tbb_thread;
using internal::operator==;
using internal::operator!=;
using internal::operator<;
using internal::operator>;
using internal::operator<=;
using internal::operator>=;
inline void move( tbb_thread& t1, tbb_thread& t2 ) {
internal::move_v3(t1, t2);
}
inline void swap( internal::tbb_thread_v3& t1, internal::tbb_thread_v3& t2 ) __TBB_NOEXCEPT(true) {
tbb::tbb_thread::native_handle_type h = t1.my_handle;
t1.my_handle = t2.my_handle;
t2.my_handle = h;
#if _WIN32||_WIN64
thread_id_type i = t1.my_thread_id;
t1.my_thread_id = t2.my_thread_id;
t2.my_thread_id = i;
#endif /* _WIN32||_WIN64 */
}
namespace this_tbb_thread {
inline tbb_thread::id get_id() { return internal::thread_get_id_v3(); }
//! Offers the operating system the opportunity to schedule another thread.
inline void yield() { internal::thread_yield_v3(); }
//! The current thread blocks at least until the time specified.
inline void sleep(const tick_count::interval_t &i) {
internal::thread_sleep_v3(i);
}
} // namespace this_tbb_thread
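// Usage sketch (hypothetical functor, illustration only): launching and joining a thread:
// struct greeter { void operator()( const char* who ) const { /* ... */ } };
// tbb::tbb_thread t( greeter(), "world" ); // runs greeter()("world") on a new thread
// t.join(); // wait for completion; the destructor would otherwise detach a joinable thread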
} // namespace tbb
#endif /* __TBB_tbb_thread_H */

View File

@@ -0,0 +1,66 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
/*
Replacing the standard memory allocation routines in Microsoft* C/C++ RTL
(malloc/free, global new/delete, etc.) with the TBB memory allocator.
Include the following header in a source file of any binary that is loaded during
application startup:
#include "tbb/tbbmalloc_proxy.h"
or add the following parameters to the linker options for the binary that is
loaded during application startup. It can be either an exe file or a dll.
For win32:
tbbmalloc_proxy.lib /INCLUDE:"___TBB_malloc_proxy"
For win64:
tbbmalloc_proxy.lib /INCLUDE:"__TBB_malloc_proxy"
*/
#ifndef __TBB_tbbmalloc_proxy_H
#define __TBB_tbbmalloc_proxy_H
#if _MSC_VER
#ifdef _DEBUG
#pragma comment(lib, "tbbmalloc_proxy_debug.lib")
#else
#pragma comment(lib, "tbbmalloc_proxy.lib")
#endif
#if defined(_WIN64)
#pragma comment(linker, "/include:__TBB_malloc_proxy")
#else
#pragma comment(linker, "/include:___TBB_malloc_proxy")
#endif
#else
/* Primarily to support MinGW */
extern "C" void __TBB_malloc_proxy();
struct __TBB_malloc_proxy_caller {
__TBB_malloc_proxy_caller() { __TBB_malloc_proxy(); }
} volatile __TBB_malloc_proxy_helper_object;
#endif // _MSC_VER
#endif //__TBB_tbbmalloc_proxy_H

View File

@@ -0,0 +1,140 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_tick_count_H
#define __TBB_tick_count_H
#include "tbb_stddef.h"
#if _WIN32||_WIN64
#include "machine/windows_api.h"
#elif __linux__
#include <ctime>
#else /* generic Unix */
#include <sys/time.h>
#endif /* (choice of OS) */
namespace tbb {
//! Absolute timestamp
/** @ingroup timing */
class tick_count {
public:
//! Relative time interval.
class interval_t {
long long value;
explicit interval_t( long long value_ ) : value(value_) {}
public:
//! Construct a time interval representing zero time duration
interval_t() : value(0) {};
//! Construct a time interval representing sec seconds time duration
explicit interval_t( double sec );
//! Return the length of a time interval in seconds
double seconds() const;
friend class tbb::tick_count;
//! Extract the intervals from the tick_counts and subtract them.
friend interval_t operator-( const tick_count& t1, const tick_count& t0 );
//! Add two intervals.
friend interval_t operator+( const interval_t& i, const interval_t& j ) {
return interval_t(i.value+j.value);
}
//! Subtract two intervals.
friend interval_t operator-( const interval_t& i, const interval_t& j ) {
return interval_t(i.value-j.value);
}
//! Accumulation operator
interval_t& operator+=( const interval_t& i ) {value += i.value; return *this;}
//! Subtraction operator
interval_t& operator-=( const interval_t& i ) {value -= i.value; return *this;}
private:
static long long ticks_per_second(){
#if _WIN32||_WIN64
LARGE_INTEGER qpfreq;
int rval = QueryPerformanceFrequency(&qpfreq);
__TBB_ASSERT_EX(rval, "QueryPerformanceFrequency returned zero");
return static_cast<long long>(qpfreq.QuadPart);
#elif __linux__
return static_cast<long long>(1E9);
#else /* generic Unix */
return static_cast<long long>(1E6);
#endif /* (choice of OS) */
}
};
//! Construct an absolute timestamp initialized to zero.
tick_count() : my_count(0) {};
//! Return current time.
static tick_count now();
//! Subtract two timestamps to get the time interval between
friend interval_t operator-( const tick_count& t1, const tick_count& t0 );
//! Return the resolution of the clock in seconds per tick.
static double resolution() { return 1.0 / interval_t::ticks_per_second(); }
private:
long long my_count;
};
inline tick_count tick_count::now() {
tick_count result;
#if _WIN32||_WIN64
LARGE_INTEGER qpcnt;
int rval = QueryPerformanceCounter(&qpcnt);
__TBB_ASSERT_EX(rval, "QueryPerformanceCounter failed");
result.my_count = qpcnt.QuadPart;
#elif __linux__
struct timespec ts;
int status = clock_gettime( CLOCK_REALTIME, &ts );
__TBB_ASSERT_EX( status==0, "CLOCK_REALTIME not supported" );
result.my_count = static_cast<long long>(1000000000UL)*static_cast<long long>(ts.tv_sec) + static_cast<long long>(ts.tv_nsec);
#else /* generic Unix */
struct timeval tv;
int status = gettimeofday(&tv, NULL);
__TBB_ASSERT_EX( status==0, "gettimeofday failed" );
result.my_count = static_cast<long long>(1000000)*static_cast<long long>(tv.tv_sec) + static_cast<long long>(tv.tv_usec);
#endif /*(choice of OS) */
return result;
}
inline tick_count::interval_t::interval_t( double sec ) {
value = static_cast<long long>(sec*interval_t::ticks_per_second());
}
inline tick_count::interval_t operator-( const tick_count& t1, const tick_count& t0 ) {
return tick_count::interval_t( t1.my_count-t0.my_count );
}
inline double tick_count::interval_t::seconds() const {
return value*tick_count::resolution();
}
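// Usage sketch (illustration only): measuring the wall-clock time of a code region, which is the
// typical use of this class:
// tbb::tick_count t0 = tbb::tick_count::now();
// /* ... work to be timed ... */
// tbb::tick_count t1 = tbb::tick_count::now();
// double elapsed = ( t1 - t0 ).seconds();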
} // namespace tbb
#endif /* __TBB_tick_count_H */