Initial commit: Final state of the master project

2017-09-16 09:41:37 +02:00
commit 696180d43b
832 changed files with 169717 additions and 0 deletions


@@ -0,0 +1,180 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB__aggregator_impl_H
#define __TBB__aggregator_impl_H
#include "../atomic.h"
#if !__TBBMALLOC_BUILD
#include "../tbb_profiling.h"
#endif
namespace tbb {
namespace interface6 {
namespace internal {
using namespace tbb::internal;
//! aggregated_operation base class
template <typename Derived>
class aggregated_operation {
public:
uintptr_t status;
Derived *next;
aggregated_operation() : status(0), next(NULL) {}
};
//! Aggregator base class
/** An aggregator for collecting operations coming from multiple sources and executing
them serially on a single thread. operation_type must be derived from
aggregated_operation. The parameter handler_type is a functor that will be passed the
list of operations and is expected to handle each operation appropriately, setting the
status of each operation to non-zero.*/
template < typename operation_type >
class aggregator_generic {
public:
aggregator_generic() : handler_busy(false) { pending_operations = NULL; }
//! Place operation in list
/** Place the operation in the list, then either handle the list or wait for the
operation to complete.
long_life_time specifies the lifetime of an operation inserted into the aggregator.
A "long" (long_life_time == true) lifetime operation can still be accessed
after it has been executed.
"Short" (long_life_time == false) lifetime operations can be destroyed during
execution, so any access to them after execution is invalid.*/
template < typename handler_type >
void execute(operation_type *op, handler_type &handle_operations, bool long_life_time = true) {
operation_type *res;
// op->status should be read before inserting the operation into the
// aggregator queue, since it can become invalid after a handler executes
// (if the operation has 'short' lifetime).
const uintptr_t status = op->status;
// ITT note: &(op->status) tag is used to cover accesses to this op node. This
// thread has created the operation, and now releases it so that the handler
// thread may handle the associated operation w/o triggering a race condition;
// thus this tag will be acquired just before the operation is handled in the
// handle_operations functor.
call_itt_notify(releasing, &(op->status));
// insert the operation in the queue.
do {
// ITT may flag the following line as a race; it is a false positive:
// This is an atomic read; we don't provide itt_hide_load_word for atomics
op->next = res = pending_operations; // NOT A RACE
} while (pending_operations.compare_and_swap(op, res) != res);
if (!res) { // first in the list; handle the operations.
// ITT note: &pending_operations tag covers access to the handler_busy flag,
// which this waiting handler thread will try to set before entering
// handle_operations.
call_itt_notify(acquired, &pending_operations);
start_handle_operations(handle_operations);
// An operation with 'short' lifetime may already have been destroyed.
if (long_life_time)
__TBB_ASSERT(op->status, NULL);
}
// not first; wait for op to be ready.
else if (!status) { // operation is blocking here.
__TBB_ASSERT(long_life_time, "A blocking operation cannot have 'short' lifetime, since it may already have been destroyed.");
call_itt_notify(prepare, &(op->status));
spin_wait_while_eq(op->status, uintptr_t(0));
itt_load_word_with_acquire(op->status);
}
}
private:
//! An atomically updated list (aka mailbox) of pending operations
atomic<operation_type *> pending_operations;
//! Controls thread access to handle_operations
uintptr_t handler_busy;
//! Trigger the handling of operations when the handler is free
template < typename handler_type >
void start_handle_operations( handler_type &handle_operations ) {
operation_type *op_list;
// ITT note: &handler_busy tag covers access to pending_operations as it is passed
// between active and waiting handlers. Below, the waiting handler waits until
// the active handler releases, and the waiting handler acquires &handler_busy as
// it becomes the active_handler. The release point is at the end of this
// function, when all operations in pending_operations have been handled by the
// owner of this aggregator.
call_itt_notify(prepare, &handler_busy);
// get the handler_busy:
// only one thread can possibly spin here at a time
spin_wait_until_eq(handler_busy, uintptr_t(0));
call_itt_notify(acquired, &handler_busy);
// acquire fence not necessary here due to causality rule and surrounding atomics
__TBB_store_with_release(handler_busy, uintptr_t(1));
// ITT note: &pending_operations tag covers access to the handler_busy flag
// itself. Capturing the state of the pending_operations signifies that
// handler_busy has been set and a new active handler will now process that list's
// operations.
call_itt_notify(releasing, &pending_operations);
// grab pending_operations
op_list = pending_operations.fetch_and_store(NULL);
// handle all the operations
handle_operations(op_list);
// release the handler
itt_store_word_with_release(handler_busy, uintptr_t(0));
}
};
template < typename handler_type, typename operation_type >
class aggregator : public aggregator_generic<operation_type> {
handler_type handle_operations;
public:
aggregator() {}
explicit aggregator(handler_type h) : handle_operations(h) {}
void initialize_handler(handler_type h) { handle_operations = h; }
void execute(operation_type *op) {
aggregator_generic<operation_type>::execute(op, handle_operations);
}
};
// The most-compatible friend declaration (for MSVC, gcc, icc) is
// template<class U, class V> friend class aggregating_functor;
template<typename aggregating_class, typename operation_list>
class aggregating_functor {
aggregating_class *fi;
public:
aggregating_functor() {}
aggregating_functor(aggregating_class *fi_) : fi(fi_) {}
void operator()(operation_list* op_list) { fi->handle_operations(op_list); }
};
} // namespace internal
} // namespace interface6
namespace internal {
using interface6::internal::aggregated_operation;
using interface6::internal::aggregator_generic;
using interface6::internal::aggregator;
using interface6::internal::aggregating_functor;
} // namespace internal
} // namespace tbb
#endif // __TBB__aggregator_impl_H
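A minimal usage sketch of the aggregator above, assuming this header is included; the names count_op, count_handler, and shared_counter are hypothetical illustrations, not TBB API. Each thread stack-allocates an operation; the thread whose operation lands first in the empty list becomes the active handler and applies the whole batch serially:

using namespace tbb::interface6::internal;
// hypothetical operation: add a delta to a shared counter
struct count_op : aggregated_operation<count_op> {
    int delta;
    explicit count_op(int d) : delta(d) {}
};
class shared_counter;
// hypothetical handler functor: applies a whole batch of operations serially
struct count_handler {
    shared_counter *c;
    count_handler(shared_counter *c_ = NULL) : c(c_) {}
    void operator()(count_op *op_list);
};
class shared_counter {
    friend struct count_handler;
    int value;
    aggregator<count_handler, count_op> agg;
public:
    shared_counter() : value(0) { agg.initialize_handler(count_handler(this)); }
    void add(int d) {
        count_op op(d);   // stack-allocated: 'long' lifetime, so execute() blocks
        agg.execute(&op); // returns only after some thread has handled op
    }
};
void count_handler::operator()(count_op *op_list) {
    while (op_list) {
        count_op *current = op_list;
        op_list = op_list->next;
        c->value += current->delta;
        // a non-zero status releases any thread spinning in execute()
        __TBB_store_with_release(current->status, uintptr_t(1));
    }
}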

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,757 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB__flow_graph_impl_H
#define __TBB__flow_graph_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
namespace internal {
namespace graph_policy_namespace {
enum graph_buffer_policy { rejecting, reserving, queueing, tag_matching };
}
// -------------- function_body containers ----------------------
//! A functor that takes no input and generates a value of type Output
template< typename Output >
class source_body : tbb::internal::no_assign {
public:
virtual ~source_body() {}
virtual bool operator()(Output &output) = 0;
virtual source_body* clone() = 0;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
virtual void reset_body() = 0;
#endif
};
//! The leaf for source_body
template< typename Output, typename Body>
class source_body_leaf : public source_body<Output> {
public:
source_body_leaf( const Body &_body ) : body(_body), init_body(_body) { }
/*override*/ bool operator()(Output &output) { return body( output ); }
/*override*/ source_body_leaf* clone() {
return new source_body_leaf< Output, Body >(init_body);
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
/*override*/ void reset_body() {
body = init_body;
}
#endif
Body get_body() { return body; }
private:
Body body;
Body init_body;
};
//! A functor that takes an Input and generates an Output
template< typename Input, typename Output >
class function_body : tbb::internal::no_assign {
public:
virtual ~function_body() {}
virtual Output operator()(const Input &input) = 0;
virtual function_body* clone() = 0;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
virtual void reset_body() = 0;
#endif
};
//! the leaf for function_body
template <typename Input, typename Output, typename B>
class function_body_leaf : public function_body< Input, Output > {
public:
function_body_leaf( const B &_body ) : body(_body), init_body(_body) { }
Output operator()(const Input &i) { return body(i); }
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
/*override*/ void reset_body() {
body = init_body;
}
#endif
B get_body() { return body; }
/*override*/ function_body_leaf* clone() {
return new function_body_leaf< Input, Output, B >(init_body);
}
private:
B body;
B init_body;
};
//! the leaf for function_body specialized for Input and output of continue_msg
template <typename B>
class function_body_leaf< continue_msg, continue_msg, B> : public function_body< continue_msg, continue_msg > {
public:
function_body_leaf( const B &_body ) : body(_body), init_body(_body) { }
continue_msg operator()( const continue_msg &i ) {
body(i);
return i;
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
/*override*/ void reset_body() {
body = init_body;
}
#endif
B get_body() { return body; }
/*override*/ function_body_leaf* clone() {
return new function_body_leaf< continue_msg, continue_msg, B >(init_body);
}
private:
B body;
B init_body;
};
//! the leaf for function_body specialized for Output of continue_msg
template <typename Input, typename B>
class function_body_leaf< Input, continue_msg, B> : public function_body< Input, continue_msg > {
public:
function_body_leaf( const B &_body ) : body(_body), init_body(_body) { }
continue_msg operator()(const Input &i) {
body(i);
return continue_msg();
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
/*override*/ void reset_body() {
body = init_body;
}
#endif
B get_body() { return body; }
/*override*/ function_body_leaf* clone() {
return new function_body_leaf< Input, continue_msg, B >(init_body);
}
private:
B body;
B init_body;
};
//! the leaf for function_body specialized for Input of continue_msg
template <typename Output, typename B>
class function_body_leaf< continue_msg, Output, B > : public function_body< continue_msg, Output > {
public:
function_body_leaf( const B &_body ) : body(_body), init_body(_body) { }
Output operator()(const continue_msg &i) {
return body(i);
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
/*override*/ void reset_body() {
body = init_body;
}
#endif
B get_body() { return body; }
/*override*/ function_body_leaf* clone() {
return new function_body_leaf< continue_msg, Output, B >(init_body);
}
private:
B body;
B init_body;
};
//! function_body that takes an Input and a set of output ports
template<typename Input, typename OutputSet>
class multifunction_body : tbb::internal::no_assign {
public:
virtual ~multifunction_body () {}
virtual void operator()(const Input &/* input*/, OutputSet &/*oset*/) = 0;
virtual multifunction_body* clone() = 0;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
virtual void reset_body() = 0;
#endif
};
//! leaf for multifunction. OutputSet can be a std::tuple or a vector.
template<typename Input, typename OutputSet, typename B>
class multifunction_body_leaf : public multifunction_body<Input, OutputSet> {
public:
multifunction_body_leaf(const B &_body) : body(_body), init_body(_body) { }
void operator()(const Input &input, OutputSet &oset) {
body(input, oset); // body may explicitly put() to one or more of oset.
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
/*override*/ void reset_body() {
body = init_body;
}
#endif
B get_body() { return body; }
/*override*/ multifunction_body_leaf* clone() {
return new multifunction_body_leaf<Input, OutputSet,B>(init_body);
}
private:
B body;
B init_body;
};
// --------------------------- end of function_body containers ------------------------
// --------------------------- node task bodies ---------------------------------------
//! A task that calls a node's forward_task function
template< typename NodeType >
class forward_task_bypass : public task {
NodeType &my_node;
public:
forward_task_bypass( NodeType &n ) : my_node(n) {}
task *execute() {
task * new_task = my_node.forward_task();
if (new_task == SUCCESSFULLY_ENQUEUED) new_task = NULL;
return new_task;
}
};
//! A task that calls a node's apply_body_bypass function, passing in an input of type Input
// return the task* unless it is SUCCESSFULLY_ENQUEUED, in which case return NULL
template< typename NodeType, typename Input >
class apply_body_task_bypass : public task {
NodeType &my_node;
Input my_input;
public:
apply_body_task_bypass( NodeType &n, const Input &i ) : my_node(n), my_input(i) {}
task *execute() {
task * next_task = my_node.apply_body_bypass( my_input );
if(next_task == SUCCESSFULLY_ENQUEUED) next_task = NULL;
return next_task;
}
};
//! A task that calls a node's apply_body function with no input
template< typename NodeType >
class source_task_bypass : public task {
NodeType &my_node;
public:
source_task_bypass( NodeType &n ) : my_node(n) {}
task *execute() {
task *new_task = my_node.apply_body_bypass( );
if(new_task == SUCCESSFULLY_ENQUEUED) return NULL;
return new_task;
}
};
// ------------------------ end of node task bodies -----------------------------------
//! An empty functor that takes an Input and returns a default constructed Output
template< typename Input, typename Output >
struct empty_body {
Output operator()( const Input & ) const { return Output(); }
};
//! A node_cache maintains a std::queue of elements of type T. Each operation is protected by a lock.
template< typename T, typename M=spin_mutex >
class node_cache {
public:
typedef size_t size_type;
bool empty() {
typename my_mutex_type::scoped_lock lock( my_mutex );
return internal_empty();
}
void add( T &n ) {
typename my_mutex_type::scoped_lock lock( my_mutex );
internal_push(n);
}
void remove( T &n ) {
typename my_mutex_type::scoped_lock lock( my_mutex );
for ( size_t i = internal_size(); i != 0; --i ) {
T &s = internal_pop();
if ( &s == &n ) return; // only remove one predecessor per request
internal_push(s);
}
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
typedef std::vector<T *> predecessor_vector_type;
void internal_add_built_predecessor( T &n ) {
typename my_mutex_type::scoped_lock lock( my_mutex );
my_built_predecessors.add_edge(n);
}
void internal_delete_built_predecessor( T &n ) {
typename my_mutex_type::scoped_lock lock( my_mutex );
my_built_predecessors.delete_edge(n);
}
void copy_predecessors( predecessor_vector_type &v) {
typename my_mutex_type::scoped_lock lock( my_mutex );
my_built_predecessors.copy_edges(v);
}
size_t predecessor_count() {
typename my_mutex_type::scoped_lock lock(my_mutex);
return (size_t)(my_built_predecessors.edge_count());
}
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
protected:
typedef M my_mutex_type;
my_mutex_type my_mutex;
std::queue< T * > my_q;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
edge_container<T> my_built_predecessors;
#endif
// Assumes lock is held
inline bool internal_empty( ) {
return my_q.empty();
}
// Assumes lock is held
inline size_type internal_size( ) {
return my_q.size();
}
// Assumes lock is held
inline void internal_push( T &n ) {
my_q.push(&n);
}
// Assumes lock is held
inline T &internal_pop() {
T *v = my_q.front();
my_q.pop();
return *v;
}
};
//! A cache of predecessors that only supports try_get
template< typename T, typename M=spin_mutex >
class predecessor_cache : public node_cache< sender<T>, M > {
public:
typedef M my_mutex_type;
typedef T output_type;
typedef sender<output_type> predecessor_type;
typedef receiver<output_type> successor_type;
predecessor_cache( ) : my_owner( NULL ) { }
void set_owner( successor_type *owner ) { my_owner = owner; }
bool get_item( output_type &v ) {
bool msg = false;
do {
predecessor_type *src;
{
typename my_mutex_type::scoped_lock lock(this->my_mutex);
if ( this->internal_empty() ) {
break;
}
src = &this->internal_pop();
}
// Try to get from this sender
msg = src->try_get( v );
if (msg == false) {
// Relinquish ownership of the edge
if ( my_owner)
src->register_successor( *my_owner );
} else {
// Retain ownership of the edge
this->add(*src);
}
} while ( msg == false );
return msg;
}
void reset( __TBB_PFG_RESET_ARG(reset_flags f)) {
if(my_owner) {
for(;;) {
predecessor_type *src;
{
if(this->internal_empty()) break;
src = &this->internal_pop();
}
src->register_successor( *my_owner);
}
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
if (f&rf_extract && my_owner)
my_built_predecessors.receiver_extract(*my_owner);
__TBB_ASSERT(!(f&rf_extract) || this->internal_empty(), "predecessor cache not empty");
#endif
}
protected:
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
using node_cache< sender<T>, M >::my_built_predecessors;
#endif
successor_type *my_owner;
};
//! A cache of predecessors that supports requests and reservations
template< typename T, typename M=spin_mutex >
class reservable_predecessor_cache : public predecessor_cache< T, M > {
public:
typedef M my_mutex_type;
typedef T output_type;
typedef sender<T> predecessor_type;
typedef receiver<T> successor_type;
reservable_predecessor_cache( ) : reserved_src(NULL) { }
bool
try_reserve( output_type &v ) {
bool msg = false;
do {
{
typename my_mutex_type::scoped_lock lock(this->my_mutex);
if ( reserved_src || this->internal_empty() )
return false;
reserved_src = &this->internal_pop();
}
// Try to get from this sender
msg = reserved_src->try_reserve( v );
if (msg == false) {
typename my_mutex_type::scoped_lock lock(this->my_mutex);
// Relinquish ownership of the edge
reserved_src->register_successor( *this->my_owner );
reserved_src = NULL;
} else {
// Retain ownership of the edge
this->add( *reserved_src );
}
} while ( msg == false );
return msg;
}
bool
try_release( ) {
reserved_src->try_release( );
reserved_src = NULL;
return true;
}
bool
try_consume( ) {
reserved_src->try_consume( );
reserved_src = NULL;
return true;
}
void reset( __TBB_PFG_RESET_ARG(reset_flags f)) {
reserved_src = NULL;
predecessor_cache<T,M>::reset(__TBB_PFG_RESET_ARG(f));
}
private:
predecessor_type *reserved_src;
};
//! An abstract cache of successors
template<typename T, typename M=spin_rw_mutex >
class successor_cache : tbb::internal::no_copy {
protected:
typedef M my_mutex_type;
my_mutex_type my_mutex;
typedef receiver<T> *pointer_type;
typedef std::list< pointer_type > my_successors_type;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
edge_container<receiver<T> > my_built_successors;
#endif
my_successors_type my_successors;
sender<T> *my_owner;
public:
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
typedef std::vector<pointer_type> successor_vector_type;
void internal_add_built_successor( receiver<T> &r) {
typename my_mutex_type::scoped_lock l(my_mutex, true);
my_built_successors.add_edge( r );
}
void internal_delete_built_successor( receiver<T> &r) {
typename my_mutex_type::scoped_lock l(my_mutex, true);
my_built_successors.delete_edge(r);
}
void copy_successors( successor_vector_type &v) {
typename my_mutex_type::scoped_lock l(my_mutex, false);
my_built_successors.copy_edges(v);
}
size_t successor_count() {
typename my_mutex_type::scoped_lock l(my_mutex,false);
return my_built_successors.edge_count();
}
void reset( __TBB_PFG_RESET_ARG(reset_flags f)) {
if (f&rf_extract && my_owner)
my_built_successors.sender_extract(*my_owner);
}
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
successor_cache( ) : my_owner(NULL) {}
void set_owner( sender<T> *owner ) { my_owner = owner; }
virtual ~successor_cache() {}
void register_successor( receiver<T> &r ) {
typename my_mutex_type::scoped_lock l(my_mutex, true);
my_successors.push_back( &r );
}
void remove_successor( receiver<T> &r ) {
typename my_mutex_type::scoped_lock l(my_mutex, true);
for ( typename my_successors_type::iterator i = my_successors.begin();
i != my_successors.end(); ++i ) {
if ( *i == & r ) {
my_successors.erase(i);
break;
}
}
}
bool empty() {
typename my_mutex_type::scoped_lock l(my_mutex, false);
return my_successors.empty();
}
void clear() {
my_successors.clear();
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
my_built_successors.clear();
#endif
}
virtual task * try_put_task( const T &t ) = 0;
};
//! An abstract cache of successors, specialized to continue_msg
template<>
class successor_cache< continue_msg > : tbb::internal::no_copy {
protected:
typedef spin_rw_mutex my_mutex_type;
my_mutex_type my_mutex;
typedef receiver<continue_msg> *pointer_type;
typedef std::list< pointer_type > my_successors_type;
my_successors_type my_successors;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
edge_container<receiver<continue_msg> > my_built_successors;
#endif
sender<continue_msg> *my_owner;
public:
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
typedef std::vector<pointer_type> successor_vector_type;
void internal_add_built_successor( receiver<continue_msg> &r) {
my_mutex_type::scoped_lock l(my_mutex, true);
my_built_successors.add_edge( r );
}
void internal_delete_built_successor( receiver<continue_msg> &r) {
my_mutex_type::scoped_lock l(my_mutex, true);
my_built_successors.delete_edge(r);
}
void copy_successors( successor_vector_type &v) {
my_mutex_type::scoped_lock l(my_mutex, false);
my_built_successors.copy_edges(v);
}
size_t successor_count() {
my_mutex_type::scoped_lock l(my_mutex,false);
return my_built_successors.edge_count();
}
void reset( __TBB_PFG_RESET_ARG(reset_flags f)) {
if (f&rf_extract && my_owner)
my_built_successors.sender_extract(*my_owner);
}
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
successor_cache( ) : my_owner(NULL) {}
void set_owner( sender<continue_msg> *owner ) { my_owner = owner; }
virtual ~successor_cache() {}
void register_successor( receiver<continue_msg> &r ) {
my_mutex_type::scoped_lock l(my_mutex, true);
my_successors.push_back( &r );
if ( my_owner && r.is_continue_receiver() ) {
r.register_predecessor( *my_owner );
}
}
void remove_successor( receiver<continue_msg> &r ) {
my_mutex_type::scoped_lock l(my_mutex, true);
for ( my_successors_type::iterator i = my_successors.begin();
i != my_successors.end(); ++i ) {
if ( *i == & r ) {
// TODO: Check if we need to test for continue_receiver before
// removing from r.
if ( my_owner )
r.remove_predecessor( *my_owner );
my_successors.erase(i);
break;
}
}
}
bool empty() {
my_mutex_type::scoped_lock l(my_mutex, false);
return my_successors.empty();
}
void clear() {
my_successors.clear();
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
my_built_successors.clear();
#endif
}
virtual task * try_put_task( const continue_msg &t ) = 0;
};
//! A cache of successors that are broadcast to
template<typename T, typename M=spin_rw_mutex>
class broadcast_cache : public successor_cache<T, M> {
typedef M my_mutex_type;
typedef std::list< receiver<T> * > my_successors_type;
public:
broadcast_cache( ) {}
// call try_put_task on each successor and return the last task we received (if any)
/*override*/ task * try_put_task( const T &t ) {
task * last_task = NULL;
bool upgraded = true;
typename my_mutex_type::scoped_lock l(this->my_mutex, upgraded);
typename my_successors_type::iterator i = this->my_successors.begin();
while ( i != this->my_successors.end() ) {
task *new_task = (*i)->try_put_task(t);
last_task = combine_tasks(last_task, new_task); // enqueue if necessary
if(new_task) {
++i;
}
else { // failed
if ( (*i)->register_predecessor(*this->my_owner) ) {
if (!upgraded) {
l.upgrade_to_writer();
upgraded = true;
}
i = this->my_successors.erase(i);
} else {
++i;
}
}
}
return last_task;
}
};
//! A cache of successors that are put in a round-robin fashion
template<typename T, typename M=spin_rw_mutex >
class round_robin_cache : public successor_cache<T, M> {
typedef size_t size_type;
typedef M my_mutex_type;
typedef std::list< receiver<T> * > my_successors_type;
public:
round_robin_cache( ) {}
size_type size() {
typename my_mutex_type::scoped_lock l(this->my_mutex, false);
return this->my_successors.size();
}
/*override*/task *try_put_task( const T &t ) {
bool upgraded = true;
typename my_mutex_type::scoped_lock l(this->my_mutex, upgraded);
typename my_successors_type::iterator i = this->my_successors.begin();
while ( i != this->my_successors.end() ) {
task *new_task = (*i)->try_put_task(t);
if ( new_task ) {
return new_task;
} else {
if ( (*i)->register_predecessor(*this->my_owner) ) {
if (!upgraded) {
l.upgrade_to_writer();
upgraded = true;
}
i = this->my_successors.erase(i);
}
else {
++i;
}
}
}
return NULL;
}
};
template<typename T>
class decrementer : public continue_receiver, tbb::internal::no_copy {
T *my_node;
task *execute() {
return my_node->decrement_counter();
}
public:
typedef continue_msg input_type;
typedef continue_msg output_type;
decrementer( int number_of_predecessors = 0 ) : continue_receiver( number_of_predecessors ) { }
void set_owner( T *node ) { my_node = node; }
};
}
#endif // __TBB__flow_graph_impl_H
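The caches above are what the public flow-graph nodes are built on: a broadcast_cache delivers each message to every registered successor, while a round_robin_cache stops at the first successor that accepts. A minimal sketch against the public tbb/flow_graph.h API (not part of this commit) exercising the broadcast behavior:

#include "tbb/flow_graph.h"
#include <cstdio>

int main() {
    using namespace tbb::flow;
    graph g;
    broadcast_node<int> b(g);  // its successors live in a broadcast_cache
    function_node<int, int> f1(g, unlimited,
        [](int v) { std::printf("f1 got %d\n", v); return v; });
    function_node<int, int> f2(g, unlimited,
        [](int v) { std::printf("f2 got %d\n", v); return v; });
    make_edge(b, f1);
    make_edge(b, f2);
    b.try_put(42);     // try_put_task walks the successor list; both receive 42
    g.wait_for_all();
    return 0;
}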


@@ -0,0 +1,453 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB__flow_graph_indexer_impl_H
#define __TBB__flow_graph_indexer_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#include "tbb/internal/_flow_graph_types_impl.h"
namespace internal {
// The output of the indexer_node is a tbb::flow::tagged_msg of the form
// tagged_msg<tag, result>, where the value of tag indicates which input
// port the result arrived on before being forwarded to the successor.
template<typename IndexerNodeBaseType, typename T, size_t K>
task* do_try_put(const T &v, void *p) {
typename IndexerNodeBaseType::output_type o(K, v);
return reinterpret_cast<IndexerNodeBaseType *>(p)->try_put_task(&o);
}
template<typename TupleTypes,int N>
struct indexer_helper {
template<typename IndexerNodeBaseType, typename PortTuple>
static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p) {
typedef typename tuple_element<N-1, TupleTypes>::type T;
task *(*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, N-1>;
tbb::flow::get<N-1>(my_input).set_up(p, indexer_node_put_task);
indexer_helper<TupleTypes,N-1>::template set_indexer_node_pointer<IndexerNodeBaseType,PortTuple>(my_input, p);
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
template<typename InputTuple>
static inline void reset_inputs(InputTuple &my_input, reset_flags f) {
join_helper<N-1>::reset_inputs(my_input, f);
tbb::flow::get<N-1>(my_input).reset_receiver(f);
}
#endif
};
template<typename TupleTypes>
struct indexer_helper<TupleTypes,1> {
template<typename IndexerNodeBaseType, typename PortTuple>
static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p) {
typedef typename tuple_element<0, TupleTypes>::type T;
task *(*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, 0>;
tbb::flow::get<0>(my_input).set_up(p, indexer_node_put_task);
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
template<typename InputTuple>
static inline void reset_inputs(InputTuple &my_input, reset_flags f) {
tbb::flow::get<0>(my_input).reset_receiver(f);
}
#endif
};
template<typename T>
class indexer_input_port : public receiver<T> {
private:
void* my_indexer_ptr;
typedef task* (* forward_function_ptr)(T const &, void* );
forward_function_ptr my_try_put_task;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
spin_mutex my_pred_mutex;
edge_container<sender<T> > my_built_predecessors;
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
public:
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
indexer_input_port() : my_pred_mutex() {}
indexer_input_port( const indexer_input_port & /*other*/ ) : receiver<T>(), my_pred_mutex() {
}
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
void set_up(void *p, forward_function_ptr f) {
my_indexer_ptr = p;
my_try_put_task = f;
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
typedef std::vector<sender<T> *> predecessor_vector_type;
/*override*/size_t predecessor_count() {
spin_mutex::scoped_lock l(my_pred_mutex);
return my_built_predecessors.edge_count();
}
/*override*/void internal_add_built_predecessor(sender<T> &p) {
spin_mutex::scoped_lock l(my_pred_mutex);
my_built_predecessors.add_edge(p);
}
/*override*/void internal_delete_built_predecessor(sender<T> &p) {
spin_mutex::scoped_lock l(my_pred_mutex);
my_built_predecessors.delete_edge(p);
}
/*override*/void copy_predecessors( predecessor_vector_type &v) {
spin_mutex::scoped_lock l(my_pred_mutex);
return my_built_predecessors.copy_edges(v);
}
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
protected:
template< typename R, typename B > friend class run_and_put_task;
template<typename X, typename Y> friend class internal::broadcast_cache;
template<typename X, typename Y> friend class internal::round_robin_cache;
task *try_put_task(const T &v) {
return my_try_put_task(v, my_indexer_ptr);
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
public:
/*override*/void reset_receiver(__TBB_PFG_RESET_ARG(reset_flags f)) {
if(f&rf_extract) my_built_predecessors.receiver_extract(*this);
}
#else
/*override*/void reset_receiver(__TBB_PFG_RESET_ARG(reset_flags /*f*/)) { }
#endif
};
template<typename InputTuple, typename OutputType, typename StructTypes>
class indexer_node_FE {
public:
static const int N = tbb::flow::tuple_size<InputTuple>::value;
typedef OutputType output_type;
typedef InputTuple input_type;
input_type &input_ports() { return my_inputs; }
protected:
input_type my_inputs;
};
//! indexer_node_base
template<typename InputTuple, typename OutputType, typename StructTypes>
class indexer_node_base : public graph_node, public indexer_node_FE<InputTuple, OutputType,StructTypes>,
public sender<OutputType> {
protected:
using graph_node::my_graph;
public:
static const size_t N = tbb::flow::tuple_size<InputTuple>::value;
typedef OutputType output_type;
typedef StructTypes tuple_types;
typedef receiver<output_type> successor_type;
typedef indexer_node_FE<InputTuple, output_type,StructTypes> input_ports_type;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
typedef std::vector<successor_type *> successor_vector_type;
#endif
private:
// ----------- Aggregator ------------
enum op_type { reg_succ, rem_succ, try__put_task
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
, add_blt_succ, del_blt_succ,
blt_succ_cnt, blt_succ_cpy
#endif
};
enum op_stat {WAIT=0, SUCCEEDED, FAILED};
typedef indexer_node_base<InputTuple,output_type,StructTypes> my_class;
class indexer_node_base_operation : public aggregated_operation<indexer_node_base_operation> {
public:
char type;
union {
output_type const *my_arg;
successor_type *my_succ;
task *bypass_t;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
size_t cnt_val;
successor_vector_type *succv;
#endif
};
indexer_node_base_operation(const output_type* e, op_type t) :
type(char(t)), my_arg(e) {}
indexer_node_base_operation(const successor_type &s, op_type t) : type(char(t)),
my_succ(const_cast<successor_type *>(&s)) {}
indexer_node_base_operation(op_type t) : type(char(t)) {}
};
typedef internal::aggregating_functor<my_class, indexer_node_base_operation> my_handler;
friend class internal::aggregating_functor<my_class, indexer_node_base_operation>;
aggregator<my_handler, indexer_node_base_operation> my_aggregator;
void handle_operations(indexer_node_base_operation* op_list) {
indexer_node_base_operation *current;
while(op_list) {
current = op_list;
op_list = op_list->next;
switch(current->type) {
case reg_succ:
my_successors.register_successor(*(current->my_succ));
__TBB_store_with_release(current->status, SUCCEEDED);
break;
case rem_succ:
my_successors.remove_successor(*(current->my_succ));
__TBB_store_with_release(current->status, SUCCEEDED);
break;
case try__put_task: {
current->bypass_t = my_successors.try_put_task(*(current->my_arg));
__TBB_store_with_release(current->status, SUCCEEDED); // the actual result of try_put_task is returned in bypass_t
}
break;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
case add_blt_succ:
my_successors.internal_add_built_successor(*(current->my_succ));
__TBB_store_with_release(current->status, SUCCEEDED);
break;
case del_blt_succ:
my_successors.internal_delete_built_successor(*(current->my_succ));
__TBB_store_with_release(current->status, SUCCEEDED);
break;
case blt_succ_cnt:
current->cnt_val = my_successors.successor_count();
__TBB_store_with_release(current->status, SUCCEEDED);
break;
case blt_succ_cpy:
my_successors.copy_successors(*(current->succv));
__TBB_store_with_release(current->status, SUCCEEDED);
break;
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
}
}
}
// ---------- end aggregator -----------
public:
indexer_node_base(graph& g) : graph_node(g), input_ports_type() {
indexer_helper<StructTypes,N>::set_indexer_node_pointer(this->my_inputs, this);
my_successors.set_owner(this);
my_aggregator.initialize_handler(my_handler(this));
}
indexer_node_base(const indexer_node_base& other) : graph_node(other.my_graph), input_ports_type(), sender<output_type>() {
indexer_helper<StructTypes,N>::set_indexer_node_pointer(this->my_inputs, this);
my_successors.set_owner(this);
my_aggregator.initialize_handler(my_handler(this));
}
bool register_successor(successor_type &r) {
indexer_node_base_operation op_data(r, reg_succ);
my_aggregator.execute(&op_data);
return op_data.status == SUCCEEDED;
}
bool remove_successor( successor_type &r) {
indexer_node_base_operation op_data(r, rem_succ);
my_aggregator.execute(&op_data);
return op_data.status == SUCCEEDED;
}
task * try_put_task(output_type const *v) {
indexer_node_base_operation op_data(v, try__put_task);
my_aggregator.execute(&op_data);
return op_data.bypass_t;
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
void internal_add_built_successor( successor_type &r) {
indexer_node_base_operation op_data(r, add_blt_succ);
my_aggregator.execute(&op_data);
}
void internal_delete_built_successor( successor_type &r) {
indexer_node_base_operation op_data(r, del_blt_succ);
my_aggregator.execute(&op_data);
}
size_t successor_count() {
indexer_node_base_operation op_data(blt_succ_cnt);
my_aggregator.execute(&op_data);
return op_data.cnt_val;
}
void copy_successors( successor_vector_type &v) {
indexer_node_base_operation op_data(blt_succ_cpy);
op_data.succv = &v;
my_aggregator.execute(&op_data);
}
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
protected:
/*override*/void reset(__TBB_PFG_RESET_ARG(reset_flags f)) {
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
my_successors.reset(f);
indexer_helper<StructTypes,N>::reset_inputs(this->my_inputs, f);
#endif
}
private:
broadcast_cache<output_type, null_rw_mutex> my_successors;
}; //indexer_node_base
template<int N, typename InputTuple> struct input_types;
template<typename InputTuple>
struct input_types<1, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename internal::tagged_msg<size_t, first_type > type;
};
template<typename InputTuple>
struct input_types<2, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type> type;
};
template<typename InputTuple>
struct input_types<3, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type> type;
};
template<typename InputTuple>
struct input_types<4, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type> type;
};
template<typename InputTuple>
struct input_types<5, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename tuple_element<4, InputTuple>::type fifth_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type> type;
};
template<typename InputTuple>
struct input_types<6, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename tuple_element<4, InputTuple>::type fifth_type;
typedef typename tuple_element<5, InputTuple>::type sixth_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type> type;
};
template<typename InputTuple>
struct input_types<7, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename tuple_element<4, InputTuple>::type fifth_type;
typedef typename tuple_element<5, InputTuple>::type sixth_type;
typedef typename tuple_element<6, InputTuple>::type seventh_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type,
seventh_type> type;
};
template<typename InputTuple>
struct input_types<8, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename tuple_element<4, InputTuple>::type fifth_type;
typedef typename tuple_element<5, InputTuple>::type sixth_type;
typedef typename tuple_element<6, InputTuple>::type seventh_type;
typedef typename tuple_element<7, InputTuple>::type eighth_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type,
seventh_type, eighth_type> type;
};
template<typename InputTuple>
struct input_types<9, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename tuple_element<4, InputTuple>::type fifth_type;
typedef typename tuple_element<5, InputTuple>::type sixth_type;
typedef typename tuple_element<6, InputTuple>::type seventh_type;
typedef typename tuple_element<7, InputTuple>::type eighth_type;
typedef typename tuple_element<8, InputTuple>::type nineth_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type,
seventh_type, eighth_type, nineth_type> type;
};
template<typename InputTuple>
struct input_types<10, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename tuple_element<4, InputTuple>::type fifth_type;
typedef typename tuple_element<5, InputTuple>::type sixth_type;
typedef typename tuple_element<6, InputTuple>::type seventh_type;
typedef typename tuple_element<7, InputTuple>::type eighth_type;
typedef typename tuple_element<8, InputTuple>::type nineth_type;
typedef typename tuple_element<9, InputTuple>::type tenth_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type,
seventh_type, eighth_type, nineth_type,
tenth_type> type;
};
// type generators
template<typename OutputTuple>
struct indexer_types : public input_types<tuple_size<OutputTuple>::value, OutputTuple> {
static const int N = tbb::flow::tuple_size<OutputTuple>::value;
typedef typename input_types<N, OutputTuple>::type output_type;
typedef typename wrap_tuple_elements<N,indexer_input_port,OutputTuple>::type input_ports_type;
typedef internal::indexer_node_FE<input_ports_type,output_type,OutputTuple> indexer_FE_type;
typedef internal::indexer_node_base<input_ports_type, output_type, OutputTuple> indexer_base_type;
};
template<class OutputTuple>
class unfolded_indexer_node : public indexer_types<OutputTuple>::indexer_base_type {
public:
typedef typename indexer_types<OutputTuple>::input_ports_type input_ports_type;
typedef OutputTuple tuple_types;
typedef typename indexer_types<OutputTuple>::output_type output_type;
private:
typedef typename indexer_types<OutputTuple>::indexer_base_type base_type;
public:
unfolded_indexer_node(graph& g) : base_type(g) {}
unfolded_indexer_node(const unfolded_indexer_node &other) : base_type(other) {}
};
} /* namespace internal */
#endif /* __TBB__flow_graph_indexer_impl_H */
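Through do_try_put, each indexer_input_port wraps an incoming value in a tagged_msg whose tag is that port's index K. A minimal sketch against the public indexer_node interface (not part of this commit) showing how a successor recovers the tag and the payload:

#include "tbb/flow_graph.h"
#include <cstdio>

int main() {
    using namespace tbb::flow;
    graph g;
    indexer_node<int, float> idx(g);
    typedef indexer_node<int, float>::output_type tagged_t;
    // function_node's default Output is continue_msg, so a void body is fine
    function_node<tagged_t> sink(g, serial, [](const tagged_t &m) {
        if (m.tag() == 0) std::printf("port 0: %d\n", cast_to<int>(m));
        else              std::printf("port 1: %f\n", cast_to<float>(m));
    });
    make_edge(idx, sink);
    input_port<0>(idx).try_put(7);     // forwarded as tagged_msg with tag 0
    input_port<1>(idx).try_put(2.5f);  // forwarded as tagged_msg with tag 1
    g.wait_for_all();
    return 0;
}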


@@ -0,0 +1,279 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB__flow_graph_item_buffer_impl_H
#define __TBB__flow_graph_item_buffer_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#include "tbb/internal/_flow_graph_types_impl.h" // for aligned_pair
// in namespace tbb::flow::interface7 (included in _flow_graph_node_impl.h)
//! Expandable buffer of items. The possible operations are push, pop,
// tests for empty, and so forth. No mutual exclusion is built in.
// Objects are explicitly constructed into slots and explicitly destroyed.
// get_my_item gives a read-only reference to the item in the buffer;
// set_my_item may be called with either an empty or an occupied slot.
using internal::aligned_pair;
using internal::alignment_of;
namespace internal {
template <typename T, typename A=cache_aligned_allocator<T> >
class item_buffer {
public:
typedef T item_type;
enum buffer_item_state { no_item=0, has_item=1, reserved_item=2 };
protected:
typedef size_t size_type;
typedef typename aligned_pair<item_type, buffer_item_state>::type buffer_item_type;
typedef typename A::template rebind<buffer_item_type>::other allocator_type;
buffer_item_type *my_array;
size_type my_array_size;
static const size_type initial_buffer_size = 4;
size_type my_head;
size_type my_tail;
bool buffer_empty() { return my_head == my_tail; }
buffer_item_type &item(size_type i) {
__TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].second))%alignment_of<buffer_item_state>::value),NULL);
__TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].first))%alignment_of<item_type>::value), NULL);
return my_array[i & (my_array_size - 1) ];
}
bool my_item_valid(size_type i) { return (i < my_tail) && (i >= my_head) && (item(i).second != no_item); }
bool my_item_reserved(size_type i) { return item(i).second == reserved_item; }
// object management in buffer
const item_type &get_my_item(size_t i) {
__TBB_ASSERT(my_item_valid(i),"attempt to get invalid item");
item_type *itm = (tbb::internal::punned_cast<item_type *>(&(item(i).first)));
return *(const item_type *)itm;
}
// may be called with an empty slot or a slot that has already been constructed into.
void set_my_item(size_t i, const item_type &o) {
if(item(i).second != no_item) {
destroy_item(i);
}
new(&(item(i).first)) item_type(o);
item(i).second = has_item;
}
// destructively-fetch an object from the buffer
void fetch_item(size_t i, item_type &o) {
__TBB_ASSERT(my_item_valid(i), "Trying to fetch an empty slot");
o = get_my_item(i); // could have std::move assign semantics
destroy_item(i);
}
// Move an existing item from one slot to another. The moved-to slot must be
// unoccupied and the moved-from slot must exist and not be reserved. Afterward,
// 'from' will be empty and 'to' will be occupied but not reserved.
void move_item(size_t to, size_t from) {
__TBB_ASSERT(!my_item_valid(to), "Trying to move to a non-empty slot");
__TBB_ASSERT(my_item_valid(from), "Trying to move from an empty slot");
set_my_item(to, get_my_item(from)); // could have std::move semantics
destroy_item(from);
}
// put an item in an empty slot. Return true if successful, else false
bool place_item(size_t here, const item_type &me) {
#if !TBB_DEPRECATED_SEQUENCER_DUPLICATES
if(my_item_valid(here)) return false;
#endif
set_my_item(here, me);
return true;
}
// could be implemented with std::move semantics
void swap_items(size_t i, size_t j) {
__TBB_ASSERT(my_item_valid(i) && my_item_valid(j), "attempt to swap invalid item(s)");
item_type temp = get_my_item(i);
set_my_item(i, get_my_item(j));
set_my_item(j, temp);
}
void destroy_item(size_type i) {
__TBB_ASSERT(my_item_valid(i), "destruction of invalid item");
(tbb::internal::punned_cast<item_type *>(&(item(i).first)))->~item_type();
item(i).second = no_item;
}
// returns a copy of the front
void copy_front(item_type &v) {
__TBB_ASSERT(my_item_valid(my_head), "attempt to fetch head non-item");
v = get_my_item(my_head);
}
// returns a copy of the back
void copy_back(item_type &v) {
__TBB_ASSERT(my_item_valid(my_tail-1), "attempt to fetch tail non-item");
v = get_my_item(my_tail-1);
}
// The following methods are for reservation of the front of a buffer.
void reserve_item(size_type i) { __TBB_ASSERT(my_item_valid(i) && !my_item_reserved(i), "item cannot be reserved"); item(i).second = reserved_item; }
void release_item(size_type i) { __TBB_ASSERT(my_item_reserved(i), "item is not reserved"); item(i).second = has_item; }
void destroy_front() { destroy_item(my_head); ++my_head; }
void destroy_back() { destroy_item(my_tail-1); --my_tail; }
// We have to be able to test against a new tail value without changing my_tail;
// grow_my_array doesn't work if we change my_tail when the old array is too small.
size_type size(size_t new_tail = 0) { return (new_tail ? new_tail : my_tail) - my_head; }
size_type capacity() { return my_array_size; }
// sequencer_node does not use this method, so we don't
// need a version that passes in the new_tail value.
bool buffer_full() { return size() >= capacity(); }
//! Grows the internal array.
void grow_my_array( size_t minimum_size ) {
// test that we haven't made the structure inconsistent.
__TBB_ASSERT(capacity() >= my_tail - my_head, "total items exceed capacity");
size_type new_size = my_array_size ? 2*my_array_size : initial_buffer_size;
while( new_size<minimum_size )
new_size*=2;
buffer_item_type* new_array = allocator_type().allocate(new_size);
// initialize validity to "no"
for( size_type i=0; i<new_size; ++i ) { new_array[i].second = no_item; }
for( size_type i=my_head; i<my_tail; ++i) {
if(my_item_valid(i)) { // sequencer_node may have empty slots
// placement-new copy-construct; could be std::move
char *new_space = (char *)&(new_array[i&(new_size-1)].first);
(void)new(new_space) item_type(get_my_item(i));
new_array[i&(new_size-1)].second = item(i).second;
}
}
clean_up_buffer(/*reset_pointers*/false);
my_array = new_array;
my_array_size = new_size;
}
bool push_back(item_type &v) {
if(buffer_full()) {
grow_my_array(size() + 1);
}
set_my_item(my_tail, v);
++my_tail;
return true;
}
bool pop_back(item_type &v) {
if (!my_item_valid(my_tail-1)) {
return false;
}
copy_back(v);
destroy_back();
return true;
}
bool pop_front(item_type &v) {
if(!my_item_valid(my_head)) {
return false;
}
copy_front(v);
destroy_front();
return true;
}
// This is used both for reset and for grow_my_array. In the case of grow_my_array
// we want to retain the values of the head and tail.
void clean_up_buffer(bool reset_pointers) {
if (my_array) {
for( size_type i=my_head; i<my_tail; ++i ) {
if(my_item_valid(i))
destroy_item(i);
}
allocator_type().deallocate(my_array,my_array_size);
}
my_array = NULL;
if(reset_pointers) {
my_head = my_tail = my_array_size = 0;
}
}
public:
//! Constructor
item_buffer( ) : my_array(NULL), my_array_size(0),
my_head(0), my_tail(0) {
grow_my_array(initial_buffer_size);
}
~item_buffer() {
clean_up_buffer(/*reset_pointers*/true);
}
void reset() { clean_up_buffer(/*reset_pointers*/true); grow_my_array(initial_buffer_size); }
};
//! item_buffer with reservable front-end. NOTE: if reserving, do not
// complete the operation with pop_front(); use consume_front().
// No synchronization is built in.
template<typename T, typename A=cache_aligned_allocator<T> >
class reservable_item_buffer : public item_buffer<T, A> {
protected:
using item_buffer<T, A>::my_item_valid;
using item_buffer<T, A>::my_head;
public:
reservable_item_buffer() : item_buffer<T, A>(), my_reserved(false) {}
void reset() {my_reserved = false; item_buffer<T,A>::reset(); }
protected:
bool reserve_front(T &v) {
if(my_reserved || !my_item_valid(my_head)) return false;
my_reserved = true;
// reserving the head
this->copy_front(v);
this->reserve_item(this->my_head);
return true;
}
void consume_front() {
__TBB_ASSERT(my_reserved, "Attempt to consume a non-reserved item");
this->destroy_front();
my_reserved = false;
}
void release_front() {
__TBB_ASSERT(my_reserved, "Attempt to release a non-reserved item");
this->release_item(this->my_head);
my_reserved = false;
}
bool my_reserved;
};
} // namespace internal
#endif // __TBB__flow_graph_item_buffer_impl_H
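item_buffer keeps its capacity a power of two so that a logical index maps to a physical slot with a single mask, while my_head and my_tail grow monotonically. A standalone sketch (hypothetical, not TBB code) of that indexing and growth scheme, for a copy-assignable T and a power-of-two initial capacity:

#include <cstddef>
#include <vector>

template <typename T>
class ring {
    std::vector<T> slots;      // size is always a power of two
    std::size_t head, tail;    // monotonically increasing logical indices
public:
    explicit ring(std::size_t cap = 4) : slots(cap), head(0), tail(0) {}
    T &slot(std::size_t i) { return slots[i & (slots.size() - 1)]; }
    void push_back(const T &v) {
        if (tail - head == slots.size()) grow(slots.size() * 2);
        slot(tail++) = v;
    }
    bool pop_front(T &v) {
        if (head == tail) return false;
        v = slot(head++);
        return true;
    }
private:
    void grow(std::size_t new_cap) {
        std::vector<T> bigger(new_cap);
        // re-place items at the same logical index under the new mask,
        // as grow_my_array does above
        for (std::size_t i = head; i < tail; ++i)
            bigger[i & (new_cap - 1)] = slot(i);
        slots.swap(bigger);
    }
};

Unlike this sketch, item_buffer constructs items in place and tracks per-slot state (no_item/has_item/reserved_item) so that sequencer_node can leave holes and reservations can be rolled back.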

File diff suppressed because it is too large


@@ -0,0 +1,742 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB__flow_graph_node_impl_H
#define __TBB__flow_graph_node_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#include "_flow_graph_item_buffer_impl.h"
//! @cond INTERNAL
namespace internal {
using tbb::internal::aggregated_operation;
using tbb::internal::aggregating_functor;
using tbb::internal::aggregator;
template< typename T, typename A >
class function_input_queue : public item_buffer<T,A> {
public:
bool pop( T& t ) {
return this->pop_front( t );
}
bool push( T& t ) {
return this->push_back( t );
}
};
//! Input and scheduling for a function node that takes a type Input as input
// The only up-ref is apply_body_impl, which should implement the function
// call and any handling of the result.
template< typename Input, typename A, typename ImplType >
class function_input_base : public receiver<Input>, tbb::internal::no_assign {
enum op_stat {WAIT=0, SUCCEEDED, FAILED};
enum op_type {reg_pred, rem_pred, app_body, try_fwd, tryput_bypass, app_body_bypass
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
, add_blt_pred, del_blt_pred,
blt_pred_cnt, blt_pred_cpy // create vector copies of preds and succs
#endif
};
typedef function_input_base<Input, A, ImplType> my_class;
public:
//! The input type of this receiver
typedef Input input_type;
typedef sender<Input> predecessor_type;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
typedef std::vector<predecessor_type *> predecessor_vector_type;
#endif
//! Constructor for function_input_base
function_input_base( graph &g, size_t max_concurrency, function_input_queue<input_type,A> *q = NULL )
: my_graph(g), my_max_concurrency(max_concurrency), my_concurrency(0),
my_queue(q), forwarder_busy(false) {
my_predecessors.set_owner(this);
my_aggregator.initialize_handler(my_handler(this));
}
//! Copy constructor
function_input_base( const function_input_base& src, function_input_queue<input_type,A> *q = NULL ) :
receiver<Input>(), tbb::internal::no_assign(),
my_graph(src.my_graph), my_max_concurrency(src.my_max_concurrency),
my_concurrency(0), my_queue(q), forwarder_busy(false)
{
my_predecessors.set_owner(this);
my_aggregator.initialize_handler(my_handler(this));
}
//! Destructor
virtual ~function_input_base() {
if ( my_queue ) delete my_queue;
}
//! Put to the node, returning a task if available
virtual task * try_put_task( const input_type &t ) {
if ( my_max_concurrency == 0 ) {
return create_body_task( t );
} else {
my_operation op_data(t, tryput_bypass);
my_aggregator.execute(&op_data);
if(op_data.status == SUCCEEDED ) {
return op_data.bypass_t;
}
return NULL;
}
}
//! Adds src to the list of cached predecessors.
/* override */ bool register_predecessor( predecessor_type &src ) {
my_operation op_data(reg_pred);
op_data.r = &src;
my_aggregator.execute(&op_data);
return true;
}
//! Removes src from the list of cached predecessors.
/* override */ bool remove_predecessor( predecessor_type &src ) {
my_operation op_data(rem_pred);
op_data.r = &src;
my_aggregator.execute(&op_data);
return true;
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
//! Adds src to the list of predecessors built by make_edge
/*override*/ void internal_add_built_predecessor( predecessor_type &src) {
my_operation op_data(add_blt_pred);
op_data.r = &src;
my_aggregator.execute(&op_data);
}
//! Removes src from the list of built predecessors (used by remove_edge)
/*override*/ void internal_delete_built_predecessor( predecessor_type &src) {
my_operation op_data(del_blt_pred);
op_data.r = &src;
my_aggregator.execute(&op_data);
}
/*override*/ size_t predecessor_count() {
my_operation op_data(blt_pred_cnt);
my_aggregator.execute(&op_data);
return op_data.cnt_val;
}
/*override*/ void copy_predecessors(predecessor_vector_type &v) {
my_operation op_data(blt_pred_cpy);
op_data.predv = &v;
my_aggregator.execute(&op_data);
}
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
protected:
void reset_function_input_base( __TBB_PFG_RESET_ARG(reset_flags f)) {
my_concurrency = 0;
if(my_queue) {
my_queue->reset();
}
reset_receiver(__TBB_PFG_RESET_ARG(f));
forwarder_busy = false;
}
graph& my_graph;
const size_t my_max_concurrency;
size_t my_concurrency;
function_input_queue<input_type, A> *my_queue;
predecessor_cache<input_type, null_mutex > my_predecessors;
/*override*/void reset_receiver( __TBB_PFG_RESET_ARG(reset_flags f)) {
my_predecessors.reset(__TBB_PFG_RESET_ARG(f));
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
__TBB_ASSERT(!(f & rf_extract) || my_predecessors.empty(), "function_input_base reset failed");
#endif
}
private:
friend class apply_body_task_bypass< my_class, input_type >;
friend class forward_task_bypass< my_class >;
class my_operation : public aggregated_operation< my_operation > {
public:
char type;
union {
input_type *elem;
predecessor_type *r;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
size_t cnt_val;
predecessor_vector_type *predv;
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
};
tbb::task *bypass_t;
my_operation(const input_type& e, op_type t) :
type(char(t)), elem(const_cast<input_type*>(&e)) {}
my_operation(op_type t) : type(char(t)), r(NULL) {}
};
bool forwarder_busy;
typedef internal::aggregating_functor<my_class, my_operation> my_handler;
friend class internal::aggregating_functor<my_class, my_operation>;
aggregator< my_handler, my_operation > my_aggregator;
void handle_operations(my_operation *op_list) {
my_operation *tmp;
while (op_list) {
tmp = op_list;
op_list = op_list->next;
switch (tmp->type) {
case reg_pred:
my_predecessors.add(*(tmp->r));
__TBB_store_with_release(tmp->status, SUCCEEDED);
if (!forwarder_busy) {
forwarder_busy = true;
spawn_forward_task();
}
break;
case rem_pred:
my_predecessors.remove(*(tmp->r));
__TBB_store_with_release(tmp->status, SUCCEEDED);
break;
case app_body:
__TBB_ASSERT(my_max_concurrency != 0, NULL);
--my_concurrency;
__TBB_store_with_release(tmp->status, SUCCEEDED);
if (my_concurrency<my_max_concurrency) {
input_type i;
bool item_was_retrieved = false;
if ( my_queue )
item_was_retrieved = my_queue->pop(i);
else
item_was_retrieved = my_predecessors.get_item(i);
if (item_was_retrieved) {
++my_concurrency;
spawn_body_task(i);
}
}
break;
case app_body_bypass: {
task * new_task = NULL;
__TBB_ASSERT(my_max_concurrency != 0, NULL);
--my_concurrency;
if (my_concurrency<my_max_concurrency) {
input_type i;
bool item_was_retrieved = false;
if ( my_queue )
item_was_retrieved = my_queue->pop(i);
else
item_was_retrieved = my_predecessors.get_item(i);
if (item_was_retrieved) {
++my_concurrency;
new_task = create_body_task(i);
}
}
tmp->bypass_t = new_task;
__TBB_store_with_release(tmp->status, SUCCEEDED);
}
break;
case tryput_bypass: internal_try_put_task(tmp); break;
case try_fwd: internal_forward(tmp); break;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
case add_blt_pred: {
my_predecessors.internal_add_built_predecessor(*(tmp->r));
__TBB_store_with_release(tmp->status, SUCCEEDED);
}
break;
case del_blt_pred:
my_predecessors.internal_delete_built_predecessor(*(tmp->r));
__TBB_store_with_release(tmp->status, SUCCEEDED);
break;
case blt_pred_cnt:
tmp->cnt_val = my_predecessors.predecessor_count();
__TBB_store_with_release(tmp->status, SUCCEEDED);
break;
case blt_pred_cpy:
my_predecessors.copy_predecessors( *(tmp->predv) );
__TBB_store_with_release(tmp->status, SUCCEEDED);
break;
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
}
}
}
//! Put to the node, but return the task instead of enqueueing it
void internal_try_put_task(my_operation *op) {
__TBB_ASSERT(my_max_concurrency != 0, NULL);
if (my_concurrency < my_max_concurrency) {
++my_concurrency;
task * new_task = create_body_task(*(op->elem));
op->bypass_t = new_task;
__TBB_store_with_release(op->status, SUCCEEDED);
} else if ( my_queue && my_queue->push(*(op->elem)) ) {
op->bypass_t = SUCCESSFULLY_ENQUEUED;
__TBB_store_with_release(op->status, SUCCEEDED);
} else {
op->bypass_t = NULL;
__TBB_store_with_release(op->status, FAILED);
}
}
//! Tries to spawn bodies if available and if concurrency allows
void internal_forward(my_operation *op) {
op->bypass_t = NULL;
if (my_concurrency<my_max_concurrency || !my_max_concurrency) {
input_type i;
bool item_was_retrieved = false;
if ( my_queue )
item_was_retrieved = my_queue->pop(i);
else
item_was_retrieved = my_predecessors.get_item(i);
if (item_was_retrieved) {
++my_concurrency;
op->bypass_t = create_body_task(i);
__TBB_store_with_release(op->status, SUCCEEDED);
return;
}
}
__TBB_store_with_release(op->status, FAILED);
forwarder_busy = false;
}
//! Applies the body to the provided input
// then decides if more work is available
void apply_body( input_type &i ) {
task *new_task = apply_body_bypass(i);
if(!new_task) return;
if(new_task == SUCCESSFULLY_ENQUEUED) return;
FLOW_SPAWN(*new_task);
return;
}
//! Applies the body to the provided input
// then decides if more work is available
task * apply_body_bypass( input_type &i ) {
task * new_task = static_cast<ImplType *>(this)->apply_body_impl_bypass(i);
if ( my_max_concurrency != 0 ) {
my_operation op_data(app_body_bypass); // tries to pop an item or get_item, enqueues another apply_body
my_aggregator.execute(&op_data);
tbb::task *ttask = op_data.bypass_t;
new_task = combine_tasks(new_task, ttask);
}
return new_task;
}
//! allocates a task to call apply_body( input )
inline task * create_body_task( const input_type &input ) {
task* tp = my_graph.root_task();
return (tp) ?
new(task::allocate_additional_child_of(*tp))
apply_body_task_bypass < my_class, input_type >(*this, input) :
NULL;
}
//! Spawns a task that calls apply_body( input )
inline void spawn_body_task( const input_type &input ) {
task* tp = create_body_task(input);
// tp == NULL => g.reset(), which shouldn't occur in concurrent context
if(tp) {
FLOW_SPAWN(*tp);
}
}
//! This is executed by an enqueued task, the "forwarder"
task *forward_task() {
my_operation op_data(try_fwd);
task *rval = NULL;
do {
op_data.status = WAIT;
my_aggregator.execute(&op_data);
if(op_data.status == SUCCEEDED) {
tbb::task *ttask = op_data.bypass_t;
rval = combine_tasks(rval, ttask);
}
} while (op_data.status == SUCCEEDED);
return rval;
}
inline task *create_forward_task() {
task* tp = my_graph.root_task();
return (tp) ?
new(task::allocate_additional_child_of(*tp)) forward_task_bypass< my_class >(*this) :
NULL;
}
//! Spawns a task that calls forward()
inline void spawn_forward_task() {
task* tp = create_forward_task();
if(tp) {
FLOW_SPAWN(*tp);
}
}
}; // function_input_base
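// Note on the aggregator pattern used above (informal): every state mutation --
// registering or removing predecessors, starting or finishing a body, forwarding
// -- is described by a my_operation record and funneled through
// my_aggregator.execute(). One thread at a time becomes the handler and drains
// the whole pending list in handle_operations(), so my_concurrency, my_queue and
// my_predecessors need no lock of their own. In effect a caller does:
//
//   my_operation op_data(try_fwd);    // describe the request
//   my_aggregator.execute(&op_data);  // returns once a handler has set
//                                     // op_data.status to SUCCEEDED or FAILED
//   task *t = op_data.bypass_t;       // any task to run, or NULL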
//! Implements methods for a function node that takes a type Input as input and sends
// a type Output to its successors.
template< typename Input, typename Output, typename A>
class function_input : public function_input_base<Input, A, function_input<Input,Output,A> > {
public:
typedef Input input_type;
typedef Output output_type;
typedef function_input<Input,Output,A> my_class;
typedef function_input_base<Input, A, my_class> base_type;
typedef function_input_queue<input_type, A> input_queue_type;
// constructor
template<typename Body>
function_input( graph &g, size_t max_concurrency, Body& body, function_input_queue<input_type,A> *q = NULL ) :
base_type(g, max_concurrency, q),
my_body( new internal::function_body_leaf< input_type, output_type, Body>(body) ) {
}
//! Copy constructor
function_input( const function_input& src, input_queue_type *q = NULL ) :
base_type(src, q),
my_body( src.my_body->clone() ) {
}
~function_input() {
delete my_body;
}
template< typename Body >
Body copy_function_object() {
internal::function_body<input_type, output_type> &body_ref = *this->my_body;
return dynamic_cast< internal::function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body();
}
task * apply_body_impl_bypass( const input_type &i) {
#if TBB_PREVIEW_FLOW_GRAPH_TRACE
// An extra copy is needed here to capture the
// body execution separately from the try_put
tbb::internal::fgt_begin_body( my_body );
output_type v = (*my_body)(i);
tbb::internal::fgt_end_body( my_body );
task * new_task = successors().try_put_task( v );
#else
task * new_task = successors().try_put_task( (*my_body)(i) );
#endif
return new_task;
}
protected:
void reset_function_input(__TBB_PFG_RESET_ARG(reset_flags f)) {
base_type::reset_function_input_base(__TBB_PFG_RESET_ARG(f));
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
if(f & rf_reset_bodies) my_body->reset_body();
#endif
}
function_body<input_type, output_type> *my_body;
virtual broadcast_cache<output_type > &successors() = 0;
}; // function_input
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
// helper templates to reset the successor edges of the output ports of an multifunction_node
template<int N>
struct reset_element {
template<typename P>
static void reset_this(P &p, reset_flags f) {
(void)tbb::flow::get<N-1>(p).successors().reset(f);
reset_element<N-1>::reset_this(p, f);
}
template<typename P>
static bool this_empty(P &p) {
if(tbb::flow::get<N-1>(p).successors().empty())
return reset_element<N-1>::this_empty(p);
return false;
}
};
template<>
struct reset_element<1> {
template<typename P>
static void reset_this(P &p, reset_flags f) {
(void)tbb::flow::get<0>(p).successors().reset(f);
}
template<typename P>
static bool this_empty(P &p) {
return tbb::flow::get<0>(p).successors().empty();
}
};
#endif
//! Implements methods for a function node that takes a type Input as input
// and has a tuple of output ports specified.
template< typename Input, typename OutputPortSet, typename A>
class multifunction_input : public function_input_base<Input, A, multifunction_input<Input,OutputPortSet,A> > {
public:
static const int N = tbb::flow::tuple_size<OutputPortSet>::value;
typedef Input input_type;
typedef OutputPortSet output_ports_type;
typedef multifunction_input<Input,OutputPortSet,A> my_class;
typedef function_input_base<Input, A, my_class> base_type;
typedef function_input_queue<input_type, A> input_queue_type;
// constructor
template<typename Body>
multifunction_input(
graph &g,
size_t max_concurrency,
Body& body,
function_input_queue<input_type,A> *q = NULL ) :
base_type(g, max_concurrency, q),
my_body( new internal::multifunction_body_leaf<input_type, output_ports_type, Body>(body) ) {
}
//! Copy constructor
multifunction_input( const multifunction_input& src, input_queue_type *q = NULL ) :
base_type(src, q),
my_body( src.my_body->clone() ) {
}
~multifunction_input() {
delete my_body;
}
template< typename Body >
Body copy_function_object() {
internal::multifunction_body<input_type, output_ports_type> &body_ref = *this->my_body;
return dynamic_cast< internal::multifunction_body_leaf<input_type, output_ports_type, Body> & >(body_ref).get_body();
}
// for multifunction nodes we do not have a single successor as such. So we just tell
// the task we were successful.
task * apply_body_impl_bypass( const input_type &i) {
tbb::internal::fgt_begin_body( my_body );
(*my_body)(i, my_output_ports);
tbb::internal::fgt_end_body( my_body );
task * new_task = SUCCESSFULLY_ENQUEUED;
return new_task;
}
output_ports_type &output_ports(){ return my_output_ports; }
protected:
/*override*/void reset(__TBB_PFG_RESET_ARG(reset_flags f)) {
base_type::reset_function_input_base(__TBB_PFG_RESET_ARG(f));
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
reset_element<N>::reset_this(my_output_ports, f);
if(f & rf_reset_bodies) my_body->reset_body();
__TBB_ASSERT(!(f & rf_extract) || reset_element<N>::this_empty(my_output_ports), "multifunction_node reset failed");
#endif
}
multifunction_body<input_type, output_ports_type> *my_body;
output_ports_type my_output_ports;
}; // multifunction_input
// template to refer to an output port of a multifunction_node
template<size_t N, typename MOP>
typename tbb::flow::tuple_element<N, typename MOP::output_ports_type>::type &output_port(MOP &op) {
return tbb::flow::get<N>(op.output_ports());
}
// helper structs for split_node
template<int N>
struct emit_element {
template<typename T, typename P>
static void emit_this(const T &t, P &p) {
(void)tbb::flow::get<N-1>(p).try_put(tbb::flow::get<N-1>(t));
emit_element<N-1>::emit_this(t,p);
}
};
template<>
struct emit_element<1> {
template<typename T, typename P>
static void emit_this(const T &t, P &p) {
(void)tbb::flow::get<0>(p).try_put(tbb::flow::get<0>(t));
}
};
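// Illustrative expansion (informal), as used by split_node: given a tuple value
// t and a matching tuple of output ports p, the recursion above unrolls into one
// try_put per element. For a 2-tuple:
//
//   emit_element<2>::emit_this(t, p);
//   // expands to roughly:
//   //   get<1>(p).try_put(get<1>(t));
//   //   get<0>(p).try_put(get<0>(t));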
//! Implements methods for an executable node that takes continue_msg as input
template< typename Output >
class continue_input : public continue_receiver {
public:
//! The input type of this receiver
typedef continue_msg input_type;
//! The output type of this receiver
typedef Output output_type;
template< typename Body >
continue_input( graph &g, Body& body )
: my_graph_ptr(&g),
my_body( new internal::function_body_leaf< input_type, output_type, Body>(body) ) { }
template< typename Body >
continue_input( graph &g, int number_of_predecessors, Body& body )
: continue_receiver( number_of_predecessors ), my_graph_ptr(&g),
my_body( new internal::function_body_leaf< input_type, output_type, Body>(body) ) { }
continue_input( const continue_input& src ) : continue_receiver(src),
my_graph_ptr(src.my_graph_ptr), my_body( src.my_body->clone() ) {}
~continue_input() {
delete my_body;
}
template< typename Body >
Body copy_function_object() {
internal::function_body<input_type, output_type> &body_ref = *my_body;
return dynamic_cast< internal::function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body();
}
/*override*/void reset_receiver( __TBB_PFG_RESET_ARG(reset_flags f)) {
continue_receiver::reset_receiver(__TBB_PFG_RESET_ARG(f));
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
if(f & rf_reset_bodies) my_body->reset_body();
#endif
}
protected:
graph* my_graph_ptr;
function_body<input_type, output_type> *my_body;
virtual broadcast_cache<output_type > &successors() = 0;
friend class apply_body_task_bypass< continue_input< Output >, continue_msg >;
//! Applies the body to the provided input
/* override */ task *apply_body_bypass( input_type ) {
#if TBB_PREVIEW_FLOW_GRAPH_TRACE
// An extra copy is needed here to capture the
// body execution separately from the try_put
tbb::internal::fgt_begin_body( my_body );
output_type v = (*my_body)( continue_msg() );
tbb::internal::fgt_end_body( my_body );
return successors().try_put_task( v );
#else
return successors().try_put_task( (*my_body)( continue_msg() ) );
#endif
}
//! Spawns a task that applies the body
/* override */ task *execute( ) {
task* tp = my_graph_ptr->root_task();
return (tp) ?
new ( task::allocate_additional_child_of( *tp ) )
apply_body_task_bypass< continue_input< Output >, continue_msg >( *this, continue_msg() ) :
NULL;
}
}; // continue_input
//! Implements methods for both executable and function nodes that puts Output to its successors
template< typename Output >
class function_output : public sender<Output> {
public:
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
template<int N> friend struct reset_element;
#endif
typedef Output output_type;
typedef receiver<output_type> successor_type;
typedef broadcast_cache<output_type> broadcast_cache_type;
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
typedef std::vector<successor_type *> successor_vector_type;
#endif
function_output() { my_successors.set_owner(this); }
function_output(const function_output & /*other*/) : sender<output_type>() {
my_successors.set_owner(this);
}
//! Adds a new successor to this node
/* override */ bool register_successor( receiver<output_type> &r ) {
successors().register_successor( r );
return true;
}
//! Removes a successor from this node
/* override */ bool remove_successor( receiver<output_type> &r ) {
successors().remove_successor( r );
return true;
}
#if TBB_PREVIEW_FLOW_GRAPH_FEATURES
/*override*/ void internal_add_built_successor( receiver<output_type> &r) {
successors().internal_add_built_successor( r );
}
/*override*/ void internal_delete_built_successor( receiver<output_type> &r) {
successors().internal_delete_built_successor( r );
}
/*override*/ size_t successor_count() {
return successors().successor_count();
}
/*override*/ void copy_successors( successor_vector_type &v) {
successors().copy_successors(v);
}
#endif /* TBB_PREVIEW_FLOW_GRAPH_FEATURES */
// for multifunction_node. The function_body that implements
// the node will have an input and an output tuple of ports. To put
// an item to a successor, the body should
//
// get<I>(output_ports).try_put(output_value);
//
// the return value is the bool returned from the port's try_put.
task *try_put_task(const output_type &i) { return my_successors.try_put_task(i); }
protected:
broadcast_cache_type my_successors;
broadcast_cache_type &successors() { return my_successors; }
}; // function_output
template< typename Output >
class multifunction_output : public function_output<Output> {
public:
typedef Output output_type;
typedef function_output<output_type> base_type;
using base_type::my_successors;
multifunction_output() : base_type() {my_successors.set_owner(this);}
multifunction_output( const multifunction_output &/*other*/) : base_type() { my_successors.set_owner(this); }
bool try_put(const output_type &i) {
task *res = my_successors.try_put_task(i);
if(!res) return false;
if(res != SUCCESSFULLY_ENQUEUED) FLOW_SPAWN(*res);
return true;
}
}; // multifunction_output
} // internal
#endif // __TBB__flow_graph_node_impl_H


@@ -0,0 +1,251 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
// tagged buffer that can expand, and can support as many deletions as additions
// list-based, with elements of list held in array (for destruction management),
// multiplicative hashing (as in enumerable_thread_specific). No synchronization built-in.
//
#ifndef __TBB__flow_graph_tagged_buffer_impl_H
#define __TBB__flow_graph_tagged_buffer_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
// included in namespace tbb::flow::interface7::internal
template<typename T, typename U, size_t NoTagMark>
struct otherData {
T t;
U next;
otherData() : t(NoTagMark), next(NULL) {}
};
template<typename TagType, typename ValueType, size_t NoTagMark>
struct buffer_element_type {
// the second parameter below is void * because we can't forward-declare the type
// itself, so we just reinterpret_cast below.
typedef typename aligned_pair<ValueType, otherData<TagType, void *, NoTagMark> >::type type;
};
template
<
typename TagType,
typename ValueType,
size_t NoTagMark = 0,
typename Allocator=tbb::cache_aligned_allocator< typename buffer_element_type<TagType, ValueType, NoTagMark>::type >
>
class tagged_buffer {
public:
static const size_t INITIAL_SIZE = 8; // initial size of the hash pointer table
static const TagType NO_TAG = TagType(NoTagMark);
typedef ValueType value_type;
typedef typename buffer_element_type<TagType, ValueType, NO_TAG>::type element_type;
typedef value_type *pointer_type;
typedef element_type *list_array_type; // array we manage manually
typedef list_array_type *pointer_array_type;
typedef typename Allocator::template rebind<list_array_type>::other pointer_array_allocator_type;
typedef typename Allocator::template rebind<element_type>::other elements_array_allocator;
private:
size_t my_size;
size_t nelements;
pointer_array_type pointer_array; // pointer_array[my_size]
list_array_type elements_array; // elements_array[my_size / 2]
element_type* free_list;
size_t mask() { return my_size - 1; }
static size_t hash(TagType t) {
return uintptr_t(t)*tbb::internal::select_size_t_constant<0x9E3779B9,0x9E3779B97F4A7C15ULL>::value;
}
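// Informal note: this is multiplicative (Fibonacci) hashing; the constant is
// floor(2^32/phi) or floor(2^64/phi) (phi being the golden ratio), selected by
// word size, so tags scatter well into the high bits of the product. Because
// my_size is always a power of two, a bucket index is then just
//
//   size_t bucket = hash(t) & mask();   // mask() == my_size - 1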
void set_up_free_list( element_type **p_free_list, list_array_type la, size_t sz) {
for(size_t i=0; i < sz - 1; ++i ) { // construct free list
la[i].second.next = &(la[i+1]);
la[i].second.t = NO_TAG;
}
la[sz-1].second.next = NULL;
*p_free_list = &(la[0]);
}
// cleanup for exceptions
struct DoCleanup {
pointer_array_type *my_pa;
list_array_type *my_elements;
size_t my_size;
DoCleanup(pointer_array_type &pa, list_array_type &my_els, size_t sz) :
my_pa(&pa), my_elements(&my_els), my_size(sz) { }
~DoCleanup() {
if(my_pa) {
size_t dont_care = 0;
internal_free_buffer(*my_pa, *my_elements, my_size, dont_care);
}
}
};
// exception-safety requires we do all the potentially-throwing operations first
void grow_array() {
size_t new_size = my_size*2;
size_t new_nelements = nelements; // internal_free_buffer zeroes this
list_array_type new_elements_array = NULL;
pointer_array_type new_pointer_array = NULL;
list_array_type new_free_list = NULL;
{
DoCleanup my_cleanup(new_pointer_array, new_elements_array, new_size);
new_elements_array = elements_array_allocator().allocate(my_size);
new_pointer_array = pointer_array_allocator_type().allocate(new_size);
for(size_t i=0; i < new_size; ++i) new_pointer_array[i] = NULL;
set_up_free_list(&new_free_list, new_elements_array, my_size );
for(size_t i=0; i < my_size; ++i) {
for( element_type* op = pointer_array[i]; op; op = (element_type *)(op->second.next)) {
value_type *ov = reinterpret_cast<value_type *>(&(op->first));
// could have std::move semantics
internal_tagged_insert(new_pointer_array, new_size, new_free_list, op->second.t, *ov);
}
}
my_cleanup.my_pa = NULL;
my_cleanup.my_elements = NULL;
}
internal_free_buffer(pointer_array, elements_array, my_size, nelements);
free_list = new_free_list;
pointer_array = new_pointer_array;
elements_array = new_elements_array;
my_size = new_size;
nelements = new_nelements;
}
// v could be perfectly forwarded if std::move were available.
// we use this method to move elements in grow_array, so can't use class fields
void internal_tagged_insert( element_type **p_pointer_array, size_t p_sz, list_array_type &p_free_list,
const TagType t, const value_type &v) {
size_t l_mask = p_sz-1;
size_t h = hash(t) & l_mask;
__TBB_ASSERT(p_free_list, "Error: free list not set up.");
element_type* my_elem = p_free_list; p_free_list = (element_type *)(p_free_list->second.next);
my_elem->second.t = t;
(void) new(&(my_elem->first)) value_type(v);
my_elem->second.next = p_pointer_array[h];
p_pointer_array[h] = my_elem;
}
void internal_initialize_buffer() {
pointer_array = pointer_array_allocator_type().allocate(my_size);
for(size_t i = 0; i < my_size; ++i) pointer_array[i] = NULL;
elements_array = elements_array_allocator().allocate(my_size / 2);
set_up_free_list(&free_list, elements_array, my_size / 2);
}
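// Sizing invariant (informal): tagged_insert() grows the table as soon as
// nelements*2 > my_size, keeping the load factor at or below 1/2; hence an
// elements_array of my_size/2 slots always suffices to back the my_size buckets
// of pointer_array.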
// made static so an enclosed class can use to properly dispose of the internals
static void internal_free_buffer( pointer_array_type &pa, list_array_type &el, size_t &sz, size_t &ne ) {
if(pa) {
for(size_t i = 0; i < sz; ++i ) {
element_type *p_next;
for( element_type *p = pa[i]; p; p = p_next) {
p_next = (element_type *)p->second.next;
value_type *vp = reinterpret_cast<value_type *>(&(p->first));
vp->~value_type();
}
}
pointer_array_allocator_type().deallocate(pa, sz);
pa = NULL;
}
// Separate test: if allocation of pa throws, el may still be allocated,
// but no elements will have been constructed.
if(el) {
elements_array_allocator().deallocate(el, sz / 2);
el = NULL;
}
sz = INITIAL_SIZE;
ne = 0;
}
public:
tagged_buffer() : my_size(INITIAL_SIZE), nelements(0) {
internal_initialize_buffer();
}
~tagged_buffer() {
internal_free_buffer(pointer_array, elements_array, my_size, nelements);
}
void reset() {
internal_free_buffer(pointer_array, elements_array, my_size, nelements);
internal_initialize_buffer();
}
bool tagged_insert(const TagType t, const value_type &v) {
pointer_type p;
if(tagged_find_ref(t, p)) {
p->~value_type();
(void) new(p) value_type(v); // copy-construct into the space
return false;
}
++nelements;
if(nelements*2 > my_size) grow_array();
internal_tagged_insert(pointer_array, my_size, free_list, t, v);
return true;
}
// returns true and sets v to point at the stored element
bool tagged_find_ref(const TagType t, pointer_type &v) {
size_t i = hash(t) & mask();
for(element_type* p = pointer_array[i]; p; p = (element_type *)(p->second.next)) {
if(p->second.t == t) {
v = reinterpret_cast<pointer_type>(&(p->first));
return true;
}
}
return false;
}
bool tagged_find( const TagType t, value_type &v) {
value_type *p;
if(tagged_find_ref(t, p)) {
v = *p;
return true;
}
else
return false;
}
void tagged_delete(const TagType t) {
size_t h = hash(t) & mask();
element_type* prev = NULL;
for(element_type* p = pointer_array[h]; p; prev = p, p = (element_type *)(p->second.next)) {
if(p->second.t == t) {
value_type *vp = reinterpret_cast<value_type *>(&(p->first));
vp->~value_type();
p->second.t = NO_TAG;
if(prev) prev->second.next = p->second.next;
else pointer_array[h] = (element_type *)(p->second.next);
p->second.next = free_list;
free_list = p;
--nelements;
return;
}
}
__TBB_ASSERT(false, "tag not found for delete");
}
};
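// Usage sketch (illustrative only; the tag and value types are made up). The
// buffer has no built-in synchronization, so real callers such as join_node
// serialize access, e.g. through an aggregator:
//
//   tagged_buffer<size_t, int> buf;
//   int v = 0;
//   buf.tagged_insert(7, 42);        // true: new tag
//   buf.tagged_insert(7, 43);        // false: value for tag 7 replaced
//   if (buf.tagged_find(7, v)) { /* v == 43 */ }
//   buf.tagged_delete(7);            // destroys the value, recycles the slot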
#endif // __TBB__flow_graph_tagged_buffer_impl_H


@@ -0,0 +1,205 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef _FGT_GRAPH_TRACE_IMPL_H
#define _FGT_GRAPH_TRACE_IMPL_H
#include "../tbb_profiling.h"
namespace tbb {
namespace internal {
#if TBB_PREVIEW_FLOW_GRAPH_TRACE
static inline void fgt_internal_create_input_port( void *node, void *p, string_index name_index ) {
itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index );
}
static inline void fgt_internal_create_output_port( void *node, void *p, string_index name_index ) {
itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index );
}
template < typename TypesTuple, typename PortsTuple, int N >
struct fgt_internal_input_helper {
static void register_port( void *node, PortsTuple &ports ) {
fgt_internal_create_input_port( node, (void*)static_cast< tbb::flow::interface7::receiver< typename tbb::flow::tuple_element<N-1,TypesTuple>::type > * >(&(tbb::flow::get<N-1>(ports))),
static_cast<tbb::internal::string_index>(FLOW_INPUT_PORT_0 + N - 1) );
fgt_internal_input_helper<TypesTuple, PortsTuple, N-1>::register_port( node, ports );
}
};
template < typename TypesTuple, typename PortsTuple >
struct fgt_internal_input_helper<TypesTuple,PortsTuple,1> {
static void register_port( void *node, PortsTuple &ports ) {
fgt_internal_create_input_port( node, (void*)static_cast< tbb::flow::interface7::receiver< typename tbb::flow::tuple_element<0,TypesTuple>::type > * >(&(tbb::flow::get<0>(ports))),
FLOW_INPUT_PORT_0 );
}
};
template < typename TypesTuple, typename PortsTuple, int N >
struct fgt_internal_output_helper {
static void register_port( void *node, PortsTuple &ports ) {
fgt_internal_create_output_port( node, (void*)static_cast< tbb::flow::interface7::sender< typename tbb::flow::tuple_element<N-1,TypesTuple>::type > * >(&(tbb::flow::get<N-1>(ports))),
static_cast<tbb::internal::string_index>(FLOW_OUTPUT_PORT_0 + N - 1) );
fgt_internal_output_helper<TypesTuple, PortsTuple, N-1>::register_port( node, ports );
}
};
template < typename TypesTuple, typename PortsTuple >
struct fgt_internal_output_helper<TypesTuple,PortsTuple,1> {
static void register_port( void *node, PortsTuple &ports ) {
fgt_internal_create_output_port( node, (void*)static_cast< tbb::flow::interface7::sender< typename tbb::flow::tuple_element<0,TypesTuple>::type > * >(&(tbb::flow::get<0>(ports))),
FLOW_OUTPUT_PORT_0 );
}
};
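// Sketch of the recursion above (informal): for a node with a 3-element output
// tuple, fgt_internal_output_helper<Tuple,Ports,3>::register_port(node, ports)
// registers port 2 as FLOW_OUTPUT_PORT_2, recurses with N-1, and bottoms out in
// the N==1 specialization, which registers port 0 as FLOW_OUTPUT_PORT_0. The
// input helper mirrors this for FLOW_INPUT_PORT_*.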
template< typename NodeType >
void fgt_multioutput_node_desc( const NodeType *node, const char *desc ) {
void *addr = (void *)( static_cast< tbb::flow::interface7::receiver< typename NodeType::input_type > * >(const_cast< NodeType *>(node)) );
itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc );
}
template< typename NodeType >
static inline void fgt_node_desc( const NodeType *node, const char *desc ) {
void *addr = (void *)( static_cast< tbb::flow::interface7::sender< typename NodeType::output_type > * >(const_cast< NodeType *>(node)) );
itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc );
}
static inline void fgt_graph_desc( void *g, const char *desc ) {
itt_metadata_str_add( ITT_DOMAIN_FLOW, g, FLOW_GRAPH, FLOW_OBJECT_NAME, desc );
}
static inline void fgt_body( void *node, void *body ) {
itt_relation_add( ITT_DOMAIN_FLOW, body, FLOW_BODY, __itt_relation_is_child_of, node, FLOW_NODE );
}
template< typename OutputTuple, int N, typename PortsTuple >
static inline void fgt_multioutput_node( string_index t, void *g, void *input_port, PortsTuple &ports ) {
itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 );
fgt_internal_output_helper<OutputTuple, PortsTuple, N>::register_port( input_port, ports );
}
template< typename OutputTuple, int N, typename PortsTuple >
static inline void fgt_multioutput_node_with_body( string_index t, void *g, void *input_port, PortsTuple &ports, void *body ) {
itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 );
fgt_internal_output_helper<OutputTuple, PortsTuple, N>::register_port( input_port, ports );
fgt_body( input_port, body );
}
template< typename InputTuple, int N, typename PortsTuple >
static inline void fgt_multiinput_node( string_index t, void *g, PortsTuple &ports, void *output_port) {
itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_output_port( output_port, output_port, FLOW_OUTPUT_PORT_0 );
fgt_internal_input_helper<InputTuple, PortsTuple, N>::register_port( output_port, ports );
}
static inline void fgt_node( string_index t, void *g, void *output_port ) {
itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_output_port( output_port, output_port, FLOW_OUTPUT_PORT_0 );
}
static inline void fgt_node_with_body( string_index t, void *g, void *output_port, void *body ) {
itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_output_port( output_port, output_port, FLOW_OUTPUT_PORT_0 );
fgt_body( output_port, body );
}
static inline void fgt_node( string_index t, void *g, void *input_port, void *output_port ) {
fgt_node( t, g, output_port );
fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 );
}
static inline void fgt_node_with_body( string_index t, void *g, void *input_port, void *output_port, void *body ) {
fgt_node_with_body( t, g, output_port, body );
fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 );
}
static inline void fgt_node( string_index t, void *g, void *input_port, void *decrement_port, void *output_port ) {
fgt_node( t, g, input_port, output_port );
fgt_internal_create_input_port( output_port, decrement_port, FLOW_INPUT_PORT_1 );
}
static inline void fgt_make_edge( void *output_port, void *input_port ) {
itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_predecessor_to, input_port, FLOW_INPUT_PORT);
}
static inline void fgt_remove_edge( void *output_port, void *input_port ) {
itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_sibling_of, input_port, FLOW_INPUT_PORT);
}
static inline void fgt_graph( void *g ) {
itt_make_task_group( ITT_DOMAIN_FLOW, g, FLOW_GRAPH, NULL, FLOW_NULL, FLOW_GRAPH );
}
static inline void fgt_begin_body( void *body ) {
itt_task_begin( ITT_DOMAIN_FLOW, body, FLOW_BODY, NULL, FLOW_NULL, FLOW_NULL );
}
static inline void fgt_end_body( void * ) {
itt_task_end( ITT_DOMAIN_FLOW );
}
#else // TBB_PREVIEW_FLOW_GRAPH_TRACE
static inline void fgt_graph( void * /*g*/ ) { }
template< typename NodeType >
static inline void fgt_multioutput_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { }
template< typename NodeType >
static inline void fgt_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { }
static inline void fgt_graph_desc( void * /*g*/, const char * /*desc*/ ) { }
static inline void fgt_body( void * /*node*/, void * /*body*/ ) { }
template< typename OutputTuple, int N, typename PortsTuple >
static inline void fgt_multioutput_node( string_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/ ) { }
template< typename OutputTuple, int N, typename PortsTuple >
static inline void fgt_multioutput_node_with_body( string_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/, void * /*body*/ ) { }
template< typename InputTuple, int N, typename PortsTuple >
static inline void fgt_multiinput_node( string_index /*t*/, void * /*g*/, PortsTuple & /*ports*/, void * /*output_port*/ ) { }
static inline void fgt_node( string_index /*t*/, void * /*g*/, void * /*output_port*/ ) { }
static inline void fgt_node( string_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/ ) { }
static inline void fgt_node( string_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*decrement_port*/, void * /*output_port*/ ) { }
static inline void fgt_node_with_body( string_index /*t*/, void * /*g*/, void * /*output_port*/, void * /*body*/ ) { }
static inline void fgt_node_with_body( string_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/, void * /*body*/ ) { }
static inline void fgt_make_edge( void * /*output_port*/, void * /*input_port*/ ) { }
static inline void fgt_remove_edge( void * /*output_port*/, void * /*input_port*/ ) { }
static inline void fgt_begin_body( void * /*body*/ ) { }
static inline void fgt_end_body( void * /*body*/) { }
#endif // TBB_PREVIEW_FLOW_GRAPH_TRACE
} // namespace internal
} // namespace tbb
#endif


@@ -0,0 +1,497 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB__flow_graph_types_impl_H
#define __TBB__flow_graph_types_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
// included in namespace tbb::flow::interface7
namespace internal {
// wrap each element of a tuple in a template, and make a tuple of the result.
template<int N, template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements;
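// For example (informal): wrap_tuple_elements<2, sender, tuple<int,float> >::type
// is tuple<sender<int>, sender<float> >. Each arity up to __TBB_VARIADIC_MAX is
// written out below because variadic templates cannot be relied upon here.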
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<1, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type> >
type;
};
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<2, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type> >
type;
};
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<3, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type> >
type;
};
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<4, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type> >
type;
};
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<5, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<4,TypeTuple>::type> >
type;
};
#if __TBB_VARIADIC_MAX >= 6
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<6, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<4,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<5,TypeTuple>::type> >
type;
};
#endif
#if __TBB_VARIADIC_MAX >= 7
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<7, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<4,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<5,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<6,TypeTuple>::type> >
type;
};
#endif
#if __TBB_VARIADIC_MAX >= 8
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<8, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<4,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<5,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<6,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<7,TypeTuple>::type> >
type;
};
#endif
#if __TBB_VARIADIC_MAX >= 9
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<9, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<4,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<5,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<6,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<7,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<8,TypeTuple>::type> >
type;
};
#endif
#if __TBB_VARIADIC_MAX >= 10
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<10, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<4,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<5,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<6,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<7,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<8,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<9,TypeTuple>::type> >
type;
};
#endif
//! type mimicking std::pair but with trailing fill to ensure each element of an array
//! will have the correct alignment
template<typename T1, typename T2, size_t REM>
struct type_plus_align {
char first[sizeof(T1)];
T2 second;
char fill1[REM];
};
template<typename T1, typename T2>
struct type_plus_align<T1,T2,0> {
char first[sizeof(T1)];
T2 second;
};
template<class U> struct alignment_of {
typedef struct { char t; U padded; } test_alignment;
static const size_t value = sizeof(test_alignment) - sizeof(U);
};
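// Informal note: alignment_of is a pre-C++11 stand-in for std::alignment_of.
// In struct { char t; U padded; } the compiler inserts exactly enough padding
// after t to align padded, so sizeof(test_alignment) - sizeof(U) equals U's
// alignment; e.g. for a U with 4-byte alignment the difference is 4.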
// T1, T2 are actual types stored. The space defined for T1 in the type returned
// is a char array of the correct size. Type T2 should be trivially-constructible,
// T1 must be explicitly managed.
template<typename T1, typename T2>
struct aligned_pair {
static const size_t t1_align = alignment_of<T1>::value;
static const size_t t2_align = alignment_of<T2>::value;
typedef type_plus_align<T1, T2, 0 > just_pair;
static const size_t max_align = t1_align < t2_align ? t2_align : t1_align;
static const size_t extra_bytes = sizeof(just_pair) % max_align;
static const size_t remainder = extra_bytes ? max_align - extra_bytes : 0;
public:
typedef type_plus_align<T1,T2,remainder> type;
}; // aligned_pair
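// Worked example (informal): for T1 = double and T2 = char, just_pair is
// { char first[8]; char second; } with size 9 and max_align 8, so remainder is
// 7 and aligned_pair<double,char>::type is padded to 16 bytes -- in an array of
// such pairs every element's first (the T1 storage) stays 8-byte aligned,
// provided the array itself starts suitably aligned.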
// support for variant type
// type we use when we're not storing a value
struct default_constructed { };
// a type which contains another type, can be tested for which type it contains,
// and gives read-only references to it.
// internal::Wrapper<T>
//   void CopyTo( void *newSpace ) : builds a Wrapper<T> copy of itself in newSpace
// base struct allowing us to copy and test the type of contained objects
struct WrapperBase {
virtual ~WrapperBase() {}
virtual void CopyTo(void* /*newSpace*/) const { }
};
// Wrapper<T> contains a T, with the ability to test what T is. The Wrapper<T> can be
// constructed from a T, can be copy-constructed from another Wrapper<T>, and can be
// examined via value(), but not modified.
template<typename T>
struct Wrapper: public WrapperBase {
typedef T value_type;
typedef T* pointer_type;
private:
T value_space;
public:
const value_type &value() const { return value_space; }
private:
Wrapper();
// on exception will ensure the Wrapper will contain only a trivially-constructed object
struct _unwind_space {
pointer_type space;
_unwind_space(pointer_type p) : space(p) {}
~_unwind_space() {
if(space) (void) new (space) Wrapper<default_constructed>(default_constructed());
}
};
public:
explicit Wrapper( const T& other ) : value_space(other) { }
explicit Wrapper(const Wrapper& other) : value_space(other.value_space) { }
/*override*/void CopyTo(void* newSpace) const {
_unwind_space guard((pointer_type)newSpace);
(void) new(newSpace) Wrapper(value_space);
guard.space = NULL;
}
/*override*/~Wrapper() { }
};
// specialization for array objects
template<typename T, size_t N>
struct Wrapper<T[N]> : public WrapperBase {
typedef T value_type;
typedef T* pointer_type;
// space must be untyped.
typedef T ArrayType[N];
private:
// The space is not of type T[N] because when copy-constructing, it would be
// default-initialized and then copied to in some fashion, resulting in two
// constructions and one destruction per element. If the type is char[ ], we
// placement new into each element, resulting in one construction per element.
static const size_t space_size = sizeof(ArrayType) / sizeof(char);
char value_space[space_size];
// on exception will ensure the already-built objects will be destructed
// (the value_space is a char array, so it is already trivially-destructible.)
struct _unwind_class {
pointer_type space;
int already_built;
_unwind_class(pointer_type p) : space(p), already_built(0) {}
~_unwind_class() {
if(space) {
for(size_t i = already_built; i > 0 ; --i ) space[i-1].~value_type();
(void) new(space) Wrapper<default_constructed>(default_constructed());
}
}
};
public:
const ArrayType &value() const {
char *vp = const_cast<char *>(value_space);
return reinterpret_cast<ArrayType &>(*vp);
}
private:
Wrapper();
public:
// have to explicitly construct because other decays to a const value_type*
explicit Wrapper(const ArrayType& other) {
_unwind_class guard((pointer_type)value_space);
pointer_type vp = reinterpret_cast<pointer_type>(&value_space);
for(size_t i = 0; i < N; ++i ) {
(void) new(vp++) value_type(other[i]);
++(guard.already_built);
}
guard.space = NULL;
}
explicit Wrapper(const Wrapper& other) : WrapperBase() {
// we have to do the heavy lifting to copy contents
_unwind_class guard((pointer_type)value_space);
pointer_type dp = reinterpret_cast<pointer_type>(value_space);
pointer_type sp = reinterpret_cast<pointer_type>(const_cast<char *>(other.value_space));
for(size_t i = 0; i < N; ++i, ++dp, ++sp) {
(void) new(dp) value_type(*sp);
++(guard.already_built);
}
guard.space = NULL;
}
/*override*/void CopyTo(void* newSpace) const {
(void) new(newSpace) Wrapper(*this); // exceptions handled in copy constructor
}
/*override*/~Wrapper() {
// have to destroy explicitly in reverse order
pointer_type vp = reinterpret_cast<pointer_type>(&value_space);
for(size_t i = N; i > 0 ; --i ) vp[i-1].~value_type();
}
};
// given a tuple, return the type of the element that has the maximum alignment requirement.
// Given a tuple and that type, return the number of elements of the object with the max
// alignment requirement that is at least as big as the largest object in the tuple.
template<bool, class T1, class T2> struct pick_one;
template<class T1, class T2> struct pick_one<true , T1, T2> { typedef T1 type; };
template<class T1, class T2> struct pick_one<false, T1, T2> { typedef T2 type; };
template< template<class> class Selector, typename T1, typename T2 >
struct pick_max {
typedef typename pick_one< (Selector<T1>::value > Selector<T2>::value), T1, T2 >::type type;
};
template<typename T> struct size_of { static const int value = sizeof(T); };
template< size_t N, class Tuple, template<class> class Selector > struct pick_tuple_max {
typedef typename pick_tuple_max<N-1, Tuple, Selector>::type LeftMaxType;
typedef typename tbb::flow::tuple_element<N-1, Tuple>::type ThisType;
typedef typename pick_max<Selector, LeftMaxType, ThisType>::type type;
};
template< class Tuple, template<class> class Selector > struct pick_tuple_max<0, Tuple, Selector> {
typedef typename tbb::flow::tuple_element<0, Tuple>::type type;
};
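// Example (informal): with Tuple = tuple<char, double, int>,
// pick_tuple_max<3, Tuple, size_of>::type is double (the largest element) and
// pick_tuple_max<3, Tuple, alignment_of>::type is the most strictly aligned
// element; the variant below uses both to size and align its raw storage.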
// is the specified type included in a tuple?
template<class U, class V> struct is_same_type { static const bool value = false; };
template<class W> struct is_same_type<W,W> { static const bool value = true; };
template<class Q, size_t N, class Tuple>
struct is_element_of {
typedef typename tbb::flow::tuple_element<N-1, Tuple>::type T_i;
static const bool value = is_same_type<Q,T_i>::value || is_element_of<Q,N-1,Tuple>::value;
};
template<class Q, class Tuple>
struct is_element_of<Q,0,Tuple> {
typedef typename tbb::flow::tuple_element<0, Tuple>::type T_i;
static const bool value = is_same_type<Q,T_i>::value;
};
// allow construction only of types that are listed in the tuple. If construction
// of a disallowed type is written, a method involving that type is instantiated.
// The type has no definition, so a compilation error is generated.
template<typename T> struct ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple;
template<typename T, bool BUILD_IT> struct do_if;
template<typename T>
struct do_if<T, true> {
static void construct(void *mySpace, const T& x) {
(void) new(mySpace) Wrapper<T>(x);
}
};
template<typename T>
struct do_if<T, false> {
static void construct(void * /*mySpace*/, const T& x) {
// This method is instantiated when the type T does not match any of the
// element types in the Tuple in variant<Tuple>.
ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple<T>::bad_type(x);
}
};
// Tuple tells us the allowed types that variant can hold. It determines the alignment of the space in
// Wrapper, and how big Wrapper is.
//
// the object can only be tested for type, and a read-only reference can be fetched by cast_to<T>().
using tbb::internal::punned_cast;
struct tagged_null_type {};
template<typename TagType, typename T0, typename T1=tagged_null_type, typename T2=tagged_null_type, typename T3=tagged_null_type,
typename T4=tagged_null_type, typename T5=tagged_null_type, typename T6=tagged_null_type,
typename T7=tagged_null_type, typename T8=tagged_null_type, typename T9=tagged_null_type>
class tagged_msg {
typedef tbb::flow::tuple<T0, T1, T2, T3, T4
#if __TBB_VARIADIC_MAX >= 6
, T5
#endif
#if __TBB_VARIADIC_MAX >= 7
, T6
#endif
#if __TBB_VARIADIC_MAX >= 8
, T7
#endif
#if __TBB_VARIADIC_MAX >= 9
, T8
#endif
#if __TBB_VARIADIC_MAX >= 10
, T9
#endif
> Tuple;
private:
class variant {
static const size_t N = tbb::flow::tuple_size<Tuple>::value;
typedef typename pick_tuple_max<N, Tuple, alignment_of>::type AlignType;
typedef typename pick_tuple_max<N, Tuple, size_of>::type MaxSizeType;
static const size_t MaxNBytes = (sizeof(Wrapper<MaxSizeType>)+sizeof(AlignType)-1);
static const size_t MaxNElements = MaxNBytes/sizeof(AlignType);
typedef typename tbb::aligned_space<AlignType, MaxNElements> SpaceType;
SpaceType my_space;
static const size_t MaxSize = sizeof(SpaceType);
public:
variant() { (void) new(&my_space) Wrapper<default_constructed>(default_constructed()); }
template<typename T>
variant( const T& x ) {
do_if<T, is_element_of<T, N, Tuple>::value>::construct(&my_space,x);
}
variant(const variant& other) {
const WrapperBase * h = punned_cast<const WrapperBase *>(&(other.my_space));
h->CopyTo(&my_space);
}
// assignment must destroy and re-create the Wrapper type, as there is no way
// to create a Wrapper-to-Wrapper assign even if we find they agree in type.
void operator=( const variant& rhs ) {
if(&rhs != this) {
WrapperBase *h = punned_cast<WrapperBase *>(&my_space);
h->~WrapperBase();
const WrapperBase *ch = punned_cast<const WrapperBase *>(&(rhs.my_space));
ch->CopyTo(&my_space);
}
}
template<typename U>
const U& variant_cast_to() const {
const Wrapper<U> *h = dynamic_cast<const Wrapper<U>*>(punned_cast<const WrapperBase *>(&my_space));
if(!h) {
tbb::internal::throw_exception(tbb::internal::eid_bad_tagged_msg_cast);
}
return h->value();
}
template<typename U>
bool variant_is_a() const { return dynamic_cast<const Wrapper<U>*>(punned_cast<const WrapperBase *>(&my_space)) != NULL; }
bool variant_is_default_constructed() const {return variant_is_a<default_constructed>();}
~variant() {
WrapperBase *h = punned_cast<WrapperBase *>(&my_space);
h->~WrapperBase();
}
}; //class variant
TagType my_tag;
variant my_msg;
public:
tagged_msg(): my_tag(TagType(~0)), my_msg(){}
template<typename T, typename R>
tagged_msg(T const &index, R const &value) : my_tag(index), my_msg(value) {}
#if __TBB_CONST_REF_TO_ARRAY_TEMPLATE_PARAM_BROKEN
template<typename T, typename R, size_t N>
tagged_msg(T const &index, R (&value)[N]) : my_tag(index), my_msg(value) {}
#endif
void set_tag(TagType const &index) {my_tag = index;}
TagType tag() const {return my_tag;}
template<typename V>
const V& cast_to() const {return my_msg.template variant_cast_to<V>();}
template<typename V>
bool is_a() const {return my_msg.template variant_is_a<V>();}
bool is_default_constructed() const {return my_msg.variant_is_default_constructed();}
}; //class tagged_msg
// template to simplify cast and test for tagged_msg in template contexts
template<typename T, typename V>
const T& cast_to(V const &v) { return v.template cast_to<T>(); }
template<typename T, typename V>
bool is_a(V const &v) { return v.template is_a<T>(); }
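// Usage sketch (illustrative only; the tag value and payload types are made up):
//
//   typedef tagged_msg<size_t, int, float> msg_t;
//   msg_t m(size_t(1), 2.5f);        // holds a float under tag 1
//   if (is_a<float>(m)) {
//       const float &f = cast_to<float>(m);   // f == 2.5f
//   }
//   bool b = m.is_a<int>();          // false; cast_to<int>(m) would throw
//                                    // (eid_bad_tagged_msg_cast)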
} // namespace internal
#endif /* __TBB__flow_graph_types_impl_H */


@@ -0,0 +1,102 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_mutex_padding_H
#define __TBB_mutex_padding_H
// Wrapper that pads a mutex so it sits alone on a cache line, without requiring that it be
// allocated from a pool. Because we allow padded mutexes to be defined anywhere, they must be
// two cache lines in size.
namespace tbb {
namespace interface7 {
namespace internal {
static const size_t cache_line_size = 64;
// Pad a mutex to occupy a number of full cache lines sufficient to avoid false sharing
// with other data; space overhead is up to 2*cache_line_size-1.
template<typename Mutex, bool is_rw> class padded_mutex;
template<typename Mutex>
class padded_mutex<Mutex,false> : tbb::internal::mutex_copy_deprecated_and_disabled {
typedef long pad_type;
pad_type my_pad[((sizeof(Mutex)+cache_line_size-1)/cache_line_size+1)*cache_line_size/sizeof(pad_type)];
Mutex *impl() { return (Mutex *)((uintptr_t(this)|(cache_line_size-1))+1);}
public:
static const bool is_rw_mutex = Mutex::is_rw_mutex;
static const bool is_recursive_mutex = Mutex::is_recursive_mutex;
static const bool is_fair_mutex = Mutex::is_fair_mutex;
padded_mutex() { new(impl()) Mutex(); }
~padded_mutex() { impl()->~Mutex(); }
//! Represents acquisition of a mutex.
class scoped_lock : tbb::internal::no_copy {
typename Mutex::scoped_lock my_scoped_lock;
public:
scoped_lock() : my_scoped_lock() {}
scoped_lock( padded_mutex& m ) : my_scoped_lock(*m.impl()) { }
~scoped_lock() { }
void acquire( padded_mutex& m ) { my_scoped_lock.acquire(*m.impl()); }
bool try_acquire( padded_mutex& m ) { return my_scoped_lock.try_acquire(*m.impl()); }
void release() { my_scoped_lock.release(); }
};
};
template<typename Mutex>
class padded_mutex<Mutex,true> : tbb::internal::mutex_copy_deprecated_and_disabled {
typedef long pad_type;
pad_type my_pad[((sizeof(Mutex)+cache_line_size-1)/cache_line_size+1)*cache_line_size/sizeof(pad_type)];
Mutex *impl() { return (Mutex *)((uintptr_t(this)|(cache_line_size-1))+1);}
public:
static const bool is_rw_mutex = Mutex::is_rw_mutex;
static const bool is_recursive_mutex = Mutex::is_recursive_mutex;
static const bool is_fair_mutex = Mutex::is_fair_mutex;
padded_mutex() { new(impl()) Mutex(); }
~padded_mutex() { impl()->~Mutex(); }
//! Represents acquisition of a mutex.
class scoped_lock : tbb::internal::no_copy {
typename Mutex::scoped_lock my_scoped_lock;
public:
scoped_lock() : my_scoped_lock() {}
scoped_lock( padded_mutex& m, bool write = true ) : my_scoped_lock(*m.impl(),write) { }
~scoped_lock() { }
void acquire( padded_mutex& m, bool write = true ) { my_scoped_lock.acquire(*m.impl(),write); }
bool try_acquire( padded_mutex& m, bool write = true ) { return my_scoped_lock.try_acquire(*m.impl(),write); }
bool upgrade_to_writer() { return my_scoped_lock.upgrade_to_writer(); }
bool downgrade_to_reader() { return my_scoped_lock.downgrade_to_reader(); }
void release() { my_scoped_lock.release(); }
};
};
} // namespace internal
} // namespace interface7
} // namespace tbb
#endif /* __TBB_mutex_padding_H */
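The my_pad array reserves enough space for the mutex rounded up to whole cache lines, plus one extra line, and impl() places the mutex at the next cache-line boundary strictly above this. A standalone illustration of that address arithmetic (sample addresses are arbitrary; this is not TBB code):

#include <cstdint>
#include <cstdio>

int main() {
    const std::uintptr_t cache_line_size = 64;
    const std::uintptr_t samples[] = { 0x1000, 0x1001, 0x103F, 0x1040 };
    for (std::uintptr_t p : samples) {
        // Same expression as impl(): round up to the NEXT 64-byte boundary.
        std::uintptr_t impl_addr = (p | (cache_line_size - 1)) + 1;
        std::printf("this=%#lx -> impl()=%#lx\n",
                    (unsigned long)p, (unsigned long)impl_addr);
    }
    // 0x1000, 0x1001 and 0x103f all map to 0x1040, while 0x1040 maps to
    // 0x1080: even an already-aligned address moves up a full line, which is
    // why the pad holds one cache line more than the mutex itself needs --
    // the mutex always lands inside the pad wherever the object begins.
}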

View File

@@ -0,0 +1,70 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_range_iterator_H
#define __TBB_range_iterator_H
#include "../tbb_stddef.h"
#if __TBB_CPP11_STD_BEGIN_END_PRESENT && __TBB_CPP11_AUTO_PRESENT && __TBB_CPP11_DECLTYPE_PRESENT
#include <iterator>
#endif
namespace tbb {
// iterators to the first and last elements of a container
namespace internal {
#if __TBB_CPP11_STD_BEGIN_END_PRESENT && __TBB_CPP11_AUTO_PRESENT && __TBB_CPP11_DECLTYPE_PRESENT
using std::begin;
using std::end;
template<typename Container>
auto first(Container& c)-> decltype(begin(c)) {return begin(c);}
template<typename Container>
auto first(const Container& c)-> decltype(begin(c)) {return begin(c);}
template<typename Container>
auto last(Container& c)-> decltype(begin(c)) {return end(c);}
template<typename Container>
auto last(const Container& c)-> decltype(begin(c)) {return end(c);}
#else
template<typename Container>
typename Container::iterator first(Container& c) {return c.begin();}
template<typename Container>
typename Container::const_iterator first(const Container& c) {return c.begin();}
template<typename Container>
typename Container::iterator last(Container& c) {return c.end();}
template<typename Container>
typename Container::const_iterator last(const Container& c) {return c.end();}
#endif
template<typename T, size_t size>
T* first(T (&arr) [size]) {return arr;}
template<typename T, size_t size>
T* last(T (&arr) [size]) {return arr + size;}
} //namespace internal
} //namespace tbb
#endif // __TBB_range_iterator_H
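A standalone sketch of the same first/last overload set (the demo namespace is hypothetical; the real functions live in tbb::internal). The array overloads are the point: c.begin() does not exist for a built-in array, but the unified functions accept both kinds of sequence:

#include <cstddef>
#include <iostream>
#include <vector>

namespace demo {
template<typename Container>
typename Container::iterator first(Container& c) { return c.begin(); }
template<typename Container>
typename Container::iterator last(Container& c) { return c.end(); }
template<typename T, std::size_t size>
T* first(T (&arr)[size]) { return arr; }
template<typename T, std::size_t size>
T* last(T (&arr)[size]) { return arr + size; }
}

int main() {
    int a[] = { 1, 2, 3 };
    std::vector<int> v = { 4, 5, 6 };
    // Identical call syntax for a raw array and a container:
    for (int* it = demo::first(a); it != demo::last(a); ++it) std::cout << *it << ' ';
    for (std::vector<int>::iterator it = demo::first(v); it != demo::last(v); ++it)
        std::cout << *it << ' ';
    std::cout << '\n';   // prints: 1 2 3 4 5 6
}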

View File

@@ -0,0 +1,65 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
TBB_STRING_RESOURCE(FLOW_BROADCAST_NODE, "broadcast_node")
TBB_STRING_RESOURCE(FLOW_BUFFER_NODE, "buffer_node")
TBB_STRING_RESOURCE(FLOW_CONTINUE_NODE, "continue_node")
TBB_STRING_RESOURCE(FLOW_FUNCTION_NODE, "function_node")
TBB_STRING_RESOURCE(FLOW_JOIN_NODE_QUEUEING, "join_node (queueing)")
TBB_STRING_RESOURCE(FLOW_JOIN_NODE_RESERVING, "join_node (reserving)")
TBB_STRING_RESOURCE(FLOW_JOIN_NODE_TAG_MATCHING, "join_node (tag_matching)")
TBB_STRING_RESOURCE(FLOW_LIMITER_NODE, "limiter_node")
TBB_STRING_RESOURCE(FLOW_MULTIFUNCTION_NODE, "multifunction_node")
TBB_STRING_RESOURCE(FLOW_OR_NODE, "or_node") // no longer in use; kept for backward compatibility
TBB_STRING_RESOURCE(FLOW_OVERWRITE_NODE, "overwrite_node")
TBB_STRING_RESOURCE(FLOW_PRIORITY_QUEUE_NODE, "priority_queue_node")
TBB_STRING_RESOURCE(FLOW_QUEUE_NODE, "queue_node")
TBB_STRING_RESOURCE(FLOW_SEQUENCER_NODE, "sequencer_node")
TBB_STRING_RESOURCE(FLOW_SOURCE_NODE, "source_node")
TBB_STRING_RESOURCE(FLOW_SPLIT_NODE, "split_node")
TBB_STRING_RESOURCE(FLOW_WRITE_ONCE_NODE, "write_once_node")
TBB_STRING_RESOURCE(FLOW_BODY, "body")
TBB_STRING_RESOURCE(FLOW_GRAPH, "graph")
TBB_STRING_RESOURCE(FLOW_NODE, "node")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT, "input_port")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_0, "input_port_0")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_1, "input_port_1")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_2, "input_port_2")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_3, "input_port_3")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_4, "input_port_4")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_5, "input_port_5")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_6, "input_port_6")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_7, "input_port_7")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_8, "input_port_8")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_9, "input_port_9")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT, "output_port")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_0, "output_port_0")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_1, "output_port_1")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_2, "output_port_2")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_3, "output_port_3")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_4, "output_port_4")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_5, "output_port_5")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_6, "output_port_6")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_7, "output_port_7")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_8, "output_port_8")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_9, "output_port_9")
TBB_STRING_RESOURCE(FLOW_OBJECT_NAME, "object_name")
TBB_STRING_RESOURCE(FLOW_NULL, "null")
TBB_STRING_RESOURCE(FLOW_INDEXER_NODE, "indexer_node")
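Note that this file never defines TBB_STRING_RESOURCE itself: it is the list half of an X-macro pattern, where each includer supplies its own definition before including the file, so the one list can expand to enumerators in one translation unit and to a parallel string table in another. A self-contained sketch of the pattern (hypothetical macro and file layout, not TBB's actual build machinery):

#include <cstdio>

// Stands in for the resource list above, which would live in its own file:
#define STRING_LIST \
    TBB_STRING_RESOURCE(FLOW_BROADCAST_NODE, "broadcast_node") \
    TBB_STRING_RESOURCE(FLOW_BUFFER_NODE, "buffer_node")

// Expansion 1: an enum of identifiers.
#define TBB_STRING_RESOURCE(id, str) id,
enum string_index { STRING_LIST NUM_STRINGS };
#undef TBB_STRING_RESOURCE

// Expansion 2: the matching table of literals, kept in sync automatically.
#define TBB_STRING_RESOURCE(id, str) str,
static const char* string_table[] = { STRING_LIST };
#undef TBB_STRING_RESOURCE

int main() {
    std::printf("%s\n", string_table[FLOW_BUFFER_NODE]);   // prints buffer_node
}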

View File

@@ -0,0 +1,73 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB_tbb_windef_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif /* __TBB_tbb_windef_H */
// Check that the target Windows version has all API calls required for TBB.
// Do not increase the version in the condition below beyond 0x0501 without prior discussion!
#if defined(_WIN32_WINNT) && _WIN32_WINNT<0x0501
#error TBB is unable to run on old Windows versions; _WIN32_WINNT must be 0x0501 or greater.
#endif
#if !defined(_MT)
#error TBB requires linkage with multithreaded C/C++ runtime library. \
Choose multithreaded DLL runtime in project settings, or use /MD[d] compiler switch.
#endif
// Workaround for the problem with MSVC headers failing to define namespace std
namespace std {
using ::size_t; using ::ptrdiff_t;
}
#define __TBB_STRING_AUX(x) #x
#define __TBB_STRING(x) __TBB_STRING_AUX(x)
// Default setting of TBB_USE_DEBUG
#ifdef TBB_USE_DEBUG
# if TBB_USE_DEBUG
# if !defined(_DEBUG)
# pragma message(__FILE__ "(" __TBB_STRING(__LINE__) ") : Warning: Recommend using /MDd if compiling with TBB_USE_DEBUG!=0")
# endif
# else
# if defined(_DEBUG)
# pragma message(__FILE__ "(" __TBB_STRING(__LINE__) ") : Warning: Recommend using /MD if compiling with TBB_USE_DEBUG==0")
# endif
# endif
#endif
#if (__TBB_BUILD || __TBBMALLOC_BUILD) && !defined(__TBB_NO_IMPLICIT_LINKAGE)
#define __TBB_NO_IMPLICIT_LINKAGE 1
#endif
#if _MSC_VER
#if !__TBB_NO_IMPLICIT_LINKAGE
#ifdef __TBB_LIB_NAME
#pragma comment(lib, __TBB_STRING(__TBB_LIB_NAME))
#else
#ifdef _DEBUG
#pragma comment(lib, "tbb_debug.lib")
#else
#pragma comment(lib, "tbb.lib")
#endif
#endif
#endif
#endif
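A minimal standalone illustration (hypothetical macro names and library name) of why the two-level __TBB_STRING/__TBB_STRING_AUX pair is needed: stringizing through a helper forces the argument to be macro-expanded first, which is what lets #pragma comment(lib, __TBB_STRING(__TBB_LIB_NAME)) see the actual library name rather than the token __TBB_LIB_NAME:

#include <cstdio>

#define STRING_AUX(x) #x
#define STRING(x) STRING_AUX(x)
#define LIB_NAME tbb_preview.lib

int main() {
    // Direct stringizing sees the unexpanded token; the two-level form
    // expands LIB_NAME before the # operator is applied.
    std::printf("%s\n", STRING_AUX(LIB_NAME));  // prints: LIB_NAME
    std::printf("%s\n", STRING(LIB_NAME));      // prints: tbb_preview.lib
}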

View File

@@ -0,0 +1,148 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB__x86_eliding_mutex_impl_H
#define __TBB__x86_eliding_mutex_impl_H
#ifndef __TBB_spin_mutex_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#if ( __TBB_x86_32 || __TBB_x86_64 )
namespace tbb {
namespace interface7 {
namespace internal {
template<typename Mutex, bool is_rw>
class padded_mutex;
//! An eliding lock that occupies a single byte.
/** An x86_eliding_mutex is an HLE-enabled spin mutex. It is recommended to
put the mutex on a cache line that is not shared by the data it protects.
It should be used for locking short critical sections where the lock is
contended but the data it protects are not. If zero-initialized, the
mutex is considered unheld.
@ingroup synchronization */
class x86_eliding_mutex : tbb::internal::mutex_copy_deprecated_and_disabled {
//! 0 if lock is released, 1 if lock is acquired.
__TBB_atomic_flag flag;
friend class padded_mutex<x86_eliding_mutex, false>;
public:
//! Construct unacquired lock.
/** Equivalent to zero-initialization of *this. */
x86_eliding_mutex() : flag(0) {}
// bug in gcc 3.x.x causes syntax error in spite of the friend declaration above.
// Make the scoped_lock public in that case.
#if __TBB_USE_X86_ELIDING_MUTEX || __TBB_GCC_VERSION < 40000
#else
// by default we will not provide the scoped_lock interface. The user
// should use the padded version of the mutex. scoped_lock is used in
// the padded_mutex template.
private:
#endif
// scoped_lock in padded_mutex<> is the interface to use.
//! Represents acquisition of a mutex.
class scoped_lock : tbb::internal::no_copy {
private:
//! Points to currently held mutex, or NULL if no lock is held.
x86_eliding_mutex* my_mutex;
public:
//! Construct without acquiring a mutex.
scoped_lock() : my_mutex(NULL) {}
//! Construct and acquire lock on a mutex.
scoped_lock( x86_eliding_mutex& m ) : my_mutex(NULL) { acquire(m); }
//! Acquire lock.
void acquire( x86_eliding_mutex& m ) {
__TBB_ASSERT( !my_mutex, "already holding a lock" );
my_mutex=&m;
my_mutex->lock();
}
//! Try acquiring lock (non-blocking)
/** Return true if lock acquired; false otherwise. */
bool try_acquire( x86_eliding_mutex& m ) {
__TBB_ASSERT( !my_mutex, "already holding a lock" );
bool result = m.try_lock();
if( result ) {
my_mutex = &m;
}
return result;
}
//! Release lock
void release() {
__TBB_ASSERT( my_mutex, "release on scoped_lock that is not holding a lock" );
my_mutex->unlock();
my_mutex = NULL;
}
//! Destroy lock. If holding a lock, releases the lock first.
~scoped_lock() {
if( my_mutex ) {
release();
}
}
};
#if __TBB_USE_X86_ELIDING_MUTEX || __TBB_GCC_VERSION < 40000
#else
public:
#endif /* __TBB_USE_X86_ELIDING_MUTEX */
// Mutex traits
static const bool is_rw_mutex = false;
static const bool is_recursive_mutex = false;
static const bool is_fair_mutex = false;
// ISO C++0x compatibility methods
//! Acquire lock
void lock() {
__TBB_LockByteElided(flag);
}
//! Try acquiring lock (non-blocking)
/** Return true if lock acquired; false otherwise. */
bool try_lock() {
return __TBB_TryLockByteElided(flag);
}
//! Release lock
void unlock() {
__TBB_UnlockByteElided( flag );
}
}; // end of x86_eliding_mutex
} // namespace internal
} // namespace interface7
} // namespace tbb
#endif /* ( __TBB_x86_32 || __TBB_x86_64 ) */
#endif /* __TBB__x86_eliding_mutex_impl_H */
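The lock, try_lock, and unlock methods defer to __TBB_LockByteElided and friends, whose definitions live in the machine headers and are not shown here. As a conceptual sketch only (this is NOT TBB's implementation), assuming GCC's HLE memory-order hints on x86 (compiled with e.g. -mhle), an elided byte lock can be expressed like this; the hints emit the XACQUIRE/XRELEASE instruction prefixes that Hardware Lock Elision uses to run the critical section as a transaction:

#include <immintrin.h>   // _mm_pause

static unsigned char lock_flag = 0;

static void elided_lock() {
    // XACQUIRE xchg: the write is elided and the critical section runs
    // speculatively; on conflict the hardware re-executes it as a real xchg.
    while (__atomic_exchange_n(&lock_flag, (unsigned char)1,
                               __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
        _mm_pause();     // spin while another thread genuinely holds the lock
}

static void elided_unlock() {
    // XRELEASE store: commits the hardware transaction if one is active.
    __atomic_store_n(&lock_flag, (unsigned char)0,
                     __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}

int main() {
    elided_lock();
    // ... critical section ...
    elided_unlock();
}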

View File

@@ -0,0 +1,225 @@
/*
Copyright 2005-2015 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. Threading Building Blocks is free software;
you can redistribute it and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. Threading Building Blocks is
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. You should have received a copy of
the GNU General Public License along with Threading Building Blocks; if not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software library without
restriction. Specifically, if other files instantiate templates or use macros or inline
functions from this file, or you compile this file and link it with other files to produce
an executable, this file does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU General Public License.
*/
#ifndef __TBB__x86_rtm_rw_mutex_impl_H
#define __TBB__x86_rtm_rw_mutex_impl_H
#ifndef __TBB_spin_rw_mutex_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#if __TBB_TSX_AVAILABLE
#include "../tbb_stddef.h"
#include "../tbb_machine.h"
#include "../tbb_profiling.h"
#include "../spin_rw_mutex.h"
namespace tbb {
namespace interface8 {
namespace internal {
enum RTM_type {
RTM_not_in_mutex,
RTM_transacting_reader,
RTM_transacting_writer,
RTM_real_reader,
RTM_real_writer
};
static const unsigned long speculation_granularity = 64;
//! Fast, unfair, spinning, speculation-enabled reader-writer lock with backoff and
// writer preference
/** @ingroup synchronization */
class x86_rtm_rw_mutex: private spin_rw_mutex {
#if __TBB_USE_X86_RTM_RW_MUTEX || __TBB_GCC_VERSION < 40000
// bug in gcc 3.x.x causes syntax error in spite of the friend declaration below.
// Make the scoped_lock public in that case.
public:
#else
private:
#endif
friend class interface7::internal::padded_mutex<x86_rtm_rw_mutex,true>;
class scoped_lock; // should be private
friend class scoped_lock;
private:
//! @cond INTERNAL
//! Internal construct unacquired mutex.
void __TBB_EXPORTED_METHOD internal_construct();
//! Internal acquire write lock.
// only_speculate == true if we're doing a try_lock, else false.
void __TBB_EXPORTED_METHOD internal_acquire_writer(x86_rtm_rw_mutex::scoped_lock&, bool only_speculate=false);
//! Internal acquire read lock.
// only_speculate == true if we're doing a try_lock, else false.
void __TBB_EXPORTED_METHOD internal_acquire_reader(x86_rtm_rw_mutex::scoped_lock&, bool only_speculate=false);
//! Internal upgrade reader to become a writer.
bool __TBB_EXPORTED_METHOD internal_upgrade( x86_rtm_rw_mutex::scoped_lock& );
//! Out of line code for downgrading a writer to a reader.
bool __TBB_EXPORTED_METHOD internal_downgrade( x86_rtm_rw_mutex::scoped_lock& );
//! Internal try_acquire write lock.
bool __TBB_EXPORTED_METHOD internal_try_acquire_writer( x86_rtm_rw_mutex::scoped_lock& );
//! Internal release lock.
void __TBB_EXPORTED_METHOD internal_release( x86_rtm_rw_mutex::scoped_lock& );
static x86_rtm_rw_mutex* internal_get_mutex( const spin_rw_mutex::scoped_lock& lock )
{
return static_cast<x86_rtm_rw_mutex*>( lock.internal_get_mutex() );
}
static void internal_set_mutex( spin_rw_mutex::scoped_lock& lock, spin_rw_mutex* mtx )
{
lock.internal_set_mutex( mtx );
}
//! @endcond
public:
//! Construct unacquired mutex.
x86_rtm_rw_mutex() {
w_flag = false;
#if TBB_USE_THREADING_TOOLS
internal_construct();
#endif
}
#if TBB_USE_ASSERT
//! Empty destructor.
~x86_rtm_rw_mutex() {}
#endif /* TBB_USE_ASSERT */
// Mutex traits
static const bool is_rw_mutex = true;
static const bool is_recursive_mutex = false;
static const bool is_fair_mutex = false;
#if __TBB_USE_X86_RTM_RW_MUTEX || __TBB_GCC_VERSION < 40000
#else
// by default we will not provide the scoped_lock interface. The user
// should use the padded version of the mutex. scoped_lock is used in
// the padded_mutex template.
private:
#endif
//! The scoped locking pattern
/** It helps avoid the common problem of forgetting to release the lock.
It also nicely provides the "node" for queuing locks. */
// Speculation-enabled scoped lock for spin_rw_mutex
// The idea is to be able to reuse the acquire/release methods of spin_rw_mutex
// and its scoped lock wherever possible. The only way to use a speculative lock is to use
// a scoped_lock (because transaction_state must be local).
class scoped_lock : tbb::internal::no_copy {
friend class x86_rtm_rw_mutex;
spin_rw_mutex::scoped_lock my_scoped_lock;
RTM_type transaction_state;
public:
//! Construct lock that has not acquired a mutex.
/** Equivalent to zero-initialization of *this. */
scoped_lock() : my_scoped_lock(), transaction_state(RTM_not_in_mutex) {
}
//! Acquire lock on given mutex.
scoped_lock( x86_rtm_rw_mutex& m, bool write = true ) : my_scoped_lock(),
transaction_state(RTM_not_in_mutex) {
acquire(m, write);
}
//! Release lock (if lock is held).
~scoped_lock() {
if(transaction_state != RTM_not_in_mutex) release();
}
//! Acquire lock on given mutex.
void acquire( x86_rtm_rw_mutex& m, bool write = true ) {
if( write ) m.internal_acquire_writer(*this);
else m.internal_acquire_reader(*this);
}
//! Release lock
void release() {
x86_rtm_rw_mutex* mutex = x86_rtm_rw_mutex::internal_get_mutex(my_scoped_lock);
__TBB_ASSERT( mutex, "lock is not acquired" );
__TBB_ASSERT( transaction_state!=RTM_not_in_mutex, "lock is not acquired" );
return mutex->internal_release(*this);
}
//! Upgrade reader to become a writer.
/** Returns whether the upgrade happened without releasing and re-acquiring the lock */
bool upgrade_to_writer() {
x86_rtm_rw_mutex* mutex = x86_rtm_rw_mutex::internal_get_mutex(my_scoped_lock);
__TBB_ASSERT( mutex, "lock is not acquired" );
__TBB_ASSERT( transaction_state==RTM_transacting_reader || transaction_state==RTM_real_reader, "Invalid state for upgrade" );
return mutex->internal_upgrade(*this);
}
//! Downgrade writer to become a reader.
/** Returns whether the downgrade happened without releasing and re-acquiring the lock */
bool downgrade_to_reader() {
x86_rtm_rw_mutex* mutex = x86_rtm_rw_mutex::internal_get_mutex(my_scoped_lock);
__TBB_ASSERT( mutex, "lock is not acquired" );
__TBB_ASSERT( transaction_state==RTM_transacting_writer || transaction_state==RTM_real_writer, "Invalid state for downgrade" );
return mutex->internal_downgrade(*this);
}
//! Attempt to acquire mutex.
/** returns true if successful. */
bool try_acquire( x86_rtm_rw_mutex& m, bool write = true ) {
#if TBB_USE_ASSERT
x86_rtm_rw_mutex* mutex = x86_rtm_rw_mutex::internal_get_mutex(my_scoped_lock);
__TBB_ASSERT( !mutex, "lock is already acquired" );
#endif
// have to assign m to our mutex.
// cannot set the mutex, because try_acquire in spin_rw_mutex depends on it being NULL.
if(write) return m.internal_try_acquire_writer(*this);
// speculatively acquire the lock. If this fails, do try_acquire on the spin_rw_mutex.
m.internal_acquire_reader(*this, /*only_speculate=*/true);
if(transaction_state == RTM_transacting_reader) return true;
if( my_scoped_lock.try_acquire(m, false)) {
transaction_state = RTM_real_reader;
return true;
}
return false;
}
}; // class x86_rtm_rw_mutex::scoped_lock
// ISO C++0x compatibility methods not provided because we cannot maintain
// state about whether a thread is in a transaction.
private:
char pad[speculation_granularity-sizeof(spin_rw_mutex)]; // padding
// If true, writer holds the spin_rw_mutex.
tbb::atomic<bool> w_flag; // want this on a separate cache line
}; // x86_rtm_rw_mutex
} // namespace internal
} // namespace interface8
} // namespace tbb
#endif /* __TBB_TSX_AVAILABLE */
#endif /* __TBB__x86_rtm_rw_mutex_impl_H */
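As a conceptual sketch only (NOT the real code paths behind internal_acquire_reader; toy_rw_lock and the helper names are invented), this is the shape of a speculative reader acquire with the RTM intrinsics from <immintrin.h>, on a TSX-capable CPU compiled with e.g. -mrtm. The key idea matches the w_flag member above: a transacting reader puts the writer flag in its read set, so any later writer forces a hardware abort and a fall back to the real lock:

#include <immintrin.h>

struct toy_rw_lock {
    volatile bool w_flag;    // mirrors the w_flag member above
    // ... the real mutex embeds the fallback spin_rw_mutex here ...
};

// Returns true if we are now inside a transaction that the hardware will
// abort if a writer arrives; false means the caller must fall back to
// really acquiring the reader lock (cf. try_acquire above).
inline bool try_speculative_read(toy_rw_lock& m) {
    if (_xbegin() == _XBEGIN_STARTED) {
        if (!m.w_flag)
            return true;     // transacting: w_flag is now in our read set
        _xabort(0xff);       // a writer really holds the lock; abort now
    }
    return false;            // transaction aborted or never started
}

inline void end_speculative_read() {
    _xend();                 // commit: no writer touched w_flag meanwhile
}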