/*
    Copyright 2005-2014 Intel Corporation.  All Rights Reserved.

    This file is part of Threading Building Blocks. Threading Building Blocks is free software;
    you can redistribute it and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation. Threading Building Blocks is
    distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
    implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
    See the GNU General Public License for more details. You should have received a copy of
    the GNU General Public License along with Threading Building Blocks; if not, write to the
    Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

    As a special exception, you may use this file as part of a free software library without
    restriction. Specifically, if other files instantiate templates or use macros or inline
    functions from this file, or you compile this file and link it with other files to produce
    an executable, this file does not by itself cause the resulting executable to be covered
    by the GNU General Public License. This exception does not however invalidate any other
    reasons why the executable file might be covered by the GNU General Public License.
*/

#ifndef _TBB_custom_scheduler_H
#define _TBB_custom_scheduler_H

#include "scheduler.h"
#include "observer_proxy.h"
#include "itt_notify.h"

namespace tbb {
namespace internal {

//! Amount of time to pause between steals.
/** The default values below were found to be best empirically for K-Means
    on the 32-way Altix and 4-way (*2 for HT) fxqlin04. */
#ifdef __TBB_STEALING_PAUSE
static const long PauseTime = __TBB_STEALING_PAUSE;
#elif __TBB_ipf
static const long PauseTime = 1500;
#else
static const long PauseTime = 80;
#endif
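
// The stealing loop below uses PauseTime as a fixed back-off between failed steal
// attempts, escalating to __TBB_Yield() once enough consecutive failures accumulate.
// A minimal sketch of that pattern (try_get_work and threshold are placeholders for
// the real logic in receive_or_steal_task, not names from this file):
//
//     for( int failure_count = 0; ; ++failure_count ) {
//         if( try_get_work() )
//             break;                  // obtained a task
//         __TBB_Pause(PauseTime);     // spin-pause between steal attempts
//         if( failure_count >= threshold )
//             __TBB_Yield();          // give up the CPU once failures pile up
//     }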

//------------------------------------------------------------------------
//! Traits classes for scheduler
//------------------------------------------------------------------------

struct DefaultSchedulerTraits {
    static const bool itt_possible = true;
    static const bool has_slow_atomic = false;
};

struct IntelSchedulerTraits {
    static const bool itt_possible = false;
#if __TBB_x86_32||__TBB_x86_64
    static const bool has_slow_atomic = true;
#else
    static const bool has_slow_atomic = false;
#endif /* __TBB_x86_32||__TBB_x86_64 */
};

//------------------------------------------------------------------------
// custom_scheduler
//------------------------------------------------------------------------

//! A scheduler with a customized evaluation loop.
/** The customization can use SchedulerTraits to make decisions without needing a run-time check. */
template<typename SchedulerTraits>
class custom_scheduler: private generic_scheduler {
    typedef custom_scheduler<SchedulerTraits> scheduler_type;

    //! Scheduler loop that dispatches tasks.
    /** If child is non-NULL, it is dispatched first.
        Then, until "parent" has a reference count of 1, other tasks are dispatched or stolen. */
    /*override*/
    void local_wait_for_all( task& parent, task* child );

    //! Entry point from client code to the scheduler loop that dispatches tasks.
    /** The method is virtual, but the *this object is used only for the sake of dispatching on the correct vtable,
        not necessarily the correct *this object. The correct *this object is looked up in TLS. */
    /*override*/
    void wait_for_all( task& parent, task* child ) {
        static_cast<custom_scheduler*>(governor::local_scheduler())->scheduler_type::local_wait_for_all( parent, child );
    }

    //! Construct a custom_scheduler
    custom_scheduler( arena* a, size_t index ) : generic_scheduler(a, index) {}

    //! Decrements ref_count of a predecessor.
    /** If it reaches 0, the predecessor is scheduled for execution.
        When changing, remember that this is a hot path function. */
    void tally_completion_of_predecessor( task& s, task*& bypass_slot ) {
        task_prefix& p = s.prefix();
        if( SchedulerTraits::itt_possible )
            ITT_NOTIFY(sync_releasing, &p.ref_count);
        if( SchedulerTraits::has_slow_atomic && p.ref_count==1 )
            p.ref_count=0;
        else if( __TBB_FetchAndDecrementWrelease(&p.ref_count) > 1 ) { // more references exist
            // '__TBB_cl_evict(&p)' degraded performance of parallel_preorder example
            return;
        }

        // Ordering on p.ref_count (superfluous if SchedulerTraits::has_slow_atomic)
        __TBB_control_consistency_helper();
        __TBB_ASSERT(p.ref_count==0, "completion of task caused predecessor's reference count to underflow");
        if( SchedulerTraits::itt_possible )
            ITT_NOTIFY(sync_acquired, &p.ref_count);
#if TBB_USE_ASSERT
        p.extra_state &= ~es_ref_count_active;
#endif /* TBB_USE_ASSERT */

#if __TBB_RECYCLE_TO_ENQUEUE
        if (p.state==task::to_enqueue) {
            // related to __TBB_TASK_ARENA TODO: try to keep the priority of the task,
            // e.g. rework task_prefix to remember the priority of the received task and use it here
            my_arena->enqueue_task(s, 0, my_random );
        } else
#endif /*__TBB_RECYCLE_TO_ENQUEUE*/
        if( bypass_slot==NULL )
            bypass_slot = &s;
        else
            local_spawn( s, s.prefix().next );
    }
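
    // For reference: the ref_count protocol completed here is driven from user code via the
    // public task API. A minimal sketch (user code, not part of this header; FibTask is an
    // illustrative name in the style of the TBB tutorial, requires tbb/task.h):
    //
    //     struct FibTask : public tbb::task {
    //         const long n; long* const sum;
    //         FibTask( long n_, long* sum_ ) : n(n_), sum(sum_) {}
    //         tbb::task* execute() {
    //             if( n < 2 ) { *sum = n; return NULL; }
    //             long x, y;
    //             FibTask& a = *new( allocate_child() ) FibTask(n-1, &x);
    //             FibTask& b = *new( allocate_child() ) FibTask(n-2, &y);
    //             set_ref_count(3);            // two children + one for the explicit wait
    //             spawn( b );
    //             spawn_and_wait_for_all( a ); // returns once ref_count drops back to 1
    //             *sum = x + y;
    //             return NULL;
    //         }
    //     };
    //
    // Each completing child ends up in tally_completion_of_predecessor(): with the extra +1
    // for the explicit wait, the parent's count bottoms out at 1, which is what
    // local_wait_for_all() below waits for; in continuation-passing style (no +1) the count
    // reaches 0 here and the continuation is scheduled via the bypass slot or local_spawn.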

public:
    static generic_scheduler* allocate_scheduler( arena* a, size_t index ) {
        scheduler_type* s = (scheduler_type*)NFS_Allocate(1,sizeof(scheduler_type),NULL);
        new( s ) scheduler_type( a, index );
        s->assert_task_pool_valid();
        ITT_SYNC_CREATE(s, SyncType_Scheduler, SyncObj_TaskPoolSpinning);
        return s;
    }

    //! Try getting a task from the mailbox or stealing from another scheduler.
    /** Returns the stolen task or NULL if all attempts fail. */
    /* override */ task* receive_or_steal_task( __TBB_atomic reference_count& completion_ref_count, bool return_if_no_work );

}; // class custom_scheduler<>
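
// Which instantiation is used is decided outside this header (during one-time library
// initialization); the traits only need to be compile-time constants so that branches
// such as "if( SchedulerTraits::itt_possible )" fold away. A minimal sketch of the idea --
// the function name and the itt_present flag are illustrative, not the actual selection code:
//
//     generic_scheduler* make_scheduler( arena* a, size_t index, bool itt_present ) {
//         return itt_present
//             ? custom_scheduler<DefaultSchedulerTraits>::allocate_scheduler( a, index )
//             : custom_scheduler<IntelSchedulerTraits>::allocate_scheduler( a, index );
//     }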

//------------------------------------------------------------------------
// custom_scheduler methods
//------------------------------------------------------------------------
template<typename SchedulerTraits>
task* custom_scheduler<SchedulerTraits>::receive_or_steal_task( __TBB_atomic reference_count& completion_ref_count,
                                                                bool return_if_no_work ) {
    task* t = NULL;
    bool outermost_dispatch_level = return_if_no_work || master_outermost_level();
    bool can_steal_here = can_steal();
    my_inbox.set_is_idle( true );
#if __TBB_HOARD_NONLOCAL_TASKS
    __TBB_ASSERT(!my_nonlocal_free_list, NULL);
#endif
#if __TBB_TASK_PRIORITY
    if ( return_if_no_work && my_arena->my_skipped_fifo_priority ) {
        // This thread can dequeue FIFO tasks, and some priority levels of
        // FIFO tasks have been bypassed (to prevent deadlock caused by
        // dynamic priority changes in nested task group hierarchy).
        intptr_t skipped_priority = my_arena->my_skipped_fifo_priority;
        if ( my_arena->my_skipped_fifo_priority.compare_and_swap(0, skipped_priority) == skipped_priority &&
             skipped_priority > my_arena->my_top_priority )
        {
            my_market->update_arena_priority( *my_arena, skipped_priority );
        }
    }
    task_stream *ts;
#else /* !__TBB_TASK_PRIORITY */
    task_stream *ts = &my_arena->my_task_stream;
#endif /* !__TBB_TASK_PRIORITY */
    // TODO: Try to find a place to reset my_limit (under market's lock)
    // The number of slots potentially used in the arena. Updated once in a while, as my_limit changes rarely.
    size_t n = my_arena->my_limit-1;
    int yield_count = 0;
    // The state "failure_count==-1" is used only when itt_possible is true,
    // and denotes that a sync_prepare has not yet been issued.
    for( int failure_count = -static_cast<int>(SchedulerTraits::itt_possible);; ++failure_count) {
        __TBB_ASSERT( my_arena->my_limit > 0, NULL );
        __TBB_ASSERT( my_arena_index <= n, NULL );
        if( completion_ref_count==1 ) {
            if( SchedulerTraits::itt_possible ) {
                if( failure_count!=-1 ) {
                    ITT_NOTIFY(sync_prepare, &completion_ref_count);
                    // Notify Intel(R) Thread Profiler that thread has stopped spinning.
                    ITT_NOTIFY(sync_acquired, this);
                }
                ITT_NOTIFY(sync_acquired, &completion_ref_count);
            }
            __TBB_ASSERT( !t, NULL );
            __TBB_control_consistency_helper(); // on ref_count
            break; // exit stealing loop and return;
        }
        // Check if the resource manager requires our arena to relinquish some threads
        if ( return_if_no_work && my_arena->my_num_workers_allotted < my_arena->num_workers_active() ) {
#if !__TBB_TASK_ARENA
            __TBB_ASSERT( is_worker(), NULL );
#endif
            if( SchedulerTraits::itt_possible && failure_count != -1 )
                ITT_NOTIFY(sync_cancel, this);
            return NULL;
        }
#if __TBB_TASK_PRIORITY
        ts = &my_arena->my_task_stream[my_arena->my_top_priority];
#endif
        // Check if there are tasks mailed to this thread via task-to-thread affinity mechanism.
        __TBB_ASSERT(my_affinity_id, NULL);
        if ( n && !my_inbox.empty() && (t = get_mailbox_task()) ) {
            GATHER_STATISTIC( ++my_counters.mails_received );
        }
        // Check if there are tasks in starvation-resistant stream.
        // Only allowed for workers with empty stack, which is identified by return_if_no_work.
        else if ( outermost_dispatch_level && !ts->empty() && (t = ts->pop( my_arena_slot->hint_for_pop)) ) {
            ITT_NOTIFY(sync_acquired, ts);
            // just proceed with the obtained task
        }
#if __TBB_TASK_PRIORITY
        // Check if any earlier offloaded non-top priority tasks have been returned to the top level
        else if ( my_offloaded_tasks && (t=reload_tasks()) ) {
            // just proceed with the obtained task
        }
#endif /* __TBB_TASK_PRIORITY */
        else if ( can_steal_here && n ) {
            // Try to steal a task from a random victim.
            size_t k = my_random.get() % n;
            arena_slot* victim = &my_arena->my_slots[k];
            // The following condition excludes the master that might have
            // already taken our previous place in the arena from the list
            // of potential victims. But since such a situation can take
            // place only in case of significant oversubscription, keeping
            // the checks simple seems to be preferable to complicating the code.
            if( k >= my_arena_index )
                ++victim;               // Adjusts random distribution to exclude self
            task **pool = victim->task_pool;
            if( pool == EmptyTaskPool || !(t = steal_task( *victim )) )
                goto fail;
            if( is_proxy(*t) ) {
                task_proxy &tp = *(task_proxy*)t;
                t = tp.extract_task<task_proxy::pool_bit>();
                if ( !t ) {
                    // Proxy was empty, so it's our responsibility to free it
                    free_task<no_cache_small_task>(tp);
                    goto fail;
                }
                GATHER_STATISTIC( ++my_counters.proxies_stolen );
            }
            t->prefix().extra_state |= es_task_is_stolen;
            if( is_version_3_task(*t) ) {
                my_innermost_running_task = t;
                t->prefix().owner = this;
                t->note_affinity( my_affinity_id );
            }
            GATHER_STATISTIC( ++my_counters.steals_committed );
        } // end of stealing branch
        else
            goto fail;
        // A task was successfully obtained somewhere
        __TBB_ASSERT(t,NULL);
#if __TBB_SCHEDULER_OBSERVER
        my_arena->my_observers.notify_entry_observers( my_last_local_observer, is_worker() );
        the_global_observer_list.notify_entry_observers( my_last_global_observer, is_worker() );
#endif /* __TBB_SCHEDULER_OBSERVER */
        if ( SchedulerTraits::itt_possible && failure_count != -1 ) {
            // FIXME - might be victim, or might be selected from a mailbox
            // Notify Intel(R) Thread Profiler that thread has stopped spinning.
            ITT_NOTIFY(sync_acquired, this);
        }
        break; // exit stealing loop and return
        fail:
        GATHER_STATISTIC( ++my_counters.steals_failed );
        if( SchedulerTraits::itt_possible && failure_count==-1 ) {
            // The first attempt to steal work failed, so notify Intel(R) Thread Profiler that
            // the thread has started spinning. Ideally, we would do this notification
            // *before* the first failed attempt to steal, but at that point we do not
            // know that the steal will fail.
            ITT_NOTIFY(sync_prepare, this);
            failure_count = 0;
        }
        // Pause, even if we are going to yield, because the yield might return immediately.
        __TBB_Pause(PauseTime);
        const int failure_threshold = 2*int(n+1);
        if( failure_count>=failure_threshold ) {
#if __TBB_YIELD2P
            failure_count = 0;
#else
            failure_count = failure_threshold;
#endif
            __TBB_Yield();
#if __TBB_TASK_PRIORITY
            // Check if there are tasks abandoned by other workers
            if ( my_arena->my_orphaned_tasks ) {
                // Epoch must be advanced before seizing the list pointer
                ++my_arena->my_abandonment_epoch;
                task* orphans = (task*)__TBB_FetchAndStoreW( &my_arena->my_orphaned_tasks, 0 );
                if ( orphans ) {
                    task** link = NULL;
                    // Get local counter out of the way (we've just brought in external tasks)
                    my_local_reload_epoch--;
                    t = reload_tasks( orphans, link, effective_reference_priority() );
                    if ( orphans ) {
                        *link = my_offloaded_tasks;
                        if ( !my_offloaded_tasks )
                            my_offloaded_task_list_tail_link = link;
                        my_offloaded_tasks = orphans;
                    }
                    __TBB_ASSERT( !my_offloaded_tasks == !my_offloaded_task_list_tail_link, NULL );
                    if ( t ) {
                        if( SchedulerTraits::itt_possible )
                            ITT_NOTIFY(sync_cancel, this);
                        break; // exit stealing loop and return
                    }
                }
            }
#endif /* __TBB_TASK_PRIORITY */
            const int yield_threshold = 100;
            if( yield_count++ >= yield_threshold ) {
                // When a worker thread has nothing to do, return it to RML.
                // For purposes of affinity support, the thread is considered idle while in RML.
#if __TBB_TASK_PRIORITY
                if( return_if_no_work || my_arena->my_top_priority > my_arena->my_bottom_priority ) {
                    if ( my_arena->is_out_of_work() && return_if_no_work ) {
#else /* !__TBB_TASK_PRIORITY */
                if ( return_if_no_work && my_arena->is_out_of_work() ) {
#endif /* !__TBB_TASK_PRIORITY */
                        if( SchedulerTraits::itt_possible )
                            ITT_NOTIFY(sync_cancel, this);
                        return NULL;
                    }
#if __TBB_TASK_PRIORITY
                }
                if ( my_offloaded_tasks ) {
                    // Safeguard against any sloppiness in managing the reload epoch
                    // counter (e.g. on the hot path because of performance reasons).
                    my_local_reload_epoch--;
                    // Break the deadlock caused by a higher priority dispatch loop
                    // stealing and offloading a lower priority task. Priority check
                    // at the stealing moment cannot completely preclude such cases
                    // because priorities can change dynamically.
                    if ( !return_if_no_work && *my_ref_top_priority > my_arena->my_top_priority ) {
                        GATHER_STATISTIC( ++my_counters.prio_ref_fixups );
                        my_ref_top_priority = &my_arena->my_top_priority;
                        // it's expected that only outermost workers can use the global reload epoch
                        __TBB_ASSERT(!worker_outermost_level(), NULL);
                        __TBB_ASSERT(my_ref_reload_epoch == &my_arena->my_reload_epoch, NULL);
                    }
                }
#endif /* __TBB_TASK_PRIORITY */
            } // end of arena snapshot branch
            // If several attempts did not find work, re-read the arena limit.
            n = my_arena->my_limit-1;
        } // end of yielding branch
    } // end of nonlocal task retrieval loop
    my_inbox.set_is_idle( false );
    return t;
}
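
// Victim selection above draws k uniformly from the n = my_limit-1 values 0..n-1 and then
// skips over this thread's own slot, which yields a uniform choice among the other slots.
// A standalone sketch of the same mapping (choose_victim_index is a hypothetical helper,
// not used by the scheduler; the real code advances the arena_slot pointer instead):
//
//     inline size_t choose_victim_index( size_t self, size_t n_slots, size_t rnd ) {
//         size_t k = rnd % (n_slots - 1); // n_slots-1 candidates other than ourselves
//         return k >= self ? k + 1 : k;   // bijective map onto {0..n_slots-1} minus {self}
//     }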

template<typename SchedulerTraits>
void custom_scheduler<SchedulerTraits>::local_wait_for_all( task& parent, task* child ) {
    __TBB_ASSERT( governor::is_set(this), NULL );
    __TBB_ASSERT( parent.ref_count() >= (child && child->parent() == &parent ? 2 : 1), "ref_count is too small" );
    assert_task_pool_valid();
    // Using parent's refcount in sync_prepare (in the stealing loop below) is
    // a workaround for TP. We need to name it here to display correctly in Ampl.
    if( SchedulerTraits::itt_possible )
        ITT_SYNC_CREATE(&parent.prefix().ref_count, SyncType_Scheduler, SyncObj_TaskStealingLoop);
#if __TBB_TASK_GROUP_CONTEXT
    __TBB_ASSERT( parent.prefix().context || (is_worker() && &parent == my_dummy_task), "parent task does not have context" );
#endif /* __TBB_TASK_GROUP_CONTEXT */
    task* t = child;
    // Constant all_local_work_done is an unreachable refcount value that prevents
    // the dispatch loop from quitting early. It is defined to be in the middle of the range
    // of negative values representable by the reference_count type.
    static const reference_count
        // For normal dispatch loops
        parents_work_done = 1,
        // For termination dispatch loops in masters
        all_local_work_done = (reference_count)3 << (sizeof(reference_count) * 8 - 2);
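    // For example, assuming reference_count is a 64-bit signed integer (it is intptr_t),
    // the expression above is 3 << 62 == 0xC000000000000000, i.e. -2^62 when read as a
    // signed value -- the midpoint of the negative range [-2^63, -1], which no legitimate
    // ref_count update can ever produce.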
    reference_count quit_point;
#if __TBB_TASK_PRIORITY
    __TBB_ASSERT( (uintptr_t)*my_ref_top_priority < (uintptr_t)num_priority_levels, NULL );
    volatile intptr_t *old_ref_top_priority = my_ref_top_priority;
    // When entering a nested parallelism level, the market-level counter
    // must be replaced with the one local to this arena.
    volatile uintptr_t *old_ref_reload_epoch = my_ref_reload_epoch;
#endif /* __TBB_TASK_PRIORITY */
    task* old_dispatching_task = my_dispatching_task;
    my_dispatching_task = my_innermost_running_task;
    if( master_outermost_level() ) {
        // We are in the outermost task dispatch loop of a master thread or a worker which mimics master
        __TBB_ASSERT( !is_worker() || my_dispatching_task != old_dispatching_task, NULL );
        quit_point = &parent == my_dummy_task ? all_local_work_done : parents_work_done;
    } else {
        quit_point = parents_work_done;
#if __TBB_TASK_PRIORITY
        if ( &parent != my_dummy_task ) {
            // We are in a nested dispatch loop.
            // Market or arena priority must not prevent child tasks from being
            // executed, so that dynamic priority changes do not cause deadlock.
            my_ref_top_priority = &parent.prefix().context->my_priority;
            my_ref_reload_epoch = &my_arena->my_reload_epoch;
            if(my_ref_reload_epoch != old_ref_reload_epoch)
                my_local_reload_epoch = *my_ref_reload_epoch-1;
        }
#endif /* __TBB_TASK_PRIORITY */
    }

    cpu_ctl_env_helper cpu_ctl_helper;
    if ( t )
        cpu_ctl_helper.set_env( __TBB_CONTEXT_ARG1(t->prefix().context) );

#if TBB_USE_EXCEPTIONS
    // Infinite safeguard EH loop
    for (;;) {
    try {
#endif /* TBB_USE_EXCEPTIONS */
    // Outer loop receives tasks from global environment (via mailbox, FIFO queue(s),
    // and by stealing from other threads' task pools).
    // All exit points from the dispatch loop are located in its immediate scope.
    for(;;) {
        // Middle loop retrieves tasks from the local task pool.
        for(;;) {
            // Inner loop evaluates tasks coming from nesting loops and those returned
            // by just executed tasks (bypassing spawn or enqueue calls).
            while(t) {
                __TBB_ASSERT( my_inbox.is_idle_state(false), NULL );
                __TBB_ASSERT(!is_proxy(*t),"unexpected proxy");
                __TBB_ASSERT( t->prefix().owner, NULL );
                assert_task_valid(*t);
#if __TBB_TASK_GROUP_CONTEXT && TBB_USE_ASSERT
                assert_context_valid(t->prefix().context);
                if ( !t->prefix().context->my_cancellation_requested )
#endif
                __TBB_ASSERT( 1L<<t->state() & (1L<<task::allocated|1L<<task::ready|1L<<task::reexecute), NULL );
                assert_task_pool_valid();
#if __TBB_TASK_PRIORITY
                intptr_t p = priority(*t);
                if ( p != *my_ref_top_priority && (t->prefix().extra_state & es_task_enqueued) == 0) {
                    assert_priority_valid(p);
                    if ( p != my_arena->my_top_priority ) {
                        my_market->update_arena_priority( *my_arena, p );
                    }
                    if ( p < effective_reference_priority() ) {
                        if ( !my_offloaded_tasks ) {
                            my_offloaded_task_list_tail_link = &t->prefix().next_offloaded;
                            // Erase possible reference to the owner scheduler (next_offloaded is a union member)
                            *my_offloaded_task_list_tail_link = NULL;
                        }
                        offload_task( *t, p );
                        if ( in_arena() ) {
                            t = winnow_task_pool();
                            if ( t )
                                continue;
                        }
                        else {
                            // Mark arena as full to unlock arena priority level adjustment
                            // by arena::is_out_of_work(), and ensure worker's presence.
                            my_arena->advertise_new_work<false>();
                        }
                        goto stealing_ground;
                    }
                }
#endif /* __TBB_TASK_PRIORITY */
                task* t_next = NULL;
                my_innermost_running_task = t;
                t->prefix().owner = this;
                t->prefix().state = task::executing;
#if __TBB_TASK_GROUP_CONTEXT
                if ( !t->prefix().context->my_cancellation_requested )
#endif
                {
                    GATHER_STATISTIC( ++my_counters.tasks_executed );
                    GATHER_STATISTIC( my_counters.avg_arena_concurrency += my_arena->num_workers_active() );
                    GATHER_STATISTIC( my_counters.avg_assigned_workers += my_arena->my_num_workers_allotted );
#if __TBB_TASK_PRIORITY
                    GATHER_STATISTIC( my_counters.avg_arena_prio += p );
                    GATHER_STATISTIC( my_counters.avg_market_prio += my_market->my_global_top_priority );
#endif /* __TBB_TASK_PRIORITY */
                    ITT_STACK(SchedulerTraits::itt_possible, callee_enter, t->prefix().context->itt_caller);
                    t_next = t->execute();
                    ITT_STACK(SchedulerTraits::itt_possible, callee_leave, t->prefix().context->itt_caller);
                    if (t_next) {
                        __TBB_ASSERT( t_next->state()==task::allocated,
                                      "if task::execute() returns task, it must be marked as allocated" );
                        reset_extra_state(t_next);
#if TBB_USE_ASSERT
                        affinity_id next_affinity=t_next->prefix().affinity;
                        if (next_affinity != 0 && next_affinity != my_affinity_id)
                            GATHER_STATISTIC( ++my_counters.affinity_ignored );
#endif
                    }
                }
                assert_task_pool_valid();
                switch( t->state() ) {
                    case task::executing: {
                        task* s = t->parent();
                        __TBB_ASSERT( my_innermost_running_task==t, NULL );
                        __TBB_ASSERT( t->prefix().ref_count==0, "Task still has children after it has been executed" );
                        t->~task();
                        if( s )
                            tally_completion_of_predecessor(*s, t_next);
                        free_task<no_hint>( *t );
                        assert_task_pool_valid();
                        break;
                    }

                    case task::recycle: // set by recycle_as_safe_continuation()
                        t->prefix().state = task::allocated;
#if __TBB_RECYCLE_TO_ENQUEUE
                    case task::to_enqueue: // set by recycle_to_enqueue()
#endif
                        __TBB_ASSERT( t_next != t, "a task returned from method execute() cannot be recycled in another way" );
                        reset_extra_state(t);
                        // for safe continuation, need to atomically decrement ref_count;
                        tally_completion_of_predecessor(*t, t_next);
                        assert_task_pool_valid();
                        break;

                    case task::reexecute: // set by recycle_to_reexecute()
                        __TBB_ASSERT( t_next, "reexecution requires that method execute() return another task" );
                        __TBB_ASSERT( t_next != t, "a task returned from method execute() cannot be recycled in another way" );
                        t->prefix().state = task::allocated;
                        reset_extra_state(t);
                        local_spawn( *t, t->prefix().next );
                        assert_task_pool_valid();
                        break;
                    case task::allocated:
                        reset_extra_state(t);
                        break;
#if TBB_USE_ASSERT
                    case task::ready:
                        __TBB_ASSERT( false, "task is in READY state upon return from method execute()" );
                        break;
                    default:
                        __TBB_ASSERT( false, "illegal state" );
#else
                    default: // just to shut up some compilation warnings
                        break;
#endif /* TBB_USE_ASSERT */
                }
                GATHER_STATISTIC( t_next ? ++my_counters.spawns_bypassed : 0 );
                t = t_next;
            } // end of scheduler bypass loop

            assert_task_pool_valid();
            if ( parent.prefix().ref_count == quit_point ) {
                __TBB_ASSERT( quit_point != all_local_work_done, NULL );
                __TBB_control_consistency_helper(); // on ref_count
                ITT_NOTIFY(sync_acquired, &parent.prefix().ref_count);
                goto done;
            }
            if ( in_arena() ) {
                t = get_task();
            }
            else {
                __TBB_ASSERT( is_quiescent_local_task_pool_reset(), NULL );
                break;
            }
            __TBB_ASSERT(!t || !is_proxy(*t),"unexpected proxy");
            assert_task_pool_valid();

            if ( !t ) break;

            cpu_ctl_helper.set_env( __TBB_CONTEXT_ARG1(t->prefix().context) );
        }; // end of local task pool retrieval loop

#if __TBB_TASK_PRIORITY
        stealing_ground:
#endif /* __TBB_TASK_PRIORITY */
#if __TBB_HOARD_NONLOCAL_TASKS
        // before stealing, previously stolen task objects are returned
        for (; my_nonlocal_free_list; my_nonlocal_free_list = t ) {
            t = my_nonlocal_free_list->prefix().next;
            free_nonlocal_small_task( *my_nonlocal_free_list );
        }
#endif
        if ( quit_point == all_local_work_done ) {
            __TBB_ASSERT( !in_arena() && is_quiescent_local_task_pool_reset(), NULL );
            __TBB_ASSERT( !worker_outermost_level(), NULL );
            my_innermost_running_task = my_dispatching_task;
            my_dispatching_task = old_dispatching_task;
#if __TBB_TASK_PRIORITY
            my_ref_top_priority = old_ref_top_priority;
            if(my_ref_reload_epoch != old_ref_reload_epoch)
                my_local_reload_epoch = *old_ref_reload_epoch-1;
            my_ref_reload_epoch = old_ref_reload_epoch;
#endif /* __TBB_TASK_PRIORITY */
            return;
        }
        // The following assertion may be falsely triggered in the presence of enqueued tasks
        //__TBB_ASSERT( my_arena->my_max_num_workers > 0 || my_market->my_ref_count > 1
        //              || parent.prefix().ref_count == 1, "deadlock detected" );

        // The dispatching task pointer is NULL *iff* this is a worker thread in its outermost
        // dispatch loop (i.e. its execution stack is empty). In this case it should exit the
        // loop either when there is no more work in the current arena, or when revoked by the market.

        t = receive_or_steal_task( parent.prefix().ref_count, worker_outermost_level() );
        if ( !t )
            goto done;
        __TBB_ASSERT(!is_proxy(*t),"unexpected proxy");

        // The user may have captured different FPU settings in the context, so the
        // cached data in the helper can be out of date and we cannot do a fast check.
        cpu_ctl_helper.set_env( __TBB_CONTEXT_ARG1(t->prefix().context) );
    } // end of infinite stealing loop
#if TBB_USE_EXCEPTIONS
    __TBB_ASSERT( false, "Must never get here" );
    } // end of try-block
    TbbCatchAll( t->prefix().context );
    // Complete post-processing ...
    if( t->state() == task::recycle
#if __TBB_RECYCLE_TO_ENQUEUE
        // TODO: the enqueue semantics gets lost below, consider reimplementing
        || t->state() == task::to_enqueue
#endif
      ) {
        // ... for recycled tasks to atomically decrement ref_count
        t->prefix().state = task::allocated;
        if( SchedulerTraits::itt_possible )
            ITT_NOTIFY(sync_releasing, &t->prefix().ref_count);
        if( __TBB_FetchAndDecrementWrelease(&t->prefix().ref_count)==1 ) {
            if( SchedulerTraits::itt_possible )
                ITT_NOTIFY(sync_acquired, &t->prefix().ref_count);
        } else {
            t = NULL;
        }
    }
    } // end of infinite EH loop
    __TBB_ASSERT( false, "Must never get here too" );
#endif /* TBB_USE_EXCEPTIONS */
done:
    my_innermost_running_task = my_dispatching_task;
    my_dispatching_task = old_dispatching_task;
#if __TBB_TASK_PRIORITY
    my_ref_top_priority = old_ref_top_priority;
    if(my_ref_reload_epoch != old_ref_reload_epoch)
        my_local_reload_epoch = *old_ref_reload_epoch-1;
    my_ref_reload_epoch = old_ref_reload_epoch;
#endif /* __TBB_TASK_PRIORITY */
    if ( !ConcurrentWaitsEnabled(parent) ) {
        if ( parent.prefix().ref_count != parents_work_done ) {
            // This is a worker that was revoked by the market.
#if __TBB_TASK_ARENA
            __TBB_ASSERT( worker_outermost_level(),
                          "Worker thread exits nested dispatch loop prematurely" );
#else
            __TBB_ASSERT( is_worker() && worker_outermost_level(),
                          "Worker thread exits nested dispatch loop prematurely" );
#endif
            return;
        }
        parent.prefix().ref_count = 0;
    }
#if TBB_USE_ASSERT
    parent.prefix().extra_state &= ~es_ref_count_active;
#endif /* TBB_USE_ASSERT */
#if __TBB_TASK_GROUP_CONTEXT
    __TBB_ASSERT(parent.prefix().context && default_context(), NULL);
    task_group_context* parent_ctx = parent.prefix().context;
    if ( parent_ctx->my_cancellation_requested ) {
        task_group_context::exception_container_type *pe = parent_ctx->my_exception;
        if ( master_outermost_level() && parent_ctx == default_context() ) {
            // We are in the outermost task dispatch loop of a master thread, and
            // the whole task tree has been collapsed. So we may clear cancellation data.
            parent_ctx->my_cancellation_requested = 0;
            // TODO: Add assertion that master's dummy task context does not have children
            parent_ctx->my_state &= ~(uintptr_t)task_group_context::may_have_children;
        }
        if ( pe ) {
            // On Windows, FPU control settings changed in the helper destructor are not visible
            // outside a catch block. So restore the default settings manually before rethrowing
            // the exception.
            cpu_ctl_helper.restore_default();
            pe->throw_self();
        }
    }
    __TBB_ASSERT(!is_worker() || !CancellationInfoPresent(*my_dummy_task),
                 "Worker's dummy task context modified");
    __TBB_ASSERT(!master_outermost_level() || !CancellationInfoPresent(*my_dummy_task),
                 "Unexpected exception or cancellation data in the master's dummy task");
#endif /* __TBB_TASK_GROUP_CONTEXT */
    assert_task_pool_valid();
}
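
// The "scheduler bypass" handled by the inner while(t) loop above (a task returned from
// execute() is dispatched immediately, skipping spawn/steal overhead) is driven from user
// code simply by returning a pointer from execute(). A sketch in the style of the TBB
// tutorial's continuation-passing Fibonacci (FibTask, FibContinuation, cutoff and
// SerialFib are illustrative names, not part of this header):
//
//     tbb::task* FibTask::execute() {
//         if( n < cutoff ) { *sum = SerialFib(n); return NULL; }
//         FibContinuation& c = *new( allocate_continuation() ) FibContinuation(sum);
//         FibTask& a = *new( c.allocate_child() ) FibTask(n-2, &c.x);
//         FibTask& b = *new( c.allocate_child() ) FibTask(n-1, &c.y);
//         c.set_ref_count(2);     // no +1: the continuation is not waited on explicitly
//         spawn( b );
//         return &a;              // bypass: becomes t_next in the dispatch loop above
//     }
//
// When both children finish, tally_completion_of_predecessor() sees the continuation's
// ref_count reach zero and feeds it back into the loop via the bypass slot.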

} // namespace internal
} // namespace tbb

#endif /* _TBB_custom_scheduler_H */