19#if BT_USE_OPENMP && BT_THREADSAFE
25#if BT_USE_PPL && BT_THREADSAFE
34#if BT_USE_TBB && BT_THREADSAFE
37#define __TBB_NO_IMPLICIT_LINKAGE 1
39#include <tbb/task_scheduler_init.h>
40#include <tbb/parallel_for.h>
41#include <tbb/blocked_range.h>
53#if __cplusplus >= 201103L
57#define USE_CPP11_ATOMICS 1
59#elif defined(_MSC_VER)
62#define USE_MSVC_INTRINSICS 1
64#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))
68#define USE_GCC_BUILTIN_ATOMICS 1
70#elif defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
73#define USE_GCC_BUILTIN_ATOMICS_OLD 1
82#define THREAD_LOCAL_STATIC thread_local static
86 std::atomic<int>* aDest =
reinterpret_cast<std::atomic<int>*
>(&
mLock);
88 return std::atomic_compare_exchange_weak_explicit(aDest, &expected,
int(1), std::memory_order_acq_rel, std::memory_order_acquire);
102 std::atomic<int>* aDest =
reinterpret_cast<std::atomic<int>*
>(&
mLock);
103 std::atomic_store_explicit(aDest,
int(0), std::memory_order_release);
106#elif USE_MSVC_INTRINSICS
108#define WIN32_LEAN_AND_MEAN
113#define THREAD_LOCAL_STATIC __declspec(thread) static
117 volatile long* aDest =
reinterpret_cast<long*
>(&
mLock);
118 return (0 == _InterlockedCompareExchange(aDest, 1, 0));
132 volatile long* aDest =
reinterpret_cast<long*
>(&
mLock);
133 _InterlockedExchange(aDest, 0);
136#elif USE_GCC_BUILTIN_ATOMICS
138#define THREAD_LOCAL_STATIC static __thread
144 const int memOrderSuccess = __ATOMIC_ACQ_REL;
145 const int memOrderFail = __ATOMIC_ACQUIRE;
146 return __atomic_compare_exchange_n(&
mLock, &expected,
int(1), weak, memOrderSuccess, memOrderFail);
160 __atomic_store_n(&
mLock,
int(0), __ATOMIC_RELEASE);
163#elif USE_GCC_BUILTIN_ATOMICS_OLD
165#define THREAD_LOCAL_STATIC static __thread
169 return __sync_bool_compare_and_swap(&
mLock,
int(0),
int(1));
184 __sync_fetch_and_and(&
mLock,
int(0));
189#error "no threading primitives defined -- unknown platform"
198 btAssert(!
"unimplemented btSpinMutex::lock() called");
203 btAssert(!
"unimplemented btSpinMutex::unlock() called");
208 btAssert(!
"unimplemented btSpinMutex::tryLock() called");
212#define THREAD_LOCAL_STATIC static
234 btAssert(!
"thread counter exceeded");
274#define BT_DETECT_BAD_THREAD_INDEX 0
276#if BT_DETECT_BAD_THREAD_INDEX
278typedef DWORD ThreadId_t;
279const static ThreadId_t kInvalidThreadId = 0;
282static ThreadId_t getDebugThreadId()
284 return GetCurrentThreadId();
292 const unsigned int kNullIndex = ~0
U;
294 if (sThreadIndex == kNullIndex)
299#if BT_DETECT_BAD_THREAD_INDEX
302 ThreadId_t tid = getDebugThreadId();
304 if (gDebugThreadIds[sThreadIndex] == kInvalidThreadId)
307 gDebugThreadIds[sThreadIndex] = tid;
311 if (gDebugThreadIds[sThreadIndex] != tid)
315 btAssert(!
"there are 2 or more threads with the same thread-index!");
391 btAssert(!
"btSetTaskScheduler must be called from the main thread!");
416#if BT_DETECT_BAD_THREAD_INDEX
422 gDebugThreadIds[i] = kInvalidThreadId;
433 btAssert(!
"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE");
443#if BT_DETECT_BAD_THREAD_INDEX
449 gDebugThreadIds[i] = kInvalidThreadId;
460 btAssert(!
"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE");
461 return body.
sumLoop(iBegin, iEnd);
480 body.forLoop(iBegin, iEnd);
485 return body.sumLoop(iBegin, iEnd);
489#if BT_USE_OPENMP && BT_THREADSAFE
504 return omp_get_max_threads();
516 omp_set_num_threads(1);
517 omp_set_num_threads(m_numThreads);
518 m_savedThreadCounter = 0;
528#pragma omp parallel for schedule(static, 1)
529 for (
int i = iBegin; i < iEnd; i += grainSize)
532 body.forLoop(i, (std::min)(i + grainSize, iEnd));
541#pragma omp parallel for schedule(static, 1) reduction(+ \
543 for (
int i = iBegin; i < iEnd; i += grainSize)
546 sum += body.sumLoop(i, (std::min)(i + grainSize, iEnd));
554#if BT_USE_TBB && BT_THREADSAFE
561 tbb::task_scheduler_init* m_tbbSchedulerInit;
567 m_tbbSchedulerInit = NULL;
569 ~btTaskSchedulerTBB()
571 if (m_tbbSchedulerInit)
573 delete m_tbbSchedulerInit;
574 m_tbbSchedulerInit = NULL;
580 return tbb::task_scheduler_init::default_num_threads();
589 if (m_tbbSchedulerInit)
592 delete m_tbbSchedulerInit;
593 m_tbbSchedulerInit = NULL;
595 m_tbbSchedulerInit =
new tbb::task_scheduler_init(m_numThreads);
596 m_savedThreadCounter = 0;
602 struct ForBodyAdapter
607 void operator()(
const tbb::blocked_range<int>& range)
const
610 mBody->
forLoop(range.begin(), range.end());
616 ForBodyAdapter tbbBody(&body);
618 tbb::parallel_for(tbb::blocked_range<int>(iBegin, iEnd, grainSize),
620 tbb::simple_partitioner());
623 struct SumBodyAdapter
629 SumBodyAdapter(
const SumBodyAdapter& src,
tbb::split) : mBody(src.mBody), mSum(
btScalar(0)) {}
630 void join(
const SumBodyAdapter& src) { mSum += src.mSum; }
631 void operator()(
const tbb::blocked_range<int>& range)
634 mSum += mBody->
sumLoop(range.begin(), range.end());
640 SumBodyAdapter tbbBody(&body);
642 tbb::parallel_deterministic_reduce(tbb::blocked_range<int>(iBegin, iEnd, grainSize), tbbBody);
649#if BT_USE_PPL && BT_THREADSAFE
656 concurrency::combinable<btScalar> m_sum;
664 return concurrency::GetProcessorCount();
674 m_numThreads = (std::max)(1, (std::min)(maxThreadCount, numThreads));
675 using namespace concurrency;
676 if (CurrentScheduler::Id() != -1)
678 CurrentScheduler::Detach();
680 SchedulerPolicy policy;
684 policy.SetConcurrencyLimits(1, 1);
685 CurrentScheduler::Create(policy);
686 CurrentScheduler::Detach();
688 policy.SetConcurrencyLimits(m_numThreads, m_numThreads);
689 CurrentScheduler::Create(policy);
690 m_savedThreadCounter = 0;
696 struct ForBodyAdapter
702 ForBodyAdapter(
const btIParallelForBody* body,
int grainSize,
int end) : mBody(body), mGrainSize(grainSize), mIndexEnd(end) {}
703 void operator()(
int i)
const
706 mBody->
forLoop(i, (std::min)(i + mGrainSize, mIndexEnd));
713 ForBodyAdapter pplBody(&body, grainSize, iEnd);
716 concurrency::parallel_for(iBegin,
722 struct SumBodyAdapter
725 concurrency::combinable<btScalar>* mSum;
729 SumBodyAdapter(
const btIParallelSumBody* body, concurrency::combinable<btScalar>*
sum,
int grainSize,
int end) : mBody(body), mSum(
sum), mGrainSize(grainSize), mIndexEnd(end) {}
730 void operator()(
int i)
const
733 mSum->local() += mBody->
sumLoop(i, (std::min)(i + mGrainSize, mIndexEnd));
741 SumBodyAdapter pplBody(&body, &m_sum, grainSize, iEnd);
744 concurrency::parallel_for(iBegin,
749 return m_sum.combine(sumFunc);
758 return &sTaskScheduler;
764#if BT_USE_OPENMP && BT_THREADSAFE
765 static btTaskSchedulerOpenMP sTaskScheduler;
766 return &sTaskScheduler;
775#if BT_USE_TBB && BT_THREADSAFE
776 static btTaskSchedulerTBB sTaskScheduler;
777 return &sTaskScheduler;
786#if BT_USE_PPL && BT_THREADSAFE
787 static btTaskSchedulerPPL sTaskScheduler;
788 return &sTaskScheduler;
static int split(btDbvtNode **leaves, int count, const btVector3 &org, const btVector3 &axis)
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floating point precision.
static T sum(const btAlignedObjectArray< T > &items)
btITaskScheduler * btGetTBBTaskScheduler()
void btPopThreadsAreRunning()
btITaskScheduler * btGetPPLTaskScheduler()
void btResetThreadIndexCounter()
static btITaskScheduler * gBtTaskScheduler
btScalar btParallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody &body)
static btSpinMutex gThreadsRunningCounterMutex
bool btThreadsAreRunning()
void btPushThreadsAreRunning()
static int gThreadsRunningCounter
btITaskScheduler * btGetOpenMPTaskScheduler()
unsigned int btGetCurrentThreadIndex()
static ThreadsafeCounter gThreadCounter
void btSetTaskScheduler(btITaskScheduler *ts)
btITaskScheduler * btGetTaskScheduler()
btITaskScheduler * btGetSequentialTaskScheduler()
#define THREAD_LOCAL_STATIC
void btParallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody &body)
const unsigned int BT_MAX_THREAD_COUNT
virtual void forLoop(int iBegin, int iEnd) const =0
virtual btScalar sumLoop(int iBegin, int iEnd) const =0
btITaskScheduler(const char *name)
virtual int getNumThreads() const =0
unsigned int m_savedThreadCounter
virtual int getMaxNumThreads() const =0
virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody &body)=0
virtual void deactivate()
virtual void setNumThreads(int numThreads)=0
virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody &body)=0
btSpinMutex – lightweight spin-mutex implemented with atomic ops; never puts a thread to sleep because it is designed to be used with a task scheduler which has one thread per core, and the threads don't sleep until they run out of tasks. Not intended for general-purpose use.
btTaskSchedulerSequential – non-threaded implementation of the task scheduler (really just useful for testing performance of single-threaded vs multi-threaded execution).
virtual void setNumThreads(int numThreads) BT_OVERRIDE
virtual int getMaxNumThreads() const BT_OVERRIDE
virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody &body) BT_OVERRIDE
virtual int getNumThreads() const BT_OVERRIDE
btTaskSchedulerSequential()
virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody &body) BT_OVERRIDE