Bullet Collision Detection & Physics Library
btThreadSupportWin32.cpp
Go to the documentation of this file.
1/*
2Bullet Continuous Collision Detection and Physics Library
3Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com
4
5This software is provided 'as-is', without any express or implied warranty.
6In no event will the authors be held liable for any damages arising from the use of this software.
7Permission is granted to anyone to use this software for any purpose,
8including commercial applications, and to alter it and redistribute it freely,
9subject to the following restrictions:
10
111. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
122. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
133. This notice may not be removed or altered from any source distribution.
14*/
15
16#if defined(_WIN32) && BT_THREADSAFE
17
18#include "LinearMath/btScalar.h"
19#include "LinearMath/btMinMax.h"
23#include <windows.h>
24#include <stdio.h>
25
// Processor topology counters. They are zeroed with memset and then incremented
// while walking the processor-information records further down in this file.
// NOTE(review): this listing omits some numbered lines; later code also uses
// numLogicalProcessors, numPhysicalPackages and processorTeamMasks, whose
// declarations are not visible here -- confirm against the complete file.
26struct btProcessorInfo
27{
29 int numCores;      // incremented in the same branch that updates numLogicalProcessors
30 int numNumaNodes;  // one per RelationNumaNode record
31 int numL1Cache;    // one per RelationCache record with Cache.Level == 1
32 int numL2Cache;    // one per RelationCache record with Cache.Level == 2
33 int numL3Cache;    // one per RelationCache record with Cache.Level == 3
35 static const int maxNumTeamMasks = 32;  // upper bound checked before a team mask is stored
36 int numTeamMasks;  // count of processorTeamMasks entries filled in so far
38};
39
// Scans the first procInfo.numTeamMasks entries of procInfo.processorTeamMasks
// and returns the first mask that shares at least one bit with procMask.
// Returns 0 when no stored mask overlaps.
// NOTE(review): the signature and the definition of procMask are missing from
// this listing (numbered lines 40 and 42) -- confirm against the full file.
41{
43 for (int i = 0; i < procInfo.numTeamMasks; ++i)
44 {
45 if (procMask & procInfo.processorTeamMasks[i])
46 {
47 return procInfo.processorTeamMasks[i];
48 }
49 }
50 return 0;
51}
52
// Same scan as the mask lookup above, but returns the index of the first
// processorTeamMasks entry that overlaps procMask, or -1 when none does.
// NOTE(review): the signature and the definition of procMask are missing from
// this listing (numbered lines 53 and 55) -- confirm against the full file.
54{
56 for (int i = 0; i < procInfo.numTeamMasks; ++i)
57 {
58 if (procMask & procInfo.processorTeamMasks[i])
59 {
60 return i;
61 }
62 }
63 return -1;
64}
65
// Returns how many bits of `bits` are set: tests the lowest bit, shifts right
// by one, and repeats until the value becomes zero.
// NOTE(review): the signature is missing from this listing; presumably this is
// countSetBits(), which is called with processor masks elsewhere in the file.
// Termination assumes `bits` is an unsigned type -- confirm.
67{
68 int count = 0;
69 while (bits)
70 {
71 if (bits & 1)
72 {
73 count++;
74 }
75 bits >>= 1;
76 }
77 return count;
78}
79
81
// Zeroes *procInfo and then, except on the UWP-only build path, tallies the
// processor-information records into its counters and team masks.
// NOTE(review): the signature, the variable receiving the GetProcAddress result,
// the query call, the buffer allocation and two case labels are missing from
// this listing -- the comments below describe only the visible statements.
83{
84 memset(procInfo, 0, sizeof(*procInfo));
85#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
86 // Can't dlopen libraries on UWP.
87 return;
88#else
// GetLogicalProcessorInformation is resolved from kernel32 at run time via GetProcAddress.
90 (Pfn_GetLogicalProcessorInformation)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation");
92 {
93 // no info
// Returning here leaves every field of *procInfo at zero (from the memset above).
94 return;
95 }
97 DWORD bufSize = 0;
// Loop until the (not visible) success condition breaks out; on the other path
// any previously allocated buffer is released first.
// NOTE(review): presumably a query / grow-buffer / retry loop -- confirm.
98 while (true)
99 {
101 {
102 break;
103 }
104 else
105 {
107 {
108 if (buf)
109 {
110 free(buf);
111 }
113 }
114 }
115 }
116
// bufSize is in bytes; dividing by the element size gives the record count.
117 int len = bufSize / sizeof(*buf);
118 for (int i = 0; i < len; ++i)
119 {
121 switch (info->Relationship)
122 {
123 case RelationNumaNode:
124 procInfo->numNumaNodes++;
125 break;
126
// NOTE(review): the case label for this branch is missing from the listing.
// Each record here counts as one core and contributes one logical processor
// per bit set in its ProcessorMask.
128 procInfo->numCores++;
129 procInfo->numLogicalProcessors += countSetBits(info->ProcessorMask);
130 break;
131
132 case RelationCache:
133 if (info->Cache.Level == 1)
134 {
135 procInfo->numL1Cache++;
136 }
137 else if (info->Cache.Level == 2)
138 {
139 procInfo->numL2Cache++;
140 }
141 else if (info->Cache.Level == 3)
142 {
143 procInfo->numL3Cache++;
144 // processors that share L3 cache are considered to be on the same team
145 // because they can more easily work together on the same data.
146 // Large performance penalties will occur if 2 or more threads from different
147 // teams attempt to frequently read and modify the same cache lines.
148 //
149 // On the AMD Ryzen 7 CPU for example, the 8 cores on the CPU are split into
150 // 2 CCX units of 4 cores each. Each CCX has a separate L3 cache, so if both
151 // CCXs are operating on the same data, many cycles will be spent keeping the
152 // two caches coherent.
// L3 masks beyond maxNumTeamMasks are counted in numL3Cache but not stored.
153 if (procInfo->numTeamMasks < btProcessorInfo::maxNumTeamMasks)
154 {
155 procInfo->processorTeamMasks[procInfo->numTeamMasks] = info->ProcessorMask;
156 procInfo->numTeamMasks++;
157 }
158 }
159 break;
160
// NOTE(review): the case label for this branch is missing from the listing.
162 procInfo->numPhysicalPackages++;
163 break;
164 }
165 }
166 free(buf);
167#endif
168}
169
// Win32 worker-thread pool: each worker is driven by a "start" event and
// reports back through a "complete" event (see the member functions below).
// NOTE(review): the class header line and several member declarations
// (m_eventCompleteHandle and its name buffer, m_activeThreadStatus,
// m_completeHandles, m_startedThreadMask, m_processorInfo) are missing from this
// listing although the member functions use them -- confirm against the full file.
172{
173public:
// Per-worker bookkeeping shared between the controlling thread and the worker.
174 struct btThreadStatus
175 {
176 int m_taskId;     // worker index assigned in startThreads()
177 int m_commandId;  // written as 0 in startThreads() and 1 in runTask(); not read in the visible code
// m_status values seen in this file: 0 = idle, 1 = task handed over by runTask(),
// 2 = user function finished, 3 = worker is exiting.
178 int m_status;
179
180 ThreadFunc m_userThreadFunc;
181 void* m_userPtr; //for taskDesc etc
182
183 void* m_threadHandle; //this one is calling 'Win32ThreadFunc'
184
185 void* m_eventStartHandle;
186 char m_eventStartHandleName[32];
187
190 };
191
192private:
195 int m_numThreads;
198
199 void startThreads(const ConstructionInfo& threadInfo);
200 void stopThreads();
201 int waitForResponse();
202
203public:
204 btThreadSupportWin32(const ConstructionInfo& threadConstructionInfo);
205 virtual ~btThreadSupportWin32();
206
207 virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
// Number of bits set in the first stored team mask (processors sharing the first recorded L3 cache).
208 virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return countSetBits(m_processorInfo.processorTeamMasks[0]); }
// Integer division of logical processors by cores.
// NOTE(review): numCores stays 0 when no processor information could be
// gathered, which would make this a division by zero -- confirm callers
// cannot reach this in that state.
209 virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return m_processorInfo.numLogicalProcessors / m_processorInfo.numCores; }
210
211 virtual void runTask(int threadIndex, void* userData) BT_OVERRIDE;
212 virtual void waitForAllTasks() BT_OVERRIDE;
213
214 virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
215 virtual void deleteCriticalSection(btCriticalSection* criticalSection) BT_OVERRIDE;
216};
217
// NOTE(review): both the signature and the statement between these braces are
// missing from this listing. Given the destructor that follows, this is
// presumably the btThreadSupportWin32 constructor -- confirm against the full file.
219{
221}
222
// Destructor: delegates all teardown to stopThreads(), which asks every worker
// to exit and closes its event and thread handles.
223btThreadSupportWin32::~btThreadSupportWin32()
224{
225 stopThreads();
226}
227
// Worker thread body. lpParam is the worker's btThreadStatus record.
// Each iteration blocks on the start event, then either runs the user function
// (non-null m_userPtr) or exits the loop (null m_userPtr). Both paths signal the
// complete event so the controlling thread can continue. Returns 0.
// NOTE(review): the signature is missing from this listing.
229{
230 btThreadSupportWin32::btThreadStatus* status = (btThreadSupportWin32::btThreadStatus*)lpParam;
231
232 while (1)
233 {
234 WaitForSingleObject(status->m_eventStartHandle, INFINITE);
235 void* userPtr = status->m_userPtr;
236
237 if (userPtr)
238 {
// runTask() sets m_status to 1 before signalling, so it must be non-zero here.
239 btAssert(status->m_status);
240 status->m_userThreadFunc(userPtr);
// 2 = task finished; waitForResponse() asserts m_status > 1 after the wait.
241 status->m_status = 2;
242 SetEvent(status->m_eventCompleteHandle);
243 }
244 else
245 {
246 //exit Thread
// A null user pointer is the shutdown request issued by stopThreads().
247 status->m_status = 3;
248 printf("Thread with taskId %i with handle %p exiting\n", status->m_taskId, status->m_threadHandle);
249 SetEvent(status->m_eventCompleteHandle);
250 break;
251 }
252 }
253 printf("Thread TERMINATED\n");
254 return 0;
255}
256
// Hands userData to the worker at threadIndex and wakes it via its start event.
// Does not wait for the task to finish.
// NOTE(review): the line binding `threadStatus` and a few other statements are
// missing from this listing (numbered lines 259, 261, 266, 268).
257void btThreadSupportWin32::runTask(int threadIndex, void* userData)
258{
260 btAssert(threadIndex >= 0);
262
263 threadStatus.m_commandId = 1;
// 1 = task handed over; the worker asserts this is non-zero before running it.
264 threadStatus.m_status = 1;
265 threadStatus.m_userPtr = userData;
267
269 SetEvent(threadStatus.m_eventStartHandle);
270}
271
// Returns the index of a worker whose task has completed, resets that worker's
// m_status to 0 and clears its bit in m_startedThreadMask.
// NOTE(review): the wait call that produces `res` and the line binding
// `threadStatus` are missing from this listing; presumably `res` comes from a
// wait on m_completeHandles -- confirm against the full file.
272int btThreadSupportWin32::waitForResponse()
273{
275
276 int last = -1;
// Offset from WAIT_OBJECT_0 gives the index of the signalled handle.
279 last = res - WAIT_OBJECT_0;
280
282 btAssert(threadStatus.m_threadHandle);
283 btAssert(threadStatus.m_eventCompleteHandle);
284
285 //WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
// The worker sets m_status to 2 (done) or 3 (exiting) before signalling completion.
286 btAssert(threadStatus.m_status > 1);
287 threadStatus.m_status = 0;
288
290 btAssert(last >= 0);
291 m_startedThreadMask &= ~(DWORD_PTR(1) << last);
292
293 return last;
294}
295
// Loops until m_startedThreadMask is zero, i.e. no started task is outstanding.
// NOTE(review): the loop body (numbered line 300) is missing from this listing;
// presumably it calls waitForResponse(), which clears one mask bit per call --
// confirm against the full file.
296void btThreadSupportWin32::waitForAllTasks()
297{
298 while (m_startedThreadMask)
299 {
301 }
302}
303
// Creates the worker threads: sizes the pool from the logical-processor count,
// then for each worker creates its start/complete events, records the complete
// handle in m_completeHandles and initialises its btThreadStatus.
// NOTE(review): many statements are missing from this listing (processor-info
// query, container sizing, thread creation, affinity calls, the bindings of
// `procInfo`, `threadStatus`, `handle`, `teamMask` and `mask`); the comments
// below cover only what is visible.
304void btThreadSupportWin32::startThreads(const ConstructionInfo& threadConstructionInfo)
305{
// Incremented on every call and embedded in the event names built below.
306 static int uniqueId = 0;
307 uniqueId++;
313 {
315 }
// NOTE(review): if numLogicalProcessors is 0 (no processor info gathered) this
// evaluates to -1 -- confirm that case is handled.
317 m_numThreads = btMin(procInfo.numLogicalProcessors, int(BT_MAX_THREAD_COUNT)) - 1; // cap to max thread count (-1 because main thread already exists)
318
322
323 // set main thread affinity
325 {
328 }
329
330 for (int i = 0; i < m_numThreads; i++)
331 {
332 printf("starting thread %d\n", i);
333
335
337 SIZE_T dwStackSize = threadConstructionInfo.m_threadStackSize;
342
// Null user pointer until runTask() supplies one.
343 threadStatus.m_userPtr = 0;
344
// Event name: "es"/"ec" prefix + at most 8 chars of the unique name + uniqueId + worker index.
345 sprintf(threadStatus.m_eventStartHandleName, "es%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i);
346 threadStatus.m_eventStartHandle = CreateEventA(0, false, false, threadStatus.m_eventStartHandleName);
347
348 sprintf(threadStatus.m_eventCompleteHandleName, "ec%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i);
349 threadStatus.m_eventCompleteHandle = CreateEventA(0, false, false, threadStatus.m_eventCompleteHandleName);
350
351 m_completeHandles[i] = threadStatus.m_eventCompleteHandle;
352
354 //SetThreadPriority( handle, THREAD_PRIORITY_HIGHEST );
355 // highest priority -- can cause erratic performance when numThreads > numCores
356 // we don't want worker threads to be higher priority than the main thread or the main thread could get
357 // totally shut out and unable to tell the workers to stop
358 //SetThreadPriority( handle, THREAD_PRIORITY_BELOW_NORMAL );
359
360 {
361 int processorId = i + 1; // leave processor 0 for main thread
363 if (teamMask)
364 {
365 // bind each thread to only execute on processors of its assigned team
366 // - for single-socket Intel x86 CPUs this has no effect (only a single, shared L3 cache so there is only 1 team)
367 // - for multi-socket Intel this will keep threads from migrating from one socket to another
368 // - for AMD Ryzen this will keep threads from migrating from one CCX to another
370 if (mask)
371 {
373 }
374 }
376 }
377
378 threadStatus.m_taskId = i;
379 threadStatus.m_commandId = 0;
380 threadStatus.m_status = 0;
381 threadStatus.m_threadHandle = handle;
382 threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
383
384 printf("started %s thread %d with threadHandle %p\n", threadConstructionInfo.m_uniqueName, i, handle);
385 }
386}
387
// Shuts down every worker in m_activeThreadStatus, closes its event and thread
// handles, and then empties both bookkeeping arrays.
// NOTE(review): the line binding `threadStatus` (numbered line 393) is missing
// from this listing.
389void btThreadSupportWin32::stopThreads()
390{
391 for (int i = 0; i < m_activeThreadStatus.size(); i++)
392 {
// A non-zero status means the complete event has not been consumed yet; wait on it first.
394 if (threadStatus.m_status > 0)
395 {
396 WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
397 }
398
// A null user pointer makes the worker take its exit branch when it is woken.
399 threadStatus.m_userPtr = NULL;
400 SetEvent(threadStatus.m_eventStartHandle);
// Wait for the worker's completion signal before closing its handles.
401 WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
402
403 CloseHandle(threadStatus.m_eventCompleteHandle);
404 CloseHandle(threadStatus.m_eventStartHandle);
405 CloseHandle(threadStatus.m_threadHandle);
406 }
407
408 m_activeThreadStatus.clear();
409 m_completeHandles.clear();
410}
411
// NOTE(review): the class header, the private member, the signatures of the two
// member functions ahead of lock() and every statement body are missing from
// this listing. createCriticalSection() allocates sizeof(btWin32CriticalSection),
// so this is presumably that class -- confirm against the full file.
413{
414private:
416
417public:
419 {
421 }
422
424 {
426 }
427
// NOTE(review): body not visible in this listing.
428 void lock()
429 {
431 }
432
// NOTE(review): body not visible in this listing.
433 void unlock()
434 {
436 }
437};
438
// Allocates 16-byte-aligned storage sized for a btWin32CriticalSection via
// btAlignedAlloc and returns `cs`.
// NOTE(review): the line that defines `cs` (numbered line 442) is missing from
// this listing; presumably the object is constructed in `mem` -- confirm.
439btCriticalSection* btThreadSupportWin32::createCriticalSection()
440{
441 unsigned char* mem = (unsigned char*)btAlignedAlloc(sizeof(btWin32CriticalSection), 16);
443 return cs;
444}
445
// Counterpart of createCriticalSection().
// NOTE(review): the body (numbered lines 448-449) is missing from this listing;
// presumably it destroys the object and releases its aligned storage -- confirm.
446void btThreadSupportWin32::deleteCriticalSection(btCriticalSection* criticalSection)
447{
450}
451
// Factory: returns a heap-allocated btThreadSupportWin32 built from `info`.
// The caller receives a raw pointer allocated with `new`.
// NOTE(review): the signature is missing from this listing; presumably this is
// btThreadSupportInterface::create(const ConstructionInfo&) -- confirm.
453{
454 return new btThreadSupportWin32(info);
455}
456
457#endif //defined(_WIN32) && BT_THREADSAFE
#define btAlignedFree(ptr)
#define btAlignedAlloc(size, alignment)
const T & btMax(const T &a, const T &b)
Definition btMinMax.h:27
const T & btMin(const T &a, const T &b)
Definition btMinMax.h:21
static int uniqueId
#define btAssert(x)
Definition btScalar.h:153
#define BT_OVERRIDE
Definition btThreads.h:26
const unsigned int BT_MAX_THREAD_COUNT
Definition btThreads.h:31
The btAlignedObjectArray template class uses a subset of the stl::vector interface for its methods It...
virtual int getCacheFriendlyNumThreads() const =0
virtual int getLogicalToPhysicalCoreRatio() const =0
virtual void waitForAllTasks()=0
static btThreadSupportInterface * create(const ConstructionInfo &info)
virtual void runTask(int threadIndex, void *userData)=0
virtual int getNumWorkerThreads() const =0