]> git.cworth.org Git - vogl/blob - src/voglcore/vogl_threading_win32.cpp
Initial vogl checkin
[vogl] / src / voglcore / vogl_threading_win32.cpp
1 /**************************************************************************
2  *
3  * Copyright 2013-2014 RAD Game Tools and Valve Software
4  * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  *
25  **************************************************************************/
26
// File: vogl_threading_win32.cpp
28 #include "vogl_core.h"
29 #include "vogl_threading_win32.h"
30 #include "vogl_winhdr.h"
31 #include <process.h>
32
33 namespace vogl
34 {
35     uint g_number_of_processors = 1;
36
37     void vogl_threading_init()
38     {
39         SYSTEM_INFO g_system_info;
40         GetSystemInfo(&g_system_info);
41
42         g_number_of_processors = math::maximum<uint>(1U, g_system_info.dwNumberOfProcessors);
43     }
44
45     vogl_thread_id_t vogl_get_current_thread_id()
46     {
47         return static_cast<vogl_thread_id_t>(GetCurrentThreadId());
48     }
49
50     void vogl_sleep(unsigned int milliseconds)
51     {
52         Sleep(milliseconds);
53     }
54
55     uint vogl_get_max_helper_threads()
56     {
57         if (g_number_of_processors > 1)
58         {
59             // use all CPU's
60             return VOGL_MIN((int)task_pool::cMaxThreads, (int)g_number_of_processors - 1);
61         }
62
63         return 0;
64     }
65
66     mutex::mutex(unsigned int spin_count)
67     {
68         VOGL_ASSUME(sizeof(mutex) >= sizeof(CRITICAL_SECTION));
69
70         void *p = m_buf;
71         CRITICAL_SECTION &m_cs = *static_cast<CRITICAL_SECTION *>(p);
72
73         BOOL status = true;
74         status = InitializeCriticalSectionAndSpinCount(&m_cs, spin_count);
75         if (!status)
76             vogl_fail("mutex::mutex: InitializeCriticalSectionAndSpinCount failed", __FILE__, __LINE__);
77
78 #ifdef VOGL_BUILD_DEBUG
79         m_lock_count = 0;
80 #endif
81     }
82
83     mutex::~mutex()
84     {
85         void *p = m_buf;
86         CRITICAL_SECTION &m_cs = *static_cast<CRITICAL_SECTION *>(p);
87
88 #ifdef VOGL_BUILD_DEBUG
89         if (m_lock_count)
90             vogl_assert("mutex::~mutex: mutex is still locked", __FILE__, __LINE__);
91 #endif
92         DeleteCriticalSection(&m_cs);
93     }
94
95     void mutex::lock()
96     {
97         void *p = m_buf;
98         CRITICAL_SECTION &m_cs = *static_cast<CRITICAL_SECTION *>(p);
99
100         EnterCriticalSection(&m_cs);
101 #ifdef VOGL_BUILD_DEBUG
102         m_lock_count++;
103 #endif
104     }
105
106     void mutex::unlock()
107     {
108         void *p = m_buf;
109         CRITICAL_SECTION &m_cs = *static_cast<CRITICAL_SECTION *>(p);
110
111 #ifdef VOGL_BUILD_DEBUG
112         if (!m_lock_count)
113             vogl_assert("mutex::unlock: mutex is not locked", __FILE__, __LINE__);
114         m_lock_count--;
115 #endif
116         LeaveCriticalSection(&m_cs);
117     }
118
119     void mutex::set_spin_count(unsigned int count)
120     {
121         void *p = m_buf;
122         CRITICAL_SECTION &m_cs = *static_cast<CRITICAL_SECTION *>(p);
123
124         SetCriticalSectionSpinCount(&m_cs, count);
125     }
126
    // Acquires the spinlock. Busy-waits for up to max_spins failed attempts,
    // then (if yielding) backs off in three escalating stages:
    // Sleep(0) -> Sleep(1) -> Sleep(2 ms per subsequent retry burst).
    // On single-CPU machines spinning is pointless, so max_spins is forced to 1.
    void spinlock::lock(uint32 max_spins, bool yielding)
    {
        if (g_number_of_processors <= 1)
            max_spins = 1;

        uint32 spinCount = 0;
        uint32 yieldCount = 0;

        for (;;)
        {
            VOGL_ASSUME(sizeof(long) == sizeof(int32));
            // InterlockedExchange returns the previous flag value; 0 (FALSE)
            // means the lock was free and we just acquired it.
            if (!InterlockedExchange((volatile long *)&m_flag, TRUE))
                break;

            // Pause between attempts to reduce bus contention while spinning.
            YieldProcessor();
            YieldProcessor();
            YieldProcessor();
            YieldProcessor();
            YieldProcessor();
            YieldProcessor();
            YieldProcessor();
            YieldProcessor();

            spinCount++;
            if ((yielding) && (spinCount >= max_spins))
            {
                switch (yieldCount)
                {
                    case 0:
                    {
                        spinCount = 0;

                        // Stage 1: give up the remainder of this timeslice.
                        Sleep(0);

                        yieldCount++;
                        break;
                    }
                    case 1:
                    {
                        // Stage 2: shorter spin burst next time (on multi-CPU),
                        // then actually sleep for ~1ms.
                        if (g_number_of_processors <= 1)
                            spinCount = 0;
                        else
                            spinCount = max_spins / 2;

                        Sleep(1);

                        yieldCount++;
                        break;
                    }
                    case 2:
                    {
                        // Final stage: yieldCount stays at 2, so on multi-CPU
                        // machines (spinCount preset to max_spins) we sleep 2ms
                        // after every subsequent failed attempt until acquired.
                        if (g_number_of_processors <= 1)
                            spinCount = 0;
                        else
                            spinCount = max_spins;

                        Sleep(2);
                        break;
                    }
                }
            }
        }

        // Acquire-side barrier: no reads/writes may be hoisted above the lock.
        VOGL_MEMORY_IMPORT_BARRIER
    }
192
    // Releases the spinlock.
    void spinlock::unlock()
    {
        // Release-side barrier: make all writes performed while holding the
        // lock visible before the flag is cleared.
        VOGL_MEMORY_EXPORT_BARRIER

        InterlockedExchange((volatile long *)&m_flag, FALSE);
    }
199
200     semaphore::semaphore(int32 initialCount, int32 maximumCount, const char *pName)
201     {
202         m_handle = CreateSemaphoreA(NULL, initialCount, maximumCount, pName);
203         if (NULL == m_handle)
204         {
205             VOGL_FAIL("semaphore: CreateSemaphore() failed");
206         }
207     }
208
209     semaphore::~semaphore()
210     {
211         if (m_handle)
212         {
213             CloseHandle(m_handle);
214             m_handle = NULL;
215         }
216     }
217
218     void semaphore::release(int32 releaseCount, int32 *pPreviousCount)
219     {
220         VOGL_ASSUME(sizeof(LONG) == sizeof(int32));
221         if (0 == ReleaseSemaphore(m_handle, releaseCount, (LPLONG)pPreviousCount))
222         {
223             VOGL_FAIL("semaphore: ReleaseSemaphore() failed");
224         }
225     }
226
227     bool semaphore::try_release(int32 releaseCount, int32 *pPreviousCount)
228     {
229         VOGL_ASSUME(sizeof(LONG) == sizeof(int32));
230         return ReleaseSemaphore(m_handle, releaseCount, (LPLONG)pPreviousCount) != 0;
231     }
232
233     bool semaphore::wait(uint32 milliseconds)
234     {
235         uint32 result = WaitForSingleObject(m_handle, milliseconds);
236
237         if (WAIT_FAILED == result)
238         {
239             VOGL_FAIL("semaphore: WaitForSingleObject() failed");
240         }
241
242         return WAIT_OBJECT_0 == result;
243     }
244
    // Constructs an idle pool with no worker threads; call init() to spawn
    // them. m_tasks_available counts queued tasks (max 32767 outstanding);
    // m_all_tasks_completed is a max-count-1 semaphore signalled when the
    // last outstanding task finishes.
    task_pool::task_pool()
        : m_pTask_stack(vogl_new<ts_task_stack_t>()),
          m_num_threads(0),
          m_tasks_available(0, 32767),
          m_all_tasks_completed(0, 1),
          m_total_submitted_tasks(0),
          m_total_completed_tasks(0),
          m_exit_flag(false)
    {
        utils::zero_object(m_threads);
    }
256
    // Convenience constructor: same as the default constructor, but
    // immediately spawns num_threads worker threads via init().
    task_pool::task_pool(uint num_threads)
        : m_pTask_stack(vogl_new<ts_task_stack_t>()),
          m_num_threads(0),
          m_tasks_available(0, 32767),
          m_all_tasks_completed(0, 1),
          m_total_submitted_tasks(0),
          m_total_completed_tasks(0),
          m_exit_flag(false)
    {
        utils::zero_object(m_threads);

        bool status = init(num_threads);
        VOGL_VERIFY(status);
    }
271
    // Drains remaining tasks and joins all workers before freeing the shared
    // task stack (deinit() must complete first - workers still reference it).
    task_pool::~task_pool()
    {
        deinit();
        vogl_delete(m_pTask_stack);
    }
277
278     bool task_pool::init(uint num_threads)
279     {
280         VOGL_ASSERT(num_threads <= cMaxThreads);
281         num_threads = math::minimum<uint>(num_threads, cMaxThreads);
282
283         deinit();
284
285         bool succeeded = true;
286
287         m_num_threads = 0;
288         while (m_num_threads < num_threads)
289         {
290             m_threads[m_num_threads] = (HANDLE)_beginthreadex(NULL, 32768, thread_func, this, 0, NULL);
291             VOGL_ASSERT(m_threads[m_num_threads] != 0);
292
293             if (!m_threads[m_num_threads])
294             {
295                 succeeded = false;
296                 break;
297             }
298
299             m_num_threads++;
300         }
301
302         if (!succeeded)
303         {
304             deinit();
305             return false;
306         }
307
308         return true;
309     }
310
    // Shuts down all worker threads and resets the pool to its freshly
    // constructed state. Blocks until every worker has exited. Safe to call
    // when no threads are running.
    void task_pool::deinit()
    {
        if (m_num_threads)
        {
            // First drain and finish all queued/in-flight tasks.
            join();

            // Set exit flag, then release all threads. Each should wakeup and exit.
            atomic_exchange32(&m_exit_flag, true);

            m_tasks_available.release(m_num_threads);

            // Now wait for each thread to exit.
            for (uint i = 0; i < m_num_threads; i++)
            {
                if (m_threads[i])
                {
                    for (;;)
                    {
                        // Can be an INFINITE delay, but set at 30 seconds so this function always provably exits.
                        DWORD result = WaitForSingleObject(m_threads[i], 30000);
                        if ((result == WAIT_OBJECT_0) || (result == WAIT_ABANDONED))
                            break;
                    }

                    CloseHandle(m_threads[i]);
                    m_threads[i] = NULL;
                }
            }

            m_num_threads = 0;

            // Clear the flag so a subsequent init() starts workers normally.
            atomic_exchange32(&m_exit_flag, false);
        }

        if (m_pTask_stack)
            m_pTask_stack->clear();
        m_total_submitted_tasks = 0;
        m_total_completed_tasks = 0;
    }
350
351     bool task_pool::queue_task(task_callback_func pFunc, uint64_t data, void *pData_ptr)
352     {
353         VOGL_ASSERT(pFunc);
354
355         task tsk;
356         tsk.m_callback = pFunc;
357         tsk.m_data = data;
358         tsk.m_pData_ptr = pData_ptr;
359         tsk.m_flags = 0;
360
361         atomic_increment32(&m_total_submitted_tasks);
362
363         if (!m_pTask_stack->try_push(tsk))
364         {
365             atomic_increment32(&m_total_completed_tasks);
366             return false;
367         }
368
369         m_tasks_available.release(1);
370
371         return true;
372     }
373
374     // It's the object's responsibility to delete pObj within the execute_task() method, if needed!
375     bool task_pool::queue_task(executable_task *pObj, uint64_t data, void *pData_ptr)
376     {
377         VOGL_ASSERT(pObj);
378
379         task tsk;
380         tsk.m_pObj = pObj;
381         tsk.m_data = data;
382         tsk.m_pData_ptr = pData_ptr;
383         tsk.m_flags = cTaskFlagObject;
384
385         atomic_increment32(&m_total_submitted_tasks);
386
387         if (!m_pTask_stack->try_push(tsk))
388         {
389             atomic_increment32(&m_total_completed_tasks);
390             return false;
391         }
392
393         m_tasks_available.release(1);
394
395         return true;
396     }
397
    // Executes a single task, dispatching either through the executable_task
    // object interface or the plain callback depending on cTaskFlagObject.
    // Called from both worker threads and join().
    void task_pool::process_task(task &tsk)
    {
        if (tsk.m_flags & cTaskFlagObject)
            tsk.m_pObj->execute_task(tsk.m_data, tsk.m_pData_ptr);
        else
            tsk.m_callback(tsk.m_data, tsk.m_pData_ptr);

        // If this was the last outstanding task, wake any thread blocked in join().
        if (atomic_increment32(&m_total_completed_tasks) == m_total_submitted_tasks)
        {
            // Try to signal the semaphore (the max count is 1 so this may actually fail).
            m_all_tasks_completed.try_release();
        }
    }
411
    // Blocks until every submitted task has been processed. The calling
    // thread helps out by executing queued tasks itself rather than idling.
    void task_pool::join()
    {
        // Try to steal any outstanding tasks. This could cause one or more worker threads to wake up and immediately go back to sleep, which is wasteful but should be harmless.
        task tsk;
        while (m_pTask_stack->pop(tsk))
            process_task(tsk);

        // At this point the task stack is empty.
        // Now wait for all concurrent tasks to complete. The m_all_tasks_completed semaphore has a max count of 1, so it's possible it could have saturated to 1 as the tasks
        // where issued and asynchronously completed, so this loop may iterate a few times.
        const int total_submitted_tasks = atomic_add32(&m_total_submitted_tasks, 0);
        while (m_total_completed_tasks != total_submitted_tasks)
        {
            // If the previous (m_total_completed_tasks != total_submitted_tasks) check failed the semaphore MUST be eventually signalled once the last task completes.
            // So I think this can actually be an INFINITE delay, but it shouldn't really matter if it's 1ms.
            m_all_tasks_completed.wait(1);
        }
    }
430
431     unsigned __stdcall task_pool::thread_func(void *pContext)
432     {
433         task_pool *pPool = static_cast<task_pool *>(pContext);
434
435         for (;;)
436         {
437             if (!pPool->m_tasks_available.wait())
438                 break;
439
440             if (pPool->m_exit_flag)
441                 break;
442
443             task tsk;
444             if (pPool->m_pTask_stack->pop(tsk))
445                 pPool->process_task(tsk);
446         }
447
448         _endthreadex(0);
449         return 0;
450     }
451
452 } // namespace vogl