]> git.cworth.org Git - vogl/blob - src/voglcore/vogl_threading_win32.cpp
Initial vogl checkin
[vogl] / src / voglcore / vogl_threading_win32.cpp
1 /**************************************************************************
2  *
3  * Copyright 2013-2014 RAD Game Tools and Valve Software
4  * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  *
25  **************************************************************************/
26
// File: vogl_threading_win32.cpp
28 #include "vogl_core.h"
29 #include "vogl_threading_win32.h"
30 #include "vogl_winhdr.h"
31 #include <process.h>
32
33 namespace vogl
34 {
35     uint g_number_of_processors = 1;
36
37     void vogl_threading_init()
38     {
39         SYSTEM_INFO g_system_info;
40         GetSystemInfo(&g_system_info);
41
42         g_number_of_processors = math::maximum<uint>(1U, g_system_info.dwNumberOfProcessors);
43     }
44
45     vogl_thread_id_t vogl_get_current_thread_id()
46     {
47         return static_cast<vogl_thread_id_t>(GetCurrentThreadId());
48     }
49
50     void vogl_sleep(unsigned int milliseconds)
51     {
52         Sleep(milliseconds);
53     }
54
55     uint vogl_get_max_helper_threads()
56     {
57         if (g_number_of_processors > 1)
58         {
59             // use all CPU's
60             return VOGL_MIN((int)task_pool::cMaxThreads, (int)g_number_of_processors - 1);
61         }
62
63         return 0;
64     }
65
66     mutex::mutex(unsigned int spin_count)
67     {
68         VOGL_ASSUME(sizeof(mutex) >= sizeof(CRITICAL_SECTION));
69
70         void *p = m_buf;
71         CRITICAL_SECTION &m_cs = *static_cast<CRITICAL_SECTION *>(p);
72
73         BOOL status = true;
74         status = InitializeCriticalSectionAndSpinCount(&m_cs, spin_count);
75         if (!status)
76             vogl_fail("mutex::mutex: InitializeCriticalSectionAndSpinCount failed", __FILE__, __LINE__);
77
78 #ifdef VOGL_BUILD_DEBUG
79         m_lock_count = 0;
80 #endif
81     }
82
83     mutex::~mutex()
84     {
85         void *p = m_buf;
86         CRITICAL_SECTION &m_cs = *static_cast<CRITICAL_SECTION *>(p);
87
88 #ifdef VOGL_BUILD_DEBUG
89         if (m_lock_count)
90             vogl_assert("mutex::~mutex: mutex is still locked", __FILE__, __LINE__);
91 #endif
92         DeleteCriticalSection(&m_cs);
93     }
94
95     void mutex::lock()
96     {
97         void *p = m_buf;
98         CRITICAL_SECTION &m_cs = *static_cast<CRITICAL_SECTION *>(p);
99
100         EnterCriticalSection(&m_cs);
101 #ifdef VOGL_BUILD_DEBUG
102         m_lock_count++;
103 #endif
104     }
105
106     void mutex::unlock()
107     {
108         void *p = m_buf;
109         CRITICAL_SECTION &m_cs = *static_cast<CRITICAL_SECTION *>(p);
110
111 #ifdef VOGL_BUILD_DEBUG
112         if (!m_lock_count)
113             vogl_assert("mutex::unlock: mutex is not locked", __FILE__, __LINE__);
114         m_lock_count--;
115 #endif
116         LeaveCriticalSection(&m_cs);
117     }
118
119     void mutex::set_spin_count(unsigned int count)
120     {
121         void *p = m_buf;
122         CRITICAL_SECTION &m_cs = *static_cast<CRITICAL_SECTION *>(p);
123
124         SetCriticalSectionSpinCount(&m_cs, count);
125     }
126
    // Acquires the spinlock. Busy-waits for up to max_spins failed attempts,
    // then (if yielding) backs off in three escalating stages:
    // Sleep(0) -> Sleep(1) -> Sleep(2 ms per subsequent retry burst).
    // On single-CPU machines spinning is pointless, so max_spins is forced to 1.
    void spinlock::lock(uint32 max_spins, bool yielding)
    {
        if (g_number_of_processors <= 1)
            max_spins = 1;

        uint32 spinCount = 0;
        uint32 yieldCount = 0;

        for (;;)
        {
            VOGL_ASSUME(sizeof(long) == sizeof(int32));
            // InterlockedExchange returns the previous flag value; 0 (FALSE)
            // means the lock was free and we just acquired it.
            if (!InterlockedExchange((volatile long *)&m_flag, TRUE))
                break;

            // Pause between attempts to reduce bus contention while spinning.
            YieldProcessor();
            YieldProcessor();
            YieldProcessor();
            YieldProcessor();
            YieldProcessor();
            YieldProcessor();
            YieldProcessor();
            YieldProcessor();

            spinCount++;
            if ((yielding) && (spinCount >= max_spins))
            {
                switch (yieldCount)
                {
                    case 0:
                    {
                        spinCount = 0;

                        // Stage 1: give up the remainder of this timeslice.
                        Sleep(0);

                        yieldCount++;
                        break;
                    }
                    case 1:
                    {
                        // Stage 2: shorter spin burst next time (on multi-CPU),
                        // then actually sleep for ~1ms.
                        if (g_number_of_processors <= 1)
                            spinCount = 0;
                        else
                            spinCount = max_spins / 2;

                        Sleep(1);

                        yieldCount++;
                        break;
                    }
                    case 2:
                    {
                        // Final stage: yieldCount stays at 2, so on multi-CPU
                        // machines (spinCount preset to max_spins) we sleep 2ms
                        // after every subsequent failed attempt until acquired.
                        if (g_number_of_processors <= 1)
                            spinCount = 0;
                        else
                            spinCount = max_spins;

                        Sleep(2);
                        break;
                    }
                }
            }
        }

        // Acquire-side barrier: no reads/writes may be hoisted above the lock.
        VOGL_MEMORY_IMPORT_BARRIER
    }
192
    // Releases the spinlock.
    void spinlock::unlock()
    {
        // Release-side barrier: make all writes performed while holding the
        // lock visible before the flag is cleared.
        VOGL_MEMORY_EXPORT_BARRIER

        InterlockedExchange((volatile long *)&m_flag, FALSE);
    }
199
200     semaphore::semaphore(int32 initialCount, int32 maximumCount, const char *pName)
201     {
202         m_handle = CreateSemaphoreA(NULL, initialCount, maximumCount, pName);
203         if (NULL == m_handle)
204         {
205             VOGL_FAIL("semaphore: CreateSemaphore() failed");
206         }
207     }
208
209     semaphore::~semaphore()
210     {
211         if (m_handle)
212         {
213             CloseHandle(m_handle);
214             m_handle = NULL;
215         }
216     }
217
218     void semaphore::release(int32 releaseCount, int32 *pPreviousCount)
219     {
220         VOGL_ASSUME(sizeof(LONG) == sizeof(int32));
221         if (0 == ReleaseSemaphore(m_handle, releaseCount, (LPLONG)pPreviousCount))
222         {
223             VOGL_FAIL("semaphore: ReleaseSemaphore() failed");
224         }
225     }
226
227     bool semaphore::try_release(int32 releaseCount, int32 *pPreviousCount)
228     {
229         VOGL_ASSUME(sizeof(LONG) == sizeof(int32));
230         return ReleaseSemaphore(m_handle, releaseCount, (LPLONG)pPreviousCount) != 0;
231     }
232
233     bool semaphore::wait(uint32 milliseconds)
234     {
235         uint32 result = WaitForSingleObject(m_handle, milliseconds);
236
237         if (WAIT_FAILED == result)
238         {
239             VOGL_FAIL("semaphore: WaitForSingleObject() failed");
240         }
241
242         return WAIT_OBJECT_0 == result;
243     }
244
    // Constructs an idle pool with no worker threads; call init() to spawn
    // them. m_tasks_available counts queued tasks (max 32767 outstanding);
    // m_all_tasks_completed is a max-count-1 semaphore signalled when the
    // last outstanding task finishes.
    task_pool::task_pool()
        : m_pTask_stack(vogl_new<ts_task_stack_t>()),
          m_num_threads(0),
          m_tasks_available(0, 32767),
          m_all_tasks_completed(0, 1),
          m_total_submitted_tasks(0),
          m_total_completed_tasks(0),
          m_exit_flag(false)
    {
        utils::zero_object(m_threads);
    }
256
    // Convenience constructor: same as the default constructor, but
    // immediately spawns num_threads worker threads via init().
    task_pool::task_pool(uint num_threads)
        : m_pTask_stack(vogl_new<ts_task_stack_t>()),
          m_num_threads(0),
          m_tasks_available(0, 32767),
          m_all_tasks_completed(0, 1),
          m_total_submitted_tasks(0),
          m_total_completed_tasks(0),
          m_exit_flag(false)
    {
        utils::zero_object(m_threads);

        bool status = init(num_threads);
        VOGL_VERIFY(status);
    }
271
    // Drains remaining tasks and joins all workers before freeing the shared
    // task stack (deinit() must complete first - workers still reference it).
    task_pool::~task_pool()
    {
        deinit();
        vogl_delete(m_pTask_stack);
    }
277
278     bool task_pool::init(uint num_threads)
279     {
280         VOGL_ASSERT(num_threads <= cMaxThreads);
281         num_threads = math::minimum<uint>(num_threads, cMaxThreads);
282
283         deinit();
284
285         bool succeeded = true;
286
287         m_num_threads = 0;
288         while (m_num_threads < num_threads)
289         {
290             m_threads[m_num_threads] = (HANDLE)_beginthreadex(NULL, 32768, thread_func, this, 0, NULL);
291             VOGL_ASSERT(m_threads[m_num_threads] != 0);
292
293             if (!m_threads[m_num_threads])
294             {
295                 succeeded = false;
296                 break;
297             }
298
299             m_num_threads++;
300         }
301
302         if (!succeeded)
303         {
304             deinit();
305             return false;
306         }
307
308         return true;
309     }
310
    // Shuts down all worker threads and resets the pool to its freshly
    // constructed state. Blocks until every worker has exited. Safe to call
    // when no threads are running.
    void task_pool::deinit()
    {
        if (m_num_threads)
        {
            // First drain and finish all queued/in-flight tasks.
            join();

            // Set exit flag, then release all threads. Each should wakeup and exit.
            atomic_exchange32(&m_exit_flag, true);

            m_tasks_available.release(m_num_threads);

            // Now wait for each thread to exit.
            for (uint i = 0; i < m_num_threads; i++)
            {
                if (m_threads[i])
                {
                    for (;;)
                    {
                        // Can be an INFINITE delay, but set at 30 seconds so this function always provably exits.
                        DWORD result = WaitForSingleObject(m_threads[i], 30000);
                        if ((result == WAIT_OBJECT_0) || (result == WAIT_ABANDONED))
                            break;
                    }

                    CloseHandle(m_threads[i]);
                    m_threads[i] = NULL;
                }
            }

            m_num_threads = 0;

            // Clear the flag so a subsequent init() starts workers normally.
            atomic_exchange32(&m_exit_flag, false);
        }

        if (m_pTask_stack)
            m_pTask_stack->clear();
        m_total_submitted_tasks = 0;
        m_total_completed_tasks = 0;
    }
350
351     bool task_pool::queue_task(task_callback_func pFunc, uint64_t data, void *pData_ptr)
352     {
353         VOGL_ASSERT(pFunc);
354
355         task tsk;
356         tsk.m_callback = pFunc;
357         tsk.m_data = data;
358         tsk.m_pData_ptr = pData_ptr;
359         tsk.m_flags = 0;
360
361         atomic_increment32(&m_total_submitted_tasks);
362
363         if (!m_pTask_stack->try_push(tsk))
364         {
365             atomic_increment32(&m_total_completed_tasks);
366             return false;
367         }
368
369         m_tasks_available.release(1);
370
371         return true;
372     }
373
374     // It's the object's responsibility to delete pObj within the execute_task() method, if needed!
375     bool task_pool::queue_task(executable_task *pObj, uint64_t data, void *pData_ptr)
376     {
377         VOGL_ASSERT(pObj);
378
379         task tsk;
380         tsk.m_pObj = pObj;
381         tsk.m_data = data;
382         tsk.m_pData_ptr = pData_ptr;
383         tsk.m_flags = cTaskFlagObject;
384
385         atomic_increment32(&m_total_submitted_tasks);
386
387         if (!m_pTask_stack->try_push(tsk))
388         {
389             atomic_increment32(&m_total_completed_tasks);
390             return false;
391         }
392
393         m_tasks_available.release(1);
394
395         return true;
396     }
397
    // Executes a single task, dispatching either through the executable_task
    // object interface or the plain callback depending on cTaskFlagObject.
    // Called from both worker threads and join().
    void task_pool::process_task(task &tsk)
    {
        if (tsk.m_flags & cTaskFlagObject)
            tsk.m_pObj->execute_task(tsk.m_data, tsk.m_pData_ptr);
        else
            tsk.m_callback(tsk.m_data, tsk.m_pData_ptr);

        // If this was the last outstanding task, wake any thread blocked in join().
        if (atomic_increment32(&m_total_completed_tasks) == m_total_submitted_tasks)
        {
            // Try to signal the semaphore (the max count is 1 so this may actually fail).
            m_all_tasks_completed.try_release();
        }
    }
411
    // Blocks until every submitted task has been processed. The calling
    // thread helps out by executing queued tasks itself rather than idling.
    void task_pool::join()
    {
        // Try to steal any outstanding tasks. This could cause one or more worker threads to wake up and immediately go back to sleep, which is wasteful but should be harmless.
        task tsk;
        while (m_pTask_stack->pop(tsk))
            process_task(tsk);

        // At this point the task stack is empty.
        // Now wait for all concurrent tasks to complete. The m_all_tasks_completed semaphore has a max count of 1, so it's possible it could have saturated to 1 as the tasks
        // where issued and asynchronously completed, so this loop may iterate a few times.
        const int total_submitted_tasks = atomic_add32(&m_total_submitted_tasks, 0);
        while (m_total_completed_tasks != total_submitted_tasks)
        {
            // If the previous (m_total_completed_tasks != total_submitted_tasks) check failed the semaphore MUST be eventually signalled once the last task completes.
            // So I think this can actually be an INFINITE delay, but it shouldn't really matter if it's 1ms.
            m_all_tasks_completed.wait(1);
        }
    }
430
431     unsigned __stdcall task_pool::thread_func(void *pContext)
432     {
433         task_pool *pPool = static_cast<task_pool *>(pContext);
434
435         for (;;)
436         {
437             if (!pPool->m_tasks_available.wait())
438                 break;
439
440             if (pPool->m_exit_flag)
441                 break;
442
443             task tsk;
444             if (pPool->m_pTask_stack->pop(tsk))
445                 pPool->process_task(tsk);
446         }
447
448         _endthreadex(0);
449         return 0;
450     }
451
452 } // namespace vogl