thread.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. // ======================================================================== //
  2. // Copyright 2009-2019 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #if defined(_MSC_VER)
  17. #pragma warning (disable : 4146) // unary minus operator applied to unsigned type, result still unsigned
  18. #endif
  #if defined(__APPLE__)
    #include <mach/mach_init.h>
    #include <mach/mach_port.h>
    #include <mach/thread_act.h>
  #endif
  23. #include "thread.h"
  24. #include <fstream>
  25. namespace oidn {
  26. #if defined(_WIN32)
  27. // --------------------------------------------------------------------------
  28. // ThreadAffinity - Windows
  29. // --------------------------------------------------------------------------
  30. ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
  31. : Verbose(verbose)
  32. {
  33. HMODULE hLib = GetModuleHandle(TEXT("kernel32"));
  34. pGetLogicalProcessorInformationEx = (GetLogicalProcessorInformationExFunc)GetProcAddress(hLib, "GetLogicalProcessorInformationEx");
  35. pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hLib, "SetThreadGroupAffinity");
  36. if (pGetLogicalProcessorInformationEx && pSetThreadGroupAffinity)
  37. {
  38. // Get logical processor information
  39. PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer = nullptr;
  40. DWORD bufferSize = 0;
  41. // First call the function with an empty buffer to get the required buffer size
  42. BOOL result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize);
  43. if (result || GetLastError() != ERROR_INSUFFICIENT_BUFFER)
  44. {
  45. OIDN_WARNING("GetLogicalProcessorInformationEx failed");
  46. return;
  47. }
  48. // Allocate the buffer
  49. buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(bufferSize);
  50. if (!buffer)
  51. {
  52. OIDN_WARNING("SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX allocation failed");
  53. return;
  54. }
  55. // Call again the function but now with the properly sized buffer
  56. result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize);
  57. if (!result)
  58. {
  59. OIDN_WARNING("GetLogicalProcessorInformationEx failed");
  60. free(buffer);
  61. return;
  62. }
  63. // Iterate over the logical processor information structures
  64. // There should be one structure for each physical core
  65. char* ptr = (char*)buffer;
  66. while (ptr < (char*)buffer + bufferSize)
  67. {
  68. PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX item = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)ptr;
  69. if (item->Relationship == RelationProcessorCore && item->Processor.GroupCount > 0)
  70. {
  71. // Iterate over the groups
  72. int numThreads = 0;
  73. for (int group = 0; (group < item->Processor.GroupCount) && (numThreads < numThreadsPerCore); ++group)
  74. {
  75. GROUP_AFFINITY coreAffinity = item->Processor.GroupMask[group];
  76. while ((coreAffinity.Mask != 0) && (numThreads < numThreadsPerCore))
  77. {
  78. // Extract the next set bit/thread from the mask
  79. GROUP_AFFINITY threadAffinity = coreAffinity;
  80. threadAffinity.Mask = threadAffinity.Mask & -threadAffinity.Mask;
  81. // Push the affinity for this thread
  82. affinities.push_back(threadAffinity);
  83. oldAffinities.push_back(threadAffinity);
  84. numThreads++;
  85. // Remove this bit/thread from the mask
  86. coreAffinity.Mask ^= threadAffinity.Mask;
  87. }
  88. }
  89. }
  90. // Next structure
  91. ptr += item->Size;
  92. }
  93. // Free the buffer
  94. free(buffer);
  95. }
  96. }
  97. void ThreadAffinity::set(int threadIndex)
  98. {
  99. if (threadIndex >= (int)affinities.size())
  100. return;
  101. // Save the current affinity and set the new one
  102. const HANDLE thread = GetCurrentThread();
  103. if (!pSetThreadGroupAffinity(thread, &affinities[threadIndex], &oldAffinities[threadIndex]))
  104. OIDN_WARNING("SetThreadGroupAffinity failed");
  105. }
  106. void ThreadAffinity::restore(int threadIndex)
  107. {
  108. if (threadIndex >= (int)affinities.size())
  109. return;
  110. // Restore the original affinity
  111. const HANDLE thread = GetCurrentThread();
  112. if (!pSetThreadGroupAffinity(thread, &oldAffinities[threadIndex], nullptr))
  113. OIDN_WARNING("SetThreadGroupAffinity failed");
  114. }
  115. #elif defined(__linux__)
  116. // --------------------------------------------------------------------------
  117. // ThreadAffinity - Linux
  118. // --------------------------------------------------------------------------
  119. ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
  120. : Verbose(verbose)
  121. {
  122. std::vector<int> threadIds;
  123. // Parse the thread/CPU topology
  124. for (int cpuId = 0; ; cpuId++)
  125. {
  126. std::fstream fs;
  127. std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string(cpuId) + std::string("/topology/thread_siblings_list");
  128. fs.open(cpu.c_str(), std::fstream::in);
  129. if (fs.fail()) break;
  130. int i;
  131. int j = 0;
  132. while ((j < numThreadsPerCore) && (fs >> i))
  133. {
  134. if (std::none_of(threadIds.begin(), threadIds.end(), [&](int id) { return id == i; }))
  135. threadIds.push_back(i);
  136. if (fs.peek() == ',')
  137. fs.ignore();
  138. j++;
  139. }
  140. fs.close();
  141. }
  142. #if 0
  143. for (size_t i = 0; i < thread_ids.size(); ++i)
  144. std::cout << "thread " << i << " -> " << thread_ids[i] << std::endl;
  145. #endif
  146. // Create the affinity structures
  147. affinities.resize(threadIds.size());
  148. oldAffinities.resize(threadIds.size());
  149. for (size_t i = 0; i < threadIds.size(); ++i)
  150. {
  151. cpu_set_t affinity;
  152. CPU_ZERO(&affinity);
  153. CPU_SET(threadIds[i], &affinity);
  154. affinities[i] = affinity;
  155. oldAffinities[i] = affinity;
  156. }
  157. }
  158. void ThreadAffinity::set(int threadIndex)
  159. {
  160. if (threadIndex >= (int)affinities.size())
  161. return;
  162. const pthread_t thread = pthread_self();
  163. // Save the current affinity
  164. if (pthread_getaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0)
  165. {
  166. OIDN_WARNING("pthread_getaffinity_np failed");
  167. oldAffinities[threadIndex] = affinities[threadIndex];
  168. return;
  169. }
  170. // Set the new affinity
  171. if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &affinities[threadIndex]) != 0)
  172. OIDN_WARNING("pthread_setaffinity_np failed");
  173. }
  174. void ThreadAffinity::restore(int threadIndex)
  175. {
  176. if (threadIndex >= (int)affinities.size())
  177. return;
  178. const pthread_t thread = pthread_self();
  179. // Restore the original affinity
  180. if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0)
  181. OIDN_WARNING("pthread_setaffinity_np failed");
  182. }
  183. #elif defined(__APPLE__)
  184. // --------------------------------------------------------------------------
  185. // ThreadAffinity - macOS
  186. // --------------------------------------------------------------------------
  187. ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
  188. : Verbose(verbose)
  189. {
  190. // Query the thread/CPU topology
  191. int numPhysicalCpus;
  192. int numLogicalCpus;
  193. if (!getSysctl("hw.physicalcpu", numPhysicalCpus) || !getSysctl("hw.logicalcpu", numLogicalCpus))
  194. {
  195. OIDN_WARNING("sysctlbyname failed");
  196. return;
  197. }
  198. if ((numLogicalCpus % numPhysicalCpus != 0) && (numThreadsPerCore > 1))
  199. return; // this shouldn't happen
  200. const int maxThreadsPerCore = numLogicalCpus / numPhysicalCpus;
  201. // Create the affinity structures
  202. // macOS doesn't support binding a thread to a specific core, but we can at least group threads which
  203. // should be on the same core together
  204. for (int core = 1; core <= numPhysicalCpus; ++core) // tags start from 1!
  205. {
  206. thread_affinity_policy affinity;
  207. affinity.affinity_tag = core;
  208. for (int thread = 0; thread < min(numThreadsPerCore, maxThreadsPerCore); ++thread)
  209. {
  210. affinities.push_back(affinity);
  211. oldAffinities.push_back(affinity);
  212. }
  213. }
  214. }
  215. void ThreadAffinity::set(int threadIndex)
  216. {
  217. if (threadIndex >= (int)affinities.size())
  218. return;
  219. const auto thread = mach_thread_self();
  220. // Save the current affinity
  221. mach_msg_type_number_t policyCount = THREAD_AFFINITY_POLICY_COUNT;
  222. boolean_t getDefault = FALSE;
  223. if (thread_policy_get(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], &policyCount, &getDefault) != KERN_SUCCESS)
  224. {
  225. OIDN_WARNING("thread_policy_get failed");
  226. oldAffinities[threadIndex] = affinities[threadIndex];
  227. return;
  228. }
  229. // Set the new affinity
  230. if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&affinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
  231. OIDN_WARNING("thread_policy_set failed");
  232. }
  233. void ThreadAffinity::restore(int threadIndex)
  234. {
  235. if (threadIndex >= (int)affinities.size())
  236. return;
  237. const auto thread = mach_thread_self();
  238. // Restore the original affinity
  239. if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
  240. OIDN_WARNING("thread_policy_set failed");
  241. }
  242. #endif
  243. } // namespace oidn