123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298 |
- // ======================================================================== //
- // Copyright 2009-2019 Intel Corporation //
- // //
- // Licensed under the Apache License, Version 2.0 (the "License"); //
- // you may not use this file except in compliance with the License. //
- // You may obtain a copy of the License at //
- // //
- // http://www.apache.org/licenses/LICENSE-2.0 //
- // //
- // Unless required by applicable law or agreed to in writing, software //
- // distributed under the License is distributed on an "AS IS" BASIS, //
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
- // See the License for the specific language governing permissions and //
- // limitations under the License. //
- // ======================================================================== //
- #if defined(_MSC_VER)
- #pragma warning (disable : 4146) // unary minus operator applied to unsigned type, result still unsigned
- #endif
#if defined(__APPLE__)
  #include <mach/thread_act.h>
  #include <mach/mach_init.h>
  #include <mach/mach_port.h>
#endif

#include "thread.h"
#include <fstream>
- namespace oidn {
- #if defined(_WIN32)
- // --------------------------------------------------------------------------
- // ThreadAffinity - Windows
- // --------------------------------------------------------------------------
- ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
- : Verbose(verbose)
- {
- HMODULE hLib = GetModuleHandle(TEXT("kernel32"));
- pGetLogicalProcessorInformationEx = (GetLogicalProcessorInformationExFunc)GetProcAddress(hLib, "GetLogicalProcessorInformationEx");
- pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hLib, "SetThreadGroupAffinity");
- if (pGetLogicalProcessorInformationEx && pSetThreadGroupAffinity)
- {
- // Get logical processor information
- PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer = nullptr;
- DWORD bufferSize = 0;
- // First call the function with an empty buffer to get the required buffer size
- BOOL result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize);
- if (result || GetLastError() != ERROR_INSUFFICIENT_BUFFER)
- {
- OIDN_WARNING("GetLogicalProcessorInformationEx failed");
- return;
- }
- // Allocate the buffer
- buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(bufferSize);
- if (!buffer)
- {
- OIDN_WARNING("SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX allocation failed");
- return;
- }
- // Call again the function but now with the properly sized buffer
- result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize);
- if (!result)
- {
- OIDN_WARNING("GetLogicalProcessorInformationEx failed");
- free(buffer);
- return;
- }
- // Iterate over the logical processor information structures
- // There should be one structure for each physical core
- char* ptr = (char*)buffer;
- while (ptr < (char*)buffer + bufferSize)
- {
- PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX item = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)ptr;
- if (item->Relationship == RelationProcessorCore && item->Processor.GroupCount > 0)
- {
- // Iterate over the groups
- int numThreads = 0;
- for (int group = 0; (group < item->Processor.GroupCount) && (numThreads < numThreadsPerCore); ++group)
- {
- GROUP_AFFINITY coreAffinity = item->Processor.GroupMask[group];
- while ((coreAffinity.Mask != 0) && (numThreads < numThreadsPerCore))
- {
- // Extract the next set bit/thread from the mask
- GROUP_AFFINITY threadAffinity = coreAffinity;
- threadAffinity.Mask = threadAffinity.Mask & -threadAffinity.Mask;
- // Push the affinity for this thread
- affinities.push_back(threadAffinity);
- oldAffinities.push_back(threadAffinity);
- numThreads++;
- // Remove this bit/thread from the mask
- coreAffinity.Mask ^= threadAffinity.Mask;
- }
- }
- }
- // Next structure
- ptr += item->Size;
- }
- // Free the buffer
- free(buffer);
- }
- }
- void ThreadAffinity::set(int threadIndex)
- {
- if (threadIndex >= (int)affinities.size())
- return;
- // Save the current affinity and set the new one
- const HANDLE thread = GetCurrentThread();
- if (!pSetThreadGroupAffinity(thread, &affinities[threadIndex], &oldAffinities[threadIndex]))
- OIDN_WARNING("SetThreadGroupAffinity failed");
- }
- void ThreadAffinity::restore(int threadIndex)
- {
- if (threadIndex >= (int)affinities.size())
- return;
- // Restore the original affinity
- const HANDLE thread = GetCurrentThread();
- if (!pSetThreadGroupAffinity(thread, &oldAffinities[threadIndex], nullptr))
- OIDN_WARNING("SetThreadGroupAffinity failed");
- }
- #elif defined(__linux__)
- // --------------------------------------------------------------------------
- // ThreadAffinity - Linux
- // --------------------------------------------------------------------------
- ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
- : Verbose(verbose)
- {
- std::vector<int> threadIds;
- // Parse the thread/CPU topology
- for (int cpuId = 0; ; cpuId++)
- {
- std::fstream fs;
- std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string(cpuId) + std::string("/topology/thread_siblings_list");
- fs.open(cpu.c_str(), std::fstream::in);
- if (fs.fail()) break;
- int i;
- int j = 0;
- while ((j < numThreadsPerCore) && (fs >> i))
- {
- if (std::none_of(threadIds.begin(), threadIds.end(), [&](int id) { return id == i; }))
- threadIds.push_back(i);
- if (fs.peek() == ',')
- fs.ignore();
- j++;
- }
- fs.close();
- }
- #if 0
- for (size_t i = 0; i < thread_ids.size(); ++i)
- std::cout << "thread " << i << " -> " << thread_ids[i] << std::endl;
- #endif
- // Create the affinity structures
- affinities.resize(threadIds.size());
- oldAffinities.resize(threadIds.size());
- for (size_t i = 0; i < threadIds.size(); ++i)
- {
- cpu_set_t affinity;
- CPU_ZERO(&affinity);
- CPU_SET(threadIds[i], &affinity);
- affinities[i] = affinity;
- oldAffinities[i] = affinity;
- }
- }
- void ThreadAffinity::set(int threadIndex)
- {
- if (threadIndex >= (int)affinities.size())
- return;
- const pthread_t thread = pthread_self();
- // Save the current affinity
- if (pthread_getaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0)
- {
- OIDN_WARNING("pthread_getaffinity_np failed");
- oldAffinities[threadIndex] = affinities[threadIndex];
- return;
- }
- // Set the new affinity
- if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &affinities[threadIndex]) != 0)
- OIDN_WARNING("pthread_setaffinity_np failed");
- }
- void ThreadAffinity::restore(int threadIndex)
- {
- if (threadIndex >= (int)affinities.size())
- return;
- const pthread_t thread = pthread_self();
- // Restore the original affinity
- if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0)
- OIDN_WARNING("pthread_setaffinity_np failed");
- }
- #elif defined(__APPLE__)
- // --------------------------------------------------------------------------
- // ThreadAffinity - macOS
- // --------------------------------------------------------------------------
- ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
- : Verbose(verbose)
- {
- // Query the thread/CPU topology
- int numPhysicalCpus;
- int numLogicalCpus;
- if (!getSysctl("hw.physicalcpu", numPhysicalCpus) || !getSysctl("hw.logicalcpu", numLogicalCpus))
- {
- OIDN_WARNING("sysctlbyname failed");
- return;
- }
- if ((numLogicalCpus % numPhysicalCpus != 0) && (numThreadsPerCore > 1))
- return; // this shouldn't happen
- const int maxThreadsPerCore = numLogicalCpus / numPhysicalCpus;
- // Create the affinity structures
- // macOS doesn't support binding a thread to a specific core, but we can at least group threads which
- // should be on the same core together
- for (int core = 1; core <= numPhysicalCpus; ++core) // tags start from 1!
- {
- thread_affinity_policy affinity;
- affinity.affinity_tag = core;
- for (int thread = 0; thread < min(numThreadsPerCore, maxThreadsPerCore); ++thread)
- {
- affinities.push_back(affinity);
- oldAffinities.push_back(affinity);
- }
- }
- }
- void ThreadAffinity::set(int threadIndex)
- {
- if (threadIndex >= (int)affinities.size())
- return;
- const auto thread = mach_thread_self();
- // Save the current affinity
- mach_msg_type_number_t policyCount = THREAD_AFFINITY_POLICY_COUNT;
- boolean_t getDefault = FALSE;
- if (thread_policy_get(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], &policyCount, &getDefault) != KERN_SUCCESS)
- {
- OIDN_WARNING("thread_policy_get failed");
- oldAffinities[threadIndex] = affinities[threadIndex];
- return;
- }
- // Set the new affinity
- if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&affinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
- OIDN_WARNING("thread_policy_set failed");
- }
- void ThreadAffinity::restore(int threadIndex)
- {
- if (threadIndex >= (int)affinities.size())
- return;
- const auto thread = mach_thread_self();
- // Restore the original affinity
- if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
- OIDN_WARNING("thread_policy_set failed");
- }
- #endif
- } // namespace oidn
|