win_cpu.cpp 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106
  1. /*
  2. ===========================================================================
  3. Doom 3 BFG Edition GPL Source Code
  4. Copyright (C) 1993-2012 id Software LLC, a ZeniMax Media company.
  5. This file is part of the Doom 3 BFG Edition GPL Source Code ("Doom 3 BFG Edition Source Code").
  6. Doom 3 BFG Edition Source Code is free software: you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation, either version 3 of the License, or
  9. (at your option) any later version.
  10. Doom 3 BFG Edition Source Code is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with Doom 3 BFG Edition Source Code. If not, see <http://www.gnu.org/licenses/>.
  16. In addition, the Doom 3 BFG Edition Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 BFG Edition Source Code. If not, please request a copy in writing from id Software at the address below.
  17. If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
  18. ===========================================================================
  19. */
  20. #pragma hdrstop
  21. #include "../../idlib/precompiled.h"
  22. #include "win_local.h"
  23. #pragma warning(disable:4740) // warning C4740: flow in or out of inline asm code suppresses global optimization
  24. #pragma warning(disable:4731) // warning C4731: 'XXX' : frame pointer register 'ebx' modified by inline assembly code
  25. /*
  26. ==============================================================
  27. Clock ticks
  28. ==============================================================
  29. */
  30. /*
  31. ================
  32. Sys_GetClockTicks
  33. ================
  34. */
  35. double Sys_GetClockTicks() {
  36. #if 0
  37. LARGE_INTEGER li;
  38. QueryPerformanceCounter( &li );
  39. return = (double ) li.LowPart + (double) 0xFFFFFFFF * li.HighPart;
  40. #else
  41. unsigned long lo, hi;
  42. __asm {
  43. push ebx
  44. xor eax, eax
  45. cpuid
  46. rdtsc
  47. mov lo, eax
  48. mov hi, edx
  49. pop ebx
  50. }
  51. return (double ) lo + (double) 0xFFFFFFFF * hi;
  52. #endif
  53. }
  54. /*
  55. ================
  56. Sys_ClockTicksPerSecond
  57. ================
  58. */
  59. double Sys_ClockTicksPerSecond() {
  60. static double ticks = 0;
  61. #if 0
  62. if ( !ticks ) {
  63. LARGE_INTEGER li;
  64. QueryPerformanceFrequency( &li );
  65. ticks = li.QuadPart;
  66. }
  67. #else
  68. if ( !ticks ) {
  69. HKEY hKey;
  70. LPBYTE ProcSpeed;
  71. DWORD buflen, ret;
  72. if ( !RegOpenKeyEx( HKEY_LOCAL_MACHINE, "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", 0, KEY_READ, &hKey ) ) {
  73. ProcSpeed = 0;
  74. buflen = sizeof( ProcSpeed );
  75. ret = RegQueryValueEx( hKey, "~MHz", NULL, NULL, (LPBYTE) &ProcSpeed, &buflen );
  76. // If we don't succeed, try some other spellings.
  77. if ( ret != ERROR_SUCCESS ) {
  78. ret = RegQueryValueEx( hKey, "~Mhz", NULL, NULL, (LPBYTE) &ProcSpeed, &buflen );
  79. }
  80. if ( ret != ERROR_SUCCESS ) {
  81. ret = RegQueryValueEx( hKey, "~mhz", NULL, NULL, (LPBYTE) &ProcSpeed, &buflen );
  82. }
  83. RegCloseKey( hKey );
  84. if ( ret == ERROR_SUCCESS ) {
  85. ticks = (double) ((unsigned long)ProcSpeed) * 1000000;
  86. }
  87. }
  88. }
  89. #endif
  90. return ticks;
  91. }
  92. /*
  93. ==============================================================
  94. CPU
  95. ==============================================================
  96. */
  97. /*
  98. ================
  99. HasCPUID
  100. ================
  101. */
  102. static bool HasCPUID() {
  103. __asm
  104. {
  105. pushfd // save eflags
  106. pop eax
  107. test eax, 0x00200000 // check ID bit
  108. jz set21 // bit 21 is not set, so jump to set_21
  109. and eax, 0xffdfffff // clear bit 21
  110. push eax // save new value in register
  111. popfd // store new value in flags
  112. pushfd
  113. pop eax
  114. test eax, 0x00200000 // check ID bit
  115. jz good
  116. jmp err // cpuid not supported
  117. set21:
  118. or eax, 0x00200000 // set ID bit
  119. push eax // store new value
  120. popfd // store new value in EFLAGS
  121. pushfd
  122. pop eax
  123. test eax, 0x00200000 // if bit 21 is on
  124. jnz good
  125. jmp err
  126. }
  127. err:
  128. return false;
  129. good:
  130. return true;
  131. }
  132. #define _REG_EAX 0
  133. #define _REG_EBX 1
  134. #define _REG_ECX 2
  135. #define _REG_EDX 3
  136. /*
  137. ================
  138. CPUID
  139. ================
  140. */
  141. static void CPUID( int func, unsigned regs[4] ) {
  142. unsigned regEAX, regEBX, regECX, regEDX;
  143. __asm pusha
  144. __asm mov eax, func
  145. __asm __emit 00fh
  146. __asm __emit 0a2h
  147. __asm mov regEAX, eax
  148. __asm mov regEBX, ebx
  149. __asm mov regECX, ecx
  150. __asm mov regEDX, edx
  151. __asm popa
  152. regs[_REG_EAX] = regEAX;
  153. regs[_REG_EBX] = regEBX;
  154. regs[_REG_ECX] = regECX;
  155. regs[_REG_EDX] = regEDX;
  156. }
  157. /*
  158. ================
  159. IsAMD
  160. ================
  161. */
  162. static bool IsAMD() {
  163. char pstring[16];
  164. char processorString[13];
  165. // get name of processor
  166. CPUID( 0, ( unsigned int * ) pstring );
  167. processorString[0] = pstring[4];
  168. processorString[1] = pstring[5];
  169. processorString[2] = pstring[6];
  170. processorString[3] = pstring[7];
  171. processorString[4] = pstring[12];
  172. processorString[5] = pstring[13];
  173. processorString[6] = pstring[14];
  174. processorString[7] = pstring[15];
  175. processorString[8] = pstring[8];
  176. processorString[9] = pstring[9];
  177. processorString[10] = pstring[10];
  178. processorString[11] = pstring[11];
  179. processorString[12] = 0;
  180. if ( strcmp( processorString, "AuthenticAMD" ) == 0 ) {
  181. return true;
  182. }
  183. return false;
  184. }
  185. /*
  186. ================
  187. HasCMOV
  188. ================
  189. */
  190. static bool HasCMOV() {
  191. unsigned regs[4];
  192. // get CPU feature bits
  193. CPUID( 1, regs );
  194. // bit 15 of EDX denotes CMOV existence
  195. if ( regs[_REG_EDX] & ( 1 << 15 ) ) {
  196. return true;
  197. }
  198. return false;
  199. }
  200. /*
  201. ================
  202. Has3DNow
  203. ================
  204. */
  205. static bool Has3DNow() {
  206. unsigned regs[4];
  207. // check AMD-specific functions
  208. CPUID( 0x80000000, regs );
  209. if ( regs[_REG_EAX] < 0x80000000 ) {
  210. return false;
  211. }
  212. // bit 31 of EDX denotes 3DNow! support
  213. CPUID( 0x80000001, regs );
  214. if ( regs[_REG_EDX] & ( 1 << 31 ) ) {
  215. return true;
  216. }
  217. return false;
  218. }
  219. /*
  220. ================
  221. HasMMX
  222. ================
  223. */
  224. static bool HasMMX() {
  225. unsigned regs[4];
  226. // get CPU feature bits
  227. CPUID( 1, regs );
  228. // bit 23 of EDX denotes MMX existence
  229. if ( regs[_REG_EDX] & ( 1 << 23 ) ) {
  230. return true;
  231. }
  232. return false;
  233. }
  234. /*
  235. ================
  236. HasSSE
  237. ================
  238. */
  239. static bool HasSSE() {
  240. unsigned regs[4];
  241. // get CPU feature bits
  242. CPUID( 1, regs );
  243. // bit 25 of EDX denotes SSE existence
  244. if ( regs[_REG_EDX] & ( 1 << 25 ) ) {
  245. return true;
  246. }
  247. return false;
  248. }
  249. /*
  250. ================
  251. HasSSE2
  252. ================
  253. */
  254. static bool HasSSE2() {
  255. unsigned regs[4];
  256. // get CPU feature bits
  257. CPUID( 1, regs );
  258. // bit 26 of EDX denotes SSE2 existence
  259. if ( regs[_REG_EDX] & ( 1 << 26 ) ) {
  260. return true;
  261. }
  262. return false;
  263. }
  264. /*
  265. ================
  266. HasSSE3
  267. ================
  268. */
  269. static bool HasSSE3() {
  270. unsigned regs[4];
  271. // get CPU feature bits
  272. CPUID( 1, regs );
  273. // bit 0 of ECX denotes SSE3 existence
  274. if ( regs[_REG_ECX] & ( 1 << 0 ) ) {
  275. return true;
  276. }
  277. return false;
  278. }
  279. /*
  280. ================
  281. LogicalProcPerPhysicalProc
  282. ================
  283. */
  284. #define NUM_LOGICAL_BITS 0x00FF0000 // EBX[23:16] Bit 16-23 in ebx contains the number of logical
  285. // processors per physical processor when execute cpuid with
  286. // eax set to 1
  287. static unsigned char LogicalProcPerPhysicalProc() {
  288. unsigned int regebx = 0;
  289. __asm {
  290. mov eax, 1
  291. cpuid
  292. mov regebx, ebx
  293. }
  294. return (unsigned char) ((regebx & NUM_LOGICAL_BITS) >> 16);
  295. }
  296. /*
  297. ================
  298. GetAPIC_ID
  299. ================
  300. */
  301. #define INITIAL_APIC_ID_BITS 0xFF000000 // EBX[31:24] Bits 24-31 (8 bits) return the 8-bit unique
  302. // initial APIC ID for the processor this code is running on.
  303. // Default value = 0xff if HT is not supported
  304. static unsigned char GetAPIC_ID() {
  305. unsigned int regebx = 0;
  306. __asm {
  307. mov eax, 1
  308. cpuid
  309. mov regebx, ebx
  310. }
  311. return (unsigned char) ((regebx & INITIAL_APIC_ID_BITS) >> 24);
  312. }
  313. /*
  314. ================
  315. CPUCount
  316. logicalNum is the number of logical CPU per physical CPU
  317. physicalNum is the total number of physical processor
  318. returns one of the HT_* flags
  319. ================
  320. */
  321. #define HT_NOT_CAPABLE 0
  322. #define HT_ENABLED 1
  323. #define HT_DISABLED 2
  324. #define HT_SUPPORTED_NOT_ENABLED 3
  325. #define HT_CANNOT_DETECT 4
  326. int CPUCount( int &logicalNum, int &physicalNum ) {
  327. int statusFlag;
  328. SYSTEM_INFO info;
  329. physicalNum = 1;
  330. logicalNum = 1;
  331. statusFlag = HT_NOT_CAPABLE;
  332. info.dwNumberOfProcessors = 0;
  333. GetSystemInfo (&info);
  334. // Number of physical processors in a non-Intel system
  335. // or in a 32-bit Intel system with Hyper-Threading technology disabled
  336. physicalNum = info.dwNumberOfProcessors;
  337. unsigned char HT_Enabled = 0;
  338. logicalNum = LogicalProcPerPhysicalProc();
  339. if ( logicalNum >= 1 ) { // > 1 doesn't mean HT is enabled in the BIOS
  340. HANDLE hCurrentProcessHandle;
  341. DWORD dwProcessAffinity;
  342. DWORD dwSystemAffinity;
  343. DWORD dwAffinityMask;
  344. // Calculate the appropriate shifts and mask based on the
  345. // number of logical processors.
  346. unsigned char i = 1, PHY_ID_MASK = 0xFF, PHY_ID_SHIFT = 0;
  347. while( i < logicalNum ) {
  348. i *= 2;
  349. PHY_ID_MASK <<= 1;
  350. PHY_ID_SHIFT++;
  351. }
  352. hCurrentProcessHandle = GetCurrentProcess();
  353. GetProcessAffinityMask( hCurrentProcessHandle, &dwProcessAffinity, &dwSystemAffinity );
  354. // Check if available process affinity mask is equal to the
  355. // available system affinity mask
  356. if ( dwProcessAffinity != dwSystemAffinity ) {
  357. statusFlag = HT_CANNOT_DETECT;
  358. physicalNum = -1;
  359. return statusFlag;
  360. }
  361. dwAffinityMask = 1;
  362. while ( dwAffinityMask != 0 && dwAffinityMask <= dwProcessAffinity ) {
  363. // Check if this CPU is available
  364. if ( dwAffinityMask & dwProcessAffinity ) {
  365. if ( SetProcessAffinityMask( hCurrentProcessHandle, dwAffinityMask ) ) {
  366. unsigned char APIC_ID, LOG_ID, PHY_ID;
  367. Sleep( 0 ); // Give OS time to switch CPU
  368. APIC_ID = GetAPIC_ID();
  369. LOG_ID = APIC_ID & ~PHY_ID_MASK;
  370. PHY_ID = APIC_ID >> PHY_ID_SHIFT;
  371. if ( LOG_ID != 0 ) {
  372. HT_Enabled = 1;
  373. }
  374. }
  375. }
  376. dwAffinityMask = dwAffinityMask << 1;
  377. }
  378. // Reset the processor affinity
  379. SetProcessAffinityMask( hCurrentProcessHandle, dwProcessAffinity );
  380. if ( logicalNum == 1 ) { // Normal P4 : HT is disabled in hardware
  381. statusFlag = HT_DISABLED;
  382. } else {
  383. if ( HT_Enabled ) {
  384. // Total physical processors in a Hyper-Threading enabled system.
  385. physicalNum /= logicalNum;
  386. statusFlag = HT_ENABLED;
  387. } else {
  388. statusFlag = HT_SUPPORTED_NOT_ENABLED;
  389. }
  390. }
  391. }
  392. return statusFlag;
  393. }
  394. /*
  395. ================
  396. HasHTT
  397. ================
  398. */
  399. static bool HasHTT() {
  400. unsigned regs[4];
  401. int logicalNum, physicalNum, HTStatusFlag;
  402. // get CPU feature bits
  403. CPUID( 1, regs );
  404. // bit 28 of EDX denotes HTT existence
  405. if ( !( regs[_REG_EDX] & ( 1 << 28 ) ) ) {
  406. return false;
  407. }
  408. HTStatusFlag = CPUCount( logicalNum, physicalNum );
  409. if ( HTStatusFlag != HT_ENABLED ) {
  410. return false;
  411. }
  412. return true;
  413. }
  414. /*
  415. ================
  416. HasHTT
  417. ================
  418. */
  419. static bool HasDAZ() {
  420. __declspec(align(16)) unsigned char FXSaveArea[512];
  421. unsigned char *FXArea = FXSaveArea;
  422. DWORD dwMask = 0;
  423. unsigned regs[4];
  424. // get CPU feature bits
  425. CPUID( 1, regs );
  426. // bit 24 of EDX denotes support for FXSAVE
  427. if ( !( regs[_REG_EDX] & ( 1 << 24 ) ) ) {
  428. return false;
  429. }
  430. memset( FXArea, 0, sizeof( FXSaveArea ) );
  431. __asm {
  432. mov eax, FXArea
  433. FXSAVE [eax]
  434. }
  435. dwMask = *(DWORD *)&FXArea[28]; // Read the MXCSR Mask
  436. return ( ( dwMask & ( 1 << 6 ) ) == ( 1 << 6 ) ); // Return if the DAZ bit is set
  437. }
  438. /*
  439. ================================================================================================
  440. CPU
  441. ================================================================================================
  442. */
  443. /*
  444. ========================
  445. CountSetBits
  446. Helper function to count set bits in the processor mask.
  447. ========================
  448. */
  449. DWORD CountSetBits( ULONG_PTR bitMask ) {
  450. DWORD LSHIFT = sizeof( ULONG_PTR ) * 8 - 1;
  451. DWORD bitSetCount = 0;
  452. ULONG_PTR bitTest = (ULONG_PTR)1 << LSHIFT;
  453. for ( DWORD i = 0; i <= LSHIFT; i++ ) {
  454. bitSetCount += ( ( bitMask & bitTest ) ? 1 : 0 );
  455. bitTest /= 2;
  456. }
  457. return bitSetCount;
  458. }
  459. typedef BOOL (WINAPI *LPFN_GLPI)( PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD );
  460. enum LOGICAL_PROCESSOR_RELATIONSHIP_LOCAL {
  461. localRelationProcessorCore,
  462. localRelationNumaNode,
  463. localRelationCache,
  464. localRelationProcessorPackage
  465. };
  466. struct cpuInfo_t {
  467. int processorPackageCount;
  468. int processorCoreCount;
  469. int logicalProcessorCount;
  470. int numaNodeCount;
  471. struct cacheInfo_t {
  472. int count;
  473. int associativity;
  474. int lineSize;
  475. int size;
  476. } cacheLevel[3];
  477. };
  478. /*
  479. ========================
  480. GetCPUInfo
  481. ========================
  482. */
  483. bool GetCPUInfo( cpuInfo_t & cpuInfo ) {
  484. PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
  485. PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = NULL;
  486. PCACHE_DESCRIPTOR Cache;
  487. LPFN_GLPI glpi;
  488. BOOL done = FALSE;
  489. DWORD returnLength = 0;
  490. DWORD byteOffset = 0;
  491. memset( & cpuInfo, 0, sizeof( cpuInfo ) );
  492. glpi = (LPFN_GLPI)GetProcAddress( GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation" );
  493. if ( NULL == glpi ) {
  494. idLib::Printf( "\nGetLogicalProcessorInformation is not supported.\n" );
  495. return 0;
  496. }
  497. while ( !done ) {
  498. DWORD rc = glpi( buffer, &returnLength );
  499. if ( FALSE == rc ) {
  500. if ( GetLastError() == ERROR_INSUFFICIENT_BUFFER ) {
  501. if ( buffer ) {
  502. free( buffer );
  503. }
  504. buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc( returnLength );
  505. } else {
  506. idLib::Printf( "Sys_CPUCount error: %d\n", GetLastError() );
  507. return false;
  508. }
  509. } else {
  510. done = TRUE;
  511. }
  512. }
  513. ptr = buffer;
  514. while ( byteOffset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= returnLength ) {
  515. switch ( (LOGICAL_PROCESSOR_RELATIONSHIP_LOCAL) ptr->Relationship ) {
  516. case localRelationProcessorCore:
  517. cpuInfo.processorCoreCount++;
  518. // A hyperthreaded core supplies more than one logical processor.
  519. cpuInfo.logicalProcessorCount += CountSetBits( ptr->ProcessorMask );
  520. break;
  521. case localRelationNumaNode:
  522. // Non-NUMA systems report a single record of this type.
  523. cpuInfo.numaNodeCount++;
  524. break;
  525. case localRelationCache:
  526. // Cache data is in ptr->Cache, one CACHE_DESCRIPTOR structure for each cache.
  527. Cache = &ptr->Cache;
  528. if ( Cache->Level >= 1 && Cache->Level <= 3 ) {
  529. int level = Cache->Level - 1;
  530. if ( cpuInfo.cacheLevel[level].count > 0 ) {
  531. cpuInfo.cacheLevel[level].count++;
  532. } else {
  533. cpuInfo.cacheLevel[level].associativity = Cache->Associativity;
  534. cpuInfo.cacheLevel[level].lineSize = Cache->LineSize;
  535. cpuInfo.cacheLevel[level].size = Cache->Size;
  536. }
  537. }
  538. break;
  539. case localRelationProcessorPackage:
  540. // Logical processors share a physical package.
  541. cpuInfo.processorPackageCount++;
  542. break;
  543. default:
  544. idLib::Printf( "Error: Unsupported LOGICAL_PROCESSOR_RELATIONSHIP value.\n" );
  545. break;
  546. }
  547. byteOffset += sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION );
  548. ptr++;
  549. }
  550. free( buffer );
  551. return true;
  552. }
  553. /*
  554. ========================
  555. Sys_GetCPUCacheSize
  556. ========================
  557. */
  558. void Sys_GetCPUCacheSize( int level, int & count, int & size, int & lineSize ) {
  559. assert( level >= 1 && level <= 3 );
  560. cpuInfo_t cpuInfo;
  561. GetCPUInfo( cpuInfo );
  562. count = cpuInfo.cacheLevel[level - 1].count;
  563. size = cpuInfo.cacheLevel[level - 1].size;
  564. lineSize = cpuInfo.cacheLevel[level - 1].lineSize;
  565. }
  566. /*
  567. ========================
  568. Sys_CPUCount
  569. numLogicalCPUCores - the number of logical CPU per core
  570. numPhysicalCPUCores - the total number of cores per package
  571. numCPUPackages - the total number of packages (physical processors)
  572. ========================
  573. */
  574. void Sys_CPUCount( int & numLogicalCPUCores, int & numPhysicalCPUCores, int & numCPUPackages ) {
  575. cpuInfo_t cpuInfo;
  576. GetCPUInfo( cpuInfo );
  577. numPhysicalCPUCores = cpuInfo.processorCoreCount;
  578. numLogicalCPUCores = cpuInfo.logicalProcessorCount;
  579. numCPUPackages = cpuInfo.processorPackageCount;
  580. }
  581. /*
  582. ================
  583. Sys_GetCPUId
  584. ================
  585. */
  586. cpuid_t Sys_GetCPUId() {
  587. int flags;
  588. // verify we're at least a Pentium or 486 with CPUID support
  589. if ( !HasCPUID() ) {
  590. return CPUID_UNSUPPORTED;
  591. }
  592. // check for an AMD
  593. if ( IsAMD() ) {
  594. flags = CPUID_AMD;
  595. } else {
  596. flags = CPUID_INTEL;
  597. }
  598. // check for Multi Media Extensions
  599. if ( HasMMX() ) {
  600. flags |= CPUID_MMX;
  601. }
  602. // check for 3DNow!
  603. if ( Has3DNow() ) {
  604. flags |= CPUID_3DNOW;
  605. }
  606. // check for Streaming SIMD Extensions
  607. if ( HasSSE() ) {
  608. flags |= CPUID_SSE | CPUID_FTZ;
  609. }
  610. // check for Streaming SIMD Extensions 2
  611. if ( HasSSE2() ) {
  612. flags |= CPUID_SSE2;
  613. }
  614. // check for Streaming SIMD Extensions 3 aka Prescott's New Instructions
  615. if ( HasSSE3() ) {
  616. flags |= CPUID_SSE3;
  617. }
  618. // check for Hyper-Threading Technology
  619. if ( HasHTT() ) {
  620. flags |= CPUID_HTT;
  621. }
  622. // check for Conditional Move (CMOV) and fast floating point comparison (FCOMI) instructions
  623. if ( HasCMOV() ) {
  624. flags |= CPUID_CMOV;
  625. }
  626. // check for Denormals-Are-Zero mode
  627. if ( HasDAZ() ) {
  628. flags |= CPUID_DAZ;
  629. }
  630. return (cpuid_t)flags;
  631. }
  632. /*
  633. ===============================================================================
  634. FPU
  635. ===============================================================================
  636. */
  637. typedef struct bitFlag_s {
  638. char * name;
  639. int bit;
  640. } bitFlag_t;
  641. static byte fpuState[128], *statePtr = fpuState;
  642. static char fpuString[2048];
  643. static bitFlag_t controlWordFlags[] = {
  644. { "Invalid operation", 0 },
  645. { "Denormalized operand", 1 },
  646. { "Divide-by-zero", 2 },
  647. { "Numeric overflow", 3 },
  648. { "Numeric underflow", 4 },
  649. { "Inexact result (precision)", 5 },
  650. { "Infinity control", 12 },
  651. { "", 0 }
  652. };
  653. static char *precisionControlField[] = {
  654. "Single Precision (24-bits)",
  655. "Reserved",
  656. "Double Precision (53-bits)",
  657. "Double Extended Precision (64-bits)"
  658. };
  659. static char *roundingControlField[] = {
  660. "Round to nearest",
  661. "Round down",
  662. "Round up",
  663. "Round toward zero"
  664. };
  665. static bitFlag_t statusWordFlags[] = {
  666. { "Invalid operation", 0 },
  667. { "Denormalized operand", 1 },
  668. { "Divide-by-zero", 2 },
  669. { "Numeric overflow", 3 },
  670. { "Numeric underflow", 4 },
  671. { "Inexact result (precision)", 5 },
  672. { "Stack fault", 6 },
  673. { "Error summary status", 7 },
  674. { "FPU busy", 15 },
  675. { "", 0 }
  676. };
  677. /*
  678. ===============
  679. Sys_FPU_PrintStateFlags
  680. ===============
  681. */
  682. int Sys_FPU_PrintStateFlags( char *ptr, int ctrl, int stat, int tags, int inof, int inse, int opof, int opse ) {
  683. int i, length = 0;
  684. length += sprintf( ptr+length, "CTRL = %08x\n"
  685. "STAT = %08x\n"
  686. "TAGS = %08x\n"
  687. "INOF = %08x\n"
  688. "INSE = %08x\n"
  689. "OPOF = %08x\n"
  690. "OPSE = %08x\n"
  691. "\n",
  692. ctrl, stat, tags, inof, inse, opof, opse );
  693. length += sprintf( ptr+length, "Control Word:\n" );
  694. for ( i = 0; controlWordFlags[i].name[0]; i++ ) {
  695. length += sprintf( ptr+length, " %-30s = %s\n", controlWordFlags[i].name, ( ctrl & ( 1 << controlWordFlags[i].bit ) ) ? "true" : "false" );
  696. }
  697. length += sprintf( ptr+length, " %-30s = %s\n", "Precision control", precisionControlField[(ctrl>>8)&3] );
  698. length += sprintf( ptr+length, " %-30s = %s\n", "Rounding control", roundingControlField[(ctrl>>10)&3] );
  699. length += sprintf( ptr+length, "Status Word:\n" );
  700. for ( i = 0; statusWordFlags[i].name[0]; i++ ) {
  701. ptr += sprintf( ptr+length, " %-30s = %s\n", statusWordFlags[i].name, ( stat & ( 1 << statusWordFlags[i].bit ) ) ? "true" : "false" );
  702. }
  703. length += sprintf( ptr+length, " %-30s = %d%d%d%d\n", "Condition code", (stat>>8)&1, (stat>>9)&1, (stat>>10)&1, (stat>>14)&1 );
  704. length += sprintf( ptr+length, " %-30s = %d\n", "Top of stack pointer", (stat>>11)&7 );
  705. return length;
  706. }
  707. /*
  708. ===============
  709. Sys_FPU_StackIsEmpty
  710. ===============
  711. */
  712. bool Sys_FPU_StackIsEmpty() {
  713. __asm {
  714. mov eax, statePtr
  715. fnstenv [eax]
  716. mov eax, [eax+8]
  717. xor eax, 0xFFFFFFFF
  718. and eax, 0x0000FFFF
  719. jz empty
  720. }
  721. return false;
  722. empty:
  723. return true;
  724. }
  725. /*
  726. ===============
  727. Sys_FPU_ClearStack
  728. ===============
  729. */
  730. void Sys_FPU_ClearStack() {
  731. __asm {
  732. mov eax, statePtr
  733. fnstenv [eax]
  734. mov eax, [eax+8]
  735. xor eax, 0xFFFFFFFF
  736. mov edx, (3<<14)
  737. emptyStack:
  738. mov ecx, eax
  739. and ecx, edx
  740. jz done
  741. fstp st
  742. shr edx, 2
  743. jmp emptyStack
  744. done:
  745. }
  746. }
  747. /*
  748. ===============
  749. Sys_FPU_GetState
  750. gets the FPU state without changing the state
  751. ===============
  752. */
  753. const char *Sys_FPU_GetState() {
  754. double fpuStack[8] = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
  755. double *fpuStackPtr = fpuStack;
  756. int i, numValues;
  757. char *ptr;
  758. __asm {
  759. mov esi, statePtr
  760. mov edi, fpuStackPtr
  761. fnstenv [esi]
  762. mov esi, [esi+8]
  763. xor esi, 0xFFFFFFFF
  764. mov edx, (3<<14)
  765. xor eax, eax
  766. mov ecx, esi
  767. and ecx, edx
  768. jz done
  769. fst qword ptr [edi+0]
  770. inc eax
  771. shr edx, 2
  772. mov ecx, esi
  773. and ecx, edx
  774. jz done
  775. fxch st(1)
  776. fst qword ptr [edi+8]
  777. inc eax
  778. fxch st(1)
  779. shr edx, 2
  780. mov ecx, esi
  781. and ecx, edx
  782. jz done
  783. fxch st(2)
  784. fst qword ptr [edi+16]
  785. inc eax
  786. fxch st(2)
  787. shr edx, 2
  788. mov ecx, esi
  789. and ecx, edx
  790. jz done
  791. fxch st(3)
  792. fst qword ptr [edi+24]
  793. inc eax
  794. fxch st(3)
  795. shr edx, 2
  796. mov ecx, esi
  797. and ecx, edx
  798. jz done
  799. fxch st(4)
  800. fst qword ptr [edi+32]
  801. inc eax
  802. fxch st(4)
  803. shr edx, 2
  804. mov ecx, esi
  805. and ecx, edx
  806. jz done
  807. fxch st(5)
  808. fst qword ptr [edi+40]
  809. inc eax
  810. fxch st(5)
  811. shr edx, 2
  812. mov ecx, esi
  813. and ecx, edx
  814. jz done
  815. fxch st(6)
  816. fst qword ptr [edi+48]
  817. inc eax
  818. fxch st(6)
  819. shr edx, 2
  820. mov ecx, esi
  821. and ecx, edx
  822. jz done
  823. fxch st(7)
  824. fst qword ptr [edi+56]
  825. inc eax
  826. fxch st(7)
  827. done:
  828. mov numValues, eax
  829. }
  830. int ctrl = *(int *)&fpuState[0];
  831. int stat = *(int *)&fpuState[4];
  832. int tags = *(int *)&fpuState[8];
  833. int inof = *(int *)&fpuState[12];
  834. int inse = *(int *)&fpuState[16];
  835. int opof = *(int *)&fpuState[20];
  836. int opse = *(int *)&fpuState[24];
  837. ptr = fpuString;
  838. ptr += sprintf( ptr,"FPU State:\n"
  839. "num values on stack = %d\n", numValues );
  840. for ( i = 0; i < 8; i++ ) {
  841. ptr += sprintf( ptr, "ST%d = %1.10e\n", i, fpuStack[i] );
  842. }
  843. Sys_FPU_PrintStateFlags( ptr, ctrl, stat, tags, inof, inse, opof, opse );
  844. return fpuString;
  845. }
  846. /*
  847. ===============
  848. Sys_FPU_EnableExceptions
  849. ===============
  850. */
  851. void Sys_FPU_EnableExceptions( int exceptions ) {
  852. __asm {
  853. mov eax, statePtr
  854. mov ecx, exceptions
  855. and cx, 63
  856. not cx
  857. fnstcw word ptr [eax]
  858. mov bx, word ptr [eax]
  859. or bx, 63
  860. and bx, cx
  861. mov word ptr [eax], bx
  862. fldcw word ptr [eax]
  863. }
  864. }
  865. /*
  866. ===============
  867. Sys_FPU_SetPrecision
  868. ===============
  869. */
  870. void Sys_FPU_SetPrecision( int precision ) {
  871. short precisionBitTable[4] = { 0, 1, 3, 0 };
  872. short precisionBits = precisionBitTable[precision & 3] << 8;
  873. short precisionMask = ~( ( 1 << 9 ) | ( 1 << 8 ) );
  874. __asm {
  875. mov eax, statePtr
  876. mov cx, precisionBits
  877. fnstcw word ptr [eax]
  878. mov bx, word ptr [eax]
  879. and bx, precisionMask
  880. or bx, cx
  881. mov word ptr [eax], bx
  882. fldcw word ptr [eax]
  883. }
  884. }
  885. /*
  886. ================
  887. Sys_FPU_SetRounding
  888. ================
  889. */
  890. void Sys_FPU_SetRounding( int rounding ) {
  891. short roundingBitTable[4] = { 0, 1, 2, 3 };
  892. short roundingBits = roundingBitTable[rounding & 3] << 10;
  893. short roundingMask = ~( ( 1 << 11 ) | ( 1 << 10 ) );
  894. __asm {
  895. mov eax, statePtr
  896. mov cx, roundingBits
  897. fnstcw word ptr [eax]
  898. mov bx, word ptr [eax]
  899. and bx, roundingMask
  900. or bx, cx
  901. mov word ptr [eax], bx
  902. fldcw word ptr [eax]
  903. }
  904. }
  905. /*
  906. ================
  907. Sys_FPU_SetDAZ
  908. ================
  909. */
  910. void Sys_FPU_SetDAZ( bool enable ) {
  911. DWORD dwData;
  912. _asm {
  913. movzx ecx, byte ptr enable
  914. and ecx, 1
  915. shl ecx, 6
  916. STMXCSR dword ptr dwData
  917. mov eax, dwData
  918. and eax, ~(1<<6) // clear DAX bit
  919. or eax, ecx // set the DAZ bit
  920. mov dwData, eax
  921. LDMXCSR dword ptr dwData
  922. }
  923. }
  924. /*
  925. ================
  926. Sys_FPU_SetFTZ
  927. ================
  928. */
  929. void Sys_FPU_SetFTZ( bool enable ) {
  930. DWORD dwData;
  931. _asm {
  932. movzx ecx, byte ptr enable
  933. and ecx, 1
  934. shl ecx, 15
  935. STMXCSR dword ptr dwData
  936. mov eax, dwData
  937. and eax, ~(1<<15) // clear FTZ bit
  938. or eax, ecx // set the FTZ bit
  939. mov dwData, eax
  940. LDMXCSR dword ptr dwData
  941. }
  942. }