win_cpu.cpp 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931
  1. /*
  2. ===========================================================================
  3. Doom 3 GPL Source Code
  4. Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.
This file is part of the Doom 3 GPL Source Code ("Doom 3 Source Code").
  6. Doom 3 Source Code is free software: you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation, either version 3 of the License, or
  9. (at your option) any later version.
  10. Doom 3 Source Code is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with Doom 3 Source Code. If not, see <http://www.gnu.org/licenses/>.
  16. In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code. If not, please request a copy in writing from id Software at the address below.
  17. If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
  18. ===========================================================================
  19. */
  20. #include "../../idlib/precompiled.h"
  21. #pragma hdrstop
  22. #include "win_local.h"
  23. /*
  24. ==============================================================
  25. Clock ticks
  26. ==============================================================
  27. */
  28. /*
  29. ================
  30. Sys_GetClockTicks
  31. ================
  32. */
  33. double Sys_GetClockTicks( void ) {
  34. #if 0
  35. LARGE_INTEGER li;
  36. QueryPerformanceCounter( &li );
  37. return = (double ) li.LowPart + (double) 0xFFFFFFFF * li.HighPart;
  38. #else
  39. unsigned long lo, hi;
  40. __asm {
  41. push ebx
  42. xor eax, eax
  43. cpuid
  44. rdtsc
  45. mov lo, eax
  46. mov hi, edx
  47. pop ebx
  48. }
  49. return (double ) lo + (double) 0xFFFFFFFF * hi;
  50. #endif
  51. }
  52. /*
  53. ================
  54. Sys_ClockTicksPerSecond
  55. ================
  56. */
  57. double Sys_ClockTicksPerSecond( void ) {
  58. static double ticks = 0;
  59. #if 0
  60. if ( !ticks ) {
  61. LARGE_INTEGER li;
  62. QueryPerformanceFrequency( &li );
  63. ticks = li.QuadPart;
  64. }
  65. #else
  66. if ( !ticks ) {
  67. HKEY hKey;
  68. LPBYTE ProcSpeed;
  69. DWORD buflen, ret;
  70. if ( !RegOpenKeyEx( HKEY_LOCAL_MACHINE, "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", 0, KEY_READ, &hKey ) ) {
  71. ProcSpeed = 0;
  72. buflen = sizeof( ProcSpeed );
  73. ret = RegQueryValueEx( hKey, "~MHz", NULL, NULL, (LPBYTE) &ProcSpeed, &buflen );
  74. // If we don't succeed, try some other spellings.
  75. if ( ret != ERROR_SUCCESS ) {
  76. ret = RegQueryValueEx( hKey, "~Mhz", NULL, NULL, (LPBYTE) &ProcSpeed, &buflen );
  77. }
  78. if ( ret != ERROR_SUCCESS ) {
  79. ret = RegQueryValueEx( hKey, "~mhz", NULL, NULL, (LPBYTE) &ProcSpeed, &buflen );
  80. }
  81. RegCloseKey( hKey );
  82. if ( ret == ERROR_SUCCESS ) {
  83. ticks = (double) ((unsigned long)ProcSpeed) * 1000000;
  84. }
  85. }
  86. }
  87. #endif
  88. return ticks;
  89. }
  90. /*
  91. ==============================================================
  92. CPU
  93. ==============================================================
  94. */
  95. /*
  96. ================
  97. HasCPUID
  98. ================
  99. */
// Returns true if bit 21 (the ID bit) of EFLAGS can be toggled by software,
// false otherwise. The test flips the bit from whatever state it is in,
// reloads EFLAGS and checks whether the change took effect.
// The asm block exits by jumping to the C labels 'err' and 'good' below.
// NOTE(review): the modified EFLAGS value is left in place on return.
static bool HasCPUID( void ) {
	__asm
	{
		pushfd						// copy EFLAGS into eax via the stack
		pop		eax
		test	eax, 0x00200000		// is the ID bit (bit 21) currently set?
		jz		set21				// bit 21 is clear, so try to set it (label set21)
		and		eax, 0xffdfffff		// bit 21 is set: clear it
		push	eax					// push the modified value
		popfd						// load it into EFLAGS
		pushfd						// read EFLAGS back
		pop		eax
		test	eax, 0x00200000		// did the bit stay cleared?
		jz		good				// yes: the bit is writable
		jmp		err					// no: the bit could not be changed
	set21:
		or		eax, 0x00200000		// set the ID bit
		push	eax					// push the modified value
		popfd						// load it into EFLAGS
		pushfd						// read EFLAGS back
		pop		eax
		test	eax, 0x00200000		// did the bit stay set?
		jnz		good				// yes: the bit is writable
		jmp		err
	}

err:
	return false;
good:
	return true;
}
// Indices into the four-element register array filled in by CPUID().
#define _REG_EAX		0
#define _REG_EBX		1
#define _REG_ECX		2
#define _REG_EDX		3
  134. /*
  135. ================
  136. CPUID
  137. ================
  138. */
  139. static void CPUID( int func, unsigned regs[4] ) {
  140. unsigned regEAX, regEBX, regECX, regEDX;
  141. __asm pusha
  142. __asm mov eax, func
  143. __asm __emit 00fh
  144. __asm __emit 0a2h
  145. __asm mov regEAX, eax
  146. __asm mov regEBX, ebx
  147. __asm mov regECX, ecx
  148. __asm mov regEDX, edx
  149. __asm popa
  150. regs[_REG_EAX] = regEAX;
  151. regs[_REG_EBX] = regEBX;
  152. regs[_REG_ECX] = regECX;
  153. regs[_REG_EDX] = regEDX;
  154. }
  155. /*
  156. ================
  157. IsAMD
  158. ================
  159. */
  160. static bool IsAMD( void ) {
  161. char pstring[16];
  162. char processorString[13];
  163. // get name of processor
  164. CPUID( 0, ( unsigned int * ) pstring );
  165. processorString[0] = pstring[4];
  166. processorString[1] = pstring[5];
  167. processorString[2] = pstring[6];
  168. processorString[3] = pstring[7];
  169. processorString[4] = pstring[12];
  170. processorString[5] = pstring[13];
  171. processorString[6] = pstring[14];
  172. processorString[7] = pstring[15];
  173. processorString[8] = pstring[8];
  174. processorString[9] = pstring[9];
  175. processorString[10] = pstring[10];
  176. processorString[11] = pstring[11];
  177. processorString[12] = 0;
  178. if ( strcmp( processorString, "AuthenticAMD" ) == 0 ) {
  179. return true;
  180. }
  181. return false;
  182. }
  183. /*
  184. ================
  185. HasCMOV
  186. ================
  187. */
  188. static bool HasCMOV( void ) {
  189. unsigned regs[4];
  190. // get CPU feature bits
  191. CPUID( 1, regs );
  192. // bit 15 of EDX denotes CMOV existence
  193. if ( regs[_REG_EDX] & ( 1 << 15 ) ) {
  194. return true;
  195. }
  196. return false;
  197. }
  198. /*
  199. ================
  200. Has3DNow
  201. ================
  202. */
  203. static bool Has3DNow( void ) {
  204. unsigned regs[4];
  205. // check AMD-specific functions
  206. CPUID( 0x80000000, regs );
  207. if ( regs[_REG_EAX] < 0x80000000 ) {
  208. return false;
  209. }
  210. // bit 31 of EDX denotes 3DNow! support
  211. CPUID( 0x80000001, regs );
  212. if ( regs[_REG_EDX] & ( 1 << 31 ) ) {
  213. return true;
  214. }
  215. return false;
  216. }
  217. /*
  218. ================
  219. HasMMX
  220. ================
  221. */
  222. static bool HasMMX( void ) {
  223. unsigned regs[4];
  224. // get CPU feature bits
  225. CPUID( 1, regs );
  226. // bit 23 of EDX denotes MMX existence
  227. if ( regs[_REG_EDX] & ( 1 << 23 ) ) {
  228. return true;
  229. }
  230. return false;
  231. }
  232. /*
  233. ================
  234. HasSSE
  235. ================
  236. */
  237. static bool HasSSE( void ) {
  238. unsigned regs[4];
  239. // get CPU feature bits
  240. CPUID( 1, regs );
  241. // bit 25 of EDX denotes SSE existence
  242. if ( regs[_REG_EDX] & ( 1 << 25 ) ) {
  243. return true;
  244. }
  245. return false;
  246. }
  247. /*
  248. ================
  249. HasSSE2
  250. ================
  251. */
  252. static bool HasSSE2( void ) {
  253. unsigned regs[4];
  254. // get CPU feature bits
  255. CPUID( 1, regs );
  256. // bit 26 of EDX denotes SSE2 existence
  257. if ( regs[_REG_EDX] & ( 1 << 26 ) ) {
  258. return true;
  259. }
  260. return false;
  261. }
  262. /*
  263. ================
  264. HasSSE3
  265. ================
  266. */
  267. static bool HasSSE3( void ) {
  268. unsigned regs[4];
  269. // get CPU feature bits
  270. CPUID( 1, regs );
  271. // bit 0 of ECX denotes SSE3 existence
  272. if ( regs[_REG_ECX] & ( 1 << 0 ) ) {
  273. return true;
  274. }
  275. return false;
  276. }
  277. /*
  278. ================
  279. LogicalProcPerPhysicalProc
  280. ================
  281. */
  282. #define NUM_LOGICAL_BITS 0x00FF0000 // EBX[23:16] Bit 16-23 in ebx contains the number of logical
  283. // processors per physical processor when execute cpuid with
  284. // eax set to 1
  285. static unsigned char LogicalProcPerPhysicalProc( void ) {
  286. unsigned int regebx = 0;
  287. __asm {
  288. mov eax, 1
  289. cpuid
  290. mov regebx, ebx
  291. }
  292. return (unsigned char) ((regebx & NUM_LOGICAL_BITS) >> 16);
  293. }
  294. /*
  295. ================
  296. GetAPIC_ID
  297. ================
  298. */
  299. #define INITIAL_APIC_ID_BITS 0xFF000000 // EBX[31:24] Bits 24-31 (8 bits) return the 8-bit unique
  300. // initial APIC ID for the processor this code is running on.
  301. // Default value = 0xff if HT is not supported
  302. static unsigned char GetAPIC_ID( void ) {
  303. unsigned int regebx = 0;
  304. __asm {
  305. mov eax, 1
  306. cpuid
  307. mov regebx, ebx
  308. }
  309. return (unsigned char) ((regebx & INITIAL_APIC_ID_BITS) >> 24);
  310. }
  311. /*
  312. ================
  313. CPUCount
  314. logicalNum is the number of logical CPU per physical CPU
  315. physicalNum is the total number of physical processor
  316. returns one of the HT_* flags
  317. ================
  318. */
// Status values returned by CPUCount().
#define HT_NOT_CAPABLE				0	// initial status; returned when the logical processor count is below 1
#define HT_ENABLED					1	// more than one logical processor reported and a non-zero logical ID was seen
#define HT_DISABLED					2	// exactly one logical processor per physical processor reported
#define HT_SUPPORTED_NOT_ENABLED	3	// more than one logical processor reported but no non-zero logical ID was seen
#define HT_CANNOT_DETECT			4	// process affinity mask differs from the system affinity mask
  324. int CPUCount( int &logicalNum, int &physicalNum ) {
  325. int statusFlag;
  326. SYSTEM_INFO info;
  327. physicalNum = 1;
  328. logicalNum = 1;
  329. statusFlag = HT_NOT_CAPABLE;
  330. info.dwNumberOfProcessors = 0;
  331. GetSystemInfo (&info);
  332. // Number of physical processors in a non-Intel system
  333. // or in a 32-bit Intel system with Hyper-Threading technology disabled
  334. physicalNum = info.dwNumberOfProcessors;
  335. unsigned char HT_Enabled = 0;
  336. logicalNum = LogicalProcPerPhysicalProc();
  337. if ( logicalNum >= 1 ) { // > 1 doesn't mean HT is enabled in the BIOS
  338. HANDLE hCurrentProcessHandle;
  339. DWORD dwProcessAffinity;
  340. DWORD dwSystemAffinity;
  341. DWORD dwAffinityMask;
  342. // Calculate the appropriate shifts and mask based on the
  343. // number of logical processors.
  344. unsigned char i = 1, PHY_ID_MASK = 0xFF, PHY_ID_SHIFT = 0;
  345. while( i < logicalNum ) {
  346. i *= 2;
  347. PHY_ID_MASK <<= 1;
  348. PHY_ID_SHIFT++;
  349. }
  350. hCurrentProcessHandle = GetCurrentProcess();
  351. GetProcessAffinityMask( hCurrentProcessHandle, &dwProcessAffinity, &dwSystemAffinity );
  352. // Check if available process affinity mask is equal to the
  353. // available system affinity mask
  354. if ( dwProcessAffinity != dwSystemAffinity ) {
  355. statusFlag = HT_CANNOT_DETECT;
  356. physicalNum = -1;
  357. return statusFlag;
  358. }
  359. dwAffinityMask = 1;
  360. while ( dwAffinityMask != 0 && dwAffinityMask <= dwProcessAffinity ) {
  361. // Check if this CPU is available
  362. if ( dwAffinityMask & dwProcessAffinity ) {
  363. if ( SetProcessAffinityMask( hCurrentProcessHandle, dwAffinityMask ) ) {
  364. unsigned char APIC_ID, LOG_ID, PHY_ID;
  365. Sleep( 0 ); // Give OS time to switch CPU
  366. APIC_ID = GetAPIC_ID();
  367. LOG_ID = APIC_ID & ~PHY_ID_MASK;
  368. PHY_ID = APIC_ID >> PHY_ID_SHIFT;
  369. if ( LOG_ID != 0 ) {
  370. HT_Enabled = 1;
  371. }
  372. }
  373. }
  374. dwAffinityMask = dwAffinityMask << 1;
  375. }
  376. // Reset the processor affinity
  377. SetProcessAffinityMask( hCurrentProcessHandle, dwProcessAffinity );
  378. if ( logicalNum == 1 ) { // Normal P4 : HT is disabled in hardware
  379. statusFlag = HT_DISABLED;
  380. } else {
  381. if ( HT_Enabled ) {
  382. // Total physical processors in a Hyper-Threading enabled system.
  383. physicalNum /= logicalNum;
  384. statusFlag = HT_ENABLED;
  385. } else {
  386. statusFlag = HT_SUPPORTED_NOT_ENABLED;
  387. }
  388. }
  389. }
  390. return statusFlag;
  391. }
  392. /*
  393. ================
  394. HasHTT
  395. ================
  396. */
  397. static bool HasHTT( void ) {
  398. unsigned regs[4];
  399. int logicalNum, physicalNum, HTStatusFlag;
  400. // get CPU feature bits
  401. CPUID( 1, regs );
  402. // bit 28 of EDX denotes HTT existence
  403. if ( !( regs[_REG_EDX] & ( 1 << 28 ) ) ) {
  404. return false;
  405. }
  406. HTStatusFlag = CPUCount( logicalNum, physicalNum );
  407. if ( HTStatusFlag != HT_ENABLED ) {
  408. return false;
  409. }
  410. return true;
  411. }
/*
================
HasDAZ
================
*/
  417. static bool HasDAZ( void ) {
  418. __declspec(align(16)) unsigned char FXSaveArea[512];
  419. unsigned char *FXArea = FXSaveArea;
  420. DWORD dwMask = 0;
  421. unsigned regs[4];
  422. // get CPU feature bits
  423. CPUID( 1, regs );
  424. // bit 24 of EDX denotes support for FXSAVE
  425. if ( !( regs[_REG_EDX] & ( 1 << 24 ) ) ) {
  426. return false;
  427. }
  428. memset( FXArea, 0, sizeof( FXSaveArea ) );
  429. __asm {
  430. mov eax, FXArea
  431. FXSAVE [eax]
  432. }
  433. dwMask = *(DWORD *)&FXArea[28]; // Read the MXCSR Mask
  434. return ( ( dwMask & ( 1 << 6 ) ) == ( 1 << 6 ) ); // Return if the DAZ bit is set
  435. }
  436. /*
  437. ================
  438. Sys_GetCPUId
  439. ================
  440. */
  441. cpuid_t Sys_GetCPUId( void ) {
  442. int flags;
  443. // verify we're at least a Pentium or 486 with CPUID support
  444. if ( !HasCPUID() ) {
  445. return CPUID_UNSUPPORTED;
  446. }
  447. // check for an AMD
  448. if ( IsAMD() ) {
  449. flags = CPUID_AMD;
  450. } else {
  451. flags = CPUID_INTEL;
  452. }
  453. // check for Multi Media Extensions
  454. if ( HasMMX() ) {
  455. flags |= CPUID_MMX;
  456. }
  457. // check for 3DNow!
  458. if ( Has3DNow() ) {
  459. flags |= CPUID_3DNOW;
  460. }
  461. // check for Streaming SIMD Extensions
  462. if ( HasSSE() ) {
  463. flags |= CPUID_SSE | CPUID_FTZ;
  464. }
  465. // check for Streaming SIMD Extensions 2
  466. if ( HasSSE2() ) {
  467. flags |= CPUID_SSE2;
  468. }
  469. // check for Streaming SIMD Extensions 3 aka Prescott's New Instructions
  470. if ( HasSSE3() ) {
  471. flags |= CPUID_SSE3;
  472. }
  473. // check for Hyper-Threading Technology
  474. if ( HasHTT() ) {
  475. flags |= CPUID_HTT;
  476. }
  477. // check for Conditional Move (CMOV) and fast floating point comparison (FCOMI) instructions
  478. if ( HasCMOV() ) {
  479. flags |= CPUID_CMOV;
  480. }
  481. // check for Denormals-Are-Zero mode
  482. if ( HasDAZ() ) {
  483. flags |= CPUID_DAZ;
  484. }
  485. return (cpuid_t)flags;
  486. }
  487. /*
  488. ===============================================================================
  489. FPU
  490. ===============================================================================
  491. */
  492. typedef struct bitFlag_s {
  493. char * name;
  494. int bit;
  495. } bitFlag_t;
  496. static byte fpuState[128], *statePtr = fpuState;
  497. static char fpuString[2048];
  498. static bitFlag_t controlWordFlags[] = {
  499. { "Invalid operation", 0 },
  500. { "Denormalized operand", 1 },
  501. { "Divide-by-zero", 2 },
  502. { "Numeric overflow", 3 },
  503. { "Numeric underflow", 4 },
  504. { "Inexact result (precision)", 5 },
  505. { "Infinity control", 12 },
  506. { "", 0 }
  507. };
  508. static char *precisionControlField[] = {
  509. "Single Precision (24-bits)",
  510. "Reserved",
  511. "Double Precision (53-bits)",
  512. "Double Extended Precision (64-bits)"
  513. };
  514. static char *roundingControlField[] = {
  515. "Round to nearest",
  516. "Round down",
  517. "Round up",
  518. "Round toward zero"
  519. };
  520. static bitFlag_t statusWordFlags[] = {
  521. { "Invalid operation", 0 },
  522. { "Denormalized operand", 1 },
  523. { "Divide-by-zero", 2 },
  524. { "Numeric overflow", 3 },
  525. { "Numeric underflow", 4 },
  526. { "Inexact result (precision)", 5 },
  527. { "Stack fault", 6 },
  528. { "Error summary status", 7 },
  529. { "FPU busy", 15 },
  530. { "", 0 }
  531. };
  532. /*
  533. ===============
  534. Sys_FPU_PrintStateFlags
  535. ===============
  536. */
  537. int Sys_FPU_PrintStateFlags( char *ptr, int ctrl, int stat, int tags, int inof, int inse, int opof, int opse ) {
  538. int i, length = 0;
  539. length += sprintf( ptr+length, "CTRL = %08x\n"
  540. "STAT = %08x\n"
  541. "TAGS = %08x\n"
  542. "INOF = %08x\n"
  543. "INSE = %08x\n"
  544. "OPOF = %08x\n"
  545. "OPSE = %08x\n"
  546. "\n",
  547. ctrl, stat, tags, inof, inse, opof, opse );
  548. length += sprintf( ptr+length, "Control Word:\n" );
  549. for ( i = 0; controlWordFlags[i].name[0]; i++ ) {
  550. length += sprintf( ptr+length, " %-30s = %s\n", controlWordFlags[i].name, ( ctrl & ( 1 << controlWordFlags[i].bit ) ) ? "true" : "false" );
  551. }
  552. length += sprintf( ptr+length, " %-30s = %s\n", "Precision control", precisionControlField[(ctrl>>8)&3] );
  553. length += sprintf( ptr+length, " %-30s = %s\n", "Rounding control", roundingControlField[(ctrl>>10)&3] );
  554. length += sprintf( ptr+length, "Status Word:\n" );
  555. for ( i = 0; statusWordFlags[i].name[0]; i++ ) {
  556. ptr += sprintf( ptr+length, " %-30s = %s\n", statusWordFlags[i].name, ( stat & ( 1 << statusWordFlags[i].bit ) ) ? "true" : "false" );
  557. }
  558. length += sprintf( ptr+length, " %-30s = %d%d%d%d\n", "Condition code", (stat>>8)&1, (stat>>9)&1, (stat>>10)&1, (stat>>14)&1 );
  559. length += sprintf( ptr+length, " %-30s = %d\n", "Top of stack pointer", (stat>>11)&7 );
  560. return length;
  561. }
  562. /*
  563. ===============
  564. Sys_FPU_StackIsEmpty
  565. ===============
  566. */
// Returns true when the low 16 bits of the FPU tag word are all ones, i.e.
// when every two-bit register tag reads 11, and false otherwise.
// The FPU environment is stored into the file-level fpuState buffer (through
// statePtr) and the tag word is read from byte offset 8 of that buffer.
// NOTE(review): per the Intel documentation fnstenv also masks all x87
// exceptions after storing the environment - confirm callers do not rely on
// the exception masks being preserved.
bool Sys_FPU_StackIsEmpty( void ) {
	__asm {
		mov			eax, statePtr
		fnstenv		[eax]				// store the FPU environment
		mov			eax, [eax+8]		// load the tag word
		xor			eax, 0xFFFFFFFF		// invert: a tag of 11 becomes 00
		and			eax, 0x0000FFFF		// keep only the 16 tag bits
		jz			empty				// all zero -> every tag was 11
	}
	return false;
empty:
	return true;
}
  580. /*
  581. ===============
  582. Sys_FPU_ClearStack
  583. ===============
  584. */
// Pops values off the FPU register stack until a register tagged 11 is met.
// The tag word is read once (via fnstenv into fpuState) and inverted; the
// loop then walks its two-bit tag fields from bits 15:14 downwards, popping
// one value with 'fstp st' for every field that is not 11. It stops at the
// first field that is 11, or when the mask has been shifted out to zero.
// NOTE(review): this presumably assumes the stack is filled from physical
// register 7 downwards - verify.
void Sys_FPU_ClearStack( void ) {
	__asm {
		mov			eax, statePtr
		fnstenv		[eax]				// store the FPU environment
		mov			eax, [eax+8]		// load the tag word
		xor			eax, 0xFFFFFFFF		// invert: a tag of 11 becomes 00
		mov			edx, (3<<14)		// mask for the highest two-bit tag field
	emptyStack:
		mov			ecx, eax
		and			ecx, edx			// isolate the current tag field
		jz			done				// field was 11 (or mask exhausted): stop
		fstp		st					// pop one value off the stack
		shr			edx, 2				// move the mask to the next lower field
		jmp			emptyStack
	done:
	}
}
  602. /*
  603. ===============
  604. Sys_FPU_GetState
  605. gets the FPU state without changing the state
  606. ===============
  607. */
// Builds a text report of the FPU state in the file-level fpuString buffer
// and returns a pointer to it. The returned buffer is overwritten by the
// next call.
//
// The asm block stores the FPU environment into fpuState, then walks the
// inverted tag word from its highest two-bit field downwards. For each field
// that is not 11 it copies one stack value to fpuStack: st(0) directly, and
// st(n) by exchanging it with st(0), storing with a non-popping fst and
// exchanging back. eax counts the values copied.
// NOTE(review): per the Intel documentation fnstenv masks all x87 exceptions
// after storing the environment, so the control word may not be left exactly
// as it was - confirm.
const char *Sys_FPU_GetState( void ) {
	double fpuStack[8] = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
	double *fpuStackPtr = fpuStack;
	int i, numValues;
	char *ptr;

	__asm {
		mov			esi, statePtr
		mov			edi, fpuStackPtr
		fnstenv		[esi]					// store the FPU environment
		mov			esi, [esi+8]			// load the tag word
		xor			esi, 0xFFFFFFFF			// invert: a tag of 11 becomes 00
		mov			edx, (3<<14)			// mask for the highest two-bit tag field
		xor			eax, eax				// eax = number of values copied
		// st(0)
		mov			ecx, esi
		and			ecx, edx
		jz			done
		fst			qword ptr [edi+0]
		inc			eax
		shr			edx, 2
		// st(1)
		mov			ecx, esi
		and			ecx, edx
		jz			done
		fxch		st(1)
		fst			qword ptr [edi+8]
		inc			eax
		fxch		st(1)
		shr			edx, 2
		// st(2)
		mov			ecx, esi
		and			ecx, edx
		jz			done
		fxch		st(2)
		fst			qword ptr [edi+16]
		inc			eax
		fxch		st(2)
		shr			edx, 2
		// st(3)
		mov			ecx, esi
		and			ecx, edx
		jz			done
		fxch		st(3)
		fst			qword ptr [edi+24]
		inc			eax
		fxch		st(3)
		shr			edx, 2
		// st(4)
		mov			ecx, esi
		and			ecx, edx
		jz			done
		fxch		st(4)
		fst			qword ptr [edi+32]
		inc			eax
		fxch		st(4)
		shr			edx, 2
		// st(5)
		mov			ecx, esi
		and			ecx, edx
		jz			done
		fxch		st(5)
		fst			qword ptr [edi+40]
		inc			eax
		fxch		st(5)
		shr			edx, 2
		// st(6)
		mov			ecx, esi
		and			ecx, edx
		jz			done
		fxch		st(6)
		fst			qword ptr [edi+48]
		inc			eax
		fxch		st(6)
		shr			edx, 2
		// st(7)
		mov			ecx, esi
		and			ecx, edx
		jz			done
		fxch		st(7)
		fst			qword ptr [edi+56]
		inc			eax
		fxch		st(7)
	done:
		mov			numValues, eax
	}

	// the seven 32-bit words of the environment stored by fnstenv
	int ctrl = *(int *)&fpuState[0];
	int stat = *(int *)&fpuState[4];
	int tags = *(int *)&fpuState[8];
	int inof = *(int *)&fpuState[12];
	int inse = *(int *)&fpuState[16];
	int opof = *(int *)&fpuState[20];
	int opse = *(int *)&fpuState[24];

	ptr = fpuString;
	ptr += sprintf( ptr,"FPU State:\n"
						"num values on stack = %d\n", numValues );
	// all eight slots are printed; slots that were not copied stay 0.0
	for ( i = 0; i < 8; i++ ) {
		ptr += sprintf( ptr, "ST%d = %1.10e\n", i, fpuStack[i] );
	}

	// append the decoded control/status words; the returned length is not used
	Sys_FPU_PrintStateFlags( ptr, ctrl, stat, tags, inof, inse, opof, opse );

	return fpuString;
}
  701. /*
  702. ===============
  703. Sys_FPU_EnableExceptions
  704. ===============
  705. */
// Rewrites the low six bits of the FPU control word: all six are first set,
// then the bits given in 'exceptions' (only its low six bits are used) are
// cleared. The remaining control word bits are left unchanged.
// The control word is staged in the file-level fpuState buffer.
void Sys_FPU_EnableExceptions( int exceptions ) {
	__asm {
		mov			eax, statePtr
		mov			ecx, exceptions
		and			cx, 63				// keep the six requested bits
		not			cx					// cx = mask with the requested bits cleared
		fnstcw		word ptr [eax]		// read the current control word
		mov			bx, word ptr [eax]
		or			bx, 63				// set all six low bits
		and			bx, cx				// clear the requested ones
		mov			word ptr [eax], bx
		fldcw		word ptr [eax]		// load the modified control word
	}
}
  720. /*
  721. ===============
  722. Sys_FPU_SetPrecision
  723. ===============
  724. */
// Replaces bits 8-9 of the FPU control word (the precision control field)
// with the value looked up in precisionBitTable[precision & 3]. All other
// control word bits are preserved.
// NOTE(review): table entry 1 selects field value 1, which this file's own
// precisionControlField table names "Reserved"; "Double Precision (53-bits)"
// is field value 2. If precision == 1 is meant to select double precision the
// table should presumably read { 0, 2, 3, 0 } - confirm against the caller's
// precision enum.
void Sys_FPU_SetPrecision( int precision ) {
	short precisionBitTable[4] = { 0, 1, 3, 0 };
	short precisionBits = precisionBitTable[precision & 3] << 8;	// field value moved to bits 8-9
	short precisionMask = ~( ( 1 << 9 ) | ( 1 << 8 ) );				// everything except bits 8-9

	__asm {
		mov			eax, statePtr
		mov			cx, precisionBits
		fnstcw		word ptr [eax]		// read the current control word
		mov			bx, word ptr [eax]
		and			bx, precisionMask	// clear the precision field
		or			bx, cx				// insert the new field value
		mov			word ptr [eax], bx
		fldcw		word ptr [eax]		// load the modified control word
	}
}
  740. /*
  741. ================
  742. Sys_FPU_SetRounding
  743. ================
  744. */
// Replaces bits 10-11 of the FPU control word (the rounding control field)
// with rounding & 3. All other control word bits are preserved.
// The field values 0..3 correspond to the entries of roundingControlField.
void Sys_FPU_SetRounding( int rounding ) {
	short roundingBitTable[4] = { 0, 1, 2, 3 };						// identity mapping
	short roundingBits = roundingBitTable[rounding & 3] << 10;		// field value moved to bits 10-11
	short roundingMask = ~( ( 1 << 11 ) | ( 1 << 10 ) );			// everything except bits 10-11

	__asm {
		mov			eax, statePtr
		mov			cx, roundingBits
		fnstcw		word ptr [eax]		// read the current control word
		mov			bx, word ptr [eax]
		and			bx, roundingMask	// clear the rounding field
		or			bx, cx				// insert the new field value
		mov			word ptr [eax], bx
		fldcw		word ptr [eax]		// load the modified control word
	}
}
  760. /*
  761. ================
  762. Sys_FPU_SetDAZ
  763. ================
  764. */
// Sets bit 6 (DAZ) of the MXCSR register when 'enable' is true and clears it
// otherwise; all other MXCSR bits are preserved.
// NOTE(review): no support check is made here - presumably callers only use
// this when Sys_GetCPUId() reported CPUID_DAZ; verify.
void Sys_FPU_SetDAZ( bool enable ) {
	DWORD dwData;

	_asm {
		movzx	ecx, byte ptr enable
		and		ecx, 1						// normalize to 0 or 1
		shl		ecx, 6						// move into the DAZ bit position
		STMXCSR	dword ptr dwData			// read the current MXCSR
		mov		eax, dwData
		and		eax, ~(1<<6)	// clear DAZ bit
		or		eax, ecx		// set the DAZ bit if requested
		mov		dwData, eax
		LDMXCSR	dword ptr dwData			// write the modified MXCSR
	}
}
  779. /*
  780. ================
  781. Sys_FPU_SetFTZ
  782. ================
  783. */
// Sets bit 15 (FTZ) of the MXCSR register when 'enable' is true and clears it
// otherwise; all other MXCSR bits are preserved.
// NOTE(review): no support check is made here - presumably callers only use
// this when Sys_GetCPUId() reported CPUID_FTZ; verify.
void Sys_FPU_SetFTZ( bool enable ) {
	DWORD dwData;

	_asm {
		movzx	ecx, byte ptr enable
		and		ecx, 1						// normalize to 0 or 1
		shl		ecx, 15						// move into the FTZ bit position
		STMXCSR	dword ptr dwData			// read the current MXCSR
		mov		eax, dwData
		and		eax, ~(1<<15)	// clear FTZ bit
		or		eax, ecx		// set the FTZ bit if requested
		mov		dwData, eax
		LDMXCSR	dword ptr dwData			// write the modified MXCSR
	}
}