Simd.cpp 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273
  1. /*
  2. ===========================================================================
  3. Doom 3 BFG Edition GPL Source Code
  4. Copyright (C) 1993-2012 id Software LLC, a ZeniMax Media company.
  5. This file is part of the Doom 3 BFG Edition GPL Source Code ("Doom 3 BFG Edition Source Code").
  6. Doom 3 BFG Edition Source Code is free software: you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation, either version 3 of the License, or
  9. (at your option) any later version.
  10. Doom 3 BFG Edition Source Code is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with Doom 3 BFG Edition Source Code. If not, see <http://www.gnu.org/licenses/>.
  16. In addition, the Doom 3 BFG Edition Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 BFG Edition Source Code. If not, please request a copy in writing from id Software at the address below.
  17. If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
  18. ===========================================================================
  19. */
  20. #pragma hdrstop
  21. #include "../precompiled.h"
  22. #include "Simd_Generic.h"
  23. #include "Simd_SSE.h"
  24. idSIMDProcessor * processor = NULL; // pointer to SIMD processor
  25. idSIMDProcessor * generic = NULL; // pointer to generic SIMD implementation
  26. idSIMDProcessor * SIMDProcessor = NULL;
  27. /*
  28. ================
  29. idSIMD::Init
  30. ================
  31. */
  32. void idSIMD::Init() {
  33. generic = new (TAG_MATH) idSIMD_Generic;
  34. generic->cpuid = CPUID_GENERIC;
  35. processor = NULL;
  36. SIMDProcessor = generic;
  37. }
  38. /*
  39. ============
  40. idSIMD::InitProcessor
  41. ============
  42. */
  43. void idSIMD::InitProcessor( const char *module, bool forceGeneric ) {
  44. cpuid_t cpuid;
  45. idSIMDProcessor *newProcessor;
  46. cpuid = idLib::sys->GetProcessorId();
  47. if ( forceGeneric ) {
  48. newProcessor = generic;
  49. } else {
  50. if ( processor == NULL ) {
  51. if ( ( cpuid & CPUID_MMX ) && ( cpuid & CPUID_SSE ) ) {
  52. processor = new (TAG_MATH) idSIMD_SSE;
  53. } else {
  54. processor = generic;
  55. }
  56. processor->cpuid = cpuid;
  57. }
  58. newProcessor = processor;
  59. }
  60. if ( newProcessor != SIMDProcessor ) {
  61. SIMDProcessor = newProcessor;
  62. idLib::common->Printf( "%s using %s for SIMD processing\n", module, SIMDProcessor->GetName() );
  63. }
  64. if ( cpuid & CPUID_FTZ ) {
  65. idLib::sys->FPU_SetFTZ( true );
  66. idLib::common->Printf( "enabled Flush-To-Zero mode\n" );
  67. }
  68. if ( cpuid & CPUID_DAZ ) {
  69. idLib::sys->FPU_SetDAZ( true );
  70. idLib::common->Printf( "enabled Denormals-Are-Zero mode\n" );
  71. }
  72. }
  73. /*
  74. ================
  75. idSIMD::Shutdown
  76. ================
  77. */
  78. void idSIMD::Shutdown() {
  79. if ( processor != generic ) {
  80. delete processor;
  81. }
  82. delete generic;
  83. generic = NULL;
  84. processor = NULL;
  85. SIMDProcessor = NULL;
  86. }
  //===============================================================
  //
  //  Test code
  //
  //===============================================================

  // Number of elements processed per call (odd to catch edge cases).
  #define COUNT           999
  // Some tests need a larger count. The replacement list is parenthesized so the
  // macro behaves as a single value inside larger expressions such as x / BIG_COUNT.
  #define BIG_COUNT       ( COUNT * 5 )
  // Number of timed repetitions per test.
  #define NUMTESTS        2048
  // Fixed seed for the test data; alternative: ((int)idLib::sys->GetClockTicks())
  #define RANDOM_SEED     1013904223L
  96. idSIMDProcessor *p_simd;
  97. idSIMDProcessor *p_generic;
  98. long baseClocks = 0;
  99. #define TIME_TYPE int
  100. #pragma warning(disable : 4731) // frame pointer register 'ebx' modified by inline assembly code
  101. long saved_ebx = 0;
  // StartRecordTime( start )
  // Saves ebx in saved_ebx, then reads the time stamp counter with a cpuid
  // (eax = 0) issued before and after the rdtsc. The value left in eax by rdtsc
  // is stored in 'start'. ebx is NOT restored here; StopRecordTime does that.
  #define StartRecordTime( start )            \
      __asm mov       saved_ebx, ebx          \
      __asm xor       eax, eax                \
      __asm cpuid                             \
      __asm rdtsc                             \
      __asm mov       start, eax              \
      __asm xor       eax, eax                \
      __asm cpuid

  // StopRecordTime( end )
  // Counterpart of StartRecordTime: same cpuid / rdtsc / cpuid sequence, stores
  // eax in 'end' and reloads ebx from saved_ebx right after the time stamp is stored.
  #define StopRecordTime( end )               \
      __asm xor       eax, eax                \
      __asm cpuid                             \
      __asm rdtsc                             \
      __asm mov       end, eax                \
      __asm mov       ebx, saved_ebx          \
      __asm xor       eax, eax                \
      __asm cpuid
  // GetBest( start, end, best )
  // Keeps the smallest (end - start) seen so far in 'best'. A 'best' of zero
  // means "no sample yet" and is always replaced. Every argument is
  // parenthesized so that expression arguments expand with the intended
  // precedence; 'best' must be a modifiable lvalue.
  #define GetBest( start, end, best )                     \
      if ( !(best) || (end) - (start) < (best) ) {        \
          (best) = (end) - (start);                       \
      }
  122. /*
  123. ============
  124. PrintClocks
  125. ============
  126. */
  127. void PrintClocks( char *string, int dataCount, int clocks, int otherClocks = 0 ) {
  128. int i;
  129. idLib::common->Printf( string );
  130. for ( i = idStr::LengthWithoutColors(string); i < 48; i++ ) {
  131. idLib::common->Printf(" ");
  132. }
  133. clocks -= baseClocks;
  134. if ( otherClocks && clocks ) {
  135. otherClocks -= baseClocks;
  136. float p = (float)otherClocks / (float)clocks;
  137. idLib::common->Printf( "c = %4d, clcks = %5d, %.1fX\n", dataCount, clocks, p );
  138. } else {
  139. idLib::common->Printf( "c = %4d, clcks = %5d\n", dataCount, clocks );
  140. }
  141. }
  142. /*
  143. ============
  144. GetBaseClocks
  145. ============
  146. */
  147. void GetBaseClocks() {
  148. int i, start, end, bestClocks;
  149. bestClocks = 0;
  150. for ( i = 0; i < NUMTESTS; i++ ) {
  151. StartRecordTime( start );
  152. StopRecordTime( end );
  153. GetBest( start, end, bestClocks );
  154. }
  155. baseClocks = bestClocks;
  156. }
  157. /*
  158. ============
  159. TestMinMax
  160. ============
  161. */
  162. void TestMinMax() {
  163. int i;
  164. TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
  165. ALIGN16( float fsrc0[COUNT] );
  166. ALIGN16( idVec2 v2src0[COUNT] );
  167. ALIGN16( idVec3 v3src0[COUNT] );
  168. ALIGN16( idDrawVert drawVerts[COUNT] );
  169. ALIGN16( triIndex_t indexes[COUNT] );
  170. float min = 0.0f, max = 0.0f, min2 = 0.0f, max2 = 0.0f;
  171. idVec2 v2min, v2max, v2min2, v2max2;
  172. idVec3 vmin, vmax, vmin2, vmax2;
  173. const char *result;
  174. idRandom srnd( RANDOM_SEED );
  175. for ( i = 0; i < COUNT; i++ ) {
  176. fsrc0[i] = srnd.CRandomFloat() * 10.0f;
  177. v2src0[i][0] = srnd.CRandomFloat() * 10.0f;
  178. v2src0[i][1] = srnd.CRandomFloat() * 10.0f;
  179. v3src0[i][0] = srnd.CRandomFloat() * 10.0f;
  180. v3src0[i][1] = srnd.CRandomFloat() * 10.0f;
  181. v3src0[i][2] = srnd.CRandomFloat() * 10.0f;
  182. drawVerts[i].xyz = v3src0[i];
  183. indexes[i] = i;
  184. }
  185. idLib::common->Printf("====================================\n" );
  186. bestClocksGeneric = 0;
  187. for ( i = 0; i < NUMTESTS; i++ ) {
  188. min = idMath::INFINITY;
  189. max = -idMath::INFINITY;
  190. StartRecordTime( start );
  191. p_generic->MinMax( min, max, fsrc0, COUNT );
  192. StopRecordTime( end );
  193. GetBest( start, end, bestClocksGeneric );
  194. }
  195. PrintClocks( "generic->MinMax( float[] )", COUNT, bestClocksGeneric );
  196. bestClocksSIMD = 0;
  197. for ( i = 0; i < NUMTESTS; i++ ) {
  198. StartRecordTime( start );
  199. p_simd->MinMax( min2, max2, fsrc0, COUNT );
  200. StopRecordTime( end );
  201. GetBest( start, end, bestClocksSIMD );
  202. }
  203. result = ( min == min2 && max == max2 ) ? "ok" : S_COLOR_RED"X";
  204. PrintClocks( va( " simd->MinMax( float[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
  205. bestClocksGeneric = 0;
  206. for ( i = 0; i < NUMTESTS; i++ ) {
  207. StartRecordTime( start );
  208. p_generic->MinMax( v2min, v2max, v2src0, COUNT );
  209. StopRecordTime( end );
  210. GetBest( start, end, bestClocksGeneric );
  211. }
  212. PrintClocks( "generic->MinMax( idVec2[] )", COUNT, bestClocksGeneric );
  213. bestClocksSIMD = 0;
  214. for ( i = 0; i < NUMTESTS; i++ ) {
  215. StartRecordTime( start );
  216. p_simd->MinMax( v2min2, v2max2, v2src0, COUNT );
  217. StopRecordTime( end );
  218. GetBest( start, end, bestClocksSIMD );
  219. }
  220. result = ( v2min == v2min2 && v2max == v2max2 ) ? "ok" : S_COLOR_RED"X";
  221. PrintClocks( va( " simd->MinMax( idVec2[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
  222. bestClocksGeneric = 0;
  223. for ( i = 0; i < NUMTESTS; i++ ) {
  224. StartRecordTime( start );
  225. p_generic->MinMax( vmin, vmax, v3src0, COUNT );
  226. StopRecordTime( end );
  227. GetBest( start, end, bestClocksGeneric );
  228. }
  229. PrintClocks( "generic->MinMax( idVec3[] )", COUNT, bestClocksGeneric );
  230. bestClocksSIMD = 0;
  231. for ( i = 0; i < NUMTESTS; i++ ) {
  232. StartRecordTime( start );
  233. p_simd->MinMax( vmin2, vmax2, v3src0, COUNT );
  234. StopRecordTime( end );
  235. GetBest( start, end, bestClocksSIMD );
  236. }
  237. result = ( vmin == vmin2 && vmax == vmax2 ) ? "ok" : S_COLOR_RED"X";
  238. PrintClocks( va( " simd->MinMax( idVec3[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
  239. bestClocksGeneric = 0;
  240. for ( i = 0; i < NUMTESTS; i++ ) {
  241. StartRecordTime( start );
  242. p_generic->MinMax( vmin, vmax, drawVerts, COUNT );
  243. StopRecordTime( end );
  244. GetBest( start, end, bestClocksGeneric );
  245. }
  246. PrintClocks( "generic->MinMax( idDrawVert[] )", COUNT, bestClocksGeneric );
  247. bestClocksSIMD = 0;
  248. for ( i = 0; i < NUMTESTS; i++ ) {
  249. StartRecordTime( start );
  250. p_simd->MinMax( vmin2, vmax2, drawVerts, COUNT );
  251. StopRecordTime( end );
  252. GetBest( start, end, bestClocksSIMD );
  253. }
  254. result = ( vmin == vmin2 && vmax == vmax2 ) ? "ok" : S_COLOR_RED"X";
  255. PrintClocks( va( " simd->MinMax( idDrawVert[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
  256. bestClocksGeneric = 0;
  257. for ( i = 0; i < NUMTESTS; i++ ) {
  258. StartRecordTime( start );
  259. p_generic->MinMax( vmin, vmax, drawVerts, indexes, COUNT );
  260. StopRecordTime( end );
  261. GetBest( start, end, bestClocksGeneric );
  262. }
  263. PrintClocks( "generic->MinMax( idDrawVert[], indexes[] )", COUNT, bestClocksGeneric );
  264. bestClocksSIMD = 0;
  265. for ( i = 0; i < NUMTESTS; i++ ) {
  266. StartRecordTime( start );
  267. p_simd->MinMax( vmin2, vmax2, drawVerts, indexes, COUNT );
  268. StopRecordTime( end );
  269. GetBest( start, end, bestClocksSIMD );
  270. }
  271. result = ( vmin == vmin2 && vmax == vmax2 ) ? "ok" : S_COLOR_RED"X";
  272. PrintClocks( va( " simd->MinMax( idDrawVert[], indexes[] ) %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
  273. }
  274. /*
  275. ============
  276. TestMemcpy
  277. ============
  278. */
  279. void TestMemcpy() {
  280. TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
  281. int i;
  282. byte test0[BIG_COUNT];
  283. byte test1[BIG_COUNT];
  284. const char * result;
  285. idRandom random( RANDOM_SEED );
  286. for ( i = 0; i < BIG_COUNT; i++ ) {
  287. test0[i] = random.RandomInt( 255 );
  288. }
  289. idLib::common->Printf("====================================\n" );
  290. bestClocksGeneric = 0;
  291. for ( i = 0; i < NUMTESTS; i++ ) {
  292. StartRecordTime( start );
  293. p_generic->Memcpy( test1, test0, BIG_COUNT );
  294. StopRecordTime( end );
  295. GetBest( start, end, bestClocksGeneric );
  296. }
  297. PrintClocks( "generic->Memcpy()", BIG_COUNT, bestClocksGeneric );
  298. for ( i = 0; i < BIG_COUNT; i++ ) {
  299. test0[i] = random.RandomInt( 255 );
  300. }
  301. bestClocksSIMD = 0;
  302. for ( i = 0; i < NUMTESTS; i++ ) {
  303. StartRecordTime( start );
  304. p_simd->Memcpy( test1, test0, BIG_COUNT );
  305. StopRecordTime( end );
  306. GetBest( start, end, bestClocksSIMD );
  307. }
  308. for ( i = 0; i < BIG_COUNT; i++ ) {
  309. if ( test1[i] != test0[i] ) {
  310. break;
  311. }
  312. }
  313. result = ( i >= BIG_COUNT ) ? "ok" : S_COLOR_RED"X";
  314. PrintClocks( va( " simd->Memcpy() %s", result), BIG_COUNT, bestClocksSIMD, bestClocksGeneric );
  315. }
  316. /*
  317. ============
  318. TestMemset
  319. ============
  320. */
  321. void TestMemset() {
  322. TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
  323. int i, j;
  324. const char * result;
  325. byte test0[BIG_COUNT];
  326. idRandom random( RANDOM_SEED );
  327. j = 1 + random.RandomInt( 254 );
  328. idLib::common->Printf("====================================\n" );
  329. bestClocksGeneric = 0;
  330. for ( i = 0; i < NUMTESTS; i++ ) {
  331. StartRecordTime( start );
  332. p_generic->Memset( test0, j, BIG_COUNT );
  333. StopRecordTime( end );
  334. GetBest( start, end, bestClocksGeneric );
  335. }
  336. PrintClocks( "generic->Memset()", BIG_COUNT, bestClocksGeneric );
  337. j = 1 + random.RandomInt( 254 );
  338. bestClocksSIMD = 0;
  339. for ( i = 0; i < NUMTESTS; i++ ) {
  340. StartRecordTime( start );
  341. p_simd->Memset( test0, j, BIG_COUNT );
  342. StopRecordTime( end );
  343. GetBest( start, end, bestClocksSIMD );
  344. }
  345. for ( i = 0; i < BIG_COUNT; i++ ) {
  346. if ( test0[i] != j ) {
  347. break;
  348. }
  349. }
  350. result = ( i >= BIG_COUNT ) ? "ok" : S_COLOR_RED"X";
  351. PrintClocks( va( " simd->Memset() %s", result), BIG_COUNT, bestClocksSIMD, bestClocksGeneric );
  352. j = 0;
  353. bestClocksGeneric = 0;
  354. for ( i = 0; i < NUMTESTS; i++ ) {
  355. StartRecordTime( start );
  356. p_generic->Memset( test0, j, BIG_COUNT );
  357. StopRecordTime( end );
  358. GetBest( start, end, bestClocksGeneric );
  359. }
  360. PrintClocks( "generic->Memset( 0 )", BIG_COUNT, bestClocksGeneric );
  361. bestClocksSIMD = 0;
  362. for ( i = 0; i < NUMTESTS; i++ ) {
  363. StartRecordTime( start );
  364. p_simd->Memset( test0, j, BIG_COUNT );
  365. StopRecordTime( end );
  366. GetBest( start, end, bestClocksSIMD );
  367. }
  368. for ( i = 0; i < BIG_COUNT; i++ ) {
  369. if ( test0[i] != j ) {
  370. break;
  371. }
  372. }
  373. result = ( i >= BIG_COUNT ) ? "ok" : S_COLOR_RED"X";
  374. PrintClocks( va( " simd->Memset( 0 ) %s", result), BIG_COUNT, bestClocksSIMD, bestClocksGeneric );
  375. }
  376. /*
  377. ============
  378. TestBlendJoints
  379. ============
  380. */
  381. void TestBlendJoints() {
  382. int i, j;
  383. TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
  384. idTempArray< idJointQuat > baseJoints( COUNT );
  385. idTempArray< idJointQuat > joints1( COUNT );
  386. idTempArray< idJointQuat > joints2( COUNT );
  387. idTempArray< idJointQuat > blendJoints( COUNT );
  388. idTempArray< int > index( COUNT );
  389. float lerp = 0.3f;
  390. const char *result;
  391. idRandom srnd( RANDOM_SEED );
  392. for ( i = 0; i < COUNT; i++ ) {
  393. idAngles angles;
  394. angles[0] = srnd.CRandomFloat() * 180.0f;
  395. angles[1] = srnd.CRandomFloat() * 180.0f;
  396. angles[2] = srnd.CRandomFloat() * 180.0f;
  397. baseJoints[i].q = angles.ToQuat();
  398. baseJoints[i].t[0] = srnd.CRandomFloat() * 10.0f;
  399. baseJoints[i].t[1] = srnd.CRandomFloat() * 10.0f;
  400. baseJoints[i].t[2] = srnd.CRandomFloat() * 10.0f;
  401. baseJoints[i].w = 0.0f;
  402. angles[0] = srnd.CRandomFloat() * 180.0f;
  403. angles[1] = srnd.CRandomFloat() * 180.0f;
  404. angles[2] = srnd.CRandomFloat() * 180.0f;
  405. blendJoints[i].q = angles.ToQuat();
  406. blendJoints[i].t[0] = srnd.CRandomFloat() * 10.0f;
  407. blendJoints[i].t[1] = srnd.CRandomFloat() * 10.0f;
  408. blendJoints[i].t[2] = srnd.CRandomFloat() * 10.0f;
  409. blendJoints[i].w = 0.0f;
  410. index[i] = i;
  411. }
  412. bestClocksGeneric = 0;
  413. for ( i = 0; i < NUMTESTS; i++ ) {
  414. for ( j = 0; j < COUNT; j++ ) {
  415. joints1[j] = baseJoints[j];
  416. }
  417. StartRecordTime( start );
  418. p_generic->BlendJoints( joints1.Ptr(), blendJoints.Ptr(), lerp, index.Ptr(), COUNT );
  419. StopRecordTime( end );
  420. GetBest( start, end, bestClocksGeneric );
  421. }
  422. PrintClocks( "generic->BlendJoints()", COUNT, bestClocksGeneric );
  423. bestClocksSIMD = 0;
  424. for ( i = 0; i < NUMTESTS; i++ ) {
  425. for ( j = 0; j < COUNT; j++ ) {
  426. joints2[j] = baseJoints[j];
  427. }
  428. StartRecordTime( start );
  429. p_simd->BlendJoints( joints2.Ptr(), blendJoints.Ptr(), lerp, index.Ptr(), COUNT );
  430. StopRecordTime( end );
  431. GetBest( start, end, bestClocksSIMD );
  432. }
  433. for ( i = 0; i < COUNT; i++ ) {
  434. if ( !joints1[i].t.Compare( joints2[i].t, 1e-3f ) ) {
  435. break;
  436. }
  437. if ( !joints1[i].q.Compare( joints2[i].q, 1e-2f ) ) {
  438. break;
  439. }
  440. }
  441. result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
  442. PrintClocks( va( " simd->BlendJoints() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
  443. }
  444. /*
  445. ============
  446. TestBlendJoints
  447. ============
  448. */
  449. void TestBlendJointsFast() {
  450. int i, j;
  451. TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
  452. idTempArray< idJointQuat > baseJoints( COUNT );
  453. idTempArray< idJointQuat > joints1( COUNT );
  454. idTempArray< idJointQuat > joints2( COUNT );
  455. idTempArray< idJointQuat > blendJoints( COUNT );
  456. idTempArray< int > index( COUNT );
  457. float lerp = 0.3f;
  458. const char *result;
  459. idRandom srnd( RANDOM_SEED );
  460. for ( i = 0; i < COUNT; i++ ) {
  461. idAngles angles;
  462. angles[0] = srnd.CRandomFloat() * 180.0f;
  463. angles[1] = srnd.CRandomFloat() * 180.0f;
  464. angles[2] = srnd.CRandomFloat() * 180.0f;
  465. baseJoints[i].q = angles.ToQuat();
  466. baseJoints[i].t[0] = srnd.CRandomFloat() * 10.0f;
  467. baseJoints[i].t[1] = srnd.CRandomFloat() * 10.0f;
  468. baseJoints[i].t[2] = srnd.CRandomFloat() * 10.0f;
  469. baseJoints[i].w = 0.0f;
  470. angles[0] = srnd.CRandomFloat() * 180.0f;
  471. angles[1] = srnd.CRandomFloat() * 180.0f;
  472. angles[2] = srnd.CRandomFloat() * 180.0f;
  473. blendJoints[i].q = angles.ToQuat();
  474. blendJoints[i].t[0] = srnd.CRandomFloat() * 10.0f;
  475. blendJoints[i].t[1] = srnd.CRandomFloat() * 10.0f;
  476. blendJoints[i].t[2] = srnd.CRandomFloat() * 10.0f;
  477. blendJoints[i].w = 0.0f;
  478. index[i] = i;
  479. }
  480. bestClocksGeneric = 0;
  481. for ( i = 0; i < NUMTESTS; i++ ) {
  482. for ( j = 0; j < COUNT; j++ ) {
  483. joints1[j] = baseJoints[j];
  484. }
  485. StartRecordTime( start );
  486. p_generic->BlendJointsFast( joints1.Ptr(), blendJoints.Ptr(), lerp, index.Ptr(), COUNT );
  487. StopRecordTime( end );
  488. GetBest( start, end, bestClocksGeneric );
  489. }
  490. PrintClocks( "generic->BlendJointsFast()", COUNT, bestClocksGeneric );
  491. bestClocksSIMD = 0;
  492. for ( i = 0; i < NUMTESTS; i++ ) {
  493. for ( j = 0; j < COUNT; j++ ) {
  494. joints2[j] = baseJoints[j];
  495. }
  496. StartRecordTime( start );
  497. p_simd->BlendJointsFast( joints2.Ptr(), blendJoints.Ptr(), lerp, index.Ptr(), COUNT );
  498. StopRecordTime( end );
  499. GetBest( start, end, bestClocksSIMD );
  500. }
  501. for ( i = 0; i < COUNT; i++ ) {
  502. if ( !joints1[i].t.Compare( joints2[i].t, 1e-3f ) ) {
  503. break;
  504. }
  505. if ( !joints1[i].q.Compare( joints2[i].q, 1e-2f ) ) {
  506. break;
  507. }
  508. }
  509. result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
  510. PrintClocks( va( " simd->BlendJointsFast() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
  511. }
  512. /*
  513. ============
  514. TestConvertJointQuatsToJointMats
  515. ============
  516. */
  517. void TestConvertJointQuatsToJointMats() {
  518. int i;
  519. TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
  520. idTempArray< idJointQuat > baseJoints( COUNT );
  521. idTempArray< idJointMat > joints1( COUNT );
  522. idTempArray< idJointMat > joints2( COUNT );
  523. const char *result;
  524. idRandom srnd( RANDOM_SEED );
  525. for ( i = 0; i < COUNT; i++ ) {
  526. idAngles angles;
  527. angles[0] = srnd.CRandomFloat() * 180.0f;
  528. angles[1] = srnd.CRandomFloat() * 180.0f;
  529. angles[2] = srnd.CRandomFloat() * 180.0f;
  530. baseJoints[i].q = angles.ToQuat();
  531. baseJoints[i].t[0] = srnd.CRandomFloat() * 10.0f;
  532. baseJoints[i].t[1] = srnd.CRandomFloat() * 10.0f;
  533. baseJoints[i].t[2] = srnd.CRandomFloat() * 10.0f;
  534. }
  535. bestClocksGeneric = 0;
  536. for ( i = 0; i < NUMTESTS; i++ ) {
  537. StartRecordTime( start );
  538. p_generic->ConvertJointQuatsToJointMats( joints1.Ptr(), baseJoints.Ptr(), COUNT );
  539. StopRecordTime( end );
  540. GetBest( start, end, bestClocksGeneric );
  541. }
  542. PrintClocks( "generic->ConvertJointQuatsToJointMats()", COUNT, bestClocksGeneric );
  543. bestClocksSIMD = 0;
  544. for ( i = 0; i < NUMTESTS; i++ ) {
  545. StartRecordTime( start );
  546. p_simd->ConvertJointQuatsToJointMats( joints2.Ptr(), baseJoints.Ptr(), COUNT );
  547. StopRecordTime( end );
  548. GetBest( start, end, bestClocksSIMD );
  549. }
  550. for ( i = 0; i < COUNT; i++ ) {
  551. if ( !joints1[i].Compare( joints2[i], 1e-4f ) ) {
  552. break;
  553. }
  554. }
  555. result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
  556. PrintClocks( va( " simd->ConvertJointQuatsToJointMats() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
  557. }
  558. /*
  559. ============
  560. TestConvertJointMatsToJointQuats
  561. ============
  562. */
  563. void TestConvertJointMatsToJointQuats() {
  564. int i;
  565. TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
  566. idTempArray< idJointMat > baseJoints( COUNT );
  567. idTempArray< idJointQuat > joints1( COUNT );
  568. idTempArray< idJointQuat > joints2( COUNT );
  569. const char *result;
  570. idRandom srnd( RANDOM_SEED );
  571. for ( i = 0; i < COUNT; i++ ) {
  572. idAngles angles;
  573. angles[0] = srnd.CRandomFloat() * 180.0f;
  574. angles[1] = srnd.CRandomFloat() * 180.0f;
  575. angles[2] = srnd.CRandomFloat() * 180.0f;
  576. baseJoints[i].SetRotation( angles.ToMat3() );
  577. idVec3 v;
  578. v[0] = srnd.CRandomFloat() * 10.0f;
  579. v[1] = srnd.CRandomFloat() * 10.0f;
  580. v[2] = srnd.CRandomFloat() * 10.0f;
  581. baseJoints[i].SetTranslation( v );
  582. }
  583. bestClocksGeneric = 0;
  584. for ( i = 0; i < NUMTESTS; i++ ) {
  585. StartRecordTime( start );
  586. p_generic->ConvertJointMatsToJointQuats( joints1.Ptr(), baseJoints.Ptr(), COUNT );
  587. StopRecordTime( end );
  588. GetBest( start, end, bestClocksGeneric );
  589. }
  590. PrintClocks( "generic->ConvertJointMatsToJointQuats()", COUNT, bestClocksGeneric );
  591. bestClocksSIMD = 0;
  592. for ( i = 0; i < NUMTESTS; i++ ) {
  593. StartRecordTime( start );
  594. p_simd->ConvertJointMatsToJointQuats( joints2.Ptr(), baseJoints.Ptr(), COUNT );
  595. StopRecordTime( end );
  596. GetBest( start, end, bestClocksSIMD );
  597. }
  598. for ( i = 0; i < COUNT; i++ ) {
  599. if ( !joints1[i].q.Compare( joints2[i].q, 1e-4f ) ) {
  600. break;
  601. }
  602. if ( !joints1[i].t.Compare( joints2[i].t, 1e-4f ) ) {
  603. break;
  604. }
  605. }
  606. result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
  607. PrintClocks( va( " simd->ConvertJointMatsToJointQuats() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
  608. }
  609. /*
  610. ============
  611. TestTransformJoints
  612. ============
  613. */
  614. void TestTransformJoints() {
  615. int i, j;
  616. TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
  617. idTempArray< idJointMat > joints( COUNT+1 );
  618. idTempArray< idJointMat > joints1( COUNT+1 );
  619. idTempArray< idJointMat > joints2( COUNT+1 );
  620. idTempArray< int > parents( COUNT+1 );
  621. const char *result;
  622. idRandom srnd( RANDOM_SEED );
  623. for ( i = 0; i <= COUNT; i++ ) {
  624. idAngles angles;
  625. angles[0] = srnd.CRandomFloat() * 180.0f;
  626. angles[1] = srnd.CRandomFloat() * 180.0f;
  627. angles[2] = srnd.CRandomFloat() * 180.0f;
  628. joints[i].SetRotation( angles.ToMat3() );
  629. idVec3 v;
  630. v[0] = srnd.CRandomFloat() * 2.0f;
  631. v[1] = srnd.CRandomFloat() * 2.0f;
  632. v[2] = srnd.CRandomFloat() * 2.0f;
  633. joints[i].SetTranslation( v );
  634. parents[i] = i - 1;
  635. }
  636. bestClocksGeneric = 0;
  637. for ( i = 0; i < NUMTESTS; i++ ) {
  638. for ( j = 0; j <= COUNT; j++ ) {
  639. joints1[j] = joints[j];
  640. }
  641. StartRecordTime( start );
  642. p_generic->TransformJoints( joints1.Ptr(), parents.Ptr(), 1, COUNT );
  643. StopRecordTime( end );
  644. GetBest( start, end, bestClocksGeneric );
  645. }
  646. PrintClocks( "generic->TransformJoints()", COUNT, bestClocksGeneric );
  647. bestClocksSIMD = 0;
  648. for ( i = 0; i < NUMTESTS; i++ ) {
  649. for ( j = 0; j <= COUNT; j++ ) {
  650. joints2[j] = joints[j];
  651. }
  652. StartRecordTime( start );
  653. p_simd->TransformJoints( joints2.Ptr(), parents.Ptr(), 1, COUNT );
  654. StopRecordTime( end );
  655. GetBest( start, end, bestClocksSIMD );
  656. }
  657. for ( i = 1; i <= COUNT; i++ ) {
  658. if ( !joints1[i].Compare( joints2[i], 1e-3f ) ) {
  659. break;
  660. }
  661. }
  662. result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
  663. PrintClocks( va( " simd->TransformJoints() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
  664. }
  665. /*
  666. ============
  667. TestUntransformJoints
  668. ============
  669. */
  670. void TestUntransformJoints() {
  671. int i, j;
  672. TIME_TYPE start, end, bestClocksGeneric, bestClocksSIMD;
  673. idTempArray< idJointMat > joints( COUNT+1 );
  674. idTempArray< idJointMat > joints1( COUNT+1 );
  675. idTempArray< idJointMat > joints2( COUNT+1 );
  676. idTempArray< int > parents( COUNT+1 );
  677. const char *result;
  678. idRandom srnd( RANDOM_SEED );
  679. for ( i = 0; i <= COUNT; i++ ) {
  680. idAngles angles;
  681. angles[0] = srnd.CRandomFloat() * 180.0f;
  682. angles[1] = srnd.CRandomFloat() * 180.0f;
  683. angles[2] = srnd.CRandomFloat() * 180.0f;
  684. joints[i].SetRotation( angles.ToMat3() );
  685. idVec3 v;
  686. v[0] = srnd.CRandomFloat() * 2.0f;
  687. v[1] = srnd.CRandomFloat() * 2.0f;
  688. v[2] = srnd.CRandomFloat() * 2.0f;
  689. joints[i].SetTranslation( v );
  690. parents[i] = i - 1;
  691. }
  692. bestClocksGeneric = 0;
  693. for ( i = 0; i < NUMTESTS; i++ ) {
  694. for ( j = 0; j <= COUNT; j++ ) {
  695. joints1[j] = joints[j];
  696. }
  697. StartRecordTime( start );
  698. p_generic->UntransformJoints( joints1.Ptr(), parents.Ptr(), 1, COUNT );
  699. StopRecordTime( end );
  700. GetBest( start, end, bestClocksGeneric );
  701. }
  702. PrintClocks( "generic->UntransformJoints()", COUNT, bestClocksGeneric );
  703. bestClocksSIMD = 0;
  704. for ( i = 0; i < NUMTESTS; i++ ) {
  705. for ( j = 0; j <= COUNT; j++ ) {
  706. joints2[j] = joints[j];
  707. }
  708. StartRecordTime( start );
  709. p_simd->UntransformJoints( joints2.Ptr(), parents.Ptr(), 1, COUNT );
  710. StopRecordTime( end );
  711. GetBest( start, end, bestClocksSIMD );
  712. }
  713. for ( i = 1; i <= COUNT; i++ ) {
  714. if ( !joints1[i].Compare( joints2[i], 1e-3f ) ) {
  715. break;
  716. }
  717. }
  718. result = ( i >= COUNT ) ? "ok" : S_COLOR_RED"X";
  719. PrintClocks( va( " simd->UntransformJoints() %s", result ), COUNT, bestClocksSIMD, bestClocksGeneric );
  720. }
  721. /*
  722. ============
  723. TestMath
  724. ============
  725. */
  726. void TestMath() {
  727. int i;
  728. TIME_TYPE start, end, bestClocks;
  729. idLib::common->Printf("====================================\n" );
  730. float tst = -1.0f;
  731. float tst2 = 1.0f;
  732. float testvar = 1.0f;
  733. idRandom rnd;
  734. bestClocks = 0;
  735. tst = rnd.CRandomFloat();
  736. for ( i = 0; i < NUMTESTS; i++ ) {
  737. StartRecordTime( start );
  738. tst = fabs( tst );
  739. StopRecordTime( end );
  740. GetBest( start, end, bestClocks );
  741. testvar = ( testvar + tst ) * tst;
  742. tst = rnd.CRandomFloat();
  743. }
  744. PrintClocks( " fabs( tst )", 1, bestClocks );
  745. bestClocks = 0;
  746. tst = rnd.CRandomFloat();
  747. for ( i = 0; i < NUMTESTS; i++ ) {
  748. StartRecordTime( start );
  749. int tmp = * ( int * ) &tst;
  750. tmp &= 0x7FFFFFFF;
  751. tst = * ( float * ) &tmp;
  752. StopRecordTime( end );
  753. GetBest( start, end, bestClocks );
  754. testvar = ( testvar + tst ) * tst;
  755. tst = rnd.CRandomFloat();
  756. }
  757. PrintClocks( " idMath::Fabs( tst )", 1, bestClocks );
  758. bestClocks = 0;
  759. tst = 10.0f + 100.0f * rnd.RandomFloat();
  760. for ( i = 0; i < NUMTESTS; i++ ) {
  761. StartRecordTime( start );
  762. tst = sqrt( tst );
  763. StopRecordTime( end );
  764. GetBest( start, end, bestClocks );
  765. testvar = ( testvar + tst ) * tst * 0.01f;
  766. tst = 10.0f + 100.0f * rnd.RandomFloat();
  767. }
  768. PrintClocks( " sqrt( tst )", 1, bestClocks );
  769. bestClocks = 0;
  770. tst = rnd.RandomFloat();
  771. for ( i = 0; i < NUMTESTS; i++ ) {
  772. StartRecordTime( start );
  773. tst = idMath::Sqrt( tst );
  774. StopRecordTime( end );
  775. GetBest( start, end, bestClocks );
  776. testvar = ( testvar + tst ) * tst;
  777. tst = rnd.RandomFloat();
  778. }
  779. PrintClocks( " idMath::Sqrt( tst )", 1, bestClocks );
  780. bestClocks = 0;
  781. tst = rnd.RandomFloat();
  782. for ( i = 0; i < NUMTESTS; i++ ) {
  783. StartRecordTime( start );
  784. tst = idMath::Sqrt16( tst );
  785. StopRecordTime( end );
  786. GetBest( start, end, bestClocks );
  787. testvar = ( testvar + tst ) * tst;
  788. tst = rnd.RandomFloat();
  789. }
  790. PrintClocks( " idMath::Sqrt16( tst )", 1, bestClocks );
  791. bestClocks = 0;
  792. tst = rnd.CRandomFloat();
  793. for ( i = 0; i < NUMTESTS; i++ ) {
  794. StartRecordTime( start );
  795. tst = idMath::Sin( tst );
  796. StopRecordTime( end );
  797. GetBest( start, end, bestClocks );
  798. testvar = ( testvar + tst ) * tst;
  799. tst = rnd.CRandomFloat();
  800. }
  801. PrintClocks( " idMath::Sin( tst )", 1, bestClocks );
  802. bestClocks = 0;
  803. tst = rnd.CRandomFloat();
  804. for ( i = 0; i < NUMTESTS; i++ ) {
  805. StartRecordTime( start );
  806. tst = idMath::Sin16( tst );
  807. StopRecordTime( end );
  808. GetBest( start, end, bestClocks );
  809. testvar = ( testvar + tst ) * tst;
  810. tst = rnd.CRandomFloat();
  811. }
  812. PrintClocks( " idMath::Sin16( tst )", 1, bestClocks );
  813. bestClocks = 0;
  814. tst = rnd.CRandomFloat();
  815. for ( i = 0; i < NUMTESTS; i++ ) {
  816. StartRecordTime( start );
  817. tst = idMath::Cos( tst );
  818. StopRecordTime( end );
  819. GetBest( start, end, bestClocks );
  820. testvar = ( testvar + tst ) * tst;
  821. tst = rnd.CRandomFloat();
  822. }
  823. PrintClocks( " idMath::Cos( tst )", 1, bestClocks );
  824. bestClocks = 0;
  825. tst = rnd.CRandomFloat();
  826. for ( i = 0; i < NUMTESTS; i++ ) {
  827. StartRecordTime( start );
  828. tst = idMath::Cos16( tst );
  829. StopRecordTime( end );
  830. GetBest( start, end, bestClocks );
  831. testvar = ( testvar + tst ) * tst;
  832. tst = rnd.CRandomFloat();
  833. }
  834. PrintClocks( " idMath::Cos16( tst )", 1, bestClocks );
  835. bestClocks = 0;
  836. tst = rnd.CRandomFloat();
  837. for ( i = 0; i < NUMTESTS; i++ ) {
  838. StartRecordTime( start );
  839. idMath::SinCos( tst, tst, tst2 );
  840. StopRecordTime( end );
  841. GetBest( start, end, bestClocks );
  842. testvar = ( testvar + tst ) * tst;
  843. tst = rnd.CRandomFloat();
  844. }
  845. PrintClocks( " idMath::SinCos( tst )", 1, bestClocks );
  846. bestClocks = 0;
  847. tst = rnd.CRandomFloat();
  848. for ( i = 0; i < NUMTESTS; i++ ) {
  849. StartRecordTime( start );
  850. idMath::SinCos16( tst, tst, tst2 );
  851. StopRecordTime( end );
  852. GetBest( start, end, bestClocks );
  853. testvar = ( testvar + tst ) * tst;
  854. tst = rnd.CRandomFloat();
  855. }
  856. PrintClocks( "idMath::SinCos16( tst )", 1, bestClocks );
  857. bestClocks = 0;
  858. tst = rnd.CRandomFloat();
  859. for ( i = 0; i < NUMTESTS; i++ ) {
  860. StartRecordTime( start );
  861. tst = idMath::Tan( tst );
  862. StopRecordTime( end );
  863. GetBest( start, end, bestClocks );
  864. testvar = ( testvar + tst ) * tst;
  865. tst = rnd.CRandomFloat();
  866. }
  867. PrintClocks( " idMath::Tan( tst )", 1, bestClocks );
  868. bestClocks = 0;
  869. tst = rnd.CRandomFloat();
  870. for ( i = 0; i < NUMTESTS; i++ ) {
  871. StartRecordTime( start );
  872. tst = idMath::Tan16( tst );
  873. StopRecordTime( end );
  874. GetBest( start, end, bestClocks );
  875. testvar = ( testvar + tst ) * tst;
  876. tst = rnd.CRandomFloat();
  877. }
  878. PrintClocks( " idMath::Tan16( tst )", 1, bestClocks );
  879. bestClocks = 0;
  880. tst = rnd.CRandomFloat();
  881. for ( i = 0; i < NUMTESTS; i++ ) {
  882. StartRecordTime( start );
  883. tst = idMath::ASin( tst );
  884. StopRecordTime( end );
  885. GetBest( start, end, bestClocks );
  886. testvar = ( testvar + tst ) * tst * ( 1.0f / idMath::PI );
  887. tst = rnd.CRandomFloat();
  888. }
  889. PrintClocks( " idMath::ASin( tst )", 1, bestClocks );
  890. bestClocks = 0;
  891. tst = rnd.CRandomFloat();
  892. for ( i = 0; i < NUMTESTS; i++ ) {
  893. StartRecordTime( start );
  894. tst = idMath::ASin16( tst );
  895. StopRecordTime( end );
  896. GetBest( start, end, bestClocks );
  897. testvar = ( testvar + tst ) * tst * ( 1.0f / idMath::PI );
  898. tst = rnd.CRandomFloat();
  899. }
  900. PrintClocks( " idMath::ASin16( tst )", 1, bestClocks );
  901. bestClocks = 0;
  902. tst = rnd.CRandomFloat();
  903. for ( i = 0; i < NUMTESTS; i++ ) {
  904. StartRecordTime( start );
  905. tst = idMath::ACos( tst );
  906. StopRecordTime( end );
  907. GetBest( start, end, bestClocks );
  908. testvar = ( testvar + tst ) * tst * ( 1.0f / idMath::PI );
  909. tst = rnd.CRandomFloat();
  910. }
  911. PrintClocks( " idMath::ACos( tst )", 1, bestClocks );
  912. bestClocks = 0;
  913. tst = rnd.CRandomFloat();
  914. for ( i = 0; i < NUMTESTS; i++ ) {
  915. StartRecordTime( start );
  916. tst = idMath::ACos16( tst );
  917. StopRecordTime( end );
  918. GetBest( start, end, bestClocks );
  919. testvar = ( testvar + tst ) * tst * ( 1.0f / idMath::PI );
  920. tst = rnd.CRandomFloat();
  921. }
  922. PrintClocks( " idMath::ACos16( tst )", 1, bestClocks );
  923. bestClocks = 0;
  924. tst = rnd.CRandomFloat();
  925. for ( i = 0; i < NUMTESTS; i++ ) {
  926. StartRecordTime( start );
  927. tst = idMath::ATan( tst );
  928. StopRecordTime( end );
  929. GetBest( start, end, bestClocks );
  930. testvar = ( testvar + tst ) * tst;
  931. tst = rnd.CRandomFloat();
  932. }
  933. PrintClocks( " idMath::ATan( tst )", 1, bestClocks );
  934. bestClocks = 0;
  935. tst = rnd.CRandomFloat();
  936. for ( i = 0; i < NUMTESTS; i++ ) {
  937. StartRecordTime( start );
  938. tst = idMath::ATan16( tst );
  939. StopRecordTime( end );
  940. GetBest( start, end, bestClocks );
  941. testvar = ( testvar + tst ) * tst;
  942. tst = rnd.CRandomFloat();
  943. }
  944. PrintClocks( " idMath::ATan16( tst )", 1, bestClocks );
  945. bestClocks = 0;
  946. tst = rnd.CRandomFloat();
  947. for ( i = 0; i < NUMTESTS; i++ ) {
  948. StartRecordTime( start );
  949. tst = idMath::Pow( 2.7f, tst );
  950. StopRecordTime( end );
  951. GetBest( start, end, bestClocks );
  952. testvar = ( testvar + tst ) * tst * 0.1f;
  953. tst = rnd.CRandomFloat();
  954. }
  955. PrintClocks( " idMath::Pow( tst )", 1, bestClocks );
  956. bestClocks = 0;
  957. tst = rnd.CRandomFloat();
  958. for ( i = 0; i < NUMTESTS; i++ ) {
  959. StartRecordTime( start );
  960. tst = idMath::Pow16( 2.7f, tst );
  961. StopRecordTime( end );
  962. GetBest( start, end, bestClocks );
  963. testvar = ( testvar + tst ) * tst * 0.1f;
  964. tst = rnd.CRandomFloat();
  965. }
  966. PrintClocks( " idMath::Pow16( tst )", 1, bestClocks );
  967. bestClocks = 0;
  968. tst = rnd.CRandomFloat();
  969. for ( i = 0; i < NUMTESTS; i++ ) {
  970. StartRecordTime( start );
  971. tst = idMath::Exp( tst );
  972. StopRecordTime( end );
  973. GetBest( start, end, bestClocks );
  974. testvar = ( testvar + tst ) * tst * 0.1f;
  975. tst = rnd.CRandomFloat();
  976. }
  977. PrintClocks( " idMath::Exp( tst )", 1, bestClocks );
  978. bestClocks = 0;
  979. tst = rnd.CRandomFloat();
  980. for ( i = 0; i < NUMTESTS; i++ ) {
  981. StartRecordTime( start );
  982. tst = idMath::Exp16( tst );
  983. StopRecordTime( end );
  984. GetBest( start, end, bestClocks );
  985. testvar = ( testvar + tst ) * tst * 0.1f;
  986. tst = rnd.CRandomFloat();
  987. }
  988. PrintClocks( " idMath::Exp16( tst )", 1, bestClocks );
  989. bestClocks = 0;
  990. tst = rnd.CRandomFloat();
  991. for ( i = 0; i < NUMTESTS; i++ ) {
  992. tst = fabs( tst ) + 1.0f;
  993. StartRecordTime( start );
  994. tst = idMath::Log( tst );
  995. StopRecordTime( end );
  996. GetBest( start, end, bestClocks );
  997. testvar = ( testvar + tst ) * tst;
  998. tst = rnd.CRandomFloat();
  999. }
  1000. PrintClocks( " idMath::Log( tst )", 1, bestClocks );
  1001. bestClocks = 0;
  1002. tst = rnd.CRandomFloat();
  1003. for ( i = 0; i < NUMTESTS; i++ ) {
  1004. tst = fabs( tst ) + 1.0f;
  1005. StartRecordTime( start );
  1006. tst = idMath::Log16( tst );
  1007. StopRecordTime( end );
  1008. GetBest( start, end, bestClocks );
  1009. testvar = ( testvar + tst ) * tst;
  1010. tst = rnd.CRandomFloat();
  1011. }
  1012. PrintClocks( " idMath::Log16( tst )", 1, bestClocks );
  1013. idLib::common->Printf( "testvar = %f\n", testvar );
  1014. idMat3 resultMat3;
  1015. idQuat fromQuat, toQuat, resultQuat;
  1016. idCQuat cq;
  1017. idAngles ang;
  1018. fromQuat = idAngles( 30, 45, 0 ).ToQuat();
  1019. toQuat = idAngles( 45, 0, 0 ).ToQuat();
  1020. cq = idAngles( 30, 45, 0 ).ToQuat().ToCQuat();
  1021. ang = idAngles( 30, 40, 50 );
  1022. bestClocks = 0;
  1023. for ( i = 0; i < NUMTESTS; i++ ) {
  1024. StartRecordTime( start );
  1025. resultMat3 = fromQuat.ToMat3();
  1026. StopRecordTime( end );
  1027. GetBest( start, end, bestClocks );
  1028. }
  1029. PrintClocks( " idQuat::ToMat3()", 1, bestClocks );
  1030. bestClocks = 0;
  1031. for ( i = 0; i < NUMTESTS; i++ ) {
  1032. StartRecordTime( start );
  1033. resultQuat.Slerp( fromQuat, toQuat, 0.3f );
  1034. StopRecordTime( end );
  1035. GetBest( start, end, bestClocks );
  1036. }
  1037. PrintClocks( " idQuat::Slerp()", 1, bestClocks );
  1038. bestClocks = 0;
  1039. for ( i = 0; i < NUMTESTS; i++ ) {
  1040. StartRecordTime( start );
  1041. resultQuat = cq.ToQuat();
  1042. StopRecordTime( end );
  1043. GetBest( start, end, bestClocks );
  1044. }
  1045. PrintClocks( " idCQuat::ToQuat()", 1, bestClocks );
  1046. bestClocks = 0;
  1047. for ( i = 0; i < NUMTESTS; i++ ) {
  1048. StartRecordTime( start );
  1049. resultQuat = ang.ToQuat();
  1050. StopRecordTime( end );
  1051. GetBest( start, end, bestClocks );
  1052. }
  1053. PrintClocks( " idAngles::ToQuat()", 1, bestClocks );
  1054. bestClocks = 0;
  1055. for ( i = 0; i < NUMTESTS; i++ ) {
  1056. StartRecordTime( start );
  1057. resultMat3 = ang.ToMat3();
  1058. StopRecordTime( end );
  1059. GetBest( start, end, bestClocks );
  1060. }
  1061. PrintClocks( " idAngles::ToMat3()", 1, bestClocks );
  1062. }
  1063. /*
  1064. ============
  1065. idSIMD::Test_f
  1066. ============
  1067. */
  1068. void idSIMD::Test_f( const idCmdArgs &args ) {
  1069. SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL );
  1070. p_simd = processor;
  1071. p_generic = generic;
  1072. if ( idStr::Length( args.Argv( 1 ) ) != 0 ) {
  1073. cpuid_t cpuid = idLib::sys->GetProcessorId();
  1074. idStr argString = args.Args();
  1075. argString.Replace( " ", "" );
  1076. if ( idStr::Icmp( argString, "SSE" ) == 0 ) {
  1077. if ( !( cpuid & CPUID_MMX ) || !( cpuid & CPUID_SSE ) ) {
  1078. common->Printf( "CPU does not support MMX & SSE\n" );
  1079. return;
  1080. }
  1081. p_simd = new (TAG_MATH) idSIMD_SSE;
  1082. } else {
  1083. common->Printf( "invalid argument, use: MMX, 3DNow, SSE, SSE2, SSE3, AltiVec\n" );
  1084. return;
  1085. }
  1086. }
  1087. idLib::common->SetRefreshOnPrint( true );
  1088. idLib::common->Printf( "using %s for SIMD processing\n", p_simd->GetName() );
  1089. GetBaseClocks();
  1090. TestMath();
  1091. TestMinMax();
  1092. TestMemcpy();
  1093. TestMemset();
  1094. idLib::common->Printf("====================================\n" );
  1095. TestBlendJoints();
  1096. TestBlendJointsFast();
  1097. TestConvertJointQuatsToJointMats();
  1098. TestConvertJointMatsToJointQuats();
  1099. TestTransformJoints();
  1100. TestUntransformJoints();
  1101. idLib::common->Printf("====================================\n" );
  1102. idLib::common->SetRefreshOnPrint( false );
  1103. if ( p_simd != processor ) {
  1104. delete p_simd;
  1105. }
  1106. p_simd = NULL;
  1107. p_generic = NULL;
  1108. SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_NORMAL );
  1109. }