12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061 |
- /*
- ===========================================================================
- Doom 3 GPL Source Code
- Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.
- This file is part of the Doom 3 GPL Source Code ("Doom 3 Source Code").
- Doom 3 Source Code is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- Doom 3 Source Code is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with Doom 3 Source Code. If not, see <http://www.gnu.org/licenses/>.
- In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code. If not, please request a copy in writing from id Software at the address below.
- If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
- ===========================================================================
- */
- #include "../precompiled.h"
- #pragma hdrstop
- #include "Simd_Generic.h"
- //===============================================================
- //
- // Generic implementation of idSIMDProcessor
- //
- //===============================================================
// UNROLLn(Y) expands to a braced block that invokes Y(index) once for every index
// in [0, count), in ascending order, with the main loop body repeated n times and
// the leftover indices handled afterwards. An integer named 'count' must be in
// scope where the macro is expanded, and Y must accept a single index expression.
//
// The loop locals carry a trailing underscore instead of the "_IX"/"_NM" spelling:
// identifiers that start with an underscore followed by an upper case letter are
// reserved for the implementation.
#define UNROLL1(Y) { \
	int unrollIdx_; \
	for ( unrollIdx_ = 0; unrollIdx_ < count; unrollIdx_++ ) { Y(unrollIdx_); } \
}
#define UNROLL2(Y) { \
	int unrollIdx_, unrollEnd_ = count&0xfffffffe; \
	for ( unrollIdx_ = 0; unrollIdx_ < unrollEnd_; unrollIdx_ += 2 ) { Y(unrollIdx_+0); Y(unrollIdx_+1); } \
	if ( unrollIdx_ < count ) { Y(unrollIdx_); } \
}
#define UNROLL4(Y) { \
	int unrollIdx_, unrollEnd_ = count&0xfffffffc; \
	for ( unrollIdx_ = 0; unrollIdx_ < unrollEnd_; unrollIdx_ += 4 ) { Y(unrollIdx_+0); Y(unrollIdx_+1); Y(unrollIdx_+2); Y(unrollIdx_+3); } \
	for ( ; unrollIdx_ < count; unrollIdx_++ ) { Y(unrollIdx_); } \
}
#define UNROLL8(Y) { \
	int unrollIdx_, unrollEnd_ = count&0xfffffff8; \
	for ( unrollIdx_ = 0; unrollIdx_ < unrollEnd_; unrollIdx_ += 8 ) { Y(unrollIdx_+0); Y(unrollIdx_+1); Y(unrollIdx_+2); Y(unrollIdx_+3); Y(unrollIdx_+4); Y(unrollIdx_+5); Y(unrollIdx_+6); Y(unrollIdx_+7); } \
	unrollEnd_ = count&0xfffffffe; \
	for ( ; unrollIdx_ < unrollEnd_; unrollIdx_ += 2 ) { Y(unrollIdx_); Y(unrollIdx_+1); } \
	if ( unrollIdx_ < count ) { Y(unrollIdx_); } \
}
// NODEFAULT marks the default label of a switch whose listed cases are meant to be
// exhaustive:
//   - debug builds (_DEBUG): reaching the default label trips assert( 0 )
//   - Microsoft compilers:   __assume( 0 ) tells the optimizer the label is unreachable
//   - everything else:       expands to nothing, so the switch has no default label
// The middle branch tests _MSC_VER with defined() because __assume is a Microsoft
// compiler intrinsic and a bare "#elif _WIN32" evaluates a macro that may not exist.
#ifdef _DEBUG
#define NODEFAULT	default: assert( 0 )
#elif defined( _MSC_VER )
#define NODEFAULT	default: __assume( 0 )
#else
#define NODEFAULT
#endif
- /*
- ============
- idSIMD_Generic::GetName
- ============
- */
- const char * idSIMD_Generic::GetName( void ) const {
- return "generic code";
- }
- /*
- ============
- idSIMD_Generic::Add
- dst[i] = constant + src[i];
- ============
- */
- void VPCALL idSIMD_Generic::Add( float *dst, const float constant, const float *src, const int count ) {
- #define OPER(X) dst[(X)] = src[(X)] + constant;
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Add
- dst[i] = src0[i] + src1[i];
- ============
- */
- void VPCALL idSIMD_Generic::Add( float *dst, const float *src0, const float *src1, const int count ) {
- #define OPER(X) dst[(X)] = src0[(X)] + src1[(X)];
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Sub
- dst[i] = constant - src[i];
- ============
- */
- void VPCALL idSIMD_Generic::Sub( float *dst, const float constant, const float *src, const int count ) {
- double c = constant;
- #define OPER(X) dst[(X)] = c - src[(X)];
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Sub
- dst[i] = src0[i] - src1[i];
- ============
- */
- void VPCALL idSIMD_Generic::Sub( float *dst, const float *src0, const float *src1, const int count ) {
- #define OPER(X) dst[(X)] = src0[(X)] - src1[(X)];
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Mul
- dst[i] = constant * src[i];
- ============
- */
- void VPCALL idSIMD_Generic::Mul( float *dst, const float constant, const float *src0, const int count) {
- double c = constant;
- #define OPER(X) (dst[(X)] = (c * src0[(X)]))
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Mul
- dst[i] = src0[i] * src1[i];
- ============
- */
- void VPCALL idSIMD_Generic::Mul( float *dst, const float *src0, const float *src1, const int count ) {
- #define OPER(X) (dst[(X)] = src0[(X)] * src1[(X)])
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Div
- dst[i] = constant / divisor[i];
- ============
- */
- void VPCALL idSIMD_Generic::Div( float *dst, const float constant, const float *divisor, const int count ) {
- double c = constant;
- #define OPER(X) (dst[(X)] = (c / divisor[(X)]))
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Div
- dst[i] = src0[i] / src1[i];
- ============
- */
- void VPCALL idSIMD_Generic::Div( float *dst, const float *src0, const float *src1, const int count ) {
- #define OPER(X) (dst[(X)] = src0[(X)] / src1[(X)])
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::MulAdd
- dst[i] += constant * src[i];
- ============
- */
- void VPCALL idSIMD_Generic::MulAdd( float *dst, const float constant, const float *src, const int count ) {
- double c = constant;
- #define OPER(X) (dst[(X)] += c * src[(X)])
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::MulAdd
- dst[i] += src0[i] * src1[i];
- ============
- */
- void VPCALL idSIMD_Generic::MulAdd( float *dst, const float *src0, const float *src1, const int count ) {
- #define OPER(X) (dst[(X)] += src0[(X)] * src1[(X)])
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::MulSub
- dst[i] -= constant * src[i];
- ============
- */
- void VPCALL idSIMD_Generic::MulSub( float *dst, const float constant, const float *src, const int count ) {
- double c = constant;
- #define OPER(X) (dst[(X)] -= c * src[(X)])
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::MulSub
- dst[i] -= src0[i] * src1[i];
- ============
- */
- void VPCALL idSIMD_Generic::MulSub( float *dst, const float *src0, const float *src1, const int count ) {
- #define OPER(X) (dst[(X)] -= src0[(X)] * src1[(X)])
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Dot
- dst[i] = constant * src[i];
- ============
- */
- void VPCALL idSIMD_Generic::Dot( float *dst, const idVec3 &constant, const idVec3 *src, const int count ) {
- #define OPER(X) dst[(X)] = constant * src[(X)];
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Dot
- dst[i] = constant * src[i].Normal() + src[i][3];
- ============
- */
- void VPCALL idSIMD_Generic::Dot( float *dst, const idVec3 &constant, const idPlane *src, const int count ) {
- #define OPER(X) dst[(X)] = constant * src[(X)].Normal() + src[(X)][3];
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Dot
- dst[i] = constant * src[i].xyz;
- ============
- */
- void VPCALL idSIMD_Generic::Dot( float *dst, const idVec3 &constant, const idDrawVert *src, const int count ) {
- #define OPER(X) dst[(X)] = constant * src[(X)].xyz;
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Dot
- dst[i] = constant.Normal() * src[i] + constant[3];
- ============
- */
- void VPCALL idSIMD_Generic::Dot( float *dst, const idPlane &constant, const idVec3 *src, const int count ) {
- #define OPER(X) dst[(X)] = constant.Normal() * src[(X)] + constant[3];
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Dot
- dst[i] = constant.Normal() * src[i].Normal() + constant[3] * src[i][3];
- ============
- */
- void VPCALL idSIMD_Generic::Dot( float *dst, const idPlane &constant, const idPlane *src, const int count ) {
- #define OPER(X) dst[(X)] = constant.Normal() * src[(X)].Normal() + constant[3] * src[(X)][3];
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Dot
- dst[i] = constant.Normal() * src[i].xyz + constant[3];
- ============
- */
- void VPCALL idSIMD_Generic::Dot( float *dst, const idPlane &constant, const idDrawVert *src, const int count ) {
- #define OPER(X) dst[(X)] = constant.Normal() * src[(X)].xyz + constant[3];
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Dot
- dst[i] = src0[i] * src1[i];
- ============
- */
- void VPCALL idSIMD_Generic::Dot( float *dst, const idVec3 *src0, const idVec3 *src1, const int count ) {
- #define OPER(X) dst[(X)] = src0[(X)] * src1[(X)];
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Dot
- dot = src1[0] * src2[0] + src1[1] * src2[1] + src1[2] * src2[2] + ...
- ============
- */
- void VPCALL idSIMD_Generic::Dot( float &dot, const float *src1, const float *src2, const int count ) {
- #if 1
- switch( count ) {
- case 0: {
- dot = 0.0f;
- return;
- }
- case 1: {
- dot = src1[0] * src2[0];
- return;
- }
- case 2: {
- dot = src1[0] * src2[0] + src1[1] * src2[1];
- return;
- }
- case 3: {
- dot = src1[0] * src2[0] + src1[1] * src2[1] + src1[2] * src2[2];
- return;
- }
- default: {
- int i;
- double s0, s1, s2, s3;
- s0 = src1[0] * src2[0];
- s1 = src1[1] * src2[1];
- s2 = src1[2] * src2[2];
- s3 = src1[3] * src2[3];
- for ( i = 4; i < count-7; i += 8 ) {
- s0 += src1[i+0] * src2[i+0];
- s1 += src1[i+1] * src2[i+1];
- s2 += src1[i+2] * src2[i+2];
- s3 += src1[i+3] * src2[i+3];
- s0 += src1[i+4] * src2[i+4];
- s1 += src1[i+5] * src2[i+5];
- s2 += src1[i+6] * src2[i+6];
- s3 += src1[i+7] * src2[i+7];
- }
- switch( count - i ) {
- NODEFAULT;
- case 7: s0 += src1[i+6] * src2[i+6];
- case 6: s1 += src1[i+5] * src2[i+5];
- case 5: s2 += src1[i+4] * src2[i+4];
- case 4: s3 += src1[i+3] * src2[i+3];
- case 3: s0 += src1[i+2] * src2[i+2];
- case 2: s1 += src1[i+1] * src2[i+1];
- case 1: s2 += src1[i+0] * src2[i+0];
- case 0: break;
- }
- double sum;
- sum = s3;
- sum += s2;
- sum += s1;
- sum += s0;
- dot = sum;
- }
- }
- #else
- dot = 0.0f;
- for ( i = 0; i < count; i++ ) {
- dot += src1[i] * src2[i];
- }
- #endif
- }
- /*
- ============
- idSIMD_Generic::CmpGT
- dst[i] = src0[i] > constant;
- ============
- */
- void VPCALL idSIMD_Generic::CmpGT( byte *dst, const float *src0, const float constant, const int count ) {
- #define OPER(X) dst[(X)] = src0[(X)] > constant;
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::CmpGT
- dst[i] |= ( src0[i] > constant ) << bitNum;
- ============
- */
- void VPCALL idSIMD_Generic::CmpGT( byte *dst, const byte bitNum, const float *src0, const float constant, const int count ) {
- #define OPER(X) dst[(X)] |= ( src0[(X)] > constant ) << bitNum;
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::CmpGE
- dst[i] = src0[i] >= constant;
- ============
- */
- void VPCALL idSIMD_Generic::CmpGE( byte *dst, const float *src0, const float constant, const int count ) {
- #define OPER(X) dst[(X)] = src0[(X)] >= constant;
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::CmpGE
- dst[i] |= ( src0[i] >= constant ) << bitNum;
- ============
- */
- void VPCALL idSIMD_Generic::CmpGE( byte *dst, const byte bitNum, const float *src0, const float constant, const int count ) {
- #define OPER(X) dst[(X)] |= ( src0[(X)] >= constant ) << bitNum;
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::CmpLT
- dst[i] = src0[i] < constant;
- ============
- */
- void VPCALL idSIMD_Generic::CmpLT( byte *dst, const float *src0, const float constant, const int count ) {
- #define OPER(X) dst[(X)] = src0[(X)] < constant;
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::CmpLT
- dst[i] |= ( src0[i] < constant ) << bitNum;
- ============
- */
- void VPCALL idSIMD_Generic::CmpLT( byte *dst, const byte bitNum, const float *src0, const float constant, const int count ) {
- #define OPER(X) dst[(X)] |= ( src0[(X)] < constant ) << bitNum;
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::CmpLE
- dst[i] = src0[i] <= constant;
- ============
- */
- void VPCALL idSIMD_Generic::CmpLE( byte *dst, const float *src0, const float constant, const int count ) {
- #define OPER(X) dst[(X)] = src0[(X)] <= constant;
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::CmpLE
- dst[i] |= ( src0[i] <= constant ) << bitNum;
- ============
- */
- void VPCALL idSIMD_Generic::CmpLE( byte *dst, const byte bitNum, const float *src0, const float constant, const int count ) {
- #define OPER(X) dst[(X)] |= ( src0[(X)] <= constant ) << bitNum;
- UNROLL4(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::MinMax
- ============
- */
- void VPCALL idSIMD_Generic::MinMax( float &min, float &max, const float *src, const int count ) {
- min = idMath::INFINITY; max = -idMath::INFINITY;
- #define OPER(X) if ( src[(X)] < min ) {min = src[(X)];} if ( src[(X)] > max ) {max = src[(X)];}
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::MinMax
- ============
- */
- void VPCALL idSIMD_Generic::MinMax( idVec2 &min, idVec2 &max, const idVec2 *src, const int count ) {
- min[0] = min[1] = idMath::INFINITY; max[0] = max[1] = -idMath::INFINITY;
- #define OPER(X) const idVec2 &v = src[(X)]; if ( v[0] < min[0] ) { min[0] = v[0]; } if ( v[0] > max[0] ) { max[0] = v[0]; } if ( v[1] < min[1] ) { min[1] = v[1]; } if ( v[1] > max[1] ) { max[1] = v[1]; }
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::MinMax
- ============
- */
- void VPCALL idSIMD_Generic::MinMax( idVec3 &min, idVec3 &max, const idVec3 *src, const int count ) {
- min[0] = min[1] = min[2] = idMath::INFINITY; max[0] = max[1] = max[2] = -idMath::INFINITY;
- #define OPER(X) const idVec3 &v = src[(X)]; if ( v[0] < min[0] ) { min[0] = v[0]; } if ( v[0] > max[0] ) { max[0] = v[0]; } if ( v[1] < min[1] ) { min[1] = v[1]; } if ( v[1] > max[1] ) { max[1] = v[1]; } if ( v[2] < min[2] ) { min[2] = v[2]; } if ( v[2] > max[2] ) { max[2] = v[2]; }
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::MinMax
- ============
- */
- void VPCALL idSIMD_Generic::MinMax( idVec3 &min, idVec3 &max, const idDrawVert *src, const int count ) {
- min[0] = min[1] = min[2] = idMath::INFINITY; max[0] = max[1] = max[2] = -idMath::INFINITY;
- #define OPER(X) const idVec3 &v = src[(X)].xyz; if ( v[0] < min[0] ) { min[0] = v[0]; } if ( v[0] > max[0] ) { max[0] = v[0]; } if ( v[1] < min[1] ) { min[1] = v[1]; } if ( v[1] > max[1] ) { max[1] = v[1]; } if ( v[2] < min[2] ) { min[2] = v[2]; } if ( v[2] > max[2] ) { max[2] = v[2]; }
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::MinMax
- ============
- */
- void VPCALL idSIMD_Generic::MinMax( idVec3 &min, idVec3 &max, const idDrawVert *src, const int *indexes, const int count ) {
- min[0] = min[1] = min[2] = idMath::INFINITY; max[0] = max[1] = max[2] = -idMath::INFINITY;
- #define OPER(X) const idVec3 &v = src[indexes[(X)]].xyz; if ( v[0] < min[0] ) { min[0] = v[0]; } if ( v[0] > max[0] ) { max[0] = v[0]; } if ( v[1] < min[1] ) { min[1] = v[1]; } if ( v[1] > max[1] ) { max[1] = v[1]; } if ( v[2] < min[2] ) { min[2] = v[2]; } if ( v[2] > max[2] ) { max[2] = v[2]; }
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Clamp
- ============
- */
- void VPCALL idSIMD_Generic::Clamp( float *dst, const float *src, const float min, const float max, const int count ) {
- #define OPER(X) dst[(X)] = src[(X)] < min ? min : src[(X)] > max ? max : src[(X)];
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::ClampMin
- ============
- */
- void VPCALL idSIMD_Generic::ClampMin( float *dst, const float *src, const float min, const int count ) {
- #define OPER(X) dst[(X)] = src[(X)] < min ? min : src[(X)];
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::ClampMax
- ============
- */
- void VPCALL idSIMD_Generic::ClampMax( float *dst, const float *src, const float max, const int count ) {
- #define OPER(X) dst[(X)] = src[(X)] > max ? max : src[(X)];
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ================
- idSIMD_Generic::Memcpy
- ================
- */
- void VPCALL idSIMD_Generic::Memcpy( void *dst, const void *src, const int count ) {
- memcpy( dst, src, count );
- }
- /*
- ================
- idSIMD_Generic::Memset
- ================
- */
- void VPCALL idSIMD_Generic::Memset( void *dst, const int val, const int count ) {
- memset( dst, val, count );
- }
- /*
- ============
- idSIMD_Generic::Zero16
- ============
- */
- void VPCALL idSIMD_Generic::Zero16( float *dst, const int count ) {
- memset( dst, 0, count * sizeof( float ) );
- }
- /*
- ============
- idSIMD_Generic::Negate16
- ============
- */
- void VPCALL idSIMD_Generic::Negate16( float *dst, const int count ) {
- unsigned int *ptr = reinterpret_cast<unsigned int *>(dst);
- #define OPER(X) ptr[(X)] ^= ( 1 << 31 ) // IEEE 32 bits float sign bit
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Copy16
- ============
- */
- void VPCALL idSIMD_Generic::Copy16( float *dst, const float *src, const int count ) {
- #define OPER(X) dst[(X)] = src[(X)]
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Add16
- ============
- */
- void VPCALL idSIMD_Generic::Add16( float *dst, const float *src1, const float *src2, const int count ) {
- #define OPER(X) dst[(X)] = src1[(X)] + src2[(X)]
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Sub16
- ============
- */
- void VPCALL idSIMD_Generic::Sub16( float *dst, const float *src1, const float *src2, const int count ) {
- #define OPER(X) dst[(X)] = src1[(X)] - src2[(X)]
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::Mul16
- ============
- */
- void VPCALL idSIMD_Generic::Mul16( float *dst, const float *src1, const float constant, const int count ) {
- #define OPER(X) dst[(X)] = src1[(X)] * constant
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::AddAssign16
- ============
- */
- void VPCALL idSIMD_Generic::AddAssign16( float *dst, const float *src, const int count ) {
- #define OPER(X) dst[(X)] += src[(X)]
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::SubAssign16
- ============
- */
- void VPCALL idSIMD_Generic::SubAssign16( float *dst, const float *src, const int count ) {
- #define OPER(X) dst[(X)] -= src[(X)]
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::MulAssign16
- ============
- */
- void VPCALL idSIMD_Generic::MulAssign16( float *dst, const float constant, const int count ) {
- #define OPER(X) dst[(X)] *= constant
- UNROLL1(OPER)
- #undef OPER
- }
- /*
- ============
- idSIMD_Generic::MatX_MultiplyVecX
- ============
- */
- void VPCALL idSIMD_Generic::MatX_MultiplyVecX( idVecX &dst, const idMatX &mat, const idVecX &vec ) {
- int i, j, numRows;
- const float *mPtr, *vPtr;
- float *dstPtr;
- assert( vec.GetSize() >= mat.GetNumColumns() );
- assert( dst.GetSize() >= mat.GetNumRows() );
- mPtr = mat.ToFloatPtr();
- vPtr = vec.ToFloatPtr();
- dstPtr = dst.ToFloatPtr();
- numRows = mat.GetNumRows();
- switch( mat.GetNumColumns() ) {
- case 1:
- for ( i = 0; i < numRows; i++ ) {
- dstPtr[i] = mPtr[0] * vPtr[0];
- mPtr++;
- }
- break;
- case 2:
- for ( i = 0; i < numRows; i++ ) {
- dstPtr[i] = mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1];
- mPtr += 2;
- }
- break;
- case 3:
- for ( i = 0; i < numRows; i++ ) {
- dstPtr[i] = mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2];
- mPtr += 3;
- }
- break;
- case 4:
- for ( i = 0; i < numRows; i++ ) {
- dstPtr[i] = mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2] +
- mPtr[3] * vPtr[3];
- mPtr += 4;
- }
- break;
- case 5:
- for ( i = 0; i < numRows; i++ ) {
- dstPtr[i] = mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2] +
- mPtr[3] * vPtr[3] + mPtr[4] * vPtr[4];
- mPtr += 5;
- }
- break;
- case 6:
- for ( i = 0; i < numRows; i++ ) {
- dstPtr[i] = mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2] +
- mPtr[3] * vPtr[3] + mPtr[4] * vPtr[4] + mPtr[5] * vPtr[5];
- mPtr += 6;
- }
- break;
- default:
- int numColumns = mat.GetNumColumns();
- for ( i = 0; i < numRows; i++ ) {
- float sum = mPtr[0] * vPtr[0];
- for ( j = 1; j < numColumns; j++ ) {
- sum += mPtr[j] * vPtr[j];
- }
- dstPtr[i] = sum;
- mPtr += numColumns;
- }
- break;
- }
- }
- /*
- ============
- idSIMD_Generic::MatX_MultiplyAddVecX
- adds the matrix-vector product to dst: dst[i] += dot( row i of mat, vec )
- mat is read through ToFloatPtr() as GetNumRows() consecutive rows of GetNumColumns() floats
- matrices with 1 to 6 columns use unrolled dot products, all other widths use the generic loop
- a matrix without columns contributes nothing and leaves dst untouched
- ============
- */
- void VPCALL idSIMD_Generic::MatX_MultiplyAddVecX( idVecX &dst, const idMatX &mat, const idVecX &vec ) {
-     assert( vec.GetSize() >= mat.GetNumColumns() );
-     assert( dst.GetSize() >= mat.GetNumRows() );
-     const float *mPtr = mat.ToFloatPtr();
-     const float *vPtr = vec.ToFloatPtr();
-     float *dstPtr = dst.ToFloatPtr();
-     const int numRows = mat.GetNumRows();
-     const int numColumns = mat.GetNumColumns();
-     int i, j;
-     switch( numColumns ) {
-         case 0:
-             // every row is empty, so each dot product is an empty sum;
-             // handled here because the generic loop below always reads mPtr[0] and vPtr[0]
-             break;
-         case 1:
-             for ( i = 0; i < numRows; i++ ) {
-                 dstPtr[i] += mPtr[0] * vPtr[0];
-                 mPtr++;
-             }
-             break;
-         case 2:
-             for ( i = 0; i < numRows; i++ ) {
-                 dstPtr[i] += mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1];
-                 mPtr += 2;
-             }
-             break;
-         case 3:
-             for ( i = 0; i < numRows; i++ ) {
-                 dstPtr[i] += mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2];
-                 mPtr += 3;
-             }
-             break;
-         case 4:
-             for ( i = 0; i < numRows; i++ ) {
-                 dstPtr[i] += mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2] +
-                     mPtr[3] * vPtr[3];
-                 mPtr += 4;
-             }
-             break;
-         case 5:
-             for ( i = 0; i < numRows; i++ ) {
-                 dstPtr[i] += mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2] +
-                     mPtr[3] * vPtr[3] + mPtr[4] * vPtr[4];
-                 mPtr += 5;
-             }
-             break;
-         case 6:
-             for ( i = 0; i < numRows; i++ ) {
-                 dstPtr[i] += mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2] +
-                     mPtr[3] * vPtr[3] + mPtr[4] * vPtr[4] + mPtr[5] * vPtr[5];
-                 mPtr += 6;
-             }
-             break;
-         default: {
-             // generic width: accumulate one row at a time, then step to the next row
-             for ( i = 0; i < numRows; i++ ) {
-                 float sum = mPtr[0] * vPtr[0];
-                 for ( j = 1; j < numColumns; j++ ) {
-                     sum += mPtr[j] * vPtr[j];
-                 }
-                 dstPtr[i] += sum;
-                 mPtr += numColumns;
-             }
-             break;
-         }
-     }
- }
- /*
- ============
- idSIMD_Generic::MatX_MultiplySubVecX
- subtracts the matrix-vector product from dst: dst[i] -= dot( row i of mat, vec )
- mat is read through ToFloatPtr() as GetNumRows() consecutive rows of GetNumColumns() floats
- matrices with 1 to 6 columns use unrolled dot products, all other widths use the generic loop
- a matrix without columns contributes nothing and leaves dst untouched
- ============
- */
- void VPCALL idSIMD_Generic::MatX_MultiplySubVecX( idVecX &dst, const idMatX &mat, const idVecX &vec ) {
-     assert( vec.GetSize() >= mat.GetNumColumns() );
-     assert( dst.GetSize() >= mat.GetNumRows() );
-     const float *mPtr = mat.ToFloatPtr();
-     const float *vPtr = vec.ToFloatPtr();
-     float *dstPtr = dst.ToFloatPtr();
-     const int numRows = mat.GetNumRows();
-     const int numColumns = mat.GetNumColumns();
-     int i, j;
-     switch( numColumns ) {
-         case 0:
-             // every row is empty, so each dot product is an empty sum;
-             // handled here because the generic loop below always reads mPtr[0] and vPtr[0]
-             break;
-         case 1:
-             for ( i = 0; i < numRows; i++ ) {
-                 dstPtr[i] -= mPtr[0] * vPtr[0];
-                 mPtr++;
-             }
-             break;
-         case 2:
-             for ( i = 0; i < numRows; i++ ) {
-                 dstPtr[i] -= mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1];
-                 mPtr += 2;
-             }
-             break;
-         case 3:
-             for ( i = 0; i < numRows; i++ ) {
-                 dstPtr[i] -= mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2];
-                 mPtr += 3;
-             }
-             break;
-         case 4:
-             for ( i = 0; i < numRows; i++ ) {
-                 dstPtr[i] -= mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2] +
-                     mPtr[3] * vPtr[3];
-                 mPtr += 4;
-             }
-             break;
-         case 5:
-             for ( i = 0; i < numRows; i++ ) {
-                 dstPtr[i] -= mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2] +
-                     mPtr[3] * vPtr[3] + mPtr[4] * vPtr[4];
-                 mPtr += 5;
-             }
-             break;
-         case 6:
-             for ( i = 0; i < numRows; i++ ) {
-                 dstPtr[i] -= mPtr[0] * vPtr[0] + mPtr[1] * vPtr[1] + mPtr[2] * vPtr[2] +
-                     mPtr[3] * vPtr[3] + mPtr[4] * vPtr[4] + mPtr[5] * vPtr[5];
-                 mPtr += 6;
-             }
-             break;
-         default: {
-             // generic width: accumulate one row at a time, then step to the next row
-             for ( i = 0; i < numRows; i++ ) {
-                 float sum = mPtr[0] * vPtr[0];
-                 for ( j = 1; j < numColumns; j++ ) {
-                     sum += mPtr[j] * vPtr[j];
-                 }
-                 dstPtr[i] -= sum;
-                 mPtr += numColumns;
-             }
-             break;
-         }
-     }
- }
- /*
- ============
- idSIMD_Generic::MatX_TransposeMultiplyVecX
- stores the transposed matrix-vector product in dst: dst[i] = dot( column i of mat, vec )
- mat is read through ToFloatPtr() as consecutive rows of GetNumColumns() floats,
- so stepping down a column means advancing the pointer by GetNumColumns()
- matrices with 1 to 6 rows use unrolled dot products, all other heights use the generic loop
- a matrix without rows yields an all-zero dst
- ============
- */
- void VPCALL idSIMD_Generic::MatX_TransposeMultiplyVecX( idVecX &dst, const idMatX &mat, const idVecX &vec ) {
-     assert( vec.GetSize() >= mat.GetNumRows() );
-     assert( dst.GetSize() >= mat.GetNumColumns() );
-     const float *mPtr = mat.ToFloatPtr();
-     const float *vPtr = vec.ToFloatPtr();
-     float *dstPtr = dst.ToFloatPtr();
-     const int numColumns = mat.GetNumColumns();
-     const int numRows = mat.GetNumRows();
-     int i, j;
-     switch( numRows ) {
-         case 0:
-             // every column is empty, so each dot product is an empty sum;
-             // handled here because the generic loop below always reads mPtr[0] and vPtr[0]
-             for ( i = 0; i < numColumns; i++ ) {
-                 dstPtr[i] = 0.0f;
-             }
-             break;
-         case 1:
-             for ( i = 0; i < numColumns; i++ ) {
-                 dstPtr[i] = mPtr[0] * vPtr[0];
-                 mPtr++;
-             }
-             break;
-         case 2:
-             for ( i = 0; i < numColumns; i++ ) {
-                 dstPtr[i] = mPtr[0] * vPtr[0] + mPtr[numColumns] * vPtr[1];
-                 mPtr++;
-             }
-             break;
-         case 3:
-             for ( i = 0; i < numColumns; i++ ) {
-                 dstPtr[i] = mPtr[0] * vPtr[0] + mPtr[numColumns] * vPtr[1] + mPtr[2*numColumns] * vPtr[2];
-                 mPtr++;
-             }
-             break;
-         case 4:
-             for ( i = 0; i < numColumns; i++ ) {
-                 dstPtr[i] = mPtr[0] * vPtr[0] + mPtr[numColumns] * vPtr[1] + mPtr[2*numColumns] * vPtr[2] +
-                     mPtr[3*numColumns] * vPtr[3];
-                 mPtr++;
-             }
-             break;
-         case 5:
-             for ( i = 0; i < numColumns; i++ ) {
-                 dstPtr[i] = mPtr[0] * vPtr[0] + mPtr[numColumns] * vPtr[1] + mPtr[2*numColumns] * vPtr[2] +
-                     mPtr[3*numColumns] * vPtr[3] + mPtr[4*numColumns] * vPtr[4];
-                 mPtr++;
-             }
-             break;
-         case 6:
-             for ( i = 0; i < numColumns; i++ ) {
-                 dstPtr[i] = mPtr[0] * vPtr[0] + mPtr[numColumns] * vPtr[1] + mPtr[2*numColumns] * vPtr[2] +
-                     mPtr[3*numColumns] * vPtr[3] + mPtr[4*numColumns] * vPtr[4] + mPtr[5*numColumns] * vPtr[5];
-                 mPtr++;
-             }
-             break;
-         default: {
-             // generic height: restart at the top of column i and walk down it one row at a time
-             for ( i = 0; i < numColumns; i++ ) {
-                 mPtr = mat.ToFloatPtr() + i;
-                 float sum = mPtr[0] * vPtr[0];
-                 for ( j = 1; j < numRows; j++ ) {
-                     mPtr += numColumns;
-                     sum += mPtr[0] * vPtr[j];
-                 }
-                 dstPtr[i] = sum;
-             }
-             break;
-         }
-     }
- }
- /*
- ============
- idSIMD_Generic::MatX_TransposeMultiplyAddVecX
- adds the transposed matrix-vector product to dst: dst[i] += dot( column i of mat, vec )
- mat is read through ToFloatPtr() as consecutive rows of GetNumColumns() floats,
- so stepping down a column means advancing the pointer by GetNumColumns()
- matrices with 1 to 6 rows use unrolled dot products, all other heights use the generic loop
- a matrix without rows contributes nothing and leaves dst untouched
- ============
- */
- void VPCALL idSIMD_Generic::MatX_TransposeMultiplyAddVecX( idVecX &dst, const idMatX &mat, const idVecX &vec ) {
-     assert( vec.GetSize() >= mat.GetNumRows() );
-     assert( dst.GetSize() >= mat.GetNumColumns() );
-     const float *mPtr = mat.ToFloatPtr();
-     const float *vPtr = vec.ToFloatPtr();
-     float *dstPtr = dst.ToFloatPtr();
-     const int numColumns = mat.GetNumColumns();
-     const int numRows = mat.GetNumRows();
-     int i, j;
-     switch( numRows ) {
-         case 0:
-             // every column is empty, so each dot product is an empty sum;
-             // handled here because the generic loop below always reads mPtr[0] and vPtr[0]
-             break;
-         case 1:
-             for ( i = 0; i < numColumns; i++ ) {
-                 dstPtr[i] += mPtr[0] * vPtr[0];
-                 mPtr++;
-             }
-             break;
-         case 2:
-             for ( i = 0; i < numColumns; i++ ) {
-                 dstPtr[i] += mPtr[0] * vPtr[0] + mPtr[numColumns] * vPtr[1];
-                 mPtr++;
-             }
-             break;
-         case 3:
-             for ( i = 0; i < numColumns; i++ ) {
-                 dstPtr[i] += mPtr[0] * vPtr[0] + mPtr[numColumns] * vPtr[1] + mPtr[2*numColumns] * vPtr[2];
-                 mPtr++;
-             }
-             break;
-         case 4:
-             for ( i = 0; i < numColumns; i++ ) {
-                 dstPtr[i] += mPtr[0] * vPtr[0] + mPtr[numColumns] * vPtr[1] + mPtr[2*numColumns] * vPtr[2] +
-                     mPtr[3*numColumns] * vPtr[3];
-                 mPtr++;
-             }
-             break;
-         case 5:
-             for ( i = 0; i < numColumns; i++ ) {
-                 dstPtr[i] += mPtr[0] * vPtr[0] + mPtr[numColumns] * vPtr[1] + mPtr[2*numColumns] * vPtr[2] +
-                     mPtr[3*numColumns] * vPtr[3] + mPtr[4*numColumns] * vPtr[4];
-                 mPtr++;
-             }
-             break;
-         case 6:
-             for ( i = 0; i < numColumns; i++ ) {
-                 dstPtr[i] += mPtr[0] * vPtr[0] + mPtr[numColumns] * vPtr[1] + mPtr[2*numColumns] * vPtr[2] +
-                     mPtr[3*numColumns] * vPtr[3] + mPtr[4*numColumns] * vPtr[4] + mPtr[5*numColumns] * vPtr[5];
-                 mPtr++;
-             }
-             break;
-         default: {
-             // generic height: restart at the top of column i and walk down it one row at a time
-             for ( i = 0; i < numColumns; i++ ) {
-                 mPtr = mat.ToFloatPtr() + i;
-                 float sum = mPtr[0] * vPtr[0];
-                 for ( j = 1; j < numRows; j++ ) {
-                     mPtr += numColumns;
-                     sum += mPtr[0] * vPtr[j];
-                 }
-                 dstPtr[i] += sum;
-             }
-             break;
-         }
-     }
- }
- /*
- ============
- idSIMD_Generic::MatX_TransposeMultiplySubVecX
- subtracts the transposed matrix-vector product from dst: dst[i] -= dot( column i of mat, vec )
- mat is read through ToFloatPtr() as consecutive rows of GetNumColumns() floats,
- so stepping down a column means advancing the pointer by GetNumColumns()
- matrices with 1 to 6 rows use unrolled dot products, all other heights use the generic loop
- a matrix without rows contributes nothing and leaves dst untouched
- ============
- */
- void VPCALL idSIMD_Generic::MatX_TransposeMultiplySubVecX( idVecX &dst, const idMatX &mat, const idVecX &vec ) {
-     assert( vec.GetSize() >= mat.GetNumRows() );
-     assert( dst.GetSize() >= mat.GetNumColumns() );
-     const float *mPtr = mat.ToFloatPtr();
-     const float *vPtr = vec.ToFloatPtr();
-     float *dstPtr = dst.ToFloatPtr();
-     const int numColumns = mat.GetNumColumns();
-     const int numRows = mat.GetNumRows();
-     int i, j;
-     switch( numRows ) {
-         case 0:
-             // every column is empty, so each dot product is an empty sum;
-             // handled here because the generic loop below always reads mPtr[0] and vPtr[0]
-             break;
-         case 1:
-             for ( i = 0; i < numColumns; i++ ) {
-                 dstPtr[i] -= mPtr[0] * vPtr[0];
-                 mPtr++;
-             }
-             break;
-         case 2:
-             for ( i = 0; i < numColumns; i++ ) {
-                 dstPtr[i] -= mPtr[0] * vPtr[0] + mPtr[numColumns] * vPtr[1];
-                 mPtr++;
-             }
-             break;
-         case 3:
-             for ( i = 0; i < numColumns; i++ ) {
-                 dstPtr[i] -= mPtr[0] * vPtr[0] + mPtr[numColumns] * vPtr[1] + mPtr[2*numColumns] * vPtr[2];
-                 mPtr++;
-             }
-             break;
-         case 4:
-             for ( i = 0; i < numColumns; i++ ) {
-                 dstPtr[i] -= mPtr[0] * vPtr[0] + mPtr[numColumns] * vPtr[1] + mPtr[2*numColumns] * vPtr[2] +
-                     mPtr[3*numColumns] * vPtr[3];
-                 mPtr++;
-             }
-             break;
-         case 5:
-             for ( i = 0; i < numColumns; i++ ) {
-                 dstPtr[i] -= mPtr[0] * vPtr[0] + mPtr[numColumns] * vPtr[1] + mPtr[2*numColumns] * vPtr[2] +
-                     mPtr[3*numColumns] * vPtr[3] + mPtr[4*numColumns] * vPtr[4];
-                 mPtr++;
-             }
-             break;
-         case 6:
-             for ( i = 0; i < numColumns; i++ ) {
-                 dstPtr[i] -= mPtr[0] * vPtr[0] + mPtr[numColumns] * vPtr[1] + mPtr[2*numColumns] * vPtr[2] +
-                     mPtr[3*numColumns] * vPtr[3] + mPtr[4*numColumns] * vPtr[4] + mPtr[5*numColumns] * vPtr[5];
-                 mPtr++;
-             }
-             break;
-         default: {
-             // generic height: restart at the top of column i and walk down it one row at a time
-             for ( i = 0; i < numColumns; i++ ) {
-                 mPtr = mat.ToFloatPtr() + i;
-                 float sum = mPtr[0] * vPtr[0];
-                 for ( j = 1; j < numRows; j++ ) {
-                     mPtr += numColumns;
-                     sum += mPtr[0] * vPtr[j];
-                 }
-                 dstPtr[i] -= sum;
-             }
-             break;
-         }
-     }
- }
- /*
- ============
- idSIMD_Generic::MatX_MultiplyMatX
- computes dst = m1 * m2, writing m1.GetNumRows() * m2.GetNumColumns() floats
- consecutively through dst.ToFloatPtr(); all three matrices are addressed as
- consecutive rows of floats
- optimizes the following matrix multiplications:
- NxN * Nx6
- 6xN * Nx6
- Nx6 * 6xN
- 6x6 * 6xN
- with N in the range [1-6].
- every other shape with an inner dimension of 1 to 6 uses the per-dimension
- general loop, larger inner dimensions use the fully generic loop and an
- empty inner dimension produces an all-zero dst
- ============
- */
- void VPCALL idSIMD_Generic::MatX_MultiplyMatX( idMatX &dst, const idMatX &m1, const idMatX &m2 ) {
-     int i, j, k, l, n;
-     float *dstPtr;
-     const float *m1Ptr, *m2Ptr;
-     double sum;
-     assert( m1.GetNumColumns() == m2.GetNumRows() );
-     dstPtr = dst.ToFloatPtr();
-     m1Ptr = m1.ToFloatPtr();
-     m2Ptr = m2.ToFloatPtr();
-     k = m1.GetNumRows();        // rows of the result
-     l = m2.GetNumColumns();     // columns of the result
-     switch( m1.GetNumColumns() ) {
-         case 0: {
-             // empty inner dimension: every element of dst is an empty sum;
-             // handled here because the generic loop always reads m1Ptr[0] and m2Ptr[0]
-             for ( i = 0; i < k * l; i++ ) {
-                 *dstPtr++ = 0.0f;
-             }
-             break;
-         }
-         case 1: {
-             if ( l == 6 ) {
-                 for ( i = 0; i < k; i++ ) {     // Nx1 * 1x6
-                     *dstPtr++ = m1Ptr[i] * m2Ptr[0];
-                     *dstPtr++ = m1Ptr[i] * m2Ptr[1];
-                     *dstPtr++ = m1Ptr[i] * m2Ptr[2];
-                     *dstPtr++ = m1Ptr[i] * m2Ptr[3];
-                     *dstPtr++ = m1Ptr[i] * m2Ptr[4];
-                     *dstPtr++ = m1Ptr[i] * m2Ptr[5];
-                 }
-                 return;
-             }
-             for ( i = 0; i < k; i++ ) {
-                 m2Ptr = m2.ToFloatPtr();
-                 for ( j = 0; j < l; j++ ) {
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[0];
-                     m2Ptr++;
-                 }
-                 m1Ptr++;
-             }
-             break;
-         }
-         case 2: {
-             if ( l == 6 ) {
-                 for ( i = 0; i < k; i++ ) {     // Nx2 * 2x6
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[6];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[1] + m1Ptr[1] * m2Ptr[7];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[2] + m1Ptr[1] * m2Ptr[8];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[3] + m1Ptr[1] * m2Ptr[9];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[4] + m1Ptr[1] * m2Ptr[10];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[5] + m1Ptr[1] * m2Ptr[11];
-                     m1Ptr += 2;
-                 }
-                 return;
-             }
-             for ( i = 0; i < k; i++ ) {
-                 m2Ptr = m2.ToFloatPtr();
-                 for ( j = 0; j < l; j++ ) {
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[l];
-                     m2Ptr++;
-                 }
-                 m1Ptr += 2;
-             }
-             break;
-         }
-         case 3: {
-             if ( l == 6 ) {
-                 for ( i = 0; i < k; i++ ) {     // Nx3 * 3x6
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[6] + m1Ptr[2] * m2Ptr[12];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[1] + m1Ptr[1] * m2Ptr[7] + m1Ptr[2] * m2Ptr[13];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[2] + m1Ptr[1] * m2Ptr[8] + m1Ptr[2] * m2Ptr[14];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[3] + m1Ptr[1] * m2Ptr[9] + m1Ptr[2] * m2Ptr[15];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[4] + m1Ptr[1] * m2Ptr[10] + m1Ptr[2] * m2Ptr[16];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[5] + m1Ptr[1] * m2Ptr[11] + m1Ptr[2] * m2Ptr[17];
-                     m1Ptr += 3;
-                 }
-                 return;
-             }
-             for ( i = 0; i < k; i++ ) {
-                 m2Ptr = m2.ToFloatPtr();
-                 for ( j = 0; j < l; j++ ) {
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[l] + m1Ptr[2] * m2Ptr[2*l];
-                     m2Ptr++;
-                 }
-                 m1Ptr += 3;
-             }
-             break;
-         }
-         case 4: {
-             if ( l == 6 ) {
-                 for ( i = 0; i < k; i++ ) {     // Nx4 * 4x6
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[6] + m1Ptr[2] * m2Ptr[12] + m1Ptr[3] * m2Ptr[18];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[1] + m1Ptr[1] * m2Ptr[7] + m1Ptr[2] * m2Ptr[13] + m1Ptr[3] * m2Ptr[19];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[2] + m1Ptr[1] * m2Ptr[8] + m1Ptr[2] * m2Ptr[14] + m1Ptr[3] * m2Ptr[20];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[3] + m1Ptr[1] * m2Ptr[9] + m1Ptr[2] * m2Ptr[15] + m1Ptr[3] * m2Ptr[21];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[4] + m1Ptr[1] * m2Ptr[10] + m1Ptr[2] * m2Ptr[16] + m1Ptr[3] * m2Ptr[22];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[5] + m1Ptr[1] * m2Ptr[11] + m1Ptr[2] * m2Ptr[17] + m1Ptr[3] * m2Ptr[23];
-                     m1Ptr += 4;
-                 }
-                 return;
-             }
-             for ( i = 0; i < k; i++ ) {
-                 m2Ptr = m2.ToFloatPtr();
-                 for ( j = 0; j < l; j++ ) {
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[l] + m1Ptr[2] * m2Ptr[2*l] +
-                         m1Ptr[3] * m2Ptr[3*l];
-                     m2Ptr++;
-                 }
-                 m1Ptr += 4;
-             }
-             break;
-         }
-         case 5: {
-             if ( l == 6 ) {
-                 for ( i = 0; i < k; i++ ) {     // Nx5 * 5x6
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[6] + m1Ptr[2] * m2Ptr[12] + m1Ptr[3] * m2Ptr[18] + m1Ptr[4] * m2Ptr[24];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[1] + m1Ptr[1] * m2Ptr[7] + m1Ptr[2] * m2Ptr[13] + m1Ptr[3] * m2Ptr[19] + m1Ptr[4] * m2Ptr[25];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[2] + m1Ptr[1] * m2Ptr[8] + m1Ptr[2] * m2Ptr[14] + m1Ptr[3] * m2Ptr[20] + m1Ptr[4] * m2Ptr[26];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[3] + m1Ptr[1] * m2Ptr[9] + m1Ptr[2] * m2Ptr[15] + m1Ptr[3] * m2Ptr[21] + m1Ptr[4] * m2Ptr[27];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[4] + m1Ptr[1] * m2Ptr[10] + m1Ptr[2] * m2Ptr[16] + m1Ptr[3] * m2Ptr[22] + m1Ptr[4] * m2Ptr[28];
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[5] + m1Ptr[1] * m2Ptr[11] + m1Ptr[2] * m2Ptr[17] + m1Ptr[3] * m2Ptr[23] + m1Ptr[4] * m2Ptr[29];
-                     m1Ptr += 5;
-                 }
-                 return;
-             }
-             for ( i = 0; i < k; i++ ) {
-                 m2Ptr = m2.ToFloatPtr();
-                 for ( j = 0; j < l; j++ ) {
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[l] + m1Ptr[2] * m2Ptr[2*l] +
-                         m1Ptr[3] * m2Ptr[3*l] + m1Ptr[4] * m2Ptr[4*l];
-                     m2Ptr++;
-                 }
-                 m1Ptr += 5;
-             }
-             break;
-         }
-         case 6: {
-             // specialised kernels return directly; every shape they do not cover
-             // breaks out of this inner switch and is handled by the general loop below
-             switch( k ) {
-                 case 1: {
-                     if ( l == 1 ) {     // 1x6 * 6x1
-                         dstPtr[0] = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[1] + m1Ptr[2] * m2Ptr[2] +
-                             m1Ptr[3] * m2Ptr[3] + m1Ptr[4] * m2Ptr[4] + m1Ptr[5] * m2Ptr[5];
-                         return;
-                     }
-                     break;
-                 }
-                 case 2: {
-                     if ( l == 2 ) {     // 2x6 * 6x2
-                         for ( i = 0; i < 2; i++ ) {
-                             for ( j = 0; j < 2; j++ ) {
-                                 *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 2 + j ]
-                                     + m1Ptr[1] * m2Ptr[ 1 * 2 + j ]
-                                     + m1Ptr[2] * m2Ptr[ 2 * 2 + j ]
-                                     + m1Ptr[3] * m2Ptr[ 3 * 2 + j ]
-                                     + m1Ptr[4] * m2Ptr[ 4 * 2 + j ]
-                                     + m1Ptr[5] * m2Ptr[ 5 * 2 + j ];
-                                 dstPtr++;
-                             }
-                             m1Ptr += 6;
-                         }
-                         return;
-                     }
-                     break;
-                 }
-                 case 3: {
-                     if ( l == 3 ) {     // 3x6 * 6x3
-                         for ( i = 0; i < 3; i++ ) {
-                             for ( j = 0; j < 3; j++ ) {
-                                 *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 3 + j ]
-                                     + m1Ptr[1] * m2Ptr[ 1 * 3 + j ]
-                                     + m1Ptr[2] * m2Ptr[ 2 * 3 + j ]
-                                     + m1Ptr[3] * m2Ptr[ 3 * 3 + j ]
-                                     + m1Ptr[4] * m2Ptr[ 4 * 3 + j ]
-                                     + m1Ptr[5] * m2Ptr[ 5 * 3 + j ];
-                                 dstPtr++;
-                             }
-                             m1Ptr += 6;
-                         }
-                         return;
-                     }
-                     break;
-                 }
-                 case 4: {
-                     if ( l == 4 ) {     // 4x6 * 6x4
-                         for ( i = 0; i < 4; i++ ) {
-                             for ( j = 0; j < 4; j++ ) {
-                                 *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 4 + j ]
-                                     + m1Ptr[1] * m2Ptr[ 1 * 4 + j ]
-                                     + m1Ptr[2] * m2Ptr[ 2 * 4 + j ]
-                                     + m1Ptr[3] * m2Ptr[ 3 * 4 + j ]
-                                     + m1Ptr[4] * m2Ptr[ 4 * 4 + j ]
-                                     + m1Ptr[5] * m2Ptr[ 5 * 4 + j ];
-                                 dstPtr++;
-                             }
-                             m1Ptr += 6;
-                         }
-                         return;
-                     }
-                     // must not fall through: the kernels below iterate over 5 or 6 rows
-                     // and would read past m1 and write past dst for a 4 row matrix
-                     break;
-                 }
-                 case 5: {
-                     if ( l == 5 ) {     // 5x6 * 6x5
-                         for ( i = 0; i < 5; i++ ) {
-                             for ( j = 0; j < 5; j++ ) {
-                                 *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 5 + j ]
-                                     + m1Ptr[1] * m2Ptr[ 1 * 5 + j ]
-                                     + m1Ptr[2] * m2Ptr[ 2 * 5 + j ]
-                                     + m1Ptr[3] * m2Ptr[ 3 * 5 + j ]
-                                     + m1Ptr[4] * m2Ptr[ 4 * 5 + j ]
-                                     + m1Ptr[5] * m2Ptr[ 5 * 5 + j ];
-                                 dstPtr++;
-                             }
-                             m1Ptr += 6;
-                         }
-                         return;
-                     }
-                     // must not fall through: the 6x6 kernels below iterate over 6 rows
-                     // and would read past m1 and write past dst for a 5 row matrix
-                     break;
-                 }
-                 case 6: {
-                     switch( l ) {
-                         case 1: {       // 6x6 * 6x1
-                             for ( i = 0; i < 6; i++ ) {
-                                 *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 1 ]
-                                     + m1Ptr[1] * m2Ptr[ 1 * 1 ]
-                                     + m1Ptr[2] * m2Ptr[ 2 * 1 ]
-                                     + m1Ptr[3] * m2Ptr[ 3 * 1 ]
-                                     + m1Ptr[4] * m2Ptr[ 4 * 1 ]
-                                     + m1Ptr[5] * m2Ptr[ 5 * 1 ];
-                                 dstPtr++;
-                                 m1Ptr += 6;
-                             }
-                             return;
-                         }
-                         case 2: {       // 6x6 * 6x2
-                             for ( i = 0; i < 6; i++ ) {
-                                 for ( j = 0; j < 2; j++ ) {
-                                     *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 2 + j ]
-                                         + m1Ptr[1] * m2Ptr[ 1 * 2 + j ]
-                                         + m1Ptr[2] * m2Ptr[ 2 * 2 + j ]
-                                         + m1Ptr[3] * m2Ptr[ 3 * 2 + j ]
-                                         + m1Ptr[4] * m2Ptr[ 4 * 2 + j ]
-                                         + m1Ptr[5] * m2Ptr[ 5 * 2 + j ];
-                                     dstPtr++;
-                                 }
-                                 m1Ptr += 6;
-                             }
-                             return;
-                         }
-                         case 3: {       // 6x6 * 6x3
-                             for ( i = 0; i < 6; i++ ) {
-                                 for ( j = 0; j < 3; j++ ) {
-                                     *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 3 + j ]
-                                         + m1Ptr[1] * m2Ptr[ 1 * 3 + j ]
-                                         + m1Ptr[2] * m2Ptr[ 2 * 3 + j ]
-                                         + m1Ptr[3] * m2Ptr[ 3 * 3 + j ]
-                                         + m1Ptr[4] * m2Ptr[ 4 * 3 + j ]
-                                         + m1Ptr[5] * m2Ptr[ 5 * 3 + j ];
-                                     dstPtr++;
-                                 }
-                                 m1Ptr += 6;
-                             }
-                             return;
-                         }
-                         case 4: {       // 6x6 * 6x4
-                             for ( i = 0; i < 6; i++ ) {
-                                 for ( j = 0; j < 4; j++ ) {
-                                     *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 4 + j ]
-                                         + m1Ptr[1] * m2Ptr[ 1 * 4 + j ]
-                                         + m1Ptr[2] * m2Ptr[ 2 * 4 + j ]
-                                         + m1Ptr[3] * m2Ptr[ 3 * 4 + j ]
-                                         + m1Ptr[4] * m2Ptr[ 4 * 4 + j ]
-                                         + m1Ptr[5] * m2Ptr[ 5 * 4 + j ];
-                                     dstPtr++;
-                                 }
-                                 m1Ptr += 6;
-                             }
-                             return;
-                         }
-                         case 5: {       // 6x6 * 6x5
-                             for ( i = 0; i < 6; i++ ) {
-                                 for ( j = 0; j < 5; j++ ) {
-                                     *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 5 + j ]
-                                         + m1Ptr[1] * m2Ptr[ 1 * 5 + j ]
-                                         + m1Ptr[2] * m2Ptr[ 2 * 5 + j ]
-                                         + m1Ptr[3] * m2Ptr[ 3 * 5 + j ]
-                                         + m1Ptr[4] * m2Ptr[ 4 * 5 + j ]
-                                         + m1Ptr[5] * m2Ptr[ 5 * 5 + j ];
-                                     dstPtr++;
-                                 }
-                                 m1Ptr += 6;
-                             }
-                             return;
-                         }
-                         case 6: {       // 6x6 * 6x6
-                             for ( i = 0; i < 6; i++ ) {
-                                 for ( j = 0; j < 6; j++ ) {
-                                     *dstPtr = m1Ptr[0] * m2Ptr[ 0 * 6 + j ]
-                                         + m1Ptr[1] * m2Ptr[ 1 * 6 + j ]
-                                         + m1Ptr[2] * m2Ptr[ 2 * 6 + j ]
-                                         + m1Ptr[3] * m2Ptr[ 3 * 6 + j ]
-                                         + m1Ptr[4] * m2Ptr[ 4 * 6 + j ]
-                                         + m1Ptr[5] * m2Ptr[ 5 * 6 + j ];
-                                     dstPtr++;
-                                 }
-                                 m1Ptr += 6;
-                             }
-                             return;
-                         }
-                     }
-                 }
-             }
-             // general Nx6 * 6xM loop for every shape without a specialised kernel
-             for ( i = 0; i < k; i++ ) {
-                 m2Ptr = m2.ToFloatPtr();
-                 for ( j = 0; j < l; j++ ) {
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[1] * m2Ptr[l] + m1Ptr[2] * m2Ptr[2*l] +
-                         m1Ptr[3] * m2Ptr[3*l] + m1Ptr[4] * m2Ptr[4*l] + m1Ptr[5] * m2Ptr[5*l];
-                     m2Ptr++;
-                 }
-                 m1Ptr += 6;
-             }
-             break;
-         }
-         default: {
-             // inner dimension larger than 6: accumulate each dot product in double precision
-             for ( i = 0; i < k; i++ ) {
-                 for ( j = 0; j < l; j++ ) {
-                     m2Ptr = m2.ToFloatPtr() + j;
-                     sum = m1Ptr[0] * m2Ptr[0];
-                     for ( n = 1; n < m1.GetNumColumns(); n++ ) {
-                         m2Ptr += l;
-                         sum += m1Ptr[n] * m2Ptr[0];
-                     }
-                     *dstPtr++ = sum;
-                 }
-                 m1Ptr += m1.GetNumColumns();
-             }
-             break;
-         }
-     }
- }
- /*
- ============
- idSIMD_Generic::MatX_TransposeMultiplyMatX
- computes dst = transpose( m1 ) * m2, writing m1.GetNumColumns() * m2.GetNumColumns()
- floats consecutively through dst.ToFloatPtr(); all three matrices are addressed as
- consecutive rows of floats
- optimizes the following transpose matrix multiplications:
- Nx6 * NxN
- 6xN * 6x6
- with N in the range [1-6].
- every other shape with 1 to 6 shared rows uses the per-dimension general loop,
- more shared rows use the fully generic loop and matrices without rows produce
- an all-zero dst
- ============
- */
- void VPCALL idSIMD_Generic::MatX_TransposeMultiplyMatX( idMatX &dst, const idMatX &m1, const idMatX &m2 ) {
-     int i, j, k, l, n;
-     float *dstPtr;
-     const float *m1Ptr, *m2Ptr;
-     double sum;
-     assert( m1.GetNumRows() == m2.GetNumRows() );
-     m1Ptr = m1.ToFloatPtr();
-     m2Ptr = m2.ToFloatPtr();
-     dstPtr = dst.ToFloatPtr();
-     k = m1.GetNumColumns();     // rows of the result, also the row stride of m1
-     l = m2.GetNumColumns();     // columns of the result, also the row stride of m2
-     switch( m1.GetNumRows() ) {
-         case 0:
-             // no shared rows: every element of dst is an empty sum;
-             // handled here because the generic loop always reads m1Ptr[0] and m2Ptr[0]
-             for ( i = 0; i < k * l; i++ ) {
-                 *dstPtr++ = 0.0f;
-             }
-             break;
-         case 1:
-             if ( k == 6 && l == 1 ) {       // 1x6 * 1x1
-                 for ( i = 0; i < 6; i++ ) {
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[0];
-                     m1Ptr++;
-                 }
-                 return;
-             }
-             for ( i = 0; i < k; i++ ) {
-                 m2Ptr = m2.ToFloatPtr();
-                 for ( j = 0; j < l; j++ ) {
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[0];
-                     m2Ptr++;
-                 }
-                 m1Ptr++;
-             }
-             break;
-         case 2:
-             if ( k == 6 && l == 2 ) {       // 2x6 * 2x2
-                 for ( i = 0; i < 6; i++ ) {
-                     *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*2+0] + m1Ptr[1*6] * m2Ptr[1*2+0];
-                     *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*2+1] + m1Ptr[1*6] * m2Ptr[1*2+1];
-                     m1Ptr++;
-                 }
-                 return;
-             }
-             for ( i = 0; i < k; i++ ) {
-                 m2Ptr = m2.ToFloatPtr();
-                 for ( j = 0; j < l; j++ ) {
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[k] * m2Ptr[l];
-                     m2Ptr++;
-                 }
-                 m1Ptr++;
-             }
-             break;
-         case 3:
-             if ( k == 6 && l == 3 ) {       // 3x6 * 3x3
-                 for ( i = 0; i < 6; i++ ) {
-                     *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*3+0] + m1Ptr[1*6] * m2Ptr[1*3+0] + m1Ptr[2*6] * m2Ptr[2*3+0];
-                     *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*3+1] + m1Ptr[1*6] * m2Ptr[1*3+1] + m1Ptr[2*6] * m2Ptr[2*3+1];
-                     *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*3+2] + m1Ptr[1*6] * m2Ptr[1*3+2] + m1Ptr[2*6] * m2Ptr[2*3+2];
-                     m1Ptr++;
-                 }
-                 return;
-             }
-             for ( i = 0; i < k; i++ ) {
-                 m2Ptr = m2.ToFloatPtr();
-                 for ( j = 0; j < l; j++ ) {
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[k] * m2Ptr[l] + m1Ptr[2*k] * m2Ptr[2*l];
-                     m2Ptr++;
-                 }
-                 m1Ptr++;
-             }
-             break;
-         case 4:
-             if ( k == 6 && l == 4 ) {       // 4x6 * 4x4
-                 for ( i = 0; i < 6; i++ ) {
-                     *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*4+0] + m1Ptr[1*6] * m2Ptr[1*4+0] + m1Ptr[2*6] * m2Ptr[2*4+0] + m1Ptr[3*6] * m2Ptr[3*4+0];
-                     *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*4+1] + m1Ptr[1*6] * m2Ptr[1*4+1] + m1Ptr[2*6] * m2Ptr[2*4+1] + m1Ptr[3*6] * m2Ptr[3*4+1];
-                     *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*4+2] + m1Ptr[1*6] * m2Ptr[1*4+2] + m1Ptr[2*6] * m2Ptr[2*4+2] + m1Ptr[3*6] * m2Ptr[3*4+2];
-                     *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*4+3] + m1Ptr[1*6] * m2Ptr[1*4+3] + m1Ptr[2*6] * m2Ptr[2*4+3] + m1Ptr[3*6] * m2Ptr[3*4+3];
-                     m1Ptr++;
-                 }
-                 return;
-             }
-             for ( i = 0; i < k; i++ ) {
-                 m2Ptr = m2.ToFloatPtr();
-                 for ( j = 0; j < l; j++ ) {
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[k] * m2Ptr[l] + m1Ptr[2*k] * m2Ptr[2*l] +
-                         m1Ptr[3*k] * m2Ptr[3*l];
-                     m2Ptr++;
-                 }
-                 m1Ptr++;
-             }
-             break;
-         case 5:
-             if ( k == 6 && l == 5 ) {       // 5x6 * 5x5
-                 for ( i = 0; i < 6; i++ ) {
-                     *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*5+0] + m1Ptr[1*6] * m2Ptr[1*5+0] + m1Ptr[2*6] * m2Ptr[2*5+0] + m1Ptr[3*6] * m2Ptr[3*5+0] + m1Ptr[4*6] * m2Ptr[4*5+0];
-                     *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*5+1] + m1Ptr[1*6] * m2Ptr[1*5+1] + m1Ptr[2*6] * m2Ptr[2*5+1] + m1Ptr[3*6] * m2Ptr[3*5+1] + m1Ptr[4*6] * m2Ptr[4*5+1];
-                     *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*5+2] + m1Ptr[1*6] * m2Ptr[1*5+2] + m1Ptr[2*6] * m2Ptr[2*5+2] + m1Ptr[3*6] * m2Ptr[3*5+2] + m1Ptr[4*6] * m2Ptr[4*5+2];
-                     *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*5+3] + m1Ptr[1*6] * m2Ptr[1*5+3] + m1Ptr[2*6] * m2Ptr[2*5+3] + m1Ptr[3*6] * m2Ptr[3*5+3] + m1Ptr[4*6] * m2Ptr[4*5+3];
-                     *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*5+4] + m1Ptr[1*6] * m2Ptr[1*5+4] + m1Ptr[2*6] * m2Ptr[2*5+4] + m1Ptr[3*6] * m2Ptr[3*5+4] + m1Ptr[4*6] * m2Ptr[4*5+4];
-                     m1Ptr++;
-                 }
-                 return;
-             }
-             for ( i = 0; i < k; i++ ) {
-                 m2Ptr = m2.ToFloatPtr();
-                 for ( j = 0; j < l; j++ ) {
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[k] * m2Ptr[l] + m1Ptr[2*k] * m2Ptr[2*l] +
-                         m1Ptr[3*k] * m2Ptr[3*l] + m1Ptr[4*k] * m2Ptr[4*l];
-                     m2Ptr++;
-                 }
-                 m1Ptr++;
-             }
-             break;
-         case 6:
-             if ( l == 6 ) {
-                 // every listed case returns; other values of k leave the switch
-                 // and are handled by the general loop below
-                 switch( k ) {
-                     case 1:     // 6x1 * 6x6
-                         m2Ptr = m2.ToFloatPtr();
-                         for ( j = 0; j < 6; j++ ) {
-                             *dstPtr++ = m1Ptr[0*1] * m2Ptr[0*6] +
-                                 m1Ptr[1*1] * m2Ptr[1*6] +
-                                 m1Ptr[2*1] * m2Ptr[2*6] +
-                                 m1Ptr[3*1] * m2Ptr[3*6] +
-                                 m1Ptr[4*1] * m2Ptr[4*6] +
-                                 m1Ptr[5*1] * m2Ptr[5*6];
-                             m2Ptr++;
-                         }
-                         return;
-                     case 2:     // 6x2 * 6x6
-                         for ( i = 0; i < 2; i++ ) {
-                             m2Ptr = m2.ToFloatPtr();
-                             for ( j = 0; j < 6; j++ ) {
-                                 *dstPtr++ = m1Ptr[0*2] * m2Ptr[0*6] +
-                                     m1Ptr[1*2] * m2Ptr[1*6] +
-                                     m1Ptr[2*2] * m2Ptr[2*6] +
-                                     m1Ptr[3*2] * m2Ptr[3*6] +
-                                     m1Ptr[4*2] * m2Ptr[4*6] +
-                                     m1Ptr[5*2] * m2Ptr[5*6];
-                                 m2Ptr++;
-                             }
-                             m1Ptr++;
-                         }
-                         return;
-                     case 3:     // 6x3 * 6x6
-                         for ( i = 0; i < 3; i++ ) {
-                             m2Ptr = m2.ToFloatPtr();
-                             for ( j = 0; j < 6; j++ ) {
-                                 *dstPtr++ = m1Ptr[0*3] * m2Ptr[0*6] +
-                                     m1Ptr[1*3] * m2Ptr[1*6] +
-                                     m1Ptr[2*3] * m2Ptr[2*6] +
-                                     m1Ptr[3*3] * m2Ptr[3*6] +
-                                     m1Ptr[4*3] * m2Ptr[4*6] +
-                                     m1Ptr[5*3] * m2Ptr[5*6];
-                                 m2Ptr++;
-                             }
-                             m1Ptr++;
-                         }
-                         return;
-                     case 4:     // 6x4 * 6x6
-                         for ( i = 0; i < 4; i++ ) {
-                             m2Ptr = m2.ToFloatPtr();
-                             for ( j = 0; j < 6; j++ ) {
-                                 *dstPtr++ = m1Ptr[0*4] * m2Ptr[0*6] +
-                                     m1Ptr[1*4] * m2Ptr[1*6] +
-                                     m1Ptr[2*4] * m2Ptr[2*6] +
-                                     m1Ptr[3*4] * m2Ptr[3*6] +
-                                     m1Ptr[4*4] * m2Ptr[4*6] +
-                                     m1Ptr[5*4] * m2Ptr[5*6];
-                                 m2Ptr++;
-                             }
-                             m1Ptr++;
-                         }
-                         return;
-                     case 5:     // 6x5 * 6x6
-                         for ( i = 0; i < 5; i++ ) {
-                             m2Ptr = m2.ToFloatPtr();
-                             for ( j = 0; j < 6; j++ ) {
-                                 *dstPtr++ = m1Ptr[0*5] * m2Ptr[0*6] +
-                                     m1Ptr[1*5] * m2Ptr[1*6] +
-                                     m1Ptr[2*5] * m2Ptr[2*6] +
-                                     m1Ptr[3*5] * m2Ptr[3*6] +
-                                     m1Ptr[4*5] * m2Ptr[4*6] +
-                                     m1Ptr[5*5] * m2Ptr[5*6];
-                                 m2Ptr++;
-                             }
-                             m1Ptr++;
-                         }
-                         return;
-                     case 6:     // 6x6 * 6x6
-                         for ( i = 0; i < 6; i++ ) {
-                             m2Ptr = m2.ToFloatPtr();
-                             for ( j = 0; j < 6; j++ ) {
-                                 *dstPtr++ = m1Ptr[0*6] * m2Ptr[0*6] +
-                                     m1Ptr[1*6] * m2Ptr[1*6] +
-                                     m1Ptr[2*6] * m2Ptr[2*6] +
-                                     m1Ptr[3*6] * m2Ptr[3*6] +
-                                     m1Ptr[4*6] * m2Ptr[4*6] +
-                                     m1Ptr[5*6] * m2Ptr[5*6];
-                                 m2Ptr++;
-                             }
-                             m1Ptr++;
-                         }
-                         return;
-                 }
-             }
-             // general 6xK * 6xL loop for every shape without a specialised kernel
-             for ( i = 0; i < k; i++ ) {
-                 m2Ptr = m2.ToFloatPtr();
-                 for ( j = 0; j < l; j++ ) {
-                     *dstPtr++ = m1Ptr[0] * m2Ptr[0] + m1Ptr[k] * m2Ptr[l] + m1Ptr[2*k] * m2Ptr[2*l] +
-                         m1Ptr[3*k] * m2Ptr[3*l] + m1Ptr[4*k] * m2Ptr[4*l] + m1Ptr[5*k] * m2Ptr[5*l];
-                     m2Ptr++;
-                 }
-                 m1Ptr++;
-             }
-             break;
-         default:
-             // more than 6 shared rows: walk column i of m1 and column j of m2 in
-             // lock step and accumulate the dot product in double precision
-             for ( i = 0; i < k; i++ ) {
-                 for ( j = 0; j < l; j++ ) {
-                     m1Ptr = m1.ToFloatPtr() + i;
-                     m2Ptr = m2.ToFloatPtr() + j;
-                     sum = m1Ptr[0] * m2Ptr[0];
-                     for ( n = 1; n < m1.GetNumRows(); n++ ) {
-                         m1Ptr += k;
-                         m2Ptr += l;
-                         sum += m1Ptr[0] * m2Ptr[0];
-                     }
-                     *dstPtr++ = sum;
-                 }
-             }
-             break;
-     }
- }
- /*
- ============
- idSIMD_Generic::MatX_LowerTriangularSolve
- solves x in Lx = b for the n * n sub-matrix of L
- if skip > 0 the first skip elements of x are assumed to be valid already
- L has to be a lower triangular matrix with (implicit) ones on the diagonal
- x == b is allowed
- only the elements of L below the diagonal are read
- returns without touching x when skip >= n
- ============
- */
- void VPCALL idSIMD_Generic::MatX_LowerTriangularSolve( const idMatX &L, float *x, const float *b, const int n, int skip ) {
- #if 1
-     int nc;
-     const float *lptr;
-     if ( skip >= n ) {
-         return;
-     }
-     lptr = L.ToFloatPtr();
-     nc = L.GetNumColumns();     // row stride of L in floats
-     // unrolled cases for n < 8
-     if ( n < 8 ) {
-         // NSKIP packs n and skip into a single switch value; the cases for one n
-         // deliberately fall through so that entering at ( n, skip ) computes
-         // x[skip] up to x[n-1] in order, each row using the x values before it
- #define NSKIP( n, s ) ((n<<3)|(s&7))
-         switch( NSKIP( n, skip ) ) {
-             case NSKIP( 1, 0 ): x[0] = b[0];
-                 return;
-             case NSKIP( 2, 0 ): x[0] = b[0];
-             case NSKIP( 2, 1 ): x[1] = b[1] - lptr[1*nc+0] * x[0];
-                 return;
-             case NSKIP( 3, 0 ): x[0] = b[0];
-             case NSKIP( 3, 1 ): x[1] = b[1] - lptr[1*nc+0] * x[0];
-             case NSKIP( 3, 2 ): x[2] = b[2] - lptr[2*nc+0] * x[0] - lptr[2*nc+1] * x[1];
-                 return;
-             case NSKIP( 4, 0 ): x[0] = b[0];
-             case NSKIP( 4, 1 ): x[1] = b[1] - lptr[1*nc+0] * x[0];
-             case NSKIP( 4, 2 ): x[2] = b[2] - lptr[2*nc+0] * x[0] - lptr[2*nc+1] * x[1];
-             case NSKIP( 4, 3 ): x[3] = b[3] - lptr[3*nc+0] * x[0] - lptr[3*nc+1] * x[1] - lptr[3*nc+2] * x[2];
-                 return;
-             case NSKIP( 5, 0 ): x[0] = b[0];
-             case NSKIP( 5, 1 ): x[1] = b[1] - lptr[1*nc+0] * x[0];
-             case NSKIP( 5, 2 ): x[2] = b[2] - lptr[2*nc+0] * x[0] - lptr[2*nc+1] * x[1];
-             case NSKIP( 5, 3 ): x[3] = b[3] - lptr[3*nc+0] * x[0] - lptr[3*nc+1] * x[1] - lptr[3*nc+2] * x[2];
-             case NSKIP( 5, 4 ): x[4] = b[4] - lptr[4*nc+0] * x[0] - lptr[4*nc+1] * x[1] - lptr[4*nc+2] * x[2] - lptr[4*nc+3] * x[3];
-                 return;
-             case NSKIP( 6, 0 ): x[0] = b[0];
-             case NSKIP( 6, 1 ): x[1] = b[1] - lptr[1*nc+0] * x[0];
-             case NSKIP( 6, 2 ): x[2] = b[2] - lptr[2*nc+0] * x[0] - lptr[2*nc+1] * x[1];
-             case NSKIP( 6, 3 ): x[3] = b[3] - lptr[3*nc+0] * x[0] - lptr[3*nc+1] * x[1] - lptr[3*nc+2] * x[2];
-             case NSKIP( 6, 4 ): x[4] = b[4] - lptr[4*nc+0] * x[0] - lptr[4*nc+1] * x[1] - lptr[4*nc+2] * x[2] - lptr[4*nc+3] * x[3];
-             case NSKIP( 6, 5 ): x[5] = b[5] - lptr[5*nc+0] * x[0] - lptr[5*nc+1] * x[1] - lptr[5*nc+2] * x[2] - lptr[5*nc+3] * x[3] - lptr[5*nc+4] * x[4];
-                 return;
-             case NSKIP( 7, 0 ): x[0] = b[0];
-             case NSKIP( 7, 1 ): x[1] = b[1] - lptr[1*nc+0] * x[0];
-             case NSKIP( 7, 2 ): x[2] = b[2] - lptr[2*nc+0] * x[0] - lptr[2*nc+1] * x[1];
-             case NSKIP( 7, 3 ): x[3] = b[3] - lptr[3*nc+0] * x[0] - lptr[3*nc+1] * x[1] - lptr[3*nc+2] * x[2];
-             case NSKIP( 7, 4 ): x[4] = b[4] - lptr[4*nc+0] * x[0] - lptr[4*nc+1] * x[1] - lptr[4*nc+2] * x[2] - lptr[4*nc+3] * x[3];
-             case NSKIP( 7, 5 ): x[5] = b[5] - lptr[5*nc+0] * x[0] - lptr[5*nc+1] * x[1] - lptr[5*nc+2] * x[2] - lptr[5*nc+3] * x[3] - lptr[5*nc+4] * x[4];
-             case NSKIP( 7, 6 ): x[6] = b[6] - lptr[6*nc+0] * x[0] - lptr[6*nc+1] * x[1] - lptr[6*nc+2] * x[2] - lptr[6*nc+3] * x[3] - lptr[6*nc+4] * x[4] - lptr[6*nc+5] * x[5];
-                 return;
-         }
-         return;
-     }
-     // process first 4 rows
-     // the cases fall through on purpose; afterwards at least x[0] to x[3] are
-     // valid, which the main loop below relies on
-     switch( skip ) {
-         case 0: x[0] = b[0];
-         case 1: x[1] = b[1] - lptr[1*nc+0] * x[0];
-         case 2: x[2] = b[2] - lptr[2*nc+0] * x[0] - lptr[2*nc+1] * x[1];
-         case 3: x[3] = b[3] - lptr[3*nc+0] * x[0] - lptr[3*nc+1] * x[1] - lptr[3*nc+2] * x[2];
-             skip = 4;
-     }
-     lptr = L[skip];
-     int i, j;
-     // four independent partial sums, accumulated in double precision
-     // ('register' dropped: the specifier was removed from the language in C++17)
-     double s0, s1, s2, s3;
-     for ( i = skip; i < n; i++ ) {
-         // i >= 4 here, so the first four products always exist
-         s0 = lptr[0] * x[0];
-         s1 = lptr[1] * x[1];
-         s2 = lptr[2] * x[2];
-         s3 = lptr[3] * x[3];
-         // consume the remaining columns of row i eight at a time
-         for ( j = 4; j < i-7; j += 8 ) {
-             s0 += lptr[j+0] * x[j+0];
-             s1 += lptr[j+1] * x[j+1];
-             s2 += lptr[j+2] * x[j+2];
-             s3 += lptr[j+3] * x[j+3];
-             s0 += lptr[j+4] * x[j+4];
-             s1 += lptr[j+5] * x[j+5];
-             s2 += lptr[j+6] * x[j+6];
-             s3 += lptr[j+7] * x[j+7];
-         }
-         // 0 to 7 columns are left over; fall through to add each of them once
-         switch( i - j ) {
-             NODEFAULT;
-             case 7: s0 += lptr[j+6] * x[j+6];
-             case 6: s1 += lptr[j+5] * x[j+5];
-             case 5: s2 += lptr[j+4] * x[j+4];
-             case 4: s3 += lptr[j+3] * x[j+3];
-             case 3: s0 += lptr[j+2] * x[j+2];
-             case 2: s1 += lptr[j+1] * x[j+1];
-             case 1: s2 += lptr[j+0] * x[j+0];
-             case 0: break;
-         }
-         // x[i] = b[i] - ( s0 + s1 + s2 + s3 ), evaluated as -( sum - b[i] )
-         double sum;
-         sum = s3;
-         sum += s2;
-         sum += s1;
-         sum += s0;
-         sum -= b[i];
-         x[i] = -sum;
-         lptr += nc;
-     }
- #else
-     // straightforward reference implementation
-     int i, j;
-     const float *lptr;
-     double sum;
-     for ( i = skip; i < n; i++ ) {
-         sum = b[i];
-         lptr = L[i];
-         for ( j = 0; j < i; j++ ) {
-             sum -= lptr[j] * x[j];
-         }
-         x[i] = sum;
-     }
- #endif
- }
- /*
- ============
- idSIMD_Generic::MatX_LowerTriangularSolveTranspose
- solves x in L'x = b for the n * n sub-matrix of L
- L has to be a lower triangular matrix with (implicit) ones on the diagonal
- x == b is allowed
- only the elements of L below the diagonal are read
- x is computed from the last element towards the first
- ============
- */
- void VPCALL idSIMD_Generic::MatX_LowerTriangularSolveTranspose( const idMatX &L, float *x, const float *b, const int n ) {
- #if 1
-     int nc;
-     const float *lptr;
-     lptr = L.ToFloatPtr();
-     nc = L.GetNumColumns();     // row stride of L in floats
-     // unrolled cases for n < 8
-     if ( n < 8 ) {
-         switch( n ) {
-             case 0:
-                 return;
-             case 1:
-                 x[0] = b[0];
-                 return;
-             case 2:
-                 x[1] = b[1];
-                 x[0] = b[0] - lptr[1*nc+0] * x[1];
-                 return;
-             case 3:
-                 x[2] = b[2];
-                 x[1] = b[1] - lptr[2*nc+1] * x[2];
-                 x[0] = b[0] - lptr[2*nc+0] * x[2] - lptr[1*nc+0] * x[1];
-                 return;
-             case 4:
-                 x[3] = b[3];
-                 x[2] = b[2] - lptr[3*nc+2] * x[3];
-                 x[1] = b[1] - lptr[3*nc+1] * x[3] - lptr[2*nc+1] * x[2];
-                 x[0] = b[0] - lptr[3*nc+0] * x[3] - lptr[2*nc+0] * x[2] - lptr[1*nc+0] * x[1];
-                 return;
-             case 5:
-                 x[4] = b[4];
-                 x[3] = b[3] - lptr[4*nc+3] * x[4];
-                 x[2] = b[2] - lptr[4*nc+2] * x[4] - lptr[3*nc+2] * x[3];
-                 x[1] = b[1] - lptr[4*nc+1] * x[4] - lptr[3*nc+1] * x[3] - lptr[2*nc+1] * x[2];
-                 x[0] = b[0] - lptr[4*nc+0] * x[4] - lptr[3*nc+0] * x[3] - lptr[2*nc+0] * x[2] - lptr[1*nc+0] * x[1];
-                 return;
-             case 6:
-                 x[5] = b[5];
-                 x[4] = b[4] - lptr[5*nc+4] * x[5];
-                 x[3] = b[3] - lptr[5*nc+3] * x[5] - lptr[4*nc+3] * x[4];
-                 x[2] = b[2] - lptr[5*nc+2] * x[5] - lptr[4*nc+2] * x[4] - lptr[3*nc+2] * x[3];
-                 x[1] = b[1] - lptr[5*nc+1] * x[5] - lptr[4*nc+1] * x[4] - lptr[3*nc+1] * x[3] - lptr[2*nc+1] * x[2];
-                 x[0] = b[0] - lptr[5*nc+0] * x[5] - lptr[4*nc+0] * x[4] - lptr[3*nc+0] * x[3] - lptr[2*nc+0] * x[2] - lptr[1*nc+0] * x[1];
-                 return;
-             case 7:
-                 x[6] = b[6];
-                 x[5] = b[5] - lptr[6*nc+5] * x[6];
-                 x[4] = b[4] - lptr[6*nc+4] * x[6] - lptr[5*nc+4] * x[5];
-                 x[3] = b[3] - lptr[6*nc+3] * x[6] - lptr[5*nc+3] * x[5] - lptr[4*nc+3] * x[4];
-                 x[2] = b[2] - lptr[6*nc+2] * x[6] - lptr[5*nc+2] * x[5] - lptr[4*nc+2] * x[4] - lptr[3*nc+2] * x[3];
-                 x[1] = b[1] - lptr[6*nc+1] * x[6] - lptr[5*nc+1] * x[5] - lptr[4*nc+1] * x[4] - lptr[3*nc+1] * x[3] - lptr[2*nc+1] * x[2];
-                 x[0] = b[0] - lptr[6*nc+0] * x[6] - lptr[5*nc+0] * x[5] - lptr[4*nc+0] * x[4] - lptr[3*nc+0] * x[3] - lptr[2*nc+0] * x[2] - lptr[1*nc+0] * x[1];
-                 return;
-         }
-         // other values of n (negative) leave x untouched
-         return;
-     }
-     int i, j;
-     // accumulators for the four unknowns of the current block, kept in double precision
-     // ('register' dropped: the specifier was removed from the language in C++17)
-     double s0, s1, s2, s3;
-     float *xptr;
-     // lptr addresses row n, column n-4 and xptr addresses x[n]; both are only
-     // dereferenced at offsets that land on rows and elements below n
-     lptr = L.ToFloatPtr() + n * nc + n - 4;
-     xptr = x + n;
-     // process 4 rows at a time
-     for ( i = n; i >= 4; i -= 4 ) {
-         // b is read before the matching x elements are written, so x == b is safe
-         s0 = b[i-4];
-         s1 = b[i-3];
-         s2 = b[i-2];
-         s3 = b[i-1];
-         // process 4x4 blocks
-         // subtracts the contribution of the already solved x[i] to x[n-1]
-         for ( j = 0; j < n-i; j += 4 ) {
-             s0 -= lptr[(j+0)*nc+0] * xptr[j+0];
-             s1 -= lptr[(j+0)*nc+1] * xptr[j+0];
-             s2 -= lptr[(j+0)*nc+2] * xptr[j+0];
-             s3 -= lptr[(j+0)*nc+3] * xptr[j+0];
-             s0 -= lptr[(j+1)*nc+0] * xptr[j+1];
-             s1 -= lptr[(j+1)*nc+1] * xptr[j+1];
-             s2 -= lptr[(j+1)*nc+2] * xptr[j+1];
-             s3 -= lptr[(j+1)*nc+3] * xptr[j+1];
-             s0 -= lptr[(j+2)*nc+0] * xptr[j+2];
-             s1 -= lptr[(j+2)*nc+1] * xptr[j+2];
-             s2 -= lptr[(j+2)*nc+2] * xptr[j+2];
-             s3 -= lptr[(j+2)*nc+3] * xptr[j+2];
-             s0 -= lptr[(j+3)*nc+0] * xptr[j+3];
-             s1 -= lptr[(j+3)*nc+1] * xptr[j+3];
-             s2 -= lptr[(j+3)*nc+2] * xptr[j+3];
-             s3 -= lptr[(j+3)*nc+3] * xptr[j+3];
-         }
-         // process left over of the 4 rows
-         // resolves the dependencies inside the block itself, last unknown first
-         s0 -= lptr[0-1*nc] * s3;
-         s1 -= lptr[1-1*nc] * s3;
-         s2 -= lptr[2-1*nc] * s3;
-         s0 -= lptr[0-2*nc] * s2;
-         s1 -= lptr[1-2*nc] * s2;
-         s0 -= lptr[0-3*nc] * s1;
-         // store result
-         xptr[-4] = s0;
-         xptr[-3] = s1;
-         xptr[-2] = s2;
-         xptr[-1] = s3;
-         // update pointers for next four rows
-         lptr -= 4 + 4 * nc;
-         xptr -= 4;
-     }
-     // process left over rows
-     // i is 0 to 3 here, so rows i-1 down to 0 are still unsolved
-     for ( i--; i >= 0; i-- ) {
-         s0 = b[i];
-         lptr = L[0] + i;
-         for ( j = i + 1; j < n; j++ ) {
-             s0 -= lptr[j*nc] * x[j];
-         }
-         x[i] = s0;
-     }
- #else
-     // straightforward reference implementation
-     int i, j, nc;
-     const float *ptr;
-     double sum;
-     nc = L.GetNumColumns();
-     for ( i = n - 1; i >= 0; i-- ) {
-         sum = b[i];
-         ptr = L[0] + i;
-         for ( j = i + 1; j < n; j++ ) {
-             sum -= ptr[j*nc] * x[j];
-         }
-         x[i] = sum;
-     }
- #endif
- }
- /*
- ============
- idSIMD_Generic::MatX_LDLTFactor
- in-place factorization LDL' of the n * n sub-matrix of mat
- the reciprocal of the diagonal elements are stored in invDiag
- Only the lower triangle of mat (row >= column) is read and written:
- the diagonal holds D afterwards and the entries below it hold the
- off-diagonal entries of L (the unit diagonal of L is not stored).
- Returns false as soon as an element of D evaluates to exactly zero,
- true otherwise (including n <= 0).
- The "#if 1" branch peels the first four columns by hand and splits the
- dot products over four accumulators; the "#else" branch is the plain
- loop form of the same factorization and is compiled out.
- ============
- */
- bool VPCALL idSIMD_Generic::MatX_LDLTFactor( idMatX &mat, idVecX &invDiag, const int n ) {
- #if 1 // unrolled implementation (active)
- int i, j, k, nc;
- float *v, *diag, *mptr;
- double s0, s1, s2, s3, sum, d;
- v = (float *) _alloca16( n * sizeof( float ) ); // v[k] = D[k] * L[i][k] for the row i being processed
- diag = (float *) _alloca16( n * sizeof( float ) ); // local copy of the D entries computed so far
- nc = mat.GetNumColumns(); // row stride used for the mptr[j*nc+c] column walks
- if ( n <= 0 ) { // nothing to factor; NOTE(review): the two allocations above already ran with this n
- return true;
- }
- mptr = mat[0];
- sum = mptr[0]; // D[0] is mat[0][0] itself
- if ( sum == 0.0f ) { // exact zero pivot: give up
- return false;
- }
- diag[0] = sum;
- invDiag[0] = d = 1.0f / sum;
- if ( n <= 1 ) {
- return true;
- }
- mptr = mat[0];
- for ( j = 1; j < n; j++ ) { // column 0 below the diagonal: L[j][0] = mat[j][0] / D[0]
- mptr[j*nc+0] = ( mptr[j*nc+0] ) * d;
- }
- mptr = mat[1]; // row 1
- v[0] = diag[0] * mptr[0]; s0 = v[0] * mptr[0];
- sum = mptr[1] - s0; // D[1] = mat[1][1] - D[0] * L[1][0]^2
- if ( sum == 0.0f ) {
- return false;
- }
- mat[1][1] = sum;
- diag[1] = sum;
- invDiag[1] = d = 1.0f / sum;
- if ( n <= 2 ) {
- return true;
- }
- mptr = mat[0];
- for ( j = 2; j < n; j++ ) { // column 1 below the diagonal
- mptr[j*nc+1] = ( mptr[j*nc+1] - v[0] * mptr[j*nc+0] ) * d;
- }
- mptr = mat[2]; // row 2
- v[0] = diag[0] * mptr[0]; s0 = v[0] * mptr[0];
- v[1] = diag[1] * mptr[1]; s1 = v[1] * mptr[1];
- sum = mptr[2] - s0 - s1;
- if ( sum == 0.0f ) {
- return false;
- }
- mat[2][2] = sum;
- diag[2] = sum;
- invDiag[2] = d = 1.0f / sum;
- if ( n <= 3 ) {
- return true;
- }
- mptr = mat[0];
- for ( j = 3; j < n; j++ ) { // column 2 below the diagonal
- mptr[j*nc+2] = ( mptr[j*nc+2] - v[0] * mptr[j*nc+0] - v[1] * mptr[j*nc+1] ) * d;
- }
- mptr = mat[3]; // row 3
- v[0] = diag[0] * mptr[0]; s0 = v[0] * mptr[0];
- v[1] = diag[1] * mptr[1]; s1 = v[1] * mptr[1];
- v[2] = diag[2] * mptr[2]; s2 = v[2] * mptr[2];
- sum = mptr[3] - s0 - s1 - s2;
- if ( sum == 0.0f ) {
- return false;
- }
- mat[3][3] = sum;
- diag[3] = sum;
- invDiag[3] = d = 1.0f / sum;
- if ( n <= 4 ) {
- return true;
- }
- mptr = mat[0];
- for ( j = 4; j < n; j++ ) { // column 3 below the diagonal
- mptr[j*nc+3] = ( mptr[j*nc+3] - v[0] * mptr[j*nc+0] - v[1] * mptr[j*nc+1] - v[2] * mptr[j*nc+2] ) * d;
- }
- for ( i = 4; i < n; i++ ) { // general case: rows/columns 4 .. n-1
- mptr = mat[i];
- v[0] = diag[0] * mptr[0]; s0 = v[0] * mptr[0]; // first four terms seed the four partial sums
- v[1] = diag[1] * mptr[1]; s1 = v[1] * mptr[1];
- v[2] = diag[2] * mptr[2]; s2 = v[2] * mptr[2];
- v[3] = diag[3] * mptr[3]; s3 = v[3] * mptr[3];
- for ( k = 4; k < i-3; k += 4 ) { // four terms per pass while four whole terms remain below index i
- v[k+0] = diag[k+0] * mptr[k+0]; s0 += v[k+0] * mptr[k+0];
- v[k+1] = diag[k+1] * mptr[k+1]; s1 += v[k+1] * mptr[k+1];
- v[k+2] = diag[k+2] * mptr[k+2]; s2 += v[k+2] * mptr[k+2];
- v[k+3] = diag[k+3] * mptr[k+3]; s3 += v[k+3] * mptr[k+3];
- }
- switch( i - k ) { // 0..3 leftover terms; the cases fall through on purpose
- NODEFAULT; // defined elsewhere; presumably marks the default case as unreachable - confirm
- case 3: v[k+2] = diag[k+2] * mptr[k+2]; s0 += v[k+2] * mptr[k+2];
- case 2: v[k+1] = diag[k+1] * mptr[k+1]; s1 += v[k+1] * mptr[k+1];
- case 1: v[k+0] = diag[k+0] * mptr[k+0]; s2 += v[k+0] * mptr[k+0];
- case 0: break;
- }
- sum = s3; // combine the partial sums
- sum += s2;
- sum += s1;
- sum += s0;
- sum = mptr[i] - sum; // D[i] = mat[i][i] - sum over k < i of D[k] * L[i][k]^2
- if ( sum == 0.0f ) {
- return false;
- }
- mat[i][i] = sum;
- diag[i] = sum;
- invDiag[i] = d = 1.0f / sum;
- if ( i + 1 >= n ) { // last row: no entries below the diagonal left to update
- return true;
- }
- mptr = mat[i+1];
- for ( j = i+1; j < n; j++ ) { // column i below the diagonal, one row per pass
- s0 = mptr[0] * v[0]; // dot product of row j (columns 0..i-1) with v
- s1 = mptr[1] * v[1];
- s2 = mptr[2] * v[2];
- s3 = mptr[3] * v[3];
- for ( k = 4; k < i-7; k += 8 ) { // eight terms per pass
- s0 += mptr[k+0] * v[k+0];
- s1 += mptr[k+1] * v[k+1];
- s2 += mptr[k+2] * v[k+2];
- s3 += mptr[k+3] * v[k+3];
- s0 += mptr[k+4] * v[k+4];
- s1 += mptr[k+5] * v[k+5];
- s2 += mptr[k+6] * v[k+6];
- s3 += mptr[k+7] * v[k+7];
- }
- switch( i - k ) { // 0..7 leftover terms; the cases fall through on purpose
- NODEFAULT;
- case 7: s0 += mptr[k+6] * v[k+6];
- case 6: s1 += mptr[k+5] * v[k+5];
- case 5: s2 += mptr[k+4] * v[k+4];
- case 4: s3 += mptr[k+3] * v[k+3];
- case 3: s0 += mptr[k+2] * v[k+2];
- case 2: s1 += mptr[k+1] * v[k+1];
- case 1: s2 += mptr[k+0] * v[k+0];
- case 0: break;
- }
- sum = s3;
- sum += s2;
- sum += s1;
- sum += s0;
- mptr[i] = ( mptr[i] - sum ) * d; // L[j][i] = ( mat[j][i] - dot ) / D[i]
- mptr += nc; // next row
- }
- }
- return true;
- #else // plain loop form, compiled out by the "#if 1" above
- int i, j, k, nc;
- float *v, *ptr, *diagPtr;
- double d, sum;
- v = (float *) _alloca16( n * sizeof( float ) );
- nc = mat.GetNumColumns();
- for ( i = 0; i < n; i++ ) {
- ptr = mat[i];
- diagPtr = mat[0]; // walks the diagonal: advanced by nc + 1 per step
- sum = ptr[i];
- for ( j = 0; j < i; j++ ) {
- d = ptr[j];
- v[j] = diagPtr[0] * d;
- sum -= v[j] * d;
- diagPtr += nc + 1;
- }
- if ( sum == 0.0f ) {
- return false;
- }
- diagPtr[0] = sum; // after the loop diagPtr addresses mat[i][i]
- invDiag[i] = d = 1.0f / sum;
- if ( i + 1 >= n ) {
- continue;
- }
- ptr = mat[i+1];
- for ( j = i + 1; j < n; j++ ) {
- sum = ptr[i];
- for ( k = 0; k < i; k++ ) {
- sum -= ptr[k] * v[k];
- }
- ptr[i] = sum * d;
- ptr += nc;
- }
- }
- return true;
- #endif
- }
- /*
- ============
- idSIMD_Generic::BlendJoints
- For each of the numJoints entries in index, blends the selected joint in
- place towards the matching entry of blendJoints: q through Slerp and t
- through Lerp, both with the fraction lerp.
- ============
- */
- void VPCALL idSIMD_Generic::BlendJoints( idJointQuat *joints, const idJointQuat *blendJoints, const float lerp, const int *index, const int numJoints ) {
-     for ( int n = 0; n < numJoints; ++n ) {
-         const int jointNum = index[n];
-         idJointQuat &dst = joints[jointNum];
-         const idJointQuat &src = blendJoints[jointNum];
-         // the destination doubles as the "from" value of the interpolation
-         dst.q.Slerp( dst.q, src.q, lerp );
-         dst.t.Lerp( dst.t, src.t, lerp );
-     }
- }
- /*
- ============
- idSIMD_Generic::ConvertJointQuatsToJointMats
- Fills jointMats[0 .. numJoints-1] from jointQuats: the rotation comes from
- q.ToMat3() and the translation from t.
- ============
- */
- void VPCALL idSIMD_Generic::ConvertJointQuatsToJointMats( idJointMat *jointMats, const idJointQuat *jointQuats, const int numJoints ) {
-     for ( int n = 0; n < numJoints; ++n ) {
-         const idJointQuat &src = jointQuats[n];
-         idJointMat &dst = jointMats[n];
-         dst.SetRotation( src.q.ToMat3() );
-         dst.SetTranslation( src.t );
-     }
- }
- /*
- ============
- idSIMD_Generic::ConvertJointMatsToJointQuats
- Stores jointMats[n].ToJointQuat() into jointQuats[n] for every n below
- numJoints.
- ============
- */
- void VPCALL idSIMD_Generic::ConvertJointMatsToJointQuats( idJointQuat *jointQuats, const idJointMat *jointMats, const int numJoints ) {
-     int n = 0;
-     while ( n < numJoints ) {
-         jointQuats[n] = jointMats[n].ToJointQuat();
-         ++n;
-     }
- }
- /*
- ============
- idSIMD_Generic::TransformJoints
- Walks the joints firstJoint .. lastJoint (inclusive) in ascending order and
- applies operator*= with the matrix of each joint's parent.
- A parent index must be lower than the index of its child (asserted).
- ============
- */
- void VPCALL idSIMD_Generic::TransformJoints( idJointMat *jointMats, const int *parents, const int firstJoint, const int lastJoint ) {
-     int joint = firstJoint;
-     while ( joint <= lastJoint ) {
-         assert( parents[joint] < joint );
-         const idJointMat &parentMat = jointMats[parents[joint]];
-         jointMats[joint] *= parentMat;
-         ++joint;
-     }
- }
- /*
- ============
- idSIMD_Generic::UntransformJoints
- Walks the joints lastJoint .. firstJoint (inclusive) in descending order and
- applies operator/= with the matrix of each joint's parent.
- A parent index must be lower than the index of its child (asserted).
- ============
- */
- void VPCALL idSIMD_Generic::UntransformJoints( idJointMat *jointMats, const int *parents, const int firstJoint, const int lastJoint ) {
-     int joint = lastJoint;
-     while ( joint >= firstJoint ) {
-         assert( parents[joint] < joint );
-         const idJointMat &parentMat = jointMats[parents[joint]];
-         jointMats[joint] /= parentMat;
-         --joint;
-     }
- }
- /*
- ============
- idSIMD_Generic::TransformVerts
- Computes verts[i].xyz as the sum of joint matrix * weight over the run of
- weights belonging to vertex i.
- index holds two ints per weight: the byte offset of the joint matrix from
- the start of joints, and a flag that is non-zero on the last weight of a
- vertex. numWeights is not used.
- ============
- */
- void VPCALL idSIMD_Generic::TransformVerts( idDrawVert *verts, const int numVerts, const idJointMat *joints, const idVec4 *weights, const int *index, int numWeights ) {
-     const byte *jointBytes = (byte *)joints;
-     int w = 0;
-     for ( int vertNum = 0; vertNum < numVerts; ++vertNum ) {
-         const int *entry = index + w * 2;
-         idVec3 accum;
-         accum = ( *(idJointMat *) ( jointBytes + entry[0] ) ) * weights[w];
-         // keep accumulating until the entry flagged as the last one was consumed
-         while ( entry[1] == 0 ) {
-             ++w;
-             entry += 2;
-             accum += ( *(idJointMat *) ( jointBytes + entry[0] ) ) * weights[w];
-         }
-         ++w;
-         verts[vertNum].xyz = accum;
-     }
- }
- /*
- ============
- idSIMD_Generic::TracePointCull
- Builds one cull byte per vertex from its distances to planes[0..3].
- Bit p (0..3) is the inverted FLOATSIGNBITSET result of ( distance + radius ),
- bit p+4 is the FLOATSIGNBITSET result of ( distance - radius ).
- totalOr receives the bitwise OR of all bytes written.
- ============
- */
- void VPCALL idSIMD_Generic::TracePointCull( byte *cullBits, byte &totalOr, const float radius, const idPlane *planes, const idDrawVert *verts, const int numVerts ) {
-     byte combined = 0;
-     for ( int vertNum = 0; vertNum < numVerts; ++vertNum ) {
-         const idVec3 &pos = verts[vertNum].xyz;
-         byte mask = 0;
-         for ( int p = 0; p < 4; ++p ) {
-             const float dist = planes[p].Distance( pos );
-             float offset;
-             offset = dist + radius;
-             mask |= FLOATSIGNBITSET( offset ) << p;
-             offset = dist - radius;
-             mask |= FLOATSIGNBITSET( offset ) << ( p + 4 );
-         }
-         mask ^= 0x0F; // flip lower four bits
-         combined |= mask;
-         cullBits[vertNum] = mask;
-     }
-     totalOr = combined;
- }
- /*
- ============
- idSIMD_Generic::DecalPointCull
- Builds one cull byte per vertex from its distances to planes[0..5]:
- bit p is the inverted FLOATSIGNBITSET result of the distance to plane p.
- ============
- */
- void VPCALL idSIMD_Generic::DecalPointCull( byte *cullBits, const idPlane *planes, const idDrawVert *verts, const int numVerts ) {
-     for ( int vertNum = 0; vertNum < numVerts; ++vertNum ) {
-         const idVec3 &pos = verts[vertNum].xyz;
-         byte mask = 0;
-         for ( int p = 0; p < 6; ++p ) {
-             float dist = planes[p].Distance( pos );
-             mask |= FLOATSIGNBITSET( dist ) << p;
-         }
-         cullBits[vertNum] = mask ^ 0x3F; // flip lower 6 bits
-     }
- }
- /*
- ============
- idSIMD_Generic::OverlayPointCull
- For each vertex stores the distances to planes[0] and planes[1] as its
- texture coordinates and builds a cull byte: bits 0 and 1 are the
- FLOATSIGNBITSET results of the two distances, bits 2 and 3 those of
- ( 1 - distance ).
- ============
- */
- void VPCALL idSIMD_Generic::OverlayPointCull( byte *cullBits, idVec2 *texCoords, const idPlane *planes, const idDrawVert *verts, const int numVerts ) {
-     for ( int vertNum = 0; vertNum < numVerts; ++vertNum ) {
-         const idVec3 &pos = verts[vertNum].xyz;
-         idVec2 &st = texCoords[vertNum];
-         float s, t, sFar, tFar;
-         byte mask;
-         s = planes[0].Distance( pos );
-         st[0] = s;
-         t = planes[1].Distance( pos );
-         st[1] = t;
-         sFar = 1.0f - s;
-         tFar = 1.0f - t;
-         mask = FLOATSIGNBITSET( s ) << 0;
-         mask |= FLOATSIGNBITSET( t ) << 1;
-         mask |= FLOATSIGNBITSET( sFar ) << 2;
-         mask |= FLOATSIGNBITSET( tFar ) << 3;
-         cullBits[vertNum] = mask;
-     }
- }
- /*
- ============
- idSIMD_Generic::DeriveTriPlanes
- Derives a plane equation for each triangle.
- Every three consecutive indexes form one triangle; one plane is written per
- triangle, in order. The normal is the normalized cross product of the two
- edges leaving the first corner and the plane is fitted through that corner.
- numVerts is not used.
- ============
- */
- void VPCALL idSIMD_Generic::DeriveTriPlanes( idPlane *planes, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes ) {
-     idPlane *out = planes;
-     for ( int first = 0; first < numIndexes; first += 3, ++out ) {
-         const idDrawVert *p0 = verts + indexes[first + 0];
-         const idDrawVert *p1 = verts + indexes[first + 1];
-         const idDrawVert *p2 = verts + indexes[first + 2];
-         // edges leaving the first corner
-         const float e0x = p1->xyz[0] - p0->xyz[0];
-         const float e0y = p1->xyz[1] - p0->xyz[1];
-         const float e0z = p1->xyz[2] - p0->xyz[2];
-         const float e1x = p2->xyz[0] - p0->xyz[0];
-         const float e1y = p2->xyz[1] - p0->xyz[1];
-         const float e1z = p2->xyz[2] - p0->xyz[2];
-         // cross product of the second edge with the first
-         idVec3 normal;
-         normal[0] = e1y * e0z - e1z * e0y;
-         normal[1] = e1z * e0x - e1x * e0z;
-         normal[2] = e1x * e0y - e1y * e0x;
-         const float invLength = idMath::RSqrt( normal.x * normal.x + normal.y * normal.y + normal.z * normal.z );
-         normal.x *= invLength;
-         normal.y *= invLength;
-         normal.z *= invLength;
-         out->SetNormal( normal );
-         out->FitThroughPoint( p0->xyz );
-     }
- }
- /*
- ============
- idSIMD_Generic::DeriveTangents
- Derives the normal and orthogonal tangent vectors for the triangle vertices.
- For each vertex the normal and tangent vectors are derived from all triangles
- using the vertex which results in smooth tangents across the mesh.
- In the process the triangle planes are calculated as well.
- One plane is written to planes per triangle (three indexes), in order.
- The first triangle touching a vertex overwrites its normal and tangents,
- later triangles add to them; the sums are not normalized here.
- The sign of the texture space area is moved onto the tangent scale through
- a 32 bit integer copy of the float bits. The copy goes through memcpy and
- an unsigned int so that exactly four bytes are read and written; a cast to
- unsigned long touches eight bytes where long is 64 bits wide.
- ============
- */
- void VPCALL idSIMD_Generic::DeriveTangents( idPlane *planes, idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes ) {
-     int i, k;
-     // used[v] is set once vertex v has received its first contribution
-     bool *used = (bool *)_alloca16( numVerts * sizeof( used[0] ) );
-     memset( used, 0, numVerts * sizeof( used[0] ) );
-     idPlane *planesPtr = planes;
-     for ( i = 0; i < numIndexes; i += 3 ) {
-         idDrawVert *a, *b, *c;
-         unsigned int signBit, scaleBits; // assumes unsigned int and float are both 32 bits wide
-         float d0[5], d1[5], f, area;
-         idVec3 n, t0, t1;
-         int corner[3];
-         corner[0] = indexes[i + 0];
-         corner[1] = indexes[i + 1];
-         corner[2] = indexes[i + 2];
-         a = verts + corner[0];
-         b = verts + corner[1];
-         c = verts + corner[2];
-         // position (0..2) and texture coordinate (3..4) deltas of the two edges leaving a
-         d0[0] = b->xyz[0] - a->xyz[0];
-         d0[1] = b->xyz[1] - a->xyz[1];
-         d0[2] = b->xyz[2] - a->xyz[2];
-         d0[3] = b->st[0] - a->st[0];
-         d0[4] = b->st[1] - a->st[1];
-         d1[0] = c->xyz[0] - a->xyz[0];
-         d1[1] = c->xyz[1] - a->xyz[1];
-         d1[2] = c->xyz[2] - a->xyz[2];
-         d1[3] = c->st[0] - a->st[0];
-         d1[4] = c->st[1] - a->st[1];
-         // normal
-         n[0] = d1[1] * d0[2] - d1[2] * d0[1];
-         n[1] = d1[2] * d0[0] - d1[0] * d0[2];
-         n[2] = d1[0] * d0[1] - d1[1] * d0[0];
-         f = idMath::RSqrt( n.x * n.x + n.y * n.y + n.z * n.z );
-         n.x *= f;
-         n.y *= f;
-         n.z *= f;
-         planesPtr->SetNormal( n );
-         planesPtr->FitThroughPoint( a->xyz );
-         planesPtr++;
-         // area sign bit
-         area = d0[3] * d1[4] - d0[4] * d1[3];
-         memcpy( &signBit, &area, sizeof( signBit ) );
-         signBit &= 0x80000000u;
-         // first tangent
-         t0[0] = d0[0] * d1[4] - d0[4] * d1[0];
-         t0[1] = d0[1] * d1[4] - d0[4] * d1[1];
-         t0[2] = d0[2] * d1[4] - d0[4] * d1[2];
-         f = idMath::RSqrt( t0.x * t0.x + t0.y * t0.y + t0.z * t0.z );
-         // flip the sign of the scale when the area is negative
-         memcpy( &scaleBits, &f, sizeof( scaleBits ) );
-         scaleBits ^= signBit;
-         memcpy( &f, &scaleBits, sizeof( f ) );
-         t0.x *= f;
-         t0.y *= f;
-         t0.z *= f;
-         // second tangent
-         t1[0] = d0[3] * d1[0] - d0[0] * d1[3];
-         t1[1] = d0[3] * d1[1] - d0[1] * d1[3];
-         t1[2] = d0[3] * d1[2] - d0[2] * d1[3];
-         f = idMath::RSqrt( t1.x * t1.x + t1.y * t1.y + t1.z * t1.z );
-         memcpy( &scaleBits, &f, sizeof( scaleBits ) );
-         scaleBits ^= signBit;
-         memcpy( &f, &scaleBits, sizeof( f ) );
-         t1.x *= f;
-         t1.y *= f;
-         t1.z *= f;
-         // hand the result to the three corners, in index order
-         for ( k = 0; k < 3; k++ ) {
-             idDrawVert &dv = verts[corner[k]];
-             if ( used[corner[k]] ) {
-                 dv.normal += n;
-                 dv.tangents[0] += t0;
-                 dv.tangents[1] += t1;
-             } else {
-                 dv.normal = n;
-                 dv.tangents[0] = t0;
-                 dv.tangents[1] = t1;
-                 used[corner[k]] = true;
-             }
-         }
-     }
- }
- /*
- ============
- idSIMD_Generic::DeriveUnsmoothedTangents
- Derives the normal and orthogonal tangent vectors for the triangle vertices.
- For each vertex the normal and tangent vectors are derived from a single dominant triangle.
- The triangle of vertex i is formed by i, dominantTris[i].v2 and
- dominantTris[i].v3; normalizationScale[0], [1] and [2] scale the first
- tangent, the second tangent and the normal respectively.
- With DERIVE_UNSMOOTHED_BITANGENT defined the second tangent is built from
- the normal and the first tangent instead of the texture coordinate deltas.
- ============
- */
- #define DERIVE_UNSMOOTHED_BITANGENT
- void VPCALL idSIMD_Generic::DeriveUnsmoothedTangents( idDrawVert *verts, const dominantTri_s *dominantTris, const int numVerts ) {
-     for ( int vertNum = 0; vertNum < numVerts; ++vertNum ) {
-         const dominantTri_s &tri = dominantTris[vertNum];
-         idDrawVert *self = verts + vertNum;
-         idDrawVert *second = verts + tri.v2;
-         idDrawVert *third = verts + tri.v3;
-         // position (0..2) and texture coordinate (3..4) deltas of the two edges leaving the vertex
-         float e0[5], e1[5];
-         e0[0] = second->xyz[0] - self->xyz[0];
-         e0[1] = second->xyz[1] - self->xyz[1];
-         e0[2] = second->xyz[2] - self->xyz[2];
-         e0[3] = second->st[0] - self->st[0];
-         e0[4] = second->st[1] - self->st[1];
-         e1[0] = third->xyz[0] - self->xyz[0];
-         e1[1] = third->xyz[1] - self->xyz[1];
-         e1[2] = third->xyz[2] - self->xyz[2];
-         e1[3] = third->st[0] - self->st[0];
-         e1[4] = third->st[1] - self->st[1];
-         const float tangentScale = tri.normalizationScale[0];
-         const float bitangentScale = tri.normalizationScale[1];
-         const float normalScale = tri.normalizationScale[2];
-         float nrm[3], tng[3], btg[3];
-         nrm[0] = normalScale * ( e1[1] * e0[2] - e1[2] * e0[1] );
-         nrm[1] = normalScale * ( e1[2] * e0[0] - e1[0] * e0[2] );
-         nrm[2] = normalScale * ( e1[0] * e0[1] - e1[1] * e0[0] );
-         tng[0] = tangentScale * ( e0[0] * e1[4] - e0[4] * e1[0] );
-         tng[1] = tangentScale * ( e0[1] * e1[4] - e0[4] * e1[1] );
-         tng[2] = tangentScale * ( e0[2] * e1[4] - e0[4] * e1[2] );
- #ifndef DERIVE_UNSMOOTHED_BITANGENT
-         btg[0] = bitangentScale * ( e0[3] * e1[0] - e0[0] * e1[3] );
-         btg[1] = bitangentScale * ( e0[3] * e1[1] - e0[1] * e1[3] );
-         btg[2] = bitangentScale * ( e0[3] * e1[2] - e0[2] * e1[3] );
- #else
-         btg[0] = bitangentScale * ( nrm[2] * tng[1] - nrm[1] * tng[2] );
-         btg[1] = bitangentScale * ( nrm[0] * tng[2] - nrm[2] * tng[0] );
-         btg[2] = bitangentScale * ( nrm[1] * tng[0] - nrm[0] * tng[1] );
- #endif
-         for ( int axis = 0; axis < 3; ++axis ) {
-             self->normal[axis] = nrm[axis];
-             self->tangents[0][axis] = tng[axis];
-             self->tangents[1][axis] = btg[axis];
-         }
-     }
- }
- /*
- ============
- idSIMD_Generic::NormalizeTangents
- Normalizes each vertex normal and projects and normalizes the
- tangent vectors onto the plane orthogonal to the vertex normal.
- All of this happens in place, for verts[0 .. numVerts-1].
- ============
- */
- void VPCALL idSIMD_Generic::NormalizeTangents( idDrawVert *verts, const int numVerts ) {
-     for ( int vertNum = 0; vertNum < numVerts; ++vertNum ) {
-         idDrawVert &dv = verts[vertNum];
-         idVec3 &normal = dv.normal;
-         float scale = idMath::RSqrt( normal.x * normal.x + normal.y * normal.y + normal.z * normal.z );
-         normal.x *= scale;
-         normal.y *= scale;
-         normal.z *= scale;
-         for ( int which = 0; which < 2; ++which ) {
-             idVec3 &tangent = dv.tangents[which];
-             // remove the component along the normal, then renormalize
-             tangent -= ( tangent * normal ) * normal;
-             scale = idMath::RSqrt( tangent.x * tangent.x + tangent.y * tangent.y + tangent.z * tangent.z );
-             tangent.x *= scale;
-             tangent.y *= scale;
-             tangent.z *= scale;
-         }
-     }
- }
- /*
- ============
- idSIMD_Generic::CreateTextureSpaceLightVectors
- Calculates light vectors in texture space for the given triangle vertices.
- For each vertex the direction towards the light origin is projected onto texture space.
- The light vectors are only calculated for the vertices referenced by the indexes.
- Entries of lightVectors that belong to unreferenced vertices are left untouched.
- ============
- */
- void VPCALL idSIMD_Generic::CreateTextureSpaceLightVectors( idVec3 *lightVectors, const idVec3 &lightOrigin, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes ) {
-     // flag every vertex that is referenced by at least one index
-     bool *referenced = (bool *)_alloca16( numVerts * sizeof( referenced[0] ) );
-     memset( referenced, 0, numVerts * sizeof( referenced[0] ) );
-     for ( int n = 0; n < numIndexes; ++n ) {
-         referenced[indexes[n]] = true;
-     }
-     for ( int vertNum = 0; vertNum < numVerts; ++vertNum ) {
-         if ( referenced[vertNum] ) {
-             const idDrawVert &dv = verts[vertNum];
-             idVec3 &out = lightVectors[vertNum];
-             const idVec3 toLight = lightOrigin - dv.xyz;
-             out[0] = toLight * dv.tangents[0];
-             out[1] = toLight * dv.tangents[1];
-             out[2] = toLight * dv.normal;
-         }
-     }
- }
- /*
- ============
- idSIMD_Generic::CreateSpecularTextureCoords
- Calculates specular texture coordinates for the given triangle vertices.
- For each vertex the normalized direction towards the light origin is added to the
- normalized direction towards the view origin and the result is projected onto texture space.
- The texture coordinates are only calculated for the vertices referenced by the indexes.
- The fourth component of every written coordinate is set to 1.
- ============
- */
- void VPCALL idSIMD_Generic::CreateSpecularTextureCoords( idVec4 *texCoords, const idVec3 &lightOrigin, const idVec3 &viewOrigin, const idDrawVert *verts, const int numVerts, const int *indexes, const int numIndexes ) {
-     // flag every vertex that is referenced by at least one index
-     bool *referenced = (bool *)_alloca16( numVerts * sizeof( referenced[0] ) );
-     memset( referenced, 0, numVerts * sizeof( referenced[0] ) );
-     for ( int n = 0; n < numIndexes; ++n ) {
-         referenced[indexes[n]] = true;
-     }
-     for ( int vertNum = 0; vertNum < numVerts; ++vertNum ) {
-         if ( referenced[vertNum] ) {
-             const idDrawVert &dv = verts[vertNum];
-             idVec4 &out = texCoords[vertNum];
-             idVec3 toLight = lightOrigin - dv.xyz;
-             idVec3 toView = viewOrigin - dv.xyz;
-             const float invLightDist = idMath::RSqrt( toLight * toLight );
-             toLight[0] *= invLightDist;
-             toLight[1] *= invLightDist;
-             toLight[2] *= invLightDist;
-             const float invViewDist = idMath::RSqrt( toView * toView );
-             toView[0] *= invViewDist;
-             toView[1] *= invViewDist;
-             toView[2] *= invViewDist;
-             // sum of the two unit directions
-             toLight += toView;
-             out[0] = toLight * dv.tangents[0];
-             out[1] = toLight * dv.tangents[1];
-             out[2] = toLight * dv.normal;
-             out[3] = 1.0f;
-         }
-     }
- }
- /*
- ============
- idSIMD_Generic::CreateShadowCache
- For every vertex whose vertRemap entry is zero, appends two entries to
- vertexCache: the position with w = 1 and the position minus lightOrigin
- with w = 0. The index of the first of the two is stored in vertRemap.
- Vertices with a non-zero vertRemap entry are skipped.
- Returns the number of vertexCache entries written.
- ============
- */
- int VPCALL idSIMD_Generic::CreateShadowCache( idVec4 *vertexCache, int *vertRemap, const idVec3 &lightOrigin, const idDrawVert *verts, const int numVerts ) {
-     int written = 0;
-     for ( int vertNum = 0; vertNum < numVerts; ++vertNum ) {
-         if ( vertRemap[vertNum] == 0 ) {
-             const float *pos = verts[vertNum].xyz.ToFloatPtr();
-             idVec4 &nearVert = vertexCache[written+0];
-             idVec4 &farVert = vertexCache[written+1];
-             nearVert[0] = pos[0];
-             nearVert[1] = pos[1];
-             nearVert[2] = pos[2];
-             nearVert[3] = 1.0f;
-             // R_SetupProjection() builds the projection matrix with a slight crunch
-             // for depth, which keeps this w=0 division from rasterizing right at the
-             // wrap around point and causing depth fighting with the rear caps
-             farVert[0] = pos[0] - lightOrigin[0];
-             farVert[1] = pos[1] - lightOrigin[1];
-             farVert[2] = pos[2] - lightOrigin[2];
-             farVert[3] = 0.0f;
-             vertRemap[vertNum] = written;
-             written += 2;
-         }
-     }
-     return written;
- }
- /*
- ============
- idSIMD_Generic::CreateVertexProgramShadowCache
- Writes two entries to vertexCache per vertex, both carrying the vertex
- position: the first with w = 1, the second with w = 0.
- Returns the number of entries written, numVerts * 2.
- ============
- */
- int VPCALL idSIMD_Generic::CreateVertexProgramShadowCache( idVec4 *vertexCache, const idDrawVert *verts, const int numVerts ) {
-     idVec4 *out = vertexCache;
-     for ( int vertNum = 0; vertNum < numVerts; ++vertNum, out += 2 ) {
-         const float *pos = verts[vertNum].xyz.ToFloatPtr();
-         for ( int axis = 0; axis < 3; ++axis ) {
-             out[0][axis] = pos[axis];
-             out[1][axis] = pos[axis];
-         }
-         out[0][3] = 1.0f;
-         out[1][3] = 0.0f;
-     }
-     return numVerts * 2;
- }
- /*
- ============
- idSIMD_Generic::UpSamplePCMTo44kHz
- Duplicate samples for 44kHz output.
- src holds numSamples 16 bit samples, interleaved when numChannels is not 1.
- 11025 Hz input is written four times, 22050 Hz input twice and 44100 Hz
- input once; any other rate asserts.
- ============
- */
- void idSIMD_Generic::UpSamplePCMTo44kHz( float *dest, const short *src, const int numSamples, const int kHz, const int numChannels ) {
-     const bool mono = ( numChannels == 1 );
-     int n;
-     switch ( kHz ) {
-         case 11025: {
-             if ( mono ) {
-                 for ( n = 0; n < numSamples; n++ ) {
-                     float *out = dest + n * 4;
-                     out[0] = out[1] = out[2] = out[3] = (float) src[n+0];
-                 }
-             } else {
-                 // n walks the interleaved input one left/right pair at a time
-                 for ( n = 0; n < numSamples; n += 2 ) {
-                     float *out = dest + n * 4;
-                     out[0] = out[2] = out[4] = out[6] = (float) src[n+0];
-                     out[1] = out[3] = out[5] = out[7] = (float) src[n+1];
-                 }
-             }
-             break;
-         }
-         case 22050: {
-             if ( mono ) {
-                 for ( n = 0; n < numSamples; n++ ) {
-                     float *out = dest + n * 2;
-                     out[0] = out[1] = (float) src[n+0];
-                 }
-             } else {
-                 for ( n = 0; n < numSamples; n += 2 ) {
-                     float *out = dest + n * 2;
-                     out[0] = out[2] = (float) src[n+0];
-                     out[1] = out[3] = (float) src[n+1];
-                 }
-             }
-             break;
-         }
-         case 44100: {
-             // straight conversion, identical for mono and interleaved input
-             for ( n = 0; n < numSamples; n++ ) {
-                 dest[n] = (float) src[n];
-             }
-             break;
-         }
-         default: {
-             assert( 0 );
-             break;
-         }
-     }
- }
- /*
- ============
- idSIMD_Generic::UpSampleOGGTo44kHz
- Duplicate samples for 44kHz output.
- ogg[0] (and ogg[1] when numChannels is not 1) hold one channel each; every
- value is multiplied by 32768 and written 4, 2 or 1 times for 11025, 22050
- and 44100 Hz input. Output with more than one channel is interleaved and
- covers numSamples >> 1 values per channel. Any other rate asserts.
- ============
- */
- void idSIMD_Generic::UpSampleOGGTo44kHz( float *dest, const float * const *ogg, const int numSamples, const int kHz, const int numChannels ) {
-     int repeat;
-     // number of times every input value is written
-     switch ( kHz ) {
-         case 11025: repeat = 4; break;
-         case 22050: repeat = 2; break;
-         case 44100: repeat = 1; break;
-         default:
-             assert( 0 );
-             return;
-     }
-     if ( numChannels == 1 ) {
-         for ( int n = 0; n < numSamples; n++ ) {
-             const float value = ogg[0][n] * 32768.0f;
-             float *out = dest + n * repeat;
-             for ( int r = 0; r < repeat; r++ ) {
-                 out[r] = value;
-             }
-         }
-     } else {
-         const int frames = numSamples >> 1;
-         for ( int n = 0; n < frames; n++ ) {
-             float *out = dest + n * repeat * 2;
-             const float left = ogg[0][n] * 32768.0f;
-             for ( int r = 0; r < repeat; r++ ) {
-                 out[r*2+0] = left;
-             }
-             const float right = ogg[1][n] * 32768.0f;
-             for ( int r = 0; r < repeat; r++ ) {
-                 out[r*2+1] = right;
-             }
-         }
-     }
- }
- /*
- ============
- idSIMD_Generic::MixSoundTwoSpeakerMono
- Adds MIXBUFFER_SAMPLES mono samples to the interleaved two channel
- mixBuffer. The gain of each channel starts at lastV and is stepped by
- ( currentV - lastV ) / MIXBUFFER_SAMPLES after every sample.
- numSamples must equal MIXBUFFER_SAMPLES (asserted).
- ============
- */
- void VPCALL idSIMD_Generic::MixSoundTwoSpeakerMono( float *mixBuffer, const float *samples, const int numSamples, const float lastV[2], const float currentV[2] ) {
-     float gainL = lastV[0];
-     float gainR = lastV[1];
-     const float stepL = ( currentV[0] - lastV[0] ) / MIXBUFFER_SAMPLES;
-     const float stepR = ( currentV[1] - lastV[1] ) / MIXBUFFER_SAMPLES;
-     assert( numSamples == MIXBUFFER_SAMPLES );
-     float *out = mixBuffer;
-     for ( int n = 0; n < MIXBUFFER_SAMPLES; ++n, out += 2 ) {
-         out[0] += samples[n] * gainL;
-         out[1] += samples[n] * gainR;
-         gainL += stepL;
-         gainR += stepR;
-     }
- }
- /*
- ============
- idSIMD_Generic::MixSoundTwoSpeakerStereo
- Adds MIXBUFFER_SAMPLES interleaved stereo samples to the interleaved two
- channel mixBuffer, left onto left and right onto right. The gain of each
- channel starts at lastV and is stepped by
- ( currentV - lastV ) / MIXBUFFER_SAMPLES after every sample pair.
- numSamples must equal MIXBUFFER_SAMPLES (asserted).
- ============
- */
- void VPCALL idSIMD_Generic::MixSoundTwoSpeakerStereo( float *mixBuffer, const float *samples, const int numSamples, const float lastV[2], const float currentV[2] ) {
-     float gainL = lastV[0];
-     float gainR = lastV[1];
-     const float stepL = ( currentV[0] - lastV[0] ) / MIXBUFFER_SAMPLES;
-     const float stepR = ( currentV[1] - lastV[1] ) / MIXBUFFER_SAMPLES;
-     assert( numSamples == MIXBUFFER_SAMPLES );
-     float *out = mixBuffer;
-     const float *in = samples;
-     for ( int n = 0; n < MIXBUFFER_SAMPLES; ++n, out += 2, in += 2 ) {
-         out[0] += in[0] * gainL;
-         out[1] += in[1] * gainR;
-         gainL += stepL;
-         gainR += stepR;
-     }
- }
- /*
- ============
- idSIMD_Generic::MixSoundSixSpeakerMono
- Adds MIXBUFFER_SAMPLES mono samples to all six channels of the interleaved
- mixBuffer. The gain of each channel starts at lastV and is stepped by
- ( currentV - lastV ) / MIXBUFFER_SAMPLES after every sample.
- numSamples must equal MIXBUFFER_SAMPLES (asserted).
- ============
- */
- void VPCALL idSIMD_Generic::MixSoundSixSpeakerMono( float *mixBuffer, const float *samples, const int numSamples, const float lastV[6], const float currentV[6] ) {
-     float gain[6];
-     float step[6];
-     int ch;
-     for ( ch = 0; ch < 6; ch++ ) {
-         gain[ch] = lastV[ch];
-     }
-     for ( ch = 0; ch < 6; ch++ ) {
-         step[ch] = ( currentV[ch] - lastV[ch] ) / MIXBUFFER_SAMPLES;
-     }
-     assert( numSamples == MIXBUFFER_SAMPLES );
-     for ( int n = 0; n < MIXBUFFER_SAMPLES; n++ ) {
-         float *out = mixBuffer + n * 6;
-         for ( ch = 0; ch < 6; ch++ ) {
-             out[ch] += samples[n] * gain[ch];
-         }
-         for ( ch = 0; ch < 6; ch++ ) {
-             gain[ch] += step[ch];
-         }
-     }
- }
- /*
- ============
- idSIMD_Generic::MixSoundSixSpeakerStereo
- Adds MIXBUFFER_SAMPLES interleaved stereo samples to the six channel
- interleaved mixBuffer. Output channels 0, 2, 3 and 4 take the first sample
- of each pair, channels 1 and 5 the second. The gain of each channel starts
- at lastV and is stepped by ( currentV - lastV ) / MIXBUFFER_SAMPLES after
- every sample pair.
- numSamples must equal MIXBUFFER_SAMPLES (asserted).
- ============
- */
- void VPCALL idSIMD_Generic::MixSoundSixSpeakerStereo( float *mixBuffer, const float *samples, const int numSamples, const float lastV[6], const float currentV[6] ) {
-     // which sample of the input pair feeds each output channel
-     static const int source[6] = { 0, 1, 0, 0, 0, 1 };
-     float gain[6];
-     float step[6];
-     int ch;
-     for ( ch = 0; ch < 6; ch++ ) {
-         gain[ch] = lastV[ch];
-     }
-     for ( ch = 0; ch < 6; ch++ ) {
-         step[ch] = ( currentV[ch] - lastV[ch] ) / MIXBUFFER_SAMPLES;
-     }
-     assert( numSamples == MIXBUFFER_SAMPLES );
-     for ( int n = 0; n < MIXBUFFER_SAMPLES; n++ ) {
-         float *out = mixBuffer + n * 6;
-         for ( ch = 0; ch < 6; ch++ ) {
-             out[ch] += samples[n*2+source[ch]] * gain[ch];
-         }
-         for ( ch = 0; ch < 6; ch++ ) {
-             gain[ch] += step[ch];
-         }
-     }
- }
- /*
- ============
- idSIMD_Generic::MixedSoundToSamples
- Converts numSamples floats from mixBuffer to 16 bit samples. Values at or
- below -32768 become -32768, values at or above 32767 become 32767 and
- everything in between is converted with a cast to short.
- ============
- */
- void VPCALL idSIMD_Generic::MixedSoundToSamples( short *samples, const float *mixBuffer, const int numSamples ) {
-     const float *in = mixBuffer;
-     short *out = samples;
-     for ( int remaining = numSamples; remaining > 0; --remaining, ++in, ++out ) {
-         const float value = *in;
-         if ( value <= -32768.0f ) {
-             *out = -32768;
-             continue;
-         }
-         if ( value >= 32767.0f ) {
-             *out = 32767;
-             continue;
-         }
-         *out = (short) value;
-     }
- }
|