12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745 |
- /*
- Copyright (C) 1996-1997 Id Software, Inc.
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- as published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- See the GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
- //
- // d_polysa.s
- // x86 assembly-language polygon model drawing code
- //
- #include "asm_i386.h"
- #include "quakeasm.h"
- #include "asm_draw.h"
- #include "d_ifacea.h"
- #if id386
- // !!! if this is changed, it must be changed in d_polyse.c too !!!
- #define DPS_MAXSPANS MAXHEIGHT+1
- // 1 extra for spanpackage that marks end
- //#define SPAN_SIZE (((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
- #define SPAN_SIZE (1024+1+1+1)*32
- .data
- .align 4
- p10_minus_p20: .single 0
- p01_minus_p21: .single 0
- temp0: .single 0
- temp1: .single 0
- Ltemp: .single 0
- aff8entryvec_table: .long LDraw8, LDraw7, LDraw6, LDraw5
- .long LDraw4, LDraw3, LDraw2, LDraw1
- lzistepx: .long 0
- .text
- #ifndef NeXT
- .extern C(D_PolysetSetEdgeTable)
- .extern C(D_RasterizeAliasPolySmooth)
- #endif
- //----------------------------------------------------------------------
- // affine triangle gradient calculation code
- //----------------------------------------------------------------------
- #define skinwidth 4+0
- .globl C(D_PolysetCalcGradients)
- C(D_PolysetCalcGradients):
- // p00_minus_p20 = r_p0[0] - r_p2[0];
- // p01_minus_p21 = r_p0[1] - r_p2[1];
- // p10_minus_p20 = r_p1[0] - r_p2[0];
- // p11_minus_p21 = r_p1[1] - r_p2[1];
- //
- // xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 -
- // p00_minus_p20 * p11_minus_p21);
- //
- // ystepdenominv = -xstepdenominv;
- fildl C(r_p0)+0 // r_p0[0]
- fildl C(r_p2)+0 // r_p2[0] | r_p0[0]
- fildl C(r_p0)+4 // r_p0[1] | r_p2[0] | r_p0[0]
- fildl C(r_p2)+4 // r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
- fildl C(r_p1)+0 // r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
- fildl C(r_p1)+4 // r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] |
- // r_p2[0] | r_p0[0]
- fxch %st(3) // r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] |
- // r_p2[0] | r_p0[0]
- fsub %st(2),%st(0) // p01_minus_p21 | r_p1[0] | r_p2[1] | r_p1[1] |
- // r_p2[0] | r_p0[0]
- fxch %st(1) // r_p1[0] | p01_minus_p21 | r_p2[1] | r_p1[1] |
- // r_p2[0] | r_p0[0]
- fsub %st(4),%st(0) // p10_minus_p20 | p01_minus_p21 | r_p2[1] |
- // r_p1[1] | r_p2[0] | r_p0[0]
- fxch %st(5) // r_p0[0] | p01_minus_p21 | r_p2[1] |
- // r_p1[1] | r_p2[0] | p10_minus_p20
- fsubp %st(0),%st(4) // p01_minus_p21 | r_p2[1] | r_p1[1] |
- // p00_minus_p20 | p10_minus_p20
- fxch %st(2) // r_p1[1] | r_p2[1] | p01_minus_p21 |
- // p00_minus_p20 | p10_minus_p20
- fsubp %st(0),%st(1) // p11_minus_p21 | p01_minus_p21 |
- // p00_minus_p20 | p10_minus_p20
- fxch %st(1) // p01_minus_p21 | p11_minus_p21 |
- // p00_minus_p20 | p10_minus_p20
- flds C(d_xdenom) // d_xdenom | p01_minus_p21 | p11_minus_p21 |
- // p00_minus_p20 | p10_minus_p20
- fxch %st(4) // p10_minus_p20 | p01_minus_p21 | p11_minus_p21 |
- // p00_minus_p20 | d_xdenom
- fstps p10_minus_p20 // p01_minus_p21 | p11_minus_p21 |
- // p00_minus_p20 | d_xdenom
- fstps p01_minus_p21 // p11_minus_p21 | p00_minus_p20 | xstepdenominv
- fxch %st(2) // xstepdenominv | p00_minus_p20 | p11_minus_p21
- //// ceil () for light so positive steps are exaggerated, negative steps
- //// diminished, pushing us away from underflow toward overflow. Underflow is
- //// very visible, overflow is very unlikely, because of ambient lighting
- // t0 = r_p0[4] - r_p2[4];
- // t1 = r_p1[4] - r_p2[4];
- fildl C(r_p2)+16 // r_p2[4] | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fildl C(r_p0)+16 // r_p0[4] | r_p2[4] | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fildl C(r_p1)+16 // r_p1[4] | r_p0[4] | r_p2[4] | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fxch %st(2) // r_p2[4] | r_p0[4] | r_p1[4] | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fld %st(0) // r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fsubrp %st(0),%st(2) // r_p2[4] | t0 | r_p1[4] | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fsubrp %st(0),%st(2) // t0 | t1 | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- // r_lstepx = (int)
- // ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
- // r_lstepy = (int)
- // ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
- fld %st(0) // t0 | t0 | t1 | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmul %st(5),%st(0) // t0*p11_minus_p21 | t0 | t1 | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
- // t0*p11_minus_p21 | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
- // t0*p11_minus_p21 | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fmul %st(5),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
- // t1*p01_minus_p21 | t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
- // t1*p00_minus_p20 | t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fld %st(2) // xstepdenominv |
- // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmuls float_minus_1 // ystepdenominv |
- // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv |
- // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fxch %st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv |
- // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fldcw ceil_cw
- fistpl C(r_lstepy) // r_lstepx | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fistpl C(r_lstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fldcw single_cw
- // t0 = r_p0[2] - r_p2[2];
- // t1 = r_p1[2] - r_p2[2];
- fildl C(r_p2)+8 // r_p2[2] | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fildl C(r_p0)+8 // r_p0[2] | r_p2[2] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fildl C(r_p1)+8 // r_p1[2] | r_p0[2] | r_p2[2] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // r_p2[2] | r_p0[2] | r_p1[2] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fld %st(0) // r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubrp %st(0),%st(2) // r_p2[2] | t0 | r_p1[2] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- // r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
- // xstepdenominv);
- // r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
- // ystepdenominv);
- fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv
- fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
- // t0*p11_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
- // t0*p11_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
- // t1*p01_minus_p21 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
- // t1*p00_minus_p20 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv |
- // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv |
- // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fistpl C(r_sstepy) // r_sstepx | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fistpl C(r_sstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- // t0 = r_p0[3] - r_p2[3];
- // t1 = r_p1[3] - r_p2[3];
- fildl C(r_p2)+12 // r_p2[3] | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fildl C(r_p0)+12 // r_p0[3] | r_p2[3] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fildl C(r_p1)+12 // r_p1[3] | r_p0[3] | r_p2[3] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // r_p2[3] | r_p0[3] | r_p1[3] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fld %st(0) // r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubrp %st(0),%st(2) // r_p2[3] | t0 | r_p1[3] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- // r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
- // xstepdenominv);
- // r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
- // ystepdenominv);
- fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
- // t0*p11_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
- // t0*p11_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
- // t1*p01_minus_p21 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
- // t1*p00_minus_p20 | t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv |
- // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
- // ystepdenominv |
- // (t1*p01_minus_p21 - t0*p11_minus_p21)*
- // xstepdenominv | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fistpl C(r_tstepy) // r_tstepx | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fistpl C(r_tstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- // t0 = r_p0[5] - r_p2[5];
- // t1 = r_p1[5] - r_p2[5];
- fildl C(r_p2)+20 // r_p2[5] | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fildl C(r_p0)+20 // r_p0[5] | r_p2[5] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fildl C(r_p1)+20 // r_p1[5] | r_p0[5] | r_p2[5] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fxch %st(2) // r_p2[5] | r_p0[5] | r_p1[5] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fld %st(0) // r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // p11_minus_p21
- fsubrp %st(0),%st(2) // r_p2[5] | t0 | r_p1[5] | ystepdenominv |
- // xstepdenominv | p00_minus_p20 | p11_minus_p21
- fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- // r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
- // xstepdenominv);
- // r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
- // ystepdenominv);
- fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | p11_minus_p21
- fmulp %st(0),%st(6) // t0 | t1 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | t0*p11_minus_p21
- fxch %st(1) // t1 | t0 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | t0*p11_minus_p21
- fld %st(0) // t1 | t1 | t0 | ystepdenominv | xstepdenominv |
- // p00_minus_p20 | t0*p11_minus_p21
- fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 |
- // t0*p11_minus_p21
- fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | ystepdenominv |
- // xstepdenominv | p00_minus_p20 |
- // t0*p11_minus_p21
- fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // t0*p11_minus_p21
- fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
- // ystepdenominv | xstepdenominv | p00_minus_p20 |
- // t0*p11_minus_p21
- fmulp %st(0),%st(5) // t0*p10_minus_p20 | t1*p01_minus_p21 |
- // ystepdenominv | xstepdenominv |
- // t1*p00_minus_p20 | t0*p11_minus_p21
- fxch %st(5) // t0*p11_minus_p21 | t1*p01_minus_p21 |
- // ystepdenominv | xstepdenominv |
- // t1*p00_minus_p20 | t0*p10_minus_p20
- fsubrp %st(0),%st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // ystepdenominv | xstepdenominv |
- // t1*p00_minus_p20 | t0*p10_minus_p20
- fxch %st(3) // t1*p00_minus_p20 | ystepdenominv |
- // xstepdenominv |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // t0*p10_minus_p20
- fsubp %st(0),%st(4) // ystepdenominv | xstepdenominv |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // t1*p00_minus_p20 - t0*p10_minus_p20
- fxch %st(1) // xstepdenominv | ystepdenominv |
- // t1*p01_minus_p21 - t0*p11_minus_p21 |
- // t1*p00_minus_p20 - t0*p10_minus_p20
- fmulp %st(0),%st(2) // ystepdenominv |
- // (t1*p01_minus_p21 - t0*p11_minus_p21) *
- // xstepdenominv |
- // t1*p00_minus_p20 - t0*p10_minus_p20
- fmulp %st(0),%st(2) // (t1*p01_minus_p21 - t0*p11_minus_p21) *
- // xstepdenominv |
- // (t1*p00_minus_p20 - t0*p10_minus_p20) *
- // ystepdenominv
- fistpl C(r_zistepx) // (t1*p00_minus_p20 - t0*p10_minus_p20) *
- // ystepdenominv
- fistpl C(r_zistepy)
- // a_sstepxfrac = r_sstepx << 16;
- // a_tstepxfrac = r_tstepx << 16;
- //
- // a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
- // (r_sstepx >> 16);
- movl C(r_sstepx),%eax
- movl C(r_tstepx),%edx
- shll $16,%eax
- shll $16,%edx
- movl %eax,C(a_sstepxfrac)
- movl %edx,C(a_tstepxfrac)
- movl C(r_sstepx),%ecx
- movl C(r_tstepx),%eax
- sarl $16,%ecx
- sarl $16,%eax
- imull skinwidth(%esp)
- addl %ecx,%eax
- movl %eax,C(a_ststepxwhole)
- ret
- //----------------------------------------------------------------------
- // recursive subdivision affine triangle drawing code
- //
- // not C-callable because of stdcall return
- //----------------------------------------------------------------------
- #define lp1 4+16
- #define lp2 8+16
- #define lp3 12+16
- .globl C(D_PolysetRecursiveTriangle)
- C(D_PolysetRecursiveTriangle):
- pushl %ebp // preserve caller stack frame pointer
- pushl %esi // preserve register variables
- pushl %edi
- pushl %ebx
- // int *temp;
- // int d;
- // int new[6];
- // int i;
- // int z;
- // short *zbuf;
- movl lp2(%esp),%esi
- movl lp1(%esp),%ebx
- movl lp3(%esp),%edi
- // d = lp2[0] - lp1[0];
- // if (d < -1 || d > 1)
- // goto split;
- movl 0(%esi),%eax
- movl 0(%ebx),%edx
- movl 4(%esi),%ebp
- subl %edx,%eax
- movl 4(%ebx),%ecx
- subl %ecx,%ebp
- incl %eax
- cmpl $2,%eax
- ja LSplit
- // d = lp2[1] - lp1[1];
- // if (d < -1 || d > 1)
- // goto split;
- movl 0(%edi),%eax
- incl %ebp
- cmpl $2,%ebp
- ja LSplit
- // d = lp3[0] - lp2[0];
- // if (d < -1 || d > 1)
- // goto split2;
- movl 0(%esi),%edx
- movl 4(%edi),%ebp
- subl %edx,%eax
- movl 4(%esi),%ecx
- subl %ecx,%ebp
- incl %eax
- cmpl $2,%eax
- ja LSplit2
- // d = lp3[1] - lp2[1];
- // if (d < -1 || d > 1)
- // goto split2;
- movl 0(%ebx),%eax
- incl %ebp
- cmpl $2,%ebp
- ja LSplit2
- // d = lp1[0] - lp3[0];
- // if (d < -1 || d > 1)
- // goto split3;
- movl 0(%edi),%edx
- movl 4(%ebx),%ebp
- subl %edx,%eax
- movl 4(%edi),%ecx
- subl %ecx,%ebp
- incl %eax
- incl %ebp
- movl %ebx,%edx
- cmpl $2,%eax
- ja LSplit3
- // d = lp1[1] - lp3[1];
- // if (d < -1 || d > 1)
- // {
- //split3:
- // temp = lp1;
- // lp3 = lp2;
- // lp1 = lp3;
- // lp2 = temp;
- // goto split;
- // }
- //
- // return; // entire tri is filled
- //
- cmpl $2,%ebp
- jna LDone
- LSplit3:
- movl %edi,%ebx
- movl %esi,%edi
- movl %edx,%esi
- jmp LSplit
- //split2:
- LSplit2:
- // temp = lp1;
- // lp1 = lp2;
- // lp2 = lp3;
- // lp3 = temp;
- movl %ebx,%eax
- movl %esi,%ebx
- movl %edi,%esi
- movl %eax,%edi
- //split:
- LSplit:
- subl $24,%esp // allocate space for a new vertex
- //// split this edge
- // new[0] = (lp1[0] + lp2[0]) >> 1;
- // new[1] = (lp1[1] + lp2[1]) >> 1;
- // new[2] = (lp1[2] + lp2[2]) >> 1;
- // new[3] = (lp1[3] + lp2[3]) >> 1;
- // new[5] = (lp1[5] + lp2[5]) >> 1;
- movl 8(%ebx),%eax
- movl 8(%esi),%edx
- movl 12(%ebx),%ecx
- addl %edx,%eax
- movl 12(%esi),%edx
- sarl $1,%eax
- addl %edx,%ecx
- movl %eax,8(%esp)
- movl 20(%ebx),%eax
- sarl $1,%ecx
- movl 20(%esi),%edx
- movl %ecx,12(%esp)
- addl %edx,%eax
- movl 0(%ebx),%ecx
- movl 0(%esi),%edx
- sarl $1,%eax
- addl %ecx,%edx
- movl %eax,20(%esp)
- movl 4(%ebx),%eax
- sarl $1,%edx
- movl 4(%esi),%ebp
- movl %edx,0(%esp)
- addl %eax,%ebp
- sarl $1,%ebp
- movl %ebp,4(%esp)
- //// draw the point if splitting a leading edge
- // if (lp2[1] > lp1[1])
- // goto nodraw;
- cmpl %eax,4(%esi)
- jg LNoDraw
- // if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
- // goto nodraw;
- movl 0(%esi),%edx
- jnz LDraw
- cmpl %ecx,%edx
- jl LNoDraw
- LDraw:
- // z = new[5] >> 16;
- movl 20(%esp),%edx
- movl 4(%esp),%ecx
- sarl $16,%edx
- movl 0(%esp),%ebp
- // zbuf = zspantable[new[1]] + new[0];
- movl C(zspantable)(,%ecx,4),%eax
- // if (z >= *zbuf)
- // {
- cmpw (%eax,%ebp,2),%dx
- jnge LNoDraw
- // int pix;
- //
- // *zbuf = z;
- movw %dx,(%eax,%ebp,2)
- // pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]];
- movl 12(%esp),%eax
- sarl $16,%eax
- movl 8(%esp),%edx
- sarl $16,%edx
- subl %ecx,%ecx
- movl C(skintable)(,%eax,4),%eax
- movl 4(%esp),%ebp
- movb (%eax,%edx,),%cl
- movl C(d_pcolormap),%edx
- movb (%edx,%ecx,),%dl
- movl 0(%esp),%ecx
- // d_viewbuffer[d_scantable[new[1]] + new[0]] = pix;
- movl C(d_scantable)(,%ebp,4),%eax
- addl %eax,%ecx
- movl C(d_viewbuffer),%eax
- movb %dl,(%eax,%ecx,1)
- // }
- //
- //nodraw:
- LNoDraw:
- //// recursively continue
- // D_PolysetRecursiveTriangle (lp3, lp1, new);
- pushl %esp
- pushl %ebx
- pushl %edi
- call C(D_PolysetRecursiveTriangle)
- // D_PolysetRecursiveTriangle (lp3, new, lp2);
- movl %esp,%ebx
- pushl %esi
- pushl %ebx
- pushl %edi
- call C(D_PolysetRecursiveTriangle)
- addl $24,%esp
- LDone:
- popl %ebx // restore register variables
- popl %edi
- popl %esi
- popl %ebp // restore caller stack frame pointer
- ret $12
- //----------------------------------------------------------------------
- // 8-bpp horizontal span drawing code for affine polygons, with smooth
- // shading and no transparency
- //----------------------------------------------------------------------
- #define pspans 4+8
- .globl C(D_PolysetAff8Start)
- C(D_PolysetAff8Start):
- .globl C(D_PolysetDrawSpans8)
- C(D_PolysetDrawSpans8):
- pushl %esi // preserve register variables
- pushl %ebx
- movl pspans(%esp),%esi // point to the first span descriptor
- movl C(r_zistepx),%ecx
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- rorl $16,%ecx // put high 16 bits of 1/z step in low word
- movl spanpackage_t_count(%esi),%edx
- movl %ecx,lzistepx
- LSpanLoop:
- // lcount = d_aspancount - pspanpackage->count;
- //
- // errorterm += erroradjustup;
- // if (errorterm >= 0)
- // {
- // d_aspancount += d_countextrastep;
- // errorterm -= erroradjustdown;
- // }
- // else
- // {
- // d_aspancount += ubasestep;
- // }
- movl C(d_aspancount),%eax
- subl %edx,%eax
- movl C(erroradjustup),%edx
- movl C(errorterm),%ebx
- addl %edx,%ebx
- js LNoTurnover
- movl C(erroradjustdown),%edx
- movl C(d_countextrastep),%edi
- subl %edx,%ebx
- movl C(d_aspancount),%ebp
- movl %ebx,C(errorterm)
- addl %edi,%ebp
- movl %ebp,C(d_aspancount)
- jmp LRightEdgeStepped
- LNoTurnover:
- movl C(d_aspancount),%edi
- movl C(ubasestep),%edx
- movl %ebx,C(errorterm)
- addl %edx,%edi
- movl %edi,C(d_aspancount)
- LRightEdgeStepped:
- cmpl $1,%eax
- jl LNextSpan
- jz LExactlyOneLong
- //
- // set up advancetable
- //
- movl C(a_ststepxwhole),%ecx
- movl C(r_affinetridesc)+atd_skinwidth,%edx
- movl %ecx,advancetable+4 // advance base in t
- addl %edx,%ecx
- movl %ecx,advancetable // advance extra in t
- movl C(a_tstepxfrac),%ecx
- movw C(r_lstepx),%cx
- movl %eax,%edx // count
- movl %ecx,tstep
- addl $7,%edx
- shrl $3,%edx // count of full and partial loops
- movl spanpackage_t_sfrac(%esi),%ebx
- movw %dx,%bx
- movl spanpackage_t_pz(%esi),%ecx
- negl %eax
- movl spanpackage_t_pdest(%esi),%edi
- andl $7,%eax // 0->0, 1->7, 2->6, ... , 7->1
- subl %eax,%edi // compensate for hardwired offsets
- subl %eax,%ecx
- subl %eax,%ecx
- movl spanpackage_t_tfrac(%esi),%edx
- movw spanpackage_t_light(%esi),%dx
- movl spanpackage_t_zi(%esi),%ebp
- rorl $16,%ebp // put high 16 bits of 1/z in low word
- pushl %esi
- movl spanpackage_t_ptex(%esi),%esi
- jmp aff8entryvec_table(,%eax,4)
- // %bx = count of full and partial loops
- // %ebx high word = sfrac
- // %ecx = pz
- // %dx = light
- // %edx high word = tfrac
- // %esi = ptex
- // %edi = pdest
- // %ebp = 1/z
- // tstep low word = C(r_lstepx)
- // tstep high word = C(a_tstepxfrac)
- // C(a_sstepxfrac) low word = 0
- // C(a_sstepxfrac) high word = C(a_sstepxfrac)
- LDrawLoop:
- // FIXME: do we need to clamp light? We may need at least a buffer bit to
- // keep it from poking into tfrac and causing problems
- LDraw8:
- cmpw (%ecx),%bp
- jl Lp1
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,(%ecx)
- movb 0x12345678(%eax),%al
- LPatch8:
- movb %al,(%edi)
- Lp1:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
- LDraw7:
- cmpw 2(%ecx),%bp
- jl Lp2
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,2(%ecx)
- movb 0x12345678(%eax),%al
- LPatch7:
- movb %al,1(%edi)
- Lp2:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
- LDraw6:
- cmpw 4(%ecx),%bp
- jl Lp3
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,4(%ecx)
- movb 0x12345678(%eax),%al
- LPatch6:
- movb %al,2(%edi)
- Lp3:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
- LDraw5:
- cmpw 6(%ecx),%bp
- jl Lp4
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,6(%ecx)
- movb 0x12345678(%eax),%al
- LPatch5:
- movb %al,3(%edi)
- Lp4:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
- LDraw4:
- cmpw 8(%ecx),%bp
- jl Lp5
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,8(%ecx)
- movb 0x12345678(%eax),%al
- LPatch4:
- movb %al,4(%edi)
- Lp5:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
- LDraw3:
- cmpw 10(%ecx),%bp
- jl Lp6
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,10(%ecx)
- movb 0x12345678(%eax),%al
- LPatch3:
- movb %al,5(%edi)
- Lp6:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
- LDraw2:
- cmpw 12(%ecx),%bp
- jl Lp7
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,12(%ecx)
- movb 0x12345678(%eax),%al
- LPatch2:
- movb %al,6(%edi)
- Lp7:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
- LDraw1:
- cmpw 14(%ecx),%bp
- jl Lp8
- xorl %eax,%eax
- movb %dh,%ah
- movb (%esi),%al
- movw %bp,14(%ecx)
- movb 0x12345678(%eax),%al
- LPatch1:
- movb %al,7(%edi)
- Lp8:
- addl tstep,%edx
- sbbl %eax,%eax
- addl lzistepx,%ebp
- adcl $0,%ebp
- addl C(a_sstepxfrac),%ebx
- adcl advancetable+4(,%eax,4),%esi
- addl $8,%edi
- addl $16,%ecx
- decw %bx
- jnz LDrawLoop
- popl %esi // restore spans pointer
- LNextSpan:
- addl $(spanpackage_t_size),%esi // point to next span
- LNextSpanESISet:
- movl spanpackage_t_count(%esi),%edx
- cmpl $-999999,%edx // any more spans?
- jnz LSpanLoop // yes
- popl %edi
- popl %ebp // restore the caller's stack frame
- popl %ebx // restore register variables
- popl %esi
- ret
- // draw a one-long span
- LExactlyOneLong:
- movl spanpackage_t_pz(%esi),%ecx
- movl spanpackage_t_zi(%esi),%ebp
- rorl $16,%ebp // put high 16 bits of 1/z in low word
- movl spanpackage_t_ptex(%esi),%ebx
- cmpw (%ecx),%bp
- jl LNextSpan
- xorl %eax,%eax
- movl spanpackage_t_pdest(%esi),%edi
- movb spanpackage_t_light+1(%esi),%ah
- addl $(spanpackage_t_size),%esi // point to next span
- movb (%ebx),%al
- movw %bp,(%ecx)
- movb 0x12345678(%eax),%al
- LPatch9:
- movb %al,(%edi)
- jmp LNextSpanESISet
- .globl C(D_PolysetAff8End)
- C(D_PolysetAff8End):
- #define pcolormap 4
- .globl C(D_Aff8Patch)
- C(D_Aff8Patch):
- movl pcolormap(%esp),%eax
- movl %eax,LPatch1-4
- movl %eax,LPatch2-4
- movl %eax,LPatch3-4
- movl %eax,LPatch4-4
- movl %eax,LPatch5-4
- movl %eax,LPatch6-4
- movl %eax,LPatch7-4
- movl %eax,LPatch8-4
- movl %eax,LPatch9-4
- ret
- //----------------------------------------------------------------------
- // Alias model polygon dispatching code, combined with subdivided affine
- // triangle drawing code
- //----------------------------------------------------------------------
- .globl C(D_PolysetDraw)
- C(D_PolysetDraw):
- // spanpackage_t spans[DPS_MAXSPANS + 1 +
- // ((CACHE_SIZE - 1) / sizeof(spanpackage_t)) + 1];
- // // one extra because of cache line pretouching
- //
- // a_spans = (spanpackage_t *)
- // (((long)&spans[0] + CACHE_SIZE - 1) & ~(CACHE_SIZE - 1));
- subl $(SPAN_SIZE),%esp
- movl %esp,%eax
- addl $(CACHE_SIZE - 1),%eax
- andl $(~(CACHE_SIZE - 1)),%eax
- movl %eax,C(a_spans)
- // if (r_affinetridesc.drawtype)
- // D_DrawSubdiv ();
- // else
- // D_DrawNonSubdiv ();
- movl C(r_affinetridesc)+atd_drawtype,%eax
- testl %eax,%eax
- jz C(D_DrawNonSubdiv)
- pushl %ebp // preserve caller stack frame pointer
- // lnumtriangles = r_affinetridesc.numtriangles;
- movl C(r_affinetridesc)+atd_numtriangles,%ebp
- pushl %esi // preserve register variables
- shll $4,%ebp
- pushl %ebx
- // ptri = r_affinetridesc.ptriangles;
- movl C(r_affinetridesc)+atd_ptriangles,%ebx
- pushl %edi
- // mtriangle_t *ptri;
- // finalvert_t *pfv, *index0, *index1, *index2;
- // int i;
- // int lnumtriangles;
- // int s0, s1, s2;
- // pfv = r_affinetridesc.pfinalverts;
- movl C(r_affinetridesc)+atd_pfinalverts,%edi
- // for (i=0 ; i<lnumtriangles ; i++)
- // {
- Llooptop:
- // index0 = pfv + ptri[i].vertindex[0];
- // index1 = pfv + ptri[i].vertindex[1];
- // index2 = pfv + ptri[i].vertindex[2];
- movl mtri_vertindex-16+0(%ebx,%ebp,),%ecx
- movl mtri_vertindex-16+4(%ebx,%ebp,),%esi
- shll $(fv_shift),%ecx
- movl mtri_vertindex-16+8(%ebx,%ebp,),%edx
- shll $(fv_shift),%esi
- addl %edi,%ecx
- shll $(fv_shift),%edx
- addl %edi,%esi
- addl %edi,%edx
- // if (((index0->v[1]-index1->v[1]) *
- // (index0->v[0]-index2->v[0]) -
- // (index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1])) >= 0)
- // {
- // continue;
- // }
- //
- // d_pcolormap = &((byte *)acolormap)[index0->v[4] & 0xFF00];
- fildl fv_v+4(%ecx) // i0v1
- fildl fv_v+4(%esi) // i1v1 | i0v1
- fildl fv_v+0(%ecx) // i0v0 | i1v1 | i0v1
- fildl fv_v+0(%edx) // i2v0 | i0v0 | i1v1 | i0v1
- fxch %st(2) // i1v1 | i0v0 | i2v0 | i0v1
- fsubr %st(3),%st(0) // i0v1-i1v1 | i0v0 | i2v0 | i0v1
- fildl fv_v+0(%esi) // i1v0 | i0v1-i1v1 | i0v0 | i2v0 | i0v1
- fxch %st(2) // i0v0 | i0v1-i1v1 | i1v0 | i2v0 | i0v1
- fsub %st(0),%st(3) // i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0 | i0v1
- fildl fv_v+4(%edx) // i2v1 | i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
- fxch %st(1) // i0v0 | i2v1 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
- fsubp %st(0),%st(3) // i2v1 | i0v1-i1v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
- fxch %st(1) // i0v1-i1v1 | i2v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
- fmulp %st(0),%st(3) // i2v1 | i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1
- fsubrp %st(0),%st(3) // i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1-i2v1
- movl fv_v+16(%ecx),%eax
- andl $0xFF00,%eax
- fmulp %st(0),%st(2) // i0v1-i1v1*i0v0-i2v0 | i0v0-i1v0*i0v1-i2v1
- addl C(acolormap),%eax
- fsubp %st(0),%st(1) // (i0v1-i1v1)*(i0v0-i2v0)-(i0v0-i1v0)*(i0v1-i2v1)
- movl %eax,C(d_pcolormap)
- fstps Ltemp
- movl Ltemp,%eax
- subl $0x80000001,%eax
- jc Lskip
- // if (ptri[i].facesfront)
- // {
- // D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
- movl mtri_facesfront-16(%ebx,%ebp,),%eax
- testl %eax,%eax
- jz Lfacesback
- pushl %edx
- pushl %esi
- pushl %ecx
- call C(D_PolysetRecursiveTriangle)
- subl $16,%ebp
- jnz Llooptop
- jmp Ldone2
- // }
- // else
- // {
- Lfacesback:
- // s0 = index0->v[2];
- // s1 = index1->v[2];
- // s2 = index2->v[2];
- movl fv_v+8(%ecx),%eax
- pushl %eax
- movl fv_v+8(%esi),%eax
- pushl %eax
- movl fv_v+8(%edx),%eax
- pushl %eax
- pushl %ecx
- pushl %edx
- // if (index0->flags & ALIAS_ONSEAM)
- // index0->v[2] += r_affinetridesc.seamfixupX16;
- movl C(r_affinetridesc)+atd_seamfixupX16,%eax
- testl $(ALIAS_ONSEAM),fv_flags(%ecx)
- jz Lp11
- addl %eax,fv_v+8(%ecx)
- Lp11:
- // if (index1->flags & ALIAS_ONSEAM)
- // index1->v[2] += r_affinetridesc.seamfixupX16;
- testl $(ALIAS_ONSEAM),fv_flags(%esi)
- jz Lp12
- addl %eax,fv_v+8(%esi)
- Lp12:
- // if (index2->flags & ALIAS_ONSEAM)
- // index2->v[2] += r_affinetridesc.seamfixupX16;
- testl $(ALIAS_ONSEAM),fv_flags(%edx)
- jz Lp13
- addl %eax,fv_v+8(%edx)
- Lp13:
- // D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
- pushl %edx
- pushl %esi
- pushl %ecx
- call C(D_PolysetRecursiveTriangle)
- // index0->v[2] = s0;
- // index1->v[2] = s1;
- // index2->v[2] = s2;
- popl %edx
- popl %ecx
- popl %eax
- movl %eax,fv_v+8(%edx)
- popl %eax
- movl %eax,fv_v+8(%esi)
- popl %eax
- movl %eax,fv_v+8(%ecx)
- // }
- // }
- Lskip:
- subl $16,%ebp
- jnz Llooptop
- Ldone2:
- popl %edi // restore the caller's stack frame
- popl %ebx
- popl %esi // restore register variables
- popl %ebp
- addl $(SPAN_SIZE),%esp
- ret
- //----------------------------------------------------------------------
- // Alias model triangle left-edge scanning code
- //----------------------------------------------------------------------
- #define height 4+16
- .globl C(D_PolysetScanLeftEdge)
- C(D_PolysetScanLeftEdge):
- pushl %ebp // preserve caller stack frame pointer
- pushl %esi // preserve register variables
- pushl %edi
- pushl %ebx
- movl height(%esp),%eax
- movl C(d_sfrac),%ecx
- andl $0xFFFF,%eax
- movl C(d_ptex),%ebx
- orl %eax,%ecx
- movl C(d_pedgespanpackage),%esi
- movl C(d_tfrac),%edx
- movl C(d_light),%edi
- movl C(d_zi),%ebp
- // %eax: scratch
- // %ebx: d_ptex
- // %ecx: d_sfrac in high word, count in low word
- // %edx: d_tfrac
- // %esi: d_pedgespanpackage, errorterm, scratch alternately
- // %edi: d_light
- // %ebp: d_zi
- // do
- // {
- LScanLoop:
- // d_pedgespanpackage->ptex = ptex;
- // d_pedgespanpackage->pdest = d_pdest;
- // d_pedgespanpackage->pz = d_pz;
- // d_pedgespanpackage->count = d_aspancount;
- // d_pedgespanpackage->light = d_light;
- // d_pedgespanpackage->zi = d_zi;
- // d_pedgespanpackage->sfrac = d_sfrac << 16;
- // d_pedgespanpackage->tfrac = d_tfrac << 16;
- movl %ebx,spanpackage_t_ptex(%esi)
- movl C(d_pdest),%eax
- movl %eax,spanpackage_t_pdest(%esi)
- movl C(d_pz),%eax
- movl %eax,spanpackage_t_pz(%esi)
- movl C(d_aspancount),%eax
- movl %eax,spanpackage_t_count(%esi)
- movl %edi,spanpackage_t_light(%esi)
- movl %ebp,spanpackage_t_zi(%esi)
- movl %ecx,spanpackage_t_sfrac(%esi)
- movl %edx,spanpackage_t_tfrac(%esi)
- // pretouch the next cache line
- movb spanpackage_t_size(%esi),%al
- // d_pedgespanpackage++;
- addl $(spanpackage_t_size),%esi
- movl C(erroradjustup),%eax
- movl %esi,C(d_pedgespanpackage)
- // errorterm += erroradjustup;
- movl C(errorterm),%esi
- addl %eax,%esi
- movl C(d_pdest),%eax
- // if (errorterm >= 0)
- // {
- js LNoLeftEdgeTurnover
- // errorterm -= erroradjustdown;
- // d_pdest += d_pdestextrastep;
- subl C(erroradjustdown),%esi
- addl C(d_pdestextrastep),%eax
- movl %esi,C(errorterm)
- movl %eax,C(d_pdest)
- // d_pz += d_pzextrastep;
- // d_aspancount += d_countextrastep;
- // d_ptex += d_ptexextrastep;
- // d_sfrac += d_sfracextrastep;
- // d_ptex += d_sfrac >> 16;
- // d_sfrac &= 0xFFFF;
- // d_tfrac += d_tfracextrastep;
- movl C(d_pz),%eax
- movl C(d_aspancount),%esi
- addl C(d_pzextrastep),%eax
- addl C(d_sfracextrastep),%ecx
- adcl C(d_ptexextrastep),%ebx
- addl C(d_countextrastep),%esi
- movl %eax,C(d_pz)
- movl C(d_tfracextrastep),%eax
- movl %esi,C(d_aspancount)
- addl %eax,%edx
- // if (d_tfrac & 0x10000)
- // {
- jnc LSkip1
- // d_ptex += r_affinetridesc.skinwidth;
- // d_tfrac &= 0xFFFF;
- addl C(r_affinetridesc)+atd_skinwidth,%ebx
- // }
- LSkip1:
- // d_light += d_lightextrastep;
- // d_zi += d_ziextrastep;
- addl C(d_lightextrastep),%edi
- addl C(d_ziextrastep),%ebp
- // }
- movl C(d_pedgespanpackage),%esi
- decl %ecx
- testl $0xFFFF,%ecx
- jnz LScanLoop
- popl %ebx
- popl %edi
- popl %esi
- popl %ebp
- ret
- // else
- // {
- LNoLeftEdgeTurnover:
- movl %esi,C(errorterm)
- // d_pdest += d_pdestbasestep;
- addl C(d_pdestbasestep),%eax
- movl %eax,C(d_pdest)
- // d_pz += d_pzbasestep;
- // d_aspancount += ubasestep;
- // d_ptex += d_ptexbasestep;
- // d_sfrac += d_sfracbasestep;
- // d_ptex += d_sfrac >> 16;
- // d_sfrac &= 0xFFFF;
- movl C(d_pz),%eax
- movl C(d_aspancount),%esi
- addl C(d_pzbasestep),%eax
- addl C(d_sfracbasestep),%ecx
- adcl C(d_ptexbasestep),%ebx
- addl C(ubasestep),%esi
- movl %eax,C(d_pz)
- movl %esi,C(d_aspancount)
- // d_tfrac += d_tfracbasestep;
- movl C(d_tfracbasestep),%esi
- addl %esi,%edx
- // if (d_tfrac & 0x10000)
- // {
- jnc LSkip2
- // d_ptex += r_affinetridesc.skinwidth;
- // d_tfrac &= 0xFFFF;
- addl C(r_affinetridesc)+atd_skinwidth,%ebx
- // }
- LSkip2:
- // d_light += d_lightbasestep;
- // d_zi += d_zibasestep;
- addl C(d_lightbasestep),%edi
- addl C(d_zibasestep),%ebp
- // }
- // } while (--height);
- movl C(d_pedgespanpackage),%esi
- decl %ecx
- testl $0xFFFF,%ecx
- jnz LScanLoop
- popl %ebx
- popl %edi
- popl %esi
- popl %ebp
- ret
- //----------------------------------------------------------------------
- // Alias model vertex drawing code
- //----------------------------------------------------------------------
- #define fv 4+8
- #define numverts 8+8
- .globl C(D_PolysetDrawFinalVerts)
- C(D_PolysetDrawFinalVerts):
- pushl %ebp // preserve caller stack frame pointer
- pushl %ebx
- // int i, z;
- // short *zbuf;
- movl numverts(%esp),%ecx
- movl fv(%esp),%ebx
- pushl %esi // preserve register variables
- pushl %edi
- LFVLoop:
- // for (i=0 ; i<numverts ; i++, fv++)
- // {
- // // valid triangle coordinates for filling can include the bottom and
- // // right clip edges, due to the fill rule; these shouldn't be drawn
- // if ((fv->v[0] < r_refdef.vrectright) &&
- // (fv->v[1] < r_refdef.vrectbottom))
- // {
- movl fv_v+0(%ebx),%eax
- movl C(r_refdef)+rd_vrectright,%edx
- cmpl %edx,%eax
- jge LNextVert
- movl fv_v+4(%ebx),%esi
- movl C(r_refdef)+rd_vrectbottom,%edx
- cmpl %edx,%esi
- jge LNextVert
- // zbuf = zspantable[fv->v[1]] + fv->v[0];
- movl C(zspantable)(,%esi,4),%edi
- // z = fv->v[5]>>16;
- movl fv_v+20(%ebx),%edx
- shrl $16,%edx
- // if (z >= *zbuf)
- // {
- // int pix;
- cmpw (%edi,%eax,2),%dx
- jl LNextVert
- // *zbuf = z;
- movw %dx,(%edi,%eax,2)
- // pix = skintable[fv->v[3]>>16][fv->v[2]>>16];
- movl fv_v+12(%ebx),%edi
- shrl $16,%edi
- movl C(skintable)(,%edi,4),%edi
- movl fv_v+8(%ebx),%edx
- shrl $16,%edx
- movb (%edi,%edx),%dl
- // pix = ((byte *)acolormap)[pix + (fv->v[4] & 0xFF00)];
- movl fv_v+16(%ebx),%edi
- andl $0xFF00,%edi
- andl $0x00FF,%edx
- addl %edx,%edi
- movl C(acolormap),%edx
- movb (%edx,%edi,1),%dl
- // d_viewbuffer[d_scantable[fv->v[1]] + fv->v[0]] = pix;
- movl C(d_scantable)(,%esi,4),%edi
- movl C(d_viewbuffer),%esi
- addl %eax,%edi
- movb %dl,(%esi,%edi)
- // }
- // }
- // }
- LNextVert:
- addl $(fv_size),%ebx
- decl %ecx
- jnz LFVLoop
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
- //----------------------------------------------------------------------
- // Alias model non-subdivided polygon dispatching code
- //
- // not C-callable because of stack buffer cleanup
- //----------------------------------------------------------------------
- .globl C(D_DrawNonSubdiv)
- C(D_DrawNonSubdiv):
- pushl %ebp // preserve caller stack frame pointer
- movl C(r_affinetridesc)+atd_numtriangles,%ebp
- pushl %ebx
- shll $(mtri_shift),%ebp
- pushl %esi // preserve register variables
- movl C(r_affinetridesc)+atd_ptriangles,%esi
- pushl %edi
- // mtriangle_t *ptri;
- // finalvert_t *pfv, *index0, *index1, *index2;
- // int i;
- // int lnumtriangles;
- // pfv = r_affinetridesc.pfinalverts;
- // ptri = r_affinetridesc.ptriangles;
- // lnumtriangles = r_affinetridesc.numtriangles;
- LNDLoop:
- // for (i=0 ; i<lnumtriangles ; i++, ptri++)
- // {
- // index0 = pfv + ptri->vertindex[0];
- // index1 = pfv + ptri->vertindex[1];
- // index2 = pfv + ptri->vertindex[2];
- movl C(r_affinetridesc)+atd_pfinalverts,%edi
- movl mtri_vertindex+0-mtri_size(%esi,%ebp,1),%ecx
- shll $(fv_shift),%ecx
- movl mtri_vertindex+4-mtri_size(%esi,%ebp,1),%edx
- shll $(fv_shift),%edx
- movl mtri_vertindex+8-mtri_size(%esi,%ebp,1),%ebx
- shll $(fv_shift),%ebx
- addl %edi,%ecx
- addl %edi,%edx
- addl %edi,%ebx
- // d_xdenom = (index0->v[1]-index1->v[1]) *
- // (index0->v[0]-index2->v[0]) -
- // (index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1]);
- movl fv_v+4(%ecx),%eax
- movl fv_v+0(%ecx),%esi
- subl fv_v+4(%edx),%eax
- subl fv_v+0(%ebx),%esi
- imull %esi,%eax
- movl fv_v+0(%ecx),%esi
- movl fv_v+4(%ecx),%edi
- subl fv_v+0(%edx),%esi
- subl fv_v+4(%ebx),%edi
- imull %esi,%edi
- subl %edi,%eax
- // if (d_xdenom >= 0)
- // {
- // continue;
- jns LNextTri
- // }
- movl %eax,C(d_xdenom)
- fildl C(d_xdenom)
- // r_p0[0] = index0->v[0]; // u
- // r_p0[1] = index0->v[1]; // v
- // r_p0[2] = index0->v[2]; // s
- // r_p0[3] = index0->v[3]; // t
- // r_p0[4] = index0->v[4]; // light
- // r_p0[5] = index0->v[5]; // iz
- movl fv_v+0(%ecx),%eax
- movl fv_v+4(%ecx),%esi
- movl %eax,C(r_p0)+0
- movl %esi,C(r_p0)+4
- movl fv_v+8(%ecx),%eax
- movl fv_v+12(%ecx),%esi
- movl %eax,C(r_p0)+8
- movl %esi,C(r_p0)+12
- movl fv_v+16(%ecx),%eax
- movl fv_v+20(%ecx),%esi
- movl %eax,C(r_p0)+16
- movl %esi,C(r_p0)+20
- fdivrs float_1
- // r_p1[0] = index1->v[0];
- // r_p1[1] = index1->v[1];
- // r_p1[2] = index1->v[2];
- // r_p1[3] = index1->v[3];
- // r_p1[4] = index1->v[4];
- // r_p1[5] = index1->v[5];
- movl fv_v+0(%edx),%eax
- movl fv_v+4(%edx),%esi
- movl %eax,C(r_p1)+0
- movl %esi,C(r_p1)+4
- movl fv_v+8(%edx),%eax
- movl fv_v+12(%edx),%esi
- movl %eax,C(r_p1)+8
- movl %esi,C(r_p1)+12
- movl fv_v+16(%edx),%eax
- movl fv_v+20(%edx),%esi
- movl %eax,C(r_p1)+16
- movl %esi,C(r_p1)+20
- // r_p2[0] = index2->v[0];
- // r_p2[1] = index2->v[1];
- // r_p2[2] = index2->v[2];
- // r_p2[3] = index2->v[3];
- // r_p2[4] = index2->v[4];
- // r_p2[5] = index2->v[5];
- movl fv_v+0(%ebx),%eax
- movl fv_v+4(%ebx),%esi
- movl %eax,C(r_p2)+0
- movl %esi,C(r_p2)+4
- movl fv_v+8(%ebx),%eax
- movl fv_v+12(%ebx),%esi
- movl %eax,C(r_p2)+8
- movl %esi,C(r_p2)+12
- movl fv_v+16(%ebx),%eax
- movl fv_v+20(%ebx),%esi
- movl %eax,C(r_p2)+16
- movl C(r_affinetridesc)+atd_ptriangles,%edi
- movl %esi,C(r_p2)+20
- movl mtri_facesfront-mtri_size(%edi,%ebp,1),%eax
- // if (!ptri->facesfront)
- // {
- testl %eax,%eax
- jnz LFacesFront
- // if (index0->flags & ALIAS_ONSEAM)
- // r_p0[2] += r_affinetridesc.seamfixupX16;
- movl fv_flags(%ecx),%eax
- movl fv_flags(%edx),%esi
- movl fv_flags(%ebx),%edi
- testl $(ALIAS_ONSEAM),%eax
- movl C(r_affinetridesc)+atd_seamfixupX16,%eax
- jz LOnseamDone0
- addl %eax,C(r_p0)+8
- LOnseamDone0:
- // if (index1->flags & ALIAS_ONSEAM)
- // r_p1[2] += r_affinetridesc.seamfixupX16;
- testl $(ALIAS_ONSEAM),%esi
- jz LOnseamDone1
- addl %eax,C(r_p1)+8
- LOnseamDone1:
- // if (index2->flags & ALIAS_ONSEAM)
- // r_p2[2] += r_affinetridesc.seamfixupX16;
- testl $(ALIAS_ONSEAM),%edi
- jz LOnseamDone2
- addl %eax,C(r_p2)+8
- LOnseamDone2:
- // }
- LFacesFront:
- fstps C(d_xdenom)
- // D_PolysetSetEdgeTable ();
- // D_RasterizeAliasPolySmooth ();
- call C(D_PolysetSetEdgeTable)
- call C(D_RasterizeAliasPolySmooth)
- LNextTri:
- movl C(r_affinetridesc)+atd_ptriangles,%esi
- subl $16,%ebp
- jnz LNDLoop
- // }
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- addl $(SPAN_SIZE),%esp
- ret
- #endif // id386
|