d_spr8.s 20 KB

  1. /*
  2. Copyright (C) 1996-1997 Id Software, Inc.
  3. This program is free software; you can redistribute it and/or
  4. modify it under the terms of the GNU General Public License
  5. as published by the Free Software Foundation; either version 2
  6. of the License, or (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. See the GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program; if not, write to the Free Software
  13. Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  14. */
  15. //
  16. // d_spr8.s
  17. // x86 assembly-language horizontal 8-bpp transparent span-drawing code.
  18. //
  19. #include "asm_i386.h"
  20. #include "quakeasm.h"
  21. #include "asm_draw.h"
  22. #if id386
  23. //----------------------------------------------------------------------
  24. // 8-bpp horizontal span drawing code for polygons, with transparency.
  25. //----------------------------------------------------------------------
  26. .text
  27. // out-of-line, rarely-needed clamping code
  //
  // Each stub loads a clamp value into the register that was being
  // range-checked and jumps back to the matching LClampReentryN label inside
  // D_SpriteDrawSpans.
  //
  // Stubs 0 and 1 are reached through an unsigned "ja" that follows a cmpl
  // against bbextents / bbextentt. The flags from that cmpl are still live on
  // arrival, so the signed "jg" separates "greater than the extent" (clamp to
  // the extent) from "negative" (clamp to 0).
  //
  // Stubs 2-5 are reached either through a signed "jl" after a cmpl against
  // $2048 (Low: clamp to 2048) or through an unsigned "ja" after a cmpl
  // against the extent (High: clamp to the extent).
  28. LClampHigh0:
  29. movl C(bbextents),%esi // s = bbextents
  30. jmp LClampReentry0
  31. LClampHighOrLow0:
  32. jg LClampHigh0 // signed greater than bbextents
  33. xorl %esi,%esi // otherwise s was negative: s = 0
  34. jmp LClampReentry0
  35. LClampHigh1:
  36. movl C(bbextentt),%edx // t = bbextentt
  37. jmp LClampReentry1
  38. LClampHighOrLow1:
  39. jg LClampHigh1 // signed greater than bbextentt
  40. xorl %edx,%edx // otherwise t was negative: t = 0
  41. jmp LClampReentry1
  42. LClampLow2:
  43. movl $2048,%ebp // snext floor (full-segment path)
  44. jmp LClampReentry2
  45. LClampHigh2:
  46. movl C(bbextents),%ebp // snext ceiling (full-segment path)
  47. jmp LClampReentry2
  48. LClampLow3:
  49. movl $2048,%ecx // tnext floor (full-segment path)
  50. jmp LClampReentry3
  51. LClampHigh3:
  52. movl C(bbextentt),%ecx // tnext ceiling (full-segment path)
  53. jmp LClampReentry3
  54. LClampLow4:
  55. movl $2048,%eax // snext floor (last-segment path)
  56. jmp LClampReentry4
  57. LClampHigh4:
  58. movl C(bbextents),%eax // snext ceiling (last-segment path)
  59. jmp LClampReentry4
  60. LClampLow5:
  61. movl $2048,%ebx // tnext floor (last-segment path)
  62. jmp LClampReentry5
  63. LClampHigh5:
  64. movl C(bbextentt),%ebx // tnext ceiling (last-segment path)
  65. jmp LClampReentry5
  // D_SpriteDrawSpans: draws a list of horizontal 8-bpp spans. A pixel is
  // written only if it passes the 16-bit z-buffer compare and its texel is not
  // TRANSPARENT_COLOR.
  //
  // In:  one stack argument, a pointer to the first span descriptor (fields
  //      are read through sspan_t_u / sspan_t_v / sspan_t_count).
  // Out: nothing in registers; %ebp, %edi, %esi and %ebx are saved here and
  //      restored before the final ret.
  //
  // pspans is the argument's offset from %esp after the prologue:
  // 4 (return address) + 16 (the four registers pushed below).
  66. #define pspans 4+16
  67. .align 4
  68. .globl C(D_SpriteDrawSpans)
  69. C(D_SpriteDrawSpans):
  70. pushl %ebp // preserve caller's stack frame
  71. pushl %edi
  72. pushl %esi // preserve register variables
  73. pushl %ebx
  74. //
  75. // set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
  76. // and span list pointers, and 1/z step in 0.32 fixed-point
  77. //
  78. // FIXME: any overlap from rearranging?
  79. flds C(d_sdivzstepu)
  80. fmuls fp_8 // sdivzstepu*8
  81. movl C(cacheblock),%edx
  82. flds C(d_tdivzstepu)
  83. fmuls fp_8 // tdivzstepu*8 | sdivzstepu*8
  84. movl pspans(%esp),%ebx // point to the first span descriptor
  85. flds C(d_zistepu)
  86. fmuls fp_8 // zistepu*8 | tdivzstepu*8 | sdivzstepu*8
  87. movl %edx,pbase // pbase = cacheblock
  88. flds C(d_zistepu)
  89. fmuls fp_64kx64k // zistepu*fp_64kx64k | zistepu*8 | tdivzstepu*8 |
  // sdivzstepu*8
  90. fxch %st(3) // sdivzstepu*8 | zistepu*8 | tdivzstepu*8 |
  // zistepu*fp_64kx64k
  91. fstps sdivz8stepu
  92. fstps zi8stepu
  93. fstps tdivz8stepu
  94. fistpl izistep // integer 1/z step; FP stack is empty again
  95. movl izistep,%eax
  96. rorl $16,%eax // put upper 16 bits in low word
  97. movl sspan_t_count(%ebx),%ecx // ecx = pixel count of the first span
  98. movl %eax,izistep // keep the rotated step for the pixel code
  99. cmpl $0,%ecx
  100. jle LNextSpan // first span has count <= 0: skip straight to the next one
  // Per-span setup. On entry %ebx points at the span descriptor and %ecx holds
  // its (positive) pixel count. On exit:
  //   FP stack = 1/z | z*64k | t/z | s/z   (values at the span's first pixel)
  //   izi      = 1/z as an integer, rotated by 16
  //   pz       = address of the span's first z-buffer entry
  //   %edi     = address of the span's first destination pixel
  //   %esi     = sadjust, %edx = tadjust
  //   the span pointer is saved on the stack
  101. LSpanLoop:
  102. //
  103. // set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
  104. // initial s and t values
  105. //
  106. // FIXME: pipeline FILD?
  107. fildl sspan_t_v(%ebx)
  108. fildl sspan_t_u(%ebx)
  109. fld %st(1) // dv | du | dv
  110. fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
  111. fld %st(1) // du | dv*d_sdivzstepv | du | dv
  112. fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
  113. fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
  114. fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
  115. // dv*d_sdivzstepv | du | dv
  116. fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu |
  117. // dv*d_sdivzstepv | du | dv
  118. faddp %st(0),%st(2) // du*d_tdivzstepu |
  119. // du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
  120. fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
  121. // du*d_tdivzstepu | du | dv
  122. fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
  123. // du*d_tdivzstepu | du | dv
  124. fmuls C(d_tdivzstepv) // dv*d_tdivzstepv |
  125. // du*d_sdivzstepu + dv*d_sdivzstepv |
  126. // du*d_tdivzstepu | du | dv
  127. fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
  128. // dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
  129. fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
  130. // du*d_sdivzstepu; stays in %st(2) at end
  131. fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
  132. // s/z
  133. fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv |
  134. // du*d_tdivzstepu | du | s/z
  135. fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv |
  136. // du*d_tdivzstepu | du | s/z
  137. faddp %st(0),%st(2) // dv*d_zistepv |
  138. // dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
  139. fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu |
  140. // dv*d_zistepv | s/z
  141. fmuls C(d_zistepu) // du*d_zistepu |
  142. // dv*d_tdivzstepv + du*d_tdivzstepu |
  143. // dv*d_zistepv | s/z
  144. fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu |
  145. // du*d_zistepu | dv*d_zistepv | s/z
  146. fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
  147. // du*d_tdivzstepu; stays in %st(1) at end
  148. fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
  149. faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
  150. flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
  151. fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
  152. fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
  153. // du*d_zistepu; stays in %st(0) at end
  154. // 1/z | fp_64k | t/z | s/z
  155. fld %st(0) // FIXME: get rid of stall on FMUL?
  156. fmuls fp_64kx64k // 1/z*fp_64kx64k | 1/z | fp_64k | t/z | s/z
  157. fxch %st(1) // 1/z | 1/z*fp_64kx64k | fp_64k | t/z | s/z
  158. //
  159. // calculate and clamp s & t
  160. //
  161. fdivr %st(0),%st(2) // 1/z | z*64k | t/z | s/z
  // (the stack picture above leaves out the scaled 1/z copy, which the fxch
  // and fistpl below store to izi and pop)
  162. fxch %st(1)
  163. fistpl izi // 0.32 fixed-point 1/z
  164. movl izi,%ebp
  165. //
  166. // set pz to point to the first z-buffer pixel in the span
  167. //
  168. rorl $16,%ebp // put upper 16 bits in low word
  169. movl sspan_t_v(%ebx),%eax // eax = v
  170. movl %ebp,izi // izi is kept in rotated form from here on
  171. movl sspan_t_u(%ebx),%ebp // ebp = u
  172. imull C(d_zrowbytes) // eax = v * d_zrowbytes (one-operand form; also
  // overwrites %edx with the high half)
  173. shll $1,%ebp // a word per pixel
  174. addl C(d_pzbuffer),%eax
  175. addl %ebp,%eax
  176. movl %eax,pz // pz = d_pzbuffer + v*d_zrowbytes + u*2
  177. //
  178. // point %edi to the first pixel in the span
  179. //
  180. movl C(d_viewbuffer),%ebp
  181. movl sspan_t_v(%ebx),%eax
  182. pushl %ebx // preserve spans pointer
  183. movl C(tadjust),%edx // edx = tadjust; t is added at LFDIVInFlight1
  184. movl C(sadjust),%esi // esi = sadjust; s is added at LFDIVInFlight1
  185. movl C(d_scantable)(,%eax,4),%edi // v * screenwidth
  186. addl %ebp,%edi
  187. movl sspan_t_u(%ebx),%ebp
  188. addl %ebp,%edi // pdest = &pdestspan[scans->u];
  189. //
  190. // now start the FDIV for the end of the span
  191. //
  // Three paths, chosen by the pixel count in %ecx. Every path first turns the
  // span-start s/z and t/z into integer s and t, then:
  //   count > 8   (LSetupNotLast1): advance s/z, t/z, 1/z by the 8-pixel steps
  //                                 and start fp_64k / (1/z)
  //   2..8 pixels (fall-through):   advance by (count-1) single-pixel steps and
  //                                 start fp_64k / (1/z)
  //   1 pixel     (LCleanup1):      no advance and no FDIV
  // On the two short paths %ecx is left holding count-1.
  // FP stack at LFDIVInFlight1: [z*64k |] 1/z | t/z | s/z, where the leading
  // quotient is absent on the LCleanup1 path.
  192. cmpl $8,%ecx
  193. ja LSetupNotLast1
  194. decl %ecx // ecx = count - 1
  195. jz LCleanup1 // if only one pixel, no need to start an FDIV
  196. movl %ecx,spancountminus1
  197. // finish up the s and t calcs
  198. fxch %st(1) // z*64k | 1/z | t/z | s/z
  199. fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
  200. fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
  201. fxch %st(1) // z*64k | s | 1/z | t/z | s/z
  202. fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
  203. fxch %st(1) // s | t | 1/z | t/z | s/z
  204. fistpl s // 1/z | t | t/z | s/z
  205. fistpl t // 1/z | t/z | s/z
  206. fildl spancountminus1
  207. flds C(d_tdivzstepu) // _d_tdivzstepu | spancountminus1
  208. flds C(d_zistepu) // _d_zistepu | _d_tdivzstepu | spancountminus1
  209. fmul %st(2),%st(0) // _d_zistepu*scm1 | _d_tdivzstepu | scm1
  210. fxch %st(1) // _d_tdivzstepu | _d_zistepu*scm1 | scm1
  211. fmul %st(2),%st(0) // _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
  212. fxch %st(2) // scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
  213. fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
  214. // _d_tdivzstepu*scm1
  215. fxch %st(1) // _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
  216. // _d_tdivzstepu*scm1
  // (the stack pictures in this run show only the newly pushed values;
  // 1/z | t/z | s/z sit beneath them and are the targets of the three faddp's)
  217. faddp %st(0),%st(3) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
  218. fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
  219. faddp %st(0),%st(3) // _d_sdivzstepu*scm1
  220. faddp %st(0),%st(3) // 1/z | t/z | s/z, all advanced to the last pixel
  221. flds fp_64k
  222. fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
  223. // overlap
  224. jmp LFDIVInFlight1
  225. LCleanup1:
  226. // finish up the s and t calcs
  227. fxch %st(1) // z*64k | 1/z | t/z | s/z
  228. fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
  229. fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
  230. fxch %st(1) // z*64k | s | 1/z | t/z | s/z
  231. fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
  232. fxch %st(1) // s | t | 1/z | t/z | s/z
  233. fistpl s // 1/z | t | t/z | s/z
  234. fistpl t // 1/z | t/z | s/z
  235. jmp LFDIVInFlight1
  236. .align 4
  237. LSetupNotLast1:
  238. // finish up the s and t calcs
  239. fxch %st(1) // z*64k | 1/z | t/z | s/z
  240. fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
  241. fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
  242. fxch %st(1) // z*64k | s | 1/z | t/z | s/z
  243. fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
  244. fxch %st(1) // s | t | 1/z | t/z | s/z
  245. fistpl s // 1/z | t | t/z | s/z
  246. fistpl t // 1/z | t/z | s/z
  247. fadds zi8stepu // 1/z + zi8stepu | t/z | s/z
  248. fxch %st(2) // s/z | t/z | 1/z
  249. fadds sdivz8stepu // s/z + sdivz8stepu | t/z | 1/z
  250. fxch %st(2) // 1/z | t/z | s/z
  251. flds tdivz8stepu // tdivz8stepu | 1/z | t/z | s/z
  252. faddp %st(0),%st(2) // 1/z | t/z + tdivz8stepu | s/z
  253. flds fp_64k // fp_64k | 1/z | t/z | s/z
  254. fdiv %st(1),%st(0) // z = 1/1/z
  255. // this is what we've gone to all this trouble to
  256. // overlap
  // Integer work done while the FDIV (if one was started) completes:
  // add sadjust/tadjust to the span-start s and t, clamp each to
  // [0, bbextents] / [0, bbextentt], split them into whole and fractional
  // parts, and compute the address of the first source texel in %esi.
  // In: %esi = sadjust, %edx = tadjust, %ecx = pixel count (count-1 on the
  // short paths, see the jna at the end).
  257. LFDIVInFlight1:
  258. addl s,%esi // esi = sadjust + s
  259. addl t,%edx // edx = tadjust + t
  260. movl C(bbextents),%ebx
  261. movl C(bbextentt),%ebp
  262. cmpl %ebx,%esi
  263. ja LClampHighOrLow0 // unsigned compare catches both > extent and < 0
  264. LClampReentry0:
  265. movl %esi,s // s = clamped start s
  266. movl pbase,%ebx
  267. shll $16,%esi // move the low 16 bits of s to the top
  268. cmpl %ebp,%edx
  269. movl %esi,sfracf // (movl leaves the cmpl flags intact for the ja)
  270. ja LClampHighOrLow1
  271. LClampReentry1:
  272. movl %edx,t // t = clamped start t
  273. movl s,%esi // sfrac = scans->sfrac;
  274. shll $16,%edx // move the low 16 bits of t to the top
  275. movl t,%eax // tfrac = scans->tfrac;
  276. sarl $16,%esi // esi = s >> 16
  277. movl %edx,tfracf
  278. //
  279. // calculate the texture starting address
  280. //
  281. sarl $16,%eax // eax = t >> 16
  282. addl %ebx,%esi
  283. imull C(cachewidth),%eax // (tfrac >> 16) * cachewidth
  284. addl %eax,%esi // psource = pbase + (sfrac >> 16) +
  285. // ((tfrac >> 16) * cachewidth);
  286. //
  287. // determine whether last span or not
  288. //
  289. cmpl $8,%ecx
  290. jna LLastSegment // 8 or fewer: %ecx is count-1 here (see the decl above)
  291. //
  292. // not the last segment; do full 8-wide segment
  293. //
  // Entry state: FP stack = z*64k | 1/z | t/z | s/z for the END of this
  // 8-pixel segment; %esi = source texel address; %edi = destination pixel;
  // %ecx = pixels remaining (> 8).
  //
  // Register use in the pixel code below:
  //   %ecx = z-buffer pointer      %edi = destination pointer
  //   %ebp = rotated 1/z (low word holds the upper 16 bits)
  //   %edx = t fraction            %ebx = s fraction
  //   %esi = source texel pointer  %eax = scratch (texel / carry mask)
  //
  // Every pixel repeats one pattern:
  //   cmpw / jl       skip if the 1/z word is less than the z-buffer word
  //   movb/cmpb/jz    load the texel; skip if it equals TRANSPARENT_COLOR
  //   movw / movb     store the 1/z word and the texel
  //   addl/adcl $0    step the rotated 1/z; the adcl feeds the carry out of
  //                   bit 31 back into bit 0
  //   addl/sbbl       step t fraction; %eax = -1 on carry, else 0
  //   addl/adcl       step s fraction; %esi += advancetable[1 + %eax] + carry
  294. LNotLastSegment:
  295. //
  296. // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
  297. // get there
  298. //
  299. // pick up after the FDIV that was left in flight previously
  300. fld %st(0) // duplicate it
  301. fmul %st(4),%st(0) // s = s/z * z
  302. fxch %st(1)
  303. fmul %st(3),%st(0) // t = t/z * z
  304. fxch %st(1)
  305. fistpl snext
  306. fistpl tnext // FP stack is now 1/z | t/z | s/z
  307. movl snext,%eax
  308. movl tnext,%edx
  309. subl $8,%ecx // count off this segment's pixels
  310. movl C(sadjust),%ebp
  311. pushl %ecx // remember count of remaining pixels
  312. movl C(tadjust),%ecx
  313. addl %eax,%ebp // ebp = sadjust + snext
  314. addl %edx,%ecx // ecx = tadjust + tnext
  315. movl C(bbextents),%eax
  316. movl C(bbextentt),%edx
  317. cmpl $2048,%ebp
  318. jl LClampLow2 // signed: below 2048 -> 2048
  319. cmpl %eax,%ebp
  320. ja LClampHigh2 // unsigned: above bbextents -> bbextents
  321. LClampReentry2:
  322. cmpl $2048,%ecx
  323. jl LClampLow3 // signed: below 2048 -> 2048
  324. cmpl %edx,%ecx
  325. ja LClampHigh3 // unsigned: above bbextentt -> bbextentt
  326. LClampReentry3:
  327. movl %ebp,snext
  328. movl %ecx,tnext
  329. subl s,%ebp // ebp = s delta across the 8-pixel segment
  330. subl t,%ecx // ecx = t delta across the 8-pixel segment
  331. //
  332. // set up advancetable
  333. //
  // (the shifts below are 19 and 13 rather than 16: the deltas cover 8 pixels,
  // so the extra 3 bits divide them down to per-pixel steps)
  334. movl %ecx,%eax
  335. movl %ebp,%edx
  336. sarl $19,%edx // sstep >>= 16;
  337. movl C(cachewidth),%ebx
  338. sarl $19,%eax // tstep >>= 16;
  339. jz LIsZero // whole t step is 0: skip the multiply
  340. imull %ebx,%eax // (tstep >> 16) * cachewidth;
  341. LIsZero:
  342. addl %edx,%eax // add in sstep
  343. // (tstep >> 16) * cachewidth + (sstep >> 16);
  344. movl tfracf,%edx
  345. movl %eax,advancetable+4 // advance base in t
  346. addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
  347. // (sstep >> 16);
  348. shll $13,%ebp // left-justify sstep fractional part
  349. movl %ebp,sstep
  350. movl sfracf,%ebx
  351. shll $13,%ecx // left-justify tstep fractional part
  352. movl %eax,advancetable // advance extra in t
  353. movl %ecx,tstep
  354. movl pz,%ecx
  355. movl izi,%ebp
  // pixel 1 of 8
  356. cmpw (%ecx),%bp
  357. jl Lp1
  358. movb (%esi),%al // get first source texel
  359. cmpb $(TRANSPARENT_COLOR),%al
  360. jz Lp1
  361. movw %bp,(%ecx)
  362. movb %al,(%edi) // store first dest pixel
  363. Lp1:
  364. addl izistep,%ebp
  365. adcl $0,%ebp
  366. addl tstep,%edx // advance tfrac fractional part by tstep frac
  367. sbbl %eax,%eax // turn tstep carry into -1 (0 if none)
  368. addl sstep,%ebx // advance sfrac fractional part by sstep frac
  369. adcl advancetable+4(,%eax,4),%esi // point to next source texel
  // pixel 2 of 8
  370. cmpw 2(%ecx),%bp
  371. jl Lp2
  372. movb (%esi),%al
  373. cmpb $(TRANSPARENT_COLOR),%al
  374. jz Lp2
  375. movw %bp,2(%ecx)
  376. movb %al,1(%edi)
  377. Lp2:
  378. addl izistep,%ebp
  379. adcl $0,%ebp
  380. addl tstep,%edx
  381. sbbl %eax,%eax
  382. addl sstep,%ebx
  383. adcl advancetable+4(,%eax,4),%esi
  // pixel 3 of 8
  384. cmpw 4(%ecx),%bp
  385. jl Lp3
  386. movb (%esi),%al
  387. cmpb $(TRANSPARENT_COLOR),%al
  388. jz Lp3
  389. movw %bp,4(%ecx)
  390. movb %al,2(%edi)
  391. Lp3:
  392. addl izistep,%ebp
  393. adcl $0,%ebp
  394. addl tstep,%edx
  395. sbbl %eax,%eax
  396. addl sstep,%ebx
  397. adcl advancetable+4(,%eax,4),%esi
  // pixel 4 of 8
  398. cmpw 6(%ecx),%bp
  399. jl Lp4
  400. movb (%esi),%al
  401. cmpb $(TRANSPARENT_COLOR),%al
  402. jz Lp4
  403. movw %bp,6(%ecx)
  404. movb %al,3(%edi)
  405. Lp4:
  406. addl izistep,%ebp
  407. adcl $0,%ebp
  408. addl tstep,%edx
  409. sbbl %eax,%eax
  410. addl sstep,%ebx
  411. adcl advancetable+4(,%eax,4),%esi
  // pixel 5 of 8
  412. cmpw 8(%ecx),%bp
  413. jl Lp5
  414. movb (%esi),%al
  415. cmpb $(TRANSPARENT_COLOR),%al
  416. jz Lp5
  417. movw %bp,8(%ecx)
  418. movb %al,4(%edi)
  419. Lp5:
  420. addl izistep,%ebp
  421. adcl $0,%ebp
  422. addl tstep,%edx
  423. sbbl %eax,%eax
  424. addl sstep,%ebx
  425. adcl advancetable+4(,%eax,4),%esi
  426. //
  427. // start FDIV for end of next segment in flight, so it can overlap
  428. //
  // FP stack on entry: 1/z | t/z | s/z (end of the segment being drawn).
  // %eax receives the remaining pixel count that was pushed before the pixel
  // code started:
  //   remaining > 8 : advance by the 8-pixel steps (LSetupNotLast2)
  //   2..8          : advance by (remaining-1) single-pixel steps
  //   1             : no advance and no FDIV
  // On the two short paths %eax is left holding remaining-1.
  429. popl %eax
  430. cmpl $8,%eax // more than one segment after this?
  431. ja LSetupNotLast2 // yes
  432. decl %eax
  433. jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
  434. movl %eax,spancountminus1
  435. fildl spancountminus1
  436. flds C(d_zistepu) // _d_zistepu | spancountminus1
  437. fmul %st(1),%st(0) // _d_zistepu*scm1 | scm1
  438. flds C(d_tdivzstepu) // _d_tdivzstepu | _d_zistepu*scm1 | scm1
  439. fmul %st(2),%st(0) // _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
  440. fxch %st(1) // _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
  // (the stack pictures in this run show only the newly pushed values;
  // 1/z | t/z | s/z sit beneath them and are the targets of the faddp's)
  441. faddp %st(0),%st(3) // _d_tdivzstepu*scm1 | scm1
  442. fxch %st(1) // scm1 | _d_tdivzstepu*scm1
  443. fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
  444. fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
  445. faddp %st(0),%st(3) // _d_sdivzstepu*scm1
  446. flds fp_64k // 64k | _d_sdivzstepu*scm1
  447. fxch %st(1) // _d_sdivzstepu*scm1 | 64k
  448. faddp %st(0),%st(4) // 64k
  449. fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
  450. // overlap
  451. jmp LFDIVInFlight2
  452. .align 4
  453. LSetupNotLast2:
  454. fadds zi8stepu // 1/z + zi8stepu | t/z | s/z
  455. fxch %st(2) // s/z | t/z | 1/z
  456. fadds sdivz8stepu // s/z + sdivz8stepu | t/z | 1/z
  457. fxch %st(2) // 1/z | t/z | s/z
  458. flds tdivz8stepu // tdivz8stepu | 1/z | t/z | s/z
  459. faddp %st(0),%st(2) // 1/z | t/z + tdivz8stepu | s/z
  460. flds fp_64k // fp_64k | 1/z | t/z | s/z
  461. fdiv %st(1),%st(0) // z = 1/1/z
  462. // this is what we've gone to all this trouble to
  463. // overlap
  // Draws pixels 6-8 of the current 8-pixel segment while the FDIV started
  // above runs, then writes the stepped state back to memory and loops if more
  // than one segment remains. Each pixel follows the same z-test /
  // transparency-test / store / step pattern as pixels 1-5.
  464. LFDIVInFlight2:
  465. pushl %eax // save the remaining count again; %eax is scratch below
  // pixel 6 of 8
  466. cmpw 10(%ecx),%bp
  467. jl Lp6
  468. movb (%esi),%al
  469. cmpb $(TRANSPARENT_COLOR),%al
  470. jz Lp6
  471. movw %bp,10(%ecx)
  472. movb %al,5(%edi)
  473. Lp6:
  474. addl izistep,%ebp
  475. adcl $0,%ebp
  476. addl tstep,%edx
  477. sbbl %eax,%eax
  478. addl sstep,%ebx
  479. adcl advancetable+4(,%eax,4),%esi
  // pixel 7 of 8
  480. cmpw 12(%ecx),%bp
  481. jl Lp7
  482. movb (%esi),%al
  483. cmpb $(TRANSPARENT_COLOR),%al
  484. jz Lp7
  485. movw %bp,12(%ecx)
  486. movb %al,6(%edi)
  487. Lp7:
  488. addl izistep,%ebp
  489. adcl $0,%ebp
  490. addl tstep,%edx
  491. sbbl %eax,%eax
  492. addl sstep,%ebx
  493. adcl advancetable+4(,%eax,4),%esi
  // pixel 8 of 8
  494. cmpw 14(%ecx),%bp
  495. jl Lp8
  496. movb (%esi),%al
  497. cmpb $(TRANSPARENT_COLOR),%al
  498. jz Lp8
  499. movw %bp,14(%ecx)
  500. movb %al,7(%edi)
  501. Lp8:
  502. addl izistep,%ebp
  503. adcl $0,%ebp
  504. addl tstep,%edx
  505. sbbl %eax,%eax
  506. addl sstep,%ebx
  507. adcl advancetable+4(,%eax,4),%esi
  508. addl $8,%edi // destination: 8 pixels, one byte each
  509. addl $16,%ecx // z-buffer: 8 entries, one word each
  510. movl %edx,tfracf // write back the stepped t fraction
  511. movl snext,%edx
  512. movl %ebx,sfracf // write back the stepped s fraction
  513. movl tnext,%ebx
  514. movl %edx,s // s = snext: this segment's end is the next one's start
  515. movl %ebx,t // t = tnext
  516. movl %ecx,pz
  517. movl %ebp,izi
  518. popl %ecx // retrieve count
  519. //
  520. // determine whether last span or not
  521. //
  522. cmpl $8,%ecx // are there multiple segments remaining?
  523. ja LNotLastSegment // yes
  524. //
  525. // last segment of scan
  526. //
  // Entry state: %ecx = pixels in this segment minus 1 (0..7); %esi = source
  // texel address; %edi = destination pixel. If %ecx is non-zero the FP stack
  // holds z*64k | 1/z | t/z | s/z for the LAST pixel; if %ecx is zero no FDIV
  // was started and the stack is 1/z | t/z | s/z.
  //
  // The code computes per-pixel s and t steps across the segment, fills in
  // advancetable / sstep / tstep, loads the pixel-loop registers, and
  // dispatches through spr8entryvec_table with a push + ret.
  527. LLastSegment:
  528. //
  529. // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
  530. // get there. The number of pixels left is variable, and we want to land on the
  531. // last pixel, not step one past it, so we can't run into arithmetic problems
  532. //
  533. testl %ecx,%ecx
  534. jz LNoSteps // just draw the last pixel and we're done
  535. // pick up after the FDIV that was left in flight previously
  536. fld %st(0) // duplicate it
  537. fmul %st(4),%st(0) // s = s/z * z
  538. fxch %st(1)
  539. fmul %st(3),%st(0) // t = t/z * z
  540. fxch %st(1)
  541. fistpl snext
  542. fistpl tnext // FP stack is now 1/z | t/z | s/z
  543. movl C(tadjust),%ebx
  544. movl C(sadjust),%eax
  545. addl snext,%eax // eax = sadjust + snext
  546. addl tnext,%ebx // ebx = tadjust + tnext
  547. movl C(bbextents),%ebp
  548. movl C(bbextentt),%edx
  549. cmpl $2048,%eax
  550. jl LClampLow4 // signed: below 2048 -> 2048
  551. cmpl %ebp,%eax
  552. ja LClampHigh4 // unsigned: above bbextents -> bbextents
  553. LClampReentry4:
  554. movl %eax,snext
  555. cmpl $2048,%ebx
  556. jl LClampLow5 // signed: below 2048 -> 2048
  557. cmpl %edx,%ebx
  558. ja LClampHigh5 // unsigned: above bbextentt -> bbextentt
  559. LClampReentry5:
  560. cmpl $1,%ecx // don't bother
  561. je LOnlyOneStep // if two pixels in segment, there's only one step,
  562. // of the segment length
  563. subl s,%eax // eax = s delta across the segment
  564. subl t,%ebx // ebx = t delta across the segment
  565. addl %eax,%eax // convert to 15.17 format so multiply by 1.31
  566. addl %ebx,%ebx // reciprocal yields 16.48
  567. imull reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
  // (one-operand imull leaves the 64-bit product in %edx:%eax; the step is
  // taken from the high half in %edx)
  568. movl %edx,%ebp // ebp = sstep
  569. movl %ebx,%eax
  570. imull reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
  // Here %ebp = sstep and %edx = tstep, with %ecx still holding pixels-1.
  571. LSetEntryvec:
  572. //
  573. // set up advancetable
  574. //
  575. movl spr8entryvec_table(,%ecx,4),%ebx // handler address, indexed by %ecx
  576. movl %edx,%eax // eax = tstep (kept for its fractional part)
  577. pushl %ebx // entry point into code for RET later
  578. movl %ebp,%ecx
  579. sarl $16,%ecx // sstep >>= 16;
  580. movl C(cachewidth),%ebx
  581. sarl $16,%edx // tstep >>= 16;
  582. jz LIsZeroLast // whole t step is 0: skip the multiply
  583. imull %ebx,%edx // (tstep >> 16) * cachewidth;
  584. LIsZeroLast:
  585. addl %ecx,%edx // add in sstep
  586. // (tstep >> 16) * cachewidth + (sstep >> 16);
  587. movl tfracf,%ecx
  588. movl %edx,advancetable+4 // advance base in t
  589. addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
  590. // (sstep >> 16);
  591. shll $16,%ebp // left-justify sstep fractional part
  592. movl sfracf,%ebx // ebx = s fraction for the pixel code
  593. shll $16,%eax // left-justify tstep fractional part
  594. movl %edx,advancetable // advance extra in t
  595. movl %eax,tstep
  596. movl %ebp,sstep
  597. movl %ecx,%edx // edx = t fraction for the pixel code
  598. movl pz,%ecx // ecx = z-buffer pointer
  599. movl izi,%ebp // ebp = rotated 1/z
  600. ret // jump to the number-of-pixels handler
  601. //----------------------------------------
  // LNoSteps: exactly one pixel is left, so no stepping is needed. LEndSpan
  // addresses its pixel as 7(%edi) and 14(%ecx), so both pointers are backed
  // up by those amounts first.
  602. LNoSteps:
  603. movl pz,%ecx
  604. subl $7,%edi // adjust for hardwired offset
  605. subl $14,%ecx // same adjustment for the z-buffer (2 bytes per pixel)
  606. jmp LEndSpan
  // LOnlyOneStep: two pixels are left, so the single step is the whole delta
  // and no reciprocal multiply is needed. Leaves sstep in %ebp and tstep in
  // %edx, as LSetEntryvec expects.
  607. LOnlyOneStep:
  608. subl s,%eax // eax = snext - s
  609. subl t,%ebx // ebx = tnext - t
  610. movl %eax,%ebp
  611. movl %ebx,%edx
  612. jmp LSetEntryvec
  613. //----------------------------------------
  // Entry stubs for the final segment, reached through the ret at the end of
  // LSetEntryvec. Spr8EntryN_8 draws N pixels by jumping into the unrolled
  // chain that begins at Spr8Entry8_8. That chain uses fixed offsets from
  // %edi (1 byte per pixel) and %ecx (2 bytes per z entry), so each stub
  // first backs both pointers up by the number of pixels being skipped.
  // NOTE(review): spr8entryvec_table is defined outside this file; its layout
  // is presumed to map "pixels minus 1" to these labels. Confirm there.
  614. .globl Spr8Entry2_8
  615. Spr8Entry2_8:
  616. subl $6,%edi // adjust for hardwired offsets
  617. subl $12,%ecx
  618. movb (%esi),%al // (LLEntry2_8 loads this texel again after its z test)
  619. jmp LLEntry2_8
  620. //----------------------------------------
  621. .globl Spr8Entry3_8
  622. Spr8Entry3_8:
  623. subl $5,%edi // adjust for hardwired offsets
  624. subl $10,%ecx
  625. jmp LLEntry3_8
  626. //----------------------------------------
  627. .globl Spr8Entry4_8
  628. Spr8Entry4_8:
  629. subl $4,%edi // adjust for hardwired offsets
  630. subl $8,%ecx
  631. jmp LLEntry4_8
  632. //----------------------------------------
  633. .globl Spr8Entry5_8
  634. Spr8Entry5_8:
  635. subl $3,%edi // adjust for hardwired offsets
  636. subl $6,%ecx
  637. jmp LLEntry5_8
  638. //----------------------------------------
  639. .globl Spr8Entry6_8
  640. Spr8Entry6_8:
  641. subl $2,%edi // adjust for hardwired offsets
  642. subl $4,%ecx
  643. jmp LLEntry6_8
  644. //----------------------------------------
  645. .globl Spr8Entry7_8
  646. Spr8Entry7_8:
  647. decl %edi // adjust for hardwired offsets
  648. subl $2,%ecx
  649. jmp LLEntry7_8
  650. //----------------------------------------
  // Unrolled pixel chain for the final segment. Spr8Entry8_8 starts at the
  // first slot and the LLEntryN_8 labels are mid-chain entry points; control
  // falls through every remaining slot into LEndSpan, which draws the final
  // pixel.
  //
  // Registers: %ecx = z-buffer pointer, %edi = destination pointer,
  // %ebp = rotated 1/z, %edx = t fraction, %ebx = s fraction,
  // %esi = source texel pointer, %eax = scratch.
  //
  // Each slot: skip if the 1/z word is less than the z-buffer word or the
  // texel equals TRANSPARENT_COLOR, otherwise store both. Then step 1/z (with
  // end-around carry), step the t and s fractions, and advance %esi by
  // advancetable[1 + %eax] + carry, where %eax is -1 if t carried and 0 if not.
  651. .globl Spr8Entry8_8
  652. Spr8Entry8_8:
  653. cmpw (%ecx),%bp
  654. jl Lp9
  655. movb (%esi),%al
  656. cmpb $(TRANSPARENT_COLOR),%al
  657. jz Lp9
  658. movw %bp,(%ecx)
  659. movb %al,(%edi)
  660. Lp9:
  661. addl izistep,%ebp
  662. adcl $0,%ebp
  663. addl tstep,%edx
  664. sbbl %eax,%eax
  665. addl sstep,%ebx
  666. adcl advancetable+4(,%eax,4),%esi
  667. LLEntry7_8:
  668. cmpw 2(%ecx),%bp
  669. jl Lp10
  670. movb (%esi),%al
  671. cmpb $(TRANSPARENT_COLOR),%al
  672. jz Lp10
  673. movw %bp,2(%ecx)
  674. movb %al,1(%edi)
  675. Lp10:
  676. addl izistep,%ebp
  677. adcl $0,%ebp
  678. addl tstep,%edx
  679. sbbl %eax,%eax
  680. addl sstep,%ebx
  681. adcl advancetable+4(,%eax,4),%esi
  682. LLEntry6_8:
  683. cmpw 4(%ecx),%bp
  684. jl Lp11
  685. movb (%esi),%al
  686. cmpb $(TRANSPARENT_COLOR),%al
  687. jz Lp11
  688. movw %bp,4(%ecx)
  689. movb %al,2(%edi)
  690. Lp11:
  691. addl izistep,%ebp
  692. adcl $0,%ebp
  693. addl tstep,%edx
  694. sbbl %eax,%eax
  695. addl sstep,%ebx
  696. adcl advancetable+4(,%eax,4),%esi
  697. LLEntry5_8:
  698. cmpw 6(%ecx),%bp
  699. jl Lp12
  700. movb (%esi),%al
  701. cmpb $(TRANSPARENT_COLOR),%al
  702. jz Lp12
  703. movw %bp,6(%ecx)
  704. movb %al,3(%edi)
  705. Lp12:
  706. addl izistep,%ebp
  707. adcl $0,%ebp
  708. addl tstep,%edx
  709. sbbl %eax,%eax
  710. addl sstep,%ebx
  711. adcl advancetable+4(,%eax,4),%esi
  712. LLEntry4_8:
  713. cmpw 8(%ecx),%bp
  714. jl Lp13
  715. movb (%esi),%al
  716. cmpb $(TRANSPARENT_COLOR),%al
  717. jz Lp13
  718. movw %bp,8(%ecx)
  719. movb %al,4(%edi)
  720. Lp13:
  721. addl izistep,%ebp
  722. adcl $0,%ebp
  723. addl tstep,%edx
  724. sbbl %eax,%eax
  725. addl sstep,%ebx
  726. adcl advancetable+4(,%eax,4),%esi
  727. LLEntry3_8:
  728. cmpw 10(%ecx),%bp
  729. jl Lp14
  730. movb (%esi),%al
  731. cmpb $(TRANSPARENT_COLOR),%al
  732. jz Lp14
  733. movw %bp,10(%ecx)
  734. movb %al,5(%edi)
  735. Lp14:
  736. addl izistep,%ebp
  737. adcl $0,%ebp
  738. addl tstep,%edx
  739. sbbl %eax,%eax
  740. addl sstep,%ebx
  741. adcl advancetable+4(,%eax,4),%esi
  742. LLEntry2_8:
  743. cmpw 12(%ecx),%bp
  744. jl Lp15
  745. movb (%esi),%al
  746. cmpb $(TRANSPARENT_COLOR),%al
  747. jz Lp15
  748. movw %bp,12(%ecx)
  749. movb %al,6(%edi)
  750. Lp15:
  751. addl izistep,%ebp
  752. adcl $0,%ebp
  753. addl tstep,%edx
  754. sbbl %eax,%eax
  755. addl sstep,%ebx
  756. adcl advancetable+4(,%eax,4),%esi
  // LEndSpan: draws the final pixel of the span with no stepping afterwards,
  // drops the three values left on the FP stack, and moves to the next span
  // descriptor. A positive count loops back to LSpanLoop, a zero count skips
  // the descriptor, and a negative count ends the list and returns.
  757. LEndSpan:
  758. cmpw 14(%ecx),%bp
  759. jl Lp16 // 1/z word is less than the z-buffer word: skip the pixel
  760. movb (%esi),%al // load the texel for the span's final pixel
  761. cmpb $(TRANSPARENT_COLOR),%al
  762. jz Lp16
  763. movw %bp,14(%ecx)
  764. movb %al,7(%edi)
  765. Lp16:
  766. //
  767. // clear s/z, t/z, 1/z from FP stack
  768. //
  769. fstp %st(0)
  770. fstp %st(0)
  771. fstp %st(0)
  772. popl %ebx // restore spans pointer
  773. LNextSpan:
  774. addl $(sspan_t_size),%ebx // point to next span
  775. movl sspan_t_count(%ebx),%ecx
  776. cmpl $0,%ecx // any more spans?
  777. jg LSpanLoop // yes
  778. jz LNextSpan // yes, but this one's empty
  // negative count: end of the span list
  779. popl %ebx // restore register variables
  780. popl %esi
  781. popl %edi
  782. popl %ebp // restore the caller's stack frame
  783. ret
  784. #endif // id386