d_parta.s 9.9 KB


  1. /*
  2. Copyright (C) 1996-1997 Id Software, Inc.
  3. This program is free software; you can redistribute it and/or
  4. modify it under the terms of the GNU General Public License
  5. as published by the Free Software Foundation; either version 2
  6. of the License, or (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. See the GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program; if not, write to the Free Software
  13. Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  14. */
  15. //
  16. // d_parta.s
  17. // x86 assembly-language 8-bpp particle-drawing code.
  18. //
  19. #include "asm_i386.h"
  20. #include "quakeasm.h"
  21. #include "d_ifacea.h"
  22. #include "asm_draw.h"
  23. #if id386
  24. //----------------------------------------------------------------------
  25. // 8-bpp particle drawing code.
  26. //----------------------------------------------------------------------
  27. //FIXME: comments, full optimization
  28. //----------------------------------------------------------------------
  29. // 8-bpp particle queueing code.
  30. //----------------------------------------------------------------------
  31. .text
  //----------------------------------------------------------------------
  // D_DrawParticle
  //
  // Transforms one particle into view space, projects it to screen
  // coordinates, and draws it as a z-buffered square of 8-bpp pixels.
  //
  // Syntax: AT&T (GAS), 32-bit x86 + x87; run through the C preprocessor.
  // In:     one stack argument (the first slot above the return address):
  //         pointer to the particle, read through the pt_org and pt_color
  //         offsets.
  // Out:    no return value is prepared (%eax holds leftovers).
  // Saves:  %ebx, %edi, %ebp (pushed on entry); %esi (pushed inside the
  //         2x2/3x3/4x4 drawers, the only places it is used).
  // Clobb:  %eax, %ecx, %edx, flags, and the scratch variables DP_u, DP_v,
  //         DP_Color, izi (and DP_Pix on the LDefault path).
  //         The x87 stack is balanced on every exit path.
  //
  // Register roles once the pixel addresses have been set up:
  //   %edi = pointer to the first destination pixel (1 byte per pixel)
  //   %edx = pointer to the first z-buffer entry (2 bytes per pixel)
  //   %ebp = izi; only the low 16 bits (%bp) are compared and stored
  //   %ch  = colour byte
  //   %eax = pix, the particle's edge length in pixels
  //----------------------------------------------------------------------

  // Offset of the argument from %esp after the three pushes below:
  // 12 bytes of saved registers + 4 bytes of return address.
  #define P 12+4

  .align 4
  .globl C(D_DrawParticle)
  C(D_DrawParticle):
          pushl   %ebp                    // preserve caller's stack frame
          pushl   %edi                    // preserve register variables
          pushl   %ebx
          movl    P(%esp),%edi            // %edi = particle pointer

  // FIXME: better FP overlap in general here

  // transform point
  //      VectorSubtract (p->org, r_origin, local);
          flds    C(r_origin)
          fsubrs  pt_org(%edi)            // reversed subtract: org[0] - r_origin[0]
          flds    pt_org+4(%edi)
          fsubs   C(r_origin)+4
          flds    pt_org+8(%edi)
          fsubs   C(r_origin)+8
          fxch    %st(2)                  // local[0] | local[1] | local[2]

  //      transformed[2] = DotProduct(local, r_ppn);
          flds    C(r_ppn)                // r_ppn[0] | local[0] | local[1] | local[2]
          fmul    %st(1),%st(0)           // dot0 | local[0] | local[1] | local[2]
          flds    C(r_ppn)+4              // r_ppn[1] | dot0 | local[0] | local[1] | local[2]
          fmul    %st(3),%st(0)           // dot1 | dot0 | local[0] | local[1] | local[2]
          flds    C(r_ppn)+8              // r_ppn[2] | dot1 | dot0 | local[0] |
                                          //  local[1] | local[2]
          fmul    %st(5),%st(0)           // dot2 | dot1 | dot0 | local[0] | local[1] | local[2]
          fxch    %st(2)                  // dot0 | dot1 | dot2 | local[0] | local[1] | local[2]
          faddp   %st(0),%st(1)           // dot0 + dot1 | dot2 | local[0] | local[1] |
                                          //  local[2]
          faddp   %st(0),%st(1)           // z | local[0] | local[1] | local[2]
          fld     %st(0)                  // z | z | local[0] | local[1] |
                                          //  local[2]
          fdivrs  float_1                 // 1/z | z | local[0] | local[1] | local[2]
          fxch    %st(1)                  // z | 1/z | local[0] | local[1] | local[2]

  //      if (transformed[2] < PARTICLE_Z_CLIP)
  //              return;
  // The compare is issued here but its result is only read by fnstsw a few
  // instructions later, so the start of the next dot product overlaps it.
  // NOTE(review): this relies on the intervening fxch/flds/fmul leaving the
  // C0 condition bit untouched -- confirm against the target FPUs.
          fcomps  float_particle_z_clip   // 1/z | local[0] | local[1] | local[2]
          fxch    %st(3)                  // local[2] | local[0] | local[1] | 1/z

          flds    C(r_pup)                // r_pup[0] | local[2] | local[0] | local[1] | 1/z
          fmul    %st(2),%st(0)           // dot0 | local[2] | local[0] | local[1] | 1/z
          flds    C(r_pup)+4              // r_pup[1] | dot0 | local[2] | local[0] |
                                          //  local[1] | 1/z

          fnstsw  %ax                     // fetch the status word from the fcomps
          testb   $1,%ah                  // bit 0 of %ah = C0 (set when z is below the clip)
          jnz     LPop6AndDone            // clipped: six values are on the FP stack

  //      transformed[1] = DotProduct(local, r_pup);
          fmul    %st(4),%st(0)           // dot1 | dot0 | local[2] | local[0] | local[1] | 1/z
          flds    C(r_pup)+8              // r_pup[2] | dot1 | dot0 | local[2] |
                                          //  local[0] | local[1] | 1/z
          fmul    %st(3),%st(0)           // dot2 | dot1 | dot0 | local[2] | local[0] |
                                          //  local[1] | 1/z
          fxch    %st(2)                  // dot0 | dot1 | dot2 | local[2] | local[0] |
                                          //  local[1] | 1/z
          faddp   %st(0),%st(1)           // dot0 + dot1 | dot2 | local[2] | local[0] |
                                          //  local[1] | 1/z
          faddp   %st(0),%st(1)           // y | local[2] | local[0] | local[1] | 1/z
          fxch    %st(3)                  // local[1] | local[2] | local[0] | y | 1/z

  //      transformed[0] = DotProduct(local, r_pright);
  // local[] is no longer needed afterwards, so each component is multiplied
  // in place instead of loading r_pright onto the stack.
          fmuls   C(r_pright)+4           // dot1 | local[2] | local[0] | y | 1/z
          fxch    %st(2)                  // local[0] | local[2] | dot1 | y | 1/z
          fmuls   C(r_pright)             // dot0 | local[2] | dot1 | y | 1/z
          fxch    %st(1)                  // local[2] | dot0 | dot1 | y | 1/z
          fmuls   C(r_pright)+8           // dot2 | dot0 | dot1 | y | 1/z
          fxch    %st(2)                  // dot1 | dot0 | dot2 | y | 1/z
          faddp   %st(0),%st(1)           // dot1 + dot0 | dot2 | y | 1/z
          faddp   %st(0),%st(1)           // x | y | 1/z
          fxch    %st(1)                  // y | x | 1/z

  // project the point
          fmul    %st(2),%st(0)           // y/z | x | 1/z
          fxch    %st(1)                  // x | y/z | 1/z
          fmul    %st(2),%st(0)           // x/z | y/z | 1/z
          fxch    %st(1)                  // y/z | x/z | 1/z
          fsubrs  C(ycenter)              // v | x/z | 1/z   (v = ycenter - y/z)
          fxch    %st(1)                  // x/z | v | 1/z
          fadds   C(xcenter)              // u | v | 1/z     (u = xcenter + x/z)
  // FIXME: preadjust xcenter and ycenter
          fxch    %st(1)                  // v | u | 1/z
          fadds   float_point5            // v | u | 1/z
          fxch    %st(1)                  // u | v | 1/z
          fadds   float_point5            // u | v | 1/z
          fxch    %st(2)                  // 1/z | v | u
          fmuls   DP_32768                // 1/z * 0x8000 | v | u
          fxch    %st(2)                  // u | v | 1/z * 0x8000

  // FIXME: use Terje's fp->int trick here?
  // FIXME: check we're getting proper rounding here
          fistpl  DP_u                    // v | 1/z * 0x8000
          fistpl  DP_v                    // 1/z * 0x8000

          movl    DP_u,%eax               // %eax = u
          movl    DP_v,%edx               // %edx = v

  //      if ((v > d_vrectbottom_particle) ||
  //              (u > d_vrectright_particle) ||
  //              (v < d_vrecty) ||
  //              (u < d_vrectx))
  //      {
  //              continue;
  //      }
  // All four rejects leave through LPop1AndDone because 1/z * 0x8000 is
  // still on the FP stack.
          movl    C(d_vrectbottom_particle),%ebx
          movl    C(d_vrectright_particle),%ecx
          cmpl    %ebx,%edx
          jg      LPop1AndDone
          cmpl    %ecx,%eax
          jg      LPop1AndDone
          movl    C(d_vrecty),%ebx
          movl    C(d_vrectx),%ecx
          cmpl    %ebx,%edx
          jl      LPop1AndDone
          cmpl    %ecx,%eax
          jl      LPop1AndDone

  // The colour is stored as a float in the particle; convert it to an
  // integer in DP_Color (its low byte is loaded into %ch further down).
          flds    pt_color(%edi)          // color | 1/z * 0x8000
  // FIXME: use Terje's fast fp->int trick?
          fistpl  DP_Color                // 1/z * 0x8000

  // Build both destination pointers; the particle pointer in %edi is dead
  // from here on and the register is reused.
          movl    C(d_viewbuffer),%ebx
          addl    %eax,%ebx               // %ebx = d_viewbuffer + u
          movl    C(d_scantable)(,%edx,4),%edi    // point to the pixel
          imull   C(d_zrowbytes),%edx     // point to the z pixel
          leal    (%edx,%eax,2),%edx      // + u * 2 (z entries are 16 bits wide)
          movl    C(d_pzbuffer),%eax

          fistpl  izi                     // FP stack is empty from here on

          addl    %ebx,%edi               // %edi = d_viewbuffer + d_scantable[v] + u
          addl    %eax,%edx               // %edx = d_pzbuffer + v*d_zrowbytes + u*2

  //      pix = izi >> d_pix_shift;
          movl    izi,%eax
          movl    C(d_pix_shift),%ecx
          shrl    %cl,%eax
          movl    izi,%ebp                // %bp = z value written to the z-buffer

  //      if (pix < d_pix_min)
  //              pix = d_pix_min;
  //      else if (pix > d_pix_max)
  //              pix = d_pix_max;
          movl    C(d_pix_min),%ebx
          movl    C(d_pix_max),%ecx
          cmpl    %ebx,%eax
          jnl     LTestPixMax
          movl    %ebx,%eax
          jmp     LTestDone

  LTestPixMax:
          cmpl    %ecx,%eax
          jng     LTestDone
          movl    %ecx,%eax
  LTestDone:

  // A zero-sized particle draws nothing in the C reference loops quoted at
  // LDefault. Without this check, pix == 0 would index the slot before
  // DP_EntryTable or wrap the LDefault loop counters.
          testl   %eax,%eax
          jz      LDone

          movb    DP_Color,%ch            // %ch = colour byte

  // The unrolled drawers are only used when there is no y aspect scaling
  // and pix is 1..4; everything else goes through the generic loop.
          movl    C(d_y_aspect_shift),%ebx
          testl   %ebx,%ebx
          jnz     LDefault

          cmpl    $4,%eax
          ja      LDefault

  // Table slot for pix N is at DP_EntryTable + (N-1)*4. The '*' marks the
  // operand as a memory-indirect jump target, as AT&T syntax requires.
          jmp     *DP_EntryTable-4(,%eax,4)

  // In every drawer below a pixel is written only when the stored z value
  // is not greater (signed 16-bit compare) than %bp, i.e. "pz <= izi".

  // 1x1
  .globl DP_1x1
  DP_1x1:
          cmpw    %bp,(%edx)              // just one pixel to do
          jg      LDone
          movw    %bp,(%edx)
          movb    %ch,(%edi)
          jmp     LDone

  // 2x2
  .globl DP_2x2
  DP_2x2:
          pushl   %esi
          movl    C(screenwidth),%ebx     // %ebx = byte step between pixel rows
          movl    C(d_zrowbytes),%esi     // %esi = byte step between z rows

  // row 0
          cmpw    %bp,(%edx)
          jg      L2x2_1
          movw    %bp,(%edx)
          movb    %ch,(%edi)
  L2x2_1:
          cmpw    %bp,2(%edx)
          jg      L2x2_2
          movw    %bp,2(%edx)
          movb    %ch,1(%edi)
  L2x2_2:
  // row 1
          cmpw    %bp,(%edx,%esi,1)
          jg      L2x2_3
          movw    %bp,(%edx,%esi,1)
          movb    %ch,(%edi,%ebx,1)
  L2x2_3:
          cmpw    %bp,2(%edx,%esi,1)
          jg      L2x2_4
          movw    %bp,2(%edx,%esi,1)
          movb    %ch,1(%edi,%ebx,1)
  L2x2_4:

          popl    %esi
          jmp     LDone

  // 3x3
  .globl DP_3x3
  DP_3x3:
          pushl   %esi
          movl    C(screenwidth),%ebx     // %ebx = byte step between pixel rows
          movl    C(d_zrowbytes),%esi     // %esi = byte step between z rows

  // row 0
          cmpw    %bp,(%edx)
          jg      L3x3_1
          movw    %bp,(%edx)
          movb    %ch,(%edi)
  L3x3_1:
          cmpw    %bp,2(%edx)
          jg      L3x3_2
          movw    %bp,2(%edx)
          movb    %ch,1(%edi)
  L3x3_2:
          cmpw    %bp,4(%edx)
          jg      L3x3_3
          movw    %bp,4(%edx)
          movb    %ch,2(%edi)
  L3x3_3:
  // row 1
          cmpw    %bp,(%edx,%esi,1)
          jg      L3x3_4
          movw    %bp,(%edx,%esi,1)
          movb    %ch,(%edi,%ebx,1)
  L3x3_4:
          cmpw    %bp,2(%edx,%esi,1)
          jg      L3x3_5
          movw    %bp,2(%edx,%esi,1)
          movb    %ch,1(%edi,%ebx,1)
  L3x3_5:
          cmpw    %bp,4(%edx,%esi,1)
          jg      L3x3_6
          movw    %bp,4(%edx,%esi,1)
          movb    %ch,2(%edi,%ebx,1)
  L3x3_6:
  // row 2 (row step scaled by 2 in the addressing mode)
          cmpw    %bp,(%edx,%esi,2)
          jg      L3x3_7
          movw    %bp,(%edx,%esi,2)
          movb    %ch,(%edi,%ebx,2)
  L3x3_7:
          cmpw    %bp,2(%edx,%esi,2)
          jg      L3x3_8
          movw    %bp,2(%edx,%esi,2)
          movb    %ch,1(%edi,%ebx,2)
  L3x3_8:
          cmpw    %bp,4(%edx,%esi,2)
          jg      L3x3_9
          movw    %bp,4(%edx,%esi,2)
          movb    %ch,2(%edi,%ebx,2)
  L3x3_9:

          popl    %esi
          jmp     LDone

  // 4x4
  .globl DP_4x4
  DP_4x4:
          pushl   %esi
          movl    C(screenwidth),%ebx     // %ebx = byte step between pixel rows
          movl    C(d_zrowbytes),%esi     // %esi = byte step between z rows

  // row 0
          cmpw    %bp,(%edx)
          jg      L4x4_1
          movw    %bp,(%edx)
          movb    %ch,(%edi)
  L4x4_1:
          cmpw    %bp,2(%edx)
          jg      L4x4_2
          movw    %bp,2(%edx)
          movb    %ch,1(%edi)
  L4x4_2:
          cmpw    %bp,4(%edx)
          jg      L4x4_3
          movw    %bp,4(%edx)
          movb    %ch,2(%edi)
  L4x4_3:
          cmpw    %bp,6(%edx)
          jg      L4x4_4
          movw    %bp,6(%edx)
          movb    %ch,3(%edi)
  L4x4_4:
  // row 1
          cmpw    %bp,(%edx,%esi,1)
          jg      L4x4_5
          movw    %bp,(%edx,%esi,1)
          movb    %ch,(%edi,%ebx,1)
  L4x4_5:
          cmpw    %bp,2(%edx,%esi,1)
          jg      L4x4_6
          movw    %bp,2(%edx,%esi,1)
          movb    %ch,1(%edi,%ebx,1)
  L4x4_6:
          cmpw    %bp,4(%edx,%esi,1)
          jg      L4x4_7
          movw    %bp,4(%edx,%esi,1)
          movb    %ch,2(%edi,%ebx,1)
  L4x4_7:
          cmpw    %bp,6(%edx,%esi,1)
          jg      L4x4_8
          movw    %bp,6(%edx,%esi,1)
          movb    %ch,3(%edi,%ebx,1)
  L4x4_8:

  // Advance both pointers by two rows so rows 2 and 3 can reuse the same
  // 0-row / 1-row addressing as rows 0 and 1.
          leal    (%edx,%esi,2),%edx
          leal    (%edi,%ebx,2),%edi

  // row 2
          cmpw    %bp,(%edx)
          jg      L4x4_9
          movw    %bp,(%edx)
          movb    %ch,(%edi)
  L4x4_9:
          cmpw    %bp,2(%edx)
          jg      L4x4_10
          movw    %bp,2(%edx)
          movb    %ch,1(%edi)
  L4x4_10:
          cmpw    %bp,4(%edx)
          jg      L4x4_11
          movw    %bp,4(%edx)
          movb    %ch,2(%edi)
  L4x4_11:
          cmpw    %bp,6(%edx)
          jg      L4x4_12
          movw    %bp,6(%edx)
          movb    %ch,3(%edi)
  L4x4_12:
  // row 3
          cmpw    %bp,(%edx,%esi,1)
          jg      L4x4_13
          movw    %bp,(%edx,%esi,1)
          movb    %ch,(%edi,%ebx,1)
  L4x4_13:
          cmpw    %bp,2(%edx,%esi,1)
          jg      L4x4_14
          movw    %bp,2(%edx,%esi,1)
          movb    %ch,1(%edi,%ebx,1)
  L4x4_14:
          cmpw    %bp,4(%edx,%esi,1)
          jg      L4x4_15
          movw    %bp,4(%edx,%esi,1)
          movb    %ch,2(%edi,%ebx,1)
  L4x4_15:
          cmpw    %bp,6(%edx,%esi,1)
          jg      L4x4_16
          movw    %bp,6(%edx,%esi,1)
          movb    %ch,3(%edi,%ebx,1)
  L4x4_16:

          popl    %esi
          jmp     LDone

  // default case, handling any size particle
  LDefault:

  //      count = pix << d_y_aspect_shift;
          movl    %eax,%ebx
          movl    %eax,DP_Pix             // keep pix to reload at the start of every row
          movb    C(d_y_aspect_shift),%cl // only %cl is overwritten; %ch keeps the colour
          shll    %cl,%ebx                // %ebx = count (rows to draw)

  //      for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
  //      {
  //              for (i=0 ; i<pix ; i++)
  //              {
  //                      if (pz[i] <= izi)
  //                      {
  //                              pz[i] = izi;
  //                              pdest[i] = color;
  //                      }
  //              }
  //      }
  // The column loop runs from the rightmost pixel to the leftmost: %eax
  // counts pix..1 and the -2 / -1 displacements address element %eax-1.
  LGenRowLoop:
          movl    DP_Pix,%eax
  LGenColLoop:
          cmpw    %bp,-2(%edx,%eax,2)
          jg      LGSkip
          movw    %bp,-2(%edx,%eax,2)
          movb    %ch,-1(%edi,%eax,1)
  LGSkip:
          decl    %eax                    // --pix
          jnz     LGenColLoop
          addl    C(d_zrowbytes),%edx     // next z-buffer row
          addl    C(screenwidth),%edi     // next pixel row
          decl    %ebx                    // --count
          jnz     LGenRowLoop

  // Common exit: every path ends here with the FP stack already empty.
  LDone:
          popl    %ebx                    // restore register variables
          popl    %edi
          popl    %ebp                    // restore the caller's stack frame
          ret

  // Z-clip reject: discard the six values on the FP stack (five here, the
  // sixth by falling through into LPop1AndDone).
  LPop6AndDone:
          fstp    %st(0)
          fstp    %st(0)
          fstp    %st(0)
          fstp    %st(0)
          fstp    %st(0)
  // Screen-bounds reject: only 1/z * 0x8000 is left on the FP stack.
  LPop1AndDone:
          fstp    %st(0)
          jmp     LDone
  404. #endif // id386