TMAP_SKV.ASM 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334
  1. ;THE COMPUTER CODE CONTAINED HEREIN IS THE SOLE PROPERTY OF PARALLAX
  2. ;SOFTWARE CORPORATION ("PARALLAX"). PARALLAX, IN DISTRIBUTING THE CODE TO
  3. ;END-USERS, AND SUBJECT TO ALL OF THE TERMS AND CONDITIONS HEREIN, GRANTS A
  4. ;ROYALTY-FREE, PERPETUAL LICENSE TO SUCH END-USERS FOR USE BY SUCH END-USERS
  5. ;IN USING, DISPLAYING, AND CREATING DERIVATIVE WORKS THEREOF, SO LONG AS
  6. ;SUCH USE, DISPLAY OR CREATION IS FOR NON-COMMERCIAL, ROYALTY OR REVENUE
  7. ;FREE PURPOSES. IN NO EVENT SHALL THE END-USER USE THE COMPUTER CODE
  8. ;CONTAINED HEREIN FOR REVENUE-BEARING PURPOSES. THE END-USER UNDERSTANDS
  9. ;AND AGREES TO THE TERMS HEREIN AND ACCEPTS THE SAME BY USE OF THIS FILE.
  10. ;COPYRIGHT 1993-1998 PARALLAX SOFTWARE CORPORATION. ALL RIGHTS RESERVED.
  11. ;
  12. ; $Source: f:/miner/source/texmap/rcs/tmap_skv.asm $
  13. ; $Revision: 1.5 $
  14. ; $Author: mike $
  15. ; $Date: 1994/11/30 00:57:03 $
  16. ;
  17. ; Vertical scanner for sky bitmap rendering.
  18. ;
  19. ; $Log: tmap_skv.asm $
  20. ; Revision 1.5 1994/11/30 00:57:03 mike
  21. ; optimization.
  22. ;
  23. ; Revision 1.4 1994/11/12 16:41:13 mike
  24. ; jae -> ja.
  25. ;
  26. ; Revision 1.3 1994/05/24 11:03:12 mike
  27. ; Make work for any sized (power of 2) bitmap.
  28. ;
  29. ; Revision 1.2 1994/01/31 15:42:14 mike
  30. ; Vertical scanning sky texture mapper (in inner loop).
  31. ;
  32. ; Revision 1.1 1994/01/30 14:10:55 mike
  33. ; Initial revision
  34. ;
  35. ;
  ; ---- assembler setup -------------------------------------------------------------------------------
DEBUG_ON = 1

        .386
        option  oldstructs

        .nolist
        include psmacros.inc
        .list

        public  asm_tmap_scanline_lin_sky_v_, asm_tmap_scanline_lin_v_

        include tmap_inc.asm

; ---- texture dimensions, as log2 of the size in texels ---------------------------------------------
; sky bitmap: 2^10 x 2^7 = 1024 x 128 texels
sky_width_log_2         equ     10
sky_height_log_2        equ     7
; ordinary bitmap: 2^6 x 2^6 = 64 x 64 texels
width_log_2             equ     6
height_log_2            equ     6

; ---- external variables ----------------------------------------------------------------------------
; extd is a macro that is not defined in this file (presumably it comes from psmacros.inc and
; declares an external dword - confirm there).
_DATA   SEGMENT DWORD PUBLIC USE32 'DATA'

; destination span
        extd    _fx_xleft
        extd    _fx_xright
        extd    _fx_y

; texture source and coordinates
        extd    _pixptr
        extd    _fx_u
        extd    _fx_v
        extd    _fx_du_dx
        extd    _fx_dv_dx

; scratch (_x is declared but not referenced by the routines in this file)
        extd    _x
        extd    _loop_count

_DATA   ENDS

DGROUP  GROUP   _DATA

; ---- code segment (closed at the end of the file) --------------------------------------------------
_TEXT   SEGMENT PARA PUBLIC USE32 'CODE'
        ASSUME  CS:_TEXT, DS:_DATA
  ; --------------------------------------------------------------------------------------------------
; asm_tmap_scanline_lin_sky_v_
;
; Draws one vertical strip of 8-bit pixels, linearly texture mapped (no perspective divide) from a
; 1024 x 128 texel bitmap (sky_width_log_2 / sky_height_log_2).
;
; Inputs (all read from memory, none passed in registers):
;   _fx_xleft   16.16 fixed point, first destination row  (used as a row here: it is multiplied by
;   _fx_xright  16.16 fixed point, last destination row    _bytes_per_row, despite the "x" in the name)
;   _fx_y       added unshifted to the row address, i.e. the byte offset of the column within a row
;   _pixptr     address of the first texel of the source bitmap
;   _fx_u       16.16 fixed point u at the first row
;   _fx_v       16.16 fixed point v at the first row
;   _fx_du_dx   16.16 fixed point step of u per row
;   _fx_dv_dx   16.16 fixed point step of v per row
;   Also reads _window_bottom, _window_height, _bytes_per_row, write_buffer and num_iters, none of
;   which is declared in this file (presumably they come from tmap_inc.asm).
;
; Effect, in pseudo-C:
;   for (row = first; row <= last; row++) {
;       write_buffer[row*_bytes_per_row + _fx_y] = pixptr[((v >> 16) & 127)*1024 + ((u >> 16) & 1023)];
;       u += du_dx;
;       v += dv_dx;
;   }
;
; Clipping:
;   first row < 0              rows above row 0 are skipped and u,v are stepped past them
;   first row > _window_bottom nothing is drawn
;   last row  > _window_bottom the strip ends at _window_bottom
;   last row  < first row      nothing is drawn
;
; Side effects: stores (last - first) in _loop_count.
; Registers:    all general registers are preserved (pusha/popa); flags are not.
        align   4
asm_tmap_scanline_lin_sky_v_:
        pusha

; ---- texture coordinates ---------------------------------------------------------------------------
; u/du are shifted left so that the low sky_width_log_2 integer bits of u end up as the top bits of
; the register; likewise v/dv for sky_height_log_2.  Wrapping at the bitmap edge then happens for
; free when the 32 bit adds overflow.
        mov     ebx,_fx_u
        mov     ecx,_fx_du_dx
        mov     edx,_fx_dv_dx
        mov     ebp,_fx_v
        shl     ebx,16-sky_width_log_2
        shl     ebp,16-sky_height_log_2
        shl     edx,16-sky_height_log_2
        shl     ecx,16-sky_width_log_2

; ---- first row -------------------------------------------------------------------------------------
        mov     edi,_fx_xleft
        sar     edi,16                          ; edi = first row (integer part)
        jns     edi_ok
; The strip starts above row 0.  Start at row 0 instead, and advance u,v by the number of rows
; skipped so the rows that are drawn get the same texels they would have had without clipping.
; edi is negative here, so subtracting edi*delta adds (-edi)*delta.
        mov     eax,edi
        imul    eax,ecx                         ; eax = first * du
        sub     ebx,eax                         ; u += (-first) * du
        imul    edi,edx                         ; edi = first * dv
        sub     ebp,edi                         ; v += (-first) * dv
        xor     edi,edi                         ; first = 0
edi_ok:
        cmp     edi,_window_bottom
        ja      _none_to_do                     ; starts below the window

; ---- last row and iteration count ------------------------------------------------------------------
        mov     eax,_fx_xright
        sar     eax,16                          ; eax = last row (integer part)
        cmp     eax,_window_bottom
        jle     skv_end_ok
        mov     eax,_window_bottom              ; never draw below the last window row
skv_end_ok:
        sub     eax,edi                         ; eax = last - first (one less than the pixel count)
        js      _none_to_do                     ; empty strip
        cmp     eax,_window_height
        jbe     _ok_to_do
        mov     eax,_window_height
_ok_to_do:
        mov     _loop_count,eax

; ---- source and destination pointers ---------------------------------------------------------------
        imul    edi,_bytes_per_row
        add     edi,_fx_y
        add     edi,write_buffer                ; edi = address of the first pixel to write
        mov     esi,_pixptr                     ; esi = start of texture map data

; ---- enter the unrolled loop -----------------------------------------------------------------------
; The loop below is num_iters copies of the same instruction group.  Jumping to copy number
; (num_iters-1 - _loop_count) leaves exactly _loop_count+1 copies to execute.
; _size is the size in bytes of one copy.
_size = (_end1 - _start1)/num_iters
        mov     eax,num_iters-1
        sub     eax,_loop_count
        jns     j_eax_ok1
        inc     eax                             ; sort of a hack, but we can get -1 here and want to be graceful
        jns     j_eax_ok1                       ; if we jump, we had -1, which is kind of ok, if not, we int 3
        int     3                               ; oops, going to jump behind _start1, very bad...
        sub     eax,eax                         ; ok to continue
j_eax_ok1: imul eax,eax,dword ptr _size
        add     eax,offset _start1
        jmp     eax

        align   4
_start1:
; Optimizations the original author considered and judged "maybe not worth making":
;  - Dropping esi from "mov al,[esi+eax]" by starting eax from the bitmap base instead.  The bitmap
;    would have to be aligned to its own size (e.g. on a 4096 byte boundary for 64x64) so that the
;    shifts form a proper address.  Timed at roughly 1% to 1.5% faster.
;  - Dropping the edi update by using "mov nnnn[edi],al".  nnnn would be a dword displacement; the
;    author's timings suggested this was slower.
;  - Packing u,v and du,dv into single dwords.  Saves one "add reg,reg" but needs a 16 bit operation
;    to extract the parts, leaves too few instructions to separate dependent operands, and makes the
;    setup more complicated.
;
; Register usage inside the loop:
;   eax  work (texel index)
;   ebx  u coordinate        ecx  delta u
;   ebp  v coordinate        edx  delta v
;   esi  pointer to source bitmap
;   edi  write address
        rept    num_iters
        mov     eax,ebp                         ; eax = v
        add     ebp,edx                         ; update v coordinate
        shr     eax,32-sky_height_log_2         ; eax = integer v, all higher bits zero
        shld    eax,ebx,sky_width_log_2         ; eax = (v << sky_width_log_2) | integer u
        add     ebx,ecx                         ; update u coordinate
        mov     al,[esi+eax]                    ; get pixel from source bitmap
        mov     [edi],al
        add     edi,_bytes_per_row              ; advance one row in the destination
        endm
_end1:
_none_to_do: popa
        ret
  ; --------------------------------------------------------------------------------------------------------------------------------
; asm_tmap_scanline_lin_v_
;
; Draws one vertical strip of 8-bit pixels, linearly texture mapped (no perspective divide) from a
; 64 x 64 texel bitmap (width_log_2 / height_log_2).
;
; Inputs (all read from memory, none passed in registers):
;   _fx_xleft   16.16 fixed point, first destination row  (used as a row here: it is multiplied by
;   _fx_xright  16.16 fixed point, last destination row    _bytes_per_row, despite the "x" in the name)
;   _fx_y       added unshifted to the row address, i.e. the byte offset of the column within a row
;   _pixptr     address of the first texel of the source bitmap
;   _fx_u       16.16 fixed point u at the first row
;   _fx_v       16.16 fixed point v at the first row
;   _fx_du_dx   16.16 fixed point step of u per row
;   _fx_dv_dx   16.16 fixed point step of v per row
;   Also reads _window_bottom, _window_height, _bytes_per_row, write_buffer and num_iters, none of
;   which is declared in this file (presumably they come from tmap_inc.asm).
;
; Effect, in pseudo-C:
;   for (row = first; row <= last; row++) {
;       write_buffer[row*_bytes_per_row + _fx_y] = pixptr[((v >> 16) & 63)*64 + ((u >> 16) & 63)];
;       u += du_dx;
;       v += dv_dx;
;   }
;
; Clipping:
;   first row < 0              rows above row 0 are skipped and u,v are stepped past them
;   first row > _window_bottom nothing is drawn
;   last row  > _window_bottom the strip ends at _window_bottom
;   last row  < first row      nothing is drawn
;
; Side effects: stores (last - first) in _loop_count.
; Registers:    all general registers are preserved (pusha/popa); flags are not.
        align   4
asm_tmap_scanline_lin_v_:
        pusha

; ---- texture coordinates ---------------------------------------------------------------------------
; u/du are shifted left so that the low width_log_2 integer bits of u end up as the top bits of the
; register; likewise v/dv for height_log_2.  Wrapping at the bitmap edge then happens for free when
; the 32 bit adds overflow.
        mov     ebx,_fx_u
        mov     ecx,_fx_du_dx
        mov     edx,_fx_dv_dx
        mov     ebp,_fx_v
        shl     ebx,16-width_log_2
        shl     ebp,16-height_log_2
        shl     edx,16-height_log_2
        shl     ecx,16-width_log_2

; ---- first row -------------------------------------------------------------------------------------
        mov     edi,_fx_xleft
        sar     edi,16                          ; edi = first row (integer part)
        jns     edi_ok_a
; The strip starts above row 0.  Start at row 0 instead, and advance u,v by the number of rows
; skipped so the rows that are drawn get the same texels they would have had without clipping.
; edi is negative here, so subtracting edi*delta adds (-edi)*delta.
        mov     eax,edi
        imul    eax,ecx                         ; eax = first * du
        sub     ebx,eax                         ; u += (-first) * du
        imul    edi,edx                         ; edi = first * dv
        sub     ebp,edi                         ; v += (-first) * dv
        xor     edi,edi                         ; first = 0
edi_ok_a:
        cmp     edi,_window_bottom
        ja      _none_to_do_a                   ; starts below the window

; ---- last row and iteration count ------------------------------------------------------------------
        mov     eax,_fx_xright
        sar     eax,16                          ; eax = last row (integer part)
        cmp     eax,_window_bottom
        jle     linv_end_ok_a
        mov     eax,_window_bottom              ; never draw below the last window row
linv_end_ok_a:
        sub     eax,edi                         ; eax = last - first (one less than the pixel count)
        js      _none_to_do_a                   ; empty strip
        cmp     eax,_window_height
        jbe     _ok_to_do_a
        mov     eax,_window_height
_ok_to_do_a:
        mov     _loop_count,eax

; ---- source and destination pointers ---------------------------------------------------------------
        imul    edi,_bytes_per_row
        add     edi,_fx_y
        add     edi,write_buffer                ; edi = address of the first pixel to write
        mov     esi,_pixptr                     ; esi = start of texture map data

; ---- enter the unrolled loop -----------------------------------------------------------------------
; The loop below is num_iters copies of the same instruction group.  Jumping to copy number
; (num_iters-1 - _loop_count) leaves exactly _loop_count+1 copies to execute.
; _size_a is the size in bytes of one copy.
_size_a = (_end1_a - _start1_a)/num_iters
        mov     eax,num_iters-1
        sub     eax,_loop_count
        jns     j_eax_ok1_a
        inc     eax                             ; sort of a hack, but we can get -1 here and want to be graceful
        jns     j_eax_ok1_a                     ; if we jump, we had -1, which is kind of ok, if not, we int 3
        int     3                               ; oops, going to jump behind _start1_a, very bad...
        sub     eax,eax                         ; ok to continue
j_eax_ok1_a: imul eax,eax,dword ptr _size_a
        add     eax,offset _start1_a
        jmp     eax

        align   4
_start1_a:
; Optimizations the original author considered and judged "maybe not worth making":
;  - Dropping esi from "mov al,[esi+eax]" by starting eax from the bitmap base instead.  The bitmap
;    would have to be aligned to its own size (on a 4096 byte boundary for 64x64) so that the shifts
;    form a proper address.  Timed at roughly 1% to 1.5% faster.
;  - Dropping the edi update by using "mov nnnn[edi],al".  nnnn would be a dword displacement; the
;    author's timings suggested this was slower.
;  - Packing u,v and du,dv into single dwords.  Saves one "add reg,reg" but needs a 16 bit operation
;    to extract the parts, leaves too few instructions to separate dependent operands, and makes the
;    setup more complicated.
;
; Register usage inside the loop:
;   eax  work (texel index)
;   ebx  u coordinate        ecx  delta u
;   ebp  v coordinate        edx  delta v
;   esi  pointer to source bitmap
;   edi  write address
        rept    num_iters
        mov     eax,ebp                         ; eax = v
        add     ebp,edx                         ; update v coordinate
        shr     eax,32-height_log_2             ; eax = integer v, all higher bits zero
        shld    eax,ebx,width_log_2             ; eax = (v << width_log_2) | integer u
        add     ebx,ecx                         ; update u coordinate
        mov     al,[esi+eax]                    ; get pixel from source bitmap
        mov     [edi],al
        add     edi,_bytes_per_row              ; advance one row in the destination
        endm
_end1_a:
_none_to_do_a: popa
        ret
  ; Close the code segment and mark the end of the source file.
_TEXT   ENDS
        END