/*
Copyright (C) 1996-1997 Id Software, Inc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
//
// math.s
// x86 assembly-language math routines.
//
  #define GLQUAKE 1 // don't include unneeded defs
#include "asm_i386.h"
#include "quakeasm.h"

#if id386

        .data

// Jump table for BoxOnPlaneSide: eight 4-byte code addresses, entry i being
// the address of Lcase<i>.  The routine indexes it with a 4-byte scale
// through an indirect jmp, so the order of the entries matters.
        .align  4
Ljmptab:
        .long   Lcase0
        .long   Lcase1
        .long   Lcase2
        .long   Lcase3
        .long   Lcase4
        .long   Lcase5
        .long   Lcase6
        .long   Lcase7

        .text
  //----------------------------------------------------------------------
// Invert24To16
//
// Unsigned-divides the 64-bit constant 0x10000000000 by the 32-bit stack
// argument and returns the 32-bit quotient.
//
// In:    val(%esp) = divisor
// Out:   %eax = 0x10000000000 / divisor (truncated), or 0xFFFFFFFF when the
//        divisor, compared as a signed 32-bit value, is <= 0x100.  For
//        1..0x100 the quotient would not fit in 32 bits, and zero would be
//        a divide by zero, so divl is never reached with such a divisor.
// Clobb: %ecx, %edx, flags
//
// TODO: rounding needed?
//----------------------------------------------------------------------

// stack parameter offset
#define val 4

.globl C(Invert24To16)
C(Invert24To16):
        movl    $0x100,%edx             // high half of the %edx:%eax dividend
        movl    val(%esp),%ecx          // %ecx = divisor
        cmpl    %edx,%ecx
        jle     LOutOfRange             // signed: divisor <= 0x100
        xorl    %eax,%eax               // low half = 0 -> 0x10000000000
        divl    %ecx                    // %eax = quotient, %edx = remainder
        ret

LOutOfRange:
        movl    $0xFFFFFFFF,%eax        // out-of-range marker
        ret
  //----------------------------------------------------------------------
// TransformVector
//
// Stores the dot products of the three floats at `in` with the globals
// vright, vup and vpn:
//      out[0] = in . vright
//      out[1] = in . vup
//      out[2] = in . vpn
// Each dot product is accumulated as (in[0]*v[0] + in[1]*v[1]) + in[2]*v[2].
// All three results are held on the x87 stack until every source element
// has been read; only then is anything written through `out`.
//
// In:    in(%esp)  = pointer to three source floats
//        out(%esp) = pointer to three destination floats
// Clobb: %eax, %edx; x87 stack is left as it was found (needs 4 free
//        registers)
//----------------------------------------------------------------------
#define in 4
#define out 8

        .align  2
.globl C(TransformVector)
C(TransformVector):
        movl    in(%esp),%eax           // %eax = source vector
        movl    out(%esp),%edx          // %edx = destination vector

// in . vright
        flds    (%eax)
        fmuls   C(vright)               // in[0]*vright[0]
        flds    4(%eax)
        fmuls   C(vright)+4             // in[1]*vright[1] | in[0]*vright[0]
        faddp   %st(0),%st(1)           // partial sum
        flds    8(%eax)
        fmuls   C(vright)+8             // in[2]*vright[2] | partial sum
        faddp   %st(0),%st(1)           // right

// in . vup
        flds    (%eax)
        fmuls   C(vup)                  // in[0]*vup[0] | right
        flds    4(%eax)
        fmuls   C(vup)+4                // in[1]*vup[1] | in[0]*vup[0] | right
        faddp   %st(0),%st(1)           // partial sum | right
        flds    8(%eax)
        fmuls   C(vup)+8                // in[2]*vup[2] | partial sum | right
        faddp   %st(0),%st(1)           // up | right

// in . vpn
        flds    (%eax)
        fmuls   C(vpn)                  // in[0]*vpn[0] | up | right
        flds    4(%eax)
        fmuls   C(vpn)+4                // in[1]*vpn[1] | in[0]*vpn[0] | up | right
        faddp   %st(0),%st(1)           // partial sum | up | right
        flds    8(%eax)
        fmuls   C(vpn)+8                // in[2]*vpn[2] | partial sum | up | right
        faddp   %st(0),%st(1)           // pn | up | right

// write the results, top of stack first
        fstps   8(%edx)                 // out[2] = pn
        fstps   4(%edx)                 // out[1] = up
        fstps   (%edx)                  // out[0] = right
        ret
  //----------------------------------------------------------------------
// BoxOnPlaneSide
//
// Classifies a box (emins/emaxs) against a plane p.  In C terms:
//      sides = 0;
//      if (dist1 >= p->dist) sides = 1;
//      if (dist2 <  p->dist) sides |= 2;
//      return sides;
// dist1 and dist2 are dot products of p->normal with two box corners.
// Which of emins[i]/emaxs[i] feeds each one is chosen per axis by
// p->signbits (0..7) through the Ljmptab jump table; the formula pair used
// by each case is given in the comment above its label.
//
// In (stack; the offsets below already account for the pushl %ebx):
//      EMINS(%esp) = emins, EMAXS(%esp) = emaxs, P(%esp) = p
//      pl_normal, pl_dist and pl_signbits are field offsets into *p that
//      are not defined in this file (presumably they come from the
//      included headers).
// Out:   %eax = sides (0..3)
// Clobb: %ecx, %edx, flags; %ebx is saved and restored; the x87 stack is
//        left as it was found (up to 6 registers are used)
// Error: a signbits byte >= 8 (unsigned) is handed to BOPS_Error.
//
// Stack-diagram notation used in the comments (top of stack first):
//      n0,n1,n2 = p->normal[0..2]
//      A0,A1,A2 = normal[i] * (corner component used for dist1)
//      B0,B1,B2 = normal[i] * (corner component used for dist2)
//----------------------------------------------------------------------
#define EMINS 4+4
#define EMAXS 4+8
#define P 4+12

        .align  2
.globl C(BoxOnPlaneSide)
C(BoxOnPlaneSide):
        pushl   %ebx

        movl    P(%esp),%edx            // %edx = p
        movl    EMINS(%esp),%ecx        // %ecx = emins
        xorl    %eax,%eax
        movl    EMAXS(%esp),%ebx        // %ebx = emaxs
        movb    pl_signbits(%edx),%al   // %eax = signbits, zero-extended
        cmpb    $8,%al
        jae     Lerror                  // unsigned test: the byte values
                                        // 0x80-0xFF compare as negative and
                                        // would slip past a signed jge, then
                                        // index far beyond the 8-entry table
        flds    pl_normal(%edx)         // n0
        fld     %st(0)                  // n0 | n0
        jmp     Ljmptab(,%eax,4)        // indirect jump to Lcase<signbits>

// Every case is entered with n0 | n0 on the x87 stack and leaves
// B0+B1 | dist1 | B2 for LSetSides.  All eight cases run the same
// instruction sequence; only the emins/emaxs operands of the six fmuls
// differ.  Lcase0 carries the full stack diagram.

//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
Lcase0:
        fmuls   (%ebx)                  // A0 | n0              (emaxs[0])
        flds    pl_normal+4(%edx)       // n1 | A0 | n0
        fxch    %st(2)                  // n0 | A0 | n1
        fmuls   (%ecx)                  // B0 | A0 | n1         (emins[0])
        fxch    %st(2)                  // n1 | A0 | B0
        fld     %st(0)                  // n1 | n1 | A0 | B0
        fmuls   4(%ebx)                 // A1 | n1 | A0 | B0    (emaxs[1])
        flds    pl_normal+8(%edx)       // n2 | A1 | n1 | A0 | B0
        fxch    %st(2)                  // n1 | A1 | n2 | A0 | B0
        fmuls   4(%ecx)                 // B1 | A1 | n2 | A0 | B0  (emins[1])
        fxch    %st(2)                  // n2 | A1 | B1 | A0 | B0
        fld     %st(0)                  // n2 | n2 | A1 | B1 | A0 | B0
        fmuls   8(%ebx)                 // A2 | n2 | A1 | B1 | A0 | B0  (emaxs[2])
        fxch    %st(5)                  // B0 | n2 | A1 | B1 | A0 | A2
        faddp   %st(0),%st(3)           // n2 | A1 | B0+B1 | A0 | A2
        fmuls   8(%ecx)                 // B2 | A1 | B0+B1 | A0 | A2  (emins[2])
        fxch    %st(1)                  // A1 | B2 | B0+B1 | A0 | A2
        faddp   %st(0),%st(3)           // B2 | B0+B1 | A0+A1 | A2
        fxch    %st(3)                  // A2 | B0+B1 | A0+A1 | B2
        faddp   %st(0),%st(2)           // B0+B1 | dist1 | B2
        jmp     LSetSides

//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
Lcase1:
        fmuls   (%ecx)                  // A0: emins[0]
        flds    pl_normal+4(%edx)
        fxch    %st(2)
        fmuls   (%ebx)                  // B0: emaxs[0]
        fxch    %st(2)
        fld     %st(0)
        fmuls   4(%ebx)                 // A1: emaxs[1]
        flds    pl_normal+8(%edx)
        fxch    %st(2)
        fmuls   4(%ecx)                 // B1: emins[1]
        fxch    %st(2)
        fld     %st(0)
        fmuls   8(%ebx)                 // A2: emaxs[2]
        fxch    %st(5)
        faddp   %st(0),%st(3)           // B0+B1
        fmuls   8(%ecx)                 // B2: emins[2]
        fxch    %st(1)
        faddp   %st(0),%st(3)           // A0+A1
        fxch    %st(3)
        faddp   %st(0),%st(2)           // B0+B1 | dist1 | B2
        jmp     LSetSides

//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
Lcase2:
        fmuls   (%ebx)                  // A0: emaxs[0]
        flds    pl_normal+4(%edx)
        fxch    %st(2)
        fmuls   (%ecx)                  // B0: emins[0]
        fxch    %st(2)
        fld     %st(0)
        fmuls   4(%ecx)                 // A1: emins[1]
        flds    pl_normal+8(%edx)
        fxch    %st(2)
        fmuls   4(%ebx)                 // B1: emaxs[1]
        fxch    %st(2)
        fld     %st(0)
        fmuls   8(%ebx)                 // A2: emaxs[2]
        fxch    %st(5)
        faddp   %st(0),%st(3)           // B0+B1
        fmuls   8(%ecx)                 // B2: emins[2]
        fxch    %st(1)
        faddp   %st(0),%st(3)           // A0+A1
        fxch    %st(3)
        faddp   %st(0),%st(2)           // B0+B1 | dist1 | B2
        jmp     LSetSides

//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
Lcase3:
        fmuls   (%ecx)                  // A0: emins[0]
        flds    pl_normal+4(%edx)
        fxch    %st(2)
        fmuls   (%ebx)                  // B0: emaxs[0]
        fxch    %st(2)
        fld     %st(0)
        fmuls   4(%ecx)                 // A1: emins[1]
        flds    pl_normal+8(%edx)
        fxch    %st(2)
        fmuls   4(%ebx)                 // B1: emaxs[1]
        fxch    %st(2)
        fld     %st(0)
        fmuls   8(%ebx)                 // A2: emaxs[2]
        fxch    %st(5)
        faddp   %st(0),%st(3)           // B0+B1
        fmuls   8(%ecx)                 // B2: emins[2]
        fxch    %st(1)
        faddp   %st(0),%st(3)           // A0+A1
        fxch    %st(3)
        faddp   %st(0),%st(2)           // B0+B1 | dist1 | B2
        jmp     LSetSides

//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
Lcase4:
        fmuls   (%ebx)                  // A0: emaxs[0]
        flds    pl_normal+4(%edx)
        fxch    %st(2)
        fmuls   (%ecx)                  // B0: emins[0]
        fxch    %st(2)
        fld     %st(0)
        fmuls   4(%ebx)                 // A1: emaxs[1]
        flds    pl_normal+8(%edx)
        fxch    %st(2)
        fmuls   4(%ecx)                 // B1: emins[1]
        fxch    %st(2)
        fld     %st(0)
        fmuls   8(%ecx)                 // A2: emins[2]
        fxch    %st(5)
        faddp   %st(0),%st(3)           // B0+B1
        fmuls   8(%ebx)                 // B2: emaxs[2]
        fxch    %st(1)
        faddp   %st(0),%st(3)           // A0+A1
        fxch    %st(3)
        faddp   %st(0),%st(2)           // B0+B1 | dist1 | B2
        jmp     LSetSides

//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
Lcase5:
        fmuls   (%ecx)                  // A0: emins[0]
        flds    pl_normal+4(%edx)
        fxch    %st(2)
        fmuls   (%ebx)                  // B0: emaxs[0]
        fxch    %st(2)
        fld     %st(0)
        fmuls   4(%ebx)                 // A1: emaxs[1]
        flds    pl_normal+8(%edx)
        fxch    %st(2)
        fmuls   4(%ecx)                 // B1: emins[1]
        fxch    %st(2)
        fld     %st(0)
        fmuls   8(%ecx)                 // A2: emins[2]
        fxch    %st(5)
        faddp   %st(0),%st(3)           // B0+B1
        fmuls   8(%ebx)                 // B2: emaxs[2]
        fxch    %st(1)
        faddp   %st(0),%st(3)           // A0+A1
        fxch    %st(3)
        faddp   %st(0),%st(2)           // B0+B1 | dist1 | B2
        jmp     LSetSides

//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
Lcase6:
        fmuls   (%ebx)                  // A0: emaxs[0]
        flds    pl_normal+4(%edx)
        fxch    %st(2)
        fmuls   (%ecx)                  // B0: emins[0]
        fxch    %st(2)
        fld     %st(0)
        fmuls   4(%ecx)                 // A1: emins[1]
        flds    pl_normal+8(%edx)
        fxch    %st(2)
        fmuls   4(%ebx)                 // B1: emaxs[1]
        fxch    %st(2)
        fld     %st(0)
        fmuls   8(%ecx)                 // A2: emins[2]
        fxch    %st(5)
        faddp   %st(0),%st(3)           // B0+B1
        fmuls   8(%ebx)                 // B2: emaxs[2]
        fxch    %st(1)
        faddp   %st(0),%st(3)           // A0+A1
        fxch    %st(3)
        faddp   %st(0),%st(2)           // B0+B1 | dist1 | B2
        jmp     LSetSides

//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
Lcase7:
        fmuls   (%ecx)                  // A0: emins[0]
        flds    pl_normal+4(%edx)
        fxch    %st(2)
        fmuls   (%ebx)                  // B0: emaxs[0]
        fxch    %st(2)
        fld     %st(0)
        fmuls   4(%ecx)                 // A1: emins[1]
        flds    pl_normal+8(%edx)
        fxch    %st(2)
        fmuls   4(%ebx)                 // B1: emaxs[1]
        fxch    %st(2)
        fld     %st(0)
        fmuls   8(%ecx)                 // A2: emins[2]
        fxch    %st(5)
        faddp   %st(0),%st(3)           // B0+B1
        fmuls   8(%ebx)                 // B2: emaxs[2]
        fxch    %st(1)
        faddp   %st(0),%st(3)           // A0+A1
        fxch    %st(3)
        faddp   %st(0),%st(2)           // B0+B1 | dist1 | B2
        // falls through into LSetSides

LSetSides:

//      sides = 0;
//      if (dist1 >= p->dist)
//              sides = 1;
//      if (dist2 < p->dist)
//              sides |= 2;

        faddp   %st(0),%st(2)           // dist1 | dist2   (dist2 = B2 + (B0+B1))
        fcomps  pl_dist(%edx)           // compare dist1 with p->dist, pop
        xorl    %ecx,%ecx               // sides = 0
        fnstsw  %ax                     // status word of the dist1 compare
        fcomps  pl_dist(%edx)           // compare dist2 with p->dist, pop
                                        // (%ax still holds the first result)
        andb    $1,%ah                  // keep C0 (status bit 8): dist1 < dist
        xorb    $1,%ah                  // invert -> dist1 >= dist
        addb    %ah,%cl                 // sides = 0 or 1
        fnstsw  %ax                     // status word of the dist2 compare
        andb    $1,%ah                  // C0: dist2 < dist
        addb    %ah,%ah                 // scale to 0 or 2
        addb    %ah,%cl                 // sides |= 2 when dist2 < dist

//      return sides;

        popl    %ebx
        movl    %ecx,%eax               // return status
        ret

// Reached only for signbits >= 8; nothing has been pushed on the x87 stack
// yet, and the saved %ebx is still on the CPU stack.
// NOTE(review): no instruction follows the call, so BOPS_Error is presumably
// expected not to return - confirm against its definition.
Lerror:
        call    C(BOPS_Error)

#endif // id386