/*
Copyright (C) 1996-1997 Id Software, Inc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

*/
//
// math.s
// x86 assembly-language math routines.
  18. #include "asm_i386.h"
  19. #include "quakeasm.h"
  20. #if id386
  .data

  // Dispatch table for BoxOnPlaneSide.  Entry i holds the address of
  // Lcase<i>; BoxOnPlaneSide indexes it with the plane signbits byte
  // (scaled by 4) to pick the code path for that sign combination.
  .align 4
  Ljmptab:
          .long   Lcase0
          .long   Lcase1
          .long   Lcase2
          .long   Lcase3
          .long   Lcase4
          .long   Lcase5
          .long   Lcase6
          .long   Lcase7

  .text
  //----------------------------------------------------------------------
  // Invert24To16 (val)
  //
  // In:    one 32-bit stack argument, val, at val(%esp) on entry.
  // Out:   %eax = 0x10000000000 / val (unsigned 64-by-32 divide, quotient
  //        truncated) when val is greater than 0x100 as a signed number;
  //        %eax = 0xFFFFFFFF otherwise.
  // Clobb: %ecx, %edx, flags.
  //
  // The range check also guards the divide: any divisor above 0x100
  // keeps the quotient of 0x100:00000000 within 32 bits.
  //
  // TODO: rounding needed?
  //----------------------------------------------------------------------

  // stack parameter offset
  #define val 4

  .globl C(Invert24To16)
  C(Invert24To16):
          movl    val(%esp),%ecx          // ecx = val (the divisor)
          movl    $0x100,%edx             // high dword of dividend 0x10000000000
          cmpl    %edx,%ecx
          jle     LInvertOutOfRange       // signed: val <= 0x100 is rejected

          xorl    %eax,%eax               // low dword of dividend = 0
          divl    %ecx                    // eax = edx:eax / val
          ret

  LInvertOutOfRange:
          movl    $-1,%eax                // 0xFFFFFFFF sentinel
          ret
  //----------------------------------------------------------------------
  // TransformVector (in, out)
  //
  // In:    two stack arguments: in and out, each a pointer to three
  //        consecutive single-precision floats.
  // Out:   out[0] = in[0]*vright[0] + in[1]*vright[1] + in[2]*vright[2]
  //        out[1] = in[0]*vup[0]    + in[1]*vup[1]    + in[2]*vup[2]
  //        out[2] = in[0]*vpn[0]    + in[1]*vpn[1]    + in[2]*vpn[2]
  // Clobb: %eax, %edx, x87 stack (six slots used, empty again on return).
  //
  // Every load from in happens before the first store to out.
  // In the stack comments R, U and N are the running vright, vup and vpn
  // sums; the stack is listed top first.
  //----------------------------------------------------------------------

  #define in 4
  #define out 8

  .align 2
  .globl C(TransformVector)
  C(TransformVector):
          movl    in(%esp),%eax
          movl    out(%esp),%edx

  // component 0: push the three products, vpn deepest
          flds    (%eax)
          fmuls   C(vpn)                  // N
          flds    (%eax)
          fmuls   C(vup)                  // U | N
          flds    (%eax)
          fmuls   C(vright)               // R | U | N

  // component 1: three more products, then fold each into its sum
          flds    4(%eax)
          fmuls   C(vpn)+4                // n1 | R | U | N
          flds    4(%eax)
          fmuls   C(vup)+4                // u1 | n1 | R | U | N
          flds    4(%eax)
          fmuls   C(vright)+4             // r1 | u1 | n1 | R | U | N
          faddp   %st(0),%st(3)           // u1 | n1 | R | U | N
          faddp   %st(0),%st(3)           // n1 | R | U | N
          faddp   %st(0),%st(3)           // R | U | N

  // component 2: same pattern
          flds    8(%eax)
          fmuls   C(vpn)+8                // n2 | R | U | N
          flds    8(%eax)
          fmuls   C(vup)+8                // u2 | n2 | R | U | N
          flds    8(%eax)
          fmuls   C(vright)+8             // r2 | u2 | n2 | R | U | N
          faddp   %st(0),%st(3)           // u2 | n2 | R | U | N
          faddp   %st(0),%st(3)           // n2 | R | U | N
          faddp   %st(0),%st(3)           // R | U | N

  // store the results and leave the x87 stack empty
          fstps   (%edx)                  // out[0] = R
          fstps   4(%edx)                 // out[1] = U
          fstps   8(%edx)                 // out[2] = N
          ret
  //----------------------------------------------------------------------
  // BoxOnPlaneSide (emins, emaxs, p)
  //
  // In:    three stack arguments:
  //          emins, emaxs - pointers to three single-precision floats each
  //          p            - pointer to a plane record; its fields are read
  //                         through the pl_normal, pl_dist and pl_signbits
  //                         offsets, which are defined outside this file
  // Out:   %eax = sides, where
  //          bit 0 is set when dist1 >= p->dist
  //          bit 1 is set when dist2 <  p->dist
  //        dist1 and dist2 are dot products of p->normal with a per-axis
  //        mix of emaxs and emins chosen by p->signbits (see each LcaseN).
  // Saves: %ebx (pushed on entry, popped before returning).
  // Clobb: %ecx, %edx, flags, x87 stack (empty again on return).
  //
  // p->signbits must be 0-7.  Any other value goes to Lerror.  The range
  // test is unsigned so that byte values 0x80-0xFF are rejected as well
  // instead of indexing past the end of Ljmptab.
  //
  // Stack comments use n0, n1, n2 for p->normal[0..2] and list the x87
  // stack top first.
  //----------------------------------------------------------------------

  // argument offsets, valid after the single pushl below
  #define EMINS 4+4
  #define EMAXS 4+8
  #define P 4+12

  .align 2
  .globl C(BoxOnPlaneSide)
  C(BoxOnPlaneSide):
          pushl   %ebx

          movl    P(%esp),%edx            // edx = p
          movl    EMINS(%esp),%ecx        // ecx = emins
          xorl    %eax,%eax               // clear eax so al can be used as an index
          movl    EMAXS(%esp),%ebx        // ebx = emaxs
          movb    pl_signbits(%edx),%al
          cmpb    $8,%al
          jae     Lerror                  // unsigned: rejects 8-255
          flds    pl_normal(%edx)         // n0
          fld     %st(0)                  // n0 | n0
          jmp     *Ljmptab(,%eax,4)       // indirect jump to Lcase<signbits>

  //dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
  //dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
  Lcase0:
          fmuls   (%ebx)                  // n0*emaxs[0] | n0
          flds    pl_normal+4(%edx)       // n1 | n0*emaxs[0] | n0
          fxch    %st(2)                  // n0 | n0*emaxs[0] | n1
          fmuls   (%ecx)                  // n0*emins[0] | n0*emaxs[0] | n1
          fxch    %st(2)                  // n1 | n0*emaxs[0] | n0*emins[0]
          fld     %st(0)                  // n1 | n1 | n0*emaxs[0] | n0*emins[0]
          fmuls   4(%ebx)                 // n1*emaxs[1] | n1 | n0*emaxs[0] |
                                          //  n0*emins[0]
          flds    pl_normal+8(%edx)       // n2 | n1*emaxs[1] | n1 | n0*emaxs[0] |
                                          //  n0*emins[0]
          fxch    %st(2)                  // n1 | n1*emaxs[1] | n2 | n0*emaxs[0] |
                                          //  n0*emins[0]
          fmuls   4(%ecx)                 // n1*emins[1] | n1*emaxs[1] | n2 |
                                          //  n0*emaxs[0] | n0*emins[0]
          fxch    %st(2)                  // n2 | n1*emaxs[1] | n1*emins[1] |
                                          //  n0*emaxs[0] | n0*emins[0]
          fld     %st(0)                  // n2 | n2 | n1*emaxs[1] | n1*emins[1] |
                                          //  n0*emaxs[0] | n0*emins[0]
          fmuls   8(%ebx)                 // n2*emaxs[2] | n2 | n1*emaxs[1] |
                                          //  n1*emins[1] | n0*emaxs[0] | n0*emins[0]
          fxch    %st(5)                  // n0*emins[0] | n2 | n1*emaxs[1] |
                                          //  n1*emins[1] | n0*emaxs[0] | n2*emaxs[2]
          faddp   %st(0),%st(3)           // n2 | n1*emaxs[1] |
                                          //  n1*emins[1]+n0*emins[0] |
                                          //  n0*emaxs[0] | n2*emaxs[2]
          fmuls   8(%ecx)                 // n2*emins[2] | n1*emaxs[1] |
                                          //  n1*emins[1]+n0*emins[0] |
                                          //  n0*emaxs[0] | n2*emaxs[2]
          fxch    %st(1)                  // n1*emaxs[1] | n2*emins[2] |
                                          //  n1*emins[1]+n0*emins[0] |
                                          //  n0*emaxs[0] | n2*emaxs[2]
          faddp   %st(0),%st(3)           // n2*emins[2] |
                                          //  n1*emins[1]+n0*emins[0] |
                                          //  n0*emaxs[0]+n1*emaxs[1] | n2*emaxs[2]
          fxch    %st(3)                  // n2*emaxs[2] |
                                          //  n1*emins[1]+n0*emins[0] |
                                          //  n0*emaxs[0]+n1*emaxs[1] | n2*emins[2]
          faddp   %st(0),%st(2)           // n1*emins[1]+n0*emins[0] | dist1 |
                                          //  n2*emins[2]
          jmp     LSetSides

  // Lcase1-Lcase7 run the same instruction schedule as Lcase0 and leave
  // the same stack layout (partial dist2 | dist1 | last dist2 term); only
  // the choice of emins (%ecx) or emaxs (%ebx) per axis differs.

  //dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
  //dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
  Lcase1:
          fmuls   (%ecx)                  // emins[0]
          flds    pl_normal+4(%edx)
          fxch    %st(2)
          fmuls   (%ebx)                  // emaxs[0]
          fxch    %st(2)
          fld     %st(0)
          fmuls   4(%ebx)                 // emaxs[1]
          flds    pl_normal+8(%edx)
          fxch    %st(2)
          fmuls   4(%ecx)                 // emins[1]
          fxch    %st(2)
          fld     %st(0)
          fmuls   8(%ebx)                 // emaxs[2]
          fxch    %st(5)
          faddp   %st(0),%st(3)
          fmuls   8(%ecx)                 // emins[2]
          fxch    %st(1)
          faddp   %st(0),%st(3)
          fxch    %st(3)
          faddp   %st(0),%st(2)
          jmp     LSetSides

  //dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
  //dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
  Lcase2:
          fmuls   (%ebx)                  // emaxs[0]
          flds    pl_normal+4(%edx)
          fxch    %st(2)
          fmuls   (%ecx)                  // emins[0]
          fxch    %st(2)
          fld     %st(0)
          fmuls   4(%ecx)                 // emins[1]
          flds    pl_normal+8(%edx)
          fxch    %st(2)
          fmuls   4(%ebx)                 // emaxs[1]
          fxch    %st(2)
          fld     %st(0)
          fmuls   8(%ebx)                 // emaxs[2]
          fxch    %st(5)
          faddp   %st(0),%st(3)
          fmuls   8(%ecx)                 // emins[2]
          fxch    %st(1)
          faddp   %st(0),%st(3)
          fxch    %st(3)
          faddp   %st(0),%st(2)
          jmp     LSetSides

  //dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
  //dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
  Lcase3:
          fmuls   (%ecx)                  // emins[0]
          flds    pl_normal+4(%edx)
          fxch    %st(2)
          fmuls   (%ebx)                  // emaxs[0]
          fxch    %st(2)
          fld     %st(0)
          fmuls   4(%ecx)                 // emins[1]
          flds    pl_normal+8(%edx)
          fxch    %st(2)
          fmuls   4(%ebx)                 // emaxs[1]
          fxch    %st(2)
          fld     %st(0)
          fmuls   8(%ebx)                 // emaxs[2]
          fxch    %st(5)
          faddp   %st(0),%st(3)
          fmuls   8(%ecx)                 // emins[2]
          fxch    %st(1)
          faddp   %st(0),%st(3)
          fxch    %st(3)
          faddp   %st(0),%st(2)
          jmp     LSetSides

  //dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
  //dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
  Lcase4:
          fmuls   (%ebx)                  // emaxs[0]
          flds    pl_normal+4(%edx)
          fxch    %st(2)
          fmuls   (%ecx)                  // emins[0]
          fxch    %st(2)
          fld     %st(0)
          fmuls   4(%ebx)                 // emaxs[1]
          flds    pl_normal+8(%edx)
          fxch    %st(2)
          fmuls   4(%ecx)                 // emins[1]
          fxch    %st(2)
          fld     %st(0)
          fmuls   8(%ecx)                 // emins[2]
          fxch    %st(5)
          faddp   %st(0),%st(3)
          fmuls   8(%ebx)                 // emaxs[2]
          fxch    %st(1)
          faddp   %st(0),%st(3)
          fxch    %st(3)
          faddp   %st(0),%st(2)
          jmp     LSetSides

  //dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
  //dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
  Lcase5:
          fmuls   (%ecx)                  // emins[0]
          flds    pl_normal+4(%edx)
          fxch    %st(2)
          fmuls   (%ebx)                  // emaxs[0]
          fxch    %st(2)
          fld     %st(0)
          fmuls   4(%ebx)                 // emaxs[1]
          flds    pl_normal+8(%edx)
          fxch    %st(2)
          fmuls   4(%ecx)                 // emins[1]
          fxch    %st(2)
          fld     %st(0)
          fmuls   8(%ecx)                 // emins[2]
          fxch    %st(5)
          faddp   %st(0),%st(3)
          fmuls   8(%ebx)                 // emaxs[2]
          fxch    %st(1)
          faddp   %st(0),%st(3)
          fxch    %st(3)
          faddp   %st(0),%st(2)
          jmp     LSetSides

  //dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
  //dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
  Lcase6:
          fmuls   (%ebx)                  // emaxs[0]
          flds    pl_normal+4(%edx)
          fxch    %st(2)
          fmuls   (%ecx)                  // emins[0]
          fxch    %st(2)
          fld     %st(0)
          fmuls   4(%ecx)                 // emins[1]
          flds    pl_normal+8(%edx)
          fxch    %st(2)
          fmuls   4(%ebx)                 // emaxs[1]
          fxch    %st(2)
          fld     %st(0)
          fmuls   8(%ecx)                 // emins[2]
          fxch    %st(5)
          faddp   %st(0),%st(3)
          fmuls   8(%ebx)                 // emaxs[2]
          fxch    %st(1)
          faddp   %st(0),%st(3)
          fxch    %st(3)
          faddp   %st(0),%st(2)
          jmp     LSetSides

  //dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
  //dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
  Lcase7:
          fmuls   (%ecx)                  // emins[0]
          flds    pl_normal+4(%edx)
          fxch    %st(2)
          fmuls   (%ebx)                  // emaxs[0]
          fxch    %st(2)
          fld     %st(0)
          fmuls   4(%ecx)                 // emins[1]
          flds    pl_normal+8(%edx)
          fxch    %st(2)
          fmuls   4(%ebx)                 // emaxs[1]
          fxch    %st(2)
          fld     %st(0)
          fmuls   8(%ecx)                 // emins[2]
          fxch    %st(5)
          faddp   %st(0),%st(3)
          fmuls   8(%ebx)                 // emaxs[2]
          fxch    %st(1)
          faddp   %st(0),%st(3)
          fxch    %st(3)
          faddp   %st(0),%st(2)
          // falls through into LSetSides

  LSetSides:

  //      sides = 0;
  //      if (dist1 >= p->dist)
  //              sides = 1;
  //      if (dist2 < p->dist)
  //              sides |= 2;

          faddp   %st(0),%st(2)           // dist1 | dist2
          fcomps  pl_dist(%edx)           // compare dist1 with p->dist, pop
          xorl    %ecx,%ecx               // sides = 0
          fnstsw  %ax                     // status word of the dist1 compare
          fcomps  pl_dist(%edx)           // compare dist2 with p->dist, pop
          andb    $1,%ah                  // keep the less-than bit (C0)
          xorb    $1,%ah                  // invert it: 1 when dist1 >= p->dist
          addb    %ah,%cl                 // sides bit 0
          fnstsw  %ax                     // status word of the dist2 compare
          andb    $1,%ah                  // 1 when dist2 < p->dist
          addb    %ah,%ah                 // move it up to bit 1
          addb    %ah,%cl                 // sides bit 1

  //      return sides;

          popl    %ebx
          movl    %ecx,%eax               // return status
          ret

  // Reached only when p->signbits is out of range.  Nothing has been
  // pushed on the x87 stack yet at this point.
  Lerror:
          call    C(BOPS_Error)
          // NOTE(review): BOPS_Error presumably does not return - confirm.
          // If it ever does, restore %ebx and return 0 instead of running
          // off the end of the function.
          xorl    %eax,%eax
          popl    %ebx
          ret
  361. #endif // id386