div_Xsig.S 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. .file "div_Xsig.S"
  2. /*---------------------------------------------------------------------------+
  3. | div_Xsig.S |
  4. | |
  5. | Division subroutine for 96 bit quantities |
  6. | |
  7. | Copyright (C) 1994,1995 |
  8. | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
  9. | Australia. E-mail billm@jacobi.maths.monash.edu.au |
  10. | |
  11. | |
  12. +---------------------------------------------------------------------------*/
  13. /*---------------------------------------------------------------------------+
  14. | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and |
  15. | put the 96 bit result at the location d. |
  16. | |
  17. | The result may not be accurate to 96 bits. It is intended for use where |
  18. | a result better than 64 bits is required. The result should usually be |
  19. | good to at least 94 bits. |
  20. | The returned result is actually divided by one half. This is done to |
  21. | prevent overflow. |
  22. | |
  23. | .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd |
  24. | |
  25. | void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) |
  26. | |
  27. +---------------------------------------------------------------------------*/
  28. #include "exception.h"
  29. #include "fpu_emu.h"
  30. #define XsigLL(x) (x)
  31. #define XsigL(x) 4(x)
  32. #define XsigH(x) 8(x)
  33. #ifndef NON_REENTRANT_FPU
  34. /*
  35. Local storage on the stack:
  36. Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
  37. */
  38. #define FPU_accum_3 -4(%ebp)
  39. #define FPU_accum_2 -8(%ebp)
  40. #define FPU_accum_1 -12(%ebp)
  41. #define FPU_accum_0 -16(%ebp)
  42. #define FPU_result_3 -20(%ebp)
  43. #define FPU_result_2 -24(%ebp)
  44. #define FPU_result_1 -28(%ebp)
  45. #else
  46. .data
  47. /*
  48. Local storage in a static area:
  49. Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
  50. */
  51. .align 4,0
  52. FPU_accum_3:
  53. .long 0
  54. FPU_accum_2:
  55. .long 0
  56. FPU_accum_1:
  57. .long 0
  58. FPU_accum_0:
  59. .long 0
  60. FPU_result_3:
  61. .long 0
  62. FPU_result_2:
  63. .long 0
  64. FPU_result_1:
  65. .long 0
  66. #endif /* NON_REENTRANT_FPU */
  67. .text
  68. ENTRY(div_Xsig)
  69. pushl %ebp
  70. movl %esp,%ebp
  71. #ifndef NON_REENTRANT_FPU
  72. subl $28,%esp
  73. #endif /* NON_REENTRANT_FPU */
  74. pushl %esi
  75. pushl %edi
  76. pushl %ebx
  77. movl PARAM1,%esi /* pointer to num */
  78. movl PARAM2,%ebx /* pointer to denom */
  79. #ifdef PARANOID
  80. testl $0x80000000, XsigH(%ebx) /* Divisor */
  81. je L_bugged
  82. #endif /* PARANOID */
  83. /*---------------------------------------------------------------------------+
  84. | Divide: Return arg1/arg2 to arg3. |
  85. | |
  86. | The maximum returned value is (ignoring exponents) |
  87. | .ffffffff ffffffff |
  88. | ------------------ = 1.ffffffff fffffffe |
  89. | .80000000 00000000 |
  90. | and the minimum is |
  91. | .80000000 00000000 |
  92. | ------------------ = .80000000 00000001 (rounded) |
  93. | .ffffffff ffffffff |
  94. | |
  95. +---------------------------------------------------------------------------*/
  96. /* Save extended dividend in local register */
  97. /* Divide by 2 to prevent overflow */
  98. clc
  99. movl XsigH(%esi),%eax
  100. rcrl %eax
  101. movl %eax,FPU_accum_3
  102. movl XsigL(%esi),%eax
  103. rcrl %eax
  104. movl %eax,FPU_accum_2
  105. movl XsigLL(%esi),%eax
  106. rcrl %eax
  107. movl %eax,FPU_accum_1
  108. movl $0,%eax
  109. rcrl %eax
  110. movl %eax,FPU_accum_0
  111. movl FPU_accum_2,%eax /* Get the current num */
  112. movl FPU_accum_3,%edx
  113. /*----------------------------------------------------------------------*/
  114. /* Initialization done.
  115. Do the first 32 bits. */
  116. /* We will divide by a number which is too large */
  117. movl XsigH(%ebx),%ecx
  118. addl $1,%ecx
  119. jnc LFirst_div_not_1
  120. /* here we need to divide by 100000000h,
  121. i.e., no division at all.. */
  122. mov %edx,%eax
  123. jmp LFirst_div_done
  124. LFirst_div_not_1:
  125. divl %ecx /* Divide the numerator by the augmented
  126. denom ms dw */
  127. LFirst_div_done:
  128. movl %eax,FPU_result_3 /* Put the result in the answer */
  129. mull XsigH(%ebx) /* mul by the ms dw of the denom */
  130. subl %eax,FPU_accum_2 /* Subtract from the num local reg */
  131. sbbl %edx,FPU_accum_3
  132. movl FPU_result_3,%eax /* Get the result back */
  133. mull XsigL(%ebx) /* now mul the ls dw of the denom */
  134. subl %eax,FPU_accum_1 /* Subtract from the num local reg */
  135. sbbl %edx,FPU_accum_2
  136. sbbl $0,FPU_accum_3
  137. je LDo_2nd_32_bits /* Must check for non-zero result here */
  138. #ifdef PARANOID
  139. jb L_bugged_1
  140. #endif /* PARANOID */
  141. /* need to subtract another once of the denom */
  142. incl FPU_result_3 /* Correct the answer */
  143. movl XsigL(%ebx),%eax
  144. movl XsigH(%ebx),%edx
  145. subl %eax,FPU_accum_1 /* Subtract from the num local reg */
  146. sbbl %edx,FPU_accum_2
  147. #ifdef PARANOID
  148. sbbl $0,FPU_accum_3
  149. jne L_bugged_1 /* Must check for non-zero result here */
  150. #endif /* PARANOID */
  151. /*----------------------------------------------------------------------*/
  152. /* Half of the main problem is done, there is just a reduced numerator
  153. to handle now.
  154. Work with the second 32 bits, FPU_accum_0 not used from now on */
  155. LDo_2nd_32_bits:
  156. movl FPU_accum_2,%edx /* get the reduced num */
  157. movl FPU_accum_1,%eax
  158. /* need to check for possible subsequent overflow */
  159. cmpl XsigH(%ebx),%edx
  160. jb LDo_2nd_div
  161. ja LPrevent_2nd_overflow
  162. cmpl XsigL(%ebx),%eax
  163. jb LDo_2nd_div
  164. LPrevent_2nd_overflow:
  165. /* The numerator is greater or equal, would cause overflow */
  166. /* prevent overflow */
  167. subl XsigL(%ebx),%eax
  168. sbbl XsigH(%ebx),%edx
  169. movl %edx,FPU_accum_2
  170. movl %eax,FPU_accum_1
  171. incl FPU_result_3 /* Reflect the subtraction in the answer */
  172. #ifdef PARANOID
  173. je L_bugged_2 /* Can't bump the result to 1.0 */
  174. #endif /* PARANOID */
  175. LDo_2nd_div:
  176. cmpl $0,%ecx /* augmented denom msw */
  177. jnz LSecond_div_not_1
  178. /* %ecx == 0, we are dividing by 1.0 */
  179. mov %edx,%eax
  180. jmp LSecond_div_done
  181. LSecond_div_not_1:
  182. divl %ecx /* Divide the numerator by the denom ms dw */
  183. LSecond_div_done:
  184. movl %eax,FPU_result_2 /* Put the result in the answer */
  185. mull XsigH(%ebx) /* mul by the ms dw of the denom */
  186. subl %eax,FPU_accum_1 /* Subtract from the num local reg */
  187. sbbl %edx,FPU_accum_2
  188. #ifdef PARANOID
  189. jc L_bugged_2
  190. #endif /* PARANOID */
  191. movl FPU_result_2,%eax /* Get the result back */
  192. mull XsigL(%ebx) /* now mul the ls dw of the denom */
  193. subl %eax,FPU_accum_0 /* Subtract from the num local reg */
  194. sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */
  195. sbbl $0,FPU_accum_2
  196. #ifdef PARANOID
  197. jc L_bugged_2
  198. #endif /* PARANOID */
  199. jz LDo_3rd_32_bits
  200. #ifdef PARANOID
  201. cmpl $1,FPU_accum_2
  202. jne L_bugged_2
  203. #endif /* PARANOID */
  204. /* need to subtract another once of the denom */
  205. movl XsigL(%ebx),%eax
  206. movl XsigH(%ebx),%edx
  207. subl %eax,FPU_accum_0 /* Subtract from the num local reg */
  208. sbbl %edx,FPU_accum_1
  209. sbbl $0,FPU_accum_2
  210. #ifdef PARANOID
  211. jc L_bugged_2
  212. jne L_bugged_2
  213. #endif /* PARANOID */
  214. addl $1,FPU_result_2 /* Correct the answer */
  215. adcl $0,FPU_result_3
  216. #ifdef PARANOID
  217. jc L_bugged_2 /* Must check for non-zero result here */
  218. #endif /* PARANOID */
  219. /*----------------------------------------------------------------------*/
  220. /* The division is essentially finished here, we just need to perform
  221. tidying operations.
  222. Deal with the 3rd 32 bits */
  223. LDo_3rd_32_bits:
  224. /* We use an approximation for the third 32 bits.
  225. To take account of the 3rd 32 bits of the divisor
  226. (call them del), we subtract del * (a/b) */
  227. movl FPU_result_3,%eax /* a/b */
  228. mull XsigLL(%ebx) /* del */
  229. subl %edx,FPU_accum_1
  230. /* A borrow indicates that the result is negative */
  231. jnb LTest_over
  232. movl XsigH(%ebx),%edx
  233. addl %edx,FPU_accum_1
  234. subl $1,FPU_result_2 /* Adjust the answer */
  235. sbbl $0,FPU_result_3
  236. /* The above addition might not have been enough, check again. */
  237. movl FPU_accum_1,%edx /* get the reduced num */
  238. cmpl XsigH(%ebx),%edx /* denom */
  239. jb LDo_3rd_div
  240. movl XsigH(%ebx),%edx
  241. addl %edx,FPU_accum_1
  242. subl $1,FPU_result_2 /* Adjust the answer */
  243. sbbl $0,FPU_result_3
  244. jmp LDo_3rd_div
  245. LTest_over:
  246. movl FPU_accum_1,%edx /* get the reduced num */
  247. /* need to check for possible subsequent overflow */
  248. cmpl XsigH(%ebx),%edx /* denom */
  249. jb LDo_3rd_div
  250. /* prevent overflow */
  251. subl XsigH(%ebx),%edx
  252. movl %edx,FPU_accum_1
  253. addl $1,FPU_result_2 /* Reflect the subtraction in the answer */
  254. adcl $0,FPU_result_3
  255. LDo_3rd_div:
  256. movl FPU_accum_0,%eax
  257. movl FPU_accum_1,%edx
  258. divl XsigH(%ebx)
  259. movl %eax,FPU_result_1 /* Rough estimate of third word */
  260. movl PARAM3,%esi /* pointer to answer */
  261. movl FPU_result_1,%eax
  262. movl %eax,XsigLL(%esi)
  263. movl FPU_result_2,%eax
  264. movl %eax,XsigL(%esi)
  265. movl FPU_result_3,%eax
  266. movl %eax,XsigH(%esi)
  267. L_exit:
  268. popl %ebx
  269. popl %edi
  270. popl %esi
  271. leave
  272. ret
  273. #ifdef PARANOID
  274. /* The logic is wrong if we got here */
  275. L_bugged:
  276. pushl EX_INTERNAL|0x240
  277. call EXCEPTION
  278. pop %ebx
  279. jmp L_exit
  280. L_bugged_1:
  281. pushl EX_INTERNAL|0x241
  282. call EXCEPTION
  283. pop %ebx
  284. jmp L_exit
  285. L_bugged_2:
  286. pushl EX_INTERNAL|0x242
  287. call EXCEPTION
  288. pop %ebx
  289. jmp L_exit
  290. #endif /* PARANOID */