polynom_Xsig.S 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. /*---------------------------------------------------------------------------+
  2. | polynomial_Xsig.S |
  3. | |
  4. | Fixed point arithmetic polynomial evaluation. |
  5. | |
  6. | Copyright (C) 1992,1993,1994,1995 |
  7. | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
  8. | Australia. E-mail billm@jacobi.maths.monash.edu.au |
  9. | |
  10. | Call from C as: |
  11. | void polynomial_Xsig(Xsig *accum, unsigned long long x, |
  12. | unsigned long long terms[], int n) |
  13. | |
  14. | Computes: |
  15. | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x |
  16. | and adds the result to the 12 byte Xsig. |
  17. | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
  18. | precision. |
  19. | |
  20. | This function must be used carefully: most overflow of intermediate |
  21. | results is controlled, but overflow of the result is not. |
  22. | |
  23. +---------------------------------------------------------------------------*/
  24. .file "polynomial_Xsig.S"
  25. #include "fpu_emu.h"
  26. #define TERM_SIZE $8
  27. #define SUM_MS -20(%ebp) /* sum ms long */
  28. #define SUM_MIDDLE -24(%ebp) /* sum middle long */
  29. #define SUM_LS -28(%ebp) /* sum ls long */
  30. #define ACCUM_MS -4(%ebp) /* accum ms long */
  31. #define ACCUM_MIDDLE -8(%ebp) /* accum middle long */
  32. #define ACCUM_LS -12(%ebp) /* accum ls long */
  33. #define OVERFLOWED -16(%ebp) /* addition overflow flag */
  34. .text
  35. ENTRY(polynomial_Xsig)
  36. pushl %ebp
  37. movl %esp,%ebp
  38. subl $32,%esp
  39. pushl %esi
  40. pushl %edi
  41. pushl %ebx
  42. movl PARAM2,%esi /* x */
  43. movl PARAM3,%edi /* terms */
  44. movl TERM_SIZE,%eax
  45. mull PARAM4 /* n */
  46. addl %eax,%edi
  47. movl 4(%edi),%edx /* terms[n] */
  48. movl %edx,SUM_MS
  49. movl (%edi),%edx /* terms[n] */
  50. movl %edx,SUM_MIDDLE
  51. xor %eax,%eax
  52. movl %eax,SUM_LS
  53. movb %al,OVERFLOWED
  54. subl TERM_SIZE,%edi
  55. decl PARAM4
  56. js L_accum_done
  57. L_accum_loop:
  58. xor %eax,%eax
  59. movl %eax,ACCUM_MS
  60. movl %eax,ACCUM_MIDDLE
  61. movl SUM_MIDDLE,%eax
  62. mull (%esi) /* x ls long */
  63. movl %edx,ACCUM_LS
  64. movl SUM_MIDDLE,%eax
  65. mull 4(%esi) /* x ms long */
  66. addl %eax,ACCUM_LS
  67. adcl %edx,ACCUM_MIDDLE
  68. adcl $0,ACCUM_MS
  69. movl SUM_MS,%eax
  70. mull (%esi) /* x ls long */
  71. addl %eax,ACCUM_LS
  72. adcl %edx,ACCUM_MIDDLE
  73. adcl $0,ACCUM_MS
  74. movl SUM_MS,%eax
  75. mull 4(%esi) /* x ms long */
  76. addl %eax,ACCUM_MIDDLE
  77. adcl %edx,ACCUM_MS
  78. testb $0xff,OVERFLOWED
  79. jz L_no_overflow
  80. movl (%esi),%eax
  81. addl %eax,ACCUM_MIDDLE
  82. movl 4(%esi),%eax
  83. adcl %eax,ACCUM_MS /* This could overflow too */
  84. L_no_overflow:
  85. /*
  86. * Now put the sum of next term and the accumulator
  87. * into the sum register
  88. */
  89. movl ACCUM_LS,%eax
  90. addl (%edi),%eax /* term ls long */
  91. movl %eax,SUM_LS
  92. movl ACCUM_MIDDLE,%eax
  93. adcl (%edi),%eax /* term ls long */
  94. movl %eax,SUM_MIDDLE
  95. movl ACCUM_MS,%eax
  96. adcl 4(%edi),%eax /* term ms long */
  97. movl %eax,SUM_MS
  98. sbbb %al,%al
  99. movb %al,OVERFLOWED /* Used in the next iteration */
  100. subl TERM_SIZE,%edi
  101. decl PARAM4
  102. jns L_accum_loop
  103. L_accum_done:
  104. movl PARAM1,%edi /* accum */
  105. movl SUM_LS,%eax
  106. addl %eax,(%edi)
  107. movl SUM_MIDDLE,%eax
  108. adcl %eax,4(%edi)
  109. movl SUM_MS,%eax
  110. adcl %eax,8(%edi)
  111. popl %ebx
  112. popl %edi
  113. popl %esi
  114. leave
  115. ret