/*
 *  linux/arch/arm/lib/div64.S
 *
 *  Optimized computation of 64-bit dividend / 32-bit divisor
 *
 *  Author:	Nicolas Pitre
 *  Created:	Oct 5, 2003
 *  Copyright:	Monta Vista Software, Inc.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

/*
 * Register aliases for the two 64-bit quantities handled below:
 * xh/xl are the high/low words held in r0-r1 and yh/yl the high/low
 * words held in r2-r3.  When __ARMEB__ is defined the high word lives
 * in the lower-numbered register of each pair, otherwise the low word
 * does.
 */
#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 * 	xh-xl	= dividend (clobbered)
 * 	r4	= divisor (preserved)
 *
 * Output values:
 * 	yh-yl	= result
 * 	xh	= remainder
 *
 * Clobbered regs: xl, ip (the condition flags are modified as well)
 *
 * Paths taken, in the order they are tested:
 *	- divisor is 0: __div0 is called, then result and remainder are
 *	  returned as zero.  Only this path touches the stack (8 bytes).
 *	- divisor is 1: result = dividend, remainder = 0.
 *	- divisor is a power of 2: plain shifts.
 *	- otherwise: shift-and-subtract, first for the upper 32 result
 *	  bits (only when xh >= divisor), then for the lower 32 bits.
 */

ENTRY(__do_div64)
UNWIND(.fnstart)

	@ Test for easy paths first.
	subs	ip, r4, #1		@ ip = divisor - 1, flags describe the divisor
	bls	9f			@ divisor is 0 or 1
	tst	ip, r4			@ no bit shared with divisor - 1 ...
	beq	8f			@ divisor is power of 2

	@ See if we need to handle upper 32-bit result.
	@ When xh is below the divisor the upper result word stays zero.
	cmp	xh, r4
	mov	yh, #0
	blo	3f

	@ Align divisor with upper part of dividend.
	@ The aligned divisor is stored in yl preserving the original.
	@ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

	@ Shift count is the difference of the leading zero counts, which
	@ lines up the top set bit of the divisor with the top set bit of xh.
	clz	yl, r4
	clz	ip, xh
	sub	yl, yl, ip
	mov	ip, #1
	mov	ip, ip, lsl yl
	mov	yl, r4, lsl yl

#else

	@ Without clz: keep doubling the divisor copy and the bit mask
	@ while the copy has its top bit clear and is still below xh.
	mov	yl, r4
	mov	ip, #1
1:	cmp	yl, #0x80000000
	cmpcc	yl, xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	1b

#endif

	@ The division loop for needed upper bit positions.
	@ Break out early if dividend reaches 0.
	@ Each pass: if xh >= yl, record the result bit in yh and subtract.
	@ The conditional flag-setting subtract leaves Z set once xh hits
	@ zero; in that case the mask shift is skipped and the loop ends.
	@ Otherwise the loop ends when the mask in ip shifts out to zero.
	@ NOTE(review): subcss and movnes use the pre-UAL spelling (condition
	@ code before the S suffix); confirm the assembler syntax mode selected
	@ by the included headers before respelling them.
2:	cmp	xh, yl
	orrcs	yh, yh, ip
	subcss	xh, xh, yl
	movnes	ip, ip, lsr #1
	mov	yl, yl, lsr #1
	bne	2b

	@ See if we need to handle lower 32-bit result.
	@ With xh zero and xl below the divisor, xl already is the
	@ remainder and the lower result word is zero.
3:	cmp	xh, #0
	mov	yl, #0
	cmpeq	xl, r4
	movlo	xh, xl
	retlo	lr

	@ The division loop for lower bit positions.
	@ Here we shift remainder bits leftwards rather than moving the
	@ divisor for comparisons, considering the carry-out bit as well.
	mov	ip, #0x80000000		@ mask of the result bit being computed
4:	movs	xl, xl, lsl #1		@ top bit of xl goes to carry
	adcs	xh, xh, xh		@ xh = 2 * xh + carry, carry out = 33rd bit
	beq	6f
	cmpcc	xh, r4			@ compare only when no 33rd bit came out
5:	orrcs	yl, yl, ip		@ carry set: divisor fits, record the bit
	subcs	xh, xh, r4
	movs	ip, ip, lsr #1
	bne	4b
	ret	lr

	@ The top part of remainder became zero.  If carry is set
	@ (the 33rd bit) this is a false positive so resume the loop.
	@ Otherwise, if lower part is also null then we are done.
6:	bcs	5b
	cmp	xl, #0
	reteq	lr

	@ We still have remainder bits in the low part.  Bring them up.
	@ Both variants shift xl left until its leading one bit has been
	@ shifted out, moving the mask in ip right by the same amount.

#if __LINUX_ARM_ARCH__ >= 5

	clz	xh, xl			@ we know xh is zero here so...
	add	xh, xh, #1		@ ... it is free to hold the shift count
	mov	xl, xl, lsl xh
	mov	ip, ip, lsr xh

#else

7:	movs	xl, xl, lsl #1
	mov	ip, ip, lsr #1
	bcc	7b

#endif

	@ Current remainder is now 1.  It is worthless to compare with
	@ divisor at this point since divisor can not be smaller than 3 here.
	@ If possible, branch for another shift in the division loop.
	@ If no bit position left then we are done.
	movs	ip, ip, lsr #1
	mov	xh, #1
	bne	4b
	ret	lr

8:	@ Division by a power of 2: determine what that divisor order is
	@ then simply shift values around

#if __LINUX_ARM_ARCH__ >= 5

	clz	ip, r4
	rsb	ip, ip, #31		@ ip = index of the single set bit

#else

	@ Binary search for the set bit: strip 16, 8 then 4 bit positions
	@ from the copy in yl while accumulating the index in ip.
	mov	yl, r4
	cmp	r4, #(1 << 16)
	mov	ip, #0
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	@ yl is now 1, 2, 4 or 8: add 3 for 8, else yl / 2 (0, 1 or 2).
	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3
	addls	ip, ip, yl, lsr #1

#endif

	@ result = dividend >> order, remainder = low order bits of xl.
	mov	yh, xh, lsr ip
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32		@ ip = 32 - order
 ARM(	orr	yl, yl, xh, lsl ip	)
 THUMB(	lsl	xh, xh, ip		)
 THUMB(	orr	yl, yl, xh		)
	mov	xh, xl, lsl ip		@ shift up then down again to keep
	mov	xh, xh, lsr ip		@ only the low order bits of xl
	ret	lr

	@ eq -> division by 1: obvious enough...
	@ The flags are still those of the subs at function entry, so ne
	@ here means the divisor is 0 and execution falls through to
	@ Ldiv0_64 below.
9:	moveq	yl, xl
	moveq	yh, xh
	moveq	xh, #0
	reteq	lr
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
	@ Division by 0:
	@ lr is pushed with an 8 byte stack adjustment, which the unwind
	@ annotations above describe as 4 bytes of padding plus the saved lr.
	str	lr, [sp, #-8]!
	bl	__div0

	@ as wrong as it could be...
	mov	yl, #0
	mov	yh, #0
	mov	xh, #0
	ldr	pc, [sp], #8		@ pop the saved lr straight into pc

UNWIND(.fnend)
ENDPROC(__do_div64)