div64.S 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. /*
  2. * linux/arch/arm/lib/div64.S
  3. *
  4. * Optimized computation of 64-bit dividend / 32-bit divisor
  5. *
  6. * Author: Nicolas Pitre
  7. * Created: Oct 5, 2003
  8. * Copyright: Monta Vista Software, Inc.
  9. *
  10. * This program is free software; you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License version 2 as
  12. * published by the Free Software Foundation.
  13. */
  14. #include <linux/linkage.h>
  15. #include <asm/unwind.h>
  16. #ifdef __ARMEB__ /* compiler-defined on big-endian ARM: high word sits in the lower-numbered register */
  17. #define xh r0 /* dividend, high word (holds the remainder on return) */
  18. #define xl r1 /* dividend, low word */
  19. #define yh r2 /* result, high word */
  20. #define yl r3 /* result, low word */
  21. #else /* little-endian: low word sits in the lower-numbered register */
  22. #define xl r0 /* dividend, low word */
  23. #define xh r1 /* dividend, high word (holds the remainder on return) */
  24. #define yl r2 /* result, low word */
  25. #define yh r3 /* result, high word */
  26. #endif /* __ARMEB__ */
  27. /*
  28. * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
  29. *
  30. * Note: Calling convention is totally non standard for optimal code.
  31. * This is meant to be used by do_div() from include/asm/div64.h only.
  32. *
  33. * Input parameters:
  34. * xh-xl = dividend (clobbered)
  35. * r4 = divisor (preserved)
  36. *
  37. * Output values:
  38. * yh-yl = result
  39. * xh = remainder
  40. * Division by zero calls __div0, then returns with yh-yl = 0 and xh = 0.
  41. * Clobbered regs: xl, ip
  42. */
  43. ENTRY(__do_div64)
  44. UNWIND(.fnstart)
  45. @ Test for easy paths first.
  46. subs ip, r4, #1 @ ip = divisor - 1, flags updated
  47. bls 9f @ divisor is 0 or 1
  48. tst ip, r4 @ divisor & (divisor - 1) is zero only for a single set bit
  49. beq 8f @ divisor is power of 2
  50. @ See if we need to handle upper 32-bit result.
  51. cmp xh, r4
  52. mov yh, #0 @ plain mov: the flags from the cmp above survive
  53. blo 3f @ xh < divisor: upper half of the result stays 0
  54. @ Align divisor with upper part of dividend.
  55. @ The aligned divisor is stored in yl preserving the original.
  56. @ The bit position is stored in ip.
  57. #if __LINUX_ARM_ARCH__ >= 5
  58. clz yl, r4
  59. clz ip, xh
  60. sub yl, yl, ip @ yl = shift that lines up the top set bits of divisor and xh
  61. mov ip, #1
  62. mov ip, ip, lsl yl @ ip = result bit matching that shift
  63. mov yl, r4, lsl yl @ yl = aligned divisor
  64. #else
  65. mov yl, r4
  66. mov ip, #1
  67. 1: cmp yl, #0x80000000 @ cc while the top bit of yl is still clear
  68. cmpcc yl, xh @ ... and then cc only while yl < xh
  69. movcc yl, yl, lsl #1 @ shift divisor and bit position up together
  70. movcc ip, ip, lsl #1
  71. bcc 1b
  72. #endif
  73. @ The division loop for needed upper bit positions.
  74. @ Break out early if dividend reaches 0.
  75. 2: cmp xh, yl
  76. orrcs yh, yh, ip @ xh >= yl: set this bit in the upper result
  77. subcss xh, xh, yl @ ... and subtract; Z set if xh is now 0
  78. movnes ip, ip, lsr #1 @ skipped when xh reached 0 (Z stays set); else Z set once ip is shifted out
  79. mov yl, yl, lsr #1 @ plain mov: Z from above still decides the branch
  80. bne 2b
  81. @ See if we need to handle lower 32-bit result.
  82. 3: cmp xh, #0
  83. mov yl, #0
  84. cmpeq xl, r4 @ only performed when xh == 0
  85. movlo xh, xl @ xh == 0 and xl < divisor: xl is the remainder
  86. movlo pc, lr @ ... and the lower result is 0, so return
  87. @ The division loop for lower bit positions.
  88. @ Here we shift remainder bits leftwards rather than moving the
  89. @ divisor for comparisons, considering the carry-out bit as well.
  90. mov ip, #0x80000000 @ start at the top bit of the lower result
  91. 4: movs xl, xl, lsl #1 @ top bit of xl goes into carry
  92. adcs xh, xh, xh @ xh = 2 * xh + carry; sets carry out and Z
  93. beq 6f @ low 32 bits of the remainder are zero
  94. cmpcc xh, r4 @ no carry out: compare remainder with divisor
  95. 5: orrcs yl, yl, ip @ carry out or xh >= divisor: set this result bit
  96. subcs xh, xh, r4 @ ... and subtract the divisor
  97. movs ip, ip, lsr #1 @ next lower result bit; Z set after bit 0 is done
  98. bne 4b
  99. mov pc, lr
  100. @ The top part of remainder became zero. If carry is set
  101. @ (the 33rd bit) this is a false positive so resume the loop.
  102. @ Otherwise, if lower part is also null then we are done.
  103. 6: bcs 5b
  104. cmp xl, #0
  105. moveq pc, lr
  106. @ We still have remainder bits in the low part. Bring them up.
  107. #if __LINUX_ARM_ARCH__ >= 5
  108. clz xh, xl @ we know xh is zero here so...
  109. add xh, xh, #1 @ shift count that also pushes the leading 1 out of xl
  110. mov xl, xl, lsl xh
  111. mov ip, ip, lsr xh @ skip the same number of result bit positions
  112. #else
  113. 7: movs xl, xl, lsl #1 @ carry = bit shifted out of xl
  114. mov ip, ip, lsr #1 @ one result bit position per shift
  115. bcc 7b @ loop until a 1 has been shifted out
  116. #endif
  117. @ Current remainder is now 1. It is worthless to compare with
  118. @ divisor at this point since divisor can not be smaller than 3 here.
  119. @ If possible, branch for another shift in the division loop.
  120. @ If no bit position left then we are done.
  121. movs ip, ip, lsr #1
  122. mov xh, #1 @ the 1 that was shifted out of xl
  123. bne 4b
  124. mov pc, lr
  125. 8: @ Division by a power of 2: determine what that divisor order is
  126. @ then simply shift values around
  127. #if __LINUX_ARM_ARCH__ >= 5
  128. clz ip, r4
  129. rsb ip, ip, #31 @ ip = 31 - clz(divisor) = index of the single set bit
  130. #else
  131. mov yl, r4 @ narrow down the set bit: 16, 8, 4, then the last 2 positions
  132. cmp r4, #(1 << 16)
  133. mov ip, #0
  134. movhs yl, yl, lsr #16
  135. movhs ip, #16
  136. cmp yl, #(1 << 8)
  137. movhs yl, yl, lsr #8
  138. addhs ip, ip, #8
  139. cmp yl, #(1 << 4)
  140. movhs yl, yl, lsr #4
  141. addhs ip, ip, #4
  142. cmp yl, #(1 << 2)
  143. addhi ip, ip, #3 @ yl is 8 here
  144. addls ip, ip, yl, lsr #1 @ yl is 1, 2 or 4: adds 0, 1 or 2
  145. #endif
  146. mov yh, xh, lsr ip @ ip = divisor order
  147. mov yl, xl, lsr ip
  148. rsb ip, ip, #32 @ ip = 32 - order
  149. ARM( orr yl, yl, xh, lsl ip ) @ low bits of xh become the top bits of yl
  150. THUMB( lsl xh, xh, ip ) @ NOTE(review): presumably the Thumb-2 form of the ARM() line; xh is rewritten below
  151. THUMB( orr yl, yl, xh )
  152. mov xh, xl, lsl ip @ remainder = low order bits of xl:
  153. mov xh, xh, lsr ip @ drop the higher bits, then move the rest back down
  154. mov pc, lr
  155. @ eq -> division by 1: obvious enough...
  156. 9: moveq yl, xl
  157. moveq yh, xh
  158. moveq xh, #0
  159. moveq pc, lr @ divisor 0 (not eq): falls through into Ldiv0_64
  160. UNWIND(.fnend)
  161. UNWIND(.fnstart)
  162. UNWIND(.pad #4) @ unwind info for the 8-byte frame below: 4 bytes of padding
  163. UNWIND(.save {lr}) @ ... plus the saved lr
  164. Ldiv0_64:
  165. @ Division by 0:
  166. str lr, [sp, #-8]! @ sp -= 8, lr stored at the new sp
  167. bl __div0
  168. @ as wrong as it could be...
  169. mov yl, #0
  170. mov yh, #0
  171. mov xh, #0
  172. ldr pc, [sp], #8 @ return through the saved lr, then sp += 8
  173. UNWIND(.fnend)
  174. ENDPROC(__do_div64)