lib1funcs.S 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. /*
  2. * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
  3. *
  4. * Author: Nicolas Pitre <nico@fluxnic.net>
  5. * - contributed to gcc-3.4 on Sep 30, 2003
  6. * - adapted for the Linux kernel on Oct 2, 2003
  7. */
  8. /* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
  9. This file is free software; you can redistribute it and/or modify it
  10. under the terms of the GNU General Public License as published by the
  11. Free Software Foundation; either version 2, or (at your option) any
  12. later version.
  13. In addition to the permissions in the GNU General Public License, the
  14. Free Software Foundation gives you unlimited permission to link the
  15. compiled version of this file into combinations with other programs,
  16. and to distribute those combinations without any restriction coming
  17. from the use of this file. (The General Public License restrictions
  18. do apply in other respects; for example, they cover modification of
  19. the file, and distribution when not linked into a combine
  20. executable.)
  21. This file is distributed in the hope that it will be useful, but
  22. WITHOUT ANY WARRANTY; without even the implied warranty of
  23. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  24. General Public License for more details.
  25. You should have received a copy of the GNU General Public License
  26. along with this program; see the file COPYING. If not, write to
  27. the Free Software Foundation, 59 Temple Place - Suite 330,
  28. Boston, MA 02111-1307, USA. */
  29. #include <linux/linkage.h>
  30. #include <asm/assembler.h>
  31. #include <asm/unwind.h>
  /*
   * ARM_DIV_BODY dividend, divisor, result, curbit
   *
   * Unsigned shift-and-subtract division core, unrolled so that one
   * pass through the loop tries four consecutive quotient bits.
   *
   *   dividend  in:      value being divided; multiples of the divisor
   *                      are subtracted from it as the loop runs
   *   divisor   in:      value to divide by; shifted left during setup
   *                      and right by 4 after every pass
   *   result    out:     quotient, built up from zero one bit at a time
   *   curbit    scratch: quotient bit that belongs to the current
   *                      divisor position
   *
   * All four operands are registers, every one of them is written, and
   * the condition flags are changed.  Both expansions in this file pass
   * four different registers and are reached only when the dividend is
   * greater than the divisor and the divisor is not a power of two.
   */
  .macro ARM_DIV_BODY dividend, divisor, result, curbit

  #if __LINUX_ARM_ARCH__ >= 5

          /*
           * Line the divisor up with the dividend in one step: shift it
           * left by the difference of the two leading-zero counts and
           * put curbit at the matching bit position.
           */
          clz     \result, \dividend
          clz     \curbit, \divisor
          sub     \result, \curbit, \result
          mov     \divisor, \divisor, lsl \result
          mov     \curbit, #1
          mov     \curbit, \curbit, lsl \result
          mov     \result, #0

  #else

          /*
           * Initially shift the divisor left 3 bits if possible and set
           * curbit accordingly.  This lets curbit sit at the left end of
           * each 4-bit nibble in the division loop, which saves one
           * pass in most cases.
           */
          tst     \divisor, #0xe0000000
          moveq   \divisor, \divisor, lsl #3
          moveq   \curbit, #8
          movne   \curbit, #1

          /*
           * Unless the divisor is very big, shift it up in multiples of
           * four bits, since this is the amount of unrolling in the main
           * division loop.  Continue shifting until the divisor is
           * larger than the dividend.
           */
  1:      cmp     \divisor, #0x10000000
          cmplo   \divisor, \dividend
          movlo   \divisor, \divisor, lsl #4
          movlo   \curbit, \curbit, lsl #4
          blo     1b

          /*
           * For very big divisors, shift one bit at a time, otherwise
           * the divisor could overflow.
           */
  2:      cmp     \divisor, #0x80000000
          cmplo   \divisor, \dividend
          movlo   \divisor, \divisor, lsl #1
          movlo   \curbit, \curbit, lsl #1
          blo     2b

          mov     \result, #0

  #endif

          /*
           * Division loop: try the divisor at its current position and
           * at 1, 2 and 3 bits below it.  Each successful compare
           * subtracts from the dividend and sets the matching bit in
           * the result.
           */
  3:      cmp     \dividend, \divisor
          subhs   \dividend, \dividend, \divisor
          orrhs   \result, \result, \curbit
          cmp     \dividend, \divisor, lsr #1
          subhs   \dividend, \dividend, \divisor, lsr #1
          orrhs   \result, \result, \curbit, lsr #1
          cmp     \dividend, \divisor, lsr #2
          subhs   \dividend, \dividend, \divisor, lsr #2
          orrhs   \result, \result, \curbit, lsr #2
          cmp     \dividend, \divisor, lsr #3
          subhs   \dividend, \dividend, \divisor, lsr #3
          orrhs   \result, \result, \curbit, lsr #3
          cmp     \dividend, #0                   @ Early termination?
          movnes  \curbit, \curbit, lsr #4        @ No, any more bits to do?
          movne   \divisor, \divisor, lsr #4
          bne     3b

  .endm
  /*
   * ARM_DIV2_ORDER divisor, order
   *
   * Work out the shift count that replaces a division by the divisor.
   * The expansions in this file are reached only after the divisor has
   * been tested to be a power of two, in which case order ends up as
   * the index of its single set bit.
   *
   *   divisor  in:  register holding the divisor; left untouched by the
   *                 clz variant, shifted right (destroyed) by the other
   *   order    out: register receiving the shift count
   *
   * The variant without clz also changes the condition flags.
   */
  .macro ARM_DIV2_ORDER divisor, order

  #if __LINUX_ARM_ARCH__ >= 5

          clz     \order, \divisor
          rsb     \order, \order, #31             @ order = 31 - leading zeros

  #else

          /*
           * Binary search for the set bit: halve the window (16, 8, then
           * 4 bits), moving the divisor down and adding the step to
           * order whenever the bit lies in the upper half.
           */
          cmp     \divisor, #(1 << 16)
          movlo   \order, #0
          movhs   \order, #16
          movhs   \divisor, \divisor, lsr #16

          cmp     \divisor, #(1 << 8)
          addhs   \order, \order, #8
          movhs   \divisor, \divisor, lsr #8

          cmp     \divisor, #(1 << 4)
          addhs   \order, \order, #4
          movhs   \divisor, \divisor, lsr #4

          /*
           * What is left is 1, 2, 4 or 8.  Above 4 adds 3; otherwise
           * divisor >> 1 (0, 1 or 2) is exactly the remaining count.
           */
          cmp     \divisor, #(1 << 2)
          addhi   \order, \order, #3
          addls   \order, \order, \divisor, lsr #1

  #endif

  .endm
  /*
   * ARM_MOD_BODY dividend, divisor, order, spare
   *
   * Unsigned remainder core: the divisor is shifted up towards the
   * dividend and then conditionally subtracted at every position on
   * the way back down.  No quotient is collected.
   *
   *   dividend  in/out:  value being divided; holds what is left once
   *                      the subtractions are done
   *   divisor   in:      value to divide by; overwritten
   *   order     scratch: number of bit positions the divisor was
   *                      shifted up, counted down while subtracting
   *   spare     scratch: written only by the clz variant
   *
   * The condition flags are changed.
   */
  .macro ARM_MOD_BODY dividend, divisor, order, spare

  #if __LINUX_ARM_ARCH__ >= 5

          /* order = difference of the leading-zero counts. */
          clz     \order, \divisor
          clz     \spare, \dividend
          sub     \order, \order, \spare
          mov     \divisor, \divisor, lsl \order

  #else

          mov     \order, #0

          /*
           * Unless the divisor is very big, shift it up in multiples of
           * four bits, since this is the amount of unrolling in the main
           * loop below.  Continue shifting until the divisor is larger
           * than the dividend.
           */
  1:      cmp     \divisor, #0x10000000
          cmplo   \divisor, \dividend
          movlo   \divisor, \divisor, lsl #4
          addlo   \order, \order, #4
          blo     1b

          /*
           * For very big divisors, shift one bit at a time, otherwise
           * the divisor could overflow.
           */
  2:      cmp     \divisor, #0x80000000
          cmplo   \divisor, \dividend
          movlo   \divisor, \divisor, lsl #1
          addlo   \order, \order, #1
          blo     2b

  #endif

          /*
           * Perform all needed subtractions to keep only the remainder.
           * Do the comparisons in batches of 4 first.
           */
          subs    \order, \order, #3              @ yes, 3 is intended here
          blt     4f

  3:      cmp     \dividend, \divisor
          subhs   \dividend, \dividend, \divisor
          cmp     \dividend, \divisor, lsr #1
          subhs   \dividend, \dividend, \divisor, lsr #1
          cmp     \dividend, \divisor, lsr #2
          subhs   \dividend, \dividend, \divisor, lsr #2
          cmp     \dividend, \divisor, lsr #3
          subhs   \dividend, \dividend, \divisor, lsr #3
          cmp     \dividend, #1
          mov     \divisor, \divisor, lsr #4
          subges  \order, \order, #4
          bge     3b

          /* Finished if no partial batch is left or nothing remains. */
          tst     \order, #3
          teqne   \dividend, #0
          beq     7f

          /* Either 1, 2 or 3 comparisons/subtractions are left. */
  4:      cmn     \order, #2
          blt     6f                              @ one left
          beq     5f                              @ two left
          cmp     \dividend, \divisor
          subhs   \dividend, \dividend, \divisor
          mov     \divisor, \divisor, lsr #1
  5:      cmp     \dividend, \divisor
          subhs   \dividend, \dividend, \divisor
          mov     \divisor, \divisor, lsr #1
  6:      cmp     \dividend, \divisor
          subhs   \dividend, \dividend, \divisor
  7:

  .endm
  /*
   * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
   *
   *   In:       r0 = dividend, r1 = divisor
   *   Out:      r0 = quotient
   *   Clobbers: r1, r2, r3 and the condition flags
   *
   * A zero divisor is handed over to Ldiv0.
   */
  ENTRY(__udivsi3)
  ENTRY(__aeabi_uidiv)
  UNWIND(.fnstart)

          subs    r2, r1, #1              @ r2 = divisor - 1
          moveq   pc, lr                  @ divisor == 1: r0 already holds the answer
          bcc     Ldiv0                   @ borrow means the divisor was 0
          cmp     r0, r1
          bls     11f                     @ dividend <= divisor
          tst     r1, r2
          beq     12f                     @ divisor is a power of 2

          ARM_DIV_BODY r0, r1, r2, r3

          mov     r0, r2
          mov     pc, lr

          /* Flags still come from the dividend/divisor compare. */
  11:     movne   r0, #0                  @ dividend < divisor
          moveq   r0, #1                  @ dividend == divisor
          mov     pc, lr

          /* Power-of-2 divisor: a right shift does the job. */
  12:     ARM_DIV2_ORDER r1, r2

          mov     r0, r0, lsr r2
          mov     pc, lr

  UNWIND(.fnend)
  ENDPROC(__udivsi3)
  ENDPROC(__aeabi_uidiv)
  /*
   * __umodsi3: unsigned 32-bit remainder.
   *
   *   In:       r0 = dividend, r1 = divisor
   *   Out:      r0 = remainder
   *   Clobbers: r1, r2, r3 and the condition flags
   *
   * A zero divisor is handed over to Ldiv0.  The easy cases are folded
   * into one chain of conditional instructions, so the order of the
   * instructions below matters.
   */
  ENTRY(__umodsi3)
  UNWIND(.fnstart)

          subs    r2, r1, #1              @ r2 = divisor - 1, compare divisor with 1
          bcc     Ldiv0                   @ borrow means the divisor was 0
          cmpne   r0, r1                  @ divisor != 1: compare dividend with divisor
          moveq   r0, #0                  @ divisor == 1 or dividend == divisor
          tsthi   r1, r2                  @ dividend > divisor: is divisor a power of 2?
          andeq   r0, r0, r2              @ yes: keep the bits below the divisor bit
          movls   pc, lr                  @ done unless the full loop is needed

          ARM_MOD_BODY r0, r1, r2, r3

          mov     pc, lr

  UNWIND(.fnend)
  ENDPROC(__umodsi3)
  /*
   * __divsi3 / __aeabi_idiv: signed 32-bit division.
   *
   *   In:       r0 = dividend, r1 = divisor
   *   Out:      r0 = quotient
   *   Clobbers: r1, r2, r3, ip and the condition flags
   *
   * The work is done on magnitudes; ip keeps dividend ^ divisor, whose
   * top bit is the sign of the quotient.  A zero divisor is handed over
   * to Ldiv0.
   */
  ENTRY(__divsi3)
  ENTRY(__aeabi_idiv)
  UNWIND(.fnstart)

          eor     ip, r0, r1              @ save the sign of the result.
          cmp     r1, #0
          beq     Ldiv0
          rsbmi   r1, r1, #0              @ loops below use unsigned.
          subs    r2, r1, #1              @ division by 1 or -1 ?
          beq     10f
          movs    r3, r0
          rsbmi   r3, r0, #0              @ positive dividend value
          cmp     r3, r1
          bls     11f                     @ magnitude of dividend <= that of divisor
          tst     r1, r2                  @ divisor is power of 2 ?
          beq     12f

          ARM_DIV_BODY r3, r1, r0, r2

          cmp     ip, #0
          rsbmi   r0, r0, #0              @ negative result wanted
          mov     pc, lr

          /* Divisor was 1 or -1: ip ^ r0 carries the divisor sign. */
  10:     teq     ip, r0                  @ same sign ?
          rsbmi   r0, r0, #0
          mov     pc, lr

          /* Flags still come from the compare of the two magnitudes. */
  11:     movlo   r0, #0                  @ smaller magnitude: quotient is 0
          moveq   r0, ip, asr #31         @ equal magnitude: 0 or -1 from the sign ...
          orreq   r0, r0, #1              @ ... turned into 1 or -1
          mov     pc, lr

          /* Power-of-2 divisor: shift the magnitude, then fix the sign. */
  12:     ARM_DIV2_ORDER r1, r2

          mov     r0, r3, lsr r2
          cmp     ip, #0
          rsbmi   r0, r0, #0
          mov     pc, lr

  UNWIND(.fnend)
  ENDPROC(__divsi3)
  ENDPROC(__aeabi_idiv)
  /*
   * __modsi3: signed 32-bit remainder.
   *
   *   In:       r0 = dividend, r1 = divisor
   *   Out:      r0 = remainder, negated when the dividend was negative
   *   Clobbers: r1, r2, r3, ip and the condition flags
   *
   * The work is done on magnitudes; ip keeps the original dividend so
   * its sign can be applied at the end.  A zero divisor is handed over
   * to Ldiv0.  The easy cases share one chain of conditional
   * instructions, so the order of the instructions below matters.
   */
  ENTRY(__modsi3)
  UNWIND(.fnstart)

          cmp     r1, #0
          beq     Ldiv0
          rsbmi   r1, r1, #0              @ loops below use unsigned.
          movs    ip, r0                  @ preserve sign of dividend
          rsbmi   r0, r0, #0              @ if negative make positive
          subs    r2, r1, #1              @ compare divisor with 1
          cmpne   r0, r1                  @ compare dividend with divisor
          moveq   r0, #0                  @ divisor is 1 or the magnitudes match
          tsthi   r1, r2                  @ see if divisor is power of 2
          andeq   r0, r0, r2              @ yes: keep the bits below the divisor bit
          bls     10f                     @ skip the loop for all the easy cases

          ARM_MOD_BODY r0, r1, r2, r3

  10:     cmp     ip, #0
          rsbmi   r0, r0, #0              @ give the result the dividend sign
          mov     pc, lr

  UNWIND(.fnend)
  ENDPROC(__modsi3)
  252. #ifdef CONFIG_AEABI
  /*
   * __aeabi_uidivmod: unsigned division returning quotient and remainder.
   *
   *   In:       r0 = dividend, r1 = divisor
   *   Out:      r0 = quotient, r1 = remainder
   *   Clobbers: r2, r3, plus whatever __aeabi_uidiv changes
   *
   * The operands are parked on the stack across the call to
   * __aeabi_uidiv and reloaded one register higher, after which the
   * remainder is computed as dividend - quotient * divisor.
   */
  ENTRY(__aeabi_uidivmod)
  UNWIND(.fnstart)
  UNWIND(.save {r0, r1, ip, lr} )

          stmfd   sp!, {r0, r1, ip, lr}
          bl      __aeabi_uidiv           @ r0 = quotient
          ldmfd   sp!, {r1, r2, ip, lr}   @ r1 = dividend, r2 = divisor
          mul     r3, r0, r2              @ r3 = quotient * divisor
          sub     r1, r1, r3              @ r1 = remainder
          mov     pc, lr

  UNWIND(.fnend)
  ENDPROC(__aeabi_uidivmod)
  /*
   * __aeabi_idivmod: signed division returning quotient and remainder.
   *
   *   In:       r0 = dividend, r1 = divisor
   *   Out:      r0 = quotient, r1 = remainder
   *   Clobbers: r2, r3, plus whatever __aeabi_idiv changes
   *
   * The operands are parked on the stack across the call to
   * __aeabi_idiv and reloaded one register higher, after which the
   * remainder is computed as dividend - quotient * divisor.
   */
  ENTRY(__aeabi_idivmod)
  UNWIND(.fnstart)
  UNWIND(.save {r0, r1, ip, lr} )

          stmfd   sp!, {r0, r1, ip, lr}
          bl      __aeabi_idiv            @ r0 = quotient
          ldmfd   sp!, {r1, r2, ip, lr}   @ r1 = dividend, r2 = divisor
          mul     r3, r0, r2              @ r3 = quotient * divisor
          sub     r1, r1, r3              @ r1 = remainder
          mov     pc, lr

  UNWIND(.fnend)
  ENDPROC(__aeabi_idivmod)
  275. #endif
  /*
   * Ldiv0: common landing point for a zero divisor.
   *
   * Every division and remainder entry point in this file branches here
   * when the divisor is 0.  The return address is kept on the stack
   * across the call to __div0, then control goes back to the original
   * caller with r0 = 0.
   */
  Ldiv0:
  UNWIND(.fnstart)
  UNWIND(.pad #4)
  UNWIND(.save {lr})

          str     lr, [sp, #-8]!          @ reserve 8 bytes, lr in the lower word
          bl      __div0
          mov     r0, #0                  @ About as wrong as it could be.
          ldr     pc, [sp], #8            @ reload the return address and drop the 8 bytes

  UNWIND(.fnend)
  ENDPROC(Ldiv0)