- /*
- * linux/arch/arm/lib/div64.S
- *
- * Optimized computation of 64-bit dividend / 32-bit divisor
- *
- * Author: Nicolas Pitre
- * Created: Oct 5, 2003
- * Copyright: Monta Vista Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
- #include <linux/linkage.h>
- #include <asm/assembler.h>
- #include <asm/unwind.h>
- #ifdef __ARMEB__
- #define xh r0
- #define xl r1
- #define yh r2
- #define yl r3
- #else
- #define xl r0
- #define xh r1
- #define yl r2
- #define yh r3
- #endif
- /*
- * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
- *
- * Note: Calling convention is totally non-standard for optimal code.
- * This is meant to be used by do_div() from include/asm/div64.h only.
- *
- * Input parameters:
- * xh-xl = dividend (clobbered)
- * r4 = divisor (preserved)
- *
- * Output values:
- * yh-yl = result
- * xh = remainder
- *
- * Clobbered regs: xl, ip
- */
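
Before the code itself, a minimal C model of what the routine computes may help; the struct and function names here are mine, not the kernel's, and the real routine passes these values in registers as described above.

#include <stdint.h>

struct div64_res {
        uint64_t quot;          /* returned in yh-yl */
        uint32_t rem;           /* returned in xh */
};

/* Semantic model only: the assembly below computes exactly this,
 * without ever issuing a 64-bit division. A zero divisor is handled
 * by falling through to __div0 instead. */
static struct div64_res do_div64_model(uint64_t x, uint32_t y)
{
        struct div64_res r = { x / y, (uint32_t)(x % y) };
        return r;
}
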
- ENTRY(__do_div64)
- UNWIND(.fnstart)
- @ Test for easy paths first.
- subs ip, r4, #1
- bls 9f @ divisor is 0 or 1
- tst ip, r4
- beq 8f @ divisor is power of 2
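
In C terms, these two tests amount to the following dispatch; the helper name is mine, and the subs/tst pair above performs both checks purely on the flags:

#include <stdint.h>

/* ip = y - 1 serves both tests: y <= 1 catches 0 and 1, and
 * (y & (y - 1)) == 0 detects a power of two. */
static int classify_divisor(uint32_t y)
{
        if (y <= 1)                     /* bls 9f */
                return 9;
        if ((y & (y - 1)) == 0)         /* beq 8f */
                return 8;
        return 0;                       /* general case */
}
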
- @ See if we need to handle upper 32-bit result.
- cmp xh, r4
- mov yh, #0
- blo 3f
- @ Align divisor with upper part of dividend.
- @ The aligned divisor is stored in yl preserving the original.
- @ The bit position is stored in ip.
- #if __LINUX_ARM_ARCH__ >= 5
- clz yl, r4
- clz ip, xh
- sub yl, yl, ip
- mov ip, #1
- mov ip, ip, lsl yl
- mov yl, r4, lsl yl
- #else
- mov yl, r4
- mov ip, #1
- 1: cmp yl, #0x80000000
- cmpcc yl, xh
- movcc yl, yl, lsl #1
- movcc ip, ip, lsl #1
- bcc 1b
- #endif
- @ The division loop for the needed upper bit positions.
- @ Break out early if dividend reaches 0.
- 2: cmp xh, yl
- orrcs yh, yh, ip
- subcss xh, xh, yl
- movnes ip, ip, lsr #1
- mov yl, yl, lsr #1
- bne 2b
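
The alignment step and this loop together correspond to the following C sketch; the helper name is mine, and __builtin_clz stands in for the ARMv5 clz instruction (the pre-v5 path derives the same shift with a compare-and-shift loop):

#include <stdint.h>

/* Produce quotient bits 63..32 by aligning the divisor with the top
 * of the high word, then classic shift-and-subtract, exiting early
 * once the high word reaches zero. */
static uint32_t div64_upper_half(uint32_t *xh, uint32_t y)
{
        uint32_t yh = 0;

        if (*xh >= y) {
                int s = __builtin_clz(y) - __builtin_clz(*xh);
                uint32_t bit = 1u << s;         /* ip */
                uint32_t d = y << s;            /* yl */

                while (bit) {
                        if (*xh >= d) {
                                yh |= bit;
                                *xh -= d;
                                if (*xh == 0)
                                        break;  /* dividend exhausted */
                        }
                        bit >>= 1;
                        d >>= 1;
                }
        }
        return yh;
}
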
- @ See if we need to handle lower 32-bit result.
- 3: cmp xh, #0
- mov yl, #0
- cmpeq xl, r4
- movlo xh, xl
- retlo lr
- @ The division loop for lower bit positions.
- @ Here we shift remainder bits leftwards rather than moving the
- @ divisor for comparisons, considering the carry-out bit as well.
- mov ip, #0x80000000
- 4: movs xl, xl, lsl #1
- adcs xh, xh, xh
- beq 6f
- cmpcc xh, r4
- 5: orrcs yl, yl, ip
- subcs xh, xh, r4
- movs ip, ip, lsr #1
- bne 4b
- ret lr
- @ The top part of the remainder became zero. If carry is set
- @ (the 33rd bit), this is a false positive, so resume the loop.
- @ Otherwise, if the lower part is also zero then we are done.
- 6: bcs 5b
- cmp xl, #0
- reteq lr
- @ We still have remainder bits in the low part. Bring them up.
- #if __LINUX_ARM_ARCH__ >= 5
- clz xh, xl @ we know xh is zero here so...
- add xh, xh, #1
- mov xl, xl, lsl xh
- mov ip, ip, lsr xh
- #else
- 7: movs xl, xl, lsl #1
- mov ip, ip, lsr #1
- bcc 7b
- #endif
- @ Current remainder is now 1. Comparing it with the divisor is
- @ pointless here since the divisor cannot be smaller than 3.
- @ If possible, branch for another shift in the division loop.
- @ If no bit position left then we are done.
- movs ip, ip, lsr #1
- mov xh, #1
- bne 4b
- ret lr
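
A C model of this lower-half loop follows; the helper name is mine, a 64-bit temporary plays the part the carry flag plays above as a 33rd remainder bit, and the early exits at 6: and 7: are left out for clarity:

#include <stdint.h>

/* On entry the high word is already reduced below the divisor.
 * Shift one more dividend bit into the running remainder each
 * iteration and subtract the divisor whenever it fits. */
static uint32_t div64_lower_half(uint32_t xh, uint32_t xl,
                                 uint32_t y, uint32_t *rem)
{
        uint64_t r = xh;        /* known to be < y here */
        uint32_t yl = 0;
        int i;

        for (i = 31; i >= 0; i--) {
                r = (r << 1) | ((xl >> i) & 1);
                if (r >= y) {           /* covers the carry-out case */
                        r -= y;
                        yl |= 1u << i;
                }
        }
        *rem = (uint32_t)r;
        return yl;
}
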
- 8: @ Division by a power of 2: determine what the divisor order is,
- @ then simply shift values around.
- #if __LINUX_ARM_ARCH__ >= 5
- clz ip, r4
- rsb ip, ip, #31
- #else
- mov yl, r4
- cmp r4, #(1 << 16)
- mov ip, #0
- movhs yl, yl, lsr #16
- movhs ip, #16
- cmp yl, #(1 << 8)
- movhs yl, yl, lsr #8
- addhs ip, ip, #8
- cmp yl, #(1 << 4)
- movhs yl, yl, lsr #4
- addhs ip, ip, #4
- cmp yl, #(1 << 2)
- addhi ip, ip, #3
- addls ip, ip, yl, lsr #1
- #endif
- mov yh, xh, lsr ip
- mov yl, xl, lsr ip
- rsb ip, ip, #32
- ARM( orr yl, yl, xh, lsl ip )
- THUMB( lsl xh, xh, ip )
- THUMB( orr yl, yl, xh )
- mov xh, xl, lsl ip
- mov xh, xh, lsr ip
- ret lr
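
In C, the power-of-2 path is the order computation followed by plain shifts; the helper names are mine. The pre-ARMv5 sequence is a binary search for the set bit that exploits the fact that, the value being an exact power of two, the residual can only be 1, 2, 4 or 8:

#include <stdint.h>

static unsigned int pow2_order(uint32_t y)
{
        unsigned int s = 0;

        if (y >= 1u << 16) { y >>= 16; s += 16; }
        if (y >= 1u << 8)  { y >>= 8;  s += 8; }
        if (y >= 1u << 4)  { y >>= 4;  s += 4; }
        if (y > 1u << 2)        /* only 8 is left above 4 */
                s += 3;
        else                    /* 1, 2 or 4: order is y >> 1 */
                s += y >> 1;
        return s;
}

/* Division by 1 << s is then shifts and a mask. */
static uint64_t div_by_pow2(uint64_t x, unsigned int s, uint32_t *rem)
{
        *rem = (uint32_t)x & ((1u << s) - 1);
        return x >> s;
}
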
- @ eq -> division by 1: obvious enough...
- 9: moveq yl, xl
- moveq yh, xh
- moveq xh, #0
- reteq lr
- UNWIND(.fnend)
- UNWIND(.fnstart)
- UNWIND(.pad #4)
- UNWIND(.save {lr})
- Ldiv0_64:
- @ Division by 0:
- str lr, [sp, #-8]!
- bl __div0
- @ as wrong as it could be...
- mov yl, #0
- mov yh, #0
- mov xh, #0
- ldr pc, [sp], #8
- UNWIND(.fnend)
- ENDPROC(__do_div64)
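
For completeness, callers never invoke __do_div64 directly; they use the do_div() macro mentioned in the header comment, which modifies its 64-bit first argument in place to the quotient and returns the 32-bit remainder. A sketch of typical use (the function name and constant are illustrative):

#include <asm/div64.h>
#include <linux/types.h>

static u32 split_seconds(u64 ns, u64 *secs)
{
        u32 rem;

        *secs = ns;
        rem = do_div(*secs, 1000000000u);       /* *secs := quotient */
        return rem;                             /* leftover nanoseconds */
}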