6ebbf2ce43
ARMv6 and greater introduced a new instruction ("bx") which can be used to return from function calls. Recent CPUs perform better when the "bx lr" instruction is used rather than the "mov pc, lr" instruction, and this sequence is strongly recommended to be used by the ARM architecture manual (section A.4.1.1). We provide a new macro "ret" with all its variants for the condition code which will resolve to the appropriate instruction. Rather than doing this piecemeal, and miss some instances, change all the "mov pc" instances to use the new macro, with the exception of the "movs" instruction and the kprobes code. This allows us to detect the "mov pc, lr" case and fix it up - and also gives us the possibility of deploying this for other registers depending on the CPU selection. Reported-by: Will Deacon <will.deacon@arm.com> Tested-by: Stephen Warren <swarren@nvidia.com> # Tegra Jetson TK1 Tested-by: Robert Jarzmik <robert.jarzmik@free.fr> # mioa701_bootresume.S Tested-by: Andrew Lunn <andrew@lunn.ch> # Kirkwood Tested-by: Shawn Guo <shawn.guo@freescale.com> Tested-by: Tony Lindgren <tony@atomide.com> # OMAPs Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com> # Armada XP, 375, 385 Acked-by: Sekhar Nori <nsekhar@ti.com> # DaVinci Acked-by: Christoffer Dall <christoffer.dall@linaro.org> # kvm/hyp Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com> # PXA3xx Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> # Xen Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> # ARMv7M Tested-by: Simon Horman <horms+renesas@verge.net.au> # Shmobile Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
363 lines
8 KiB
ArmAsm
363 lines
8 KiB
ArmAsm
/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */
|
|
|
|
/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
|
|
|
|
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
#include <asm/unwind.h>
|
|
|
|
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Core of the unsigned 32-bit division: a shift-and-subtract loop that
 * is unrolled four quotient bits per iteration.
 *
 *   dividend  in:  value to be divided
 *             out: clobbered.  It is NOT a reliable remainder: the
 *                  unrolled steps still subtract (divisor >> k) after
 *                  (curbit >> k) has reached zero.
 *   divisor   in:  value to divide by; out: clobbered
 *   result    out: quotient
 *   curbit    scratch: quotient bit that matches the current divisor
 *                  alignment
 *
 * The entry points in this file only expand this macro after they have
 * branched away for divisor == 0, dividend <= divisor and power-of-two
 * divisors, so dividend > divisor holds on entry.
 *
 * The macro defines the numeric local label 1 and alters the condition
 * flags.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Align the top set bit of the divisor with the top set bit of
	@ the dividend, and start curbit at the matching quotient bit.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop: four compare/subtract steps per pass, each one
	@ setting the matching quotient bit when the subtraction is taken.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm
|
|
|
|
|
|
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Compute in "order" the bit index of the set bit of "divisor", i.e.
 * the right-shift count that is equivalent to dividing by it.
 *
 *   divisor   in:  expected to be a non-zero power of two.  The callers
 *                  in this file only get here after (divisor & (divisor
 *                  - 1)) tested zero.
 *             out: unchanged on ARMv5+, clobbered on older architectures
 *   order     out: log2(divisor)
 *
 * The pre-ARMv5 path narrows the position down in 16/8/4-bit steps and
 * resolves the last nibble arithmetically.  That last step is only
 * exact for the nibble values 1, 2, 4 and 8.  The condition flags are
 * altered on the pre-ARMv5 path.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31		@ 31 - clz = index of top set bit

#else

	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ divisor is now below 16: a value above 4 adds 3,
	@ otherwise divisor >> 1 adds 0, 1 or 2 for 1, 2 or 4.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
|
|
|
|
|
|
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Core of the unsigned 32-bit modulo: the same shift-and-subtract
 * scheme as the division loop, but without building a quotient and
 * with an exact count of the compare/subtract steps, so that
 * "dividend" ends up holding the remainder.
 *
 *   dividend  in:  value to be reduced; out: dividend mod divisor
 *   divisor   in:  modulus; out: clobbered
 *   order     scratch: number of bit positions the divisor was shifted
 *                  left, then used as the remaining-step counter
 *   spare     scratch: only written on the ARMv5+ path
 *
 * The entry points in this file only expand this macro after they have
 * handled divisor == 0, dividend <= divisor and power-of-two divisors.
 *
 * The macro defines the numeric local labels 1 to 5 and alters the
 * condition flags.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	@ Finished if no partial batch is left or the dividend is zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
	@ order is -1, -2 or -3 here for 3, 2 or 1 remaining steps.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm
|
|
|
|
|
|
/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient
 *   Uses: r2, r3 as scratch; r1 may be modified; flags are altered.
 *
 * Fast paths, in order:
 *   divisor == 1        -> r0 is returned unchanged
 *   divisor == 0        -> branch to Ldiv0
 *   dividend <= divisor -> 1 when equal, 0 otherwise
 *   divisor is 2^n      -> plain right shift by n
 * Everything else goes through ARM_DIV_BODY.
 */
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	reteq	lr				@ divisor == 1
	bcc	Ldiv0				@ borrow: divisor == 0
	cmp	r0, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

	@ dividend <= divisor, flags still come from the cmp above
11:	moveq	r0, #1
	movne	r0, #0
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
|
|
|
|
/*
 * __umodsi3: unsigned 32-bit modulo.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = dividend mod divisor
 *   Uses: r2, r3 as scratch; r1 may be modified; flags are altered.
 *
 * The conditional sequence after the divide-by-zero check resolves all
 * trivial cases without a branch and returns through "retls":
 *   divisor == 1 or dividend == divisor -> 0
 *   dividend <  divisor                 -> r0 unchanged
 *   divisor is 2^n                      -> r0 & (divisor - 1)
 * Only the remaining case falls through into ARM_MOD_BODY.
 */
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	retls	lr

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)
|
|
|
|
/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient
 *   Uses: r1, r2, r3, ip as scratch; flags are altered.
 *
 * ip = dividend ^ divisor keeps the sign of the result in bit 31.  The
 * work is done on absolute values and the quotient is negated at the
 * end when ip is negative.
 */
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

	@ Division by 1 or -1: ip ^ r0 is the original divisor, so the
	@ N flag is set exactly when the divisor was negative.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	ret	lr

	@ abs(dividend) <= abs(divisor): 0 when lower, else +1 or -1
	@ depending on the sign kept in ip.
11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
|
|
|
|
/*
 * __modsi3: signed 32-bit modulo.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = remainder, negated when the original dividend was
 *         negative
 *   Uses: r1, r2, r3, ip as scratch; flags are altered.
 *
 * Both operands are made positive, the unsigned remainder is computed
 * with the same branch-free shortcuts as the unsigned modulo (zero,
 * dividend below divisor, power-of-two divisor), and the sign saved in
 * ip is applied at label 10.
 */
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)
|
|
|
|
#ifdef CONFIG_AEABI
|
|
|
|
/*
 * __aeabi_uidivmod: unsigned 32-bit division that also yields the
 * remainder.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient, r1 = remainder
 *   Uses: r2, r3 as scratch.
 *
 * The operands are saved across the call to __aeabi_uidiv and the
 * remainder is rebuilt as dividend - quotient * divisor.  ip is pushed
 * and popped along with them.
 */
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
|
|
|
|
/*
 * __aeabi_idivmod: signed 32-bit division that also yields the
 * remainder.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient, r1 = remainder
 *   Uses: r2, r3 as scratch.
 *
 * The operands are saved across the call to __aeabi_idiv and the
 * remainder is rebuilt as dividend - quotient * divisor.  ip is pushed
 * and popped along with them.
 */
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)
	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
|
|
|
|
#endif
|
|
|
|
/*
 * Ldiv0: common divide-by-zero tail.
 *
 * The division entry points reach this label with a plain branch, so
 * lr still holds the return address they were called with.  That
 * address is saved on the stack, __div0 is called, and the code then
 * returns 0 in r0 by loading the saved address straight into pc.
 *
 * The stack pointer moves by 8 bytes although only lr is stored.  The
 * unwind annotations describe this as 4 bytes of padding plus lr.
 * NOTE(review): the extra word is presumably there to keep sp 8-byte
 * aligned across the call - confirm.
 */
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
	str	lr, [sp, #-8]!
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8
UNWIND(.fnend)
ENDPROC(Ldiv0)
|