[PATCH] powerpc: Cleanup LOADADDR etc. asm macros

This patch consolidates the variety of macros used for loading 32 or
64-bit constants in assembler (LOADADDR, LOADBASE, SET_REG_TO_*).  The
idea is to make the set of macros consistent across 32 and 64 bit and
to make it more obvious which is the appropriate one to use in a given
situation.  The new macros and their semantics are described in the
comments in ppc_asm.h.

In the process, we change several places that were unnecessarily using
immediate loads on ppc64 to use the GOT/TOC.  Likewise we cleanup a
couple of places where we were clumsily subtracting PAGE_OFFSET with
asm instructions to use assemble-time arithmetic or the toreal() macro
instead.

Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
This commit is contained in:
David Gibson 2006-01-13 14:56:25 +11:00 committed by Paul Mackerras
parent 7e78e5e502
commit e58c3495e6
9 changed files with 106 additions and 103 deletions

View file

@ -130,7 +130,7 @@ _GLOBAL(__save_cpu_setup)
mfcr r7
/* Get storage ptr */
LOADADDR(r5,cpu_state_storage)
LOAD_REG_IMMEDIATE(r5,cpu_state_storage)
/* We only deal with 970 for now */
mfspr r0,SPRN_PVR
@ -164,7 +164,7 @@ _GLOBAL(__restore_cpu_setup)
/* Get storage ptr (FIXME when using anton reloc as we
* are running with translation disabled here
*/
LOADADDR(r5,cpu_state_storage)
LOAD_REG_IMMEDIATE(r5,cpu_state_storage)
/* We only deal with 970 for now */
mfspr r0,SPRN_PVR

View file

@ -988,7 +988,7 @@ _GLOBAL(enter_rtas)
stwu r1,-INT_FRAME_SIZE(r1)
mflr r0
stw r0,INT_FRAME_SIZE+4(r1)
LOADADDR(r4, rtas)
LOAD_REG_ADDR(r4, rtas)
lis r6,1f@ha /* physical return address for rtas */
addi r6,r6,1f@l
tophys(r6,r6)

View file

@ -689,9 +689,8 @@ _GLOBAL(enter_rtas)
std r6,PACASAVEDMSR(r13)
/* Setup our real return addr */
SET_REG_TO_LABEL(r4,.rtas_return_loc)
SET_REG_TO_CONST(r9,PAGE_OFFSET)
sub r4,r4,r9
LOAD_REG_ADDR(r4,.rtas_return_loc)
clrldi r4,r4,2 /* convert to realmode address */
mtlr r4
li r0,0
@ -706,7 +705,7 @@ _GLOBAL(enter_rtas)
sync /* disable interrupts so SRR0/1 */
mtmsrd r0 /* don't get trashed */
SET_REG_TO_LABEL(r4,rtas)
LOAD_REG_ADDR(r4, rtas)
ld r5,RTASENTRY(r4) /* get the rtas->entry value */
ld r4,RTASBASE(r4) /* get the rtas->base value */
@ -718,8 +717,7 @@ _GLOBAL(enter_rtas)
_STATIC(rtas_return_loc)
/* relocation is off at this point */
mfspr r4,SPRN_SPRG3 /* Get PACA */
SET_REG_TO_CONST(r5, PAGE_OFFSET)
sub r4,r4,r5 /* RELOC the PACA base pointer */
clrldi r4,r4,2 /* convert to realmode address */
mfmsr r6
li r0,MSR_RI
@ -728,7 +726,7 @@ _STATIC(rtas_return_loc)
mtmsrd r6
ld r1,PACAR1(r4) /* Restore our SP */
LOADADDR(r3,.rtas_restore_regs)
LOAD_REG_IMMEDIATE(r3,.rtas_restore_regs)
ld r4,PACASAVEDMSR(r4) /* Restore our MSR */
mtspr SPRN_SRR0,r3

View file

@ -39,9 +39,9 @@ _GLOBAL(load_up_fpu)
* to another. Instead we call giveup_fpu in switch_to.
*/
#ifndef CONFIG_SMP
LOADBASE(r3, last_task_used_math)
LOAD_REG_ADDRBASE(r3, last_task_used_math)
toreal(r3)
PPC_LL r4,OFF(last_task_used_math)(r3)
PPC_LL r4,ADDROFF(last_task_used_math)(r3)
PPC_LCMPI 0,r4,0
beq 1f
toreal(r4)
@ -77,7 +77,7 @@ _GLOBAL(load_up_fpu)
#ifndef CONFIG_SMP
subi r4,r5,THREAD
fromreal(r4)
PPC_STL r4,OFF(last_task_used_math)(r3)
PPC_STL r4,ADDROFF(last_task_used_math)(r3)
#endif /* CONFIG_SMP */
/* restore registers and return */
/* we haven't used ctr or xer or lr */
@ -113,8 +113,8 @@ _GLOBAL(giveup_fpu)
1:
#ifndef CONFIG_SMP
li r5,0
LOADBASE(r4,last_task_used_math)
PPC_STL r5,OFF(last_task_used_math)(r4)
LOAD_REG_ADDRBASE(r4,last_task_used_math)
PPC_STL r5,ADDROFF(last_task_used_math)(r4)
#endif /* CONFIG_SMP */
blr

View file

@ -154,12 +154,12 @@ _GLOBAL(__secondary_hold)
bne 100b
#ifdef CONFIG_HMT
LOADADDR(r4, .hmt_init)
SET_REG_IMMEDIATE(r4, .hmt_init)
mtctr r4
bctr
#else
#ifdef CONFIG_SMP
LOADADDR(r4, .pSeries_secondary_smp_init)
LOAD_REG_IMMEDIATE(r4, .pSeries_secondary_smp_init)
mtctr r4
mr r3,r24
bctr
@ -205,9 +205,10 @@ exception_marker:
#define EX_LR 72
/*
* We're short on space and time in the exception prolog, so we can't use
* the normal LOADADDR macro. Normally we just need the low halfword of the
* address, but for Kdump we need the whole low word.
* We're short on space and time in the exception prolog, so we can't
* use the normal SET_REG_IMMEDIATE macro. Normally we just need the
* low halfword of the address, but for Kdump we need the whole low
* word.
*/
#ifdef CONFIG_CRASH_DUMP
#define LOAD_HANDLER(reg, label) \
@ -713,7 +714,7 @@ system_reset_iSeries:
lbz r23,PACAPROCSTART(r13) /* Test if this processor
* should start */
sync
LOADADDR(r3,current_set)
LOAD_REG_IMMEDIATE(r3,current_set)
sldi r28,r24,3 /* get current_set[cpu#] */
ldx r3,r3,r28
addi r1,r3,THREAD_SIZE
@ -746,8 +747,8 @@ iSeries_secondary_smp_loop:
decrementer_iSeries_masked:
li r11,1
stb r11,PACALPPACA+LPPACADECRINT(r13)
LOADBASE(r12,tb_ticks_per_jiffy)
lwz r12,OFF(tb_ticks_per_jiffy)(r12)
LOAD_REG_ADDRBASE(r12,tb_ticks_per_jiffy)
lwz r12,ADDROFF(tb_ticks_per_jiffy)(r12)
mtspr SPRN_DEC,r12
/* fall through */
@ -1412,7 +1413,7 @@ _GLOBAL(pSeries_secondary_smp_init)
* physical cpu id in r24, we need to search the pacas to find
* which logical id maps to our physical one.
*/
LOADADDR(r13, paca) /* Get base vaddr of paca array */
LOAD_REG_IMMEDIATE(r13, paca) /* Get base vaddr of paca array */
li r5,0 /* logical cpu id */
1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */
cmpw r6,r24 /* Compare to our id */
@ -1446,8 +1447,8 @@ _GLOBAL(pSeries_secondary_smp_init)
#ifdef CONFIG_PPC_ISERIES
_STATIC(__start_initialization_iSeries)
/* Clear out the BSS */
LOADADDR(r11,__bss_stop)
LOADADDR(r8,__bss_start)
LOAD_REG_IMMEDIATE(r11,__bss_stop)
LOAD_REG_IMMEDIATE(r8,__bss_start)
sub r11,r11,r8 /* bss size */
addi r11,r11,7 /* round up to an even double word */
rldicl. r11,r11,61,3 /* shift right by 3 */
@ -1458,17 +1459,17 @@ _STATIC(__start_initialization_iSeries)
3: stdu r0,8(r8)
bdnz 3b
4:
LOADADDR(r1,init_thread_union)
LOAD_REG_IMMEDIATE(r1,init_thread_union)
addi r1,r1,THREAD_SIZE
li r0,0
stdu r0,-STACK_FRAME_OVERHEAD(r1)
LOADADDR(r3,cpu_specs)
LOADADDR(r4,cur_cpu_spec)
LOAD_REG_IMMEDIATE(r3,cpu_specs)
LOAD_REG_IMMEDIATE(r4,cur_cpu_spec)
li r5,0
bl .identify_cpu
LOADADDR(r2,__toc_start)
LOAD_REG_IMMEDIATE(r2,__toc_start)
addi r2,r2,0x4000
addi r2,r2,0x4000
@ -1528,7 +1529,7 @@ _GLOBAL(__start_initialization_multiplatform)
li r24,0
/* Switch off MMU if not already */
LOADADDR(r4, .__after_prom_start - KERNELBASE)
LOAD_REG_IMMEDIATE(r4, .__after_prom_start - KERNELBASE)
add r4,r4,r30
bl .__mmu_off
b .__after_prom_start
@ -1548,7 +1549,7 @@ _STATIC(__boot_from_prom)
/* put a relocation offset into r3 */
bl .reloc_offset
LOADADDR(r2,__toc_start)
LOAD_REG_IMMEDIATE(r2,__toc_start)
addi r2,r2,0x4000
addi r2,r2,0x4000
@ -1588,9 +1589,9 @@ _STATIC(__after_prom_start)
*/
bl .reloc_offset
mr r26,r3
SET_REG_TO_CONST(r27,KERNELBASE)
LOAD_REG_IMMEDIATE(r27, KERNELBASE)
LOADADDR(r3, PHYSICAL_START) /* target addr */
LOAD_REG_IMMEDIATE(r3, PHYSICAL_START) /* target addr */
// XXX FIXME: Use phys returned by OF (r30)
add r4,r27,r26 /* source addr */
@ -1598,7 +1599,7 @@ _STATIC(__after_prom_start)
/* i.e. where we are running */
/* the source addr */
LOADADDR(r5,copy_to_here) /* # bytes of memory to copy */
LOAD_REG_IMMEDIATE(r5,copy_to_here) /* # bytes of memory to copy */
sub r5,r5,r27
li r6,0x100 /* Start offset, the first 0x100 */
@ -1608,11 +1609,11 @@ _STATIC(__after_prom_start)
/* this includes the code being */
/* executed here. */
LOADADDR(r0, 4f) /* Jump to the copy of this code */
LOAD_REG_IMMEDIATE(r0, 4f) /* Jump to the copy of this code */
mtctr r0 /* that we just made/relocated */
bctr
4: LOADADDR(r5,klimit)
4: LOAD_REG_IMMEDIATE(r5,klimit)
add r5,r5,r26
ld r5,0(r5) /* get the value of klimit */
sub r5,r5,r27
@ -1694,7 +1695,7 @@ _GLOBAL(pmac_secondary_start)
mtmsrd r3 /* RI on */
/* Set up a paca value for this processor. */
LOADADDR(r4, paca) /* Get base vaddr of paca array */
LOAD_REG_IMMEDIATE(r4, paca) /* Get base vaddr of paca array */
mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */
add r13,r13,r4 /* for this processor. */
mtspr SPRN_SPRG3,r13 /* Save vaddr of paca in SPRG3 */
@ -1731,7 +1732,7 @@ _GLOBAL(__secondary_start)
bl .early_setup_secondary
/* Initialize the kernel stack. Just a repeat for iSeries. */
LOADADDR(r3,current_set)
LOAD_REG_ADDR(r3, current_set)
sldi r28,r24,3 /* get current_set[cpu#] */
ldx r1,r3,r28
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
@ -1742,8 +1743,8 @@ _GLOBAL(__secondary_start)
mtlr r7
/* enable MMU and jump to start_secondary */
LOADADDR(r3,.start_secondary_prolog)
SET_REG_TO_CONST(r4, MSR_KERNEL)
LOAD_REG_ADDR(r3, .start_secondary_prolog)
LOAD_REG_IMMEDIATE(r4, MSR_KERNEL)
#ifdef DO_SOFT_DISABLE
ori r4,r4,MSR_EE
#endif
@ -1792,8 +1793,8 @@ _STATIC(start_here_multiplatform)
* be detached from the kernel completely. Besides, we need
* to clear it now for kexec-style entry.
*/
LOADADDR(r11,__bss_stop)
LOADADDR(r8,__bss_start)
LOAD_REG_IMMEDIATE(r11,__bss_stop)
LOAD_REG_IMMEDIATE(r8,__bss_start)
sub r11,r11,r8 /* bss size */
addi r11,r11,7 /* round up to an even double word */
rldicl. r11,r11,61,3 /* shift right by 3 */
@ -1831,7 +1832,7 @@ _STATIC(start_here_multiplatform)
/* up the htab. This is done because we have relocated the */
/* kernel but are still running in real mode. */
LOADADDR(r3,init_thread_union)
LOAD_REG_IMMEDIATE(r3,init_thread_union)
add r3,r3,r26
/* set up a stack pointer (physical address) */
@ -1840,14 +1841,14 @@ _STATIC(start_here_multiplatform)
stdu r0,-STACK_FRAME_OVERHEAD(r1)
/* set up the TOC (physical address) */
LOADADDR(r2,__toc_start)
LOAD_REG_IMMEDIATE(r2,__toc_start)
addi r2,r2,0x4000
addi r2,r2,0x4000
add r2,r2,r26
LOADADDR(r3,cpu_specs)
LOAD_REG_IMMEDIATE(r3, cpu_specs)
add r3,r3,r26
LOADADDR(r4,cur_cpu_spec)
LOAD_REG_IMMEDIATE(r4,cur_cpu_spec)
add r4,r4,r26
mr r5,r26
bl .identify_cpu
@ -1863,11 +1864,11 @@ _STATIC(start_here_multiplatform)
* nowhere it can be initialized differently before we reach this
* code
*/
LOADADDR(r27, boot_cpuid)
LOAD_REG_IMMEDIATE(r27, boot_cpuid)
add r27,r27,r26
lwz r27,0(r27)
LOADADDR(r24, paca) /* Get base vaddr of paca array */
LOAD_REG_IMMEDIATE(r24, paca) /* Get base vaddr of paca array */
mulli r13,r27,PACA_SIZE /* Calculate vaddr of right paca */
add r13,r13,r24 /* for this processor. */
add r13,r13,r26 /* convert to physical addr */
@ -1880,8 +1881,8 @@ _STATIC(start_here_multiplatform)
mr r3,r31
bl .early_setup
LOADADDR(r3,.start_here_common)
SET_REG_TO_CONST(r4, MSR_KERNEL)
LOAD_REG_IMMEDIATE(r3, .start_here_common)
LOAD_REG_IMMEDIATE(r4, MSR_KERNEL)
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
rfid
@ -1895,7 +1896,7 @@ _STATIC(start_here_common)
/* The following code sets up the SP and TOC now that we are */
/* running with translation enabled. */
LOADADDR(r3,init_thread_union)
LOAD_REG_IMMEDIATE(r3,init_thread_union)
/* set up the stack */
addi r1,r3,THREAD_SIZE
@ -1908,16 +1909,16 @@ _STATIC(start_here_common)
li r3,0
bl .do_cpu_ftr_fixups
LOADADDR(r26, boot_cpuid)
LOAD_REG_IMMEDIATE(r26, boot_cpuid)
lwz r26,0(r26)
LOADADDR(r24, paca) /* Get base vaddr of paca array */
LOAD_REG_IMMEDIATE(r24, paca) /* Get base vaddr of paca array */
mulli r13,r26,PACA_SIZE /* Calculate vaddr of right paca */
add r13,r13,r24 /* for this processor. */
mtspr SPRN_SPRG3,r13
/* ptr to current */
LOADADDR(r4,init_task)
LOAD_REG_IMMEDIATE(r4, init_task)
std r4,PACACURRENT(r13)
/* Load the TOC */
@ -1940,7 +1941,7 @@ _STATIC(start_here_common)
_GLOBAL(hmt_init)
#ifdef CONFIG_HMT
LOADADDR(r5, hmt_thread_data)
LOAD_REG_IMMEDIATE(r5, hmt_thread_data)
mfspr r7,SPRN_PVR
srwi r7,r7,16
cmpwi r7,0x34 /* Pulsar */
@ -1961,7 +1962,7 @@ _GLOBAL(hmt_init)
b 101f
__hmt_secondary_hold:
LOADADDR(r5, hmt_thread_data)
LOAD_REG_IMMEDIATE(r5, hmt_thread_data)
clrldi r5,r5,4
li r7,0
mfspr r6,SPRN_PIR
@ -1989,7 +1990,7 @@ __hmt_secondary_hold:
#ifdef CONFIG_HMT
_GLOBAL(hmt_start_secondary)
LOADADDR(r4,__hmt_secondary_hold)
LOAD_REG_IMMEDIATE(r4,__hmt_secondary_hold)
clrldi r4,r4,4
mtspr SPRN_NIADORM, r4
mfspr r4, SPRN_MSRDORM

View file

@ -38,14 +38,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
/* We must dynamically check for the NAP feature as it
* can be cleared by CPU init after the fixups are done
*/
LOADBASE(r3,cur_cpu_spec)
ld r4,OFF(cur_cpu_spec)(r3)
LOAD_REG_ADDRBASE(r3,cur_cpu_spec)
ld r4,ADDROFF(cur_cpu_spec)(r3)
ld r4,CPU_SPEC_FEATURES(r4)
andi. r0,r4,CPU_FTR_CAN_NAP
beqlr
/* Now check if user or arch enabled NAP mode */
LOADBASE(r3,powersave_nap)
lwz r4,OFF(powersave_nap)(r3)
LOAD_REG_ADDRBASE(r3,powersave_nap)
lwz r4,ADDROFF(powersave_nap)(r3)
cmpwi 0,r4,0
beqlr

View file

@ -68,7 +68,7 @@ _GLOBAL(reloc_offset)
mflr r0
bl 1f
1: mflr r3
LOADADDR(r4,1b)
LOAD_REG_IMMEDIATE(r4,1b)
subf r3,r4,r3
mtlr r0
blr
@ -80,7 +80,7 @@ _GLOBAL(add_reloc_offset)
mflr r0
bl 1f
1: mflr r5
LOADADDR(r4,1b)
LOAD_REG_IMMEDIATE(r4,1b)
subf r5,r4,r5
add r3,r3,r5
mtlr r0

View file

@ -39,7 +39,7 @@ _GLOBAL(reloc_offset)
mflr r0
bl 1f
1: mflr r3
LOADADDR(r4,1b)
LOAD_REG_IMMEDIATE(r4,1b)
subf r3,r4,r3
mtlr r0
blr
@ -51,7 +51,7 @@ _GLOBAL(add_reloc_offset)
mflr r0
bl 1f
1: mflr r5
LOADADDR(r4,1b)
LOAD_REG_IMMEDIATE(r4,1b)
subf r5,r4,r5
add r3,r3,r5
mtlr r0
@ -498,15 +498,15 @@ _GLOBAL(identify_cpu)
*/
_GLOBAL(do_cpu_ftr_fixups)
/* Get CPU 0 features */
LOADADDR(r6,cur_cpu_spec)
LOAD_REG_IMMEDIATE(r6,cur_cpu_spec)
sub r6,r6,r3
ld r4,0(r6)
sub r4,r4,r3
ld r4,CPU_SPEC_FEATURES(r4)
/* Get the fixup table */
LOADADDR(r6,__start___ftr_fixup)
LOAD_REG_IMMEDIATE(r6,__start___ftr_fixup)
sub r6,r6,r3
LOADADDR(r7,__stop___ftr_fixup)
LOAD_REG_IMMEDIATE(r7,__stop___ftr_fixup)
sub r7,r7,r3
/* Do the fixup */
1: cmpld r6,r7

View file

@ -156,52 +156,56 @@ GLUE(.,name):
#endif
/*
* LOADADDR( rn, name )
* loads the address of 'name' into 'rn'
* LOAD_REG_IMMEDIATE(rn, expr)
* Loads the value of the constant expression 'expr' into register 'rn'
* using immediate instructions only. Use this when it's important not
* to reference other data (i.e. on ppc64 when the TOC pointer is not
* valid).
*
* LOADBASE( rn, name )
* loads the address (possibly without the low 16 bits) of 'name' into 'rn'
* suitable for base+disp addressing
* LOAD_REG_ADDR(rn, name)
* Loads the address of label 'name' into register 'rn'. Use this when
* you don't particularly need immediate instructions only, but you need
* the whole address in one register (e.g. it's a structure address and
* you want to access various offsets within it). On ppc32 this is
* identical to LOAD_REG_IMMEDIATE.
*
* LOAD_REG_ADDRBASE(rn, name)
* ADDROFF(name)
* LOAD_REG_ADDRBASE loads part of the address of label 'name' into
* register 'rn'. ADDROFF(name) returns the remainder of the address as
* a constant expression. ADDROFF(name) is a signed expression < 16 bits
* in size, so is suitable for use directly as an offset in load and store
* instructions. Use this when loading/storing a single word or less as:
* LOAD_REG_ADDRBASE(rX, name)
* ld rY,ADDROFF(name)(rX)
*/
#ifdef __powerpc64__
#define LOADADDR(rn,name) \
lis rn,name##@highest; \
ori rn,rn,name##@higher; \
rldicr rn,rn,32,31; \
oris rn,rn,name##@h; \
ori rn,rn,name##@l
#define LOAD_REG_IMMEDIATE(reg,expr) \
lis (reg),(expr)@highest; \
ori (reg),(reg),(expr)@higher; \
rldicr (reg),(reg),32,31; \
oris (reg),(reg),(expr)@h; \
ori (reg),(reg),(expr)@l;
#define LOADBASE(rn,name) \
ld rn,name@got(r2)
#define LOAD_REG_ADDR(reg,name) \
ld (reg),name@got(r2)
#define OFF(name) 0
#define SET_REG_TO_CONST(reg, value) \
lis reg,(((value)>>48)&0xFFFF); \
ori reg,reg,(((value)>>32)&0xFFFF); \
rldicr reg,reg,32,31; \
oris reg,reg,(((value)>>16)&0xFFFF); \
ori reg,reg,((value)&0xFFFF);
#define SET_REG_TO_LABEL(reg, label) \
lis reg,(label)@highest; \
ori reg,reg,(label)@higher; \
rldicr reg,reg,32,31; \
oris reg,reg,(label)@h; \
ori reg,reg,(label)@l;
#define LOAD_REG_ADDRBASE(reg,name) LOAD_REG_ADDR(reg,name)
#define ADDROFF(name) 0
/* offsets for stack frame layout */
#define LRSAVE 16
#else /* 32-bit */
#define LOADADDR(rn,name) \
lis rn,name@ha; \
addi rn,rn,name@l
#define LOADBASE(rn,name) \
lis rn,name@ha
#define LOAD_REG_IMMEDIATE(reg,expr) \
lis (reg),(expr)@ha; \
addi (reg),(reg),(expr)@l;
#define OFF(name) name@l
#define LOAD_REG_ADDR(reg,name) LOAD_REG_IMMEDIATE(reg, name)
#define LOAD_REG_ADDRBASE(reg, name) lis (reg),name@ha
#define ADDROFF(name) name@l
/* offsets for stack frame layout */
#define LRSAVE 4