sh: Add control register barriers.

Currently when making changes to control registers, we
typically need some time for changes to take effect (8
nops, generally).  However, for sh4a we simply need to
do an icbi.

This is a simple patch for implementing a general purpose
ctrl_barrier() which functions as a control register write
barrier. There's some additional documentation in the patch
itself, but it's pretty self-explanatory.

There were also some places where we were not doing the
barrier, which didn't seem to have any adverse effects on
legacy parts, but certainly did on sh4a. It's safer to have
the barrier in place for legacy parts as well in these cases,
though this does make flush_tlb_all() more expensive (on the
order of 8 nops).  We can ifdef around the flush_tlb_all()
case for now if it's clear that all legacy parts won't have
a problem with this.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
This commit is contained in:
Paul Mundt 2006-09-27 14:57:44 +09:00
parent 749cf48692
commit 298476220d
4 changed files with 45 additions and 15 deletions

View file

@ -269,6 +269,11 @@ static inline void flush_icache_all(void)
ccr |= CCR_CACHE_ICI; ccr |= CCR_CACHE_ICI;
ctrl_outl(ccr, CCR); ctrl_outl(ccr, CCR);
/*
* back_to_P1() will take care of the barrier for us, don't add
* another one!
*/
back_to_P1(); back_to_P1();
local_irq_restore(flags); local_irq_restore(flags);
} }

View file

@ -369,12 +369,13 @@ void flush_tlb_all(void)
* Flush all the TLB. * Flush all the TLB.
* *
* Write to the MMU control register's bit: * Write to the MMU control register's bit:
* TF-bit for SH-3, TI-bit for SH-4. * TF-bit for SH-3, TI-bit for SH-4.
* It's same position, bit #2. * It's same position, bit #2.
*/ */
local_irq_save(flags); local_irq_save(flags);
status = ctrl_inl(MMUCR); status = ctrl_inl(MMUCR);
status |= 0x04; status |= 0x04;
ctrl_outl(status, MMUCR); ctrl_outl(status, MMUCR);
ctrl_barrier();
local_irq_restore(flags); local_irq_restore(flags);
} }

View file

@ -174,9 +174,7 @@ static inline void enable_mmu(void)
{ {
/* Enable MMU */ /* Enable MMU */
ctrl_outl(MMU_CONTROL_INIT, MMUCR); ctrl_outl(MMU_CONTROL_INIT, MMUCR);
ctrl_barrier();
/* The manual suggests doing some nops after turning on the MMU */
__asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop\n\t");
if (mmu_context_cache == NO_CONTEXT) if (mmu_context_cache == NO_CONTEXT)
mmu_context_cache = MMU_CONTEXT_FIRST_VERSION; mmu_context_cache = MMU_CONTEXT_FIRST_VERSION;
@ -191,7 +189,8 @@ static inline void disable_mmu(void)
cr = ctrl_inl(MMUCR); cr = ctrl_inl(MMUCR);
cr &= ~MMU_CONTROL_INIT; cr &= ~MMU_CONTROL_INIT;
ctrl_outl(cr, MMUCR); ctrl_outl(cr, MMUCR);
__asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop\n\t");
ctrl_barrier();
} }
#else #else
/* /*

View file

@ -67,8 +67,17 @@ static inline void sched_cacheflush(void)
{ {
} }
#define nop() __asm__ __volatile__ ("nop") #ifdef CONFIG_CPU_SH4A
/*
 * Issue an icbi on a fixed address (0xa8000000).  With CONFIG_CPU_SH4A
 * this is what ctrl_barrier() expands to, i.e. it serves as the control
 * register write barrier in place of the legacy 8-nop sequence.
 *
 * The body is wrapped in do { } while (0) so that "__icbi();" forms a
 * single statement and remains safe inside an unbraced if/else.
 */
#define __icbi() \
do { \
	unsigned long __addr; \
	__addr = 0xa8000000; \
	__asm__ __volatile__( \
		"icbi %0\n\t" \
		: /* no output */ \
		: "m" (__m(__addr))); \
} while (0)
#endif
#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) #define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
@ -84,15 +93,31 @@ static __inline__ unsigned long tas(volatile int *m)
extern void __xchg_called_with_bad_pointer(void); extern void __xchg_called_with_bad_pointer(void);
/*
* A brief note on ctrl_barrier(), the control register write barrier.
*
* Legacy SH cores typically require a sequence of 8 nops after
* modification of a control register in order for the changes to take
* effect. On newer cores (like the sh4a and sh5) this is accomplished
* with icbi.
*
* Also note that on sh4a in the icbi case we can forgo a synco for the
* write barrier, as it's not necessary for control registers.
*
* Historically we have only done this type of barrier for the MMUCR, but
* it's also necessary for the CCR, so we make it generic here instead.
*/
#ifdef CONFIG_CPU_SH4A #ifdef CONFIG_CPU_SH4A
#define mb() __asm__ __volatile__ ("synco": : :"memory") #define mb() __asm__ __volatile__ ("synco": : :"memory")
#define rmb() mb() #define rmb() mb()
#define wmb() __asm__ __volatile__ ("synco": : :"memory") #define wmb() __asm__ __volatile__ ("synco": : :"memory")
#define ctrl_barrier() __icbi()
#define read_barrier_depends() do { } while(0) #define read_barrier_depends() do { } while(0)
#else #else
#define mb() __asm__ __volatile__ ("": : :"memory") #define mb() __asm__ __volatile__ ("": : :"memory")
#define rmb() mb() #define rmb() mb()
#define wmb() __asm__ __volatile__ ("": : :"memory") #define wmb() __asm__ __volatile__ ("": : :"memory")
#define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop")
#define read_barrier_depends() do { } while(0) #define read_barrier_depends() do { } while(0)
#endif #endif
@ -218,8 +243,8 @@ do { \
#define back_to_P1() \ #define back_to_P1() \
do { \ do { \
unsigned long __dummy; \ unsigned long __dummy; \
ctrl_barrier(); \
__asm__ __volatile__( \ __asm__ __volatile__( \
"nop;nop;nop;nop;nop;nop;nop\n\t" \
"mov.l 1f, %0\n\t" \ "mov.l 1f, %0\n\t" \
"jmp @%0\n\t" \ "jmp @%0\n\t" \
" nop\n\t" \ " nop\n\t" \