diff --git a/include/asm-sh/spinlock.h b/include/asm-sh/spinlock.h
index 92f6e2008b2e..e793181d64da 100644
--- a/include/asm-sh/spinlock.h
+++ b/include/asm-sh/spinlock.h
@@ -2,6 +2,7 @@
  * include/asm-sh/spinlock.h
  *
  * Copyright (C) 2002, 2003 Paul Mundt
+ * Copyright (C) 2006, 2007 Akio Idehara
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -10,17 +11,22 @@
 #ifndef __ASM_SH_SPINLOCK_H
 #define __ASM_SH_SPINLOCK_H
 
-#include <asm/atomic.h>
-#include <asm/spinlock_types.h>
+/*
+ * The only locking implemented here uses SH-4A opcodes. For others,
+ * split this out as per atomic-*.h.
+ */
+#ifndef CONFIG_CPU_SH4A
+#error "Need movli.l/movco.l for spinlocks"
+#endif
 
 /*
  * Your basic SMP spinlocks, allowing only a single CPU anywhere
  */
 
-#define __raw_spin_is_locked(x)	((x)->lock != 0)
+#define __raw_spin_is_locked(x)		((x)->lock <= 0)
 #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
 #define __raw_spin_unlock_wait(x) \
-	do { cpu_relax(); } while (__raw_spin_is_locked(x))
+	do { cpu_relax(); } while ((x)->lock)
 
 /*
  * Simple spin lock operations.  There are two variants, one clears IRQ's
@@ -30,12 +36,19 @@
  */
 static inline void __raw_spin_lock(raw_spinlock_t *lock)
 {
+	unsigned long tmp;
+	unsigned long oldval;
+
 	__asm__ __volatile__ (
-		"1:\n\t"
-		"tas.b @%0\n\t"
-		"bf/s 1b\n\t"
-		"nop\n\t"
-		: "=r" (lock->lock)
+		"1:						\n\t"
+		"movli.l	@%2, %0	! __raw_spin_lock	\n\t"
+		"mov		%0, %1				\n\t"
+		"mov		#0, %0				\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"cmp/pl		%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
 		: "r" (&lock->lock)
 		: "t", "memory"
 	);
@@ -43,12 +56,36 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
 
 static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
-	//assert_spin_locked(lock);
+	unsigned long tmp;
 
-	lock->lock = 0;
+	__asm__ __volatile__ (
+		"mov		#1, %0 ! __raw_spin_unlock	\n\t"
+		"mov.l		%0, @%1				\n\t"
+		: "=&z" (tmp)
+		: "r" (&lock->lock)
+		: "t", "memory"
+	);
 }
 
-#define __raw_spin_trylock(x) (!test_and_set_bit(0, &(x)->lock))
+static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+{
+	unsigned long tmp, oldval;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%2, %0	! __raw_spin_trylock	\n\t"
+		"mov		%0, %1				\n\t"
+		"mov		#0, %0				\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"synco						\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
+		: "r" (&lock->lock)
+		: "t", "memory"
+	);
+
+	return oldval;
+}
 
 /*
  * Read-write spinlocks, allowing multiple readers but only one writer.
@@ -59,58 +96,124 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
  * read-locks.
  */
 
+/**
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define __raw_read_can_lock(x)	((x)->lock > 0)
+
+/**
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define __raw_write_can_lock(x)	((x)->lock == RW_LOCK_BIAS)
+
 static inline void __raw_read_lock(raw_rwlock_t *rw)
 {
-	__raw_spin_lock(&rw->lock);
+	unsigned long tmp;
 
-	atomic_inc(&rw->counter);
-
-	__raw_spin_unlock(&rw->lock);
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! __raw_read_lock	\n\t"
+		"cmp/pl		%0				\n\t"
+		"bf		1b				\n\t"
+		"add		#-1, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp)
+		: "r" (&rw->lock)
+		: "t", "memory"
+	);
 }
 
 static inline void __raw_read_unlock(raw_rwlock_t *rw)
 {
-	__raw_spin_lock(&rw->lock);
+	unsigned long tmp;
 
-	atomic_dec(&rw->counter);
-
-	__raw_spin_unlock(&rw->lock);
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! __raw_read_unlock	\n\t"
+		"add		#1, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp)
+		: "r" (&rw->lock)
+		: "t", "memory"
+	);
 }
 
 static inline void __raw_write_lock(raw_rwlock_t *rw)
 {
-	__raw_spin_lock(&rw->lock);
-	atomic_set(&rw->counter, -1);
+	unsigned long tmp;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! __raw_write_lock	\n\t"
+		"cmp/hs		%2, %0				\n\t"
+		"bf		1b				\n\t"
+		"sub		%2, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp)
+		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
+		: "t", "memory"
+	);
 }
 
 static inline void __raw_write_unlock(raw_rwlock_t *rw)
 {
-	atomic_set(&rw->counter, 0);
-	__raw_spin_unlock(&rw->lock);
+	__asm__ __volatile__ (
+		"mov.l		%1, @%0 ! __raw_write_unlock	\n\t"
+		:
+		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
+		: "t", "memory"
+	);
 }
 
-static inline int __raw_write_can_lock(raw_rwlock_t *rw)
+static inline int __raw_read_trylock(raw_rwlock_t *rw)
 {
-	return (atomic_read(&rw->counter) == RW_LOCK_BIAS);
-}
+	unsigned long tmp, oldval;
 
-static inline int __raw_read_trylock(raw_rwlock_t *lock)
-{
-	atomic_t *count = (atomic_t*)lock;
-	if (atomic_dec_return(count) >= 0)
-		return 1;
-	atomic_inc(count);
-	return 0;
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%2, %0	! __raw_read_trylock	\n\t"
+		"mov		%0, %1				\n\t"
+		"cmp/pl		%0				\n\t"
+		"bf		2f				\n\t"
+		"add		#-1, %0				\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"2:						\n\t"
+		"synco						\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
+		: "r" (&rw->lock)
+		: "t", "memory"
+	);
+
+	return (oldval > 0);
 }
 
 static inline int __raw_write_trylock(raw_rwlock_t *rw)
 {
-	if (atomic_sub_and_test(RW_LOCK_BIAS, &rw->counter))
-		return 1;
-	
-	atomic_add(RW_LOCK_BIAS, &rw->counter);
+	unsigned long tmp, oldval;
 
-	return 0;
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%2, %0	! __raw_write_trylock	\n\t"
+		"mov		%0, %1				\n\t"
+		"cmp/hs		%3, %0				\n\t"
+		"bf		2f				\n\t"
+		"sub		%3, %0				\n\t"
+		"2:						\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"synco						\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
+		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
+		: "t", "memory"
+	);
+
+	return (oldval > (RW_LOCK_BIAS - 1));
 }
 
 #define _raw_spin_relax(lock)	cpu_relax()
diff --git a/include/asm-sh/spinlock_types.h b/include/asm-sh/spinlock_types.h
index 5c58134f2c4e..b4d244e7b60c 100644
--- a/include/asm-sh/spinlock_types.h
+++ b/include/asm-sh/spinlock_types.h
@@ -6,19 +6,16 @@
 #endif
 
 typedef struct {
-	volatile unsigned long lock;
+	volatile unsigned int lock;
 } raw_spinlock_t;
 
-#define __RAW_SPIN_LOCK_UNLOCKED	{ 1 }
-
-#include <asm/atomic.h>
+#define __RAW_SPIN_LOCK_UNLOCKED		{ 1 }
 
 typedef struct {
-	raw_spinlock_t lock;
-	atomic_t counter;
+	volatile unsigned int lock;
 } raw_rwlock_t;
 
 #define RW_LOCK_BIAS			0x01000000
-#define __RAW_RW_LOCK_UNLOCKED		{ { 0 }, { RW_LOCK_BIAS } }
+#define __RAW_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
 
 #endif