2005-04-16 16:20:36 -06:00
|
|
|
/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for i486+
|
|
|
|
*
|
|
|
|
* Written by David Howells (dhowells@redhat.com).
|
|
|
|
*
|
2008-01-30 05:30:28 -07:00
|
|
|
* Derived from asm-x86/semaphore.h
|
2005-04-16 16:20:36 -06:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* The MSW of the count is the negated number of active writers and waiting
|
|
|
|
* lockers, and the LSW is the total number of active locks
|
|
|
|
*
|
|
|
|
* The lock count is initialized to 0 (no active and no waiting lockers).
|
|
|
|
*
|
|
|
|
* When a writer adds WRITE_BIAS, the count becomes 0xffff0001 for the case of an
|
|
|
|
* uncontended lock. This can be determined because XADD returns the old value.
|
|
|
|
* Readers increment by 1 and see a positive value when uncontended, negative
|
|
|
|
* if there are writers (and maybe readers) waiting (in which case it goes to
|
|
|
|
* sleep).
|
|
|
|
*
|
|
|
|
* The value of WAITING_BIAS supports up to 32766 waiting processes. This can
|
|
|
|
* be extended to 65534 by manually checking the whole MSW rather than relying
|
|
|
|
* on the S flag.
|
|
|
|
*
|
|
|
|
* The value of ACTIVE_BIAS supports up to 65535 active processes.
|
|
|
|
*
|
|
|
|
* This should be totally fair - if anything is waiting, a process that wants a
|
|
|
|
* lock will go to the back of the queue. When the currently active lock is
|
|
|
|
* released, if there's a writer at the front of the queue, then that and only
|
|
|
|
* that will be woken up; if there's a bunch of consecutive readers at the
|
|
|
|
* front, then they'll all be woken up, but no other readers will be.
|
|
|
|
*/
|
|
|
|
|
2008-10-22 23:26:29 -06:00
|
|
|
#ifndef _ASM_X86_RWSEM_H
|
|
|
|
#define _ASM_X86_RWSEM_H
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
#ifndef _LINUX_RWSEM_H
|
|
|
|
#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
|
|
|
|
#include <linux/list.h>
|
|
|
|
#include <linux/spinlock.h>
|
2006-07-03 01:24:53 -06:00
|
|
|
#include <linux/lockdep.h>
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
struct rwsem_waiter;
|
|
|
|
|
2008-01-30 05:33:00 -07:00
|
|
|
extern asmregparm struct rw_semaphore *
|
|
|
|
rwsem_down_read_failed(struct rw_semaphore *sem);
|
|
|
|
extern asmregparm struct rw_semaphore *
|
|
|
|
rwsem_down_write_failed(struct rw_semaphore *sem);
|
|
|
|
extern asmregparm struct rw_semaphore *
|
|
|
|
rwsem_wake(struct rw_semaphore *);
|
|
|
|
extern asmregparm struct rw_semaphore *
|
|
|
|
rwsem_downgrade_wake(struct rw_semaphore *sem);
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
/*
|
|
|
|
* the semaphore definition
|
2010-01-12 18:57:35 -07:00
|
|
|
*
|
|
|
|
* The bias values and the counter type need to be extended to 64 bits
|
|
|
|
* if we want to have more than 32767 potential readers/writers
|
2005-04-16 16:20:36 -06:00
|
|
|
*/
|
2008-03-23 02:03:21 -06:00
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
/* Count of a semaphore with no active and no waiting lockers. */
#define RWSEM_UNLOCKED_VALUE		0x00000000
/* Contribution of one active locker to the low 16 bits of the count. */
#define RWSEM_ACTIVE_BIAS		0x00000001
/* Mask selecting the low 16 bits (active lockers) of the count. */
#define RWSEM_ACTIVE_MASK		0x0000ffff
/* Negative contribution to the high 16 bits of the count. */
#define RWSEM_WAITING_BIAS		(-0x00010000)
/* Amount a reader adds to the count when it takes the lock. */
#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
/* Amount a writer adds to the count when it takes the lock. */
#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

/* Type of the semaphore counter; signed so that the sign bit can be tested. */
typedef signed int rwsem_count_t;
|
|
|
|
|
2008-03-23 02:03:21 -06:00
|
|
|
struct rw_semaphore {
|
2010-01-12 18:57:35 -07:00
|
|
|
rwsem_count_t count;
|
2005-04-16 16:20:36 -06:00
|
|
|
spinlock_t wait_lock;
|
|
|
|
struct list_head wait_list;
|
2006-07-03 01:24:53 -06:00
|
|
|
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
|
|
|
struct lockdep_map dep_map;
|
|
|
|
#endif
|
2005-04-16 16:20:36 -06:00
|
|
|
};
|
|
|
|
|
2006-07-03 01:24:53 -06:00
|
|
|
/*
 * Trailing initializer fragment for the dep_map member.  Expands to nothing
 * when CONFIG_DEBUG_LOCK_ALLOC is off; otherwise it starts with a comma so it
 * can be appended directly after the previous initializer.
 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
#else
# define __RWSEM_DEP_MAP_INIT(lockname)
#endif
|
|
|
|
|
|
|
|
|
2008-03-23 02:03:21 -06:00
|
|
|
/*
 * Static initializer for a struct rw_semaphore called @name: unlocked count,
 * unlocked wait_lock, empty wait_list and (when enabled) the lockdep map.
 * Designated initializers keep this independent of member order.
 */
#define __RWSEM_INITIALIZER(name)				\
{								\
	.count = RWSEM_UNLOCKED_VALUE,				\
	.wait_lock = __SPIN_LOCK_UNLOCKED((name).wait_lock),	\
	.wait_list = LIST_HEAD_INIT((name).wait_list)		\
	__RWSEM_DEP_MAP_INIT(name)				\
}
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2008-03-23 02:03:21 -06:00
|
|
|
/* Define a struct rw_semaphore called @name, statically initialised. */
#define DECLARE_RWSEM(name)					\
	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
|
|
|
|
|
2006-07-03 01:24:53 -06:00
|
|
|
extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
			 struct lock_class_key *key);

/*
 * Initialise @sem at run time.  Each expansion site gets its own static
 * lock_class_key, and the stringified argument is passed as the name.
 */
#define init_rwsem(sem)						\
do {								\
	static struct lock_class_key __key;			\
								\
	__init_rwsem((sem), #sem, &__key);			\
} while (0)
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
/*
|
|
|
|
* lock for reading
|
|
|
|
*/
|
|
|
|
static inline void __down_read(struct rw_semaphore *sem)
|
|
|
|
{
|
2008-03-23 02:03:21 -06:00
|
|
|
asm volatile("# beginning down_read\n\t"
|
x86-32: clean up rwsem inline asm statements
This makes gcc use the right register names and instruction operand sizes
automatically for the rwsem inline asm statements.
So instead of using "(%%eax)" to specify the memory address that is the
semaphore, we use "(%1)" or similar. And instead of forcing the operation
to always be 32-bit, we use "%z0", taking the size from the actual
semaphore data structure itself.
This doesn't actually matter on x86-32, but if we want to use the same
inline asm for x86-64, we'll need to have the compiler generate the proper
64-bit names for the registers (%rax instead of %eax), and if we want to
use a 64-bit counter too (in order to avoid the 15-bit limit on the
write counter that limits concurrent users to 32767 threads), we'll need
to be able to generate instructions with "q" accesses rather than "l".
Since this header currently isn't enabled on x86-64, none of that matters,
but we do want to use the xadd version of the semaphores rather than have
to take spinlocks to do a rwsem. The mm->mmap_sem can be heavily contended
when you have lots of threads all taking page faults, and the fallback
rwsem code that uses a spinlock performs abysmally badly in that case.
[ hpa: modified the patch to skip size suffixes entirely when they are
redundant due to register operands. ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <alpine.LFD.2.00.1001121613560.17145@localhost.localdomain>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2010-01-12 17:21:09 -07:00
|
|
|
LOCK_PREFIX " inc%z0 (%1)\n\t"
|
2008-03-23 02:03:21 -06:00
|
|
|
/* adds 0x00000001, returns the old value */
|
|
|
|
" jns 1f\n"
|
|
|
|
" call call_rwsem_down_read_failed\n"
|
|
|
|
"1:\n\t"
|
|
|
|
"# ending down_read\n\t"
|
|
|
|
: "+m" (sem->count)
|
|
|
|
: "a" (sem)
|
|
|
|
: "memory", "cc");
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* trylock for reading -- returns 1 if successful, 0 if contention
|
|
|
|
*/
|
|
|
|
static inline int __down_read_trylock(struct rw_semaphore *sem)
|
|
|
|
{
|
2010-01-12 18:57:35 -07:00
|
|
|
rwsem_count_t result, tmp;
|
2008-03-23 02:03:21 -06:00
|
|
|
asm volatile("# beginning __down_read_trylock\n\t"
|
x86-32: clean up rwsem inline asm statements
This makes gcc use the right register names and instruction operand sizes
automatically for the rwsem inline asm statements.
So instead of using "(%%eax)" to specify the memory address that is the
semaphore, we use "(%1)" or similar. And instead of forcing the operation
to always be 32-bit, we use "%z0", taking the size from the actual
semaphore data structure itself.
This doesn't actually matter on x86-32, but if we want to use the same
inline asm for x86-64, we'll need to have the compiler generate the proper
64-bit names for the registers (%rax instead of %eax), and if we want to
use a 64-bit counter too (in order to avoid the 15-bit limit on the
write counter that limits concurrent users to 32767 threads), we'll need
to be able to generate instructions with "q" accesses rather than "l".
Since this header currently isn't enabled on x86-64, none of that matters,
but we do want to use the xadd version of the semaphores rather than have
to take spinlocks to do a rwsem. The mm->mmap_sem can be heavily contended
when you have lots of threads all taking page faults, and the fallback
rwsem code that uses a spinlock performs abysmally badly in that case.
[ hpa: modified the patch to skip size suffixes entirely when they are
redundant due to register operands. ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <alpine.LFD.2.00.1001121613560.17145@localhost.localdomain>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2010-01-12 17:21:09 -07:00
|
|
|
" mov %0,%1\n\t"
|
2008-03-23 02:03:21 -06:00
|
|
|
"1:\n\t"
|
x86-32: clean up rwsem inline asm statements
This makes gcc use the right register names and instruction operand sizes
automatically for the rwsem inline asm statements.
So instead of using "(%%eax)" to specify the memory address that is the
semaphore, we use "(%1)" or similar. And instead of forcing the operation
to always be 32-bit, we use "%z0", taking the size from the actual
semaphore data structure itself.
This doesn't actually matter on x86-32, but if we want to use the same
inline asm for x86-64, we'll need to have the compiler generate the proper
64-bit names for the registers (%rax instead of %eax), and if we want to
use a 64-bit counter too (in order to avoid the 15-bit limit on the
write counter that limits concurrent users to 32767 threads), we'll need
to be able to generate instructions with "q" accesses rather than "l".
Since this header currently isn't enabled on x86-64, none of that matters,
but we do want to use the xadd version of the semaphores rather than have
to take spinlocks to do a rwsem. The mm->mmap_sem can be heavily contended
when you have lots of threads all taking page faults, and the fallback
rwsem code that uses a spinlock performs abysmally badly in that case.
[ hpa: modified the patch to skip size suffixes entirely when they are
redundant due to register operands. ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <alpine.LFD.2.00.1001121613560.17145@localhost.localdomain>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2010-01-12 17:21:09 -07:00
|
|
|
" mov %1,%2\n\t"
|
|
|
|
" add %3,%2\n\t"
|
2008-03-23 02:03:21 -06:00
|
|
|
" jle 2f\n\t"
|
x86-32: clean up rwsem inline asm statements
This makes gcc use the right register names and instruction operand sizes
automatically for the rwsem inline asm statements.
So instead of using "(%%eax)" to specify the memory address that is the
semaphore, we use "(%1)" or similar. And instead of forcing the operation
to always be 32-bit, we use "%z0", taking the size from the actual
semaphore data structure itself.
This doesn't actually matter on x86-32, but if we want to use the same
inline asm for x86-64, we'll need to have the compiler generate the proper
64-bit names for the registers (%rax instead of %eax), and if we want to
use a 64-bit counter too (in order to avoid the 15-bit limit on the
write counter that limits concurrent users to 32767 threads), we'll need
to be able to generate instructions with "q" accesses rather than "l".
Since this header currently isn't enabled on x86-64, none of that matters,
but we do want to use the xadd version of the semaphores rather than have
to take spinlocks to do a rwsem. The mm->mmap_sem can be heavily contended
when you have lots of threads all taking page faults, and the fallback
rwsem code that uses a spinlock performs abysmally badly in that case.
[ hpa: modified the patch to skip size suffixes entirely when they are
redundant due to register operands. ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <alpine.LFD.2.00.1001121613560.17145@localhost.localdomain>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2010-01-12 17:21:09 -07:00
|
|
|
LOCK_PREFIX " cmpxchg %2,%0\n\t"
|
2008-03-23 02:03:21 -06:00
|
|
|
" jnz 1b\n\t"
|
|
|
|
"2:\n\t"
|
|
|
|
"# ending __down_read_trylock\n\t"
|
|
|
|
: "+m" (sem->count), "=&a" (result), "=&r" (tmp)
|
|
|
|
: "i" (RWSEM_ACTIVE_READ_BIAS)
|
|
|
|
: "memory", "cc");
|
|
|
|
return result >= 0 ? 1 : 0;
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* lock for writing
|
|
|
|
*/
|
2006-07-03 01:24:53 -06:00
|
|
|
static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
2010-01-12 18:57:35 -07:00
|
|
|
rwsem_count_t tmp;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
tmp = RWSEM_ACTIVE_WRITE_BIAS;
|
2008-03-23 02:03:21 -06:00
|
|
|
asm volatile("# beginning down_write\n\t"
|
x86-32: clean up rwsem inline asm statements
This makes gcc use the right register names and instruction operand sizes
automatically for the rwsem inline asm statements.
So instead of using "(%%eax)" to specify the memory address that is the
semaphore, we use "(%1)" or similar. And instead of forcing the operation
to always be 32-bit, we use "%z0", taking the size from the actual
semaphore data structure itself.
This doesn't actually matter on x86-32, but if we want to use the same
inline asm for x86-64, we'll need to have the compiler generate the proper
64-bit names for the registers (%rax instead of %eax), and if we want to
use a 64-bit counter too (in order to avoid the 15-bit limit on the
write counter that limits concurrent users to 32767 threads), we'll need
to be able to generate instructions with "q" accesses rather than "l".
Since this header currently isn't enabled on x86-64, none of that matters,
but we do want to use the xadd version of the semaphores rather than have
to take spinlocks to do a rwsem. The mm->mmap_sem can be heavily contended
when you have lots of threads all taking page faults, and the fallback
rwsem code that uses a spinlock performs abysmally badly in that case.
[ hpa: modified the patch to skip size suffixes entirely when they are
redundant due to register operands. ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <alpine.LFD.2.00.1001121613560.17145@localhost.localdomain>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2010-01-12 17:21:09 -07:00
|
|
|
LOCK_PREFIX " xadd %1,(%2)\n\t"
|
2008-03-23 02:03:21 -06:00
|
|
|
/* subtract 0x0000ffff, returns the old value */
|
x86-32: clean up rwsem inline asm statements
This makes gcc use the right register names and instruction operand sizes
automatically for the rwsem inline asm statements.
So instead of using "(%%eax)" to specify the memory address that is the
semaphore, we use "(%1)" or similar. And instead of forcing the operation
to always be 32-bit, we use "%z0", taking the size from the actual
semaphore data structure itself.
This doesn't actually matter on x86-32, but if we want to use the same
inline asm for x86-64, we'll need to have the compiler generate the proper
64-bit names for the registers (%rax instead of %eax), and if we want to
use a 64-bit counter too (in order to avoid the 15-bit limit on the
write counter that limits concurrent users to 32767 threads), we'll need
to be able to generate instructions with "q" accesses rather than "l".
Since this header currently isn't enabled on x86-64, none of that matters,
but we do want to use the xadd version of the semaphores rather than have
to take spinlocks to do a rwsem. The mm->mmap_sem can be heavily contended
when you have lots of threads all taking page faults, and the fallback
rwsem code that uses a spinlock performs abysmally badly in that case.
[ hpa: modified the patch to skip size suffixes entirely when they are
redundant due to register operands. ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <alpine.LFD.2.00.1001121613560.17145@localhost.localdomain>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2010-01-12 17:21:09 -07:00
|
|
|
" test %1,%1\n\t"
|
2008-03-23 02:03:21 -06:00
|
|
|
/* was the count 0 before? */
|
|
|
|
" jz 1f\n"
|
|
|
|
" call call_rwsem_down_write_failed\n"
|
|
|
|
"1:\n"
|
|
|
|
"# ending down_write"
|
|
|
|
: "+m" (sem->count), "=d" (tmp)
|
|
|
|
: "a" (sem), "1" (tmp)
|
|
|
|
: "memory", "cc");
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
|
2006-07-03 01:24:53 -06:00
|
|
|
/* lock for writing: __down_write_nested() with subclass 0 */
static inline void __down_write(struct rw_semaphore *sem)
{
	__down_write_nested(sem, 0);
}
|
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
/*
|
|
|
|
* trylock for writing -- returns 1 if successful, 0 if contention
|
|
|
|
*/
|
|
|
|
static inline int __down_write_trylock(struct rw_semaphore *sem)
|
|
|
|
{
|
2010-01-12 18:57:35 -07:00
|
|
|
rwsem_count_t ret = cmpxchg(&sem->count,
|
|
|
|
RWSEM_UNLOCKED_VALUE,
|
|
|
|
RWSEM_ACTIVE_WRITE_BIAS);
|
2005-04-16 16:20:36 -06:00
|
|
|
if (ret == RWSEM_UNLOCKED_VALUE)
|
|
|
|
return 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* unlock after reading
|
|
|
|
*/
|
|
|
|
static inline void __up_read(struct rw_semaphore *sem)
|
|
|
|
{
|
2010-01-12 18:57:35 -07:00
|
|
|
rwsem_count_t tmp = -RWSEM_ACTIVE_READ_BIAS;
|
2008-03-23 02:03:21 -06:00
|
|
|
asm volatile("# beginning __up_read\n\t"
|
x86-32: clean up rwsem inline asm statements
This makes gcc use the right register names and instruction operand sizes
automatically for the rwsem inline asm statements.
So instead of using "(%%eax)" to specify the memory address that is the
semaphore, we use "(%1)" or similar. And instead of forcing the operation
to always be 32-bit, we use "%z0", taking the size from the actual
semaphore data structure itself.
This doesn't actually matter on x86-32, but if we want to use the same
inline asm for x86-64, we'll need to have the compiler generate the proper
64-bit names for the registers (%rax instead of %eax), and if we want to
use a 64-bit counter too (in order to avoid the 15-bit limit on the
write counter that limits concurrent users to 32767 threads), we'll need
to be able to generate instructions with "q" accesses rather than "l".
Since this header currently isn't enabled on x86-64, none of that matters,
but we do want to use the xadd version of the semaphores rather than have
to take spinlocks to do a rwsem. The mm->mmap_sem can be heavily contended
when you have lots of threads all taking page faults, and the fallback
rwsem code that uses a spinlock performs abysmally badly in that case.
[ hpa: modified the patch to skip size suffixes entirely when they are
redundant due to register operands. ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <alpine.LFD.2.00.1001121613560.17145@localhost.localdomain>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2010-01-12 17:21:09 -07:00
|
|
|
LOCK_PREFIX " xadd %1,(%2)\n\t"
|
2008-03-23 02:03:21 -06:00
|
|
|
/* subtracts 1, returns the old value */
|
|
|
|
" jns 1f\n\t"
|
|
|
|
" call call_rwsem_wake\n"
|
|
|
|
"1:\n"
|
|
|
|
"# ending __up_read\n"
|
|
|
|
: "+m" (sem->count), "=d" (tmp)
|
|
|
|
: "a" (sem), "1" (tmp)
|
|
|
|
: "memory", "cc");
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* unlock after writing
|
|
|
|
*/
|
|
|
|
static inline void __up_write(struct rw_semaphore *sem)
|
|
|
|
{
|
2010-01-12 18:57:35 -07:00
|
|
|
rwsem_count_t tmp;
|
2008-03-23 02:03:21 -06:00
|
|
|
asm volatile("# beginning __up_write\n\t"
|
x86-32: clean up rwsem inline asm statements
This makes gcc use the right register names and instruction operand sizes
automatically for the rwsem inline asm statements.
So instead of using "(%%eax)" to specify the memory address that is the
semaphore, we use "(%1)" or similar. And instead of forcing the operation
to always be 32-bit, we use "%z0", taking the size from the actual
semaphore data structure itself.
This doesn't actually matter on x86-32, but if we want to use the same
inline asm for x86-64, we'll need to have the compiler generate the proper
64-bit names for the registers (%rax instead of %eax), and if we want to
use a 64-bit counter too (in order to avoid the 15-bit limit on the
write counter that limits concurrent users to 32767 threads), we'll need
to be able to generate instructions with "q" accesses rather than "l".
Since this header currently isn't enabled on x86-64, none of that matters,
but we do want to use the xadd version of the semaphores rather than have
to take spinlocks to do a rwsem. The mm->mmap_sem can be heavily contended
when you have lots of threads all taking page faults, and the fallback
rwsem code that uses a spinlock performs abysmally badly in that case.
[ hpa: modified the patch to skip size suffixes entirely when they are
redundant due to register operands. ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <alpine.LFD.2.00.1001121613560.17145@localhost.localdomain>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2010-01-12 17:21:09 -07:00
|
|
|
LOCK_PREFIX " xadd %1,(%2)\n\t"
|
2008-03-23 02:03:21 -06:00
|
|
|
/* tries to transition
|
|
|
|
0xffff0001 -> 0x00000000 */
|
|
|
|
" jz 1f\n"
|
|
|
|
" call call_rwsem_wake\n"
|
|
|
|
"1:\n\t"
|
|
|
|
"# ending __up_write\n"
|
x86-32: clean up rwsem inline asm statements
This makes gcc use the right register names and instruction operand sizes
automatically for the rwsem inline asm statements.
So instead of using "(%%eax)" to specify the memory address that is the
semaphore, we use "(%1)" or similar. And instead of forcing the operation
to always be 32-bit, we use "%z0", taking the size from the actual
semaphore data structure itself.
This doesn't actually matter on x86-32, but if we want to use the same
inline asm for x86-64, we'll need to have the compiler generate the proper
64-bit names for the registers (%rax instead of %eax), and if we want to
use a 64-bit counter too (in order to avoid the 15-bit limit on the
write counter that limits concurrent users to 32767 threads), we'll need
to be able to generate instructions with "q" accesses rather than "l".
Since this header currently isn't enabled on x86-64, none of that matters,
but we do want to use the xadd version of the semaphores rather than have
to take spinlocks to do a rwsem. The mm->mmap_sem can be heavily contended
when you have lots of threads all taking page faults, and the fallback
rwsem code that uses a spinlock performs abysmally badly in that case.
[ hpa: modified the patch to skip size suffixes entirely when they are
redundant due to register operands. ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <alpine.LFD.2.00.1001121613560.17145@localhost.localdomain>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2010-01-12 17:21:09 -07:00
|
|
|
: "+m" (sem->count), "=d" (tmp)
|
|
|
|
: "a" (sem), "1" (-RWSEM_ACTIVE_WRITE_BIAS)
|
|
|
|
: "memory", "cc");
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* downgrade write lock to read lock
|
|
|
|
*/
|
|
|
|
static inline void __downgrade_write(struct rw_semaphore *sem)
|
|
|
|
{
|
2008-03-23 02:03:21 -06:00
|
|
|
asm volatile("# beginning __downgrade_write\n\t"
|
x86-32: clean up rwsem inline asm statements
This makes gcc use the right register names and instruction operand sizes
automatically for the rwsem inline asm statements.
So instead of using "(%%eax)" to specify the memory address that is the
semaphore, we use "(%1)" or similar. And instead of forcing the operation
to always be 32-bit, we use "%z0", taking the size from the actual
semaphore data structure itself.
This doesn't actually matter on x86-32, but if we want to use the same
inline asm for x86-64, we'll need to have the compiler generate the proper
64-bit names for the registers (%rax instead of %eax), and if we want to
use a 64-bit counter too (in order to avoid the 15-bit limit on the
write counter that limits concurrent users to 32767 threads), we'll need
to be able to generate instructions with "q" accesses rather than "l".
Since this header currently isn't enabled on x86-64, none of that matters,
but we do want to use the xadd version of the semaphores rather than have
to take spinlocks to do a rwsem. The mm->mmap_sem can be heavily contended
when you have lots of threads all taking page faults, and the fallback
rwsem code that uses a spinlock performs abysmally badly in that case.
[ hpa: modified the patch to skip size suffixes entirely when they are
redundant due to register operands. ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <alpine.LFD.2.00.1001121613560.17145@localhost.localdomain>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2010-01-12 17:21:09 -07:00
|
|
|
LOCK_PREFIX " add%z0 %2,(%1)\n\t"
|
2008-03-23 02:03:21 -06:00
|
|
|
/* transitions 0xZZZZ0001 -> 0xYYYY0001 */
|
|
|
|
" jns 1f\n\t"
|
|
|
|
" call call_rwsem_downgrade_wake\n"
|
|
|
|
"1:\n\t"
|
|
|
|
"# ending __downgrade_write\n"
|
|
|
|
: "+m" (sem->count)
|
|
|
|
: "a" (sem), "i" (-RWSEM_WAITING_BIAS)
|
|
|
|
: "memory", "cc");
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* implement atomic add functionality
|
|
|
|
*/
|
|
|
|
static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
|
|
|
|
{
|
x86-32: clean up rwsem inline asm statements
This makes gcc use the right register names and instruction operand sizes
automatically for the rwsem inline asm statements.
So instead of using "(%%eax)" to specify the memory address that is the
semaphore, we use "(%1)" or similar. And instead of forcing the operation
to always be 32-bit, we use "%z0", taking the size from the actual
semaphore data structure itself.
This doesn't actually matter on x86-32, but if we want to use the same
inline asm for x86-64, we'll need to have the compiler generate the proper
64-bit names for the registers (%rax instead of %eax), and if we want to
use a 64-bit counter too (in order to avoid the 15-bit limit on the
write counter that limits concurrent users to 32767 threads), we'll need
to be able to generate instructions with "q" accesses rather than "l".
Since this header currently isn't enabled on x86-64, none of that matters,
but we do want to use the xadd version of the semaphores rather than have
to take spinlocks to do a rwsem. The mm->mmap_sem can be heavily contended
when you have lots of threads all taking page faults, and the fallback
rwsem code that uses a spinlock performs abysmally badly in that case.
[ hpa: modified the patch to skip size suffixes entirely when they are
redundant due to register operands. ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <alpine.LFD.2.00.1001121613560.17145@localhost.localdomain>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2010-01-12 17:21:09 -07:00
|
|
|
asm volatile(LOCK_PREFIX "add%z0 %1,%0"
|
2008-03-23 02:03:21 -06:00
|
|
|
: "+m" (sem->count)
|
|
|
|
: "ir" (delta));
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* implement exchange and add functionality
|
|
|
|
*/
|
2010-01-12 18:57:35 -07:00
|
|
|
static inline rwsem_count_t rwsem_atomic_update(int delta, struct rw_semaphore *sem)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
2010-01-12 18:57:35 -07:00
|
|
|
rwsem_count_t tmp = delta;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2008-03-23 02:03:21 -06:00
|
|
|
asm volatile(LOCK_PREFIX "xadd %0,%1"
|
|
|
|
: "+r" (tmp), "+m" (sem->count)
|
|
|
|
: : "memory");
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2008-03-23 02:03:21 -06:00
|
|
|
return tmp + delta;
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
|
2005-10-29 19:15:44 -06:00
|
|
|
static inline int rwsem_is_locked(struct rw_semaphore *sem)
|
|
|
|
{
|
|
|
|
return (sem->count != 0);
|
|
|
|
}
|
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
#endif /* __KERNEL__ */
|
2008-10-22 23:26:29 -06:00
|
|
|
#endif /* _ASM_X86_RWSEM_H */
|