kernel-fxtec-pro1x/arch/x86/lib/atomic64_cx8_32.S

/*
 * atomic64_t for 586+
 *
 * Copyright © 2010  Luca Barbieri
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/linkage.h>
#include <asm/alternative-asm.h>
#include <asm/dwarf2.h>

/*
 * SAVE reg: push \reg and keep the unwind annotations in step with the
 * stack. The CFA offset grows by the 4 bytes just pushed, and \reg is
 * recorded as saved at the new top of the stack.
 * \reg is given without the '%' prefix (e.g. "SAVE ebx").
 */
.macro SAVE reg
	pushl %\reg
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET \reg, 0
.endm

/*
 * RESTORE reg: counterpart of SAVE. Pops \reg, shrinks the CFA offset by
 * the 4 bytes just popped and marks \reg as holding its original value
 * again. \reg is given without the '%' prefix (e.g. "RESTORE ebx").
 */
.macro RESTORE reg
	popl %\reg
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE \reg
.endm

/*
 * read64 reg: load the 64-bit value at (\reg) into %edx:%eax using a
 * locked cmpxchg8b. \reg is given with its '%' prefix (e.g. "read64 %esi").
 *
 * %edx:%eax is first seeded with %ecx:%ebx. If memory differs from that
 * guess, cmpxchg8b loads the memory value into %edx:%eax. If it matches,
 * %ecx:%ebx (the same value) is stored back. Either way %edx:%eax ends up
 * holding the memory value and memory is left unchanged.
 * Modifies %eax, %edx and the flags only.
 */
.macro read64 reg
	movl %ebx, %eax
	movl %ecx, %edx
/* we need LOCK_PREFIX since otherwise cmpxchg8b always does the write */
	LOCK_PREFIX
	cmpxchg8b (\reg)
.endm

/*
 * atomic64_read_cx8: read a 64-bit value with a locked cmpxchg8b.
 *
 * In:   %ecx      = address of the 64-bit value
 * Out:  %edx:%eax = value read
 * %ebx and %ecx are only read (they seed the compare operand in read64);
 * the flags are modified.
 */
ENTRY(atomic64_read_cx8)
	CFI_STARTPROC

	read64 %ecx
	ret
	CFI_ENDPROC
ENDPROC(atomic64_read_cx8)

/*
 * atomic64_set_cx8: store a 64-bit value with cmpxchg8b.
 *
 * In:   %esi      = address of the 64-bit value
 *       %ecx:%ebx = value to store
 * %edx:%eax is used as the compare operand with whatever it holds on
 * entry. Each failed attempt reloads it with the current memory contents,
 * so the retry normally succeeds. Clobbers %eax, %edx and the flags.
 */
ENTRY(atomic64_set_cx8)
	CFI_STARTPROC

1:
/* we don't need LOCK_PREFIX since aligned 64-bit writes
 * are atomic on 586 and newer */
	cmpxchg8b (%esi)
	jne 1b

	ret
	CFI_ENDPROC
ENDPROC(atomic64_set_cx8)

/*
 * atomic64_xchg_cx8: atomically replace a 64-bit value and return the
 * value it replaced.
 *
 * In:   %esi      = address of the 64-bit value
 *       %ecx:%ebx = new value
 * Out:  %edx:%eax = value that was in memory when the store succeeded
 * Clobbers the flags.
 */
ENTRY(atomic64_xchg_cx8)
	CFI_STARTPROC

/* first guess for the old value is the new value itself */
	movl %ebx, %eax
	movl %ecx, %edx
1:
	LOCK_PREFIX
	cmpxchg8b (%esi)
/* on a mismatch cmpxchg8b reloaded %edx:%eax from memory; try again */
	jne 1b

	ret
	CFI_ENDPROC
ENDPROC(atomic64_xchg_cx8)

/*
 * addsub_return func ins insc: emit atomic64_<func>_return_cx8, which
 * applies a 64-bit add or subtract to a value in memory and returns the
 * result.
 *
 *   func  name fragment used in the symbol (add / sub)
 *   ins   instruction applied to the low halves (add / sub)
 *   insc  carry/borrow instruction applied to the high halves (adc / sbb)
 *
 * Generated function:
 * In:   %ecx      = address of the 64-bit value
 *       %edx:%eax = 64-bit operand
 * Out:  %edx:%eax = value that was stored (old value <ins> operand)
 * %ebp, %ebx, %esi and %edi are saved and restored; %ecx and the flags
 * are clobbered.
 */
.macro addsub_return func ins insc
ENTRY(atomic64_\func\()_return_cx8)
	CFI_STARTPROC
	SAVE ebp
	SAVE ebx
	SAVE esi
	SAVE edi

/*
 * cmpxchg8b needs %edx:%eax and %ecx:%ebx for itself, so move the operand
 * to %edi:%esi and the pointer to %ebp.
 */
	movl %eax, %esi
	movl %edx, %edi
	movl %ecx, %ebp

	read64 %ebp
1:
/* %ecx:%ebx = current value (%edx:%eax) combined with the operand */
	movl %eax, %ebx
	movl %edx, %ecx
	\ins\()l %esi, %ebx
	\insc\()l %edi, %ecx
	LOCK_PREFIX
	cmpxchg8b (%ebp)
/* on a mismatch %edx:%eax now holds the fresh value; recompute and retry */
	jne 1b

/* label 10 is not referenced anywhere in this macro */
10:
/* return the value that was just stored */
	movl %ebx, %eax
	movl %ecx, %edx
	RESTORE edi
	RESTORE esi
	RESTORE ebx
	RESTORE ebp
	ret
	CFI_ENDPROC
ENDPROC(atomic64_\func\()_return_cx8)
.endm

/* instantiate atomic64_add_return_cx8 and atomic64_sub_return_cx8 */
addsub_return add add adc
addsub_return sub sub sbb

/*
 * incdec_return func ins insc: emit atomic64_<func>_return_cx8, which
 * adds or subtracts 1 from a 64-bit value in memory and returns the
 * result.
 *
 *   func  name fragment used in the symbol (inc / dec)
 *   ins   instruction applied to the low half with $1 (add / sub)
 *   insc  carry/borrow instruction applied to the high half (adc / sbb)
 *
 * Generated function:
 * In:   %esi      = address of the 64-bit value
 * Out:  %edx:%eax = value that was stored
 * %ebx is saved and restored; %ecx and the flags are clobbered.
 */
.macro incdec_return func ins insc
ENTRY(atomic64_\func\()_return_cx8)
	CFI_STARTPROC
	SAVE ebx

	read64 %esi
1:
/* %ecx:%ebx = current value (%edx:%eax) plus or minus one */
	movl %eax, %ebx
	movl %edx, %ecx
	\ins\()l $1, %ebx
	\insc\()l $0, %ecx
	LOCK_PREFIX
	cmpxchg8b (%esi)
/* on a mismatch %edx:%eax now holds the fresh value; recompute and retry */
	jne 1b

/* label 10 is not referenced anywhere in this macro */
10:
/* return the value that was just stored */
	movl %ebx, %eax
	movl %ecx, %edx
	RESTORE ebx
	ret
	CFI_ENDPROC
ENDPROC(atomic64_\func\()_return_cx8)
.endm

/* instantiate atomic64_inc_return_cx8 and atomic64_dec_return_cx8 */
incdec_return inc add adc
incdec_return dec sub sbb

/*
 * atomic64_dec_if_positive_cx8: decrement the 64-bit value at (%esi)
 * unless the decremented result would be negative.
 *
 * In:   %esi      = address of the 64-bit value
 * Out:  %edx:%eax = old value minus one, whether or not it was stored
 * %ebx is saved and restored; %ecx and the flags are clobbered.
 */
ENTRY(atomic64_dec_if_positive_cx8)
	CFI_STARTPROC
	SAVE ebx

	read64 %esi
1:
/* %ecx:%ebx = current value (%edx:%eax) minus one */
	movl %eax, %ebx
	movl %edx, %ecx
	subl $1, %ebx
	sbbl $0, %ecx
/* sign flag comes from the high half: negative result, skip the store */
	js 2f
	LOCK_PREFIX
	cmpxchg8b (%esi)
/* on a mismatch %edx:%eax now holds the fresh value; recompute and retry */
	jne 1b

2:
	movl %ebx, %eax
	movl %ecx, %edx
	RESTORE ebx
	ret
	CFI_ENDPROC
ENDPROC(atomic64_dec_if_positive_cx8)

/*
 * atomic64_add_unless_cx8: add a 64-bit amount to a value in memory unless
 * that value equals a given 64-bit comparand.
 *
 * In:   %ecx      = address of the 64-bit value
 *       %edx:%eax = amount to add
 *       %edi:%esi = comparand that suppresses the add
 * Out:  %eax      = 1 if the add was performed, 0 if the value was equal
 *                   to the comparand
 * %ebx and %ebp are saved and restored. %ecx, %edx, %esi, %edi and the
 * flags are clobbered (the stacked copies of %esi/%edi are discarded, not
 * popped back).
 *
 * Stack after the four pushes:
 *   0(%esp) comparand low, 4(%esp) comparand high, 8(%esp) saved %ebx,
 *   12(%esp) saved %ebp.
 */
ENTRY(atomic64_add_unless_cx8)
	CFI_STARTPROC
	SAVE ebp
	SAVE ebx
/* these just push these two parameters on the stack */
	SAVE edi
	SAVE esi

/*
 * cmpxchg8b needs %edx:%eax and %ecx:%ebx for itself, so move the pointer
 * to %ebp and the amount to %edi:%esi.
 */
	movl %ecx, %ebp
	movl %eax, %esi
	movl %edx, %edi

	read64 %ebp
1:
/* low halves equal? then the high halves decide (at 4:) */
	cmpl %eax, 0(%esp)
	je 4f
2:
/* %ecx:%ebx = current value (%edx:%eax) plus the amount */
	movl %eax, %ebx
	movl %edx, %ecx
	addl %esi, %ebx
	adcl %edi, %ecx
	LOCK_PREFIX
	cmpxchg8b (%ebp)
/* on a mismatch %edx:%eax holds the fresh value; compare it again */
	jne 1b

	movl $1, %eax
3:
/*
 * The code at 4: is placed after the ret but runs with the full frame
 * still on the stack. Snapshot the unwind state here and reinstate it
 * after the ret so that block is described correctly.
 */
	CFI_REMEMBER_STATE
/* drop the two stacked halves of the comparand */
	addl $8, %esp
	CFI_ADJUST_CFA_OFFSET -8
	RESTORE ebx
	RESTORE ebp
	ret
	CFI_RESTORE_STATE
4:
	cmpl %edx, 4(%esp)
	jne 2b
/* value equals the comparand: report that nothing was added */
	xorl %eax, %eax
	jmp 3b
	CFI_ENDPROC
ENDPROC(atomic64_add_unless_cx8)

/*
 * atomic64_inc_not_zero_cx8: increment the 64-bit value at (%esi) unless
 * it is zero.
 *
 * In:   %esi  = address of the 64-bit value
 * Out:  %eax  = 1 if the increment was performed, 0 if the value was zero
 * %ebx is saved and restored; %ecx, %edx and the flags are clobbered.
 *
 * The zero test is done inline with a single OR of both halves, so the
 * whole function is straight-line code up to the ret and the unwind
 * annotations from SAVE/RESTORE are accurate at every instruction.
 */
ENTRY(atomic64_inc_not_zero_cx8)
	CFI_STARTPROC
	SAVE ebx

	read64 %esi
1:
/* both halves zero? then %eax is already the 0 we must return */
	movl %eax, %ecx
	orl %edx, %ecx
	jz 3f
/* %ecx:%ebx = current value (%edx:%eax) plus one */
	movl %eax, %ebx
	xorl %ecx, %ecx
	addl $1, %ebx
	adcl %edx, %ecx
	LOCK_PREFIX
	cmpxchg8b (%esi)
/* on a mismatch %edx:%eax holds the fresh value; test it again */
	jne 1b

	movl $1, %eax
3:
	RESTORE ebx
	ret
	CFI_ENDPROC
ENDPROC(atomic64_inc_not_zero_cx8)
x86-32: Rewrite 32-bit atomic64 functions in assembly This patch replaces atomic64_32.c with two assembly implementations, one for 386/486 machines using pushf/cli/popf and one for 586+ machines using cmpxchg8b. The cmpxchg8b implementation provides the following advantages over the current one: 1. Implements atomic64_add_unless, atomic64_dec_if_positive and atomic64_inc_not_zero 2. Uses the ZF flag changed by cmpxchg8b instead of doing a comparison 3. Uses custom register calling conventions that reduce or eliminate register moves to suit cmpxchg8b 4. Reads the initial value instead of using cmpxchg8b to do that. Currently we use lock xaddl and movl, which seems the fastest. 5. Does not use the lock prefix for atomic64_set 64-bit writes are already atomic, so we don't need that. We still need it for atomic64_read to avoid restoring a value changed in the meantime. 6. Allocates registers as well or better than gcc The 386 implementation provides support for 386 and 486 machines. 386/486 SMP is not supported (we dropped it), but such support can be added easily if desired. A pure assembly implementation is required due to the custom calling conventions, and desire to use %ebp in atomic64_add_return (we need 7 registers...), as well as the ability to use pushf/popf in the 386 code without an intermediate pop/push. 
The parameter names are changed to match the convention in atomic_64.h Changes in v3 (due to rebasing to tip/x86/asm): - Patches atomic64_32.h instead of atomic_32.h - Uses the CALL alternative mechanism from commit 1b1d9258181bae199dc940f4bd0298126b9a73d9 Changes in v2: - Merged 386 and cx8 support in the same patch - 386 support now done in assembly, C code no longer used at all - cmpxchg64 is used for atomic64_cmpxchg - stop using macros, use one-line inline functions instead - miscellaneous changes and improvements Signed-off-by: Luca Barbieri <luca@luca-barbieri.com> LKML-Reference: <1267005265-27958-5-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com> 2010-02-24 02:54:25 -07:00			`/*`
			`* atomic64_t for 586+`
			`*`
			`* Copyright © 2010 Luca Barbieri`
			`*`
			`* This program is free software; you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License as published by`
			`* the Free Software Foundation; either version 2 of the License, or`
			`* (at your option) any later version.`
			`*/`

			`#include <linux/linkage.h>`
			`#include <asm/alternative-asm.h>`
			`#include <asm/dwarf2.h>`

			`.macro SAVE reg`
			`pushl %\reg`
			`CFI_ADJUST_CFA_OFFSET 4`
			`CFI_REL_OFFSET \reg, 0`
			`.endm`

			`.macro RESTORE reg`
			`popl %\reg`
			`CFI_ADJUST_CFA_OFFSET -4`
			`CFI_RESTORE \reg`
			`.endm`

			`.macro read64 reg`
			`movl %ebx, %eax`
			`movl %ecx, %edx`
			`/* we need LOCK_PREFIX since otherwise cmpxchg8b always does the write */`
			`LOCK_PREFIX`
			`cmpxchg8b (\reg)`
			`.endm`

			`ENTRY(atomic64_read_cx8)`
			`CFI_STARTPROC`

			`read64 %ecx`
			`ret`
			`CFI_ENDPROC`
			`ENDPROC(atomic64_read_cx8)`

			`ENTRY(atomic64_set_cx8)`
			`CFI_STARTPROC`

			`1:`
			`/* we don't need LOCK_PREFIX since aligned 64-bit writes`
			`* are atomic on 586 and newer */`
			`cmpxchg8b (%esi)`
			`jne 1b`

			`ret`
			`CFI_ENDPROC`
			`ENDPROC(atomic64_set_cx8)`

			`ENTRY(atomic64_xchg_cx8)`
			`CFI_STARTPROC`

			`movl %ebx, %eax`
			`movl %ecx, %edx`
			`1:`
			`LOCK_PREFIX`
			`cmpxchg8b (%esi)`
			`jne 1b`

			`ret`
			`CFI_ENDPROC`
			`ENDPROC(atomic64_xchg_cx8)`

			`.macro addsub_return func ins insc`
			`ENTRY(atomic64_\func\()_return_cx8)`
			`CFI_STARTPROC`
			`SAVE ebp`
			`SAVE ebx`
			`SAVE esi`
			`SAVE edi`

			`movl %eax, %esi`
			`movl %edx, %edi`
			`movl %ecx, %ebp`

			`read64 %ebp`
			`1:`
			`movl %eax, %ebx`
			`movl %edx, %ecx`
			`\ins\()l %esi, %ebx`
			`\insc\()l %edi, %ecx`
			`LOCK_PREFIX`
			`cmpxchg8b (%ebp)`
			`jne 1b`

			`10:`
			`movl %ebx, %eax`
			`movl %ecx, %edx`
			`RESTORE edi`
			`RESTORE esi`
			`RESTORE ebx`
			`RESTORE ebp`
			`ret`
			`CFI_ENDPROC`
			`ENDPROC(atomic64_\func\()_return_cx8)`
			`.endm`

			`addsub_return add add adc`
			`addsub_return sub sub sbb`

			`.macro incdec_return func ins insc`
			`ENTRY(atomic64_\func\()_return_cx8)`
			`CFI_STARTPROC`
			`SAVE ebx`

			`read64 %esi`
			`1:`
			`movl %eax, %ebx`
			`movl %edx, %ecx`
			`\ins\()l $1, %ebx`
			`\insc\()l $0, %ecx`
			`LOCK_PREFIX`
			`cmpxchg8b (%esi)`
			`jne 1b`

			`10:`
			`movl %ebx, %eax`
			`movl %ecx, %edx`
			`RESTORE ebx`
			`ret`
			`CFI_ENDPROC`
			`ENDPROC(atomic64_\func\()_return_cx8)`
			`.endm`

			`incdec_return inc add adc`
			`incdec_return dec sub sbb`

			`ENTRY(atomic64_dec_if_positive_cx8)`
			`CFI_STARTPROC`
			`SAVE ebx`

			`read64 %esi`
			`1:`
			`movl %eax, %ebx`
			`movl %edx, %ecx`
			`subl $1, %ebx`
			`sbb $0, %ecx`
			`js 2f`
			`LOCK_PREFIX`
			`cmpxchg8b (%esi)`
			`jne 1b`

			`2:`
			`movl %ebx, %eax`
			`movl %ecx, %edx`
			`RESTORE ebx`
			`ret`
			`CFI_ENDPROC`
			`ENDPROC(atomic64_dec_if_positive_cx8)`

			`ENTRY(atomic64_add_unless_cx8)`
			`CFI_STARTPROC`
			`SAVE ebp`
			`SAVE ebx`
			`/* these just push these two parameters on the stack */`
			`SAVE edi`
			`SAVE esi`

			`movl %ecx, %ebp`
			`movl %eax, %esi`
			`movl %edx, %edi`

			`read64 %ebp`
			`1:`
			`cmpl %eax, 0(%esp)`
			`je 4f`
			`2:`
			`movl %eax, %ebx`
			`movl %edx, %ecx`
			`addl %esi, %ebx`
			`adcl %edi, %ecx`
			`LOCK_PREFIX`
			`cmpxchg8b (%ebp)`
			`jne 1b`

x86-32: Fix atomic64_add_unless return value convention atomic64_add_unless must return 1 if it performed the add and 0 otherwise. The implementation did the opposite thing. Reported-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Luca Barbieri <luca@luca-barbieri.com> LKML-Reference: <1267469749-11878-3-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com> 2010-03-01 11:55:46 -07:00			`movl $1, %eax`
x86-32: Rewrite 32-bit atomic64 functions in assembly This patch replaces atomic64_32.c with two assembly implementations, one for 386/486 machines using pushf/cli/popf and one for 586+ machines using cmpxchg8b. The cmpxchg8b implementation provides the following advantages over the current one: 1. Implements atomic64_add_unless, atomic64_dec_if_positive and atomic64_inc_not_zero 2. Uses the ZF flag changed by cmpxchg8b instead of doing a comparison 3. Uses custom register calling conventions that reduce or eliminate register moves to suit cmpxchg8b 4. Reads the initial value instead of using cmpxchg8b to do that. Currently we use lock xaddl and movl, which seems the fastest. 5. Does not use the lock prefix for atomic64_set 64-bit writes are already atomic, so we don't need that. We still need it for atomic64_read to avoid restoring a value changed in the meantime. 6. Allocates registers as well or better than gcc The 386 implementation provides support for 386 and 486 machines. 386/486 SMP is not supported (we dropped it), but such support can be added easily if desired. A pure assembly implementation is required due to the custom calling conventions, and desire to use %ebp in atomic64_add_return (we need 7 registers...), as well as the ability to use pushf/popf in the 386 code without an intermediate pop/push. 
The parameter names are changed to match the convention in atomic_64.h Changes in v3 (due to rebasing to tip/x86/asm): - Patches atomic64_32.h instead of atomic_32.h - Uses the CALL alternative mechanism from commit 1b1d9258181bae199dc940f4bd0298126b9a73d9 Changes in v2: - Merged 386 and cx8 support in the same patch - 386 support now done in assembly, C code no longer used at all - cmpxchg64 is used for atomic64_cmpxchg - stop using macros, use one-line inline functions instead - miscellaneous changes and improvements Signed-off-by: Luca Barbieri <luca@luca-barbieri.com> LKML-Reference: <1267005265-27958-5-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com> 2010-02-24 02:54:25 -07:00			`3:`
			`addl $8, %esp`
			`CFI_ADJUST_CFA_OFFSET -8`
			`RESTORE ebx`
			`RESTORE ebp`
			`ret`
			`4:`
			`cmpl %edx, 4(%esp)`
			`jne 2b`
x86-32: Fix atomic64_add_unless return value convention atomic64_add_unless must return 1 if it performed the add and 0 otherwise. The implementation did the opposite thing. Reported-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Luca Barbieri <luca@luca-barbieri.com> LKML-Reference: <1267469749-11878-3-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com> 2010-03-01 11:55:46 -07:00			`xorl %eax, %eax`
x86-32: Rewrite 32-bit atomic64 functions in assembly This patch replaces atomic64_32.c with two assembly implementations, one for 386/486 machines using pushf/cli/popf and one for 586+ machines using cmpxchg8b. The cmpxchg8b implementation provides the following advantages over the current one: 1. Implements atomic64_add_unless, atomic64_dec_if_positive and atomic64_inc_not_zero 2. Uses the ZF flag changed by cmpxchg8b instead of doing a comparison 3. Uses custom register calling conventions that reduce or eliminate register moves to suit cmpxchg8b 4. Reads the initial value instead of using cmpxchg8b to do that. Currently we use lock xaddl and movl, which seems the fastest. 5. Does not use the lock prefix for atomic64_set 64-bit writes are already atomic, so we don't need that. We still need it for atomic64_read to avoid restoring a value changed in the meantime. 6. Allocates registers as well or better than gcc The 386 implementation provides support for 386 and 486 machines. 386/486 SMP is not supported (we dropped it), but such support can be added easily if desired. A pure assembly implementation is required due to the custom calling conventions, and desire to use %ebp in atomic64_add_return (we need 7 registers...), as well as the ability to use pushf/popf in the 386 code without an intermediate pop/push. 
The parameter names are changed to match the convention in atomic_64.h Changes in v3 (due to rebasing to tip/x86/asm): - Patches atomic64_32.h instead of atomic_32.h - Uses the CALL alternative mechanism from commit 1b1d9258181bae199dc940f4bd0298126b9a73d9 Changes in v2: - Merged 386 and cx8 support in the same patch - 386 support now done in assembly, C code no longer used at all - cmpxchg64 is used for atomic64_cmpxchg - stop using macros, use one-line inline functions instead - miscellaneous changes and improvements Signed-off-by: Luca Barbieri <luca@luca-barbieri.com> LKML-Reference: <1267005265-27958-5-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com> 2010-02-24 02:54:25 -07:00			`jmp 3b`
			`CFI_ENDPROC`
			`ENDPROC(atomic64_add_unless_cx8)`

			`ENTRY(atomic64_inc_not_zero_cx8)`
			`CFI_STARTPROC`
			`SAVE ebx`

			`read64 %esi`
			`1:`
			`testl %eax, %eax`
			`je 4f`
			`2:`
			`movl %eax, %ebx`
			`movl %edx, %ecx`
			`addl $1, %ebx`
			`adcl $0, %ecx`
			`LOCK_PREFIX`
			`cmpxchg8b (%esi)`
			`jne 1b`

x86-32: Fix atomic64_inc_not_zero return value convention atomic64_inc_not_zero must return 1 if it performed the add and 0 otherwise. It was doing the opposite thing. Signed-off-by: Luca Barbieri <luca@luca-barbieri.com> LKML-Reference: <1267469749-11878-6-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com> 2010-03-01 11:55:49 -07:00			`movl $1, %eax`
x86-32: Rewrite 32-bit atomic64 functions in assembly This patch replaces atomic64_32.c with two assembly implementations, one for 386/486 machines using pushf/cli/popf and one for 586+ machines using cmpxchg8b. The cmpxchg8b implementation provides the following advantages over the current one: 1. Implements atomic64_add_unless, atomic64_dec_if_positive and atomic64_inc_not_zero 2. Uses the ZF flag changed by cmpxchg8b instead of doing a comparison 3. Uses custom register calling conventions that reduce or eliminate register moves to suit cmpxchg8b 4. Reads the initial value instead of using cmpxchg8b to do that. Currently we use lock xaddl and movl, which seems the fastest. 5. Does not use the lock prefix for atomic64_set 64-bit writes are already atomic, so we don't need that. We still need it for atomic64_read to avoid restoring a value changed in the meantime. 6. Allocates registers as well or better than gcc The 386 implementation provides support for 386 and 486 machines. 386/486 SMP is not supported (we dropped it), but such support can be added easily if desired. A pure assembly implementation is required due to the custom calling conventions, and desire to use %ebp in atomic64_add_return (we need 7 registers...), as well as the ability to use pushf/popf in the 386 code without an intermediate pop/push. 
The parameter names are changed to match the convention in atomic_64.h Changes in v3 (due to rebasing to tip/x86/asm): - Patches atomic64_32.h instead of atomic_32.h - Uses the CALL alternative mechanism from commit 1b1d9258181bae199dc940f4bd0298126b9a73d9 Changes in v2: - Merged 386 and cx8 support in the same patch - 386 support now done in assembly, C code no longer used at all - cmpxchg64 is used for atomic64_cmpxchg - stop using macros, use one-line inline functions instead - miscellaneous changes and improvements Signed-off-by: Luca Barbieri <luca@luca-barbieri.com> LKML-Reference: <1267005265-27958-5-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com> 2010-02-24 02:54:25 -07:00			`3:`
			`RESTORE ebx`
			`ret`
			`4:`
			`testl %edx, %edx`
			`jne 2b`
			`jmp 3b`
			`CFI_ENDPROC`
			`ENDPROC(atomic64_inc_not_zero_cx8)`