x86, mem: memmove_64.S: Optimize memmove by enhanced REP MOVSB/STOSB
Support memmove() with enhanced REP MOVSB. On processors that support
enhanced REP MOVSB/STOSB, an alternative memmove() using enhanced REP
MOVSB overrides the original forward-copy code. The patch does not
change the backward memmove case to use enhanced REP MOVSB.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305671358-14478-9-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
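For reference, the feature gating this alternative is ERMS (enhanced REP
MOVSB/STOSB), reported by CPUID leaf 7, sub-leaf 0, EBX bit 9; that is the
bit behind the X86_FEATURE_ERMS flag used in the diff below. A minimal
user-space detection sketch, not part of the patch, assuming a GCC/clang
toolchain that provides <cpuid.h> and __get_cpuid_count():

    #include <cpuid.h>
    #include <stdio.h>

    /* ERMS is CPUID.(EAX=7, ECX=0):EBX[bit 9]. */
    static int cpu_has_erms(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
                    return 0;
            return (ebx >> 9) & 1;
    }

    int main(void)
    {
            printf("ERMS supported: %s\n", cpu_has_erms() ? "yes" : "no");
            return 0;
    }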
This commit is contained in:
parent 101068c1f4
commit 057e05c1d6
1 changed file with 28 additions and 1 deletion
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -8,6 +8,7 @@
 #define _STRING_C
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
 
 #undef memmove
 
@@ -24,6 +25,7 @@
  */
 ENTRY(memmove)
 	CFI_STARTPROC
+
 	/* Handle more 32bytes in loop */
 	mov %rdi, %rax
 	cmp $0x20, %rdx
@@ -31,8 +33,13 @@ ENTRY(memmove)
 
 	/* Decide forward/backward copy mode */
 	cmp %rdi, %rsi
-	jb 2f
+	jge .Lmemmove_begin_forward
+	mov %rsi, %r8
+	add %rdx, %r8
+	cmp %rdi, %r8
+	jg 2f
 
+.Lmemmove_begin_forward:
 	/*
	 * movsq instruction have many startup latency
	 * so we handle small size by general register.
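In C terms, the dispatch added above behaves like the sketch below. The
helper names are hypothetical and the byte loops merely stand in for the
tuned assembly paths; only the branch structure mirrors the patch
(src >= dst copies forward; otherwise forward is still safe unless the
source tail, src + len, overruns dst):

    #include <stddef.h>

    /* Stand-ins for the tuned asm copy loops (hypothetical helpers). */
    static void copy_forward(char *d, const char *s, size_t n)
    {
            while (n--)
                    *d++ = *s++;    /* ascending, like rep movsq/movsb */
    }

    static void copy_backward(char *d, const char *s, size_t n)
    {
            while (n--)
                    d[n] = s[n];    /* descending, end to start */
    }

    void *memmove_sketch(void *dst, const void *src, size_t len)
    {
            char *d = dst;
            const char *s = src;

            if (s >= d)                     /* cmp %rdi,%rsi; jge ... */
                    copy_forward(d, s, len);
            else if (s + len > d)           /* source tail overlaps dst */
                    copy_backward(d, s, len);
            else                            /* src < dst but no overlap */
                    copy_forward(d, s, len);
            return dst;
    }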
@@ -78,6 +85,8 @@ ENTRY(memmove)
 	rep movsq
 	movq %r11, (%r10)
 	jmp 13f
+.Lmemmove_end_forward:
+
 	/*
	 * Handle data backward by movsq.
	 */
@@ -194,4 +203,22 @@ ENTRY(memmove)
 13:
 	retq
 	CFI_ENDPROC
+
+	.section .altinstr_replacement,"ax"
+.Lmemmove_begin_forward_efs:
+	/* Forward moving data. */
+	movq %rdx, %rcx
+	rep movsb
+	retq
+.Lmemmove_end_forward_efs:
+	.previous
+
+	.section .altinstructions,"a"
+	.align 8
+	.quad .Lmemmove_begin_forward
+	.quad .Lmemmove_begin_forward_efs
+	.word X86_FEATURE_ERMS
+	.byte .Lmemmove_end_forward-.Lmemmove_begin_forward
+	.byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
+	.previous
 ENDPROC(memmove)
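The replacement body is three instructions: load the byte count into %rcx
and let a single rep movsb perform the whole forward copy. The
.altinstructions record stores the start addresses of the original and
replacement sequences, the gating feature bit (X86_FEATURE_ERMS), and both
sequence lengths, so the kernel can patch the forward path in place at boot
when the CPU has ERMS. A stand-alone C sketch of that replacement path,
with a hypothetical name and GNU inline asm (unlike the patched kernel
code, it unconditionally uses rep movsb and handles only the forward case):

    #include <stddef.h>

    /* Forward copy via rep movsb: %rdi = dst, %rsi = src, %rcx = len. */
    static void *memmove_erms_forward(void *dst, const void *src, size_t len)
    {
            void *d = dst;

            asm volatile("rep movsb"
                         : "+D" (d), "+S" (src), "+c" (len)
                         :
                         : "memory");
            return dst;
    }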