From a5b250a428aabc619ace872f8220a7d0b8f7d557 Mon Sep 17 00:00:00 2001 From: Andi Kleen <ak@suse.de> Date: Sat, 5 Nov 2005 17:25:54 +0100 Subject: [PATCH] x86_64: Remove optimization for B stepping AMD K8 B-stepping parts were the first shipping Opterons. memcpy/memset/copy_page/clear_page had special optimized versions for them. These are really old and in the minority now, and the difference from the generic versions (using rep microcode) is not that big anyway. So just remove them. TODO: figure out optimized versions for Intel Netburst based EM64T Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- arch/x86_64/lib/copy_page.S | 87 --------------------------------------------- 1 file changed, 87 deletions(-) (limited to 'arch/x86_64/lib/copy_page.S') diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S index dd3aa47b6bf..621a1976940 100644 --- a/arch/x86_64/lib/copy_page.S +++ b/arch/x86_64/lib/copy_page.S @@ -8,94 +8,7 @@ .globl copy_page .p2align 4 copy_page: - subq $3*8,%rsp - movq %rbx,(%rsp) - movq %r12,1*8(%rsp) - movq %r13,2*8(%rsp) - - movl $(4096/64)-5,%ecx - .p2align 4 -.Loop64: - dec %rcx - - movq (%rsi), %rax - movq 8 (%rsi), %rbx - movq 16 (%rsi), %rdx - movq 24 (%rsi), %r8 - movq 32 (%rsi), %r9 - movq 40 (%rsi), %r10 - movq 48 (%rsi), %r11 - movq 56 (%rsi), %r12 - - prefetcht0 5*64(%rsi) - - movq %rax, (%rdi) - movq %rbx, 8 (%rdi) - movq %rdx, 16 (%rdi) - movq %r8, 24 (%rdi) - movq %r9, 32 (%rdi) - movq %r10, 40 (%rdi) - movq %r11, 48 (%rdi) - movq %r12, 56 (%rdi) - - leaq 64 (%rsi), %rsi - leaq 64 (%rdi), %rdi - - jnz .Loop64 - - movl $5,%ecx - .p2align 4 -.Loop2: - decl %ecx - - movq (%rsi), %rax - movq 8 (%rsi), %rbx - movq 16 (%rsi), %rdx - movq 24 (%rsi), %r8 - movq 32 (%rsi), %r9 - movq 40 (%rsi), %r10 - movq 48 (%rsi), %r11 - movq 56 (%rsi), %r12 - - movq %rax, (%rdi) - movq %rbx, 8 (%rdi) - movq %rdx, 16 (%rdi) - movq %r8, 24 (%rdi) - movq %r9, 32 (%rdi) - movq %r10, 40 (%rdi) - movq %r11, 48 (%rdi) - movq %r12, 56 (%rdi) - - leaq 
64(%rdi),%rdi - leaq 64(%rsi),%rsi - - jnz .Loop2 - - movq (%rsp),%rbx - movq 1*8(%rsp),%r12 - movq 2*8(%rsp),%r13 - addq $3*8,%rsp - ret - - /* C stepping K8 run faster using the string copy instructions. - It is also a lot simpler. Use this when possible */ - -#include <asm/cpufeature.h> - - .section .altinstructions,"a" - .align 8 - .quad copy_page - .quad copy_page_c - .byte X86_FEATURE_K8_C - .byte copy_page_c_end-copy_page_c - .byte copy_page_c_end-copy_page_c - .previous - - .section .altinstr_replacement,"ax" -copy_page_c: movl $4096/8,%ecx rep movsq ret -copy_page_c_end: - .previous -- cgit