/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */

#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>
/*
 * Some CPUs run faster using the string copy instructions (sane microcode).
 * It is also a lot simpler. Use this when possible. But, don't use streaming
 * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
 * prefetch distance based on SMP/UP.
 */
/*
 * void copy_page(void *to, void *from)
 *
 * Copy one 4096-byte page.
 * In:    %rdi = to, %rsi = from (kernel / SysV argument registers)
 * Clobb: %rax-free path; uses %rcx, %rsi, %rdi, flags
 *
 * On CPUs advertising X86_FEATURE_REP_GOOD the microcoded string move
 * is the fastest (and simplest) way to copy a page, so the ALTERNATIVE
 * below is patched to fall through to it; on everything else it is
 * patched into a jump to the register-based unrolled copy.
 */
	ALIGN
ENTRY(copy_page)
	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
	movl	$4096/8, %ecx		/* 512 quadwords per 4K page */
	rep	movsq			/* (%rsi) -> (%rdi), %rcx times */
	ret
ENDPROC(copy_page)
EXPORT_SYMBOL(copy_page)
Jan Beulich | 8d379da | 2006-09-26 10:52:32 +0200 | [diff] [blame] | 22 | |
/*
 * copy_page_regs - copy a 4096-byte page via explicit 64-bit moves.
 *
 * Fallback for CPUs without X86_FEATURE_REP_GOOD, where rep movsq is
 * slow. Copies one cache line (8 quadwords = 64 bytes) per iteration.
 * In:    %rdi = destination, %rsi = source
 * Note:  %rbx and %r12 are callee-saved in the kernel ABI, so they are
 *        spilled to the stack for the duration of the copy.
 */
ENTRY(copy_page_regs)
	subq	$2*8, %rsp		/* frame for the two saved regs */
	movq	%rbx, (%rsp)
	movq	%r12, 1*8(%rsp)

	/*
	 * Main loop: all but the last 5 cache lines, prefetching 5 lines
	 * ahead so the tail loop never prefetches past the source page.
	 * %rcx = cache lines remaining in this loop (starts at 59).
	 */
	movl	$(4096/64)-5, %ecx
	.p2align 4
.Lline_loop:
	dec	%rcx
	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	prefetcht0 5*64(%rsi)		/* keep 5 lines ahead of the loads */

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8,  0x8*3(%rdi)
	movq	%r9,  0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	leaq	64(%rsi), %rsi
	leaq	64(%rdi), %rdi

	jnz	.Lline_loop

	/* Tail loop: final 5 cache lines, no prefetch needed. */
	movl	$5, %ecx
	.p2align 4
.Ltail_loop:
	decl	%ecx

	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8,  0x8*3(%rdi)
	movq	%r9,  0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	leaq	64(%rdi), %rdi
	leaq	64(%rsi), %rsi
	jnz	.Ltail_loop

	/* Restore callee-saved registers and tear down the frame. */
	movq	(%rsp), %rbx
	movq	1*8(%rsp), %r12
	addq	$2*8, %rsp
	ret
ENDPROC(copy_page_regs)