Greg Kroah-Hartman | b244131 | 2017-11-01 15:07:57 +0100 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2 | /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ |
Dave Jones | 038b0a6 | 2006-10-04 03:38:54 -0400 | [diff] [blame] | 3 | |
Jan Beulich | 8d379da | 2006-09-26 10:52:32 +0200 | [diff] [blame] | 4 | #include <linux/linkage.h> |
Borislav Petkov | cd4d09e | 2016-01-26 22:12:04 +0100 | [diff] [blame] | 5 | #include <asm/cpufeatures.h> |
Andy Lutomirski | 59e97e4 | 2011-07-13 09:24:10 -0400 | [diff] [blame] | 6 | #include <asm/alternative-asm.h> |
Al Viro | 784d569 | 2016-01-11 11:04:34 -0500 | [diff] [blame] | 7 | #include <asm/export.h> |
Jan Beulich | 8d379da | 2006-09-26 10:52:32 +0200 | [diff] [blame] | 8 | |
/*
 * copy_page - copy one 4096-byte page
 *
 * In:       %rdi = destination, %rsi = source (the operands of rep movsq)
 * Out:      %rdi and %rsi are each advanced by 4096, %rcx = 0
 * Clobbers: %rcx, %rsi, %rdi on the string-copy path; see copy_page_regs
 *           for the registers touched on the fallback path.
 * Assumes:  the direction flag is clear on entry, so the copy runs
 *           forwards.  This is not checked here.
 *
 * Some CPUs run faster using the string copy instructions (sane microcode).
 * It is also a lot simpler, so use it when possible.  But don't use the
 * string copy unless the CPU indicates X86_FEATURE_REP_GOOD: the default
 * instruction below is a jump to the register-based copy_page_regs, and
 * the alternative replaces it with nothing when the feature is present,
 * letting execution fall through into rep movsq.
 *
 * Could vary the prefetch distance (used by copy_page_regs) based on SMP/UP.
 */
	ALIGN
ENTRY(copy_page)
	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
	movl	$4096/8, %ecx		/* page size in quadwords */
	rep	movsq
	ret
ENDPROC(copy_page)
EXPORT_SYMBOL(copy_page)
Jan Beulich | 8d379da | 2006-09-26 10:52:32 +0200 | [diff] [blame] | 23 | |
/*
 * copy_page_regs - copy one 4096-byte page through general-purpose registers
 *
 * In:       %rdi = destination, %rsi = source
 * Out:      %rdi and %rsi are each advanced by 4096, %rcx = 0
 * Clobbers: %rax, %rcx, %rdx, %r8-%r11, flags
 * Saves:    %rbx and %r12 are used as scratch and restored before return
 * Stack:    16 bytes, allocated explicitly below %rsp on entry
 *
 * The page is copied 64 bytes per iteration in two phases:
 *
 *   .Loop64  copies the first (4096/64 - COPY_PAGE_PREFETCH_LINES) blocks
 *            and prefetches COPY_PAGE_PREFETCH_LINES blocks ahead of the
 *            one being read.  On its final iteration the prefetch targets
 *            the last 64-byte block of the source page.
 *   .Loop2   copies the remaining COPY_PAGE_PREFETCH_LINES blocks without
 *            prefetching, so no prefetch is issued past the end of the
 *            source page.
 */

/* Prefetch lead, in 64-byte blocks; also the length of the tail loop. */
#define COPY_PAGE_PREFETCH_LINES	5

ENTRY(copy_page_regs)
	subq	$2*8, %rsp
	movq	%rbx, (%rsp)
	movq	%r12, 1*8(%rsp)

	movl	$(4096/64)-COPY_PAGE_PREFETCH_LINES, %ecx
	.p2align 4
.Loop64:
	/*
	 * The counter is decremented up front.  Nothing between here and the
	 * jnz modifies the flags (only mov, prefetch and lea follow), so the
	 * branch still sees ZF from this decrement.
	 */
	decl	%ecx

	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	prefetcht0 COPY_PAGE_PREFETCH_LINES*64(%rsi)

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8,  0x8*3(%rdi)
	movq	%r9,  0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	/* lea rather than add: advance the pointers without touching ZF */
	leaq	64(%rsi), %rsi
	leaq	64(%rdi), %rdi

	jnz	.Loop64

	movl	$COPY_PAGE_PREFETCH_LINES, %ecx
	.p2align 4
.Loop2:
	decl	%ecx			/* ZF is consumed by the jnz below */

	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8,  0x8*3(%rdi)
	movq	%r9,  0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	leaq	64(%rdi), %rdi
	leaq	64(%rsi), %rsi
	jnz	.Loop2

	movq	(%rsp), %rbx
	movq	1*8(%rsp), %r12
	addq	$2*8, %rsp
	ret
ENDPROC(copy_page_regs)