Martin Schwidefsky | 2cfc5f9 | 2016-02-02 14:40:40 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Optimized xor_block operation for RAID4/5 |
| 3 | * |
| 4 | * Copyright IBM Corp. 2016 |
| 5 | * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> |
| 6 | */ |
| 7 | |
| 8 | #include <linux/types.h> |
| 9 | #include <linux/module.h> |
| 10 | #include <linux/raid/xor.h> |
| 11 | |
| 12 | static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) |
| 13 | { |
| 14 | asm volatile( |
| 15 | " larl 1,2f\n" |
| 16 | " aghi %0,-1\n" |
| 17 | " jm 3f\n" |
| 18 | " srlg 0,%0,8\n" |
| 19 | " ltgr 0,0\n" |
| 20 | " jz 1f\n" |
| 21 | "0: xc 0(256,%1),0(%2)\n" |
| 22 | " la %1,256(%1)\n" |
| 23 | " la %2,256(%2)\n" |
| 24 | " brctg 0,0b\n" |
| 25 | "1: ex %0,0(1)\n" |
| 26 | " j 3f\n" |
| 27 | "2: xc 0(1,%1),0(%2)\n" |
| 28 | "3:\n" |
| 29 | : : "d" (bytes), "a" (p1), "a" (p2) |
| 30 | : "0", "1", "cc", "memory"); |
| 31 | } |
| 32 | |
/*
 * xor_xc_3 - p1 ^= p2 ^ p3 over 'bytes' bytes, via the storage-to-
 * storage XC instruction.  Full 256-byte chunks are handled in the
 * loop at label 0; the 1..256 byte tail is handled by EXecuting the
 * XC templates at label 2.  The EX offsets 0(1) and 6(1) rely on each
 * XC template being 6 bytes long.
 */
static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
		     unsigned long *p3)
{
	asm volatile(
		" larl 1,2f\n"		/* r1 -> XC templates at 2: */
		" aghi %0,-1\n"		/* bytes - 1; negative -> nothing to do */
		" jm 3f\n"
		" srlg 0,%0,8\n"	/* r0 = number of full 256-byte chunks */
		" ltgr 0,0\n"
		" jz 1f\n"
		"0: xc 0(256,%1),0(%2)\n"
		" xc 0(256,%1),0(%3)\n"
		" la %1,256(%1)\n"
		" la %2,256(%2)\n"
		" la %3,256(%3)\n"
		" brctg 0,0b\n"
		"1: ex %0,0(1)\n"	/* tail: execute templates with len-1 */
		" ex %0,6(1)\n"
		" j 3f\n"
		"2: xc 0(1,%1),0(%2)\n"	/* templates, length patched by EX */
		" xc 0(1,%1),0(%3)\n"
		"3:\n"
		: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3)
		: : "0", "1", "cc", "memory");
}
| 58 | |
/*
 * xor_xc_4 - p1 ^= p2 ^ p3 ^ p4 over 'bytes' bytes, via the storage-
 * to-storage XC instruction.  Same structure as xor_xc_2/3: 256-byte
 * chunks in the loop at label 0, the 1..256 byte tail via EX of the
 * XC templates at label 2 (offsets 0/6/12 assume 6-byte templates).
 */
static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
		     unsigned long *p3, unsigned long *p4)
{
	asm volatile(
		" larl 1,2f\n"		/* r1 -> XC templates at 2: */
		" aghi %0,-1\n"		/* bytes - 1; negative -> nothing to do */
		" jm 3f\n"
		" srlg 0,%0,8\n"	/* r0 = number of full 256-byte chunks */
		" ltgr 0,0\n"
		" jz 1f\n"
		"0: xc 0(256,%1),0(%2)\n"
		" xc 0(256,%1),0(%3)\n"
		" xc 0(256,%1),0(%4)\n"
		" la %1,256(%1)\n"
		" la %2,256(%2)\n"
		" la %3,256(%3)\n"
		" la %4,256(%4)\n"
		" brctg 0,0b\n"
		"1: ex %0,0(1)\n"	/* tail: execute templates with len-1 */
		" ex %0,6(1)\n"
		" ex %0,12(1)\n"
		" j 3f\n"
		"2: xc 0(1,%1),0(%2)\n"	/* templates, length patched by EX */
		" xc 0(1,%1),0(%3)\n"
		" xc 0(1,%1),0(%4)\n"
		"3:\n"
		: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4)
		: : "0", "1", "cc", "memory");
}
| 88 | |
/*
 * xor_xc_5 - p1 ^= p2 ^ p3 ^ p4 ^ p5 over 'bytes' bytes, via the
 * storage-to-storage XC instruction.  Same structure as the smaller
 * variants: 256-byte chunks in the loop at label 0, the 1..256 byte
 * tail via EX of the XC templates at label 2 (offsets 0/6/12/18
 * assume 6-byte templates).
 */
static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
		     unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	/*
	 * Get around a gcc oddity: pin p5 to general register 7 so the
	 * compiler does not run out of usable address registers for the
	 * six "+a"/"+d" operands of the asm below.
	 */
	register unsigned long *reg7 asm ("7") = p5;

	asm volatile(
		" larl 1,2f\n"		/* r1 -> XC templates at 2: */
		" aghi %0,-1\n"		/* bytes - 1; negative -> nothing to do */
		" jm 3f\n"
		" srlg 0,%0,8\n"	/* r0 = number of full 256-byte chunks */
		" ltgr 0,0\n"
		" jz 1f\n"
		"0: xc 0(256,%1),0(%2)\n"
		" xc 0(256,%1),0(%3)\n"
		" xc 0(256,%1),0(%4)\n"
		" xc 0(256,%1),0(%5)\n"
		" la %1,256(%1)\n"
		" la %2,256(%2)\n"
		" la %3,256(%3)\n"
		" la %4,256(%4)\n"
		" la %5,256(%5)\n"
		" brctg 0,0b\n"
		"1: ex %0,0(1)\n"	/* tail: execute templates with len-1 */
		" ex %0,6(1)\n"
		" ex %0,12(1)\n"
		" ex %0,18(1)\n"
		" j 3f\n"
		"2: xc 0(1,%1),0(%2)\n"	/* templates, length patched by EX */
		" xc 0(1,%1),0(%3)\n"
		" xc 0(1,%1),0(%4)\n"
		" xc 0(1,%1),0(%5)\n"
		"3:\n"
		: "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4),
		  "+a" (reg7)
		: : "0", "1", "cc", "memory");
}
| 126 | |
/*
 * xor_block template backed by the XC instruction; the generic RAID
 * xor code (see <linux/raid/xor.h>) selects among registered templates
 * and dispatches through the do_N callbacks by source-buffer count.
 */
struct xor_block_template xor_block_xc = {
	.name = "xc",
	.do_2 = xor_xc_2,
	.do_3 = xor_xc_3,
	.do_4 = xor_xc_4,
	.do_5 = xor_xc_5,
};
EXPORT_SYMBOL(xor_block_xc);