Ard Biesheuvel | 0195659 | 2013-05-17 18:51:23 +0200 | [diff] [blame] | 1 | /* |
| 2 | * linux/arch/arm/lib/xor-neon.c |
| 3 | * |
| 4 | * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or modify |
| 7 | * it under the terms of the GNU General Public License version 2 as |
| 8 | * published by the Free Software Foundation. |
| 9 | */ |
| 10 | |
| 11 | #include <linux/raid/xor.h> |
Ard Biesheuvel | 9319206 | 2013-09-09 15:08:38 +0100 | [diff] [blame] | 12 | #include <linux/module.h> |
| 13 | |
| 14 | MODULE_LICENSE("GPL"); |
Ard Biesheuvel | 0195659 | 2013-05-17 18:51:23 +0200 | [diff] [blame] | 15 | |
| 16 | #ifndef __ARM_NEON__ |
| 17 | #error You should compile this file with '-mfloat-abi=softfp -mfpu=neon' |
| 18 | #endif |
| 19 | |
| 20 | /* |
| 21 | * Pull in the reference implementations while instructing GCC (through |
| 22 | * -ftree-vectorize) to attempt to exploit implicit parallelism and emit |
| 23 | * NEON instructions. |
| 24 | */ |
| 25 | #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) |
| 26 | #pragma GCC optimize "tree-vectorize" |
| 27 | #else |
| 28 | /* |
| 29 | * While older versions of GCC do not generate incorrect code, they fail to |
| 30 | * recognize the parallel nature of these functions, and emit plain ARM code, |
| 31 | * which is known to be slower than the optimized ARM code in asm-arm/xor.h. |
| 32 | */ |
| 33 | #warning This code requires at least version 4.6 of GCC |
| 34 | #endif |
| 35 | |
| 36 | #pragma GCC diagnostic ignored "-Wunused-variable" |
| 37 | #include <asm-generic/xor.h> |
| 38 | |
| 39 | struct xor_block_template const xor_block_neon_inner = { |
| 40 | .name = "__inner_neon__", |
| 41 | .do_2 = xor_8regs_2, |
| 42 | .do_3 = xor_8regs_3, |
| 43 | .do_4 = xor_8regs_4, |
| 44 | .do_5 = xor_8regs_5, |
| 45 | }; |
Ard Biesheuvel | 9319206 | 2013-09-09 15:08:38 +0100 | [diff] [blame] | 46 | EXPORT_SYMBOL(xor_block_neon_inner); |