/*

Copyright (c) 2013 STMicroelectronics
Written by Christophe Lyon

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

*/

Christophe Lyon | 1775be0 | 2014-07-10 13:46:54 +0200 | [diff] [blame] | 26 | #if defined(__arm__) || defined(__aarch64__) |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 27 | #include <arm_neon.h> |
| 28 | #else |
| 29 | #include "stm-arm-neon.h" |
| 30 | #endif |
| 31 | #include "stm-arm-neon-ref.h" |
| 32 | |
/* Initialization helpers; 4 slices are needed for vld2, vld3 and
   vld4.

   MY_INIT_TAB<k>(T,W,N) selects the k-th slice of N sample values and
   passes T##W##_t as the element type each value is cast to.  xNAME is
   not defined in this file; it is presumably the token-pasting helper
   that turns (INIT_TAB<k>, N) into INIT_TAB<k>_<N> -- confirm against
   stm-arm-neon-ref.h.  */
#define MY_INIT_TAB(T,W,N)  xNAME(INIT_TAB,N)(T##W##_t)
#define MY_INIT_TAB2(T,W,N) xNAME(INIT_TAB2,N)(T##W##_t)
#define MY_INIT_TAB3(T,W,N) xNAME(INIT_TAB3,N)(T##W##_t)
#define MY_INIT_TAB4(T,W,N) xNAME(INIT_TAB4,N)(T##W##_t)

/* Initialized input buffers: define the array named by
   VECT_VAR_DECL(V,T,W,N) and fill it with the first N-element slice of
   sample values.  The expansion already ends with ';'.  */
#define VECT_VAR_DECL_INIT(V, T, W, N)				\
  VECT_VAR_DECL(V,T,W,N) [] = { MY_INIT_TAB(T,W,N) };

/* Specialized initializer with 4 entries, as used by vldX_dup and
   vdup tests, which iterate 4 times on input buffers.  The array name
   is still derived from N, but the contents always come from the
   4-element slice.  */
#define VECT_VAR_DECL_INIT4(V, T, W, N)				\
  VECT_VAR_DECL(V,T,W,N) [] = { MY_INIT_TAB(T,W,4) };

/* Initializers for arrays of vectors.

   VECT_ARRAY_INIT<X>(V,T,W,N) defines an array of T##W##_t named by
   VECT_ARRAY_VAR(V,T,W,N,X) and fills it with X consecutive N-element
   slices (X * N values in total).  Each slice macro supplies its own
   trailing comma, so the slices are simply juxtaposed.  */
#define VECT_ARRAY_INIT2(V, T, W, N)		\
  T##W##_t VECT_ARRAY_VAR(V,T,W,N,2)[] =	\
    { MY_INIT_TAB(T,W,N)			\
      MY_INIT_TAB2(T,W,N) };

#define VECT_ARRAY_INIT3(V, T, W, N)		\
  T##W##_t VECT_ARRAY_VAR(V,T,W,N,3)[] =	\
    { MY_INIT_TAB(T,W,N)			\
      MY_INIT_TAB2(T,W,N)			\
      MY_INIT_TAB3(T,W,N) };

#define VECT_ARRAY_INIT4(V, T, W, N)		\
  T##W##_t VECT_ARRAY_VAR(V,T,W,N,4)[] =	\
    { MY_INIT_TAB(T,W,N)			\
      MY_INIT_TAB2(T,W,N)			\
      MY_INIT_TAB3(T,W,N)			\
      MY_INIT_TAB4(T,W,N) };

/* Sample initialization vectors.

   INIT_TAB<k>_<N>(T) expands to the k-th slice of N consecutive integer
   values, each cast to T and followed by a comma so that slices can be
   concatenated inside a brace initializer.  For a given N the slices
   continue one another: the first starts at -16 and each following
   slice resumes where the previous one stopped.  */

/* 1-element slices: -16 .. -13.  */
#define INIT_TAB_1(T)				\
  (T)-16,
#define INIT_TAB2_1(T)				\
  (T)-15,
#define INIT_TAB3_1(T)				\
  (T)-14,
#define INIT_TAB4_1(T)				\
  (T)-13,

/* 2-element slices: -16 .. -9.  */
#define INIT_TAB_2(T)				\
  (T)-16, (T)-15,
#define INIT_TAB2_2(T)				\
  (T)-14, (T)-13,
#define INIT_TAB3_2(T)				\
  (T)-12, (T)-11,
#define INIT_TAB4_2(T)				\
  (T)-10, (T)-9,

/* Initializer for vld3_lane tests (single 3-element slice).  */
#define INIT_TAB_3(T)				\
  (T)-16, (T)-15, (T)-14,

/* 4-element slices: -16 .. -1.  */
#define INIT_TAB_4(T)				\
  (T)-16, (T)-15, (T)-14, (T)-13,
#define INIT_TAB2_4(T)				\
  (T)-12, (T)-11, (T)-10, (T)-9,
#define INIT_TAB3_4(T)				\
  (T)-8, (T)-7, (T)-6, (T)-5,
#define INIT_TAB4_4(T)				\
  (T)-4, (T)-3, (T)-2, (T)-1,

/* 8-element slices: -16 .. 15.  */
#define INIT_TAB_8(T)							\
  (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9,
#define INIT_TAB2_8(T)							\
  (T)-8, (T)-7, (T)-6, (T)-5, (T)-4, (T)-3, (T)-2, (T)-1,
#define INIT_TAB3_8(T)							\
  (T)0, (T)1, (T)2, (T)3, (T)4, (T)5, (T)6, (T)7,
#define INIT_TAB4_8(T)							\
  (T)8, (T)9, (T)10, (T)11, (T)12, (T)13, (T)14, (T)15,

/* 16-element slices: -16 .. 47.  */
#define INIT_TAB_16(T)							\
  (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9,	\
  (T)-8, (T)-7, (T)-6, (T)-5, (T)-4, (T)-3, (T)-2, (T)-1,
#define INIT_TAB2_16(T)							\
  (T)0, (T)1, (T)2, (T)3, (T)4, (T)5, (T)6, (T)7,			\
  (T)8, (T)9, (T)10, (T)11, (T)12, (T)13, (T)14, (T)15,
#define INIT_TAB3_16(T)							\
  (T)16, (T)17, (T)18, (T)19, (T)20, (T)21, (T)22, (T)23,		\
  (T)24, (T)25, (T)26, (T)27, (T)28, (T)29, (T)30, (T)31,
#define INIT_TAB4_16(T)							\
  (T)32, (T)33, (T)34, (T)35, (T)36, (T)37, (T)38, (T)39,		\
  (T)40, (T)41, (T)42, (T)43, (T)44, (T)45, (T)46, (T)47,

| 122 | /* Input buffers, one of each size. */ |
| 123 | /* Insert some padding to try to exhibit out of bounds accesses. */ |
| 124 | VECT_VAR_DECL_INIT(buffer, int, 8, 8); |
| 125 | PAD(buffer_pad, int, 8, 8); |
| 126 | VECT_VAR_DECL_INIT(buffer, int, 16, 4); |
| 127 | PAD(buffer_pad, int, 16, 4); |
| 128 | VECT_VAR_DECL_INIT(buffer, int, 32, 2); |
| 129 | PAD(buffer_pad, int, 32, 2); |
| 130 | VECT_VAR_DECL_INIT(buffer, int, 64, 1); |
| 131 | PAD(buffer_pad, int, 64, 1); |
| 132 | VECT_VAR_DECL_INIT(buffer, uint, 8, 8); |
| 133 | PAD(buffer_pad, uint, 8, 8); |
Christophe Lyon | 80902f6 | 2013-03-29 16:26:42 +0100 | [diff] [blame] | 134 | VECT_VAR_DECL_INIT(buffer, poly, 8, 8); |
| 135 | PAD(buffer_pad, poly, 8, 8); |
| 136 | VECT_VAR_DECL_INIT(buffer, poly, 16, 4); |
| 137 | PAD(buffer_pad, poly, 16, 4); |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 138 | VECT_VAR_DECL_INIT(buffer, uint, 16, 4); |
| 139 | PAD(buffer_pad, uint, 16, 4); |
| 140 | VECT_VAR_DECL_INIT(buffer, uint, 32, 2); |
| 141 | PAD(buffer_pad, uint, 32, 2); |
| 142 | VECT_VAR_DECL_INIT(buffer, uint, 64, 1); |
| 143 | PAD(buffer_pad, uint, 64, 1); |
| 144 | VECT_VAR_DECL_INIT(buffer, float, 32, 2); |
| 145 | PAD(buffer_pad, float, 32, 2); |
Christophe Lyon | d98beba | 2016-08-24 18:02:41 +0200 | [diff] [blame] | 146 | #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) |
Christophe Lyon | 34adaf6 | 2013-04-11 15:05:18 +0200 | [diff] [blame] | 147 | /* We need a different initialization for ARMCC, because the compiler |
| 148 | performs the conversion to half-precision internal |
| 149 | representation. */ |
| 150 | #ifdef __ARMCC_VERSION |
| 151 | __fp16 buffer_float16x4[4] = {-16, -15, -14, -13}; |
| 152 | #else |
| 153 | VECT_VAR_DECL(buffer, float, 16, 4) [] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, |
| 154 | 0xcb00 /* -14 */, 0xca80 /* -13 */}; |
| 155 | #endif |
| 156 | PAD(buffer_pad, float, 16, 4); |
| 157 | #endif |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 158 | VECT_VAR_DECL_INIT(buffer, int, 8, 16); |
| 159 | PAD(buffer_pad, int, 8, 16); |
| 160 | VECT_VAR_DECL_INIT(buffer, int, 16, 8); |
| 161 | PAD(buffer_pad, int, 16, 8); |
| 162 | VECT_VAR_DECL_INIT(buffer, int, 32, 4); |
| 163 | PAD(buffer_pad, int, 32, 4); |
| 164 | VECT_VAR_DECL_INIT(buffer, int, 64, 2); |
| 165 | PAD(buffer_pad, int, 64, 2); |
| 166 | VECT_VAR_DECL_INIT(buffer, uint, 8, 16); |
| 167 | PAD(buffer_pad, uint, 8, 16); |
| 168 | VECT_VAR_DECL_INIT(buffer, uint, 16, 8); |
| 169 | PAD(buffer_pad, uint, 16, 8); |
| 170 | VECT_VAR_DECL_INIT(buffer, uint, 32, 4); |
| 171 | PAD(buffer_pad, uint, 32, 4); |
| 172 | VECT_VAR_DECL_INIT(buffer, uint, 64, 2); |
| 173 | PAD(buffer_pad, uint, 64, 2); |
Christophe Lyon | 80902f6 | 2013-03-29 16:26:42 +0100 | [diff] [blame] | 174 | VECT_VAR_DECL_INIT(buffer, poly, 8, 16); |
| 175 | PAD(buffer_pad, poly, 8, 16); |
| 176 | VECT_VAR_DECL_INIT(buffer, poly, 16, 8); |
| 177 | PAD(buffer_pad, poly, 16, 8); |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 178 | VECT_VAR_DECL_INIT(buffer, float, 32, 4); |
| 179 | PAD(buffer_pad, float, 32, 4); |
Christophe Lyon | d98beba | 2016-08-24 18:02:41 +0200 | [diff] [blame] | 180 | #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) |
Christophe Lyon | 34adaf6 | 2013-04-11 15:05:18 +0200 | [diff] [blame] | 181 | #ifdef __ARMCC_VERSION |
| 182 | __fp16 buffer_float16x8[8] = {-16, -15, -14, -13, -12, -11, -10, -9}; |
| 183 | #else |
| 184 | VECT_VAR_DECL(buffer, float, 16, 8) [] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, |
| 185 | 0xcb00 /* -14 */, 0xca80 /* -13 */, |
| 186 | 0xca00 /* -12 */, 0xc980 /* -11 */, |
| 187 | 0xc900 /* -10 */, 0xc880 /* -9 */}; |
| 188 | #endif |
| 189 | PAD(buffer_pad, float, 16, 8); |
| 190 | #endif |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 191 | |
| 192 | /* The tests for vld1_dup and vdup expect at least 4 entries in the |
| 193 | input buffer, so force 1- and 2-elements initializers to have 4 |
| 194 | entries. */ |
| 195 | VECT_VAR_DECL_INIT(buffer_dup, int, 8, 8); |
| 196 | VECT_VAR_DECL(buffer_dup_pad, int, 8, 8); |
| 197 | VECT_VAR_DECL_INIT(buffer_dup, int, 16, 4); |
| 198 | VECT_VAR_DECL(buffer_dup_pad, int, 16, 4); |
| 199 | VECT_VAR_DECL_INIT4(buffer_dup, int, 32, 2); |
| 200 | VECT_VAR_DECL(buffer_dup_pad, int, 32, 2); |
| 201 | VECT_VAR_DECL_INIT4(buffer_dup, int, 64, 1); |
| 202 | VECT_VAR_DECL(buffer_dup_pad, int, 64, 1); |
| 203 | VECT_VAR_DECL_INIT(buffer_dup, uint, 8, 8); |
| 204 | VECT_VAR_DECL(buffer_dup_pad, uint, 8, 8); |
| 205 | VECT_VAR_DECL_INIT(buffer_dup, uint, 16, 4); |
| 206 | VECT_VAR_DECL(buffer_dup_pad, uint, 16, 4); |
| 207 | VECT_VAR_DECL_INIT4(buffer_dup, uint, 32, 2); |
| 208 | VECT_VAR_DECL(buffer_dup_pad, uint, 32, 2); |
| 209 | VECT_VAR_DECL_INIT4(buffer_dup, uint, 64, 1); |
| 210 | VECT_VAR_DECL(buffer_dup_pad, uint, 64, 1); |
Christophe Lyon | 80902f6 | 2013-03-29 16:26:42 +0100 | [diff] [blame] | 211 | VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 8); |
| 212 | VECT_VAR_DECL(buffer_dup_pad, poly, 8, 8); |
| 213 | VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 4); |
| 214 | VECT_VAR_DECL(buffer_dup_pad, poly, 16, 4); |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 215 | VECT_VAR_DECL_INIT4(buffer_dup, float, 32, 2); |
| 216 | VECT_VAR_DECL(buffer_dup_pad, float, 32, 2); |
Christophe Lyon | d98beba | 2016-08-24 18:02:41 +0200 | [diff] [blame] | 217 | #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) |
Christophe Lyon | 34adaf6 | 2013-04-11 15:05:18 +0200 | [diff] [blame] | 218 | #ifdef __ARMCC_VERSION |
| 219 | __fp16 buffer_dup_float16x4[4] = {-16, -15, -14, -13}; |
| 220 | #else |
| 221 | VECT_VAR_DECL(buffer_dup, float, 16, 4)[] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, |
| 222 | 0xcb00 /* -14 */, 0xca80 /* -13 */}; |
| 223 | #endif |
| 224 | PAD(buffer_dup_pad, float, 16, 4); |
| 225 | #endif |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 226 | VECT_VAR_DECL_INIT(buffer_dup, int, 8, 16); |
| 227 | VECT_VAR_DECL(buffer_dup_pad, int, 8, 16); |
| 228 | VECT_VAR_DECL_INIT(buffer_dup, int, 16, 8); |
| 229 | VECT_VAR_DECL(buffer_dup_pad, int, 16, 8); |
| 230 | VECT_VAR_DECL_INIT(buffer_dup, int, 32, 4); |
| 231 | VECT_VAR_DECL(buffer_dup_pad, int, 32, 4); |
| 232 | VECT_VAR_DECL_INIT4(buffer_dup, int, 64, 2); |
| 233 | VECT_VAR_DECL(buffer_dup_pad, int, 64, 2); |
| 234 | VECT_VAR_DECL_INIT(buffer_dup, uint, 8, 16); |
| 235 | VECT_VAR_DECL(buffer_dup_pad, uint, 8, 16); |
| 236 | VECT_VAR_DECL_INIT(buffer_dup, uint, 16, 8); |
| 237 | VECT_VAR_DECL(buffer_dup_pad, uint, 16, 8); |
| 238 | VECT_VAR_DECL_INIT(buffer_dup, uint, 32, 4); |
| 239 | VECT_VAR_DECL(buffer_dup_pad, uint, 32, 4); |
| 240 | VECT_VAR_DECL_INIT4(buffer_dup, uint, 64, 2); |
| 241 | VECT_VAR_DECL(buffer_dup_pad, uint, 64, 2); |
Christophe Lyon | 80902f6 | 2013-03-29 16:26:42 +0100 | [diff] [blame] | 242 | VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 16); |
| 243 | VECT_VAR_DECL(buffer_dup_pad, poly, 8, 16); |
| 244 | VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 8); |
| 245 | VECT_VAR_DECL(buffer_dup_pad, poly, 16, 8); |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 246 | VECT_VAR_DECL_INIT(buffer_dup, float, 32, 4); |
| 247 | VECT_VAR_DECL(buffer_dup_pad, float, 32, 4); |
Christophe Lyon | d98beba | 2016-08-24 18:02:41 +0200 | [diff] [blame] | 248 | #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) |
Christophe Lyon | 34adaf6 | 2013-04-11 15:05:18 +0200 | [diff] [blame] | 249 | #ifdef __ARMCC_VERSION |
| 250 | __fp16 buffer_dup_float16x8[8] = {-16, -15, -14, -13, -12, -11, -10, -9}; |
| 251 | #else |
| 252 | VECT_VAR_DECL(buffer_dup, float, 16, 8)[] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, |
| 253 | 0xcb00 /* -14 */, 0xca80 /* -13 */, |
| 254 | 0xca00 /* -12 */, 0xc980 /* -11 */, |
| 255 | 0xc900 /* -10 */, 0xc880 /* -9 */}; |
| 256 | #endif |
| 257 | PAD(buffer_dup_pad, float, 16, 8); |
| 258 | #endif |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 259 | |
| 260 | /* Input buffers for vld2, 1 of each size */ |
| 261 | VECT_ARRAY_INIT2(buffer_vld2, int, 8, 8); |
| 262 | PAD(buffer_vld2_pad, int, 8, 8); |
| 263 | VECT_ARRAY_INIT2(buffer_vld2, int, 16, 4); |
| 264 | PAD(buffer_vld2_pad, int, 16, 4); |
| 265 | VECT_ARRAY_INIT2(buffer_vld2, int, 32, 2); |
| 266 | PAD(buffer_vld2_pad, int, 32, 2); |
| 267 | VECT_ARRAY_INIT2(buffer_vld2, int, 64, 1); |
| 268 | PAD(buffer_vld2_pad, int, 64, 1); |
| 269 | VECT_ARRAY_INIT2(buffer_vld2, uint, 8, 8); |
| 270 | PAD(buffer_vld2_pad, uint, 8, 8); |
| 271 | VECT_ARRAY_INIT2(buffer_vld2, uint, 16, 4); |
| 272 | PAD(buffer_vld2_pad, uint, 16, 4); |
| 273 | VECT_ARRAY_INIT2(buffer_vld2, uint, 32, 2); |
| 274 | PAD(buffer_vld2_pad, uint, 32, 2); |
| 275 | VECT_ARRAY_INIT2(buffer_vld2, uint, 64, 1); |
| 276 | PAD(buffer_vld2_pad, uint, 64, 1); |
Christophe Lyon | 80902f6 | 2013-03-29 16:26:42 +0100 | [diff] [blame] | 277 | VECT_ARRAY_INIT2(buffer_vld2, poly, 8, 8); |
| 278 | PAD(buffer_vld2_pad, poly, 8, 8); |
| 279 | VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4); |
| 280 | PAD(buffer_vld2_pad, poly, 16, 4); |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 281 | VECT_ARRAY_INIT2(buffer_vld2, float, 32, 2); |
| 282 | PAD(buffer_vld2_pad, float, 32, 2); |
Christophe Lyon | d98beba | 2016-08-24 18:02:41 +0200 | [diff] [blame] | 283 | #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) |
Christophe Lyon | 34adaf6 | 2013-04-11 15:05:18 +0200 | [diff] [blame] | 284 | #ifdef __ARMCC_VERSION |
| 285 | __fp16 buffer_vld2_float16x4x2[4*2] = {-16, -15, -14, -13, -12, -11, -10, -9}; |
| 286 | #else |
| 287 | float16_t buffer_vld2_float16x4x2[4*2] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, |
| 288 | 0xcb00 /* -14 */, 0xca80 /* -13 */, |
| 289 | 0xca00 /* -12 */, 0xc980 /* -11 */, |
| 290 | 0xc900 /* -10 */, 0xc880 /* -9 */}; |
| 291 | #endif |
| 292 | PAD(buffer_vld2_pad, float, 16, 4); |
| 293 | #endif |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 294 | VECT_ARRAY_INIT2(buffer_vld2, int, 8, 16); |
| 295 | PAD(buffer_vld2_pad, int, 8, 16); |
| 296 | VECT_ARRAY_INIT2(buffer_vld2, int, 16, 8); |
| 297 | PAD(buffer_vld2_pad, int, 16, 8); |
| 298 | VECT_ARRAY_INIT2(buffer_vld2, int, 32, 4); |
| 299 | PAD(buffer_vld2_pad, int, 32, 4); |
| 300 | VECT_ARRAY_INIT2(buffer_vld2, int, 64, 2); |
| 301 | PAD(buffer_vld2_pad, int, 64, 2); |
| 302 | VECT_ARRAY_INIT2(buffer_vld2, uint, 8, 16); |
| 303 | PAD(buffer_vld2_pad, uint, 8, 16); |
| 304 | VECT_ARRAY_INIT2(buffer_vld2, uint, 16, 8); |
| 305 | PAD(buffer_vld2_pad, uint, 16, 8); |
| 306 | VECT_ARRAY_INIT2(buffer_vld2, uint, 32, 4); |
| 307 | PAD(buffer_vld2_pad, uint, 32, 4); |
| 308 | VECT_ARRAY_INIT2(buffer_vld2, uint, 64, 2); |
| 309 | PAD(buffer_vld2_pad, uint, 64, 2); |
Christophe Lyon | 80902f6 | 2013-03-29 16:26:42 +0100 | [diff] [blame] | 310 | VECT_ARRAY_INIT2(buffer_vld2, poly, 8, 16); |
| 311 | PAD(buffer_vld2_pad, poly, 8, 16); |
| 312 | VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8); |
| 313 | PAD(buffer_vld2_pad, poly, 16, 8); |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 314 | VECT_ARRAY_INIT2(buffer_vld2, float, 32, 4); |
| 315 | PAD(buffer_vld2_pad, float, 32, 4); |
Christophe Lyon | d98beba | 2016-08-24 18:02:41 +0200 | [diff] [blame] | 316 | #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) |
Christophe Lyon | 34adaf6 | 2013-04-11 15:05:18 +0200 | [diff] [blame] | 317 | #ifdef __ARMCC_VERSION |
| 318 | __fp16 buffer_vld2_float16x8x2[8*2] = {-16, -15, -14, -13, -12, -11, -10, -9, |
| 319 | -8, -7, -6, -5, -4, -3, -2, -1}; |
| 320 | #else |
| 321 | float16_t buffer_vld2_float16x8x2[8*2] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, |
| 322 | 0xcb00 /* -14 */, 0xca80 /* -13 */, |
| 323 | 0xca00 /* -12 */, 0xc980 /* -11 */, |
| 324 | 0xc900 /* -10 */, 0xc880 /* -9 */, |
| 325 | 0xc800 /* -8 */, 0xc700 /* -7 */, |
| 326 | 0xc600 /* -6 */, 0xc500 /* -5 */, |
| 327 | 0xc400 /* -4 */, 0xc200 /* -3 */, |
| 328 | 0xc000 /* -2 */, 0xbc00 /* -1 */}; |
| 329 | #endif |
| 330 | PAD(buffer_vld2_pad, float, 16, 8); |
| 331 | #endif |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 332 | |
| 333 | /* Input buffers for vld3, 1 of each size */ |
| 334 | VECT_ARRAY_INIT3(buffer_vld3, int, 8, 8); |
| 335 | PAD(buffer_vld3_pad, int, 8, 8); |
| 336 | VECT_ARRAY_INIT3(buffer_vld3, int, 16, 4); |
| 337 | PAD(buffer_vld3_pad, int, 16, 4); |
| 338 | VECT_ARRAY_INIT3(buffer_vld3, int, 32, 2); |
| 339 | PAD(buffer_vld3_pad, int, 32, 2); |
| 340 | VECT_ARRAY_INIT3(buffer_vld3, int, 64, 1); |
| 341 | PAD(buffer_vld3_pad, int, 64, 1); |
| 342 | VECT_ARRAY_INIT3(buffer_vld3, uint, 8, 8); |
| 343 | PAD(buffer_vld3_pad, uint, 8, 8); |
| 344 | VECT_ARRAY_INIT3(buffer_vld3, uint, 16, 4); |
| 345 | PAD(buffer_vld3_pad, uint, 16, 4); |
| 346 | VECT_ARRAY_INIT3(buffer_vld3, uint, 32, 2); |
| 347 | PAD(buffer_vld3_pad, uint, 32, 2); |
| 348 | VECT_ARRAY_INIT3(buffer_vld3, uint, 64, 1); |
| 349 | PAD(buffer_vld3_pad, uint, 64, 1); |
Christophe Lyon | 80902f6 | 2013-03-29 16:26:42 +0100 | [diff] [blame] | 350 | VECT_ARRAY_INIT3(buffer_vld3, poly, 8, 8); |
| 351 | PAD(buffer_vld3_pad, poly, 8, 8); |
| 352 | VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4); |
| 353 | PAD(buffer_vld3_pad, poly, 16, 4); |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 354 | VECT_ARRAY_INIT3(buffer_vld3, float, 32, 2); |
| 355 | PAD(buffer_vld3_pad, float, 32, 2); |
Christophe Lyon | d98beba | 2016-08-24 18:02:41 +0200 | [diff] [blame] | 356 | #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) |
Christophe Lyon | 34adaf6 | 2013-04-11 15:05:18 +0200 | [diff] [blame] | 357 | #ifdef __ARMCC_VERSION |
| 358 | __fp16 buffer_vld3_float16x4x3[4*3] = {-16, -15, -14, -13, -12, -11, -10, -9, |
| 359 | -8, -7, -6, -5}; |
| 360 | #else |
| 361 | float16_t buffer_vld3_float16x4x3[4*3] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, |
| 362 | 0xcb00 /* -14 */, 0xca80 /* -13 */, |
| 363 | 0xca00 /* -12 */, 0xc980 /* -11 */, |
| 364 | 0xc900 /* -10 */, 0xc880 /* -9 */, |
| 365 | 0xc800 /* -8 */, 0xc700 /* -7 */, |
| 366 | 0xc600 /* -6 */, 0xc500 /* -5 */}; |
| 367 | #endif |
| 368 | PAD(buffer_vld3_pad, float, 16, 4); |
| 369 | #endif |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 370 | VECT_ARRAY_INIT3(buffer_vld3, int, 8, 16); |
| 371 | PAD(buffer_vld3_pad, int, 8, 16); |
| 372 | VECT_ARRAY_INIT3(buffer_vld3, int, 16, 8); |
| 373 | PAD(buffer_vld3_pad, int, 16, 8); |
| 374 | VECT_ARRAY_INIT3(buffer_vld3, int, 32, 4); |
| 375 | PAD(buffer_vld3_pad, int, 32, 4); |
| 376 | VECT_ARRAY_INIT3(buffer_vld3, int, 64, 2); |
| 377 | PAD(buffer_vld3_pad, int, 64, 2); |
| 378 | VECT_ARRAY_INIT3(buffer_vld3, uint, 8, 16); |
| 379 | PAD(buffer_vld3_pad, uint, 8, 16); |
| 380 | VECT_ARRAY_INIT3(buffer_vld3, uint, 16, 8); |
| 381 | PAD(buffer_vld3_pad, uint, 16, 8); |
| 382 | VECT_ARRAY_INIT3(buffer_vld3, uint, 32, 4); |
| 383 | PAD(buffer_vld3_pad, uint, 32, 4); |
| 384 | VECT_ARRAY_INIT3(buffer_vld3, uint, 64, 2); |
| 385 | PAD(buffer_vld3_pad, uint, 64, 2); |
Christophe Lyon | 80902f6 | 2013-03-29 16:26:42 +0100 | [diff] [blame] | 386 | VECT_ARRAY_INIT3(buffer_vld3, poly, 8, 16); |
| 387 | PAD(buffer_vld3_pad, poly, 8, 16); |
| 388 | VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8); |
| 389 | PAD(buffer_vld3_pad, poly, 16, 8); |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 390 | VECT_ARRAY_INIT3(buffer_vld3, float, 32, 4); |
| 391 | PAD(buffer_vld3_pad, float, 32, 4); |
Christophe Lyon | d98beba | 2016-08-24 18:02:41 +0200 | [diff] [blame] | 392 | #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) |
Christophe Lyon | 34adaf6 | 2013-04-11 15:05:18 +0200 | [diff] [blame] | 393 | #ifdef __ARMCC_VERSION |
| 394 | __fp16 buffer_vld3_float16x8x3[8*3] = {-16, -15, -14, -13, -12, -11, -10, -9, |
| 395 | -8, -7, -6, -5, -4, -3, -2, -1, |
| 396 | 0, 1, 2, 3, 4, 5, 6, 7}; |
| 397 | #else |
| 398 | float16_t buffer_vld3_float16x8x3[8*3] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, |
| 399 | 0xcb00 /* -14 */, 0xca80 /* -13 */, |
| 400 | 0xca00 /* -12 */, 0xc980 /* -11 */, |
| 401 | 0xc900 /* -10 */, 0xc880 /* -9 */, |
| 402 | 0xc800 /* -8 */, 0xc700 /* -7 */, |
| 403 | 0xc600 /* -6 */, 0xc500 /* -6 */, |
| 404 | 0xc400 /* -4 */, 0xc200 /* -3 */, |
| 405 | 0xc000 /* -2 */, 0xbc00 /* -1 */, |
| 406 | 0, 0x3c00 /* 1 */, |
| 407 | 0x4000 /* 2 */, 0x4200 /* 3 */, |
| 408 | 0x4400 /* 4 */, 0x4500 /* 5 */, |
| 409 | 0x4600 /* 6 */, 0x4700 /* 7 */}; |
| 410 | #endif |
| 411 | PAD(buffer_vld3_pad, float, 16, 8); |
| 412 | #endif |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 413 | |
| 414 | /* Input buffers for vld4, 1 of each size */ |
| 415 | VECT_ARRAY_INIT4(buffer_vld4, int, 8, 8); |
| 416 | PAD(buffer_vld4_pad, int, 8, 8); |
| 417 | VECT_ARRAY_INIT4(buffer_vld4, int, 16, 4); |
| 418 | PAD(buffer_vld4_pad, int, 16, 4); |
| 419 | VECT_ARRAY_INIT4(buffer_vld4, int, 32, 2); |
| 420 | PAD(buffer_vld4_pad, int, 32, 2); |
| 421 | VECT_ARRAY_INIT4(buffer_vld4, int, 64, 1); |
| 422 | PAD(buffer_vld4_pad, int, 64, 1); |
| 423 | VECT_ARRAY_INIT4(buffer_vld4, uint, 8, 8); |
| 424 | PAD(buffer_vld4_pad, uint, 8, 8); |
| 425 | VECT_ARRAY_INIT4(buffer_vld4, uint, 16, 4); |
| 426 | PAD(buffer_vld4_pad, uint, 16, 4); |
| 427 | VECT_ARRAY_INIT4(buffer_vld4, uint, 32, 2); |
| 428 | PAD(buffer_vld4_pad, uint, 32, 2); |
| 429 | VECT_ARRAY_INIT4(buffer_vld4, uint, 64, 1); |
| 430 | PAD(buffer_vld4_pad, uint, 64, 1); |
Christophe Lyon | 80902f6 | 2013-03-29 16:26:42 +0100 | [diff] [blame] | 431 | VECT_ARRAY_INIT4(buffer_vld4, poly, 8, 8); |
| 432 | PAD(buffer_vld4_pad, poly, 8, 8); |
| 433 | VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4); |
| 434 | PAD(buffer_vld4_pad, poly, 16, 4); |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 435 | VECT_ARRAY_INIT4(buffer_vld4, float, 32, 2); |
| 436 | PAD(buffer_vld4_pad, float, 32, 2); |
Christophe Lyon | d98beba | 2016-08-24 18:02:41 +0200 | [diff] [blame] | 437 | #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) |
Christophe Lyon | 34adaf6 | 2013-04-11 15:05:18 +0200 | [diff] [blame] | 438 | #ifdef __ARMCC_VERSION |
| 439 | __fp16 buffer_vld4_float16x4x4[4*4] = {-16, -15, -14, -13, -12, -11, -10, -9, |
| 440 | -8, -7, -6, -5, -4, -3, -2, -1}; |
| 441 | #else |
| 442 | float16_t buffer_vld4_float16x4x4[4*4] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, |
| 443 | 0xcb00 /* -14 */, 0xca80 /* -13 */, |
| 444 | 0xca00 /* -12 */, 0xc980 /* -11 */, |
| 445 | 0xc900 /* -10 */, 0xc880 /* -9 */, |
| 446 | 0xc800 /* -8 */, 0xc700 /* -7 */, |
| 447 | 0xc600 /* -6 */, 0xc500 /* -5 */, |
| 448 | 0xc400 /* -4 */, 0xc200 /* -3 */, |
| 449 | 0xc000 /* -2 */, 0xbc00 /* -1 */}; |
| 450 | #endif |
| 451 | PAD(buffer_vld4_pad, float, 16, 4); |
| 452 | #endif |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 453 | VECT_ARRAY_INIT4(buffer_vld4, int, 8, 16); |
| 454 | PAD(buffer_vld4_pad, int, 8, 16); |
| 455 | VECT_ARRAY_INIT4(buffer_vld4, int, 16, 8); |
| 456 | PAD(buffer_vld4_pad, int, 16, 8); |
| 457 | VECT_ARRAY_INIT4(buffer_vld4, int, 32, 4); |
| 458 | PAD(buffer_vld4_pad, int, 32, 4); |
| 459 | VECT_ARRAY_INIT4(buffer_vld4, int, 64, 2); |
| 460 | PAD(buffer_vld4_pad, int, 64, 2); |
| 461 | VECT_ARRAY_INIT4(buffer_vld4, uint, 8, 16); |
| 462 | PAD(buffer_vld4_pad, uint, 8, 16); |
| 463 | VECT_ARRAY_INIT4(buffer_vld4, uint, 16, 8); |
| 464 | PAD(buffer_vld4_pad, uint, 16, 8); |
| 465 | VECT_ARRAY_INIT4(buffer_vld4, uint, 32, 4); |
| 466 | PAD(buffer_vld4_pad, uint, 32, 4); |
| 467 | VECT_ARRAY_INIT4(buffer_vld4, uint, 64, 2); |
| 468 | PAD(buffer_vld4_pad, uint, 64, 2); |
Christophe Lyon | 80902f6 | 2013-03-29 16:26:42 +0100 | [diff] [blame] | 469 | VECT_ARRAY_INIT4(buffer_vld4, poly, 8, 16); |
| 470 | PAD(buffer_vld4_pad, poly, 8, 16); |
| 471 | VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8); |
| 472 | PAD(buffer_vld4_pad, poly, 16, 8); |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 473 | VECT_ARRAY_INIT4(buffer_vld4, float, 32, 4); |
| 474 | PAD(buffer_vld4_pad, float, 32, 4); |
Christophe Lyon | d98beba | 2016-08-24 18:02:41 +0200 | [diff] [blame] | 475 | #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) |
Christophe Lyon | 34adaf6 | 2013-04-11 15:05:18 +0200 | [diff] [blame] | 476 | #ifdef __ARMCC_VERSION |
| 477 | __fp16 buffer_vld4_float16x8x4[8*4] = {-16, -15, -14, -13, -12, -11, -10, -9, |
| 478 | -8, -7, -6, -5, -4, -3, -2, -1, |
| 479 | 0, 1, 2, 3, 4, 5, 6, 7, |
| 480 | 8, 9, 10, 11, 12, 13, 14, 15}; |
| 481 | #else |
| 482 | float16_t buffer_vld4_float16x8x4[8*4] = {0xcc00 /* -16 */, 0xcb80 /* -15 */, |
| 483 | 0xcb00 /* -14 */, 0xca80 /* -13 */, |
| 484 | 0xca00 /* -12 */, 0xc980 /* -11 */, |
| 485 | 0xc900 /* -10 */, 0xc880 /* -9 */, |
| 486 | 0xc800 /* -8 */, 0xc700 /* -7 */, |
| 487 | 0xc600 /* -6 */, 0xc500 /* -6 */, |
| 488 | 0xc400 /* -4 */, 0xc200 /* -3 */, |
| 489 | 0xc000 /* -2 */, 0xbc00 /* -1 */, |
| 490 | 0, 0x3c00 /* 1 */, |
| 491 | 0x4000 /* 2 */, 0x4200 /* 3 */, |
| 492 | 0x4400 /* 4 */, 0x4500 /* 5 */, |
| 493 | 0x4600 /* 6 */, 0x4700 /* 7 */, |
| 494 | 0x4800 /* 8 */, 0x4880 /* 9 */, |
| 495 | 0x4900 /* 10 */, 0x4980 /* 11 */, |
| 496 | 0x4a00 /* 12 */, 0x4a80 /* 13 */, |
| 497 | 0x4b00 /* 14 */, 0x04b80 /* 15 */}; |
| 498 | #endif |
| 499 | PAD(buffer_vld4_pad, float, 16, 8); |
| 500 | #endif |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 501 | |
| 502 | /* Input buffers for vld2_lane */ |
| 503 | VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 8, 2); |
| 504 | VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 16, 2); |
| 505 | VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 32, 2); |
| 506 | VECT_VAR_DECL_INIT(buffer_vld2_lane, int, 64, 2); |
| 507 | VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 8, 2); |
| 508 | VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 16, 2); |
| 509 | VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2); |
| 510 | VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2); |
Christophe Lyon | 80902f6 | 2013-03-29 16:26:42 +0100 | [diff] [blame] | 511 | VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2); |
| 512 | VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2); |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 513 | VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 32, 2); |
Christophe Lyon | d98beba | 2016-08-24 18:02:41 +0200 | [diff] [blame] | 514 | #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) |
Christophe Lyon | 34adaf6 | 2013-04-11 15:05:18 +0200 | [diff] [blame] | 515 | #ifdef __ARMCC_VERSION |
| 516 | __fp16 buffer_vld2_lane_float16x2[2] = {-16, -15}; |
| 517 | #else |
| 518 | VECT_VAR_DECL(buffer_vld2_lane, float, 16, 2) [] = {0xcc00 /* -16 */, |
| 519 | 0xcb80 /* -15 */}; |
| 520 | #endif |
| 521 | #endif |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 522 | |
| 523 | /* Input buffers for vld3_lane */ |
| 524 | VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 8, 3); |
| 525 | VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 16, 3); |
| 526 | VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 32, 3); |
| 527 | VECT_VAR_DECL_INIT(buffer_vld3_lane, int, 64, 3); |
| 528 | VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 8, 3); |
| 529 | VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 16, 3); |
| 530 | VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3); |
| 531 | VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3); |
Christophe Lyon | 80902f6 | 2013-03-29 16:26:42 +0100 | [diff] [blame] | 532 | VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3); |
| 533 | VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3); |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 534 | VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 32, 3); |
Christophe Lyon | d98beba | 2016-08-24 18:02:41 +0200 | [diff] [blame] | 535 | #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) |
Christophe Lyon | 34adaf6 | 2013-04-11 15:05:18 +0200 | [diff] [blame] | 536 | #ifdef __ARMCC_VERSION |
| 537 | __fp16 buffer_vld3_lane_float16x3[3] = {-16, -15, -14}; |
| 538 | #else |
| 539 | VECT_VAR_DECL(buffer_vld3_lane, float, 16, 3) [] = {0xcc00 /* -16 */, |
| 540 | 0xcb80 /* -15 */, |
| 541 | 0xcb00 /* -14 */}; |
| 542 | #endif |
| 543 | #endif |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 544 | |
| 545 | /* Input buffers for vld4_lane */ |
| 546 | VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 8, 4); |
| 547 | VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 16, 4); |
| 548 | VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 32, 4); |
| 549 | VECT_VAR_DECL_INIT(buffer_vld4_lane, int, 64, 4); |
| 550 | VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 8, 4); |
| 551 | VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 16, 4); |
| 552 | VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4); |
| 553 | VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4); |
Christophe Lyon | 80902f6 | 2013-03-29 16:26:42 +0100 | [diff] [blame] | 554 | VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4); |
| 555 | VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4); |
Christophe Lyon | 01af0a5 | 2013-01-17 17:23:11 +0100 | [diff] [blame] | 556 | VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 32, 4); |
Christophe Lyon | d98beba | 2016-08-24 18:02:41 +0200 | [diff] [blame] | 557 | #if defined(__ARM_FP16_FORMAT_IEEE) && ( ((__ARM_FP & 0x2) != 0) || ((__ARM_NEON_FP16_INTRINSICS & 1) != 0) ) |
Christophe Lyon | 34adaf6 | 2013-04-11 15:05:18 +0200 | [diff] [blame] | 558 | #ifdef __ARMCC_VERSION |
| 559 | __fp16 buffer_vld4_lane_float16x4[4] = {-16, -15, -14, -13}; |
| 560 | #else |
| 561 | VECT_VAR_DECL(buffer_vld4_lane, float, 16, 4) [] = {0xcc00 /* -16 */, |
| 562 | 0xcb80 /* -15 */, |
| 563 | 0xcb00 /* -14 */, |
| 564 | 0xca80 /* -13 */}; |
| 565 | #endif |
| 566 | #endif |