José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 1 | /************************************************************************** |
| 2 | * |
| 3 | * Copyright 2009 VMware, Inc. |
| 4 | * All Rights Reserved. |
| 5 | * |
| 6 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 7 | * copy of this software and associated documentation files (the |
| 8 | * "Software"), to deal in the Software without restriction, including |
| 9 | * without limitation the rights to use, copy, modify, merge, publish, |
| 10 | * distribute, sub license, and/or sell copies of the Software, and to |
| 11 | * permit persons to whom the Software is furnished to do so, subject to |
| 12 | * the following conditions: |
| 13 | * |
| 14 | * The above copyright notice and this permission notice (including the |
| 15 | * next paragraph) shall be included in all copies or substantial portions |
| 16 | * of the Software. |
| 17 | * |
| 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| 19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
| 21 | * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR |
| 22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
| 23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| 24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 25 | * |
| 26 | **************************************************************************/ |
| 27 | |
José Fonseca | 5811ed8 | 2009-08-22 22:26:55 +0100 | [diff] [blame] | 28 | /** |
| 29 | * @file |
| 30 | * Helper functions for logical operations. |
| 31 | * |
| 32 | * @author Jose Fonseca <jfonseca@vmware.com> |
| 33 | */ |
| 34 | |
José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 35 | |
José Fonseca | 7cda8ea | 2009-09-29 13:58:58 +0100 | [diff] [blame] | 36 | #include "util/u_cpu_detect.h" |
José Fonseca | 8546273 | 2010-02-01 15:11:34 +0000 | [diff] [blame] | 37 | #include "util/u_debug.h" |
José Fonseca | 7cda8ea | 2009-09-29 13:58:58 +0100 | [diff] [blame] | 38 | |
José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 39 | #include "lp_bld_type.h" |
José Fonseca | 09a7b01 | 2009-08-17 07:57:28 +0100 | [diff] [blame] | 40 | #include "lp_bld_const.h" |
José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 41 | #include "lp_bld_intr.h" |
| 42 | #include "lp_bld_logic.h" |
| 43 | |
| 44 | |
Brian Paul | e01fa1e | 2009-12-03 11:39:40 -0700 | [diff] [blame] | 45 | /** |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 46 | * Build code to compare two values 'a' and 'b' of 'type' using the given func. |
| 47 | * \param func one of PIPE_FUNC_x |
Brian Paul | e01fa1e | 2009-12-03 11:39:40 -0700 | [diff] [blame] | 48 | */ |
José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 49 | LLVMValueRef |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 50 | lp_build_compare(LLVMBuilderRef builder, |
| 51 | const struct lp_type type, |
| 52 | unsigned func, |
| 53 | LLVMValueRef a, |
| 54 | LLVMValueRef b) |
José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 55 | { |
José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 56 | LLVMTypeRef vec_type = lp_build_vec_type(type); |
| 57 | LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); |
| 58 | LLVMValueRef zeros = LLVMConstNull(int_vec_type); |
| 59 | LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); |
| 60 | LLVMValueRef cond; |
José Fonseca | 241c3a1 | 2009-08-31 10:22:36 +0100 | [diff] [blame] | 61 | LLVMValueRef res; |
| 62 | unsigned i; |
José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 63 | |
Brian Paul | e01fa1e | 2009-12-03 11:39:40 -0700 | [diff] [blame] | 64 | assert(func >= PIPE_FUNC_NEVER); |
| 65 | assert(func <= PIPE_FUNC_ALWAYS); |
| 66 | |
José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 67 | if(func == PIPE_FUNC_NEVER) |
| 68 | return zeros; |
| 69 | if(func == PIPE_FUNC_ALWAYS) |
| 70 | return ones; |
| 71 | |
| 72 | /* TODO: optimize the constant case */ |
| 73 | |
| 74 | /* XXX: It is not clear if we should use the ordered or unordered operators */ |
| 75 | |
| 76 | #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) |
| 77 | if(type.width * type.length == 128) { |
José Fonseca | 7cda8ea | 2009-09-29 13:58:58 +0100 | [diff] [blame] | 78 | if(type.floating && util_cpu_caps.has_sse) { |
Brian Paul | e01fa1e | 2009-12-03 11:39:40 -0700 | [diff] [blame] | 79 | /* float[4] comparison */ |
José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 80 | LLVMValueRef args[3]; |
| 81 | unsigned cc; |
| 82 | boolean swap; |
José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 83 | |
| 84 | swap = FALSE; |
| 85 | switch(func) { |
| 86 | case PIPE_FUNC_EQUAL: |
| 87 | cc = 0; |
| 88 | break; |
| 89 | case PIPE_FUNC_NOTEQUAL: |
| 90 | cc = 4; |
| 91 | break; |
| 92 | case PIPE_FUNC_LESS: |
| 93 | cc = 1; |
| 94 | break; |
| 95 | case PIPE_FUNC_LEQUAL: |
| 96 | cc = 2; |
| 97 | break; |
| 98 | case PIPE_FUNC_GREATER: |
| 99 | cc = 1; |
| 100 | swap = TRUE; |
| 101 | break; |
| 102 | case PIPE_FUNC_GEQUAL: |
| 103 | cc = 2; |
| 104 | swap = TRUE; |
| 105 | break; |
| 106 | default: |
| 107 | assert(0); |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 108 | return lp_build_undef(type); |
José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 109 | } |
| 110 | |
| 111 | if(swap) { |
| 112 | args[0] = b; |
| 113 | args[1] = a; |
| 114 | } |
| 115 | else { |
| 116 | args[0] = a; |
| 117 | args[1] = b; |
| 118 | } |
| 119 | |
| 120 | args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0); |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 121 | res = lp_build_intrinsic(builder, |
José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 122 | "llvm.x86.sse.cmp.ps", |
| 123 | vec_type, |
| 124 | args, 3); |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 125 | res = LLVMBuildBitCast(builder, res, int_vec_type, ""); |
José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 126 | return res; |
| 127 | } |
José Fonseca | 7cda8ea | 2009-09-29 13:58:58 +0100 | [diff] [blame] | 128 | else if(util_cpu_caps.has_sse2) { |
Brian Paul | e01fa1e | 2009-12-03 11:39:40 -0700 | [diff] [blame] | 129 | /* int[4] comparison */ |
José Fonseca | d07b038 | 2009-08-20 20:17:55 +0100 | [diff] [blame] | 130 | static const struct { |
| 131 | unsigned swap:1; |
| 132 | unsigned eq:1; |
| 133 | unsigned gt:1; |
| 134 | unsigned not:1; |
| 135 | } table[] = { |
| 136 | {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */ |
| 137 | {1, 0, 1, 0}, /* PIPE_FUNC_LESS */ |
| 138 | {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */ |
| 139 | {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */ |
| 140 | {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */ |
| 141 | {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */ |
| 142 | {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */ |
| 143 | {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */ |
| 144 | }; |
| 145 | const char *pcmpeq; |
| 146 | const char *pcmpgt; |
| 147 | LLVMValueRef args[2]; |
| 148 | LLVMValueRef res; |
| 149 | |
| 150 | switch (type.width) { |
| 151 | case 8: |
| 152 | pcmpeq = "llvm.x86.sse2.pcmpeq.b"; |
| 153 | pcmpgt = "llvm.x86.sse2.pcmpgt.b"; |
| 154 | break; |
| 155 | case 16: |
| 156 | pcmpeq = "llvm.x86.sse2.pcmpeq.w"; |
| 157 | pcmpgt = "llvm.x86.sse2.pcmpgt.w"; |
| 158 | break; |
| 159 | case 32: |
| 160 | pcmpeq = "llvm.x86.sse2.pcmpeq.d"; |
| 161 | pcmpgt = "llvm.x86.sse2.pcmpgt.d"; |
| 162 | break; |
| 163 | default: |
| 164 | assert(0); |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 165 | return lp_build_undef(type); |
José Fonseca | d07b038 | 2009-08-20 20:17:55 +0100 | [diff] [blame] | 166 | } |
| 167 | |
| 168 | /* There are no signed byte and unsigned word/dword comparison |
| 169 | * instructions. So flip the sign bit so that the results match. |
| 170 | */ |
| 171 | if(table[func].gt && |
| 172 | ((type.width == 8 && type.sign) || |
| 173 | (type.width != 8 && !type.sign))) { |
José Fonseca | 77b35dc | 2009-08-22 22:30:03 +0100 | [diff] [blame] | 174 | LLVMValueRef msb = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 175 | a = LLVMBuildXor(builder, a, msb, ""); |
| 176 | b = LLVMBuildXor(builder, b, msb, ""); |
José Fonseca | d07b038 | 2009-08-20 20:17:55 +0100 | [diff] [blame] | 177 | } |
| 178 | |
| 179 | if(table[func].swap) { |
| 180 | args[0] = b; |
| 181 | args[1] = a; |
| 182 | } |
| 183 | else { |
| 184 | args[0] = a; |
| 185 | args[1] = b; |
| 186 | } |
| 187 | |
| 188 | if(table[func].eq) |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 189 | res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); |
José Fonseca | d07b038 | 2009-08-20 20:17:55 +0100 | [diff] [blame] | 190 | else if (table[func].gt) |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 191 | res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); |
José Fonseca | d07b038 | 2009-08-20 20:17:55 +0100 | [diff] [blame] | 192 | else |
| 193 | res = LLVMConstNull(vec_type); |
| 194 | |
| 195 | if(table[func].not) |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 196 | res = LLVMBuildNot(builder, res, ""); |
José Fonseca | d07b038 | 2009-08-20 20:17:55 +0100 | [diff] [blame] | 197 | |
| 198 | return res; |
| 199 | } |
José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 200 | } |
| 201 | #endif |
| 202 | |
| 203 | if(type.floating) { |
| 204 | LLVMRealPredicate op; |
| 205 | switch(func) { |
| 206 | case PIPE_FUNC_NEVER: |
| 207 | op = LLVMRealPredicateFalse; |
| 208 | break; |
| 209 | case PIPE_FUNC_ALWAYS: |
| 210 | op = LLVMRealPredicateTrue; |
| 211 | break; |
| 212 | case PIPE_FUNC_EQUAL: |
| 213 | op = LLVMRealUEQ; |
| 214 | break; |
| 215 | case PIPE_FUNC_NOTEQUAL: |
| 216 | op = LLVMRealUNE; |
| 217 | break; |
| 218 | case PIPE_FUNC_LESS: |
| 219 | op = LLVMRealULT; |
| 220 | break; |
| 221 | case PIPE_FUNC_LEQUAL: |
| 222 | op = LLVMRealULE; |
| 223 | break; |
| 224 | case PIPE_FUNC_GREATER: |
| 225 | op = LLVMRealUGT; |
| 226 | break; |
| 227 | case PIPE_FUNC_GEQUAL: |
| 228 | op = LLVMRealUGE; |
| 229 | break; |
| 230 | default: |
| 231 | assert(0); |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 232 | return lp_build_undef(type); |
José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 233 | } |
José Fonseca | 241c3a1 | 2009-08-31 10:22:36 +0100 | [diff] [blame] | 234 | |
| 235 | #if 0 |
| 236 | /* XXX: Although valid IR, no LLVM target currently support this */ |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 237 | cond = LLVMBuildFCmp(builder, op, a, b, ""); |
| 238 | res = LLVMBuildSelect(builder, cond, ones, zeros, ""); |
José Fonseca | 241c3a1 | 2009-08-31 10:22:36 +0100 | [diff] [blame] | 239 | #else |
| 240 | debug_printf("%s: warning: using slow element-wise vector comparison\n", |
| 241 | __FUNCTION__); |
| 242 | res = LLVMGetUndef(int_vec_type); |
| 243 | for(i = 0; i < type.length; ++i) { |
| 244 | LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 245 | cond = LLVMBuildFCmp(builder, op, |
| 246 | LLVMBuildExtractElement(builder, a, index, ""), |
| 247 | LLVMBuildExtractElement(builder, b, index, ""), |
José Fonseca | 241c3a1 | 2009-08-31 10:22:36 +0100 | [diff] [blame] | 248 | ""); |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 249 | cond = LLVMBuildSelect(builder, cond, |
José Fonseca | 241c3a1 | 2009-08-31 10:22:36 +0100 | [diff] [blame] | 250 | LLVMConstExtractElement(ones, index), |
| 251 | LLVMConstExtractElement(zeros, index), |
| 252 | ""); |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 253 | res = LLVMBuildInsertElement(builder, res, cond, index, ""); |
José Fonseca | 241c3a1 | 2009-08-31 10:22:36 +0100 | [diff] [blame] | 254 | } |
| 255 | #endif |
José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 256 | } |
| 257 | else { |
| 258 | LLVMIntPredicate op; |
| 259 | switch(func) { |
| 260 | case PIPE_FUNC_EQUAL: |
| 261 | op = LLVMIntEQ; |
| 262 | break; |
| 263 | case PIPE_FUNC_NOTEQUAL: |
| 264 | op = LLVMIntNE; |
| 265 | break; |
| 266 | case PIPE_FUNC_LESS: |
| 267 | op = type.sign ? LLVMIntSLT : LLVMIntULT; |
| 268 | break; |
| 269 | case PIPE_FUNC_LEQUAL: |
| 270 | op = type.sign ? LLVMIntSLE : LLVMIntULE; |
| 271 | break; |
| 272 | case PIPE_FUNC_GREATER: |
| 273 | op = type.sign ? LLVMIntSGT : LLVMIntUGT; |
| 274 | break; |
| 275 | case PIPE_FUNC_GEQUAL: |
| 276 | op = type.sign ? LLVMIntSGE : LLVMIntUGE; |
| 277 | break; |
| 278 | default: |
| 279 | assert(0); |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 280 | return lp_build_undef(type); |
José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 281 | } |
José Fonseca | 241c3a1 | 2009-08-31 10:22:36 +0100 | [diff] [blame] | 282 | |
| 283 | #if 0 |
| 284 | /* XXX: Although valid IR, no LLVM target currently support this */ |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 285 | cond = LLVMBuildICmp(builder, op, a, b, ""); |
| 286 | res = LLVMBuildSelect(builder, cond, ones, zeros, ""); |
José Fonseca | 241c3a1 | 2009-08-31 10:22:36 +0100 | [diff] [blame] | 287 | #else |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 288 | debug_printf("%s: warning: using slow element-wise int vector comparison\n", |
José Fonseca | 241c3a1 | 2009-08-31 10:22:36 +0100 | [diff] [blame] | 289 | __FUNCTION__); |
| 290 | res = LLVMGetUndef(int_vec_type); |
| 291 | for(i = 0; i < type.length; ++i) { |
| 292 | LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 293 | cond = LLVMBuildICmp(builder, op, |
| 294 | LLVMBuildExtractElement(builder, a, index, ""), |
| 295 | LLVMBuildExtractElement(builder, b, index, ""), |
José Fonseca | 241c3a1 | 2009-08-31 10:22:36 +0100 | [diff] [blame] | 296 | ""); |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 297 | cond = LLVMBuildSelect(builder, cond, |
José Fonseca | 241c3a1 | 2009-08-31 10:22:36 +0100 | [diff] [blame] | 298 | LLVMConstExtractElement(ones, index), |
| 299 | LLVMConstExtractElement(zeros, index), |
| 300 | ""); |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 301 | res = LLVMBuildInsertElement(builder, res, cond, index, ""); |
José Fonseca | 241c3a1 | 2009-08-31 10:22:36 +0100 | [diff] [blame] | 302 | } |
| 303 | #endif |
José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 304 | } |
| 305 | |
José Fonseca | 241c3a1 | 2009-08-31 10:22:36 +0100 | [diff] [blame] | 306 | return res; |
José Fonseca | 1aede69 | 2009-08-16 20:59:38 +0100 | [diff] [blame] | 307 | } |
José Fonseca | 09a7b01 | 2009-08-17 07:57:28 +0100 | [diff] [blame] | 308 | |
| 309 | |
Brian Paul | 2297bc9 | 2009-12-16 12:32:45 -0700 | [diff] [blame] | 310 | |
| 311 | /** |
| 312 | * Build code to compare two values 'a' and 'b' using the given func. |
| 313 | * \param func one of PIPE_FUNC_x |
| 314 | */ |
| 315 | LLVMValueRef |
| 316 | lp_build_cmp(struct lp_build_context *bld, |
| 317 | unsigned func, |
| 318 | LLVMValueRef a, |
| 319 | LLVMValueRef b) |
| 320 | { |
| 321 | return lp_build_compare(bld->builder, bld->type, func, a, b); |
| 322 | } |
| 323 | |
| 324 | |
José Fonseca | 09a7b01 | 2009-08-17 07:57:28 +0100 | [diff] [blame] | 325 | LLVMValueRef |
| 326 | lp_build_select(struct lp_build_context *bld, |
| 327 | LLVMValueRef mask, |
| 328 | LLVMValueRef a, |
| 329 | LLVMValueRef b) |
| 330 | { |
José Fonseca | b4835ea | 2009-09-14 11:05:06 +0100 | [diff] [blame] | 331 | struct lp_type type = bld->type; |
José Fonseca | 09a7b01 | 2009-08-17 07:57:28 +0100 | [diff] [blame] | 332 | LLVMValueRef res; |
| 333 | |
| 334 | if(a == b) |
| 335 | return a; |
| 336 | |
| 337 | if(type.floating) { |
| 338 | LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); |
| 339 | a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); |
| 340 | b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); |
| 341 | } |
| 342 | |
José Fonseca | 09a7b01 | 2009-08-17 07:57:28 +0100 | [diff] [blame] | 343 | a = LLVMBuildAnd(bld->builder, a, mask, ""); |
| 344 | |
| 345 | /* This often gets translated to PANDN, but sometimes the NOT is |
| 346 | * pre-computed and stored in another constant. The best strategy depends |
| 347 | * on available registers, so it is not a big deal -- hopefully LLVM does |
| 348 | * the right decision attending the rest of the program. |
| 349 | */ |
| 350 | b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); |
| 351 | |
| 352 | res = LLVMBuildOr(bld->builder, a, b, ""); |
| 353 | |
| 354 | if(type.floating) { |
| 355 | LLVMTypeRef vec_type = lp_build_vec_type(type); |
| 356 | res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); |
| 357 | } |
| 358 | |
| 359 | return res; |
| 360 | } |
| 361 | |
| 362 | |
| 363 | LLVMValueRef |
| 364 | lp_build_select_aos(struct lp_build_context *bld, |
| 365 | LLVMValueRef a, |
| 366 | LLVMValueRef b, |
José Fonseca | 5e13e98 | 2009-09-13 16:12:48 +0100 | [diff] [blame] | 367 | const boolean cond[4]) |
José Fonseca | 09a7b01 | 2009-08-17 07:57:28 +0100 | [diff] [blame] | 368 | { |
José Fonseca | b4835ea | 2009-09-14 11:05:06 +0100 | [diff] [blame] | 369 | const struct lp_type type = bld->type; |
José Fonseca | 09a7b01 | 2009-08-17 07:57:28 +0100 | [diff] [blame] | 370 | const unsigned n = type.length; |
| 371 | unsigned i, j; |
| 372 | |
| 373 | if(a == b) |
| 374 | return a; |
| 375 | if(cond[0] && cond[1] && cond[2] && cond[3]) |
| 376 | return a; |
| 377 | if(!cond[0] && !cond[1] && !cond[2] && !cond[3]) |
| 378 | return b; |
| 379 | if(a == bld->undef || b == bld->undef) |
| 380 | return bld->undef; |
| 381 | |
| 382 | /* |
| 383 | * There are three major ways of accomplishing this: |
| 384 | * - with a shuffle, |
| 385 | * - with a select, |
| 386 | * - or with a bit mask. |
| 387 | * |
| 388 | * Select isn't supported for vector types yet. |
| 389 | * The flip between these is empirical and might need to be. |
| 390 | */ |
| 391 | if (n <= 4) { |
| 392 | /* |
| 393 | * Shuffle. |
| 394 | */ |
| 395 | LLVMTypeRef elem_type = LLVMInt32Type(); |
| 396 | LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; |
| 397 | |
| 398 | for(j = 0; j < n; j += 4) |
| 399 | for(i = 0; i < 4; ++i) |
| 400 | shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0); |
| 401 | |
| 402 | return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); |
| 403 | } |
José Fonseca | 1fc4100 | 2009-09-11 11:24:00 +0100 | [diff] [blame] | 404 | else { |
José Fonseca | 09a7b01 | 2009-08-17 07:57:28 +0100 | [diff] [blame] | 405 | #if 0 |
José Fonseca | 1fc4100 | 2009-09-11 11:24:00 +0100 | [diff] [blame] | 406 | /* XXX: Unfortunately select of vectors do not work */ |
José Fonseca | 09a7b01 | 2009-08-17 07:57:28 +0100 | [diff] [blame] | 407 | /* Use a select */ |
| 408 | LLVMTypeRef elem_type = LLVMInt1Type(); |
| 409 | LLVMValueRef cond[LP_MAX_VECTOR_LENGTH]; |
| 410 | |
| 411 | for(j = 0; j < n; j += 4) |
| 412 | for(i = 0; i < 4; ++i) |
| 413 | cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0); |
| 414 | |
| 415 | return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, ""); |
José Fonseca | 1fc4100 | 2009-09-11 11:24:00 +0100 | [diff] [blame] | 416 | #else |
José Fonseca | 09a7b01 | 2009-08-17 07:57:28 +0100 | [diff] [blame] | 417 | LLVMValueRef mask = lp_build_const_mask_aos(type, cond); |
| 418 | return lp_build_select(bld, mask, a, b); |
José Fonseca | 1fc4100 | 2009-09-11 11:24:00 +0100 | [diff] [blame] | 419 | #endif |
José Fonseca | 09a7b01 | 2009-08-17 07:57:28 +0100 | [diff] [blame] | 420 | } |
| 421 | } |