José Fonseca | 1dd7bb1 | 2009-08-04 12:09:52 +0100 | [diff] [blame] | 1 | /************************************************************************** |
| 2 | * |
| 3 | * Copyright 2009 VMware, Inc. |
| 4 | * All Rights Reserved. |
| 5 | * |
| 6 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 7 | * copy of this software and associated documentation files (the |
| 8 | * "Software"), to deal in the Software without restriction, including |
| 9 | * without limitation the rights to use, copy, modify, merge, publish, |
| 10 | * distribute, sub license, and/or sell copies of the Software, and to |
| 11 | * permit persons to whom the Software is furnished to do so, subject to |
| 12 | * the following conditions: |
| 13 | * |
| 14 | * The above copyright notice and this permission notice (including the |
| 15 | * next paragraph) shall be included in all copies or substantial portions |
| 16 | * of the Software. |
| 17 | * |
| 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| 19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
| 21 | * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR |
| 22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
| 23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| 24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 25 | * |
| 26 | **************************************************************************/ |
| 27 | |
José Fonseca | 5811ed8 | 2009-08-22 22:26:55 +0100 | [diff] [blame] | 28 | /** |
| 29 | * @file |
| 30 | * Helper functions for swizzling/shuffling. |
| 31 | * |
| 32 | * @author Jose Fonseca <jfonseca@vmware.com> |
| 33 | */ |
| 34 | |
José Fonseca | 1dd7bb1 | 2009-08-04 12:09:52 +0100 | [diff] [blame] | 35 | |
| 36 | #include "util/u_debug.h" |
| 37 | |
| 38 | #include "lp_bld_type.h" |
| 39 | #include "lp_bld_const.h" |
José Fonseca | 09a7b01 | 2009-08-17 07:57:28 +0100 | [diff] [blame] | 40 | #include "lp_bld_logic.h" |
José Fonseca | 1dd7bb1 | 2009-08-04 12:09:52 +0100 | [diff] [blame] | 41 | #include "lp_bld_swizzle.h" |
| 42 | |
| 43 | |
| 44 | LLVMValueRef |
José Fonseca | 6f5cd15 | 2009-08-19 17:57:07 +0100 | [diff] [blame] | 45 | lp_build_broadcast(LLVMBuilderRef builder, |
| 46 | LLVMTypeRef vec_type, |
| 47 | LLVMValueRef scalar) |
| 48 | { |
| 49 | const unsigned n = LLVMGetVectorSize(vec_type); |
| 50 | LLVMValueRef res; |
| 51 | unsigned i; |
| 52 | |
| 53 | res = LLVMGetUndef(vec_type); |
| 54 | for(i = 0; i < n; ++i) { |
| 55 | LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); |
| 56 | res = LLVMBuildInsertElement(builder, res, scalar, index, ""); |
| 57 | } |
| 58 | |
| 59 | return res; |
| 60 | } |
| 61 | |
| 62 | |
| 63 | LLVMValueRef |
José Fonseca | 4393ca7 | 2009-08-11 13:09:54 +0100 | [diff] [blame] | 64 | lp_build_broadcast_scalar(struct lp_build_context *bld, |
| 65 | LLVMValueRef scalar) |
| 66 | { |
José Fonseca | b4835ea | 2009-09-14 11:05:06 +0100 | [diff] [blame] | 67 | const struct lp_type type = bld->type; |
José Fonseca | 4393ca7 | 2009-08-11 13:09:54 +0100 | [diff] [blame] | 68 | LLVMValueRef res; |
| 69 | unsigned i; |
| 70 | |
| 71 | res = bld->undef; |
| 72 | for(i = 0; i < type.length; ++i) { |
| 73 | LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); |
| 74 | res = LLVMBuildInsertElement(bld->builder, res, scalar, index, ""); |
| 75 | } |
| 76 | |
| 77 | return res; |
| 78 | } |
| 79 | |
| 80 | |
| 81 | LLVMValueRef |
José Fonseca | 1dd7bb1 | 2009-08-04 12:09:52 +0100 | [diff] [blame] | 82 | lp_build_broadcast_aos(struct lp_build_context *bld, |
| 83 | LLVMValueRef a, |
| 84 | unsigned channel) |
| 85 | { |
José Fonseca | b4835ea | 2009-09-14 11:05:06 +0100 | [diff] [blame] | 86 | const struct lp_type type = bld->type; |
José Fonseca | 1dd7bb1 | 2009-08-04 12:09:52 +0100 | [diff] [blame] | 87 | const unsigned n = type.length; |
| 88 | unsigned i, j; |
| 89 | |
| 90 | if(a == bld->undef || a == bld->zero || a == bld->one) |
| 91 | return a; |
| 92 | |
José Fonseca | 28e4645 | 2009-08-04 12:32:24 +0100 | [diff] [blame] | 93 | /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing |
| 94 | * using shuffles here actually causes worst results. More investigation is |
| 95 | * needed. */ |
José Fonseca | 1dd7bb1 | 2009-08-04 12:09:52 +0100 | [diff] [blame] | 96 | if (n <= 4) { |
| 97 | /* |
| 98 | * Shuffle. |
| 99 | */ |
| 100 | LLVMTypeRef elem_type = LLVMInt32Type(); |
| 101 | LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; |
| 102 | |
| 103 | for(j = 0; j < n; j += 4) |
| 104 | for(i = 0; i < 4; ++i) |
| 105 | shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0); |
| 106 | |
| 107 | return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), ""); |
| 108 | } |
| 109 | else { |
| 110 | /* |
| 111 | * Bit mask and recursive shifts |
| 112 | * |
José Fonseca | 28e4645 | 2009-08-04 12:32:24 +0100 | [diff] [blame] | 113 | * XYZW XYZW .... XYZW <= input |
| 114 | * 0Y00 0Y00 .... 0Y00 |
| 115 | * YY00 YY00 .... YY00 |
| 116 | * YYYY YYYY .... YYYY <= output |
José Fonseca | 1dd7bb1 | 2009-08-04 12:09:52 +0100 | [diff] [blame] | 117 | */ |
José Fonseca | b4835ea | 2009-09-14 11:05:06 +0100 | [diff] [blame] | 118 | struct lp_type type4 = type; |
José Fonseca | 1dd7bb1 | 2009-08-04 12:09:52 +0100 | [diff] [blame] | 119 | const char shifts[4][2] = { |
| 120 | { 1, 2}, |
| 121 | {-1, 2}, |
| 122 | { 1, -2}, |
| 123 | {-1, -2} |
| 124 | }; |
| 125 | boolean cond[4]; |
| 126 | unsigned i; |
| 127 | |
| 128 | memset(cond, 0, sizeof cond); |
| 129 | cond[channel] = 1; |
| 130 | |
| 131 | a = LLVMBuildAnd(bld->builder, a, lp_build_const_mask_aos(type, cond), ""); |
| 132 | |
| 133 | type4.width *= 4; |
| 134 | type4.length /= 4; |
| 135 | |
| 136 | a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), ""); |
| 137 | |
| 138 | for(i = 0; i < 2; ++i) { |
| 139 | LLVMValueRef tmp = NULL; |
| 140 | int shift = shifts[channel][i]; |
| 141 | |
| 142 | #ifdef PIPE_ARCH_LITTLE_ENDIAN |
| 143 | shift = -shift; |
| 144 | #endif |
| 145 | |
| 146 | if(shift > 0) |
José Fonseca | 77b35dc | 2009-08-22 22:30:03 +0100 | [diff] [blame] | 147 | tmp = LLVMBuildLShr(bld->builder, a, lp_build_int_const_scalar(type4, shift*type.width), ""); |
José Fonseca | 1dd7bb1 | 2009-08-04 12:09:52 +0100 | [diff] [blame] | 148 | if(shift < 0) |
José Fonseca | 77b35dc | 2009-08-22 22:30:03 +0100 | [diff] [blame] | 149 | tmp = LLVMBuildShl(bld->builder, a, lp_build_int_const_scalar(type4, -shift*type.width), ""); |
José Fonseca | 1dd7bb1 | 2009-08-04 12:09:52 +0100 | [diff] [blame] | 150 | |
| 151 | assert(tmp); |
| 152 | if(tmp) |
| 153 | a = LLVMBuildOr(bld->builder, a, tmp, ""); |
| 154 | } |
| 155 | |
| 156 | return LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type), ""); |
| 157 | } |
| 158 | } |
| 159 | |
| 160 | |
| 161 | LLVMValueRef |
José Fonseca | 1dd7bb1 | 2009-08-04 12:09:52 +0100 | [diff] [blame] | 162 | lp_build_swizzle1_aos(struct lp_build_context *bld, |
| 163 | LLVMValueRef a, |
José Fonseca | 5e13e98 | 2009-09-13 16:12:48 +0100 | [diff] [blame] | 164 | const unsigned char swizzle[4]) |
José Fonseca | 1dd7bb1 | 2009-08-04 12:09:52 +0100 | [diff] [blame] | 165 | { |
| 166 | const unsigned n = bld->type.length; |
| 167 | unsigned i, j; |
| 168 | |
| 169 | if(a == bld->undef || a == bld->zero || a == bld->one) |
| 170 | return a; |
| 171 | |
| 172 | if(swizzle[0] == swizzle[1] && swizzle[1] == swizzle[2] && swizzle[2] == swizzle[3]) |
| 173 | return lp_build_broadcast_aos(bld, a, swizzle[0]); |
| 174 | |
| 175 | { |
| 176 | /* |
| 177 | * Shuffle. |
| 178 | */ |
| 179 | LLVMTypeRef elem_type = LLVMInt32Type(); |
| 180 | LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; |
| 181 | |
| 182 | for(j = 0; j < n; j += 4) |
| 183 | for(i = 0; i < 4; ++i) |
| 184 | shuffles[j + i] = LLVMConstInt(elem_type, j + swizzle[i], 0); |
| 185 | |
| 186 | return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), ""); |
| 187 | } |
| 188 | } |
| 189 | |
| 190 | |
| 191 | LLVMValueRef |
| 192 | lp_build_swizzle2_aos(struct lp_build_context *bld, |
| 193 | LLVMValueRef a, |
| 194 | LLVMValueRef b, |
José Fonseca | 5e13e98 | 2009-09-13 16:12:48 +0100 | [diff] [blame] | 195 | const unsigned char swizzle[4]) |
José Fonseca | 1dd7bb1 | 2009-08-04 12:09:52 +0100 | [diff] [blame] | 196 | { |
| 197 | const unsigned n = bld->type.length; |
| 198 | unsigned i, j; |
| 199 | |
| 200 | if(swizzle[0] < 4 && swizzle[1] < 4 && swizzle[2] < 4 && swizzle[3] < 4) |
| 201 | return lp_build_swizzle1_aos(bld, a, swizzle); |
| 202 | |
| 203 | if(a == b) { |
José Fonseca | 5e13e98 | 2009-09-13 16:12:48 +0100 | [diff] [blame] | 204 | unsigned char swizzle1[4]; |
| 205 | swizzle1[0] = swizzle[0] % 4; |
| 206 | swizzle1[1] = swizzle[1] % 4; |
| 207 | swizzle1[2] = swizzle[2] % 4; |
| 208 | swizzle1[3] = swizzle[3] % 4; |
| 209 | return lp_build_swizzle1_aos(bld, a, swizzle1); |
José Fonseca | 1dd7bb1 | 2009-08-04 12:09:52 +0100 | [diff] [blame] | 210 | } |
| 211 | |
| 212 | if(swizzle[0] % 4 == 0 && |
| 213 | swizzle[1] % 4 == 1 && |
| 214 | swizzle[2] % 4 == 2 && |
| 215 | swizzle[3] % 4 == 3) { |
| 216 | boolean cond[4]; |
| 217 | cond[0] = swizzle[0] / 4; |
| 218 | cond[1] = swizzle[1] / 4; |
| 219 | cond[2] = swizzle[2] / 4; |
| 220 | cond[3] = swizzle[3] / 4; |
| 221 | return lp_build_select_aos(bld, a, b, cond); |
| 222 | } |
| 223 | |
| 224 | { |
| 225 | /* |
| 226 | * Shuffle. |
| 227 | */ |
| 228 | LLVMTypeRef elem_type = LLVMInt32Type(); |
| 229 | LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; |
| 230 | |
| 231 | for(j = 0; j < n; j += 4) |
| 232 | for(i = 0; i < 4; ++i) |
| 233 | shuffles[j + i] = LLVMConstInt(elem_type, j + (swizzle[i] % 4) + (swizzle[i] / 4 * n), 0); |
| 234 | |
| 235 | return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); |
| 236 | } |
| 237 | } |
| 238 | |
| 239 | |