blob: 92e3dc4c25fa5623e3491fdfd4db871466a77d04 [file] [log] [blame]
Dave Airlie13a28ff2017-02-03 10:05:00 +10001/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18 * USE OR OTHER DEALINGS IN THE SOFTWARE.
19 *
20 * The above copyright notice and this permission notice (including the
21 * next paragraph) shall be included in all copies or substantial portions
22 * of the Software.
23 *
24 */
25/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
26#include "ac_llvm_build.h"
27
28#include <llvm-c/Core.h>
29
30#include "c11/threads.h"
31
32#include <assert.h>
33#include <stdio.h>
34
35#include "ac_llvm_util.h"
Dave Airliee2659172017-04-25 23:33:29 +010036#include "ac_exp_param.h"
Dave Airlie13a28ff2017-02-03 10:05:00 +100037#include "util/bitscan.h"
38#include "util/macros.h"
Connor Abbottac27fa72017-06-05 14:16:43 -070039#include "util/u_atomic.h"
Nicolai Hähnle625dcbb2018-03-23 11:20:24 +010040#include "util/u_math.h"
Dave Airlie13a28ff2017-02-03 10:05:00 +100041#include "sid.h"
42
Dave Airliee2659172017-04-25 23:33:29 +010043#include "shader_enums.h"
44
Timothy Arceri42627da2018-03-07 10:53:34 +110045#define AC_LLVM_INITIAL_CF_DEPTH 4
46
47/* Data for if/else/endif and bgnloop/endloop control flow structures.
48 */
49struct ac_llvm_flow {
50 /* Loop exit or next part of if/else/endif. */
51 LLVMBasicBlockRef next_block;
52 LLVMBasicBlockRef loop_entry_block;
53};
54
Dave Airlie13a28ff2017-02-03 10:05:00 +100055/* Initialize module-independent parts of the context.
56 *
57 * The caller is responsible for initializing ctx::module and ctx::builder.
58 */
59void
Marek Olšák46959842018-07-04 01:37:30 -040060ac_llvm_context_init(struct ac_llvm_context *ctx,
Samuel Pitoiset03ef2642017-12-21 17:53:14 +010061 enum chip_class chip_class, enum radeon_family family)
Dave Airlie13a28ff2017-02-03 10:05:00 +100062{
63 LLVMValueRef args[1];
64
Marek Olšák46959842018-07-04 01:37:30 -040065 ctx->context = LLVMContextCreate();
66
Nicolai Hähnle3db86d82017-09-13 14:36:23 +020067 ctx->chip_class = chip_class;
Samuel Pitoiset03ef2642017-12-21 17:53:14 +010068 ctx->family = family;
Dave Airlie13a28ff2017-02-03 10:05:00 +100069 ctx->module = NULL;
70 ctx->builder = NULL;
71
72 ctx->voidt = LLVMVoidTypeInContext(ctx->context);
73 ctx->i1 = LLVMInt1TypeInContext(ctx->context);
74 ctx->i8 = LLVMInt8TypeInContext(ctx->context);
Nicolai Hähnle7bf8c942017-03-30 14:10:26 +020075 ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
Dave Airlie13a28ff2017-02-03 10:05:00 +100076 ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
Nicolai Hähnle7bf8c942017-03-30 14:10:26 +020077 ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
Samuel Pitoiset3fbdcd92018-11-02 09:50:32 +010078 ctx->intptr = ctx->i32;
Nicolai Hähnle7bf8c942017-03-30 14:10:26 +020079 ctx->f16 = LLVMHalfTypeInContext(ctx->context);
Dave Airlie13a28ff2017-02-03 10:05:00 +100080 ctx->f32 = LLVMFloatTypeInContext(ctx->context);
Nicolai Hähnle7bf8c942017-03-30 14:10:26 +020081 ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
Marek Olšák847d0a32018-01-02 04:34:53 +010082 ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
Timothy Arceri309a5142017-11-02 12:59:00 +110083 ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
Timothy Arceriee376ac2017-11-02 13:02:54 +110084 ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
Dave Airlie13a28ff2017-02-03 10:05:00 +100085 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
Timothy Arceri7f496672017-11-02 13:24:27 +110086 ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
Dave Airlie13a28ff2017-02-03 10:05:00 +100087 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
Nicolai Hähnleedfd3be2017-06-08 20:04:28 +020088 ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
Dave Airlie13a28ff2017-02-03 10:05:00 +100089
Samuel Pitoisetcfd63142018-09-14 12:52:34 +020090 ctx->i16_0 = LLVMConstInt(ctx->i16, 0, false);
91 ctx->i16_1 = LLVMConstInt(ctx->i16, 1, false);
Nicolai Hähnle331a5742017-05-18 22:02:48 +020092 ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
93 ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
Timothy Arceri38876c82018-01-14 10:06:36 +110094 ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false);
95 ctx->i64_1 = LLVMConstInt(ctx->i64, 1, false);
Nicolai Hähnle331a5742017-05-18 22:02:48 +020096 ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
97 ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
Timothy Arcerid7b6b8b2018-01-11 22:09:35 +110098 ctx->f64_0 = LLVMConstReal(ctx->f64, 0.0);
Timothy Arceric0eb3042018-01-11 17:03:36 +110099 ctx->f64_1 = LLVMConstReal(ctx->f64, 1.0);
Nicolai Hähnle331a5742017-05-18 22:02:48 +0200100
Dave Airliea76b6c22017-10-26 15:20:15 +1000101 ctx->i1false = LLVMConstInt(ctx->i1, 0, false);
102 ctx->i1true = LLVMConstInt(ctx->i1, 1, false);
103
Dave Airlie13a28ff2017-02-03 10:05:00 +1000104 ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
105 "range", 5);
106
107 ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
108 "invariant.load", 14);
109
110 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
111
112 args[0] = LLVMConstReal(ctx->f32, 2.5);
113 ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
114
115 ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
116 "amdgpu.uniform", 14);
117
118 ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
119}
120
Timothy Arceri42627da2018-03-07 10:53:34 +1100121void
122ac_llvm_context_dispose(struct ac_llvm_context *ctx)
123{
124 free(ctx->flow);
125 ctx->flow = NULL;
126 ctx->flow_depth_max = 0;
127}
128
Timothy Arcerib99ebaa2017-12-11 12:54:47 +1100129int
130ac_get_llvm_num_components(LLVMValueRef value)
131{
132 LLVMTypeRef type = LLVMTypeOf(value);
133 unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
134 ? LLVMGetVectorSize(type)
135 : 1;
136 return num_components;
137}
138
139LLVMValueRef
140ac_llvm_extract_elem(struct ac_llvm_context *ac,
141 LLVMValueRef value,
142 int index)
143{
Timothy Arceri4a0c24f22017-12-13 18:46:56 +1100144 if (LLVMGetTypeKind(LLVMTypeOf(value)) != LLVMVectorTypeKind) {
145 assert(index == 0);
Timothy Arcerib99ebaa2017-12-11 12:54:47 +1100146 return value;
Timothy Arceri4a0c24f22017-12-13 18:46:56 +1100147 }
Timothy Arcerib99ebaa2017-12-11 12:54:47 +1100148
149 return LLVMBuildExtractElement(ac->builder, value,
150 LLVMConstInt(ac->i32, index, false), "");
151}
152
Timothy Arceria9f6b392018-02-06 14:38:19 +1100153int
154ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type)
155{
156 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
157 type = LLVMGetElementType(type);
158
159 if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind)
160 return LLVMGetIntTypeWidth(type);
161
162 if (type == ctx->f16)
163 return 16;
164 if (type == ctx->f32)
165 return 32;
166 if (type == ctx->f64)
167 return 64;
168
169 unreachable("Unhandled type kind in get_elem_bits");
170}
171
Connor Abbottc181d4f2017-06-05 14:37:01 -0700172unsigned
173ac_get_type_size(LLVMTypeRef type)
174{
175 LLVMTypeKind kind = LLVMGetTypeKind(type);
176
177 switch (kind) {
178 case LLVMIntegerTypeKind:
179 return LLVMGetIntTypeWidth(type) / 8;
Daniel Schürmannf5823672018-02-03 14:37:26 +0100180 case LLVMHalfTypeKind:
181 return 2;
Connor Abbottc181d4f2017-06-05 14:37:01 -0700182 case LLVMFloatTypeKind:
183 return 4;
Connor Abbottfafa2992017-07-18 20:44:47 -0700184 case LLVMDoubleTypeKind:
Marek Olšák931ec802018-01-01 21:04:22 +0100185 return 8;
Connor Abbottc181d4f2017-06-05 14:37:01 -0700186 case LLVMPointerTypeKind:
Marek Olšáka668c8d2018-09-07 18:44:54 -0400187 if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_CONST_32BIT)
Marek Olšák931ec802018-01-01 21:04:22 +0100188 return 4;
Connor Abbottc181d4f2017-06-05 14:37:01 -0700189 return 8;
190 case LLVMVectorTypeKind:
191 return LLVMGetVectorSize(type) *
192 ac_get_type_size(LLVMGetElementType(type));
193 case LLVMArrayTypeKind:
194 return LLVMGetArrayLength(type) *
195 ac_get_type_size(LLVMGetElementType(type));
196 default:
197 assert(0);
198 return 0;
199 }
200}
201
Connor Abbott50967cd2017-07-18 17:32:10 -0700202static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
203{
204 if (t == ctx->f16 || t == ctx->i16)
205 return ctx->i16;
206 else if (t == ctx->f32 || t == ctx->i32)
207 return ctx->i32;
208 else if (t == ctx->f64 || t == ctx->i64)
209 return ctx->i64;
210 else
211 unreachable("Unhandled integer size");
212}
213
214LLVMTypeRef
215ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
216{
217 if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
218 LLVMTypeRef elem_type = LLVMGetElementType(t);
219 return LLVMVectorType(to_integer_type_scalar(ctx, elem_type),
220 LLVMGetVectorSize(t));
221 }
Bas Nieuwenhuizen58c8dad2019-01-23 01:54:39 +0100222 if (LLVMGetTypeKind(t) == LLVMPointerTypeKind) {
223 switch (LLVMGetPointerAddressSpace(t)) {
224 case AC_ADDR_SPACE_GLOBAL:
225 return ctx->i64;
226 case AC_ADDR_SPACE_LDS:
227 return ctx->i32;
228 default:
229 unreachable("unhandled address space");
230 }
231 }
Connor Abbott50967cd2017-07-18 17:32:10 -0700232 return to_integer_type_scalar(ctx, t);
233}
234
235LLVMValueRef
236ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v)
237{
238 LLVMTypeRef type = LLVMTypeOf(v);
Bas Nieuwenhuizen58c8dad2019-01-23 01:54:39 +0100239 if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) {
240 return LLVMBuildPtrToInt(ctx->builder, v, ac_to_integer_type(ctx, type), "");
241 }
Connor Abbott50967cd2017-07-18 17:32:10 -0700242 return LLVMBuildBitCast(ctx->builder, v, ac_to_integer_type(ctx, type), "");
243}
244
Dave Airlieec9fe8a2018-11-19 13:00:36 +1000245LLVMValueRef
246ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v)
247{
248 LLVMTypeRef type = LLVMTypeOf(v);
249 if (LLVMGetTypeKind(type) == LLVMPointerTypeKind)
250 return v;
251 return ac_to_integer(ctx, v);
252}
253
Connor Abbott50967cd2017-07-18 17:32:10 -0700254static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
255{
256 if (t == ctx->i16 || t == ctx->f16)
257 return ctx->f16;
258 else if (t == ctx->i32 || t == ctx->f32)
259 return ctx->f32;
260 else if (t == ctx->i64 || t == ctx->f64)
261 return ctx->f64;
262 else
263 unreachable("Unhandled float size");
264}
265
266LLVMTypeRef
267ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
268{
269 if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
270 LLVMTypeRef elem_type = LLVMGetElementType(t);
271 return LLVMVectorType(to_float_type_scalar(ctx, elem_type),
272 LLVMGetVectorSize(t));
273 }
274 return to_float_type_scalar(ctx, t);
275}
276
277LLVMValueRef
278ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v)
279{
280 LLVMTypeRef type = LLVMTypeOf(v);
281 return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx, type), "");
282}
283
284
Dave Airlie13a28ff2017-02-03 10:05:00 +1000285LLVMValueRef
Marek Olšák7f1446a2017-02-26 00:41:37 +0100286ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
287 LLVMTypeRef return_type, LLVMValueRef *params,
288 unsigned param_count, unsigned attrib_mask)
Dave Airlie13a28ff2017-02-03 10:05:00 +1000289{
Marek Olšák940da362017-02-22 02:29:12 +0100290 LLVMValueRef function, call;
Marek Olšák3bf1e032018-02-02 19:26:49 +0100291 bool set_callsite_attrs = !(attrib_mask & AC_FUNC_ATTR_LEGACY);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000292
293 function = LLVMGetNamedFunction(ctx->module, name);
294 if (!function) {
295 LLVMTypeRef param_types[32], function_type;
296 unsigned i;
297
298 assert(param_count <= 32);
299
300 for (i = 0; i < param_count; ++i) {
301 assert(params[i]);
302 param_types[i] = LLVMTypeOf(params[i]);
303 }
304 function_type =
305 LLVMFunctionType(return_type, param_types, param_count, 0);
306 function = LLVMAddFunction(ctx->module, name, function_type);
307
308 LLVMSetFunctionCallConv(function, LLVMCCallConv);
309 LLVMSetLinkage(function, LLVMExternalLinkage);
310
Marek Olšák940da362017-02-22 02:29:12 +0100311 if (!set_callsite_attrs)
312 ac_add_func_attributes(ctx->context, function, attrib_mask);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000313 }
Marek Olšák940da362017-02-22 02:29:12 +0100314
315 call = LLVMBuildCall(ctx->builder, function, params, param_count, "");
316 if (set_callsite_attrs)
317 ac_add_func_attributes(ctx->context, call, attrib_mask);
318 return call;
Dave Airlie13a28ff2017-02-03 10:05:00 +1000319}
320
Marek Olšák9af03312017-02-23 22:58:49 +0100321/**
322 * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
323 * intrinsic names).
324 */
325void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize)
326{
327 LLVMTypeRef elem_type = type;
328
329 assert(bufsize >= 8);
330
331 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
332 int ret = snprintf(buf, bufsize, "v%u",
333 LLVMGetVectorSize(type));
334 if (ret < 0) {
335 char *type_name = LLVMPrintTypeToString(type);
336 fprintf(stderr, "Error building type name for: %s\n",
337 type_name);
338 return;
339 }
340 elem_type = LLVMGetElementType(type);
341 buf += ret;
342 bufsize -= ret;
343 }
344 switch (LLVMGetTypeKind(elem_type)) {
345 default: break;
346 case LLVMIntegerTypeKind:
347 snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
348 break;
Daniel Schürmannf5823672018-02-03 14:37:26 +0100349 case LLVMHalfTypeKind:
350 snprintf(buf, bufsize, "f16");
351 break;
Marek Olšák9af03312017-02-23 22:58:49 +0100352 case LLVMFloatTypeKind:
353 snprintf(buf, bufsize, "f32");
354 break;
355 case LLVMDoubleTypeKind:
356 snprintf(buf, bufsize, "f64");
357 break;
358 }
359}
360
Nicolai Hähnle052b9742017-09-29 11:17:03 +0200361/**
362 * Helper function that builds an LLVM IR PHI node and immediately adds
363 * incoming edges.
364 */
365LLVMValueRef
366ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
367 unsigned count_incoming, LLVMValueRef *values,
368 LLVMBasicBlockRef *blocks)
369{
370 LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
371 LLVMAddIncoming(phi, values, blocks, count_incoming);
372 return phi;
373}
374
Marek Olšák2276f8f2018-08-13 23:59:28 -0400375void ac_build_s_barrier(struct ac_llvm_context *ctx)
376{
377 ac_build_intrinsic(ctx, "llvm.amdgcn.s.barrier", ctx->voidt, NULL,
378 0, AC_FUNC_ATTR_CONVERGENT);
379}
380
Connor Abbottac27fa72017-06-05 14:16:43 -0700381/* Prevent optimizations (at least of memory accesses) across the current
382 * point in the program by emitting empty inline assembly that is marked as
383 * having side effects.
384 *
385 * Optionally, a value can be passed through the inline assembly to prevent
386 * LLVM from hoisting calls to ReadNone functions.
387 */
388void
389ac_build_optimization_barrier(struct ac_llvm_context *ctx,
390 LLVMValueRef *pvgpr)
391{
392 static int counter = 0;
393
394 LLVMBuilderRef builder = ctx->builder;
395 char code[16];
396
397 snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter));
398
399 if (!pvgpr) {
400 LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
401 LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false);
402 LLVMBuildCall(builder, inlineasm, NULL, 0, "");
403 } else {
404 LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
405 LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false);
406 LLVMValueRef vgpr = *pvgpr;
407 LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr);
408 unsigned vgpr_size = ac_get_type_size(vgpr_type);
409 LLVMValueRef vgpr0;
410
411 assert(vgpr_size % 4 == 0);
412
413 vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), "");
414 vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, "");
415 vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, "");
416 vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, "");
417 vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, "");
418
419 *pvgpr = vgpr;
420 }
421}
422
Dave Airlie13a28ff2017-02-03 10:05:00 +1000423LLVMValueRef
Timothy Arcerib7b89bb2018-02-02 13:54:48 +1100424ac_build_shader_clock(struct ac_llvm_context *ctx)
425{
426 LLVMValueRef tmp = ac_build_intrinsic(ctx, "llvm.readcyclecounter",
427 ctx->i64, NULL, 0, 0);
428 return LLVMBuildBitCast(ctx->builder, tmp, ctx->v2i32, "");
429}
430
431LLVMValueRef
Connor Abbottbd73b892017-06-05 15:20:04 -0700432ac_build_ballot(struct ac_llvm_context *ctx,
433 LLVMValueRef value)
434{
435 LLVMValueRef args[3] = {
436 value,
437 ctx->i32_0,
438 LLVMConstInt(ctx->i32, LLVMIntNE, 0)
439 };
440
441 /* We currently have no other way to prevent LLVM from lifting the icmp
442 * calls to a dominating basic block.
443 */
444 ac_build_optimization_barrier(ctx, &args[0]);
445
Daniel Schürmannd19f20e2018-02-28 20:26:03 +0100446 args[0] = ac_to_integer(ctx, args[0]);
Connor Abbottbd73b892017-06-05 15:20:04 -0700447
448 return ac_build_intrinsic(ctx,
449 "llvm.amdgcn.icmp.i32",
450 ctx->i64, args, 3,
451 AC_FUNC_ATTR_NOUNWIND |
452 AC_FUNC_ATTR_READNONE |
453 AC_FUNC_ATTR_CONVERGENT);
454}
455
456LLVMValueRef
Connor Abbottb8a51c82017-06-06 16:40:26 -0700457ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value)
458{
459 LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
460 LLVMValueRef vote_set = ac_build_ballot(ctx, value);
461 return LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, "");
462}
463
464LLVMValueRef
465ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value)
466{
467 LLVMValueRef vote_set = ac_build_ballot(ctx, value);
468 return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set,
469 LLVMConstInt(ctx->i64, 0, 0), "");
470}
471
472LLVMValueRef
473ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
474{
475 LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
476 LLVMValueRef vote_set = ac_build_ballot(ctx, value);
477
478 LLVMValueRef all = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
479 vote_set, active_set, "");
480 LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
481 vote_set,
482 LLVMConstInt(ctx->i64, 0, 0), "");
483 return LLVMBuildOr(ctx->builder, all, none, "");
484}
485
486LLVMValueRef
Timothy Arcericaf15ce2017-11-10 13:55:48 +1100487ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
488 unsigned value_count, unsigned component)
489{
490 LLVMValueRef vec = NULL;
491
492 if (value_count == 1) {
493 return values[component];
494 } else if (!value_count)
495 unreachable("value_count is 0");
496
497 for (unsigned i = component; i < value_count + component; i++) {
498 LLVMValueRef value = values[i];
499
Timothy Arceri5b9362c2018-01-19 16:48:43 +1100500 if (i == component)
Timothy Arcericaf15ce2017-11-10 13:55:48 +1100501 vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
502 LLVMValueRef index = LLVMConstInt(ctx->i32, i - component, false);
503 vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, "");
504 }
505 return vec;
506}
507
508LLVMValueRef
Dave Airlie13a28ff2017-02-03 10:05:00 +1000509ac_build_gather_values_extended(struct ac_llvm_context *ctx,
510 LLVMValueRef *values,
511 unsigned value_count,
512 unsigned value_stride,
Nicolai Hähnleac2ab5a2017-06-25 13:04:51 +0200513 bool load,
514 bool always_vector)
Dave Airlie13a28ff2017-02-03 10:05:00 +1000515{
516 LLVMBuilderRef builder = ctx->builder;
Marek Olšákc7878b02017-02-23 01:34:27 +0100517 LLVMValueRef vec = NULL;
Dave Airlie13a28ff2017-02-03 10:05:00 +1000518 unsigned i;
519
Nicolai Hähnleac2ab5a2017-06-25 13:04:51 +0200520 if (value_count == 1 && !always_vector) {
Dave Airlie13a28ff2017-02-03 10:05:00 +1000521 if (load)
522 return LLVMBuildLoad(builder, values[0], "");
523 return values[0];
524 } else if (!value_count)
525 unreachable("value_count is 0");
526
527 for (i = 0; i < value_count; i++) {
528 LLVMValueRef value = values[i * value_stride];
529 if (load)
530 value = LLVMBuildLoad(builder, value, "");
531
532 if (!i)
533 vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
534 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
535 vec = LLVMBuildInsertElement(builder, vec, value, index, "");
536 }
537 return vec;
538}
539
540LLVMValueRef
541ac_build_gather_values(struct ac_llvm_context *ctx,
542 LLVMValueRef *values,
543 unsigned value_count)
544{
Nicolai Hähnleac2ab5a2017-06-25 13:04:51 +0200545 return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000546}
547
Connor Abbott59535b02018-10-18 15:30:11 +0200548/* Expand a scalar or vector to <dst_channels x type> by filling the remaining
549 * channels with undef. Extract at most src_channels components from the input.
550 */
Samuel Pitoiset2154fac2019-02-12 15:09:29 +0100551static LLVMValueRef
552ac_build_expand(struct ac_llvm_context *ctx,
553 LLVMValueRef value,
554 unsigned src_channels,
555 unsigned dst_channels)
Connor Abbott59535b02018-10-18 15:30:11 +0200556{
557 LLVMTypeRef elemtype;
558 LLVMValueRef chan[dst_channels];
559
560 if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
561 unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
562
563 if (src_channels == dst_channels && vec_size == dst_channels)
564 return value;
565
566 src_channels = MIN2(src_channels, vec_size);
567
568 for (unsigned i = 0; i < src_channels; i++)
569 chan[i] = ac_llvm_extract_elem(ctx, value, i);
570
571 elemtype = LLVMGetElementType(LLVMTypeOf(value));
572 } else {
573 if (src_channels) {
574 assert(src_channels == 1);
575 chan[0] = value;
576 }
577 elemtype = LLVMTypeOf(value);
578 }
579
580 for (unsigned i = src_channels; i < dst_channels; i++)
581 chan[i] = LLVMGetUndef(elemtype);
582
583 return ac_build_gather_values(ctx, chan, dst_channels);
584}
585
Marek Olšákbe973ed2018-01-30 18:34:25 +0100586/* Expand a scalar or vector to <4 x type> by filling the remaining channels
587 * with undef. Extract at most num_channels components from the input.
588 */
589LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
590 LLVMValueRef value,
591 unsigned num_channels)
592{
Connor Abbott59535b02018-10-18 15:30:11 +0200593 return ac_build_expand(ctx, value, num_channels, 4);
Marek Olšákbe973ed2018-01-30 18:34:25 +0100594}
595
Marek Olšák77903c82018-09-21 21:30:09 -0400596LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value)
597{
598 unsigned type_size = ac_get_type_size(LLVMTypeOf(value));
599 const char *name;
600
601 if (type_size == 2)
602 name = "llvm.rint.f16";
603 else if (type_size == 4)
604 name = "llvm.rint.f32";
605 else
606 name = "llvm.rint.f64";
607
608 return ac_build_intrinsic(ctx, name, LLVMTypeOf(value), &value, 1,
609 AC_FUNC_ATTR_READNONE);
610}
611
Dave Airlie13a28ff2017-02-03 10:05:00 +1000612LLVMValueRef
Marek Olšák7f1446a2017-02-26 00:41:37 +0100613ac_build_fdiv(struct ac_llvm_context *ctx,
614 LLVMValueRef num,
615 LLVMValueRef den)
Dave Airlie13a28ff2017-02-03 10:05:00 +1000616{
Marek Olšáka5f35aa2018-08-24 23:00:37 -0400617 /* If we do (num / den), LLVM >= 7.0 does:
618 * return num * v_rcp_f32(den * (fabs(den) > 0x1.0p+96f ? 0x1.0p-32f : 1.0f));
619 *
620 * If we do (num * (1 / den)), LLVM does:
621 * return num * v_rcp_f32(den);
622 */
Marek Olšák8676af12018-10-29 17:22:03 -0400623 LLVMValueRef one = LLVMTypeOf(num) == ctx->f64 ? ctx->f64_1 : ctx->f32_1;
624 LLVMValueRef rcp = LLVMBuildFDiv(ctx->builder, one, den, "");
Marek Olšáka5f35aa2018-08-24 23:00:37 -0400625 LLVMValueRef ret = LLVMBuildFMul(ctx->builder, num, rcp, "");
Dave Airlie13a28ff2017-02-03 10:05:00 +1000626
Samuel Pitoisetd43e72f2017-12-12 18:10:20 +0100627 /* Use v_rcp_f32 instead of precise division. */
Dave Airlie13a28ff2017-02-03 10:05:00 +1000628 if (!LLVMIsConstant(ret))
629 LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
630 return ret;
631}
632
Marek Olšákbfc79562018-09-22 21:17:52 -0400633/* See fast_idiv_by_const.h. */
634/* Set: increment = util_fast_udiv_info::increment ? multiplier : 0; */
635LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
636 LLVMValueRef num,
637 LLVMValueRef multiplier,
638 LLVMValueRef pre_shift,
639 LLVMValueRef post_shift,
640 LLVMValueRef increment)
641{
642 LLVMBuilderRef builder = ctx->builder;
643
644 num = LLVMBuildLShr(builder, num, pre_shift, "");
645 num = LLVMBuildMul(builder,
646 LLVMBuildZExt(builder, num, ctx->i64, ""),
647 LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
648 num = LLVMBuildAdd(builder, num,
649 LLVMBuildZExt(builder, increment, ctx->i64, ""), "");
650 num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
651 num = LLVMBuildTrunc(builder, num, ctx->i32, "");
652 return LLVMBuildLShr(builder, num, post_shift, "");
653}
654
655/* See fast_idiv_by_const.h. */
656/* If num != UINT_MAX, this more efficient version can be used. */
657/* Set: increment = util_fast_udiv_info::increment; */
658LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
659 LLVMValueRef num,
660 LLVMValueRef multiplier,
661 LLVMValueRef pre_shift,
662 LLVMValueRef post_shift,
663 LLVMValueRef increment)
664{
665 LLVMBuilderRef builder = ctx->builder;
666
667 num = LLVMBuildLShr(builder, num, pre_shift, "");
668 num = LLVMBuildNUWAdd(builder, num, increment, "");
669 num = LLVMBuildMul(builder,
670 LLVMBuildZExt(builder, num, ctx->i64, ""),
671 LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
672 num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
673 num = LLVMBuildTrunc(builder, num, ctx->i32, "");
674 return LLVMBuildLShr(builder, num, post_shift, "");
675}
676
677/* See fast_idiv_by_const.h. */
678/* Both operands must fit in 31 bits and the divisor must not be 1. */
679LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
680 LLVMValueRef num,
681 LLVMValueRef multiplier,
682 LLVMValueRef post_shift)
683{
684 LLVMBuilderRef builder = ctx->builder;
685
686 num = LLVMBuildMul(builder,
687 LLVMBuildZExt(builder, num, ctx->i64, ""),
688 LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
689 num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
690 num = LLVMBuildTrunc(builder, num, ctx->i32, "");
691 return LLVMBuildLShr(builder, num, post_shift, "");
692}
693
Dave Airlie13a28ff2017-02-03 10:05:00 +1000694/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
695 * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
696 * already multiplied by two. id is the cube face number.
697 */
698struct cube_selection_coords {
699 LLVMValueRef stc[2];
700 LLVMValueRef ma;
701 LLVMValueRef id;
702};
703
704static void
705build_cube_intrinsic(struct ac_llvm_context *ctx,
706 LLVMValueRef in[3],
707 struct cube_selection_coords *out)
708{
Marek Olšák12beef02017-04-25 02:18:10 +0200709 LLVMTypeRef f32 = ctx->f32;
Dave Airlie13a28ff2017-02-03 10:05:00 +1000710
Marek Olšák12beef02017-04-25 02:18:10 +0200711 out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc",
712 f32, in, 3, AC_FUNC_ATTR_READNONE);
713 out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc",
714 f32, in, 3, AC_FUNC_ATTR_READNONE);
715 out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema",
716 f32, in, 3, AC_FUNC_ATTR_READNONE);
717 out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid",
718 f32, in, 3, AC_FUNC_ATTR_READNONE);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000719}
720
721/**
722 * Build a manual selection sequence for cube face sc/tc coordinates and
723 * major axis vector (multiplied by 2 for consistency) for the given
724 * vec3 \p coords, for the face implied by \p selcoords.
725 *
726 * For the major axis, we always adjust the sign to be in the direction of
727 * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
728 * the selcoords major axis.
729 */
Nicolai Hähnlea6ea4c12017-09-22 19:14:16 +0200730static void build_cube_select(struct ac_llvm_context *ctx,
Dave Airlie13a28ff2017-02-03 10:05:00 +1000731 const struct cube_selection_coords *selcoords,
732 const LLVMValueRef *coords,
733 LLVMValueRef *out_st,
734 LLVMValueRef *out_ma)
735{
Nicolai Hähnlea6ea4c12017-09-22 19:14:16 +0200736 LLVMBuilderRef builder = ctx->builder;
Dave Airlie13a28ff2017-02-03 10:05:00 +1000737 LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
738 LLVMValueRef is_ma_positive;
739 LLVMValueRef sgn_ma;
740 LLVMValueRef is_ma_z, is_not_ma_z;
741 LLVMValueRef is_ma_y;
742 LLVMValueRef is_ma_x;
743 LLVMValueRef sgn;
744 LLVMValueRef tmp;
745
746 is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE,
747 selcoords->ma, LLVMConstReal(f32, 0.0), "");
748 sgn_ma = LLVMBuildSelect(builder, is_ma_positive,
749 LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), "");
750
751 is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
752 is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
753 is_ma_y = LLVMBuildAnd(builder, is_not_ma_z,
754 LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
755 is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
756
757 /* Select sc */
Nicolai Hähnle5be5c1e2017-09-22 19:05:52 +0200758 tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], "");
Dave Airlie13a28ff2017-02-03 10:05:00 +1000759 sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
Nicolai Hähnle5be5c1e2017-09-22 19:05:52 +0200760 LLVMBuildSelect(builder, is_ma_z, sgn_ma,
Dave Airlie13a28ff2017-02-03 10:05:00 +1000761 LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
762 out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
763
764 /* Select tc */
765 tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
Nicolai Hähnle5be5c1e2017-09-22 19:05:52 +0200766 sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma,
Dave Airlie13a28ff2017-02-03 10:05:00 +1000767 LLVMConstReal(f32, -1.0), "");
768 out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
769
770 /* Select ma */
771 tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
772 LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
Nicolai Hähnlea6ea4c12017-09-22 19:14:16 +0200773 tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32",
774 ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE);
775 *out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), "");
Dave Airlie13a28ff2017-02-03 10:05:00 +1000776}
777
778void
779ac_prepare_cube_coords(struct ac_llvm_context *ctx,
Nicolai Hähnlee0af3be2017-09-13 10:47:02 +0200780 bool is_deriv, bool is_array, bool is_lod,
Dave Airlie13a28ff2017-02-03 10:05:00 +1000781 LLVMValueRef *coords_arg,
782 LLVMValueRef *derivs_arg)
783{
784
785 LLVMBuilderRef builder = ctx->builder;
786 struct cube_selection_coords selcoords;
787 LLVMValueRef coords[3];
788 LLVMValueRef invma;
789
Nicolai Hähnlee0af3be2017-09-13 10:47:02 +0200790 if (is_array && !is_lod) {
Marek Olšák77903c82018-09-21 21:30:09 -0400791 LLVMValueRef tmp = ac_build_round(ctx, coords_arg[3]);
Nicolai Hähnle94736d32017-09-13 15:33:23 +0200792
793 /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
794 *
795 * "For Array forms, the array layer used will be
796 *
797 * max(0, min(d−1, floor(layer+0.5)))
798 *
799 * where d is the depth of the texture array and layer
800 * comes from the component indicated in the tables below.
801 * Workaroudn for an issue where the layer is taken from a
802 * helper invocation which happens to fall on a different
803 * layer due to extrapolation."
804 *
805 * VI and earlier attempt to implement this in hardware by
806 * clamping the value of coords[2] = (8 * layer) + face.
807 * Unfortunately, this means that the we end up with the wrong
808 * face when clamping occurs.
809 *
810 * Clamp the layer earlier to work around the issue.
811 */
812 if (ctx->chip_class <= VI) {
813 LLVMValueRef ge0;
814 ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, "");
815 tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, "");
816 }
817
818 coords_arg[3] = tmp;
Nicolai Hähnlee0af3be2017-09-13 10:47:02 +0200819 }
820
Dave Airlie13a28ff2017-02-03 10:05:00 +1000821 build_cube_intrinsic(ctx, coords_arg, &selcoords);
822
Marek Olšák7f1446a2017-02-26 00:41:37 +0100823 invma = ac_build_intrinsic(ctx, "llvm.fabs.f32",
Dave Airlie13a28ff2017-02-03 10:05:00 +1000824 ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE);
Marek Olšák7f1446a2017-02-26 00:41:37 +0100825 invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000826
827 for (int i = 0; i < 2; ++i)
828 coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");
829
830 coords[2] = selcoords.id;
831
832 if (is_deriv && derivs_arg) {
833 LLVMValueRef derivs[4];
834 int axis;
835
836 /* Convert cube derivatives to 2D derivatives. */
837 for (axis = 0; axis < 2; axis++) {
838 LLVMValueRef deriv_st[2];
839 LLVMValueRef deriv_ma;
840
841 /* Transform the derivative alongside the texture
842 * coordinate. Mathematically, the correct formula is
843 * as follows. Assume we're projecting onto the +Z face
844 * and denote by dx/dh the derivative of the (original)
845 * X texture coordinate with respect to horizontal
846 * window coordinates. The projection onto the +Z face
847 * plane is:
848 *
849 * f(x,z) = x/z
850 *
851 * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
852 * = 1/z * dx/dh - x/z * 1/z * dz/dh.
853 *
854 * This motivatives the implementation below.
855 *
856 * Whether this actually gives the expected results for
857 * apps that might feed in derivatives obtained via
858 * finite differences is anyone's guess. The OpenGL spec
859 * seems awfully quiet about how textureGrad for cube
860 * maps should be handled.
861 */
Nicolai Hähnlea6ea4c12017-09-22 19:14:16 +0200862 build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3],
Dave Airlie13a28ff2017-02-03 10:05:00 +1000863 deriv_st, &deriv_ma);
864
865 deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
866
867 for (int i = 0; i < 2; ++i)
868 derivs[axis * 2 + i] =
869 LLVMBuildFSub(builder,
870 LLVMBuildFMul(builder, deriv_st[i], invma, ""),
871 LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
872 }
873
874 memcpy(derivs_arg, derivs, sizeof(derivs));
875 }
876
877 /* Shift the texture coordinate. This must be applied after the
878 * derivative calculation.
879 */
880 for (int i = 0; i < 2; ++i)
881 coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");
882
883 if (is_array) {
884 /* for cube arrays coord.z = coord.w(array_index) * 8 + face */
885 /* coords_arg.w component - array_index for cube arrays */
Marek Olšák60beac92018-08-15 20:50:03 -0400886 coords[2] = ac_build_fmad(ctx, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), coords[2]);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000887 }
888
889 memcpy(coords_arg, coords, sizeof(coords));
890}
891
892
893LLVMValueRef
894ac_build_fs_interp(struct ac_llvm_context *ctx,
895 LLVMValueRef llvm_chan,
896 LLVMValueRef attr_number,
897 LLVMValueRef params,
898 LLVMValueRef i,
899 LLVMValueRef j)
900{
901 LLVMValueRef args[5];
902 LLVMValueRef p1;
Dave Airlie13a28ff2017-02-03 10:05:00 +1000903
904 args[0] = i;
905 args[1] = llvm_chan;
906 args[2] = attr_number;
907 args[3] = params;
908
Marek Olšák7f1446a2017-02-26 00:41:37 +0100909 p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
910 ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000911
912 args[0] = p1;
913 args[1] = j;
914 args[2] = llvm_chan;
915 args[3] = attr_number;
916 args[4] = params;
917
Marek Olšák7f1446a2017-02-26 00:41:37 +0100918 return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2",
919 ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000920}
921
922LLVMValueRef
923ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
924 LLVMValueRef parameter,
925 LLVMValueRef llvm_chan,
926 LLVMValueRef attr_number,
927 LLVMValueRef params)
928{
929 LLVMValueRef args[4];
Dave Airlie13a28ff2017-02-03 10:05:00 +1000930
931 args[0] = parameter;
932 args[1] = llvm_chan;
933 args[2] = attr_number;
934 args[3] = params;
935
Marek Olšák7f1446a2017-02-26 00:41:37 +0100936 return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.mov",
937 ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000938}
939
940LLVMValueRef
Bas Nieuwenhuizene00d9a92019-01-23 01:53:59 +0100941ac_build_gep_ptr(struct ac_llvm_context *ctx,
942 LLVMValueRef base_ptr,
943 LLVMValueRef index)
944{
945 return LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, "");
946}
947
948LLVMValueRef
Dave Airlie13a28ff2017-02-03 10:05:00 +1000949ac_build_gep0(struct ac_llvm_context *ctx,
950 LLVMValueRef base_ptr,
951 LLVMValueRef index)
952{
953 LLVMValueRef indices[2] = {
Marek Olšákcc36ebb2018-08-29 00:15:16 -0400954 ctx->i32_0,
Dave Airlie13a28ff2017-02-03 10:05:00 +1000955 index,
956 };
Marek Olšákcc36ebb2018-08-29 00:15:16 -0400957 return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
Dave Airlie13a28ff2017-02-03 10:05:00 +1000958}
959
Marek Olšákbe0bd952018-08-29 01:34:46 -0400960LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
961 LLVMValueRef index)
962{
963 return LLVMBuildPointerCast(ctx->builder,
964 ac_build_gep0(ctx, ptr, index),
965 LLVMTypeOf(ptr), "");
966}
967
Dave Airlie13a28ff2017-02-03 10:05:00 +1000968void
969ac_build_indexed_store(struct ac_llvm_context *ctx,
970 LLVMValueRef base_ptr, LLVMValueRef index,
971 LLVMValueRef value)
972{
973 LLVMBuildStore(ctx->builder, value,
974 ac_build_gep0(ctx, base_ptr, index));
975}
976
977/**
978 * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
979 * It's equivalent to doing a load from &base_ptr[index].
980 *
981 * \param base_ptr Where the array starts.
982 * \param index The element index into the array.
983 * \param uniform Whether the base_ptr and index can be assumed to be
Marek Olšák854593b2017-10-08 20:05:44 +0200984 * dynamically uniform (i.e. load to an SGPR)
985 * \param invariant Whether the load is invariant (no other opcodes affect it)
Marek Olšákbe0bd952018-08-29 01:34:46 -0400986 * \param no_unsigned_wraparound
987 * For all possible re-associations and re-distributions of an expression
988 * "base_ptr + index * elemsize" into "addr + offset" (excluding GEPs
989 * without inbounds in base_ptr), this parameter is true if "addr + offset"
990 * does not result in an unsigned integer wraparound. This is used for
991 * optimal code generation of 32-bit pointer arithmetic.
992 *
993 * For example, a 32-bit immediate offset that causes a 32-bit unsigned
994 * integer wraparound can't be an imm offset in s_load_dword, because
995 * the instruction performs "addr + offset" in 64 bits.
996 *
997 * Expected usage for bindless textures by chaining GEPs:
998 * // possible unsigned wraparound, don't use InBounds:
999 * ptr1 = LLVMBuildGEP(base_ptr, index);
1000 * image = load(ptr1); // becomes "s_load ptr1, 0"
1001 *
1002 * ptr2 = LLVMBuildInBoundsGEP(ptr1, 32 / elemsize);
1003 * sampler = load(ptr2); // becomes "s_load ptr1, 32" thanks to InBounds
Dave Airlie13a28ff2017-02-03 10:05:00 +10001004 */
Marek Olšák854593b2017-10-08 20:05:44 +02001005static LLVMValueRef
1006ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
Marek Olšákbe0bd952018-08-29 01:34:46 -04001007 LLVMValueRef index, bool uniform, bool invariant,
1008 bool no_unsigned_wraparound)
Dave Airlie13a28ff2017-02-03 10:05:00 +10001009{
Marek Olšák854593b2017-10-08 20:05:44 +02001010 LLVMValueRef pointer, result;
Marek Olšákbe0bd952018-08-29 01:34:46 -04001011 LLVMValueRef indices[2] = {ctx->i32_0, index};
Dave Airlie13a28ff2017-02-03 10:05:00 +10001012
Marek Olšákbe0bd952018-08-29 01:34:46 -04001013 if (no_unsigned_wraparound &&
Marek Olšáka668c8d2018-09-07 18:44:54 -04001014 LLVMGetPointerAddressSpace(LLVMTypeOf(base_ptr)) == AC_ADDR_SPACE_CONST_32BIT)
Marek Olšákbe0bd952018-08-29 01:34:46 -04001015 pointer = LLVMBuildInBoundsGEP(ctx->builder, base_ptr, indices, 2, "");
1016 else
1017 pointer = LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
1018
Dave Airlie13a28ff2017-02-03 10:05:00 +10001019 if (uniform)
1020 LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
Marek Olšák854593b2017-10-08 20:05:44 +02001021 result = LLVMBuildLoad(ctx->builder, pointer, "");
1022 if (invariant)
1023 LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
1024 return result;
Dave Airlie13a28ff2017-02-03 10:05:00 +10001025}
1026
Marek Olšák854593b2017-10-08 20:05:44 +02001027LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
1028 LLVMValueRef index)
Dave Airlie13a28ff2017-02-03 10:05:00 +10001029{
Marek Olšákbe0bd952018-08-29 01:34:46 -04001030 return ac_build_load_custom(ctx, base_ptr, index, false, false, false);
Marek Olšák854593b2017-10-08 20:05:44 +02001031}
1032
1033LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
1034 LLVMValueRef base_ptr, LLVMValueRef index)
1035{
Marek Olšákbe0bd952018-08-29 01:34:46 -04001036 return ac_build_load_custom(ctx, base_ptr, index, false, true, false);
Marek Olšák854593b2017-10-08 20:05:44 +02001037}
1038
Marek Olšákbe0bd952018-08-29 01:34:46 -04001039/* This assumes that there is no unsigned integer wraparound during the address
1040 * computation, excluding all GEPs within base_ptr. */
Marek Olšák854593b2017-10-08 20:05:44 +02001041LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
1042 LLVMValueRef base_ptr, LLVMValueRef index)
1043{
Marek Olšákbe0bd952018-08-29 01:34:46 -04001044 return ac_build_load_custom(ctx, base_ptr, index, true, true, true);
1045}
1046
1047/* See ac_build_load_custom() documentation. */
1048LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
1049 LLVMValueRef base_ptr, LLVMValueRef index)
1050{
1051 return ac_build_load_custom(ctx, base_ptr, index, true, true, false);
Dave Airlie13a28ff2017-02-03 10:05:00 +10001052}
1053
1054/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
1055 * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
1056 * or v4i32 (num_channels=3,4).
1057 */
1058void
Marek Olšák27439df2017-02-24 01:20:35 +01001059ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
1060 LLVMValueRef rsrc,
1061 LLVMValueRef vdata,
1062 unsigned num_channels,
Marek Olšák8cfdbba2017-02-24 20:23:23 +01001063 LLVMValueRef voffset,
Marek Olšák27439df2017-02-24 01:20:35 +01001064 LLVMValueRef soffset,
1065 unsigned inst_offset,
Marek Olšák27439df2017-02-24 01:20:35 +01001066 bool glc,
Marek Olšák97e21cf2017-02-24 02:09:47 +01001067 bool slc,
1068 bool writeonly_memory,
Marek Olšákbcd3e762017-09-30 15:36:18 +02001069 bool swizzle_enable_hint)
Dave Airlie13a28ff2017-02-03 10:05:00 +10001070{
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04001071 /* Split 3 channel stores, becase LLVM doesn't support 3-channel
1072 * intrinsics. */
1073 if (num_channels == 3) {
1074 LLVMValueRef v[3], v01;
1075
1076 for (int i = 0; i < 3; i++) {
1077 v[i] = LLVMBuildExtractElement(ctx->builder, vdata,
1078 LLVMConstInt(ctx->i32, i, 0), "");
1079 }
1080 v01 = ac_build_gather_values(ctx, v, 2);
1081
1082 ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
1083 soffset, inst_offset, glc, slc,
1084 writeonly_memory, swizzle_enable_hint);
1085 ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
1086 soffset, inst_offset + 8,
1087 glc, slc,
1088 writeonly_memory, swizzle_enable_hint);
1089 return;
1090 }
Dave Airlieeba4cf72018-05-16 09:36:22 +10001091
Marek Olšákbcd3e762017-09-30 15:36:18 +02001092 /* SWIZZLE_ENABLE requires that soffset isn't folded into voffset
1093 * (voffset is swizzled, but soffset isn't swizzled).
1094 * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
1095 */
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04001096 if (!swizzle_enable_hint) {
1097 LLVMValueRef offset = soffset;
Marek Olšák97e21cf2017-02-24 02:09:47 +01001098
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04001099 static const char *types[] = {"f32", "v2f32", "v4f32"};
Marek Olšák97e21cf2017-02-24 02:09:47 +01001100
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04001101 if (inst_offset)
1102 offset = LLVMBuildAdd(ctx->builder, offset,
1103 LLVMConstInt(ctx->i32, inst_offset, 0), "");
1104 if (voffset)
1105 offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
Marek Olšák97e21cf2017-02-24 02:09:47 +01001106
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04001107 LLVMValueRef args[] = {
1108 ac_to_float(ctx, vdata),
1109 LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
Marek Olšákcc36ebb2018-08-29 00:15:16 -04001110 ctx->i32_0,
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04001111 offset,
1112 LLVMConstInt(ctx->i1, glc, 0),
1113 LLVMConstInt(ctx->i1, slc, 0),
1114 };
1115
Marek Olšák97e21cf2017-02-24 02:09:47 +01001116 char name[256];
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04001117 snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
1118 types[CLAMP(num_channels, 1, 3) - 1]);
Marek Olšák97e21cf2017-02-24 02:09:47 +01001119
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04001120 ac_build_intrinsic(ctx, name, ctx->voidt,
1121 args, ARRAY_SIZE(args),
1122 writeonly_memory ?
1123 AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY :
1124 AC_FUNC_ATTR_WRITEONLY);
1125 return;
Marek Olšák97e21cf2017-02-24 02:09:47 +01001126 }
1127
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04001128 static const unsigned dfmt[] = {
1129 V_008F0C_BUF_DATA_FORMAT_32,
1130 V_008F0C_BUF_DATA_FORMAT_32_32,
1131 V_008F0C_BUF_DATA_FORMAT_32_32_32,
1132 V_008F0C_BUF_DATA_FORMAT_32_32_32_32
1133 };
1134 static const char *types[] = {"i32", "v2i32", "v4i32"};
Dave Airlie13a28ff2017-02-03 10:05:00 +10001135 LLVMValueRef args[] = {
Dave Airlie13a28ff2017-02-03 10:05:00 +10001136 vdata,
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04001137 LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
Marek Olšákcc36ebb2018-08-29 00:15:16 -04001138 ctx->i32_0,
1139 voffset ? voffset : ctx->i32_0,
Dave Airlie13a28ff2017-02-03 10:05:00 +10001140 soffset,
1141 LLVMConstInt(ctx->i32, inst_offset, 0),
Marek Olšák27439df2017-02-24 01:20:35 +01001142 LLVMConstInt(ctx->i32, dfmt[num_channels - 1], 0),
1143 LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, 0),
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04001144 LLVMConstInt(ctx->i1, glc, 0),
1145 LLVMConstInt(ctx->i1, slc, 0),
Dave Airlie13a28ff2017-02-03 10:05:00 +10001146 };
Dave Airlie13a28ff2017-02-03 10:05:00 +10001147 char name[256];
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04001148 snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.store.%s",
1149 types[CLAMP(num_channels, 1, 3) - 1]);
Dave Airlie13a28ff2017-02-03 10:05:00 +10001150
Marek Olšák7f1446a2017-02-26 00:41:37 +01001151 ac_build_intrinsic(ctx, name, ctx->voidt,
1152 args, ARRAY_SIZE(args),
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04001153 writeonly_memory ?
1154 AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY :
1155 AC_FUNC_ATTR_WRITEONLY);
Dave Airlie13a28ff2017-02-03 10:05:00 +10001156}
1157
Samuel Pitoisetd7c93b52018-01-10 20:12:09 +01001158static LLVMValueRef
1159ac_build_buffer_load_common(struct ac_llvm_context *ctx,
1160 LLVMValueRef rsrc,
1161 LLVMValueRef vindex,
1162 LLVMValueRef voffset,
1163 unsigned num_channels,
1164 bool glc,
1165 bool slc,
1166 bool can_speculate,
1167 bool use_format)
1168{
1169 LLVMValueRef args[] = {
1170 LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
Marek Olšákcc36ebb2018-08-29 00:15:16 -04001171 vindex ? vindex : ctx->i32_0,
Samuel Pitoisetd7c93b52018-01-10 20:12:09 +01001172 voffset,
1173 LLVMConstInt(ctx->i1, glc, 0),
1174 LLVMConstInt(ctx->i1, slc, 0)
1175 };
1176 unsigned func = CLAMP(num_channels, 1, 3) - 1;
1177
1178 LLVMTypeRef types[] = {ctx->f32, ctx->v2f32, ctx->v4f32};
1179 const char *type_names[] = {"f32", "v2f32", "v4f32"};
1180 char name[256];
1181
1182 if (use_format) {
1183 snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.format.%s",
1184 type_names[func]);
1185 } else {
1186 snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
1187 type_names[func]);
1188 }
1189
1190 return ac_build_intrinsic(ctx, name, types[func], args,
1191 ARRAY_SIZE(args),
1192 ac_get_load_intr_attribs(can_speculate));
1193}
1194
Bas Nieuwenhuizendd0172e2018-11-12 22:42:36 +01001195static LLVMValueRef
1196ac_build_llvm8_buffer_load_common(struct ac_llvm_context *ctx,
1197 LLVMValueRef rsrc,
1198 LLVMValueRef vindex,
1199 LLVMValueRef voffset,
1200 LLVMValueRef soffset,
1201 unsigned num_channels,
1202 bool glc,
1203 bool slc,
1204 bool can_speculate,
1205 bool use_format,
1206 bool structurized)
1207{
1208 LLVMValueRef args[5];
1209 int idx = 0;
1210 args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
1211 if (structurized)
1212 args[idx++] = vindex ? vindex : ctx->i32_0;
1213 args[idx++] = voffset ? voffset : ctx->i32_0;
1214 args[idx++] = soffset ? soffset : ctx->i32_0;
1215 args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
1216 unsigned func = CLAMP(num_channels, 1, 3) - 1;
1217
1218 LLVMTypeRef types[] = {ctx->f32, ctx->v2f32, ctx->v4f32};
1219 const char *type_names[] = {"f32", "v2f32", "v4f32"};
1220 const char *indexing_kind = structurized ? "struct" : "raw";
1221 char name[256];
1222
1223 if (use_format) {
1224 snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.format.%s",
1225 indexing_kind, type_names[func]);
1226 } else {
1227 snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.%s",
1228 indexing_kind, type_names[func]);
1229 }
1230
1231 return ac_build_intrinsic(ctx, name, types[func], args,
1232 idx,
1233 ac_get_load_intr_attribs(can_speculate));
1234}
1235
Dave Airlie13a28ff2017-02-03 10:05:00 +10001236LLVMValueRef
1237ac_build_buffer_load(struct ac_llvm_context *ctx,
1238 LLVMValueRef rsrc,
1239 int num_channels,
1240 LLVMValueRef vindex,
1241 LLVMValueRef voffset,
1242 LLVMValueRef soffset,
1243 unsigned inst_offset,
1244 unsigned glc,
Marek Olšáke729dc72017-02-24 17:16:28 +01001245 unsigned slc,
Marek Olšáke019ea82017-05-19 15:02:34 +02001246 bool can_speculate,
1247 bool allow_smem)
Dave Airlie13a28ff2017-02-03 10:05:00 +10001248{
Marek Olšáke019ea82017-05-19 15:02:34 +02001249 LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0);
1250 if (voffset)
1251 offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
1252 if (soffset)
1253 offset = LLVMBuildAdd(ctx->builder, offset, soffset, "");
1254
Nicolai Hähnle7fbd48f2019-01-10 23:21:46 +01001255 if (allow_smem && !slc &&
1256 (!glc || (HAVE_LLVM >= 0x0800 && ctx->chip_class >= VI))) {
Marek Olšáke019ea82017-05-19 15:02:34 +02001257 assert(vindex == NULL);
1258
Dave Airlie16dd0eb2018-01-30 13:58:05 +10001259 LLVMValueRef result[8];
Marek Olšáke019ea82017-05-19 15:02:34 +02001260
1261 for (int i = 0; i < num_channels; i++) {
1262 if (i) {
1263 offset = LLVMBuildAdd(ctx->builder, offset,
1264 LLVMConstInt(ctx->i32, 4, 0), "");
1265 }
Nicolai Hähnlee4803ab2019-01-10 22:59:58 +01001266 const char *intrname =
1267 HAVE_LLVM >= 0x0800 ? "llvm.amdgcn.s.buffer.load.f32"
Michel Dänzer1a20b562019-01-14 12:52:52 +01001268 : "llvm.SI.load.const.v4i32";
Nicolai Hähnlee4803ab2019-01-10 22:59:58 +01001269 unsigned num_args = HAVE_LLVM >= 0x0800 ? 3 : 2;
Nicolai Hähnle7fbd48f2019-01-10 23:21:46 +01001270 LLVMValueRef args[3] = {
1271 rsrc,
1272 offset,
1273 glc ? ctx->i32_1 : ctx->i32_0,
1274 };
Nicolai Hähnlee4803ab2019-01-10 22:59:58 +01001275 result[i] = ac_build_intrinsic(ctx, intrname,
1276 ctx->f32, args, num_args,
Marek Olšáke019ea82017-05-19 15:02:34 +02001277 AC_FUNC_ATTR_READNONE |
Nicolai Hähnlee4803ab2019-01-10 22:59:58 +01001278 (HAVE_LLVM < 0x0800 ? AC_FUNC_ATTR_LEGACY : 0));
Marek Olšáke019ea82017-05-19 15:02:34 +02001279 }
1280 if (num_channels == 1)
1281 return result[0];
1282
1283 if (num_channels == 3)
1284 result[num_channels++] = LLVMGetUndef(ctx->f32);
1285 return ac_build_gather_values(ctx, result, num_channels);
1286 }
1287
Samuel Pitoisetd7c93b52018-01-10 20:12:09 +01001288 return ac_build_buffer_load_common(ctx, rsrc, vindex, offset,
1289 num_channels, glc, slc,
1290 can_speculate, false);
Dave Airlie13a28ff2017-02-03 10:05:00 +10001291}
1292
Marek Olšák94811dc2017-02-25 23:40:52 +01001293LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
1294 LLVMValueRef rsrc,
1295 LLVMValueRef vindex,
1296 LLVMValueRef voffset,
Samuel Pitoiset51e14bc2018-01-10 20:12:10 +01001297 unsigned num_channels,
Marek Olšákbac9fa92018-01-30 19:24:07 +01001298 bool glc,
Marek Olšáke1942c92017-05-25 16:13:54 +02001299 bool can_speculate)
Marek Olšák94811dc2017-02-25 23:40:52 +01001300{
Bas Nieuwenhuizendd0172e2018-11-12 22:42:36 +01001301 if (HAVE_LLVM >= 0x800) {
1302 return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0,
1303 num_channels, glc, false,
1304 can_speculate, true, true);
1305 }
Samuel Pitoiset51e14bc2018-01-10 20:12:10 +01001306 return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset,
Marek Olšákbac9fa92018-01-30 19:24:07 +01001307 num_channels, glc, false,
Samuel Pitoiset51e14bc2018-01-10 20:12:10 +01001308 can_speculate, true);
Marek Olšák94811dc2017-02-25 23:40:52 +01001309}
1310
Bas Nieuwenhuizen4503ff72018-03-28 23:54:40 +02001311LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
1312 LLVMValueRef rsrc,
1313 LLVMValueRef vindex,
1314 LLVMValueRef voffset,
1315 unsigned num_channels,
1316 bool glc,
1317 bool can_speculate)
1318{
Bas Nieuwenhuizendd0172e2018-11-12 22:42:36 +01001319 if (HAVE_LLVM >= 0x800) {
1320 return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0,
1321 num_channels, glc, false,
1322 can_speculate, true, true);
1323 }
1324
Bas Nieuwenhuizen4503ff72018-03-28 23:54:40 +02001325 LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 2, 0), "");
Marek Olšákcc36ebb2018-08-29 00:15:16 -04001326 LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, ctx->i32_1, "");
Bas Nieuwenhuizen4503ff72018-03-28 23:54:40 +02001327 stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, 16, 0), "");
1328
1329 LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->builder,
1330 LLVMBuildICmp(ctx->builder, LLVMIntUGT, elem_count, stride, ""),
1331 elem_count, stride, "");
1332
1333 LLVMValueRef new_rsrc = LLVMBuildInsertElement(ctx->builder, rsrc, new_elem_count,
1334 LLVMConstInt(ctx->i32, 2, 0), "");
1335
1336 return ac_build_buffer_load_common(ctx, new_rsrc, vindex, voffset,
1337 num_channels, glc, false,
1338 can_speculate, true);
1339}
1340
Daniel Schürmanna6a21e62018-02-07 19:40:43 +01001341LLVMValueRef
1342ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
1343 LLVMValueRef rsrc,
1344 LLVMValueRef vindex,
1345 LLVMValueRef voffset,
1346 LLVMValueRef soffset,
Samuel Pitoiset416013b2018-10-03 23:06:34 +02001347 LLVMValueRef immoffset,
1348 LLVMValueRef glc)
Daniel Schürmanna6a21e62018-02-07 19:40:43 +01001349{
Samuel Pitoiset2cf54332019-02-14 14:42:30 +01001350 unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
1351 unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
1352 LLVMValueRef res;
1353
1354 if (HAVE_LLVM >= 0x0800) {
1355 voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
1356
1357 res = ac_build_llvm8_tbuffer_load(ctx, rsrc, vindex, voffset,
1358 soffset, 1, dfmt, nfmt, glc,
1359 false, true, true);
1360 } else {
1361 const char *name = "llvm.amdgcn.tbuffer.load.i32";
1362 LLVMTypeRef type = ctx->i32;
1363 LLVMValueRef params[] = {
1364 rsrc,
1365 vindex,
1366 voffset,
1367 soffset,
1368 immoffset,
1369 LLVMConstInt(ctx->i32, dfmt, false),
1370 LLVMConstInt(ctx->i32, nfmt, false),
1371 glc,
1372 ctx->i1false,
1373 };
1374 res = ac_build_intrinsic(ctx, name, type, params, 9, 0);
1375 }
1376
Daniel Schürmanna6a21e62018-02-07 19:40:43 +01001377 return LLVMBuildTrunc(ctx->builder, res, ctx->i16, "");
1378}
1379
Samuel Pitoisetf0223142019-02-14 14:42:29 +01001380LLVMValueRef
1381ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
1382 LLVMValueRef rsrc,
1383 LLVMValueRef vindex,
1384 LLVMValueRef voffset,
1385 LLVMValueRef soffset,
1386 unsigned num_channels,
1387 unsigned dfmt,
1388 unsigned nfmt,
1389 bool glc,
1390 bool slc,
1391 bool can_speculate,
1392 bool structurized)
1393{
1394 LLVMValueRef args[6];
1395 int idx = 0;
1396 args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
1397 if (structurized)
1398 args[idx++] = vindex ? vindex : ctx->i32_0;
1399 args[idx++] = voffset ? voffset : ctx->i32_0;
1400 args[idx++] = soffset ? soffset : ctx->i32_0;
1401 args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
1402 args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
1403 unsigned func = CLAMP(num_channels, 1, 3) - 1;
1404
1405 LLVMTypeRef types[] = {ctx->i32, ctx->v2i32, ctx->v4i32};
1406 const char *type_names[] = {"i32", "v2i32", "v4i32"};
1407 const char *indexing_kind = structurized ? "struct" : "raw";
1408 char name[256];
1409
1410 snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.load.%s",
1411 indexing_kind, type_names[func]);
1412
1413 return ac_build_intrinsic(ctx, name, types[func], args,
1414 idx,
1415 ac_get_load_intr_attribs(can_speculate));
1416}
1417
Dave Airlie13a28ff2017-02-03 10:05:00 +10001418/**
1419 * Set range metadata on an instruction. This can only be used on load and
1420 * call instructions. If you know an instruction can only produce the values
1421 * 0, 1, 2, you would do set_range_metadata(value, 0, 3);
1422 * \p lo is the minimum value inclusive.
1423 * \p hi is the maximum value exclusive.
1424 */
1425static void set_range_metadata(struct ac_llvm_context *ctx,
1426 LLVMValueRef value, unsigned lo, unsigned hi)
1427{
1428 LLVMValueRef range_md, md_args[2];
1429 LLVMTypeRef type = LLVMTypeOf(value);
1430 LLVMContextRef context = LLVMGetTypeContext(type);
1431
1432 md_args[0] = LLVMConstInt(type, lo, false);
1433 md_args[1] = LLVMConstInt(type, hi, false);
1434 range_md = LLVMMDNodeInContext(context, md_args, 2);
1435 LLVMSetMetadata(value, ctx->range_md_kind, range_md);
1436}
1437
1438LLVMValueRef
1439ac_get_thread_id(struct ac_llvm_context *ctx)
1440{
1441 LLVMValueRef tid;
1442
Marek Olšák7e1faa72017-03-05 00:15:31 +01001443 LLVMValueRef tid_args[2];
1444 tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
Marek Olšákcc36ebb2018-08-29 00:15:16 -04001445 tid_args[1] = ctx->i32_0;
Marek Olšák7e1faa72017-03-05 00:15:31 +01001446 tid_args[1] = ac_build_intrinsic(ctx,
1447 "llvm.amdgcn.mbcnt.lo", ctx->i32,
1448 tid_args, 2, AC_FUNC_ATTR_READNONE);
Dave Airlie13a28ff2017-02-03 10:05:00 +10001449
Marek Olšák7e1faa72017-03-05 00:15:31 +01001450 tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi",
1451 ctx->i32, tid_args,
1452 2, AC_FUNC_ATTR_READNONE);
Dave Airlie13a28ff2017-02-03 10:05:00 +10001453 set_range_metadata(ctx, tid, 0, 64);
1454 return tid;
1455}
1456
1457/*
1458 * SI implements derivatives using the local data store (LDS)
1459 * All writes to the LDS happen in all executing threads at
1460 * the same time. TID is the Thread ID for the current
1461 * thread and is a value between 0 and 63, representing
1462 * the thread's position in the wavefront.
1463 *
1464 * For the pixel shader threads are grouped into quads of four pixels.
1465 * The TIDs of the pixels of a quad are:
1466 *
1467 * +------+------+
1468 * |4n + 0|4n + 1|
1469 * +------+------+
1470 * |4n + 2|4n + 3|
1471 * +------+------+
1472 *
1473 * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
1474 * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
1475 * the current pixel's column, and masking with 0xfffffffe yields the TID
1476 * of the left pixel of the current pixel's row.
1477 *
1478 * Adding 1 yields the TID of the pixel to the right of the left pixel, and
1479 * adding 2 yields the TID of the pixel below the top pixel.
1480 */
1481LLVMValueRef
Marek Olšák7f1446a2017-02-26 00:41:37 +01001482ac_build_ddxy(struct ac_llvm_context *ctx,
Marek Olšák7f1446a2017-02-26 00:41:37 +01001483 uint32_t mask,
1484 int idx,
Marek Olšák7f1446a2017-02-26 00:41:37 +01001485 LLVMValueRef val)
Dave Airlie13a28ff2017-02-03 10:05:00 +10001486{
Samuel Pitoiset3fbdcd92018-11-02 09:50:32 +01001487 unsigned tl_lanes[4], trbl_lanes[4];
Marek Olšák492ad9a2019-01-07 14:51:13 -05001488 LLVMValueRef tl, trbl;
Dave Airlie13a28ff2017-02-03 10:05:00 +10001489 LLVMValueRef result;
1490
Samuel Pitoiset3fbdcd92018-11-02 09:50:32 +01001491 for (unsigned i = 0; i < 4; ++i) {
1492 tl_lanes[i] = i & mask;
1493 trbl_lanes[i] = (i & mask) + idx;
Dave Airlie13a28ff2017-02-03 10:05:00 +10001494 }
1495
Samuel Pitoiset3fbdcd92018-11-02 09:50:32 +01001496 tl = ac_build_quad_swizzle(ctx, val,
1497 tl_lanes[0], tl_lanes[1],
1498 tl_lanes[2], tl_lanes[3]);
1499 trbl = ac_build_quad_swizzle(ctx, val,
1500 trbl_lanes[0], trbl_lanes[1],
1501 trbl_lanes[2], trbl_lanes[3]);
1502
Dave Airlie13a28ff2017-02-03 10:05:00 +10001503 tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
1504 trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
1505 result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
Nicolai Hähnlec0acb592018-05-01 11:06:18 +02001506
Samuel Pitoiset3fbdcd92018-11-02 09:50:32 +01001507 result = ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.f32", ctx->f32,
1508 &result, 1, 0);
Nicolai Hähnlec0acb592018-05-01 11:06:18 +02001509
Dave Airlie13a28ff2017-02-03 10:05:00 +10001510 return result;
1511}
Dave Airlief32955b2017-02-13 22:08:30 +00001512
1513void
Marek Olšák7f1446a2017-02-26 00:41:37 +01001514ac_build_sendmsg(struct ac_llvm_context *ctx,
1515 uint32_t msg,
1516 LLVMValueRef wave_id)
Dave Airlief32955b2017-02-13 22:08:30 +00001517{
1518 LLVMValueRef args[2];
Dave Airlief32955b2017-02-13 22:08:30 +00001519 args[0] = LLVMConstInt(ctx->i32, msg, false);
1520 args[1] = wave_id;
Marek Olšák3bf1e032018-02-02 19:26:49 +01001521 ac_build_intrinsic(ctx, "llvm.amdgcn.s.sendmsg", ctx->voidt, args, 2, 0);
Dave Airlief32955b2017-02-13 22:08:30 +00001522}
Dave Airliecae1ff12017-02-16 03:42:56 +00001523
1524LLVMValueRef
Marek Olšák7f1446a2017-02-26 00:41:37 +01001525ac_build_imsb(struct ac_llvm_context *ctx,
1526 LLVMValueRef arg,
1527 LLVMTypeRef dst_type)
Dave Airliecae1ff12017-02-16 03:42:56 +00001528{
Marek Olšák3bf1e032018-02-02 19:26:49 +01001529 LLVMValueRef msb = ac_build_intrinsic(ctx, "llvm.amdgcn.sffbh.i32",
Marek Olšák7f1446a2017-02-26 00:41:37 +01001530 dst_type, &arg, 1,
1531 AC_FUNC_ATTR_READNONE);
Dave Airliecae1ff12017-02-16 03:42:56 +00001532
1533 /* The HW returns the last bit index from MSB, but NIR/TGSI wants
1534 * the index from LSB. Invert it by doing "31 - msb". */
1535 msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
1536 msb, "");
1537
1538 LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
1539 LLVMValueRef cond = LLVMBuildOr(ctx->builder,
1540 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
Marek Olšákcc36ebb2018-08-29 00:15:16 -04001541 arg, ctx->i32_0, ""),
Dave Airliecae1ff12017-02-16 03:42:56 +00001542 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
1543 arg, all_ones, ""), "");
1544
1545 return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
1546}
Dave Airlie0ec66b92017-02-16 03:53:27 +00001547
1548LLVMValueRef
Marek Olšák7f1446a2017-02-26 00:41:37 +01001549ac_build_umsb(struct ac_llvm_context *ctx,
1550 LLVMValueRef arg,
1551 LLVMTypeRef dst_type)
Dave Airlie0ec66b92017-02-16 03:53:27 +00001552{
Daniel Schürmannd19f20e2018-02-28 20:26:03 +01001553 const char *intrin_name;
1554 LLVMTypeRef type;
1555 LLVMValueRef highest_bit;
1556 LLVMValueRef zero;
Samuel Pitoiset5a6c8ca2018-09-14 12:52:36 +02001557 unsigned bitsize;
Daniel Schürmannd19f20e2018-02-28 20:26:03 +01001558
Samuel Pitoiset5a6c8ca2018-09-14 12:52:36 +02001559 bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(arg));
1560 switch (bitsize) {
1561 case 64:
Daniel Schürmannd19f20e2018-02-28 20:26:03 +01001562 intrin_name = "llvm.ctlz.i64";
1563 type = ctx->i64;
1564 highest_bit = LLVMConstInt(ctx->i64, 63, false);
1565 zero = ctx->i64_0;
Samuel Pitoiset5a6c8ca2018-09-14 12:52:36 +02001566 break;
1567 case 32:
Daniel Schürmannd19f20e2018-02-28 20:26:03 +01001568 intrin_name = "llvm.ctlz.i32";
1569 type = ctx->i32;
1570 highest_bit = LLVMConstInt(ctx->i32, 31, false);
1571 zero = ctx->i32_0;
Samuel Pitoiset5a6c8ca2018-09-14 12:52:36 +02001572 break;
1573 case 16:
1574 intrin_name = "llvm.ctlz.i16";
1575 type = ctx->i16;
1576 highest_bit = LLVMConstInt(ctx->i16, 15, false);
1577 zero = ctx->i16_0;
1578 break;
1579 default:
1580 unreachable(!"invalid bitsize");
1581 break;
Daniel Schürmannd19f20e2018-02-28 20:26:03 +01001582 }
1583
1584 LLVMValueRef params[2] = {
Dave Airlie0ec66b92017-02-16 03:53:27 +00001585 arg,
Dave Airliea76b6c22017-10-26 15:20:15 +10001586 ctx->i1true,
Dave Airlie0ec66b92017-02-16 03:53:27 +00001587 };
Daniel Schürmannd19f20e2018-02-28 20:26:03 +01001588
1589 LLVMValueRef msb = ac_build_intrinsic(ctx, intrin_name, type,
1590 params, 2,
Marek Olšák7f1446a2017-02-26 00:41:37 +01001591 AC_FUNC_ATTR_READNONE);
Dave Airlie0ec66b92017-02-16 03:53:27 +00001592
1593 /* The HW returns the last bit index from MSB, but TGSI/NIR wants
1594 * the index from LSB. Invert it by doing "31 - msb". */
Daniel Schürmannd19f20e2018-02-28 20:26:03 +01001595 msb = LLVMBuildSub(ctx->builder, highest_bit, msb, "");
1596 msb = LLVMBuildTruncOrBitCast(ctx->builder, msb, ctx->i32, "");
Dave Airlie0ec66b92017-02-16 03:53:27 +00001597
1598 /* check for zero */
1599 return LLVMBuildSelect(ctx->builder,
Daniel Schürmannd19f20e2018-02-28 20:26:03 +01001600 LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, zero, ""),
Dave Airlie0ec66b92017-02-16 03:53:27 +00001601 LLVMConstInt(ctx->i32, -1, true), msb, "");
1602}
Marek Olšák660b55e2017-02-16 22:41:16 +01001603
Marek Olšáka140aeb2018-01-02 03:59:43 +01001604LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
1605 LLVMValueRef b)
1606{
Rhys Perrybbbfdef2018-12-06 14:23:31 +00001607 char name[64];
1608 snprintf(name, sizeof(name), "llvm.minnum.f%d", ac_get_elem_bits(ctx, LLVMTypeOf(a)));
Marek Olšáka140aeb2018-01-02 03:59:43 +01001609 LLVMValueRef args[2] = {a, b};
Rhys Perrybbbfdef2018-12-06 14:23:31 +00001610 return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2,
Marek Olšáka140aeb2018-01-02 03:59:43 +01001611 AC_FUNC_ATTR_READNONE);
1612}
1613
1614LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
1615 LLVMValueRef b)
1616{
Rhys Perrybbbfdef2018-12-06 14:23:31 +00001617 char name[64];
1618 snprintf(name, sizeof(name), "llvm.maxnum.f%d", ac_get_elem_bits(ctx, LLVMTypeOf(a)));
Marek Olšáka140aeb2018-01-02 03:59:43 +01001619 LLVMValueRef args[2] = {a, b};
Rhys Perrybbbfdef2018-12-06 14:23:31 +00001620 return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2,
Marek Olšáka140aeb2018-01-02 03:59:43 +01001621 AC_FUNC_ATTR_READNONE);
1622}
1623
Marek Olšák847d0a32018-01-02 04:34:53 +01001624LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
1625 LLVMValueRef b)
1626{
1627 LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b, "");
1628 return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
1629}
1630
1631LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
1632 LLVMValueRef b)
1633{
1634 LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b, "");
1635 return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
1636}
1637
Nicolai Hähnlea69afb62017-06-25 17:56:37 +02001638LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a,
1639 LLVMValueRef b)
1640{
1641 LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, "");
1642 return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
1643}
1644
Marek Olšák7f1446a2017-02-26 00:41:37 +01001645LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
Marek Olšák660b55e2017-02-16 22:41:16 +01001646{
Rhys Perrybbbfdef2018-12-06 14:23:31 +00001647 LLVMTypeRef t = LLVMTypeOf(value);
1648 return ac_build_fmin(ctx, ac_build_fmax(ctx, value, LLVMConstReal(t, 0.0)),
1649 LLVMConstReal(t, 1.0));
Marek Olšák660b55e2017-02-16 22:41:16 +01001650}
Marek Olšák369f4a82017-02-23 02:06:40 +01001651
Marek Olšák7f1446a2017-02-26 00:41:37 +01001652void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a)
Marek Olšák369f4a82017-02-23 02:06:40 +01001653{
1654 LLVMValueRef args[9];
1655
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04001656 args[0] = LLVMConstInt(ctx->i32, a->target, 0);
1657 args[1] = LLVMConstInt(ctx->i32, a->enabled_channels, 0);
Marek Olšák2b3ebe32017-02-23 02:15:54 +01001658
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04001659 if (a->compr) {
1660 LLVMTypeRef i16 = LLVMInt16TypeInContext(ctx->context);
1661 LLVMTypeRef v2i16 = LLVMVectorType(i16, 2);
Marek Olšák2b3ebe32017-02-23 02:15:54 +01001662
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04001663 args[2] = LLVMBuildBitCast(ctx->builder, a->out[0],
1664 v2i16, "");
1665 args[3] = LLVMBuildBitCast(ctx->builder, a->out[1],
1666 v2i16, "");
1667 args[4] = LLVMConstInt(ctx->i1, a->done, 0);
1668 args[5] = LLVMConstInt(ctx->i1, a->valid_mask, 0);
Marek Olšák2b3ebe32017-02-23 02:15:54 +01001669
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04001670 ac_build_intrinsic(ctx, "llvm.amdgcn.exp.compr.v2i16",
1671 ctx->voidt, args, 6, 0);
1672 } else {
1673 args[2] = a->out[0];
1674 args[3] = a->out[1];
1675 args[4] = a->out[2];
1676 args[5] = a->out[3];
1677 args[6] = LLVMConstInt(ctx->i1, a->done, 0);
1678 args[7] = LLVMConstInt(ctx->i1, a->valid_mask, 0);
Marek Olšák2b3ebe32017-02-23 02:15:54 +01001679
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04001680 ac_build_intrinsic(ctx, "llvm.amdgcn.exp.f32",
1681 ctx->voidt, args, 8, 0);
Marek Olšák2b3ebe32017-02-23 02:15:54 +01001682 }
Marek Olšák369f4a82017-02-23 02:06:40 +01001683}
Marek Olšákad18d7f2017-02-23 23:00:19 +01001684
Samuel Pitoisetbd9f7b72018-02-07 19:09:12 +01001685void ac_build_export_null(struct ac_llvm_context *ctx)
1686{
1687 struct ac_export_args args;
1688
1689 args.enabled_channels = 0x0; /* enabled channels */
1690 args.valid_mask = 1; /* whether the EXEC mask is valid */
1691 args.done = 1; /* DONE bit */
1692 args.target = V_008DFC_SQ_EXP_NULL;
1693 args.compr = 0; /* COMPR flag (0 = 32-bit export) */
1694 args.out[0] = LLVMGetUndef(ctx->f32); /* R */
1695 args.out[1] = LLVMGetUndef(ctx->f32); /* G */
1696 args.out[2] = LLVMGetUndef(ctx->f32); /* B */
1697 args.out[3] = LLVMGetUndef(ctx->f32); /* A */
1698
1699 ac_build_export(ctx, &args);
1700}
1701
Nicolai Hähnle625dcbb2018-03-23 11:20:24 +01001702static unsigned ac_num_coords(enum ac_image_dim dim)
1703{
1704 switch (dim) {
1705 case ac_image_1d:
1706 return 1;
1707 case ac_image_2d:
1708 case ac_image_1darray:
1709 return 2;
1710 case ac_image_3d:
1711 case ac_image_cube:
1712 case ac_image_2darray:
1713 case ac_image_2dmsaa:
1714 return 3;
1715 case ac_image_2darraymsaa:
1716 return 4;
1717 default:
1718 unreachable("ac_num_coords: bad dim");
1719 }
1720}
1721
1722static unsigned ac_num_derivs(enum ac_image_dim dim)
1723{
1724 switch (dim) {
1725 case ac_image_1d:
1726 case ac_image_1darray:
1727 return 2;
1728 case ac_image_2d:
1729 case ac_image_2darray:
1730 case ac_image_cube:
1731 return 4;
1732 case ac_image_3d:
1733 return 6;
1734 case ac_image_2dmsaa:
1735 case ac_image_2darraymsaa:
1736 default:
1737 unreachable("derivatives not supported");
1738 }
1739}
1740
Nicolai Hähnlea9a79932018-02-16 18:44:25 +01001741static const char *get_atomic_name(enum ac_atomic_op op)
1742{
1743 switch (op) {
1744 case ac_atomic_swap: return "swap";
1745 case ac_atomic_add: return "add";
1746 case ac_atomic_sub: return "sub";
1747 case ac_atomic_smin: return "smin";
1748 case ac_atomic_umin: return "umin";
1749 case ac_atomic_smax: return "smax";
1750 case ac_atomic_umax: return "umax";
1751 case ac_atomic_and: return "and";
1752 case ac_atomic_or: return "or";
1753 case ac_atomic_xor: return "xor";
1754 }
1755 unreachable("bad atomic op");
1756}
1757
Nicolai Hähnlea9a79932018-02-16 18:44:25 +01001758LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
1759 struct ac_image_args *a)
1760{
1761 const char *overload[3] = { "", "", "" };
1762 unsigned num_overloads = 0;
1763 LLVMValueRef args[18];
1764 unsigned num_args = 0;
Bas Nieuwenhuizen4fc2d5e2018-06-06 01:42:17 +02001765 enum ac_image_dim dim = a->dim;
Nicolai Hähnlea9a79932018-02-16 18:44:25 +01001766
1767 assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 ||
1768 !a->level_zero);
1769 assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip &&
1770 a->opcode != ac_image_store_mip) ||
1771 a->lod);
1772 assert(a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
1773 (!a->compare && !a->offset));
1774 assert((a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
1775 a->opcode == ac_image_get_lod) ||
1776 !a->bias);
1777 assert((a->bias ? 1 : 0) +
1778 (a->lod ? 1 : 0) +
1779 (a->level_zero ? 1 : 0) +
1780 (a->derivs[0] ? 1 : 0) <= 1);
1781
Bas Nieuwenhuizen4fc2d5e2018-06-06 01:42:17 +02001782 if (a->opcode == ac_image_get_lod) {
1783 switch (dim) {
1784 case ac_image_1darray:
1785 dim = ac_image_1d;
1786 break;
1787 case ac_image_2darray:
1788 case ac_image_cube:
1789 dim = ac_image_2d;
1790 break;
1791 default:
1792 break;
1793 }
1794 }
1795
Nicolai Hähnlea9a79932018-02-16 18:44:25 +01001796 bool sample = a->opcode == ac_image_sample ||
1797 a->opcode == ac_image_gather4 ||
1798 a->opcode == ac_image_get_lod;
1799 bool atomic = a->opcode == ac_image_atomic ||
1800 a->opcode == ac_image_atomic_cmpswap;
1801 LLVMTypeRef coord_type = sample ? ctx->f32 : ctx->i32;
1802
1803 if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) {
1804 args[num_args++] = a->data[0];
1805 if (a->opcode == ac_image_atomic_cmpswap)
1806 args[num_args++] = a->data[1];
1807 }
1808
1809 if (!atomic)
1810 args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, false);
1811
1812 if (a->offset)
1813 args[num_args++] = ac_to_integer(ctx, a->offset);
1814 if (a->bias) {
1815 args[num_args++] = ac_to_float(ctx, a->bias);
1816 overload[num_overloads++] = ".f32";
1817 }
1818 if (a->compare)
1819 args[num_args++] = ac_to_float(ctx, a->compare);
1820 if (a->derivs[0]) {
Bas Nieuwenhuizen4fc2d5e2018-06-06 01:42:17 +02001821 unsigned count = ac_num_derivs(dim);
Nicolai Hähnlea9a79932018-02-16 18:44:25 +01001822 for (unsigned i = 0; i < count; ++i)
1823 args[num_args++] = ac_to_float(ctx, a->derivs[i]);
1824 overload[num_overloads++] = ".f32";
1825 }
1826 unsigned num_coords =
Bas Nieuwenhuizen4fc2d5e2018-06-06 01:42:17 +02001827 a->opcode != ac_image_get_resinfo ? ac_num_coords(dim) : 0;
Nicolai Hähnlea9a79932018-02-16 18:44:25 +01001828 for (unsigned i = 0; i < num_coords; ++i)
1829 args[num_args++] = LLVMBuildBitCast(ctx->builder, a->coords[i], coord_type, "");
1830 if (a->lod)
1831 args[num_args++] = LLVMBuildBitCast(ctx->builder, a->lod, coord_type, "");
1832 overload[num_overloads++] = sample ? ".f32" : ".i32";
1833
1834 args[num_args++] = a->resource;
1835 if (sample) {
1836 args[num_args++] = a->sampler;
1837 args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, false);
1838 }
1839
1840 args[num_args++] = ctx->i32_0; /* texfailctrl */
1841 args[num_args++] = LLVMConstInt(ctx->i32, a->cache_policy, false);
1842
1843 const char *name;
1844 const char *atomic_subop = "";
1845 switch (a->opcode) {
1846 case ac_image_sample: name = "sample"; break;
1847 case ac_image_gather4: name = "gather4"; break;
1848 case ac_image_load: name = "load"; break;
1849 case ac_image_load_mip: name = "load.mip"; break;
1850 case ac_image_store: name = "store"; break;
1851 case ac_image_store_mip: name = "store.mip"; break;
1852 case ac_image_atomic:
1853 name = "atomic.";
1854 atomic_subop = get_atomic_name(a->atomic);
1855 break;
1856 case ac_image_atomic_cmpswap:
1857 name = "atomic.";
1858 atomic_subop = "cmpswap";
1859 break;
1860 case ac_image_get_lod: name = "getlod"; break;
1861 case ac_image_get_resinfo: name = "getresinfo"; break;
1862 default: unreachable("invalid image opcode");
1863 }
1864
1865 const char *dimname;
Bas Nieuwenhuizen4fc2d5e2018-06-06 01:42:17 +02001866 switch (dim) {
Nicolai Hähnlea9a79932018-02-16 18:44:25 +01001867 case ac_image_1d: dimname = "1d"; break;
1868 case ac_image_2d: dimname = "2d"; break;
1869 case ac_image_3d: dimname = "3d"; break;
1870 case ac_image_cube: dimname = "cube"; break;
1871 case ac_image_1darray: dimname = "1darray"; break;
1872 case ac_image_2darray: dimname = "2darray"; break;
1873 case ac_image_2dmsaa: dimname = "2dmsaa"; break;
1874 case ac_image_2darraymsaa: dimname = "2darraymsaa"; break;
1875 default: unreachable("invalid dim");
1876 }
1877
1878 bool lod_suffix =
1879 a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4);
1880 char intr_name[96];
1881 snprintf(intr_name, sizeof(intr_name),
1882 "llvm.amdgcn.image.%s%s" /* base name */
1883 "%s%s%s" /* sample/gather modifiers */
1884 ".%s.%s%s%s%s", /* dimension and type overloads */
1885 name, atomic_subop,
1886 a->compare ? ".c" : "",
1887 a->bias ? ".b" :
1888 lod_suffix ? ".l" :
1889 a->derivs[0] ? ".d" :
1890 a->level_zero ? ".lz" : "",
1891 a->offset ? ".o" : "",
1892 dimname,
1893 atomic ? "i32" : "v4f32",
1894 overload[0], overload[1], overload[2]);
1895
1896 LLVMTypeRef retty;
1897 if (atomic)
1898 retty = ctx->i32;
1899 else if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip)
1900 retty = ctx->voidt;
1901 else
1902 retty = ctx->v4f32;
1903
1904 LLVMValueRef result =
1905 ac_build_intrinsic(ctx, intr_name, retty, args, num_args,
1906 a->attributes);
1907 if (!sample && retty == ctx->v4f32) {
1908 result = LLVMBuildBitCast(ctx->builder, result,
1909 ctx->v4i32, "");
1910 }
1911 return result;
1912}
1913
Marek Olšák7f1446a2017-02-26 00:41:37 +01001914LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
1915 LLVMValueRef args[2])
Marek Olšák653ac0b2017-02-24 16:38:25 +01001916{
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04001917 LLVMTypeRef v2f16 =
1918 LLVMVectorType(LLVMHalfTypeInContext(ctx->context), 2);
Daniel Schürmannf5823672018-02-03 14:37:26 +01001919
1920 return ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz", v2f16,
1921 args, 2, AC_FUNC_ATTR_READNONE);
Marek Olšák653ac0b2017-02-24 16:38:25 +01001922}
Marek Olšák9c095922017-02-24 22:44:18 +01001923
Marek Olšák847d0a32018-01-02 04:34:53 +01001924LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
1925 LLVMValueRef args[2])
1926{
Marek Olšákfd1121e2018-07-01 15:50:51 -04001927 LLVMValueRef res =
1928 ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16",
1929 ctx->v2i16, args, 2,
1930 AC_FUNC_ATTR_READNONE);
1931 return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
Marek Olšák847d0a32018-01-02 04:34:53 +01001932}
1933
1934LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
1935 LLVMValueRef args[2])
1936{
Marek Olšákfd1121e2018-07-01 15:50:51 -04001937 LLVMValueRef res =
1938 ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16",
1939 ctx->v2i16, args, 2,
1940 AC_FUNC_ATTR_READNONE);
1941 return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
Marek Olšák847d0a32018-01-02 04:34:53 +01001942}
1943
1944/* The 8-bit and 10-bit clamping is for HW workarounds. */
1945LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
1946 LLVMValueRef args[2], unsigned bits, bool hi)
1947{
1948 assert(bits == 8 || bits == 10 || bits == 16);
1949
1950 LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
1951 bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0);
1952 LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
1953 bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0);
1954 LLVMValueRef max_alpha =
1955 bits != 10 ? max_rgb : ctx->i32_1;
1956 LLVMValueRef min_alpha =
1957 bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0);
Marek Olšák847d0a32018-01-02 04:34:53 +01001958
1959 /* Clamp. */
Marek Olšákfd1121e2018-07-01 15:50:51 -04001960 if (bits != 16) {
Marek Olšák847d0a32018-01-02 04:34:53 +01001961 for (int i = 0; i < 2; i++) {
1962 bool alpha = hi && i == 1;
1963 args[i] = ac_build_imin(ctx, args[i],
1964 alpha ? max_alpha : max_rgb);
1965 args[i] = ac_build_imax(ctx, args[i],
1966 alpha ? min_alpha : min_rgb);
1967 }
1968 }
1969
Marek Olšákfd1121e2018-07-01 15:50:51 -04001970 LLVMValueRef res =
1971 ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16",
1972 ctx->v2i16, args, 2,
1973 AC_FUNC_ATTR_READNONE);
1974 return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
Marek Olšák847d0a32018-01-02 04:34:53 +01001975}
1976
1977/* The 8-bit and 10-bit clamping is for HW workarounds. */
1978LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
1979 LLVMValueRef args[2], unsigned bits, bool hi)
1980{
1981 assert(bits == 8 || bits == 10 || bits == 16);
1982
1983 LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
1984 bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0);
1985 LLVMValueRef max_alpha =
1986 bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0);
Marek Olšák847d0a32018-01-02 04:34:53 +01001987
1988 /* Clamp. */
Marek Olšákfd1121e2018-07-01 15:50:51 -04001989 if (bits != 16) {
Marek Olšák847d0a32018-01-02 04:34:53 +01001990 for (int i = 0; i < 2; i++) {
1991 bool alpha = hi && i == 1;
1992 args[i] = ac_build_umin(ctx, args[i],
1993 alpha ? max_alpha : max_rgb);
1994 }
1995 }
1996
Marek Olšákfd1121e2018-07-01 15:50:51 -04001997 LLVMValueRef res =
1998 ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16",
1999 ctx->v2i16, args, 2,
2000 AC_FUNC_ATTR_READNONE);
2001 return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
Marek Olšák847d0a32018-01-02 04:34:53 +01002002}
2003
Marek Olšák2a414c32017-10-04 05:07:50 +02002004LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1)
2005{
Marek Olšák2a414c32017-10-04 05:07:50 +02002006 return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1,
2007 &i1, 1, AC_FUNC_ATTR_READNONE);
2008}
2009
Marek Olšák1ff9e272017-10-04 04:51:39 +02002010void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1)
Marek Olšák9c095922017-02-24 22:44:18 +01002011{
Marek Olšákfd1121e2018-07-01 15:50:51 -04002012 ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt,
2013 &i1, 1, 0);
Marek Olšák9c095922017-02-24 22:44:18 +01002014}
Marek Olšákd4324dd2017-02-24 23:06:31 +01002015
Marek Olšák7f1446a2017-02-26 00:41:37 +01002016LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
2017 LLVMValueRef offset, LLVMValueRef width,
2018 bool is_signed)
Marek Olšákd4324dd2017-02-24 23:06:31 +01002019{
2020 LLVMValueRef args[] = {
2021 input,
2022 offset,
2023 width,
2024 };
2025
Marek Olšák7f1446a2017-02-26 00:41:37 +01002026 return ac_build_intrinsic(ctx,
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04002027 is_signed ? "llvm.amdgcn.sbfe.i32" :
2028 "llvm.amdgcn.ubfe.i32",
Marek Olšák7f1446a2017-02-26 00:41:37 +01002029 ctx->i32, args, 3,
Marek Olšákf9eb1ef2018-05-16 22:23:41 -04002030 AC_FUNC_ATTR_READNONE);
Marek Olšákd4324dd2017-02-24 23:06:31 +01002031}
Dave Airlie10ae83a2017-03-06 08:37:22 +10002032
Marek Olšák659f2e02018-08-14 01:49:49 -04002033LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
2034 LLVMValueRef s1, LLVMValueRef s2)
2035{
2036 return LLVMBuildAdd(ctx->builder,
2037 LLVMBuildMul(ctx->builder, s0, s1, ""), s2, "");
2038}
2039
2040LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
2041 LLVMValueRef s1, LLVMValueRef s2)
2042{
2043 return LLVMBuildFAdd(ctx->builder,
2044 LLVMBuildFMul(ctx->builder, s0, s1, ""), s2, "");
2045}
2046
Samuel Pitoiset225b1982017-12-12 18:10:23 +01002047void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16)
2048{
2049 LLVMValueRef args[1] = {
2050 LLVMConstInt(ctx->i32, simm16, false),
2051 };
2052 ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt",
2053 ctx->voidt, args, 1, 0);
2054}
2055
Samuel Pitoiset459e3392018-03-02 15:01:30 +01002056LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
2057 unsigned bitsize)
2058{
2059 LLVMTypeRef type;
2060 char *intr;
2061
2062 if (bitsize == 32) {
2063 intr = "llvm.floor.f32";
2064 type = ctx->f32;
2065 } else {
2066 intr = "llvm.floor.f64";
2067 type = ctx->f64;
2068 }
2069
2070 LLVMValueRef params[] = {
2071 src0,
2072 };
2073 LLVMValueRef floor = ac_build_intrinsic(ctx, intr, type, params, 1,
2074 AC_FUNC_ATTR_READNONE);
2075 return LLVMBuildFSub(ctx->builder, src0, floor, "");
2076}
2077
Samuel Pitoisete8bdde22018-03-02 15:01:31 +01002078LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
2079 unsigned bitsize)
2080{
2081 LLVMValueRef cmp, val, zero, one;
2082 LLVMTypeRef type;
2083
Samuel Pitoiset3e7f3e22018-09-14 12:52:35 +02002084 switch (bitsize) {
2085 case 64:
Samuel Pitoisete8bdde22018-03-02 15:01:31 +01002086 type = ctx->i64;
2087 zero = ctx->i64_0;
2088 one = ctx->i64_1;
Samuel Pitoiset3e7f3e22018-09-14 12:52:35 +02002089 break;
2090 case 32:
2091 type = ctx->i32;
2092 zero = ctx->i32_0;
2093 one = ctx->i32_1;
2094 break;
2095 case 16:
2096 type = ctx->i16;
2097 zero = ctx->i16_0;
2098 one = ctx->i16_1;
2099 break;
2100 default:
2101 unreachable(!"invalid bitsize");
2102 break;
Samuel Pitoisete8bdde22018-03-02 15:01:31 +01002103 }
2104
2105 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, zero, "");
2106 val = LLVMBuildSelect(ctx->builder, cmp, one, src0, "");
2107 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, zero, "");
2108 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(type, -1, true), "");
2109 return val;
2110}
2111
Samuel Pitoiset322a51b2018-03-02 15:01:32 +01002112LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
2113 unsigned bitsize)
2114{
2115 LLVMValueRef cmp, val, zero, one;
2116 LLVMTypeRef type;
2117
2118 if (bitsize == 32) {
2119 type = ctx->f32;
2120 zero = ctx->f32_0;
2121 one = ctx->f32_1;
2122 } else {
2123 type = ctx->f64;
2124 zero = ctx->f64_0;
2125 one = ctx->f64_1;
2126 }
2127
2128 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, zero, "");
2129 val = LLVMBuildSelect(ctx->builder, cmp, one, src0, "");
2130 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, zero, "");
2131 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(type, -1.0), "");
2132 return val;
2133}
2134
Samuel Pitoiset371c35e2018-09-14 12:52:32 +02002135LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0)
2136{
2137 LLVMValueRef result;
2138 unsigned bitsize;
2139
2140 bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
2141
2142 switch (bitsize) {
2143 case 64:
2144 result = ac_build_intrinsic(ctx, "llvm.ctpop.i64", ctx->i64,
2145 (LLVMValueRef []) { src0 }, 1,
2146 AC_FUNC_ATTR_READNONE);
2147
2148 result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
2149 break;
2150 case 32:
2151 result = ac_build_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32,
2152 (LLVMValueRef []) { src0 }, 1,
2153 AC_FUNC_ATTR_READNONE);
2154 break;
Samuel Pitoisetfc398f42018-09-14 12:52:38 +02002155 case 16:
2156 result = ac_build_intrinsic(ctx, "llvm.ctpop.i16", ctx->i16,
2157 (LLVMValueRef []) { src0 }, 1,
2158 AC_FUNC_ATTR_READNONE);
2159 break;
Samuel Pitoiset371c35e2018-09-14 12:52:32 +02002160 default:
2161 unreachable(!"invalid bitsize");
2162 break;
2163 }
2164
2165 return result;
2166}
2167
Samuel Pitoiset074e2912018-09-14 12:52:33 +02002168LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
2169 LLVMValueRef src0)
2170{
2171 LLVMValueRef result;
2172 unsigned bitsize;
2173
2174 bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
2175
2176 switch (bitsize) {
2177 case 32:
2178 result = ac_build_intrinsic(ctx, "llvm.bitreverse.i32", ctx->i32,
2179 (LLVMValueRef []) { src0 }, 1,
2180 AC_FUNC_ATTR_READNONE);
2181 break;
Samuel Pitoisetcd76ce02018-09-14 12:52:39 +02002182 case 16:
2183 result = ac_build_intrinsic(ctx, "llvm.bitreverse.i16", ctx->i16,
2184 (LLVMValueRef []) { src0 }, 1,
2185 AC_FUNC_ATTR_READNONE);
2186 break;
Samuel Pitoiset074e2912018-09-14 12:52:33 +02002187 default:
2188 unreachable(!"invalid bitsize");
2189 break;
2190 }
2191
2192 return result;
2193}
2194
/* Operand indices of an export intrinsic call, as read back with
 * LLVMGetOperand(): the target, the enabled-channels mask and the first
 * output value (the remaining outputs follow at OUT0 + chan). */
#define AC_EXP_TARGET			0
#define AC_EXP_ENABLED_CHANNELS		1
#define AC_EXP_OUT0			2
Dave Airliee2659172017-04-25 23:33:29 +01002198
/* Classification of one exported channel value. */
enum ac_ir_type {
	AC_IR_UNDEF,	/* undefined value */
	AC_IR_CONST,	/* floating-point constant */
	AC_IR_VALUE,	/* any other value */
};
2204
2205struct ac_vs_exp_chan
2206{
2207 LLVMValueRef value;
2208 float const_float;
2209 enum ac_ir_type type;
2210};
2211
2212struct ac_vs_exp_inst {
2213 unsigned offset;
2214 LLVMValueRef inst;
2215 struct ac_vs_exp_chan chan[4];
2216};
2217
2218struct ac_vs_exports {
2219 unsigned num;
2220 struct ac_vs_exp_inst exp[VARYING_SLOT_MAX];
2221};
2222
Dave Airliee2659172017-04-25 23:33:29 +01002223/* Return true if the PARAM export has been eliminated. */
2224static bool ac_eliminate_const_output(uint8_t *vs_output_param_offset,
2225 uint32_t num_outputs,
Marek Olšákfaa37472017-04-29 23:47:08 +02002226 struct ac_vs_exp_inst *exp)
Dave Airliee2659172017-04-25 23:33:29 +01002227{
2228 unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
2229 bool is_zero[4] = {}, is_one[4] = {};
2230
2231 for (i = 0; i < 4; i++) {
Dave Airliee2659172017-04-25 23:33:29 +01002232 /* It's a constant expression. Undef outputs are eliminated too. */
Marek Olšákfaa37472017-04-29 23:47:08 +02002233 if (exp->chan[i].type == AC_IR_UNDEF) {
Dave Airliee2659172017-04-25 23:33:29 +01002234 is_zero[i] = true;
2235 is_one[i] = true;
Marek Olšákfaa37472017-04-29 23:47:08 +02002236 } else if (exp->chan[i].type == AC_IR_CONST) {
2237 if (exp->chan[i].const_float == 0)
Dave Airliee2659172017-04-25 23:33:29 +01002238 is_zero[i] = true;
Marek Olšákfaa37472017-04-29 23:47:08 +02002239 else if (exp->chan[i].const_float == 1)
Dave Airliee2659172017-04-25 23:33:29 +01002240 is_one[i] = true;
2241 else
2242 return false; /* other constant */
2243 } else
2244 return false;
2245 }
2246
2247 /* Only certain combinations of 0 and 1 can be eliminated. */
2248 if (is_zero[0] && is_zero[1] && is_zero[2])
2249 default_val = is_zero[3] ? 0 : 1;
2250 else if (is_one[0] && is_one[1] && is_one[2])
2251 default_val = is_zero[3] ? 2 : 3;
2252 else
2253 return false;
2254
2255 /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
Marek Olšákfaa37472017-04-29 23:47:08 +02002256 LLVMInstructionEraseFromParent(exp->inst);
Dave Airliee2659172017-04-25 23:33:29 +01002257
2258 /* Change OFFSET to DEFAULT_VAL. */
2259 for (i = 0; i < num_outputs; i++) {
Marek Olšákfaa37472017-04-29 23:47:08 +02002260 if (vs_output_param_offset[i] == exp->offset) {
Dave Airliee2659172017-04-25 23:33:29 +01002261 vs_output_param_offset[i] =
2262 AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val;
2263 break;
2264 }
2265 }
2266 return true;
2267}
2268
Samuel Pitoiset675dde12018-03-01 11:54:21 +01002269static bool ac_eliminate_duplicated_output(struct ac_llvm_context *ctx,
2270 uint8_t *vs_output_param_offset,
Marek Olšákb0871542017-04-29 23:56:03 +02002271 uint32_t num_outputs,
2272 struct ac_vs_exports *processed,
2273 struct ac_vs_exp_inst *exp)
2274{
2275 unsigned p, copy_back_channels = 0;
2276
2277 /* See if the output is already in the list of processed outputs.
2278 * The LLVMValueRef comparison relies on SSA.
2279 */
2280 for (p = 0; p < processed->num; p++) {
2281 bool different = false;
2282
2283 for (unsigned j = 0; j < 4; j++) {
2284 struct ac_vs_exp_chan *c1 = &processed->exp[p].chan[j];
2285 struct ac_vs_exp_chan *c2 = &exp->chan[j];
2286
2287 /* Treat undef as a match. */
2288 if (c2->type == AC_IR_UNDEF)
2289 continue;
2290
2291 /* If c1 is undef but c2 isn't, we can copy c2 to c1
2292 * and consider the instruction duplicated.
2293 */
2294 if (c1->type == AC_IR_UNDEF) {
2295 copy_back_channels |= 1 << j;
2296 continue;
2297 }
2298
2299 /* Test whether the channels are not equal. */
2300 if (c1->type != c2->type ||
2301 (c1->type == AC_IR_CONST &&
2302 c1->const_float != c2->const_float) ||
2303 (c1->type == AC_IR_VALUE &&
2304 c1->value != c2->value)) {
2305 different = true;
2306 break;
2307 }
2308 }
2309 if (!different)
2310 break;
2311
2312 copy_back_channels = 0;
2313 }
2314 if (p == processed->num)
2315 return false;
2316
2317 /* If a match was found, but the matching export has undef where the new
2318 * one has a normal value, copy the normal value to the undef channel.
2319 */
2320 struct ac_vs_exp_inst *match = &processed->exp[p];
2321
Samuel Pitoiset675dde12018-03-01 11:54:21 +01002322 /* Get current enabled channels mask. */
2323 LLVMValueRef arg = LLVMGetOperand(match->inst, AC_EXP_ENABLED_CHANNELS);
2324 unsigned enabled_channels = LLVMConstIntGetZExtValue(arg);
2325
Marek Olšákb0871542017-04-29 23:56:03 +02002326 while (copy_back_channels) {
2327 unsigned chan = u_bit_scan(&copy_back_channels);
2328
2329 assert(match->chan[chan].type == AC_IR_UNDEF);
2330 LLVMSetOperand(match->inst, AC_EXP_OUT0 + chan,
2331 exp->chan[chan].value);
2332 match->chan[chan] = exp->chan[chan];
Samuel Pitoiset675dde12018-03-01 11:54:21 +01002333
2334 /* Update number of enabled channels because the original mask
2335 * is not always 0xf.
2336 */
2337 enabled_channels |= (1 << chan);
2338 LLVMSetOperand(match->inst, AC_EXP_ENABLED_CHANNELS,
2339 LLVMConstInt(ctx->i32, enabled_channels, 0));
Marek Olšákb0871542017-04-29 23:56:03 +02002340 }
2341
2342 /* The PARAM export is duplicated. Kill it. */
2343 LLVMInstructionEraseFromParent(exp->inst);
2344
2345 /* Change OFFSET to the matching export. */
2346 for (unsigned i = 0; i < num_outputs; i++) {
2347 if (vs_output_param_offset[i] == exp->offset) {
2348 vs_output_param_offset[i] = match->offset;
2349 break;
2350 }
2351 }
2352 return true;
2353}
2354
Marek Olšák7647e902017-04-29 23:53:08 +02002355void ac_optimize_vs_outputs(struct ac_llvm_context *ctx,
2356 LLVMValueRef main_fn,
2357 uint8_t *vs_output_param_offset,
2358 uint32_t num_outputs,
2359 uint8_t *num_param_exports)
Dave Airliee2659172017-04-25 23:33:29 +01002360{
2361 LLVMBasicBlockRef bb;
2362 bool removed_any = false;
2363 struct ac_vs_exports exports;
2364
Dave Airliee2659172017-04-25 23:33:29 +01002365 exports.num = 0;
2366
2367 /* Process all LLVM instructions. */
2368 bb = LLVMGetFirstBasicBlock(main_fn);
2369 while (bb) {
2370 LLVMValueRef inst = LLVMGetFirstInstruction(bb);
2371
2372 while (inst) {
2373 LLVMValueRef cur = inst;
2374 inst = LLVMGetNextInstruction(inst);
Marek Olšákfaa37472017-04-29 23:47:08 +02002375 struct ac_vs_exp_inst exp;
Dave Airliee2659172017-04-25 23:33:29 +01002376
2377 if (LLVMGetInstructionOpcode(cur) != LLVMCall)
2378 continue;
2379
2380 LLVMValueRef callee = ac_llvm_get_called_value(cur);
2381
2382 if (!ac_llvm_is_function(callee))
2383 continue;
2384
2385 const char *name = LLVMGetValueName(callee);
2386 unsigned num_args = LLVMCountParams(callee);
2387
2388 /* Check if this is an export instruction. */
2389 if ((num_args != 9 && num_args != 8) ||
2390 (strcmp(name, "llvm.SI.export") &&
2391 strcmp(name, "llvm.amdgcn.exp.f32")))
2392 continue;
2393
2394 LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET);
2395 unsigned target = LLVMConstIntGetZExtValue(arg);
2396
2397 if (target < V_008DFC_SQ_EXP_PARAM)
2398 continue;
2399
2400 target -= V_008DFC_SQ_EXP_PARAM;
2401
Marek Olšákfaa37472017-04-29 23:47:08 +02002402 /* Parse the instruction. */
2403 memset(&exp, 0, sizeof(exp));
2404 exp.offset = target;
2405 exp.inst = cur;
2406
2407 for (unsigned i = 0; i < 4; i++) {
2408 LLVMValueRef v = LLVMGetOperand(cur, AC_EXP_OUT0 + i);
2409
2410 exp.chan[i].value = v;
2411
2412 if (LLVMIsUndef(v)) {
2413 exp.chan[i].type = AC_IR_UNDEF;
2414 } else if (LLVMIsAConstantFP(v)) {
2415 LLVMBool loses_info;
2416 exp.chan[i].type = AC_IR_CONST;
2417 exp.chan[i].const_float =
2418 LLVMConstRealGetDouble(v, &loses_info);
2419 } else {
2420 exp.chan[i].type = AC_IR_VALUE;
2421 }
2422 }
2423
Marek Olšákb0871542017-04-29 23:56:03 +02002424 /* Eliminate constant and duplicated PARAM exports. */
Dave Airliee2659172017-04-25 23:33:29 +01002425 if (ac_eliminate_const_output(vs_output_param_offset,
Marek Olšákb0871542017-04-29 23:56:03 +02002426 num_outputs, &exp) ||
Samuel Pitoiset675dde12018-03-01 11:54:21 +01002427 ac_eliminate_duplicated_output(ctx,
2428 vs_output_param_offset,
Marek Olšákb0871542017-04-29 23:56:03 +02002429 num_outputs, &exports,
2430 &exp)) {
Dave Airliee2659172017-04-25 23:33:29 +01002431 removed_any = true;
2432 } else {
Marek Olšákfaa37472017-04-29 23:47:08 +02002433 exports.exp[exports.num++] = exp;
Dave Airliee2659172017-04-25 23:33:29 +01002434 }
2435 }
2436 bb = LLVMGetNextBasicBlock(bb);
2437 }
2438
2439 /* Remove holes in export memory due to removed PARAM exports.
2440 * This is done by renumbering all PARAM exports.
2441 */
2442 if (removed_any) {
Marek Olšák34bc4702017-05-08 16:37:26 +02002443 uint8_t old_offset[VARYING_SLOT_MAX];
Dave Airliee2659172017-04-25 23:33:29 +01002444 unsigned out, i;
2445
2446 /* Make a copy of the offsets. We need the old version while
2447 * we are modifying some of them. */
Marek Olšák34bc4702017-05-08 16:37:26 +02002448 memcpy(old_offset, vs_output_param_offset,
2449 sizeof(old_offset));
Dave Airliee2659172017-04-25 23:33:29 +01002450
2451 for (i = 0; i < exports.num; i++) {
Marek Olšákfaa37472017-04-29 23:47:08 +02002452 unsigned offset = exports.exp[i].offset;
Dave Airliee2659172017-04-25 23:33:29 +01002453
Marek Olšák34bc4702017-05-08 16:37:26 +02002454 /* Update vs_output_param_offset. Multiple outputs can
2455 * have the same offset.
2456 */
Dave Airliee2659172017-04-25 23:33:29 +01002457 for (out = 0; out < num_outputs; out++) {
Marek Olšák34bc4702017-05-08 16:37:26 +02002458 if (old_offset[out] == offset)
2459 vs_output_param_offset[out] = i;
Dave Airliee2659172017-04-25 23:33:29 +01002460 }
Marek Olšák34bc4702017-05-08 16:37:26 +02002461
2462 /* Change the PARAM offset in the instruction. */
2463 LLVMSetOperand(exports.exp[i].inst, AC_EXP_TARGET,
2464 LLVMConstInt(ctx->i32,
2465 V_008DFC_SQ_EXP_PARAM + i, 0));
Dave Airliee2659172017-04-25 23:33:29 +01002466 }
Marek Olšák34bc4702017-05-08 16:37:26 +02002467 *num_param_exports = exports.num;
Dave Airliee2659172017-04-25 23:33:29 +01002468 }
2469}
Dave Airlie1dda2142017-10-19 05:29:02 +01002470
2471void ac_init_exec_full_mask(struct ac_llvm_context *ctx)
2472{
2473 LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0);
2474 ac_build_intrinsic(ctx,
2475 "llvm.amdgcn.init.exec", ctx->voidt,
2476 &full_mask, 1, AC_FUNC_ATTR_CONVERGENT);
2477}
Dave Airlief925f5b2017-10-26 14:43:51 +10002478
2479void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx)
2480{
2481 unsigned lds_size = ctx->chip_class >= CIK ? 65536 : 32768;
2482 ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32_0,
Marek Olšáka668c8d2018-09-07 18:44:54 -04002483 LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_ADDR_SPACE_LDS),
Dave Airlief925f5b2017-10-26 14:43:51 +10002484 "lds");
2485}
2486
2487LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
2488 LLVMValueRef dw_addr)
2489{
2490 return ac_build_load(ctx, ctx->lds, dw_addr);
2491}
2492
2493void ac_lds_store(struct ac_llvm_context *ctx,
2494 LLVMValueRef dw_addr,
2495 LLVMValueRef value)
2496{
2497 value = ac_to_integer(ctx, value);
2498 ac_build_indexed_store(ctx, ctx->lds,
2499 dw_addr, value);
2500}
Dave Airlie82d47b92017-10-26 15:28:41 +10002501
2502LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
2503 LLVMTypeRef dst_type,
2504 LLVMValueRef src0)
2505{
Timothy Arceri12a23502018-02-06 14:38:57 +11002506 unsigned src0_bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
2507 const char *intrin_name;
2508 LLVMTypeRef type;
2509 LLVMValueRef zero;
Samuel Pitoiset94dd08e2018-09-14 12:52:37 +02002510
2511 switch (src0_bitsize) {
2512 case 64:
Timothy Arceri12a23502018-02-06 14:38:57 +11002513 intrin_name = "llvm.cttz.i64";
2514 type = ctx->i64;
2515 zero = ctx->i64_0;
Samuel Pitoiset94dd08e2018-09-14 12:52:37 +02002516 break;
2517 case 32:
Timothy Arceri12a23502018-02-06 14:38:57 +11002518 intrin_name = "llvm.cttz.i32";
2519 type = ctx->i32;
2520 zero = ctx->i32_0;
Samuel Pitoiset94dd08e2018-09-14 12:52:37 +02002521 break;
2522 case 16:
2523 intrin_name = "llvm.cttz.i16";
2524 type = ctx->i16;
2525 zero = ctx->i16_0;
2526 break;
2527 default:
2528 unreachable(!"invalid bitsize");
Timothy Arceri12a23502018-02-06 14:38:57 +11002529 }
2530
Dave Airlie82d47b92017-10-26 15:28:41 +10002531 LLVMValueRef params[2] = {
2532 src0,
2533
2534 /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
2535 * add special code to check for x=0. The reason is that
2536 * the LLVM behavior for x=0 is different from what we
2537 * need here. However, LLVM also assumes that ffs(x) is
2538 * in [0, 31], but GLSL expects that ffs(0) = -1, so
2539 * a conditional assignment to handle 0 is still required.
2540 *
2541 * The hardware already implements the correct behavior.
2542 */
Marek Olšákcc36ebb2018-08-29 00:15:16 -04002543 ctx->i1true,
Dave Airlie82d47b92017-10-26 15:28:41 +10002544 };
2545
Timothy Arceri12a23502018-02-06 14:38:57 +11002546 LLVMValueRef lsb = ac_build_intrinsic(ctx, intrin_name, type,
Dave Airlie82d47b92017-10-26 15:28:41 +10002547 params, 2,
2548 AC_FUNC_ATTR_READNONE);
2549
Timothy Arceri12a23502018-02-06 14:38:57 +11002550 if (src0_bitsize == 64) {
2551 lsb = LLVMBuildTrunc(ctx->builder, lsb, ctx->i32, "");
2552 }
2553
Dave Airlie82d47b92017-10-26 15:28:41 +10002554 /* TODO: We need an intrinsic to skip this conditional. */
2555 /* Check for zero: */
2556 return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder,
2557 LLVMIntEQ, src0,
Timothy Arceri12a23502018-02-06 14:38:57 +11002558 zero, ""),
Dave Airlie82d47b92017-10-26 15:28:41 +10002559 LLVMConstInt(ctx->i32, -1, 0), lsb, "");
2560}
Marek Olšákb6339992017-12-31 23:35:59 +01002561
2562LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type)
2563{
2564 return LLVMPointerType(LLVMArrayType(elem_type, 0),
Marek Olšáka668c8d2018-09-07 18:44:54 -04002565 AC_ADDR_SPACE_CONST);
Marek Olšákb6339992017-12-31 23:35:59 +01002566}
Marek Olšák931ec802018-01-01 21:04:22 +01002567
2568LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type)
2569{
Marek Olšák931ec802018-01-01 21:04:22 +01002570 return LLVMPointerType(LLVMArrayType(elem_type, 0),
Marek Olšáka668c8d2018-09-07 18:44:54 -04002571 AC_ADDR_SPACE_CONST_32BIT);
Marek Olšák931ec802018-01-01 21:04:22 +01002572}
Timothy Arceri42627da2018-03-07 10:53:34 +11002573
2574static struct ac_llvm_flow *
2575get_current_flow(struct ac_llvm_context *ctx)
2576{
2577 if (ctx->flow_depth > 0)
2578 return &ctx->flow[ctx->flow_depth - 1];
2579 return NULL;
2580}
2581
2582static struct ac_llvm_flow *
2583get_innermost_loop(struct ac_llvm_context *ctx)
2584{
2585 for (unsigned i = ctx->flow_depth; i > 0; --i) {
2586 if (ctx->flow[i - 1].loop_entry_block)
2587 return &ctx->flow[i - 1];
2588 }
2589 return NULL;
2590}
2591
2592static struct ac_llvm_flow *
2593push_flow(struct ac_llvm_context *ctx)
2594{
2595 struct ac_llvm_flow *flow;
2596
2597 if (ctx->flow_depth >= ctx->flow_depth_max) {
2598 unsigned new_max = MAX2(ctx->flow_depth << 1,
2599 AC_LLVM_INITIAL_CF_DEPTH);
2600
2601 ctx->flow = realloc(ctx->flow, new_max * sizeof(*ctx->flow));
2602 ctx->flow_depth_max = new_max;
2603 }
2604
2605 flow = &ctx->flow[ctx->flow_depth];
2606 ctx->flow_depth++;
2607
2608 flow->next_block = NULL;
2609 flow->loop_entry_block = NULL;
2610 return flow;
2611}
2612
2613static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base,
2614 int label_id)
2615{
2616 char buf[32];
2617 snprintf(buf, sizeof(buf), "%s%d", base, label_id);
2618 LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
2619}
2620
2621/* Append a basic block at the level of the parent flow.
2622 */
2623static LLVMBasicBlockRef append_basic_block(struct ac_llvm_context *ctx,
2624 const char *name)
2625{
2626 assert(ctx->flow_depth >= 1);
2627
2628 if (ctx->flow_depth >= 2) {
2629 struct ac_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
2630
2631 return LLVMInsertBasicBlockInContext(ctx->context,
2632 flow->next_block, name);
2633 }
2634
2635 LLVMValueRef main_fn =
2636 LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder));
2637 return LLVMAppendBasicBlockInContext(ctx->context, main_fn, name);
2638}
2639
2640/* Emit a branch to the given default target for the current block if
2641 * applicable -- that is, if the current block does not already contain a
2642 * branch from a break or continue.
2643 */
2644static void emit_default_branch(LLVMBuilderRef builder,
2645 LLVMBasicBlockRef target)
2646{
2647 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
2648 LLVMBuildBr(builder, target);
2649}
2650
2651void ac_build_bgnloop(struct ac_llvm_context *ctx, int label_id)
2652{
2653 struct ac_llvm_flow *flow = push_flow(ctx);
2654 flow->loop_entry_block = append_basic_block(ctx, "LOOP");
2655 flow->next_block = append_basic_block(ctx, "ENDLOOP");
2656 set_basicblock_name(flow->loop_entry_block, "loop", label_id);
2657 LLVMBuildBr(ctx->builder, flow->loop_entry_block);
2658 LLVMPositionBuilderAtEnd(ctx->builder, flow->loop_entry_block);
2659}
2660
2661void ac_build_break(struct ac_llvm_context *ctx)
2662{
2663 struct ac_llvm_flow *flow = get_innermost_loop(ctx);
2664 LLVMBuildBr(ctx->builder, flow->next_block);
2665}
2666
2667void ac_build_continue(struct ac_llvm_context *ctx)
2668{
2669 struct ac_llvm_flow *flow = get_innermost_loop(ctx);
2670 LLVMBuildBr(ctx->builder, flow->loop_entry_block);
2671}
2672
2673void ac_build_else(struct ac_llvm_context *ctx, int label_id)
2674{
2675 struct ac_llvm_flow *current_branch = get_current_flow(ctx);
2676 LLVMBasicBlockRef endif_block;
2677
2678 assert(!current_branch->loop_entry_block);
2679
2680 endif_block = append_basic_block(ctx, "ENDIF");
2681 emit_default_branch(ctx->builder, endif_block);
2682
2683 LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block);
2684 set_basicblock_name(current_branch->next_block, "else", label_id);
2685
2686 current_branch->next_block = endif_block;
2687}
2688
2689void ac_build_endif(struct ac_llvm_context *ctx, int label_id)
2690{
2691 struct ac_llvm_flow *current_branch = get_current_flow(ctx);
2692
2693 assert(!current_branch->loop_entry_block);
2694
2695 emit_default_branch(ctx->builder, current_branch->next_block);
2696 LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block);
2697 set_basicblock_name(current_branch->next_block, "endif", label_id);
2698
2699 ctx->flow_depth--;
2700}
2701
2702void ac_build_endloop(struct ac_llvm_context *ctx, int label_id)
2703{
2704 struct ac_llvm_flow *current_loop = get_current_flow(ctx);
2705
2706 assert(current_loop->loop_entry_block);
2707
2708 emit_default_branch(ctx->builder, current_loop->loop_entry_block);
2709
2710 LLVMPositionBuilderAtEnd(ctx->builder, current_loop->next_block);
2711 set_basicblock_name(current_loop->next_block, "endloop", label_id);
2712 ctx->flow_depth--;
2713}
2714
Nicolai Hähnle39634022018-05-23 22:04:20 +02002715void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id)
Timothy Arceri42627da2018-03-07 10:53:34 +11002716{
2717 struct ac_llvm_flow *flow = push_flow(ctx);
2718 LLVMBasicBlockRef if_block;
2719
2720 if_block = append_basic_block(ctx, "IF");
2721 flow->next_block = append_basic_block(ctx, "ELSE");
2722 set_basicblock_name(if_block, "if", label_id);
2723 LLVMBuildCondBr(ctx->builder, cond, if_block, flow->next_block);
2724 LLVMPositionBuilderAtEnd(ctx->builder, if_block);
2725}
2726
2727void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
2728 int label_id)
2729{
2730 LLVMValueRef cond = LLVMBuildFCmp(ctx->builder, LLVMRealUNE,
2731 value, ctx->f32_0, "");
Nicolai Hähnle39634022018-05-23 22:04:20 +02002732 ac_build_ifcc(ctx, cond, label_id);
Timothy Arceri42627da2018-03-07 10:53:34 +11002733}
2734
2735void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
2736 int label_id)
2737{
2738 LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
2739 ac_to_integer(ctx, value),
2740 ctx->i32_0, "");
Nicolai Hähnle39634022018-05-23 22:04:20 +02002741 ac_build_ifcc(ctx, cond, label_id);
Timothy Arceri42627da2018-03-07 10:53:34 +11002742}
Samuel Pitoisetbf636822018-03-09 16:22:44 +01002743
Marek Olšák82f5f892018-09-21 03:27:06 -04002744LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type,
Samuel Pitoisetbf636822018-03-09 16:22:44 +01002745 const char *name)
2746{
2747 LLVMBuilderRef builder = ac->builder;
2748 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
2749 LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
2750 LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
2751 LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
2752 LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ac->context);
2753 LLVMValueRef res;
2754
2755 if (first_instr) {
2756 LLVMPositionBuilderBefore(first_builder, first_instr);
2757 } else {
2758 LLVMPositionBuilderAtEnd(first_builder, first_block);
2759 }
2760
2761 res = LLVMBuildAlloca(first_builder, type, name);
Samuel Pitoisetbf636822018-03-09 16:22:44 +01002762 LLVMDisposeBuilder(first_builder);
Samuel Pitoisetbf636822018-03-09 16:22:44 +01002763 return res;
2764}
2765
Marek Olšák82f5f892018-09-21 03:27:06 -04002766LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac,
Samuel Pitoisetbf636822018-03-09 16:22:44 +01002767 LLVMTypeRef type, const char *name)
2768{
Marek Olšák82f5f892018-09-21 03:27:06 -04002769 LLVMValueRef ptr = ac_build_alloca_undef(ac, type, name);
2770 LLVMBuildStore(ac->builder, LLVMConstNull(type), ptr);
Samuel Pitoisetbf636822018-03-09 16:22:44 +01002771 return ptr;
2772}
Samuel Pitoiset895632b2018-03-09 16:26:34 +01002773
2774LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr,
2775 LLVMTypeRef type)
2776{
2777 int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
2778 return LLVMBuildBitCast(ctx->builder, ptr,
2779 LLVMPointerType(type, addr_space), "");
2780}
Samuel Pitoiset28bb6872018-03-09 16:36:31 +01002781
2782LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
2783 unsigned count)
2784{
2785 unsigned num_components = ac_get_llvm_num_components(value);
2786 if (count == num_components)
2787 return value;
2788
Rhys Perry12dc7cb2018-12-05 13:42:47 +00002789 LLVMValueRef masks[MAX2(count, 2)];
2790 masks[0] = ctx->i32_0;
2791 masks[1] = ctx->i32_1;
2792 for (unsigned i = 2; i < count; i++)
2793 masks[i] = LLVMConstInt(ctx->i32, i, false);
Samuel Pitoiset28bb6872018-03-09 16:36:31 +01002794
2795 if (count == 1)
2796 return LLVMBuildExtractElement(ctx->builder, value, masks[0],
2797 "");
2798
2799 LLVMValueRef swizzle = LLVMConstVector(masks, count);
2800 return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, "");
2801}
Samuel Pitoiset61a91ca2018-03-09 16:39:35 +01002802
2803LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param,
2804 unsigned rshift, unsigned bitwidth)
2805{
2806 LLVMValueRef value = param;
2807 if (rshift)
2808 value = LLVMBuildLShr(ctx->builder, value,
2809 LLVMConstInt(ctx->i32, rshift, false), "");
2810
2811 if (rshift + bitwidth < 32) {
2812 unsigned mask = (1 << bitwidth) - 1;
2813 value = LLVMBuildAnd(ctx->builder, value,
2814 LLVMConstInt(ctx->i32, mask, false), "");
2815 }
2816 return value;
2817}
Marek Olšákdc04e4b2018-03-20 19:14:57 -04002818
2819/* Adjust the sample index according to FMASK.
2820 *
2821 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
2822 * which is the identity mapping. Each nibble says which physical sample
2823 * should be fetched to get that sample.
2824 *
2825 * For example, 0x11111100 means there are only 2 samples stored and
2826 * the second sample covers 3/4 of the pixel. When reading samples 0
2827 * and 1, return physical sample 0 (determined by the first two 0s
2828 * in FMASK), otherwise return physical sample 1.
2829 *
2830 * The sample index should be adjusted as follows:
2831 * addr[sample_index] = (fmask >> (addr[sample_index] * 4)) & 0xF;
2832 */
2833void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
2834 LLVMValueRef *addr, bool is_array_tex)
2835{
2836 struct ac_image_args fmask_load = {};
2837 fmask_load.opcode = ac_image_load;
2838 fmask_load.resource = fmask;
2839 fmask_load.dmask = 0xf;
Nicolai Hähnlef9315832018-02-16 14:21:56 +01002840 fmask_load.dim = is_array_tex ? ac_image_2darray : ac_image_2d;
Marek Olšákdc04e4b2018-03-20 19:14:57 -04002841
Nicolai Hähnle625dcbb2018-03-23 11:20:24 +01002842 fmask_load.coords[0] = addr[0];
2843 fmask_load.coords[1] = addr[1];
2844 if (is_array_tex)
2845 fmask_load.coords[2] = addr[2];
Marek Olšákdc04e4b2018-03-20 19:14:57 -04002846
2847 LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load);
2848 fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value,
2849 ac->i32_0, "");
2850
2851 /* Apply the formula. */
2852 unsigned sample_chan = is_array_tex ? 3 : 2;
2853 LLVMValueRef final_sample;
2854 final_sample = LLVMBuildMul(ac->builder, addr[sample_chan],
2855 LLVMConstInt(ac->i32, 4, 0), "");
2856 final_sample = LLVMBuildLShr(ac->builder, fmask_value, final_sample, "");
Marek Olšáke5e57c32018-05-17 23:23:24 -04002857 /* Mask the sample index by 0x7, because 0x8 means an unknown value
2858 * with EQAA, so those will map to 0. */
Marek Olšákdc04e4b2018-03-20 19:14:57 -04002859 final_sample = LLVMBuildAnd(ac->builder, final_sample,
Marek Olšáke5e57c32018-05-17 23:23:24 -04002860 LLVMConstInt(ac->i32, 0x7, 0), "");
Marek Olšákdc04e4b2018-03-20 19:14:57 -04002861
2862 /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
Marek Olšáke5e57c32018-05-17 23:23:24 -04002863 * resource descriptor is 0 (invalid).
Marek Olšákdc04e4b2018-03-20 19:14:57 -04002864 */
2865 LLVMValueRef tmp;
2866 tmp = LLVMBuildBitCast(ac->builder, fmask, ac->v8i32, "");
2867 tmp = LLVMBuildExtractElement(ac->builder, tmp, ac->i32_1, "");
2868 tmp = LLVMBuildICmp(ac->builder, LLVMIntNE, tmp, ac->i32_0, "");
2869
2870 /* Replace the MSAA sample index. */
2871 addr[sample_chan] = LLVMBuildSelect(ac->builder, tmp, final_sample,
2872 addr[sample_chan], "");
2873}
Daniel Schürmannd5f7ebd2018-03-06 15:03:36 +01002874
2875static LLVMValueRef
2876_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
2877{
2878 ac_build_optimization_barrier(ctx, &src);
2879 return ac_build_intrinsic(ctx,
2880 lane == NULL ? "llvm.amdgcn.readfirstlane" : "llvm.amdgcn.readlane",
2881 LLVMTypeOf(src), (LLVMValueRef []) {
2882 src, lane },
2883 lane == NULL ? 1 : 2,
2884 AC_FUNC_ATTR_READNONE |
2885 AC_FUNC_ATTR_CONVERGENT);
2886}
2887
2888/**
2889 * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic.
2890 * @param ctx
2891 * @param src
2892 * @param lane - id of the lane or NULL for the first active lane
2893 * @return value of the lane
2894 */
2895LLVMValueRef
2896ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
2897{
2898 LLVMTypeRef src_type = LLVMTypeOf(src);
2899 src = ac_to_integer(ctx, src);
2900 unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
2901 LLVMValueRef ret;
2902
2903 if (bits == 32) {
2904 ret = _ac_build_readlane(ctx, src, lane);
2905 } else {
2906 assert(bits % 32 == 0);
2907 LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
2908 LLVMValueRef src_vector =
2909 LLVMBuildBitCast(ctx->builder, src, vec_type, "");
2910 ret = LLVMGetUndef(vec_type);
2911 for (unsigned i = 0; i < bits / 32; i++) {
2912 src = LLVMBuildExtractElement(ctx->builder, src_vector,
2913 LLVMConstInt(ctx->i32, i, 0), "");
2914 LLVMValueRef ret_comp = _ac_build_readlane(ctx, src, lane);
2915 ret = LLVMBuildInsertElement(ctx->builder, ret, ret_comp,
2916 LLVMConstInt(ctx->i32, i, 0), "");
2917 }
2918 }
2919 return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
2920}
2921
2922LLVMValueRef
2923ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane)
2924{
2925 /* TODO: Use the actual instruction when LLVM adds an intrinsic for it.
2926 */
2927 LLVMValueRef pred = LLVMBuildICmp(ctx->builder, LLVMIntEQ, lane,
2928 ac_get_thread_id(ctx), "");
2929 return LLVMBuildSelect(ctx->builder, pred, value, src, "");
2930}
2931
2932LLVMValueRef
2933ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask)
2934{
2935 LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask,
2936 LLVMVectorType(ctx->i32, 2),
2937 "");
2938 LLVMValueRef mask_lo = LLVMBuildExtractElement(ctx->builder, mask_vec,
2939 ctx->i32_0, "");
2940 LLVMValueRef mask_hi = LLVMBuildExtractElement(ctx->builder, mask_vec,
2941 ctx->i32_1, "");
2942 LLVMValueRef val =
2943 ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32,
2944 (LLVMValueRef []) { mask_lo, ctx->i32_0 },
2945 2, AC_FUNC_ATTR_READNONE);
2946 val = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", ctx->i32,
2947 (LLVMValueRef []) { mask_hi, val },
2948 2, AC_FUNC_ATTR_READNONE);
2949 return val;
2950}
2951
/* Control values passed as the dpp_ctrl operand of the DPP intrinsic.
 *
 * Entries with a leading underscore are base encodings that are combined
 * with a parameter by the dpp_*() helper functions; the others are used
 * as-is.
 */
enum dpp_ctrl {
	/* Base encodings */
	_dpp_quad_perm      = 0x000, /* OR-ed with four 2-bit lane selectors */
	_dpp_row_sl         = 0x100, /* OR-ed with an amount in 1..15 */
	_dpp_row_sr         = 0x110, /* OR-ed with an amount in 1..15 */
	_dpp_row_rr         = 0x120,

	/* Complete encodings */
	dpp_wf_sl1          = 0x130,
	dpp_wf_rl1          = 0x134,
	dpp_wf_sr1          = 0x138,
	dpp_wf_rr1          = 0x13c,
	dpp_row_mirror      = 0x140,
	dpp_row_half_mirror = 0x141,
	dpp_row_bcast15     = 0x142,
	dpp_row_bcast31     = 0x143
};
2966
2967static inline enum dpp_ctrl
2968dpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
2969{
2970 assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4);
2971 return _dpp_quad_perm | lane0 | (lane1 << 2) | (lane2 << 4) | (lane3 << 6);
2972}
2973
2974static inline enum dpp_ctrl
2975dpp_row_sl(unsigned amount)
2976{
2977 assert(amount > 0 && amount < 16);
2978 return _dpp_row_sl | amount;
2979}
2980
2981static inline enum dpp_ctrl
2982dpp_row_sr(unsigned amount)
2983{
2984 assert(amount > 0 && amount < 16);
2985 return _dpp_row_sr | amount;
2986}
2987
2988static LLVMValueRef
2989_ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
2990 enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask,
2991 bool bound_ctrl)
2992{
2993 return ac_build_intrinsic(ctx, "llvm.amdgcn.update.dpp.i32",
2994 LLVMTypeOf(old),
2995 (LLVMValueRef[]) {
2996 old, src,
2997 LLVMConstInt(ctx->i32, dpp_ctrl, 0),
2998 LLVMConstInt(ctx->i32, row_mask, 0),
2999 LLVMConstInt(ctx->i32, bank_mask, 0),
3000 LLVMConstInt(ctx->i1, bound_ctrl, 0) },
3001 6, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
3002}
3003
3004static LLVMValueRef
3005ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
3006 enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask,
3007 bool bound_ctrl)
3008{
3009 LLVMTypeRef src_type = LLVMTypeOf(src);
3010 src = ac_to_integer(ctx, src);
3011 old = ac_to_integer(ctx, old);
3012 unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
3013 LLVMValueRef ret;
3014 if (bits == 32) {
3015 ret = _ac_build_dpp(ctx, old, src, dpp_ctrl, row_mask,
3016 bank_mask, bound_ctrl);
3017 } else {
3018 assert(bits % 32 == 0);
3019 LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
3020 LLVMValueRef src_vector =
3021 LLVMBuildBitCast(ctx->builder, src, vec_type, "");
3022 LLVMValueRef old_vector =
3023 LLVMBuildBitCast(ctx->builder, old, vec_type, "");
3024 ret = LLVMGetUndef(vec_type);
3025 for (unsigned i = 0; i < bits / 32; i++) {
3026 src = LLVMBuildExtractElement(ctx->builder, src_vector,
3027 LLVMConstInt(ctx->i32, i,
3028 0), "");
3029 old = LLVMBuildExtractElement(ctx->builder, old_vector,
3030 LLVMConstInt(ctx->i32, i,
3031 0), "");
3032 LLVMValueRef ret_comp = _ac_build_dpp(ctx, old, src,
3033 dpp_ctrl,
3034 row_mask,
3035 bank_mask,
3036 bound_ctrl);
3037 ret = LLVMBuildInsertElement(ctx->builder, ret,
3038 ret_comp,
3039 LLVMConstInt(ctx->i32, i,
3040 0), "");
3041 }
3042 }
3043 return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
3044}
3045
/* Pack three 5-bit masks into one pattern word: and_mask in bits 0-4,
 * or_mask in bits 5-9 and xor_mask in bits 10-14.
 */
static inline unsigned
ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask)
{
	assert(and_mask < 32 && or_mask < 32 && xor_mask < 32);

	unsigned pattern = and_mask;

	pattern |= or_mask << 5;
	pattern |= xor_mask << 10;
	return pattern;
}
3052
3053static LLVMValueRef
3054_ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask)
3055{
3056 return ac_build_intrinsic(ctx, "llvm.amdgcn.ds.swizzle",
3057 LLVMTypeOf(src), (LLVMValueRef []) {
3058 src, LLVMConstInt(ctx->i32, mask, 0) },
3059 2, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
3060}
3061
3062LLVMValueRef
3063ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask)
3064{
3065 LLVMTypeRef src_type = LLVMTypeOf(src);
3066 src = ac_to_integer(ctx, src);
3067 unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
3068 LLVMValueRef ret;
3069 if (bits == 32) {
3070 ret = _ac_build_ds_swizzle(ctx, src, mask);
3071 } else {
3072 assert(bits % 32 == 0);
3073 LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
3074 LLVMValueRef src_vector =
3075 LLVMBuildBitCast(ctx->builder, src, vec_type, "");
3076 ret = LLVMGetUndef(vec_type);
3077 for (unsigned i = 0; i < bits / 32; i++) {
3078 src = LLVMBuildExtractElement(ctx->builder, src_vector,
3079 LLVMConstInt(ctx->i32, i,
3080 0), "");
3081 LLVMValueRef ret_comp = _ac_build_ds_swizzle(ctx, src,
3082 mask);
3083 ret = LLVMBuildInsertElement(ctx->builder, ret,
3084 ret_comp,
3085 LLVMConstInt(ctx->i32, i,
3086 0), "");
3087 }
3088 }
3089 return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
3090}
3091
3092static LLVMValueRef
3093ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
3094{
3095 char name[32], type[8];
3096 ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type));
3097 snprintf(name, sizeof(name), "llvm.amdgcn.wwm.%s", type);
3098 return ac_build_intrinsic(ctx, name, LLVMTypeOf(src),
3099 (LLVMValueRef []) { src }, 1,
3100 AC_FUNC_ATTR_READNONE);
3101}
3102
3103static LLVMValueRef
3104ac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src,
3105 LLVMValueRef inactive)
3106{
Timothy Arcerifae3b382018-06-07 10:30:01 +10003107 char name[33], type[8];
Daniel Schürmannd5f7ebd2018-03-06 15:03:36 +01003108 LLVMTypeRef src_type = LLVMTypeOf(src);
3109 src = ac_to_integer(ctx, src);
3110 inactive = ac_to_integer(ctx, inactive);
3111 ac_build_type_name_for_intr(LLVMTypeOf(src), type, sizeof(type));
3112 snprintf(name, sizeof(name), "llvm.amdgcn.set.inactive.%s", type);
3113 LLVMValueRef ret =
3114 ac_build_intrinsic(ctx, name,
3115 LLVMTypeOf(src), (LLVMValueRef []) {
3116 src, inactive }, 2,
3117 AC_FUNC_ATTR_READNONE |
3118 AC_FUNC_ATTR_CONVERGENT);
3119 return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
3120}
3121
3122static LLVMValueRef
3123get_reduction_identity(struct ac_llvm_context *ctx, nir_op op, unsigned type_size)
3124{
3125 if (type_size == 4) {
3126 switch (op) {
3127 case nir_op_iadd: return ctx->i32_0;
3128 case nir_op_fadd: return ctx->f32_0;
3129 case nir_op_imul: return ctx->i32_1;
3130 case nir_op_fmul: return ctx->f32_1;
3131 case nir_op_imin: return LLVMConstInt(ctx->i32, INT32_MAX, 0);
3132 case nir_op_umin: return LLVMConstInt(ctx->i32, UINT32_MAX, 0);
3133 case nir_op_fmin: return LLVMConstReal(ctx->f32, INFINITY);
3134 case nir_op_imax: return LLVMConstInt(ctx->i32, INT32_MIN, 0);
3135 case nir_op_umax: return ctx->i32_0;
3136 case nir_op_fmax: return LLVMConstReal(ctx->f32, -INFINITY);
3137 case nir_op_iand: return LLVMConstInt(ctx->i32, -1, 0);
3138 case nir_op_ior: return ctx->i32_0;
3139 case nir_op_ixor: return ctx->i32_0;
3140 default:
3141 unreachable("bad reduction intrinsic");
3142 }
3143 } else { /* type_size == 64bit */
3144 switch (op) {
3145 case nir_op_iadd: return ctx->i64_0;
3146 case nir_op_fadd: return ctx->f64_0;
3147 case nir_op_imul: return ctx->i64_1;
3148 case nir_op_fmul: return ctx->f64_1;
3149 case nir_op_imin: return LLVMConstInt(ctx->i64, INT64_MAX, 0);
3150 case nir_op_umin: return LLVMConstInt(ctx->i64, UINT64_MAX, 0);
3151 case nir_op_fmin: return LLVMConstReal(ctx->f64, INFINITY);
3152 case nir_op_imax: return LLVMConstInt(ctx->i64, INT64_MIN, 0);
3153 case nir_op_umax: return ctx->i64_0;
3154 case nir_op_fmax: return LLVMConstReal(ctx->f64, -INFINITY);
3155 case nir_op_iand: return LLVMConstInt(ctx->i64, -1, 0);
3156 case nir_op_ior: return ctx->i64_0;
3157 case nir_op_ixor: return ctx->i64_0;
3158 default:
3159 unreachable("bad reduction intrinsic");
3160 }
3161 }
3162}
3163
3164static LLVMValueRef
3165ac_build_alu_op(struct ac_llvm_context *ctx, LLVMValueRef lhs, LLVMValueRef rhs, nir_op op)
3166{
3167 bool _64bit = ac_get_type_size(LLVMTypeOf(lhs)) == 8;
3168 switch (op) {
3169 case nir_op_iadd: return LLVMBuildAdd(ctx->builder, lhs, rhs, "");
3170 case nir_op_fadd: return LLVMBuildFAdd(ctx->builder, lhs, rhs, "");
3171 case nir_op_imul: return LLVMBuildMul(ctx->builder, lhs, rhs, "");
3172 case nir_op_fmul: return LLVMBuildFMul(ctx->builder, lhs, rhs, "");
3173 case nir_op_imin: return LLVMBuildSelect(ctx->builder,
3174 LLVMBuildICmp(ctx->builder, LLVMIntSLT, lhs, rhs, ""),
3175 lhs, rhs, "");
3176 case nir_op_umin: return LLVMBuildSelect(ctx->builder,
3177 LLVMBuildICmp(ctx->builder, LLVMIntULT, lhs, rhs, ""),
3178 lhs, rhs, "");
3179 case nir_op_fmin: return ac_build_intrinsic(ctx,
3180 _64bit ? "llvm.minnum.f64" : "llvm.minnum.f32",
3181 _64bit ? ctx->f64 : ctx->f32,
3182 (LLVMValueRef[]){lhs, rhs}, 2, AC_FUNC_ATTR_READNONE);
3183 case nir_op_imax: return LLVMBuildSelect(ctx->builder,
3184 LLVMBuildICmp(ctx->builder, LLVMIntSGT, lhs, rhs, ""),
3185 lhs, rhs, "");
3186 case nir_op_umax: return LLVMBuildSelect(ctx->builder,
3187 LLVMBuildICmp(ctx->builder, LLVMIntUGT, lhs, rhs, ""),
3188 lhs, rhs, "");
3189 case nir_op_fmax: return ac_build_intrinsic(ctx,
3190 _64bit ? "llvm.maxnum.f64" : "llvm.maxnum.f32",
3191 _64bit ? ctx->f64 : ctx->f32,
3192 (LLVMValueRef[]){lhs, rhs}, 2, AC_FUNC_ATTR_READNONE);
3193 case nir_op_iand: return LLVMBuildAnd(ctx->builder, lhs, rhs, "");
3194 case nir_op_ior: return LLVMBuildOr(ctx->builder, lhs, rhs, "");
3195 case nir_op_ixor: return LLVMBuildXor(ctx->builder, lhs, rhs, "");
3196 default:
3197 unreachable("bad reduction intrinsic");
3198 }
3199}
3200
Nicolai Hähnle300876a2018-05-23 22:09:27 +02003201/**
3202 * \param maxprefix specifies that the result only needs to be correct for a
3203 * prefix of this many threads
3204 *
3205 * TODO: add inclusive and excluse scan functions for SI chip class.
3206 */
Daniel Schürmannd5f7ebd2018-03-06 15:03:36 +01003207static LLVMValueRef
Nicolai Hähnle300876a2018-05-23 22:09:27 +02003208ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValueRef identity,
3209 unsigned maxprefix)
Daniel Schürmannd5f7ebd2018-03-06 15:03:36 +01003210{
3211 LLVMValueRef result, tmp;
3212 result = src;
Nicolai Hähnle300876a2018-05-23 22:09:27 +02003213 if (maxprefix <= 1)
3214 return result;
Daniel Schürmannd5f7ebd2018-03-06 15:03:36 +01003215 tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false);
3216 result = ac_build_alu_op(ctx, result, tmp, op);
Nicolai Hähnle300876a2018-05-23 22:09:27 +02003217 if (maxprefix <= 2)
3218 return result;
Daniel Schürmannd5f7ebd2018-03-06 15:03:36 +01003219 tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(2), 0xf, 0xf, false);
3220 result = ac_build_alu_op(ctx, result, tmp, op);
Nicolai Hähnle300876a2018-05-23 22:09:27 +02003221 if (maxprefix <= 3)
3222 return result;
Daniel Schürmannd5f7ebd2018-03-06 15:03:36 +01003223 tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(3), 0xf, 0xf, false);
3224 result = ac_build_alu_op(ctx, result, tmp, op);
Nicolai Hähnle300876a2018-05-23 22:09:27 +02003225 if (maxprefix <= 4)
3226 return result;
Daniel Schürmannd5f7ebd2018-03-06 15:03:36 +01003227 tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(4), 0xf, 0xe, false);
3228 result = ac_build_alu_op(ctx, result, tmp, op);
Nicolai Hähnle300876a2018-05-23 22:09:27 +02003229 if (maxprefix <= 8)
3230 return result;
Daniel Schürmannd5f7ebd2018-03-06 15:03:36 +01003231 tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(8), 0xf, 0xc, false);
3232 result = ac_build_alu_op(ctx, result, tmp, op);
Nicolai Hähnle300876a2018-05-23 22:09:27 +02003233 if (maxprefix <= 16)
3234 return result;
Daniel Schürmannd5f7ebd2018-03-06 15:03:36 +01003235 tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
3236 result = ac_build_alu_op(ctx, result, tmp, op);
Nicolai Hähnle300876a2018-05-23 22:09:27 +02003237 if (maxprefix <= 32)
3238 return result;
Daniel Schürmannd5f7ebd2018-03-06 15:03:36 +01003239 tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
3240 result = ac_build_alu_op(ctx, result, tmp, op);
3241 return result;
3242}
3243
3244LLVMValueRef
3245ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op)
3246{
Daniel Schürmannd5f7ebd2018-03-06 15:03:36 +01003247 LLVMValueRef result;
Nicolai Hähnle8efaffa2018-09-20 19:09:50 +02003248
3249 if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) {
3250 LLVMBuilderRef builder = ctx->builder;
3251 src = LLVMBuildZExt(builder, src, ctx->i32, "");
3252 result = ac_build_ballot(ctx, src);
3253 result = ac_build_mbcnt(ctx, result);
3254 result = LLVMBuildAdd(builder, result, src, "");
3255 return result;
3256 }
3257
3258 ac_build_optimization_barrier(ctx, &src);
3259
Nicolai Hähnle3c77f262018-11-29 19:00:15 +01003260 LLVMValueRef identity =
3261 get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
3262 result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
3263 LLVMTypeOf(identity), "");
Nicolai Hähnle300876a2018-05-23 22:09:27 +02003264 result = ac_build_scan(ctx, op, result, identity, 64);
Daniel Schürmannd5f7ebd2018-03-06 15:03:36 +01003265
3266 return ac_build_wwm(ctx, result);
3267}
3268
3269LLVMValueRef
3270ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op)
3271{
Daniel Schürmannd5f7ebd2018-03-06 15:03:36 +01003272 LLVMValueRef result;
Nicolai Hähnle8efaffa2018-09-20 19:09:50 +02003273
3274 if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) {
3275 LLVMBuilderRef builder = ctx->builder;
3276 src = LLVMBuildZExt(builder, src, ctx->i32, "");
3277 result = ac_build_ballot(ctx, src);
3278 result = ac_build_mbcnt(ctx, result);
3279 return result;
3280 }
3281
3282 ac_build_optimization_barrier(ctx, &src);
3283
Nicolai Hähnle3c77f262018-11-29 19:00:15 +01003284 LLVMValueRef identity =
3285 get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
3286 result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
3287 LLVMTypeOf(identity), "");
Daniel Schürmannd5f7ebd2018-03-06 15:03:36 +01003288 result = ac_build_dpp(ctx, identity, result, dpp_wf_sr1, 0xf, 0xf, false);
Nicolai Hähnle300876a2018-05-23 22:09:27 +02003289 result = ac_build_scan(ctx, op, result, identity, 64);
Daniel Schürmannd5f7ebd2018-03-06 15:03:36 +01003290
3291 return ac_build_wwm(ctx, result);
3292}
3293
3294LLVMValueRef
3295ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size)
3296{
3297 if (cluster_size == 1) return src;
3298 ac_build_optimization_barrier(ctx, &src);
3299 LLVMValueRef result, swap;
3300 LLVMValueRef identity = get_reduction_identity(ctx, op,
3301 ac_get_type_size(LLVMTypeOf(src)));
3302 result = LLVMBuildBitCast(ctx->builder,
3303 ac_build_set_inactive(ctx, src, identity),
3304 LLVMTypeOf(identity), "");
3305 swap = ac_build_quad_swizzle(ctx, result, 1, 0, 3, 2);
3306 result = ac_build_alu_op(ctx, result, swap, op);
3307 if (cluster_size == 2) return ac_build_wwm(ctx, result);
3308
3309 swap = ac_build_quad_swizzle(ctx, result, 2, 3, 0, 1);
3310 result = ac_build_alu_op(ctx, result, swap, op);
3311 if (cluster_size == 4) return ac_build_wwm(ctx, result);
3312
3313 if (ctx->chip_class >= VI)
3314 swap = ac_build_dpp(ctx, identity, result, dpp_row_half_mirror, 0xf, 0xf, false);
3315 else
3316 swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x04));
3317 result = ac_build_alu_op(ctx, result, swap, op);
3318 if (cluster_size == 8) return ac_build_wwm(ctx, result);
3319
3320 if (ctx->chip_class >= VI)
3321 swap = ac_build_dpp(ctx, identity, result, dpp_row_mirror, 0xf, 0xf, false);
3322 else
3323 swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x08));
3324 result = ac_build_alu_op(ctx, result, swap, op);
3325 if (cluster_size == 16) return ac_build_wwm(ctx, result);
3326
3327 if (ctx->chip_class >= VI && cluster_size != 32)
3328 swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
3329 else
3330 swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x10));
3331 result = ac_build_alu_op(ctx, result, swap, op);
3332 if (cluster_size == 32) return ac_build_wwm(ctx, result);
3333
3334 if (ctx->chip_class >= VI) {
3335 swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
3336 result = ac_build_alu_op(ctx, result, swap, op);
3337 result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0));
3338 return ac_build_wwm(ctx, result);
3339 } else {
3340 swap = ac_build_readlane(ctx, result, ctx->i32_0);
3341 result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 32, 0));
3342 result = ac_build_alu_op(ctx, result, swap, op);
3343 return ac_build_wwm(ctx, result);
3344 }
3345}
3346
Nicolai Hähnle300876a2018-05-23 22:09:27 +02003347/**
3348 * "Top half" of a scan that reduces per-wave values across an entire
3349 * workgroup.
3350 *
3351 * The source value must be present in the highest lane of the wave, and the
3352 * highest lane must be live.
3353 */
3354void
3355ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
3356{
3357 if (ws->maxwaves <= 1)
3358 return;
3359
3360 const LLVMValueRef i32_63 = LLVMConstInt(ctx->i32, 63, false);
3361 LLVMBuilderRef builder = ctx->builder;
3362 LLVMValueRef tid = ac_get_thread_id(ctx);
3363 LLVMValueRef tmp;
3364
3365 tmp = LLVMBuildICmp(builder, LLVMIntEQ, tid, i32_63, "");
3366 ac_build_ifcc(ctx, tmp, 1000);
3367 LLVMBuildStore(builder, ws->src, LLVMBuildGEP(builder, ws->scratch, &ws->waveidx, 1, ""));
3368 ac_build_endif(ctx, 1000);
3369}
3370
3371/**
3372 * "Bottom half" of a scan that reduces per-wave values across an entire
3373 * workgroup.
3374 *
3375 * The caller must place a barrier between the top and bottom halves.
3376 */
3377void
3378ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
3379{
3380 const LLVMTypeRef type = LLVMTypeOf(ws->src);
3381 const LLVMValueRef identity =
3382 get_reduction_identity(ctx, ws->op, ac_get_type_size(type));
3383
3384 if (ws->maxwaves <= 1) {
3385 ws->result_reduce = ws->src;
3386 ws->result_inclusive = ws->src;
3387 ws->result_exclusive = identity;
3388 return;
3389 }
3390 assert(ws->maxwaves <= 32);
3391
3392 LLVMBuilderRef builder = ctx->builder;
3393 LLVMValueRef tid = ac_get_thread_id(ctx);
3394 LLVMBasicBlockRef bbs[2];
3395 LLVMValueRef phivalues_scan[2];
3396 LLVMValueRef tmp, tmp2;
3397
3398 bbs[0] = LLVMGetInsertBlock(builder);
3399 phivalues_scan[0] = LLVMGetUndef(type);
3400
3401 if (ws->enable_reduce)
3402 tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->numwaves, "");
3403 else if (ws->enable_inclusive)
3404 tmp = LLVMBuildICmp(builder, LLVMIntULE, tid, ws->waveidx, "");
3405 else
3406 tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->waveidx, "");
3407 ac_build_ifcc(ctx, tmp, 1001);
3408 {
3409 tmp = LLVMBuildLoad(builder, LLVMBuildGEP(builder, ws->scratch, &tid, 1, ""), "");
3410
3411 ac_build_optimization_barrier(ctx, &tmp);
3412
3413 bbs[1] = LLVMGetInsertBlock(builder);
3414 phivalues_scan[1] = ac_build_scan(ctx, ws->op, tmp, identity, ws->maxwaves);
3415 }
3416 ac_build_endif(ctx, 1001);
3417
3418 const LLVMValueRef scan = ac_build_phi(ctx, type, 2, phivalues_scan, bbs);
3419
3420 if (ws->enable_reduce) {
3421 tmp = LLVMBuildSub(builder, ws->numwaves, ctx->i32_1, "");
3422 ws->result_reduce = ac_build_readlane(ctx, scan, tmp);
3423 }
3424 if (ws->enable_inclusive)
3425 ws->result_inclusive = ac_build_readlane(ctx, scan, ws->waveidx);
3426 if (ws->enable_exclusive) {
3427 tmp = LLVMBuildSub(builder, ws->waveidx, ctx->i32_1, "");
3428 tmp = ac_build_readlane(ctx, scan, tmp);
3429 tmp2 = LLVMBuildICmp(builder, LLVMIntEQ, ws->waveidx, ctx->i32_0, "");
3430 ws->result_exclusive = LLVMBuildSelect(builder, tmp2, identity, tmp, "");
3431 }
3432}
3433
/**
 * Scan of a per-wave value across an entire workgroup.
 *
 * Emits the top half, an s_barrier, and the bottom half, in that order.
 *
 * Unlike ac_build_inclusive_scan, the caller \em must ensure that all threads
 * of the workgroup are live. (This requirement cannot easily be relaxed in a
 * useful manner because of the barrier in the algorithm.)
 */
void
ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
{
	/* Publish the per-wave values ... */
	ac_build_wg_wavescan_top(ctx, ws);

	/* ... wait until every wave has done so ... */
	ac_build_s_barrier(ctx);

	/* ... then combine them. */
	ac_build_wg_wavescan_bottom(ctx, ws);
}
3450
3451/**
3452 * "Top half" of a scan that reduces per-thread values across an entire
3453 * workgroup.
3454 *
3455 * All lanes must be active when this code runs.
3456 */
3457void
3458ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
3459{
3460 if (ws->enable_exclusive) {
3461 ws->extra = ac_build_exclusive_scan(ctx, ws->src, ws->op);
Nicolai Hähnle8efaffa2018-09-20 19:09:50 +02003462 if (LLVMTypeOf(ws->src) == ctx->i1 && ws->op == nir_op_iadd)
3463 ws->src = LLVMBuildZExt(ctx->builder, ws->src, ctx->i32, "");
Nicolai Hähnle300876a2018-05-23 22:09:27 +02003464 ws->src = ac_build_alu_op(ctx, ws->extra, ws->src, ws->op);
3465 } else {
3466 ws->src = ac_build_inclusive_scan(ctx, ws->src, ws->op);
3467 }
3468
3469 bool enable_inclusive = ws->enable_inclusive;
3470 bool enable_exclusive = ws->enable_exclusive;
3471 ws->enable_inclusive = false;
3472 ws->enable_exclusive = ws->enable_exclusive || enable_inclusive;
3473 ac_build_wg_wavescan_top(ctx, ws);
3474 ws->enable_inclusive = enable_inclusive;
3475 ws->enable_exclusive = enable_exclusive;
3476}
3477
3478/**
3479 * "Bottom half" of a scan that reduces per-thread values across an entire
3480 * workgroup.
3481 *
3482 * The caller must place a barrier between the top and bottom halves.
3483 */
3484void
3485ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
3486{
3487 bool enable_inclusive = ws->enable_inclusive;
3488 bool enable_exclusive = ws->enable_exclusive;
3489 ws->enable_inclusive = false;
3490 ws->enable_exclusive = ws->enable_exclusive || enable_inclusive;
3491 ac_build_wg_wavescan_bottom(ctx, ws);
3492 ws->enable_inclusive = enable_inclusive;
3493 ws->enable_exclusive = enable_exclusive;
3494
3495 /* ws->result_reduce is already the correct value */
3496 if (ws->enable_inclusive)
3497 ws->result_inclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->src, ws->op);
3498 if (ws->enable_exclusive)
3499 ws->result_exclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->extra, ws->op);
3500}
3501
/**
 * A scan that reduces per-thread values across an entire workgroup.
 *
 * Emits the top half, an s_barrier, and the bottom half, in that order.
 *
 * The caller must ensure that all lanes are active when this code runs
 * (WWM is insufficient!), because there is an implied barrier.
 */
void
ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
{
	/* Per-thread scan and publication of the per-wave values. */
	ac_build_wg_scan_top(ctx, ws);

	/* Separate the two halves, as both of them require. */
	ac_build_s_barrier(ctx);

	/* Cross-wave combination and the final per-thread results. */
	ac_build_wg_scan_bottom(ctx, ws);
}
3515
Daniel Schürmannd5f7ebd2018-03-06 15:03:36 +01003516LLVMValueRef
3517ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
3518 unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
3519{
3520 unsigned mask = dpp_quad_perm(lane0, lane1, lane2, lane3);
Marek Olšákfd1121e2018-07-01 15:50:51 -04003521 if (ctx->chip_class >= VI) {
Daniel Schürmannd5f7ebd2018-03-06 15:03:36 +01003522 return ac_build_dpp(ctx, src, src, mask, 0xf, 0xf, false);
3523 } else {
3524 return ac_build_ds_swizzle(ctx, src, (1 << 15) | mask);
3525 }
3526}
3527
3528LLVMValueRef
3529ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index)
3530{
3531 index = LLVMBuildMul(ctx->builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
3532 return ac_build_intrinsic(ctx,
3533 "llvm.amdgcn.ds.bpermute", ctx->i32,
3534 (LLVMValueRef []) {index, src}, 2,
3535 AC_FUNC_ATTR_READNONE |
3536 AC_FUNC_ATTR_CONVERGENT);
3537}