blob: 1a0d44bcdd30764024068533c43cc934c069af1c [file] [log] [blame]
Dave Airlie13a28ff2017-02-03 10:05:00 +10001/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18 * USE OR OTHER DEALINGS IN THE SOFTWARE.
19 *
20 * The above copyright notice and this permission notice (including the
21 * next paragraph) shall be included in all copies or substantial portions
22 * of the Software.
23 *
24 */
25/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
26#include "ac_llvm_build.h"
27
28#include <llvm-c/Core.h>
29
30#include "c11/threads.h"
31
32#include <assert.h>
33#include <stdio.h>
34
35#include "ac_llvm_util.h"
Dave Airliee2659172017-04-25 23:33:29 +010036#include "ac_exp_param.h"
Dave Airlie13a28ff2017-02-03 10:05:00 +100037#include "util/bitscan.h"
38#include "util/macros.h"
Connor Abbottac27fa72017-06-05 14:16:43 -070039#include "util/u_atomic.h"
Dave Airlie13a28ff2017-02-03 10:05:00 +100040#include "sid.h"
41
Dave Airliee2659172017-04-25 23:33:29 +010042#include "shader_enums.h"
43
Dave Airlie13a28ff2017-02-03 10:05:00 +100044/* Initialize module-independent parts of the context.
45 *
46 * The caller is responsible for initializing ctx::module and ctx::builder.
47 */
48void
Nicolai Hähnle3db86d82017-09-13 14:36:23 +020049ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
50 enum chip_class chip_class)
Dave Airlie13a28ff2017-02-03 10:05:00 +100051{
52 LLVMValueRef args[1];
53
Nicolai Hähnle3db86d82017-09-13 14:36:23 +020054 ctx->chip_class = chip_class;
55
Dave Airlie13a28ff2017-02-03 10:05:00 +100056 ctx->context = context;
57 ctx->module = NULL;
58 ctx->builder = NULL;
59
60 ctx->voidt = LLVMVoidTypeInContext(ctx->context);
61 ctx->i1 = LLVMInt1TypeInContext(ctx->context);
62 ctx->i8 = LLVMInt8TypeInContext(ctx->context);
Nicolai Hähnle7bf8c942017-03-30 14:10:26 +020063 ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
Dave Airlie13a28ff2017-02-03 10:05:00 +100064 ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
Nicolai Hähnle7bf8c942017-03-30 14:10:26 +020065 ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
66 ctx->f16 = LLVMHalfTypeInContext(ctx->context);
Dave Airlie13a28ff2017-02-03 10:05:00 +100067 ctx->f32 = LLVMFloatTypeInContext(ctx->context);
Nicolai Hähnle7bf8c942017-03-30 14:10:26 +020068 ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
Timothy Arceri309a5142017-11-02 12:59:00 +110069 ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
Dave Airlie13a28ff2017-02-03 10:05:00 +100070 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
71 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
Nicolai Hähnleedfd3be2017-06-08 20:04:28 +020072 ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
Dave Airlie13a28ff2017-02-03 10:05:00 +100073
Nicolai Hähnle331a5742017-05-18 22:02:48 +020074 ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
75 ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
76 ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
77 ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
78
Dave Airliea76b6c22017-10-26 15:20:15 +100079 ctx->i1false = LLVMConstInt(ctx->i1, 0, false);
80 ctx->i1true = LLVMConstInt(ctx->i1, 1, false);
81
Dave Airlie13a28ff2017-02-03 10:05:00 +100082 ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
83 "range", 5);
84
85 ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
86 "invariant.load", 14);
87
88 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
89
90 args[0] = LLVMConstReal(ctx->f32, 2.5);
91 ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
92
93 ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
94 "amdgpu.uniform", 14);
95
96 ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
97}
98
Connor Abbottc181d4f2017-06-05 14:37:01 -070099unsigned
100ac_get_type_size(LLVMTypeRef type)
101{
102 LLVMTypeKind kind = LLVMGetTypeKind(type);
103
104 switch (kind) {
105 case LLVMIntegerTypeKind:
106 return LLVMGetIntTypeWidth(type) / 8;
107 case LLVMFloatTypeKind:
108 return 4;
Connor Abbottfafa2992017-07-18 20:44:47 -0700109 case LLVMDoubleTypeKind:
Connor Abbottc181d4f2017-06-05 14:37:01 -0700110 case LLVMPointerTypeKind:
111 return 8;
112 case LLVMVectorTypeKind:
113 return LLVMGetVectorSize(type) *
114 ac_get_type_size(LLVMGetElementType(type));
115 case LLVMArrayTypeKind:
116 return LLVMGetArrayLength(type) *
117 ac_get_type_size(LLVMGetElementType(type));
118 default:
119 assert(0);
120 return 0;
121 }
122}
123
Connor Abbott50967cd2017-07-18 17:32:10 -0700124static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
125{
126 if (t == ctx->f16 || t == ctx->i16)
127 return ctx->i16;
128 else if (t == ctx->f32 || t == ctx->i32)
129 return ctx->i32;
130 else if (t == ctx->f64 || t == ctx->i64)
131 return ctx->i64;
132 else
133 unreachable("Unhandled integer size");
134}
135
136LLVMTypeRef
137ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
138{
139 if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
140 LLVMTypeRef elem_type = LLVMGetElementType(t);
141 return LLVMVectorType(to_integer_type_scalar(ctx, elem_type),
142 LLVMGetVectorSize(t));
143 }
144 return to_integer_type_scalar(ctx, t);
145}
146
147LLVMValueRef
148ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v)
149{
150 LLVMTypeRef type = LLVMTypeOf(v);
151 return LLVMBuildBitCast(ctx->builder, v, ac_to_integer_type(ctx, type), "");
152}
153
154static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
155{
156 if (t == ctx->i16 || t == ctx->f16)
157 return ctx->f16;
158 else if (t == ctx->i32 || t == ctx->f32)
159 return ctx->f32;
160 else if (t == ctx->i64 || t == ctx->f64)
161 return ctx->f64;
162 else
163 unreachable("Unhandled float size");
164}
165
166LLVMTypeRef
167ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
168{
169 if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
170 LLVMTypeRef elem_type = LLVMGetElementType(t);
171 return LLVMVectorType(to_float_type_scalar(ctx, elem_type),
172 LLVMGetVectorSize(t));
173 }
174 return to_float_type_scalar(ctx, t);
175}
176
177LLVMValueRef
178ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v)
179{
180 LLVMTypeRef type = LLVMTypeOf(v);
181 return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx, type), "");
182}
183
184
Dave Airlie13a28ff2017-02-03 10:05:00 +1000185LLVMValueRef
Marek Olšák7f1446a2017-02-26 00:41:37 +0100186ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
187 LLVMTypeRef return_type, LLVMValueRef *params,
188 unsigned param_count, unsigned attrib_mask)
Dave Airlie13a28ff2017-02-03 10:05:00 +1000189{
Marek Olšák940da362017-02-22 02:29:12 +0100190 LLVMValueRef function, call;
191 bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
192 !(attrib_mask & AC_FUNC_ATTR_LEGACY);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000193
194 function = LLVMGetNamedFunction(ctx->module, name);
195 if (!function) {
196 LLVMTypeRef param_types[32], function_type;
197 unsigned i;
198
199 assert(param_count <= 32);
200
201 for (i = 0; i < param_count; ++i) {
202 assert(params[i]);
203 param_types[i] = LLVMTypeOf(params[i]);
204 }
205 function_type =
206 LLVMFunctionType(return_type, param_types, param_count, 0);
207 function = LLVMAddFunction(ctx->module, name, function_type);
208
209 LLVMSetFunctionCallConv(function, LLVMCCallConv);
210 LLVMSetLinkage(function, LLVMExternalLinkage);
211
Marek Olšák940da362017-02-22 02:29:12 +0100212 if (!set_callsite_attrs)
213 ac_add_func_attributes(ctx->context, function, attrib_mask);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000214 }
Marek Olšák940da362017-02-22 02:29:12 +0100215
216 call = LLVMBuildCall(ctx->builder, function, params, param_count, "");
217 if (set_callsite_attrs)
218 ac_add_func_attributes(ctx->context, call, attrib_mask);
219 return call;
Dave Airlie13a28ff2017-02-03 10:05:00 +1000220}
221
Marek Olšák9af03312017-02-23 22:58:49 +0100222/**
223 * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
224 * intrinsic names).
225 */
226void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize)
227{
228 LLVMTypeRef elem_type = type;
229
230 assert(bufsize >= 8);
231
232 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
233 int ret = snprintf(buf, bufsize, "v%u",
234 LLVMGetVectorSize(type));
235 if (ret < 0) {
236 char *type_name = LLVMPrintTypeToString(type);
237 fprintf(stderr, "Error building type name for: %s\n",
238 type_name);
239 return;
240 }
241 elem_type = LLVMGetElementType(type);
242 buf += ret;
243 bufsize -= ret;
244 }
245 switch (LLVMGetTypeKind(elem_type)) {
246 default: break;
247 case LLVMIntegerTypeKind:
248 snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
249 break;
250 case LLVMFloatTypeKind:
251 snprintf(buf, bufsize, "f32");
252 break;
253 case LLVMDoubleTypeKind:
254 snprintf(buf, bufsize, "f64");
255 break;
256 }
257}
258
Nicolai Hähnle052b9742017-09-29 11:17:03 +0200259/**
260 * Helper function that builds an LLVM IR PHI node and immediately adds
261 * incoming edges.
262 */
263LLVMValueRef
264ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
265 unsigned count_incoming, LLVMValueRef *values,
266 LLVMBasicBlockRef *blocks)
267{
268 LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
269 LLVMAddIncoming(phi, values, blocks, count_incoming);
270 return phi;
271}
272
Connor Abbottac27fa72017-06-05 14:16:43 -0700273/* Prevent optimizations (at least of memory accesses) across the current
274 * point in the program by emitting empty inline assembly that is marked as
275 * having side effects.
276 *
277 * Optionally, a value can be passed through the inline assembly to prevent
278 * LLVM from hoisting calls to ReadNone functions.
279 */
280void
281ac_build_optimization_barrier(struct ac_llvm_context *ctx,
282 LLVMValueRef *pvgpr)
283{
284 static int counter = 0;
285
286 LLVMBuilderRef builder = ctx->builder;
287 char code[16];
288
289 snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter));
290
291 if (!pvgpr) {
292 LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
293 LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false);
294 LLVMBuildCall(builder, inlineasm, NULL, 0, "");
295 } else {
296 LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
297 LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false);
298 LLVMValueRef vgpr = *pvgpr;
299 LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr);
300 unsigned vgpr_size = ac_get_type_size(vgpr_type);
301 LLVMValueRef vgpr0;
302
303 assert(vgpr_size % 4 == 0);
304
305 vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), "");
306 vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, "");
307 vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, "");
308 vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, "");
309 vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, "");
310
311 *pvgpr = vgpr;
312 }
313}
314
Dave Airlie13a28ff2017-02-03 10:05:00 +1000315LLVMValueRef
Connor Abbottbd73b892017-06-05 15:20:04 -0700316ac_build_ballot(struct ac_llvm_context *ctx,
317 LLVMValueRef value)
318{
319 LLVMValueRef args[3] = {
320 value,
321 ctx->i32_0,
322 LLVMConstInt(ctx->i32, LLVMIntNE, 0)
323 };
324
325 /* We currently have no other way to prevent LLVM from lifting the icmp
326 * calls to a dominating basic block.
327 */
328 ac_build_optimization_barrier(ctx, &args[0]);
329
330 if (LLVMTypeOf(args[0]) != ctx->i32)
331 args[0] = LLVMBuildBitCast(ctx->builder, args[0], ctx->i32, "");
332
333 return ac_build_intrinsic(ctx,
334 "llvm.amdgcn.icmp.i32",
335 ctx->i64, args, 3,
336 AC_FUNC_ATTR_NOUNWIND |
337 AC_FUNC_ATTR_READNONE |
338 AC_FUNC_ATTR_CONVERGENT);
339}
340
341LLVMValueRef
Connor Abbottb8a51c82017-06-06 16:40:26 -0700342ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value)
343{
344 LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
345 LLVMValueRef vote_set = ac_build_ballot(ctx, value);
346 return LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, "");
347}
348
349LLVMValueRef
350ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value)
351{
352 LLVMValueRef vote_set = ac_build_ballot(ctx, value);
353 return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set,
354 LLVMConstInt(ctx->i64, 0, 0), "");
355}
356
357LLVMValueRef
358ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
359{
360 LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
361 LLVMValueRef vote_set = ac_build_ballot(ctx, value);
362
363 LLVMValueRef all = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
364 vote_set, active_set, "");
365 LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
366 vote_set,
367 LLVMConstInt(ctx->i64, 0, 0), "");
368 return LLVMBuildOr(ctx->builder, all, none, "");
369}
370
371LLVMValueRef
Dave Airlie13a28ff2017-02-03 10:05:00 +1000372ac_build_gather_values_extended(struct ac_llvm_context *ctx,
373 LLVMValueRef *values,
374 unsigned value_count,
375 unsigned value_stride,
Nicolai Hähnleac2ab5a2017-06-25 13:04:51 +0200376 bool load,
377 bool always_vector)
Dave Airlie13a28ff2017-02-03 10:05:00 +1000378{
379 LLVMBuilderRef builder = ctx->builder;
Marek Olšákc7878b02017-02-23 01:34:27 +0100380 LLVMValueRef vec = NULL;
Dave Airlie13a28ff2017-02-03 10:05:00 +1000381 unsigned i;
382
Nicolai Hähnleac2ab5a2017-06-25 13:04:51 +0200383 if (value_count == 1 && !always_vector) {
Dave Airlie13a28ff2017-02-03 10:05:00 +1000384 if (load)
385 return LLVMBuildLoad(builder, values[0], "");
386 return values[0];
387 } else if (!value_count)
388 unreachable("value_count is 0");
389
390 for (i = 0; i < value_count; i++) {
391 LLVMValueRef value = values[i * value_stride];
392 if (load)
393 value = LLVMBuildLoad(builder, value, "");
394
395 if (!i)
396 vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
397 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
398 vec = LLVMBuildInsertElement(builder, vec, value, index, "");
399 }
400 return vec;
401}
402
403LLVMValueRef
404ac_build_gather_values(struct ac_llvm_context *ctx,
405 LLVMValueRef *values,
406 unsigned value_count)
407{
Nicolai Hähnleac2ab5a2017-06-25 13:04:51 +0200408 return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000409}
410
411LLVMValueRef
Marek Olšák7f1446a2017-02-26 00:41:37 +0100412ac_build_fdiv(struct ac_llvm_context *ctx,
413 LLVMValueRef num,
414 LLVMValueRef den)
Dave Airlie13a28ff2017-02-03 10:05:00 +1000415{
416 LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");
417
418 if (!LLVMIsConstant(ret))
419 LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
420 return ret;
421}
422
423/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
424 * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
425 * already multiplied by two. id is the cube face number.
426 */
427struct cube_selection_coords {
428 LLVMValueRef stc[2];
429 LLVMValueRef ma;
430 LLVMValueRef id;
431};
432
433static void
434build_cube_intrinsic(struct ac_llvm_context *ctx,
435 LLVMValueRef in[3],
436 struct cube_selection_coords *out)
437{
Marek Olšák12beef02017-04-25 02:18:10 +0200438 LLVMTypeRef f32 = ctx->f32;
Dave Airlie13a28ff2017-02-03 10:05:00 +1000439
Marek Olšák12beef02017-04-25 02:18:10 +0200440 out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc",
441 f32, in, 3, AC_FUNC_ATTR_READNONE);
442 out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc",
443 f32, in, 3, AC_FUNC_ATTR_READNONE);
444 out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema",
445 f32, in, 3, AC_FUNC_ATTR_READNONE);
446 out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid",
447 f32, in, 3, AC_FUNC_ATTR_READNONE);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000448}
449
450/**
451 * Build a manual selection sequence for cube face sc/tc coordinates and
452 * major axis vector (multiplied by 2 for consistency) for the given
453 * vec3 \p coords, for the face implied by \p selcoords.
454 *
455 * For the major axis, we always adjust the sign to be in the direction of
456 * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
457 * the selcoords major axis.
458 */
Nicolai Hähnlea6ea4c12017-09-22 19:14:16 +0200459static void build_cube_select(struct ac_llvm_context *ctx,
Dave Airlie13a28ff2017-02-03 10:05:00 +1000460 const struct cube_selection_coords *selcoords,
461 const LLVMValueRef *coords,
462 LLVMValueRef *out_st,
463 LLVMValueRef *out_ma)
464{
Nicolai Hähnlea6ea4c12017-09-22 19:14:16 +0200465 LLVMBuilderRef builder = ctx->builder;
Dave Airlie13a28ff2017-02-03 10:05:00 +1000466 LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
467 LLVMValueRef is_ma_positive;
468 LLVMValueRef sgn_ma;
469 LLVMValueRef is_ma_z, is_not_ma_z;
470 LLVMValueRef is_ma_y;
471 LLVMValueRef is_ma_x;
472 LLVMValueRef sgn;
473 LLVMValueRef tmp;
474
475 is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE,
476 selcoords->ma, LLVMConstReal(f32, 0.0), "");
477 sgn_ma = LLVMBuildSelect(builder, is_ma_positive,
478 LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), "");
479
480 is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
481 is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
482 is_ma_y = LLVMBuildAnd(builder, is_not_ma_z,
483 LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
484 is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
485
486 /* Select sc */
Nicolai Hähnle5be5c1e2017-09-22 19:05:52 +0200487 tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], "");
Dave Airlie13a28ff2017-02-03 10:05:00 +1000488 sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
Nicolai Hähnle5be5c1e2017-09-22 19:05:52 +0200489 LLVMBuildSelect(builder, is_ma_z, sgn_ma,
Dave Airlie13a28ff2017-02-03 10:05:00 +1000490 LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
491 out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
492
493 /* Select tc */
494 tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
Nicolai Hähnle5be5c1e2017-09-22 19:05:52 +0200495 sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma,
Dave Airlie13a28ff2017-02-03 10:05:00 +1000496 LLVMConstReal(f32, -1.0), "");
497 out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
498
499 /* Select ma */
500 tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
501 LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
Nicolai Hähnlea6ea4c12017-09-22 19:14:16 +0200502 tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32",
503 ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE);
504 *out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), "");
Dave Airlie13a28ff2017-02-03 10:05:00 +1000505}
506
507void
508ac_prepare_cube_coords(struct ac_llvm_context *ctx,
Nicolai Hähnlee0af3be2017-09-13 10:47:02 +0200509 bool is_deriv, bool is_array, bool is_lod,
Dave Airlie13a28ff2017-02-03 10:05:00 +1000510 LLVMValueRef *coords_arg,
511 LLVMValueRef *derivs_arg)
512{
513
514 LLVMBuilderRef builder = ctx->builder;
515 struct cube_selection_coords selcoords;
516 LLVMValueRef coords[3];
517 LLVMValueRef invma;
518
Nicolai Hähnlee0af3be2017-09-13 10:47:02 +0200519 if (is_array && !is_lod) {
Nicolai Hähnle94736d32017-09-13 15:33:23 +0200520 LLVMValueRef tmp = coords_arg[3];
521 tmp = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &tmp, 1, 0);
522
523 /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
524 *
525 * "For Array forms, the array layer used will be
526 *
527 * max(0, min(d−1, floor(layer+0.5)))
528 *
529 * where d is the depth of the texture array and layer
530 * comes from the component indicated in the tables below.
531 * Workaroudn for an issue where the layer is taken from a
532 * helper invocation which happens to fall on a different
533 * layer due to extrapolation."
534 *
535 * VI and earlier attempt to implement this in hardware by
536 * clamping the value of coords[2] = (8 * layer) + face.
537 * Unfortunately, this means that the we end up with the wrong
538 * face when clamping occurs.
539 *
540 * Clamp the layer earlier to work around the issue.
541 */
542 if (ctx->chip_class <= VI) {
543 LLVMValueRef ge0;
544 ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, "");
545 tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, "");
546 }
547
548 coords_arg[3] = tmp;
Nicolai Hähnlee0af3be2017-09-13 10:47:02 +0200549 }
550
Dave Airlie13a28ff2017-02-03 10:05:00 +1000551 build_cube_intrinsic(ctx, coords_arg, &selcoords);
552
Marek Olšák7f1446a2017-02-26 00:41:37 +0100553 invma = ac_build_intrinsic(ctx, "llvm.fabs.f32",
Dave Airlie13a28ff2017-02-03 10:05:00 +1000554 ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE);
Marek Olšák7f1446a2017-02-26 00:41:37 +0100555 invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000556
557 for (int i = 0; i < 2; ++i)
558 coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");
559
560 coords[2] = selcoords.id;
561
562 if (is_deriv && derivs_arg) {
563 LLVMValueRef derivs[4];
564 int axis;
565
566 /* Convert cube derivatives to 2D derivatives. */
567 for (axis = 0; axis < 2; axis++) {
568 LLVMValueRef deriv_st[2];
569 LLVMValueRef deriv_ma;
570
571 /* Transform the derivative alongside the texture
572 * coordinate. Mathematically, the correct formula is
573 * as follows. Assume we're projecting onto the +Z face
574 * and denote by dx/dh the derivative of the (original)
575 * X texture coordinate with respect to horizontal
576 * window coordinates. The projection onto the +Z face
577 * plane is:
578 *
579 * f(x,z) = x/z
580 *
581 * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
582 * = 1/z * dx/dh - x/z * 1/z * dz/dh.
583 *
584 * This motivatives the implementation below.
585 *
586 * Whether this actually gives the expected results for
587 * apps that might feed in derivatives obtained via
588 * finite differences is anyone's guess. The OpenGL spec
589 * seems awfully quiet about how textureGrad for cube
590 * maps should be handled.
591 */
Nicolai Hähnlea6ea4c12017-09-22 19:14:16 +0200592 build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3],
Dave Airlie13a28ff2017-02-03 10:05:00 +1000593 deriv_st, &deriv_ma);
594
595 deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
596
597 for (int i = 0; i < 2; ++i)
598 derivs[axis * 2 + i] =
599 LLVMBuildFSub(builder,
600 LLVMBuildFMul(builder, deriv_st[i], invma, ""),
601 LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
602 }
603
604 memcpy(derivs_arg, derivs, sizeof(derivs));
605 }
606
607 /* Shift the texture coordinate. This must be applied after the
608 * derivative calculation.
609 */
610 for (int i = 0; i < 2; ++i)
611 coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");
612
613 if (is_array) {
614 /* for cube arrays coord.z = coord.w(array_index) * 8 + face */
615 /* coords_arg.w component - array_index for cube arrays */
616 LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), "");
617 coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], "");
618 }
619
620 memcpy(coords_arg, coords, sizeof(coords));
621}
622
623
624LLVMValueRef
625ac_build_fs_interp(struct ac_llvm_context *ctx,
626 LLVMValueRef llvm_chan,
627 LLVMValueRef attr_number,
628 LLVMValueRef params,
629 LLVMValueRef i,
630 LLVMValueRef j)
631{
632 LLVMValueRef args[5];
633 LLVMValueRef p1;
634
635 if (HAVE_LLVM < 0x0400) {
636 LLVMValueRef ij[2];
637 ij[0] = LLVMBuildBitCast(ctx->builder, i, ctx->i32, "");
638 ij[1] = LLVMBuildBitCast(ctx->builder, j, ctx->i32, "");
639
640 args[0] = llvm_chan;
641 args[1] = attr_number;
642 args[2] = params;
643 args[3] = ac_build_gather_values(ctx, ij, 2);
Marek Olšák7f1446a2017-02-26 00:41:37 +0100644 return ac_build_intrinsic(ctx, "llvm.SI.fs.interp",
645 ctx->f32, args, 4,
646 AC_FUNC_ATTR_READNONE);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000647 }
648
649 args[0] = i;
650 args[1] = llvm_chan;
651 args[2] = attr_number;
652 args[3] = params;
653
Marek Olšák7f1446a2017-02-26 00:41:37 +0100654 p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
655 ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000656
657 args[0] = p1;
658 args[1] = j;
659 args[2] = llvm_chan;
660 args[3] = attr_number;
661 args[4] = params;
662
Marek Olšák7f1446a2017-02-26 00:41:37 +0100663 return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2",
664 ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000665}
666
667LLVMValueRef
668ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
669 LLVMValueRef parameter,
670 LLVMValueRef llvm_chan,
671 LLVMValueRef attr_number,
672 LLVMValueRef params)
673{
674 LLVMValueRef args[4];
675 if (HAVE_LLVM < 0x0400) {
676 args[0] = llvm_chan;
677 args[1] = attr_number;
678 args[2] = params;
679
Marek Olšák7f1446a2017-02-26 00:41:37 +0100680 return ac_build_intrinsic(ctx,
681 "llvm.SI.fs.constant",
682 ctx->f32, args, 3,
683 AC_FUNC_ATTR_READNONE);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000684 }
685
686 args[0] = parameter;
687 args[1] = llvm_chan;
688 args[2] = attr_number;
689 args[3] = params;
690
Marek Olšák7f1446a2017-02-26 00:41:37 +0100691 return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.mov",
692 ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000693}
694
695LLVMValueRef
696ac_build_gep0(struct ac_llvm_context *ctx,
697 LLVMValueRef base_ptr,
698 LLVMValueRef index)
699{
700 LLVMValueRef indices[2] = {
701 LLVMConstInt(ctx->i32, 0, 0),
702 index,
703 };
704 return LLVMBuildGEP(ctx->builder, base_ptr,
705 indices, 2, "");
706}
707
708void
709ac_build_indexed_store(struct ac_llvm_context *ctx,
710 LLVMValueRef base_ptr, LLVMValueRef index,
711 LLVMValueRef value)
712{
713 LLVMBuildStore(ctx->builder, value,
714 ac_build_gep0(ctx, base_ptr, index));
715}
716
717/**
718 * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
719 * It's equivalent to doing a load from &base_ptr[index].
720 *
721 * \param base_ptr Where the array starts.
722 * \param index The element index into the array.
723 * \param uniform Whether the base_ptr and index can be assumed to be
Marek Olšák854593b2017-10-08 20:05:44 +0200724 * dynamically uniform (i.e. load to an SGPR)
725 * \param invariant Whether the load is invariant (no other opcodes affect it)
Dave Airlie13a28ff2017-02-03 10:05:00 +1000726 */
Marek Olšák854593b2017-10-08 20:05:44 +0200727static LLVMValueRef
728ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
729 LLVMValueRef index, bool uniform, bool invariant)
Dave Airlie13a28ff2017-02-03 10:05:00 +1000730{
Marek Olšák854593b2017-10-08 20:05:44 +0200731 LLVMValueRef pointer, result;
Dave Airlie13a28ff2017-02-03 10:05:00 +1000732
733 pointer = ac_build_gep0(ctx, base_ptr, index);
734 if (uniform)
735 LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
Marek Olšák854593b2017-10-08 20:05:44 +0200736 result = LLVMBuildLoad(ctx->builder, pointer, "");
737 if (invariant)
738 LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
739 return result;
Dave Airlie13a28ff2017-02-03 10:05:00 +1000740}
741
Marek Olšák854593b2017-10-08 20:05:44 +0200742LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
743 LLVMValueRef index)
Dave Airlie13a28ff2017-02-03 10:05:00 +1000744{
Marek Olšák854593b2017-10-08 20:05:44 +0200745 return ac_build_load_custom(ctx, base_ptr, index, false, false);
746}
747
748LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
749 LLVMValueRef base_ptr, LLVMValueRef index)
750{
751 return ac_build_load_custom(ctx, base_ptr, index, false, true);
752}
753
754LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
755 LLVMValueRef base_ptr, LLVMValueRef index)
756{
757 return ac_build_load_custom(ctx, base_ptr, index, true, true);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000758}
759
760/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
761 * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
762 * or v4i32 (num_channels=3,4).
763 */
764void
Marek Olšák27439df2017-02-24 01:20:35 +0100765ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
766 LLVMValueRef rsrc,
767 LLVMValueRef vdata,
768 unsigned num_channels,
Marek Olšák8cfdbba2017-02-24 20:23:23 +0100769 LLVMValueRef voffset,
Marek Olšák27439df2017-02-24 01:20:35 +0100770 LLVMValueRef soffset,
771 unsigned inst_offset,
Marek Olšák27439df2017-02-24 01:20:35 +0100772 bool glc,
Marek Olšák97e21cf2017-02-24 02:09:47 +0100773 bool slc,
774 bool writeonly_memory,
Marek Olšákbcd3e762017-09-30 15:36:18 +0200775 bool swizzle_enable_hint)
Dave Airlie13a28ff2017-02-03 10:05:00 +1000776{
Marek Olšákbcd3e762017-09-30 15:36:18 +0200777 /* SWIZZLE_ENABLE requires that soffset isn't folded into voffset
778 * (voffset is swizzled, but soffset isn't swizzled).
779 * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
780 */
781 if (!swizzle_enable_hint) {
Marek Olšák97e21cf2017-02-24 02:09:47 +0100782 /* Split 3 channel stores, becase LLVM doesn't support 3-channel
783 * intrinsics. */
784 if (num_channels == 3) {
785 LLVMValueRef v[3], v01;
786
787 for (int i = 0; i < 3; i++) {
788 v[i] = LLVMBuildExtractElement(ctx->builder, vdata,
789 LLVMConstInt(ctx->i32, i, 0), "");
790 }
791 v01 = ac_build_gather_values(ctx, v, 2);
792
793 ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
794 soffset, inst_offset, glc, slc,
Marek Olšákbcd3e762017-09-30 15:36:18 +0200795 writeonly_memory, swizzle_enable_hint);
Marek Olšák97e21cf2017-02-24 02:09:47 +0100796 ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
797 soffset, inst_offset + 8,
798 glc, slc,
Marek Olšákbcd3e762017-09-30 15:36:18 +0200799 writeonly_memory, swizzle_enable_hint);
Marek Olšák97e21cf2017-02-24 02:09:47 +0100800 return;
801 }
802
803 unsigned func = CLAMP(num_channels, 1, 3) - 1;
804 static const char *types[] = {"f32", "v2f32", "v4f32"};
805 char name[256];
806 LLVMValueRef offset = soffset;
807
808 if (inst_offset)
809 offset = LLVMBuildAdd(ctx->builder, offset,
810 LLVMConstInt(ctx->i32, inst_offset, 0), "");
811 if (voffset)
812 offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
813
814 LLVMValueRef args[] = {
Connor Abbottb909d272017-07-18 17:35:35 -0700815 ac_to_float(ctx, vdata),
Marek Olšák97e21cf2017-02-24 02:09:47 +0100816 LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
817 LLVMConstInt(ctx->i32, 0, 0),
818 offset,
819 LLVMConstInt(ctx->i1, glc, 0),
820 LLVMConstInt(ctx->i1, slc, 0),
821 };
822
823 snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
824 types[func]);
825
Marek Olšák7f1446a2017-02-26 00:41:37 +0100826 ac_build_intrinsic(ctx, name, ctx->voidt,
827 args, ARRAY_SIZE(args),
828 writeonly_memory ?
829 AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY :
830 AC_FUNC_ATTR_WRITEONLY);
Marek Olšák97e21cf2017-02-24 02:09:47 +0100831 return;
832 }
833
Marek Olšák27439df2017-02-24 01:20:35 +0100834 static unsigned dfmt[] = {
835 V_008F0C_BUF_DATA_FORMAT_32,
836 V_008F0C_BUF_DATA_FORMAT_32_32,
837 V_008F0C_BUF_DATA_FORMAT_32_32_32,
838 V_008F0C_BUF_DATA_FORMAT_32_32_32_32
839 };
840 assert(num_channels >= 1 && num_channels <= 4);
841
Dave Airlie13a28ff2017-02-03 10:05:00 +1000842 LLVMValueRef args[] = {
843 rsrc,
844 vdata,
845 LLVMConstInt(ctx->i32, num_channels, 0),
Marek Olšák8cfdbba2017-02-24 20:23:23 +0100846 voffset ? voffset : LLVMGetUndef(ctx->i32),
Dave Airlie13a28ff2017-02-03 10:05:00 +1000847 soffset,
848 LLVMConstInt(ctx->i32, inst_offset, 0),
Marek Olšák27439df2017-02-24 01:20:35 +0100849 LLVMConstInt(ctx->i32, dfmt[num_channels - 1], 0),
850 LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, 0),
Marek Olšák8cfdbba2017-02-24 20:23:23 +0100851 LLVMConstInt(ctx->i32, voffset != NULL, 0),
Marek Olšák27439df2017-02-24 01:20:35 +0100852 LLVMConstInt(ctx->i32, 0, 0), /* idxen */
Dave Airlie13a28ff2017-02-03 10:05:00 +1000853 LLVMConstInt(ctx->i32, glc, 0),
854 LLVMConstInt(ctx->i32, slc, 0),
Marek Olšák27439df2017-02-24 01:20:35 +0100855 LLVMConstInt(ctx->i32, 0, 0), /* tfe*/
Dave Airlie13a28ff2017-02-03 10:05:00 +1000856 };
857
858 /* The instruction offset field has 12 bits */
Marek Olšák8cfdbba2017-02-24 20:23:23 +0100859 assert(voffset || inst_offset < (1 << 12));
Dave Airlie13a28ff2017-02-03 10:05:00 +1000860
861 /* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */
862 unsigned func = CLAMP(num_channels, 1, 3) - 1;
863 const char *types[] = {"i32", "v2i32", "v4i32"};
864 char name[256];
865 snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);
866
Marek Olšák7f1446a2017-02-26 00:41:37 +0100867 ac_build_intrinsic(ctx, name, ctx->voidt,
868 args, ARRAY_SIZE(args),
869 AC_FUNC_ATTR_LEGACY);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000870}
871
Dave Airlie13a28ff2017-02-03 10:05:00 +1000872LLVMValueRef
873ac_build_buffer_load(struct ac_llvm_context *ctx,
874 LLVMValueRef rsrc,
875 int num_channels,
876 LLVMValueRef vindex,
877 LLVMValueRef voffset,
878 LLVMValueRef soffset,
879 unsigned inst_offset,
880 unsigned glc,
Marek Olšáke729dc72017-02-24 17:16:28 +0100881 unsigned slc,
Marek Olšáke019ea82017-05-19 15:02:34 +0200882 bool can_speculate,
883 bool allow_smem)
Dave Airlie13a28ff2017-02-03 10:05:00 +1000884{
Marek Olšáke019ea82017-05-19 15:02:34 +0200885 LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0);
886 if (voffset)
887 offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
888 if (soffset)
889 offset = LLVMBuildAdd(ctx->builder, offset, soffset, "");
890
891 /* TODO: VI and later generations can use SMEM with GLC=1.*/
892 if (allow_smem && !glc && !slc) {
893 assert(vindex == NULL);
894
895 LLVMValueRef result[4];
896
897 for (int i = 0; i < num_channels; i++) {
898 if (i) {
899 offset = LLVMBuildAdd(ctx->builder, offset,
900 LLVMConstInt(ctx->i32, 4, 0), "");
901 }
902 LLVMValueRef args[2] = {rsrc, offset};
903 result[i] = ac_build_intrinsic(ctx, "llvm.SI.load.const.v4i32",
904 ctx->f32, args, 2,
905 AC_FUNC_ATTR_READNONE |
906 AC_FUNC_ATTR_LEGACY);
907 }
908 if (num_channels == 1)
909 return result[0];
910
911 if (num_channels == 3)
912 result[num_channels++] = LLVMGetUndef(ctx->f32);
913 return ac_build_gather_values(ctx, result, num_channels);
914 }
915
Dave Airlie13a28ff2017-02-03 10:05:00 +1000916 unsigned func = CLAMP(num_channels, 1, 3) - 1;
917
Marek Olšák12beef02017-04-25 02:18:10 +0200918 LLVMValueRef args[] = {
919 LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
920 vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
Marek Olšáke019ea82017-05-19 15:02:34 +0200921 offset,
Marek Olšák12beef02017-04-25 02:18:10 +0200922 LLVMConstInt(ctx->i1, glc, 0),
923 LLVMConstInt(ctx->i1, slc, 0)
924 };
Dave Airlie13a28ff2017-02-03 10:05:00 +1000925
Marek Olšák12beef02017-04-25 02:18:10 +0200926 LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2),
927 ctx->v4f32};
928 const char *type_names[] = {"f32", "v2f32", "v4f32"};
929 char name[256];
Dave Airlie13a28ff2017-02-03 10:05:00 +1000930
Marek Olšák12beef02017-04-25 02:18:10 +0200931 snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
932 type_names[func]);
933
934 return ac_build_intrinsic(ctx, name, types[func], args,
935 ARRAY_SIZE(args),
936 /* READNONE means writes can't affect it, while
937 * READONLY means that writes can affect it. */
Marek Olšáke1942c92017-05-25 16:13:54 +0200938 can_speculate && HAVE_LLVM >= 0x0400 ?
Marek Olšák12beef02017-04-25 02:18:10 +0200939 AC_FUNC_ATTR_READNONE :
940 AC_FUNC_ATTR_READONLY);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000941}
942
Marek Olšák94811dc2017-02-25 23:40:52 +0100943LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
944 LLVMValueRef rsrc,
945 LLVMValueRef vindex,
946 LLVMValueRef voffset,
Marek Olšáke1942c92017-05-25 16:13:54 +0200947 bool can_speculate)
Marek Olšák94811dc2017-02-25 23:40:52 +0100948{
Marek Olšák12beef02017-04-25 02:18:10 +0200949 LLVMValueRef args [] = {
950 LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
Marek Olšák94811dc2017-02-25 23:40:52 +0100951 vindex,
Marek Olšák12beef02017-04-25 02:18:10 +0200952 voffset,
Dave Airliea76b6c22017-10-26 15:20:15 +1000953 ctx->i1false, /* glc */
954 ctx->i1false, /* slc */
Marek Olšák94811dc2017-02-25 23:40:52 +0100955 };
Marek Olšák12beef02017-04-25 02:18:10 +0200956
957 return ac_build_intrinsic(ctx,
958 "llvm.amdgcn.buffer.load.format.v4f32",
959 ctx->v4f32, args, ARRAY_SIZE(args),
960 /* READNONE means writes can't affect it, while
961 * READONLY means that writes can affect it. */
Marek Olšáke1942c92017-05-25 16:13:54 +0200962 can_speculate && HAVE_LLVM >= 0x0400 ?
Marek Olšák12beef02017-04-25 02:18:10 +0200963 AC_FUNC_ATTR_READNONE :
964 AC_FUNC_ATTR_READONLY);
Marek Olšák94811dc2017-02-25 23:40:52 +0100965}
966
Dave Airlie13a28ff2017-02-03 10:05:00 +1000967/**
968 * Set range metadata on an instruction. This can only be used on load and
969 * call instructions. If you know an instruction can only produce the values
970 * 0, 1, 2, you would do set_range_metadata(value, 0, 3);
971 * \p lo is the minimum value inclusive.
972 * \p hi is the maximum value exclusive.
973 */
974static void set_range_metadata(struct ac_llvm_context *ctx,
975 LLVMValueRef value, unsigned lo, unsigned hi)
976{
977 LLVMValueRef range_md, md_args[2];
978 LLVMTypeRef type = LLVMTypeOf(value);
979 LLVMContextRef context = LLVMGetTypeContext(type);
980
981 md_args[0] = LLVMConstInt(type, lo, false);
982 md_args[1] = LLVMConstInt(type, hi, false);
983 range_md = LLVMMDNodeInContext(context, md_args, 2);
984 LLVMSetMetadata(value, ctx->range_md_kind, range_md);
985}
986
987LLVMValueRef
988ac_get_thread_id(struct ac_llvm_context *ctx)
989{
990 LLVMValueRef tid;
991
Marek Olšák7e1faa72017-03-05 00:15:31 +0100992 LLVMValueRef tid_args[2];
993 tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
994 tid_args[1] = LLVMConstInt(ctx->i32, 0, false);
995 tid_args[1] = ac_build_intrinsic(ctx,
996 "llvm.amdgcn.mbcnt.lo", ctx->i32,
997 tid_args, 2, AC_FUNC_ATTR_READNONE);
Dave Airlie13a28ff2017-02-03 10:05:00 +1000998
Marek Olšák7e1faa72017-03-05 00:15:31 +0100999 tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi",
1000 ctx->i32, tid_args,
1001 2, AC_FUNC_ATTR_READNONE);
Dave Airlie13a28ff2017-02-03 10:05:00 +10001002 set_range_metadata(ctx, tid, 0, 64);
1003 return tid;
1004}
1005
1006/*
1007 * SI implements derivatives using the local data store (LDS)
1008 * All writes to the LDS happen in all executing threads at
1009 * the same time. TID is the Thread ID for the current
1010 * thread and is a value between 0 and 63, representing
1011 * the thread's position in the wavefront.
1012 *
1013 * For the pixel shader threads are grouped into quads of four pixels.
1014 * The TIDs of the pixels of a quad are:
1015 *
1016 * +------+------+
1017 * |4n + 0|4n + 1|
1018 * +------+------+
1019 * |4n + 2|4n + 3|
1020 * +------+------+
1021 *
1022 * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
1023 * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
1024 * the current pixel's column, and masking with 0xfffffffe yields the TID
1025 * of the left pixel of the current pixel's row.
1026 *
1027 * Adding 1 yields the TID of the pixel to the right of the left pixel, and
1028 * adding 2 yields the TID of the pixel below the top pixel.
1029 */
1030LLVMValueRef
Marek Olšák7f1446a2017-02-26 00:41:37 +01001031ac_build_ddxy(struct ac_llvm_context *ctx,
Marek Olšák7f1446a2017-02-26 00:41:37 +01001032 uint32_t mask,
1033 int idx,
Marek Olšák7f1446a2017-02-26 00:41:37 +01001034 LLVMValueRef val)
Dave Airlie13a28ff2017-02-03 10:05:00 +10001035{
Dave Airliecb6f16d2017-08-01 05:10:49 +01001036 LLVMValueRef tl, trbl, args[2];
Dave Airlie13a28ff2017-02-03 10:05:00 +10001037 LLVMValueRef result;
1038
Nicolai Hähnle67724522017-09-13 14:38:17 +02001039 if (ctx->chip_class >= VI) {
Dave Airliecb6f16d2017-08-01 05:10:49 +01001040 LLVMValueRef thread_id, tl_tid, trbl_tid;
1041 thread_id = ac_get_thread_id(ctx);
1042
1043 tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
1044 LLVMConstInt(ctx->i32, mask, false), "");
1045
1046 trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
1047 LLVMConstInt(ctx->i32, idx, false), "");
1048
Dave Airlie13a28ff2017-02-03 10:05:00 +10001049 args[0] = LLVMBuildMul(ctx->builder, tl_tid,
1050 LLVMConstInt(ctx->i32, 4, false), "");
1051 args[1] = val;
Marek Olšák7f1446a2017-02-26 00:41:37 +01001052 tl = ac_build_intrinsic(ctx,
1053 "llvm.amdgcn.ds.bpermute", ctx->i32,
Marek Olšákd5d74fe2017-02-26 19:00:44 +01001054 args, 2,
1055 AC_FUNC_ATTR_READNONE |
1056 AC_FUNC_ATTR_CONVERGENT);
Dave Airlie13a28ff2017-02-03 10:05:00 +10001057
1058 args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
1059 LLVMConstInt(ctx->i32, 4, false), "");
Marek Olšák7f1446a2017-02-26 00:41:37 +01001060 trbl = ac_build_intrinsic(ctx,
1061 "llvm.amdgcn.ds.bpermute", ctx->i32,
Marek Olšákd5d74fe2017-02-26 19:00:44 +01001062 args, 2,
1063 AC_FUNC_ATTR_READNONE |
1064 AC_FUNC_ATTR_CONVERGENT);
Dave Airlie13a28ff2017-02-03 10:05:00 +10001065 } else {
Marek Olšák94d800b2017-10-04 16:59:40 +02001066 uint32_t masks[2] = {};
Dave Airlie13a28ff2017-02-03 10:05:00 +10001067
Dave Airliecb6f16d2017-08-01 05:10:49 +01001068 switch (mask) {
1069 case AC_TID_MASK_TOP_LEFT:
1070 masks[0] = 0x8000;
1071 if (idx == 1)
1072 masks[1] = 0x8055;
1073 else
1074 masks[1] = 0x80aa;
Dave Airlie13a28ff2017-02-03 10:05:00 +10001075
Dave Airliecb6f16d2017-08-01 05:10:49 +01001076 break;
1077 case AC_TID_MASK_TOP:
1078 masks[0] = 0x8044;
1079 masks[1] = 0x80ee;
1080 break;
1081 case AC_TID_MASK_LEFT:
1082 masks[0] = 0x80a0;
1083 masks[1] = 0x80f5;
1084 break;
Marek Olšák94d800b2017-10-04 16:59:40 +02001085 default:
1086 assert(0);
Dave Airliecb6f16d2017-08-01 05:10:49 +01001087 }
1088
1089 args[0] = val;
1090 args[1] = LLVMConstInt(ctx->i32, masks[0], false);
1091
1092 tl = ac_build_intrinsic(ctx,
1093 "llvm.amdgcn.ds.swizzle", ctx->i32,
1094 args, 2,
1095 AC_FUNC_ATTR_READNONE |
1096 AC_FUNC_ATTR_CONVERGENT);
1097
1098 args[1] = LLVMConstInt(ctx->i32, masks[1], false);
1099 trbl = ac_build_intrinsic(ctx,
1100 "llvm.amdgcn.ds.swizzle", ctx->i32,
1101 args, 2,
1102 AC_FUNC_ATTR_READNONE |
1103 AC_FUNC_ATTR_CONVERGENT);
Dave Airlie13a28ff2017-02-03 10:05:00 +10001104 }
1105
1106 tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
1107 trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
1108 result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
1109 return result;
1110}
Dave Airlief32955b2017-02-13 22:08:30 +00001111
1112void
Marek Olšák7f1446a2017-02-26 00:41:37 +01001113ac_build_sendmsg(struct ac_llvm_context *ctx,
1114 uint32_t msg,
1115 LLVMValueRef wave_id)
Dave Airlief32955b2017-02-13 22:08:30 +00001116{
1117 LLVMValueRef args[2];
1118 const char *intr_name = (HAVE_LLVM < 0x0400) ? "llvm.SI.sendmsg" : "llvm.amdgcn.s.sendmsg";
1119 args[0] = LLVMConstInt(ctx->i32, msg, false);
1120 args[1] = wave_id;
Marek Olšák7f1446a2017-02-26 00:41:37 +01001121 ac_build_intrinsic(ctx, intr_name, ctx->voidt, args, 2, 0);
Dave Airlief32955b2017-02-13 22:08:30 +00001122}
Dave Airliecae1ff12017-02-16 03:42:56 +00001123
1124LLVMValueRef
Marek Olšák7f1446a2017-02-26 00:41:37 +01001125ac_build_imsb(struct ac_llvm_context *ctx,
1126 LLVMValueRef arg,
1127 LLVMTypeRef dst_type)
Dave Airliecae1ff12017-02-16 03:42:56 +00001128{
Marek Olšákedd23e02017-02-17 14:26:07 +01001129 const char *intr_name = (HAVE_LLVM < 0x0400) ? "llvm.AMDGPU.flbit.i32" :
1130 "llvm.amdgcn.sffbh.i32";
Marek Olšák7f1446a2017-02-26 00:41:37 +01001131 LLVMValueRef msb = ac_build_intrinsic(ctx, intr_name,
1132 dst_type, &arg, 1,
1133 AC_FUNC_ATTR_READNONE);
Dave Airliecae1ff12017-02-16 03:42:56 +00001134
1135 /* The HW returns the last bit index from MSB, but NIR/TGSI wants
1136 * the index from LSB. Invert it by doing "31 - msb". */
1137 msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
1138 msb, "");
1139
1140 LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
1141 LLVMValueRef cond = LLVMBuildOr(ctx->builder,
1142 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
1143 arg, LLVMConstInt(ctx->i32, 0, 0), ""),
1144 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
1145 arg, all_ones, ""), "");
1146
1147 return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
1148}
Dave Airlie0ec66b92017-02-16 03:53:27 +00001149
1150LLVMValueRef
Marek Olšák7f1446a2017-02-26 00:41:37 +01001151ac_build_umsb(struct ac_llvm_context *ctx,
1152 LLVMValueRef arg,
1153 LLVMTypeRef dst_type)
Dave Airlie0ec66b92017-02-16 03:53:27 +00001154{
1155 LLVMValueRef args[2] = {
1156 arg,
Dave Airliea76b6c22017-10-26 15:20:15 +10001157 ctx->i1true,
Dave Airlie0ec66b92017-02-16 03:53:27 +00001158 };
Marek Olšák7f1446a2017-02-26 00:41:37 +01001159 LLVMValueRef msb = ac_build_intrinsic(ctx, "llvm.ctlz.i32",
1160 dst_type, args, ARRAY_SIZE(args),
1161 AC_FUNC_ATTR_READNONE);
Dave Airlie0ec66b92017-02-16 03:53:27 +00001162
1163 /* The HW returns the last bit index from MSB, but TGSI/NIR wants
1164 * the index from LSB. Invert it by doing "31 - msb". */
1165 msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
1166 msb, "");
1167
1168 /* check for zero */
1169 return LLVMBuildSelect(ctx->builder,
1170 LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg,
1171 LLVMConstInt(ctx->i32, 0, 0), ""),
1172 LLVMConstInt(ctx->i32, -1, true), msb, "");
1173}
Marek Olšák660b55e2017-02-16 22:41:16 +01001174
Nicolai Hähnlea69afb62017-06-25 17:56:37 +02001175LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a,
1176 LLVMValueRef b)
1177{
1178 LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, "");
1179 return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
1180}
1181
Marek Olšák7f1446a2017-02-26 00:41:37 +01001182LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
Marek Olšák660b55e2017-02-16 22:41:16 +01001183{
Marek Olšák675ef9c2017-02-16 22:52:53 +01001184 if (HAVE_LLVM >= 0x0500) {
1185 LLVMValueRef max[2] = {
1186 value,
1187 LLVMConstReal(ctx->f32, 0),
1188 };
1189 LLVMValueRef min[2] = {
1190 LLVMConstReal(ctx->f32, 1),
1191 };
1192
Marek Olšák7f1446a2017-02-26 00:41:37 +01001193 min[1] = ac_build_intrinsic(ctx, "llvm.maxnum.f32",
1194 ctx->f32, max, 2,
1195 AC_FUNC_ATTR_READNONE);
1196 return ac_build_intrinsic(ctx, "llvm.minnum.f32",
1197 ctx->f32, min, 2,
1198 AC_FUNC_ATTR_READNONE);
Marek Olšák675ef9c2017-02-16 22:52:53 +01001199 }
1200
Marek Olšák660b55e2017-02-16 22:41:16 +01001201 LLVMValueRef args[3] = {
1202 value,
1203 LLVMConstReal(ctx->f32, 0),
1204 LLVMConstReal(ctx->f32, 1),
1205 };
1206
Marek Olšák7e1faa72017-03-05 00:15:31 +01001207 return ac_build_intrinsic(ctx, "llvm.AMDGPU.clamp.", ctx->f32, args, 3,
Marek Olšák7f1446a2017-02-26 00:41:37 +01001208 AC_FUNC_ATTR_READNONE |
1209 AC_FUNC_ATTR_LEGACY);
Marek Olšák660b55e2017-02-16 22:41:16 +01001210}
Marek Olšák369f4a82017-02-23 02:06:40 +01001211
Marek Olšák7f1446a2017-02-26 00:41:37 +01001212void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a)
Marek Olšák369f4a82017-02-23 02:06:40 +01001213{
1214 LLVMValueRef args[9];
1215
Marek Olšák2b3ebe32017-02-23 02:15:54 +01001216 if (HAVE_LLVM >= 0x0500) {
1217 args[0] = LLVMConstInt(ctx->i32, a->target, 0);
1218 args[1] = LLVMConstInt(ctx->i32, a->enabled_channels, 0);
1219
1220 if (a->compr) {
1221 LLVMTypeRef i16 = LLVMInt16TypeInContext(ctx->context);
1222 LLVMTypeRef v2i16 = LLVMVectorType(i16, 2);
1223
1224 args[2] = LLVMBuildBitCast(ctx->builder, a->out[0],
1225 v2i16, "");
1226 args[3] = LLVMBuildBitCast(ctx->builder, a->out[1],
1227 v2i16, "");
1228 args[4] = LLVMConstInt(ctx->i1, a->done, 0);
1229 args[5] = LLVMConstInt(ctx->i1, a->valid_mask, 0);
1230
Marek Olšák7f1446a2017-02-26 00:41:37 +01001231 ac_build_intrinsic(ctx, "llvm.amdgcn.exp.compr.v2i16",
1232 ctx->voidt, args, 6, 0);
Marek Olšák2b3ebe32017-02-23 02:15:54 +01001233 } else {
1234 args[2] = a->out[0];
1235 args[3] = a->out[1];
1236 args[4] = a->out[2];
1237 args[5] = a->out[3];
1238 args[6] = LLVMConstInt(ctx->i1, a->done, 0);
1239 args[7] = LLVMConstInt(ctx->i1, a->valid_mask, 0);
1240
Marek Olšák7f1446a2017-02-26 00:41:37 +01001241 ac_build_intrinsic(ctx, "llvm.amdgcn.exp.f32",
1242 ctx->voidt, args, 8, 0);
Marek Olšák2b3ebe32017-02-23 02:15:54 +01001243 }
1244 return;
1245 }
1246
Marek Olšák369f4a82017-02-23 02:06:40 +01001247 args[0] = LLVMConstInt(ctx->i32, a->enabled_channels, 0);
1248 args[1] = LLVMConstInt(ctx->i32, a->valid_mask, 0);
1249 args[2] = LLVMConstInt(ctx->i32, a->done, 0);
1250 args[3] = LLVMConstInt(ctx->i32, a->target, 0);
1251 args[4] = LLVMConstInt(ctx->i32, a->compr, 0);
1252 memcpy(args + 5, a->out, sizeof(a->out[0]) * 4);
1253
Marek Olšák7f1446a2017-02-26 00:41:37 +01001254 ac_build_intrinsic(ctx, "llvm.SI.export", ctx->voidt, args, 9,
1255 AC_FUNC_ATTR_LEGACY);
Marek Olšák369f4a82017-02-23 02:06:40 +01001256}
Marek Olšákad18d7f2017-02-23 23:00:19 +01001257
Marek Olšák7f1446a2017-02-26 00:41:37 +01001258LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
1259 struct ac_image_args *a)
Marek Olšákad18d7f2017-02-23 23:00:19 +01001260{
1261 LLVMTypeRef dst_type;
1262 LLVMValueRef args[11];
1263 unsigned num_args = 0;
Eric Anholt34c04c72017-10-17 13:45:09 -07001264 const char *name = NULL;
Marek Olšákad18d7f2017-02-23 23:00:19 +01001265 char intr_name[128], type[64];
1266
Marek Olšák4b2e5b92017-02-23 23:37:59 +01001267 if (HAVE_LLVM >= 0x0400) {
1268 bool sample = a->opcode == ac_image_sample ||
1269 a->opcode == ac_image_gather4 ||
1270 a->opcode == ac_image_get_lod;
1271
1272 if (sample)
Connor Abbottb909d272017-07-18 17:35:35 -07001273 args[num_args++] = ac_to_float(ctx, a->addr);
Marek Olšák4b2e5b92017-02-23 23:37:59 +01001274 else
1275 args[num_args++] = a->addr;
1276
1277 args[num_args++] = a->resource;
1278 if (sample)
1279 args[num_args++] = a->sampler;
1280 args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
1281 if (sample)
1282 args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0);
Dave Airliea76b6c22017-10-26 15:20:15 +10001283 args[num_args++] = ctx->i1false; /* glc */
1284 args[num_args++] = ctx->i1false; /* slc */
1285 args[num_args++] = ctx->i1false; /* lwe */
Marek Olšák4b2e5b92017-02-23 23:37:59 +01001286 args[num_args++] = LLVMConstInt(ctx->i1, a->da, 0);
1287
1288 switch (a->opcode) {
1289 case ac_image_sample:
1290 name = "llvm.amdgcn.image.sample";
1291 break;
1292 case ac_image_gather4:
1293 name = "llvm.amdgcn.image.gather4";
1294 break;
1295 case ac_image_load:
1296 name = "llvm.amdgcn.image.load";
1297 break;
1298 case ac_image_load_mip:
1299 name = "llvm.amdgcn.image.load.mip";
1300 break;
1301 case ac_image_get_lod:
1302 name = "llvm.amdgcn.image.getlod";
1303 break;
1304 case ac_image_get_resinfo:
1305 name = "llvm.amdgcn.image.getresinfo";
1306 break;
Samuel Pitoiseta1c37ff2017-04-07 18:44:16 +02001307 default:
1308 unreachable("invalid image opcode");
Marek Olšák4b2e5b92017-02-23 23:37:59 +01001309 }
1310
1311 ac_build_type_name_for_intr(LLVMTypeOf(args[0]), type,
1312 sizeof(type));
1313
1314 snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.v4f32.%s.v8i32",
1315 name,
1316 a->compare ? ".c" : "",
1317 a->bias ? ".b" :
1318 a->lod ? ".l" :
1319 a->deriv ? ".d" :
1320 a->level_zero ? ".lz" : "",
1321 a->offset ? ".o" : "",
1322 type);
1323
1324 LLVMValueRef result =
Marek Olšák7f1446a2017-02-26 00:41:37 +01001325 ac_build_intrinsic(ctx, intr_name,
1326 ctx->v4f32, args, num_args,
1327 AC_FUNC_ATTR_READNONE);
Marek Olšák4b2e5b92017-02-23 23:37:59 +01001328 if (!sample) {
1329 result = LLVMBuildBitCast(ctx->builder, result,
1330 ctx->v4i32, "");
1331 }
1332 return result;
1333 }
1334
Marek Olšákad18d7f2017-02-23 23:00:19 +01001335 args[num_args++] = a->addr;
1336 args[num_args++] = a->resource;
1337
1338 if (a->opcode == ac_image_load ||
1339 a->opcode == ac_image_load_mip ||
1340 a->opcode == ac_image_get_resinfo) {
1341 dst_type = ctx->v4i32;
1342 } else {
1343 dst_type = ctx->v4f32;
1344 args[num_args++] = a->sampler;
1345 }
1346
1347 args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
1348 args[num_args++] = LLVMConstInt(ctx->i32, a->unorm, 0);
1349 args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
1350 args[num_args++] = LLVMConstInt(ctx->i32, a->da, 0);
1351 args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
1352 args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
1353 args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
1354 args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
1355
1356 switch (a->opcode) {
1357 case ac_image_sample:
1358 name = "llvm.SI.image.sample";
1359 break;
1360 case ac_image_gather4:
1361 name = "llvm.SI.gather4";
1362 break;
1363 case ac_image_load:
1364 name = "llvm.SI.image.load";
1365 break;
1366 case ac_image_load_mip:
1367 name = "llvm.SI.image.load.mip";
1368 break;
1369 case ac_image_get_lod:
1370 name = "llvm.SI.getlod";
1371 break;
1372 case ac_image_get_resinfo:
1373 name = "llvm.SI.getresinfo";
1374 break;
1375 }
1376
1377 ac_build_type_name_for_intr(LLVMTypeOf(a->addr), type, sizeof(type));
1378 snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.%s",
1379 name,
1380 a->compare ? ".c" : "",
1381 a->bias ? ".b" :
1382 a->lod ? ".l" :
1383 a->deriv ? ".d" :
1384 a->level_zero ? ".lz" : "",
1385 a->offset ? ".o" : "",
1386 type);
1387
Marek Olšák7f1446a2017-02-26 00:41:37 +01001388 return ac_build_intrinsic(ctx, intr_name,
1389 dst_type, args, num_args,
1390 AC_FUNC_ATTR_READNONE |
1391 AC_FUNC_ATTR_LEGACY);
Marek Olšákad18d7f2017-02-23 23:00:19 +01001392}
Marek Olšák653ac0b2017-02-24 16:38:25 +01001393
Marek Olšák7f1446a2017-02-26 00:41:37 +01001394LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
1395 LLVMValueRef args[2])
Marek Olšák653ac0b2017-02-24 16:38:25 +01001396{
1397 if (HAVE_LLVM >= 0x0500) {
1398 LLVMTypeRef v2f16 =
1399 LLVMVectorType(LLVMHalfTypeInContext(ctx->context), 2);
1400 LLVMValueRef res =
Marek Olšák7f1446a2017-02-26 00:41:37 +01001401 ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz",
1402 v2f16, args, 2,
1403 AC_FUNC_ATTR_READNONE);
Marek Olšák653ac0b2017-02-24 16:38:25 +01001404 return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
1405 }
1406
Marek Olšák7f1446a2017-02-26 00:41:37 +01001407 return ac_build_intrinsic(ctx, "llvm.SI.packf16", ctx->i32, args, 2,
1408 AC_FUNC_ATTR_READNONE |
1409 AC_FUNC_ATTR_LEGACY);
Marek Olšák653ac0b2017-02-24 16:38:25 +01001410}
Marek Olšák9c095922017-02-24 22:44:18 +01001411
Marek Olšák2a414c32017-10-04 05:07:50 +02001412LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1)
1413{
1414 assert(HAVE_LLVM >= 0x0600);
1415 return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1,
1416 &i1, 1, AC_FUNC_ATTR_READNONE);
1417}
1418
Marek Olšák1ff9e272017-10-04 04:51:39 +02001419void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1)
Marek Olšák9c095922017-02-24 22:44:18 +01001420{
Marek Olšák478afbe2017-10-04 05:06:57 +02001421 if (HAVE_LLVM >= 0x0600) {
1422 ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt,
1423 &i1, 1, 0);
1424 return;
1425 }
1426
Marek Olšák1ff9e272017-10-04 04:51:39 +02001427 LLVMValueRef value = LLVMBuildSelect(ctx->builder, i1,
1428 LLVMConstReal(ctx->f32, 1),
1429 LLVMConstReal(ctx->f32, -1), "");
1430 ac_build_intrinsic(ctx, "llvm.AMDGPU.kill", ctx->voidt,
1431 &value, 1, AC_FUNC_ATTR_LEGACY);
Marek Olšák9c095922017-02-24 22:44:18 +01001432}
Marek Olšákd4324dd2017-02-24 23:06:31 +01001433
Marek Olšák7f1446a2017-02-26 00:41:37 +01001434LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
1435 LLVMValueRef offset, LLVMValueRef width,
1436 bool is_signed)
Marek Olšákd4324dd2017-02-24 23:06:31 +01001437{
1438 LLVMValueRef args[] = {
1439 input,
1440 offset,
1441 width,
1442 };
1443
1444 if (HAVE_LLVM >= 0x0500) {
Marek Olšák7f1446a2017-02-26 00:41:37 +01001445 return ac_build_intrinsic(ctx,
1446 is_signed ? "llvm.amdgcn.sbfe.i32" :
1447 "llvm.amdgcn.ubfe.i32",
1448 ctx->i32, args, 3,
1449 AC_FUNC_ATTR_READNONE);
Marek Olšákd4324dd2017-02-24 23:06:31 +01001450 }
1451
Marek Olšák7f1446a2017-02-26 00:41:37 +01001452 return ac_build_intrinsic(ctx,
1453 is_signed ? "llvm.AMDGPU.bfe.i32" :
1454 "llvm.AMDGPU.bfe.u32",
1455 ctx->i32, args, 3,
1456 AC_FUNC_ATTR_READNONE |
1457 AC_FUNC_ATTR_LEGACY);
Marek Olšákd4324dd2017-02-24 23:06:31 +01001458}
Dave Airlie10ae83a2017-03-06 08:37:22 +10001459
1460void ac_get_image_intr_name(const char *base_name,
1461 LLVMTypeRef data_type,
1462 LLVMTypeRef coords_type,
1463 LLVMTypeRef rsrc_type,
1464 char *out_name, unsigned out_len)
1465{
1466 char coords_type_name[8];
1467
1468 ac_build_type_name_for_intr(coords_type, coords_type_name,
1469 sizeof(coords_type_name));
1470
1471 if (HAVE_LLVM <= 0x0309) {
1472 snprintf(out_name, out_len, "%s.%s", base_name, coords_type_name);
1473 } else {
1474 char data_type_name[8];
1475 char rsrc_type_name[8];
1476
1477 ac_build_type_name_for_intr(data_type, data_type_name,
1478 sizeof(data_type_name));
1479 ac_build_type_name_for_intr(rsrc_type, rsrc_type_name,
1480 sizeof(rsrc_type_name));
1481 snprintf(out_name, out_len, "%s.%s.%s.%s", base_name,
1482 data_type_name, coords_type_name, rsrc_type_name);
1483 }
1484}
Dave Airliee2659172017-04-25 23:33:29 +01001485
/* Operand indices of the export target and of the first output value in an
 * export call. They differ between llvm.amdgcn.exp.f32 (LLVM >= 5.0) and
 * llvm.SI.export, matching the operand order built in ac_build_export. */
#define AC_EXP_TARGET	((HAVE_LLVM < 0x0500) ? 3 : 0)
#define AC_EXP_OUT0	((HAVE_LLVM < 0x0500) ? 5 : 2)
1488
/* Classification of one export channel operand. */
enum ac_ir_type {
	AC_IR_UNDEF = 0,	/* the operand is undef */
	AC_IR_CONST = 1,	/* the operand is a floating-point constant */
	AC_IR_VALUE = 2,	/* any other operand */
};
1494
1495struct ac_vs_exp_chan
1496{
1497 LLVMValueRef value;
1498 float const_float;
1499 enum ac_ir_type type;
1500};
1501
1502struct ac_vs_exp_inst {
1503 unsigned offset;
1504 LLVMValueRef inst;
1505 struct ac_vs_exp_chan chan[4];
1506};
1507
1508struct ac_vs_exports {
1509 unsigned num;
1510 struct ac_vs_exp_inst exp[VARYING_SLOT_MAX];
1511};
1512
Dave Airliee2659172017-04-25 23:33:29 +01001513/* Return true if the PARAM export has been eliminated. */
1514static bool ac_eliminate_const_output(uint8_t *vs_output_param_offset,
1515 uint32_t num_outputs,
Marek Olšákfaa37472017-04-29 23:47:08 +02001516 struct ac_vs_exp_inst *exp)
Dave Airliee2659172017-04-25 23:33:29 +01001517{
1518 unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
1519 bool is_zero[4] = {}, is_one[4] = {};
1520
1521 for (i = 0; i < 4; i++) {
Dave Airliee2659172017-04-25 23:33:29 +01001522 /* It's a constant expression. Undef outputs are eliminated too. */
Marek Olšákfaa37472017-04-29 23:47:08 +02001523 if (exp->chan[i].type == AC_IR_UNDEF) {
Dave Airliee2659172017-04-25 23:33:29 +01001524 is_zero[i] = true;
1525 is_one[i] = true;
Marek Olšákfaa37472017-04-29 23:47:08 +02001526 } else if (exp->chan[i].type == AC_IR_CONST) {
1527 if (exp->chan[i].const_float == 0)
Dave Airliee2659172017-04-25 23:33:29 +01001528 is_zero[i] = true;
Marek Olšákfaa37472017-04-29 23:47:08 +02001529 else if (exp->chan[i].const_float == 1)
Dave Airliee2659172017-04-25 23:33:29 +01001530 is_one[i] = true;
1531 else
1532 return false; /* other constant */
1533 } else
1534 return false;
1535 }
1536
1537 /* Only certain combinations of 0 and 1 can be eliminated. */
1538 if (is_zero[0] && is_zero[1] && is_zero[2])
1539 default_val = is_zero[3] ? 0 : 1;
1540 else if (is_one[0] && is_one[1] && is_one[2])
1541 default_val = is_zero[3] ? 2 : 3;
1542 else
1543 return false;
1544
1545 /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
Marek Olšákfaa37472017-04-29 23:47:08 +02001546 LLVMInstructionEraseFromParent(exp->inst);
Dave Airliee2659172017-04-25 23:33:29 +01001547
1548 /* Change OFFSET to DEFAULT_VAL. */
1549 for (i = 0; i < num_outputs; i++) {
Marek Olšákfaa37472017-04-29 23:47:08 +02001550 if (vs_output_param_offset[i] == exp->offset) {
Dave Airliee2659172017-04-25 23:33:29 +01001551 vs_output_param_offset[i] =
1552 AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val;
1553 break;
1554 }
1555 }
1556 return true;
1557}
1558
Marek Olšákb0871542017-04-29 23:56:03 +02001559static bool ac_eliminate_duplicated_output(uint8_t *vs_output_param_offset,
1560 uint32_t num_outputs,
1561 struct ac_vs_exports *processed,
1562 struct ac_vs_exp_inst *exp)
1563{
1564 unsigned p, copy_back_channels = 0;
1565
1566 /* See if the output is already in the list of processed outputs.
1567 * The LLVMValueRef comparison relies on SSA.
1568 */
1569 for (p = 0; p < processed->num; p++) {
1570 bool different = false;
1571
1572 for (unsigned j = 0; j < 4; j++) {
1573 struct ac_vs_exp_chan *c1 = &processed->exp[p].chan[j];
1574 struct ac_vs_exp_chan *c2 = &exp->chan[j];
1575
1576 /* Treat undef as a match. */
1577 if (c2->type == AC_IR_UNDEF)
1578 continue;
1579
1580 /* If c1 is undef but c2 isn't, we can copy c2 to c1
1581 * and consider the instruction duplicated.
1582 */
1583 if (c1->type == AC_IR_UNDEF) {
1584 copy_back_channels |= 1 << j;
1585 continue;
1586 }
1587
1588 /* Test whether the channels are not equal. */
1589 if (c1->type != c2->type ||
1590 (c1->type == AC_IR_CONST &&
1591 c1->const_float != c2->const_float) ||
1592 (c1->type == AC_IR_VALUE &&
1593 c1->value != c2->value)) {
1594 different = true;
1595 break;
1596 }
1597 }
1598 if (!different)
1599 break;
1600
1601 copy_back_channels = 0;
1602 }
1603 if (p == processed->num)
1604 return false;
1605
1606 /* If a match was found, but the matching export has undef where the new
1607 * one has a normal value, copy the normal value to the undef channel.
1608 */
1609 struct ac_vs_exp_inst *match = &processed->exp[p];
1610
1611 while (copy_back_channels) {
1612 unsigned chan = u_bit_scan(&copy_back_channels);
1613
1614 assert(match->chan[chan].type == AC_IR_UNDEF);
1615 LLVMSetOperand(match->inst, AC_EXP_OUT0 + chan,
1616 exp->chan[chan].value);
1617 match->chan[chan] = exp->chan[chan];
1618 }
1619
1620 /* The PARAM export is duplicated. Kill it. */
1621 LLVMInstructionEraseFromParent(exp->inst);
1622
1623 /* Change OFFSET to the matching export. */
1624 for (unsigned i = 0; i < num_outputs; i++) {
1625 if (vs_output_param_offset[i] == exp->offset) {
1626 vs_output_param_offset[i] = match->offset;
1627 break;
1628 }
1629 }
1630 return true;
1631}
1632
Marek Olšák7647e902017-04-29 23:53:08 +02001633void ac_optimize_vs_outputs(struct ac_llvm_context *ctx,
1634 LLVMValueRef main_fn,
1635 uint8_t *vs_output_param_offset,
1636 uint32_t num_outputs,
1637 uint8_t *num_param_exports)
Dave Airliee2659172017-04-25 23:33:29 +01001638{
1639 LLVMBasicBlockRef bb;
1640 bool removed_any = false;
1641 struct ac_vs_exports exports;
1642
Dave Airliee2659172017-04-25 23:33:29 +01001643 exports.num = 0;
1644
1645 /* Process all LLVM instructions. */
1646 bb = LLVMGetFirstBasicBlock(main_fn);
1647 while (bb) {
1648 LLVMValueRef inst = LLVMGetFirstInstruction(bb);
1649
1650 while (inst) {
1651 LLVMValueRef cur = inst;
1652 inst = LLVMGetNextInstruction(inst);
Marek Olšákfaa37472017-04-29 23:47:08 +02001653 struct ac_vs_exp_inst exp;
Dave Airliee2659172017-04-25 23:33:29 +01001654
1655 if (LLVMGetInstructionOpcode(cur) != LLVMCall)
1656 continue;
1657
1658 LLVMValueRef callee = ac_llvm_get_called_value(cur);
1659
1660 if (!ac_llvm_is_function(callee))
1661 continue;
1662
1663 const char *name = LLVMGetValueName(callee);
1664 unsigned num_args = LLVMCountParams(callee);
1665
1666 /* Check if this is an export instruction. */
1667 if ((num_args != 9 && num_args != 8) ||
1668 (strcmp(name, "llvm.SI.export") &&
1669 strcmp(name, "llvm.amdgcn.exp.f32")))
1670 continue;
1671
1672 LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET);
1673 unsigned target = LLVMConstIntGetZExtValue(arg);
1674
1675 if (target < V_008DFC_SQ_EXP_PARAM)
1676 continue;
1677
1678 target -= V_008DFC_SQ_EXP_PARAM;
1679
Marek Olšákfaa37472017-04-29 23:47:08 +02001680 /* Parse the instruction. */
1681 memset(&exp, 0, sizeof(exp));
1682 exp.offset = target;
1683 exp.inst = cur;
1684
1685 for (unsigned i = 0; i < 4; i++) {
1686 LLVMValueRef v = LLVMGetOperand(cur, AC_EXP_OUT0 + i);
1687
1688 exp.chan[i].value = v;
1689
1690 if (LLVMIsUndef(v)) {
1691 exp.chan[i].type = AC_IR_UNDEF;
1692 } else if (LLVMIsAConstantFP(v)) {
1693 LLVMBool loses_info;
1694 exp.chan[i].type = AC_IR_CONST;
1695 exp.chan[i].const_float =
1696 LLVMConstRealGetDouble(v, &loses_info);
1697 } else {
1698 exp.chan[i].type = AC_IR_VALUE;
1699 }
1700 }
1701
Marek Olšákb0871542017-04-29 23:56:03 +02001702 /* Eliminate constant and duplicated PARAM exports. */
Dave Airliee2659172017-04-25 23:33:29 +01001703 if (ac_eliminate_const_output(vs_output_param_offset,
Marek Olšákb0871542017-04-29 23:56:03 +02001704 num_outputs, &exp) ||
1705 ac_eliminate_duplicated_output(vs_output_param_offset,
1706 num_outputs, &exports,
1707 &exp)) {
Dave Airliee2659172017-04-25 23:33:29 +01001708 removed_any = true;
1709 } else {
Marek Olšákfaa37472017-04-29 23:47:08 +02001710 exports.exp[exports.num++] = exp;
Dave Airliee2659172017-04-25 23:33:29 +01001711 }
1712 }
1713 bb = LLVMGetNextBasicBlock(bb);
1714 }
1715
1716 /* Remove holes in export memory due to removed PARAM exports.
1717 * This is done by renumbering all PARAM exports.
1718 */
1719 if (removed_any) {
Marek Olšák34bc4702017-05-08 16:37:26 +02001720 uint8_t old_offset[VARYING_SLOT_MAX];
Dave Airliee2659172017-04-25 23:33:29 +01001721 unsigned out, i;
1722
1723 /* Make a copy of the offsets. We need the old version while
1724 * we are modifying some of them. */
Marek Olšák34bc4702017-05-08 16:37:26 +02001725 memcpy(old_offset, vs_output_param_offset,
1726 sizeof(old_offset));
Dave Airliee2659172017-04-25 23:33:29 +01001727
1728 for (i = 0; i < exports.num; i++) {
Marek Olšákfaa37472017-04-29 23:47:08 +02001729 unsigned offset = exports.exp[i].offset;
Dave Airliee2659172017-04-25 23:33:29 +01001730
Marek Olšák34bc4702017-05-08 16:37:26 +02001731 /* Update vs_output_param_offset. Multiple outputs can
1732 * have the same offset.
1733 */
Dave Airliee2659172017-04-25 23:33:29 +01001734 for (out = 0; out < num_outputs; out++) {
Marek Olšák34bc4702017-05-08 16:37:26 +02001735 if (old_offset[out] == offset)
1736 vs_output_param_offset[out] = i;
Dave Airliee2659172017-04-25 23:33:29 +01001737 }
Marek Olšák34bc4702017-05-08 16:37:26 +02001738
1739 /* Change the PARAM offset in the instruction. */
1740 LLVMSetOperand(exports.exp[i].inst, AC_EXP_TARGET,
1741 LLVMConstInt(ctx->i32,
1742 V_008DFC_SQ_EXP_PARAM + i, 0));
Dave Airliee2659172017-04-25 23:33:29 +01001743 }
Marek Olšák34bc4702017-05-08 16:37:26 +02001744 *num_param_exports = exports.num;
Dave Airliee2659172017-04-25 23:33:29 +01001745 }
1746}
Dave Airlie1dda2142017-10-19 05:29:02 +01001747
1748void ac_init_exec_full_mask(struct ac_llvm_context *ctx)
1749{
1750 LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0);
1751 ac_build_intrinsic(ctx,
1752 "llvm.amdgcn.init.exec", ctx->voidt,
1753 &full_mask, 1, AC_FUNC_ATTR_CONVERGENT);
1754}
Dave Airlief925f5b2017-10-26 14:43:51 +10001755
1756void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx)
1757{
1758 unsigned lds_size = ctx->chip_class >= CIK ? 65536 : 32768;
1759 ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32_0,
1760 LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_LOCAL_ADDR_SPACE),
1761 "lds");
1762}
1763
1764LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
1765 LLVMValueRef dw_addr)
1766{
1767 return ac_build_load(ctx, ctx->lds, dw_addr);
1768}
1769
1770void ac_lds_store(struct ac_llvm_context *ctx,
1771 LLVMValueRef dw_addr,
1772 LLVMValueRef value)
1773{
1774 value = ac_to_integer(ctx, value);
1775 ac_build_indexed_store(ctx, ctx->lds,
1776 dw_addr, value);
1777}
Dave Airlie82d47b92017-10-26 15:28:41 +10001778
1779LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
1780 LLVMTypeRef dst_type,
1781 LLVMValueRef src0)
1782{
1783 LLVMValueRef params[2] = {
1784 src0,
1785
1786 /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
1787 * add special code to check for x=0. The reason is that
1788 * the LLVM behavior for x=0 is different from what we
1789 * need here. However, LLVM also assumes that ffs(x) is
1790 * in [0, 31], but GLSL expects that ffs(0) = -1, so
1791 * a conditional assignment to handle 0 is still required.
1792 *
1793 * The hardware already implements the correct behavior.
1794 */
1795 LLVMConstInt(ctx->i1, 1, false),
1796 };
1797
1798 LLVMValueRef lsb = ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32,
1799 params, 2,
1800 AC_FUNC_ATTR_READNONE);
1801
1802 /* TODO: We need an intrinsic to skip this conditional. */
1803 /* Check for zero: */
1804 return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder,
1805 LLVMIntEQ, src0,
1806 ctx->i32_0, ""),
1807 LLVMConstInt(ctx->i32, -1, 0), lsb, "");
1808}