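amd/common: lower bitfield_insert to bfm & bitfield_select

Set lower_bitfield_insert_to_bitfield_select in the radv and radeonsi
compiler options and drop the nir_op_bitfield_insert case from
ac_nir_to_llvm. The backend now handles nir_op_bfm and
nir_op_bitfield_select instead, and no longer emits its own select for
the bits == 32 case.

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>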
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index dffaeed..9b6e65d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -455,34 +455,30 @@
return result;
}
-static LLVMValueRef emit_bitfield_insert(struct ac_llvm_context *ctx,
- LLVMValueRef src0, LLVMValueRef src1,
- LLVMValueRef src2, LLVMValueRef src3)
+static LLVMValueRef emit_bfm(struct ac_llvm_context *ctx,
+ LLVMValueRef bits, LLVMValueRef offset)
{
- LLVMValueRef bfi_args[3], result;
+ /* mask = ((1 << bits) - 1) << offset */
+ return LLVMBuildShl(ctx->builder,
+ LLVMBuildSub(ctx->builder,
+ LLVMBuildShl(ctx->builder,
+ ctx->i32_1,
+ bits, ""),
+ ctx->i32_1, ""),
+ offset, "");
+}
- bfi_args[0] = LLVMBuildShl(ctx->builder,
- LLVMBuildSub(ctx->builder,
- LLVMBuildShl(ctx->builder,
- ctx->i32_1,
- src3, ""),
- ctx->i32_1, ""),
- src2, "");
- bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, "");
- bfi_args[2] = src0;
-
- LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), "");
-
+static LLVMValueRef emit_bitfield_select(struct ac_llvm_context *ctx,
+ LLVMValueRef mask, LLVMValueRef insert,
+ LLVMValueRef base)
+{
/* Calculate:
- * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
+ * (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base))
* Use the right-hand side, which the LLVM backend can convert to V_BFI.
*/
- result = LLVMBuildXor(ctx->builder, bfi_args[2],
- LLVMBuildAnd(ctx->builder, bfi_args[0],
- LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), "");
-
- result = LLVMBuildSelect(ctx->builder, icond, src1, result, "");
- return result;
+ return LLVMBuildXor(ctx->builder, base,
+ LLVMBuildAnd(ctx->builder, mask,
+ LLVMBuildXor(ctx->builder, insert, base, ""), ""), "");
}
static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx,
@@ -835,15 +831,18 @@
else
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE);
break;
+ case nir_op_bfm:
+ result = emit_bfm(&ctx->ac, src[0], src[1]);
+ break;
+ case nir_op_bitfield_select:
+ result = emit_bitfield_select(&ctx->ac, src[0], src[1], src[2]);
+ break;
case nir_op_ibitfield_extract:
result = emit_bitfield_extract(&ctx->ac, true, src);
break;
case nir_op_ubitfield_extract:
result = emit_bitfield_extract(&ctx->ac, false, src);
break;
- case nir_op_bitfield_insert:
- result = emit_bitfield_insert(&ctx->ac, src[0], src[1], src[2], src[3]);
- break;
case nir_op_bitfield_reverse:
result = ac_build_bitfield_reverse(&ctx->ac, src[0]);
break;
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index c3bbf3f..3452a0b 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -58,6 +58,7 @@
.lower_device_index_to_zero = true,
.lower_fsat = true,
.lower_fdiv = true,
+ .lower_bitfield_insert_to_bitfield_select = true,
.lower_sub = true,
.lower_pack_snorm_2x16 = true,
.lower_pack_snorm_4x8 = true,
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index 0c561e7c..b7c7f1a 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -487,6 +487,7 @@
.lower_flrp64 = true,
.lower_fsat = true,
.lower_fdiv = true,
+ .lower_bitfield_insert_to_bitfield_select = true,
.lower_sub = true,
.lower_ffma = true,
.lower_fmod = true,