blob: e49b70559825c095b56c393dbe970e4d1744ae31 [file] [log] [blame]
/**************************************************************************
*
* Copyright 2009 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/**
* @file
* Unit tests for blend LLVM IR generation
*
* @author Jose Fonseca <jfonseca@vmware.com>
*
* Blend computation code derived from code written by
* @author Brian Paul <brian@vmware.com>
*/
#include "gallivm/lp_bld_type.h"
#include "gallivm/lp_bld_blend.h"
#include "gallivm/lp_bld_debug.h"
#include "lp_test.h"
/**
 * Memory layout of the pixel data handed to the generated blend function.
 *
 * The numeric values matter: elsewhere in this file the mode is used as a
 * truth value, iterated as 0..1 and picked with a random bit.
 */
enum vector_mode {
   AoS = 0,   /* one vector holding interleaved groups of four channels */
   SoA = 1    /* four consecutive vectors, one per channel */
};
/**
 * Entry point of the JIT-compiled blend test: three input buffers (source,
 * destination and blend constant) and one buffer receiving the result.
 */
typedef void
(*blend_test_ptr_t)(const void *src,
                    const void *dst,
                    const void *con,
                    void *res);
/**
 * Write the column-name line of the tab-separated report.
 *
 * The columns match, in order, the fields emitted by write_tsv_row().
 * The stream is flushed before returning.
 *
 * @param fp  stream to write to
 */
void
write_tsv_header(FILE *fp)
{
   static const char header[] =
      "result\t"
      "cycles_per_channel\t"
      "mode\t"
      "type\t"
      "sep_func\t"
      "sep_src_factor\t"
      "sep_dst_factor\t"
      "rgb_func\t"
      "rgb_src_factor\t"
      "rgb_dst_factor\t"
      "alpha_func\t"
      "alpha_src_factor\t"
      "alpha_dst_factor\n";

   /* The header holds no conversion specifiers, so a plain write suffices. */
   fputs(header, fp);
   fflush(fp);
}
/**
 * Append one result line to the tab-separated report and flush the stream.
 *
 * Columns, in order: pass/fail, cycles per channel, vector layout, vector
 * type tag, three flags telling whether the RGB and alpha func / src factor /
 * dst factor differ, and finally the six blend state names themselves.
 *
 * @param fp       stream to write to
 * @param blend    blend state under test (only rt[0] is reported)
 * @param mode     vector layout used by the test
 * @param type     vector type used by the test
 * @param cycles   average cycle count of one call of the generated function
 * @param success  whether the generated code matched the reference
 */
static void
write_tsv_row(FILE *fp,
              const struct pipe_blend_state *blend,
              enum vector_mode mode,
              struct lp_type type,
              double cycles,
              boolean success)
{
   const char *verdict;
   const char *base;
   const char *norm_tag;
   const char *sep_func;
   const char *sep_src;
   const char *sep_dst;
   const char *rgb_func_name;
   const char *rgb_src_name;
   const char *rgb_dst_name;
   const char *alpha_func_name;
   const char *alpha_src_name;
   const char *alpha_dst_name;

   verdict = success ? "pass" : "fail";
   fprintf(fp, "%s\t", verdict);

   /* Cycles are normalised per channel: an SoA call handles four vectors. */
   switch (mode) {
   case AoS:
      fprintf(fp, "%.1f\t", cycles / type.length);
      fprintf(fp, "aos\t");
      break;
   case SoA:
      fprintf(fp, "%.1f\t", cycles / (4 * type.length));
      fprintf(fp, "soa\t");
      break;
   }

   /* Type tag: f(loat) / h (fixed) / s(igned) / u(nsigned). */
   base = "u";
   if (type.floating)
      base = "f";
   else if (type.fixed)
      base = "h";
   else if (type.sign)
      base = "s";
   norm_tag = type.norm ? "n" : "";

   fprintf(fp, "%s%u%sx%u\t",
           base,
           type.width,
           norm_tag,
           type.length);

   sep_func = "false";
   if (blend->rt[0].rgb_func != blend->rt[0].alpha_func)
      sep_func = "true";

   sep_src = "false";
   if (blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor)
      sep_src = "true";

   sep_dst = "false";
   if (blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor)
      sep_dst = "true";

   fprintf(fp, "%s\t%s\t%s\t", sep_func, sep_src, sep_dst);

   rgb_func_name   = debug_dump_blend_func(blend->rt[0].rgb_func, TRUE);
   rgb_src_name    = debug_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE);
   rgb_dst_name    = debug_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE);
   alpha_func_name = debug_dump_blend_func(blend->rt[0].alpha_func, TRUE);
   alpha_src_name  = debug_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE);
   alpha_dst_name  = debug_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE);

   fprintf(fp,
           "%s\t%s\t%s\t%s\t%s\t%s\n",
           rgb_func_name,
           rgb_src_name,
           rgb_dst_name,
           alpha_func_name,
           alpha_src_name,
           alpha_dst_name);

   fflush(fp);
}
/**
 * Print a one-line, human readable description of a test case: vector
 * layout, vector type tag and the six blend state names of rt[0], followed
 * by " ..." and a newline. The stream is flushed before returning.
 *
 * @param fp     stream to write to
 * @param blend  blend state under test
 * @param mode   vector layout used by the test
 * @param type   vector type used by the test
 */
static void
dump_blend_type(FILE *fp,
                const struct pipe_blend_state *blend,
                enum vector_mode mode,
                struct lp_type type)
{
   const char *layout = "aos";
   const char *base = "u";
   const char *norm_tag = "";

   if (mode)
      layout = "soa";
   fputs(layout, fp);

   /* Type tag: f(loat) / h (fixed) / s(igned) / u(nsigned). */
   if (type.floating)
      base = "f";
   else if (type.fixed)
      base = "h";
   else if (type.sign)
      base = "s";

   if (type.norm)
      norm_tag = "n";

   fprintf(fp, " type=%s%u%sx%u",
           base,
           type.width,
           norm_tag,
           type.length);

   fprintf(fp,
           " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
           "rgb_func",
           debug_dump_blend_func(blend->rt[0].rgb_func, TRUE),
           "rgb_src_factor",
           debug_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
           "rgb_dst_factor",
           debug_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
           "alpha_func",
           debug_dump_blend_func(blend->rt[0].alpha_func, TRUE),
           "alpha_src_factor",
           debug_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
           "alpha_dst_factor",
           debug_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));

   fputs(" ...\n", fp);
   fflush(fp);
}
/**
 * Add to @p module a function named "test" that blends its inputs.
 *
 * The generated function returns void and takes four pointers to vectors of
 * @p type: source, destination, blend constant and result, in that order.
 *
 * - AoS: one vector is loaded from each input, blended with
 *   lp_build_blend_aos() and the result stored through the result pointer.
 * - SoA: four consecutive vectors (r, g, b, a) are loaded from each input,
 *   blended with lp_build_blend_soa() and the four results stored at
 *   consecutive positions of the result pointer.
 *
 * @param module  module receiving the new function
 * @param blend   blend state to generate code for
 * @param mode    vector layout
 * @param type    vector type of the operands
 * @return the newly created function
 */
static LLVMValueRef
add_blend_test(LLVMModuleRef module,
               const struct pipe_blend_state *blend,
               enum vector_mode mode,
               struct lp_type type)
{
   LLVMTypeRef vec_type;
   LLVMTypeRef ptr_type;
   LLVMTypeRef args[4];
   LLVMValueRef func;
   LLVMValueRef src_ptr;
   LLVMValueRef dst_ptr;
   LLVMValueRef const_ptr;
   LLVMValueRef res_ptr;
   LLVMBasicBlockRef block;
   LLVMBuilderRef builder;
   unsigned a;

   vec_type = lp_build_vec_type(type);
   ptr_type = LLVMPointerType(vec_type, 0);
   for (a = 0; a < 4; ++a)
      args[a] = ptr_type;

   func = LLVMAddFunction(module, "test",
                          LLVMFunctionType(LLVMVoidType(), args, 4, 0));
   LLVMSetFunctionCallConv(func, LLVMCCallConv);

   src_ptr = LLVMGetParam(func, 0);
   dst_ptr = LLVMGetParam(func, 1);
   const_ptr = LLVMGetParam(func, 2);
   res_ptr = LLVMGetParam(func, 3);

   block = LLVMAppendBasicBlock(func, "entry");
   builder = LLVMCreateBuilder();
   LLVMPositionBuilderAtEnd(builder, block);

   if (mode == AoS) {
      LLVMValueRef src;
      LLVMValueRef dst;
      LLVMValueRef con;
      LLVMValueRef res;

      src = LLVMBuildLoad(builder, src_ptr, "src");
      dst = LLVMBuildLoad(builder, dst_ptr, "dst");
      con = LLVMBuildLoad(builder, const_ptr, "const");

      /* NOTE(review): the trailing 3 is presumably the position of the alpha
       * channel within each pixel -- confirm against lp_bld_blend.h. */
      res = lp_build_blend_aos(builder, blend, type, src, dst, con, 3);

      lp_build_name(res, "res");

      LLVMBuildStore(builder, res, res_ptr);
   }

   if (mode == SoA) {
      LLVMValueRef src[4];
      LLVMValueRef dst[4];
      LLVMValueRef con[4];
      LLVMValueRef res[4];
      unsigned i;

      /* Load one vector per channel from each of the three inputs. */
      for (i = 0; i < 4; ++i) {
         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
         src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
         dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
         con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
         lp_build_name(src[i], "src.%c", "rgba"[i]);
         lp_build_name(con[i], "con.%c", "rgba"[i]);
         lp_build_name(dst[i], "dst.%c", "rgba"[i]);
      }

      lp_build_blend_soa(builder, blend, type, src, dst, con, res);

      for (i = 0; i < 4; ++i) {
         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
         lp_build_name(res[i], "res.%c", "rgba"[i]);
         LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
      }
   }

   LLVMBuildRetVoid(builder);

   LLVMDisposeBuilder(builder);
   return func;
}
/**
 * R = A + B, clamped from above at 1.0.
 *
 * R is evaluated more than once, so it must be free of side effects.
 */
#define ADD_SAT(R, A, B)          \
   do {                           \
      R = (A) + (B);              \
      if (R > 1.0f) {             \
         R = 1.0f;                \
      }                           \
   } while (0)
/**
 * R = A - B, clamped from below at 0.0.
 *
 * R is evaluated more than once, so it must be free of side effects.
 */
#define SUB_SAT(R, A, B)          \
   do {                           \
      R = (A) - (B);              \
      if (R < 0.0f) {             \
         R = 0.0f;                \
      }                           \
   } while (0)
/**
 * Reference computation of one blend term, i.e. a colour multiplied by its
 * blend factor.
 *
 * The RGB channels (term[0..2]) are scaled according to @p rgb_factor and
 * the alpha channel (term[3]) according to @p alpha_factor.
 *
 * compute_blend_ref() calls this twice: once with @p factor == @p src to
 * obtain the source term and once with @p factor == @p dst to obtain the
 * destination term.
 *
 * The SRC1 factors are not implemented and trigger an assertion.
 *
 * @param rgb_factor    PIPE_BLENDFACTOR_x applied to the RGB channels
 * @param alpha_factor  PIPE_BLENDFACTOR_x applied to the alpha channel
 * @param factor        RGBA colour being scaled (4 doubles)
 * @param src           RGBA source colour (4 doubles)
 * @param dst           RGBA destination colour (4 doubles)
 * @param con           RGBA blend constant colour (4 doubles)
 * @param term          receives the scaled RGBA colour (4 doubles)
 */
static void
compute_blend_ref_term(unsigned rgb_factor,
                       unsigned alpha_factor,
                       const double *factor,
                       const double *src,
                       const double *dst,
                       const double *con,
                       double *term)
{
   double temp;

   /*
    * Compute the RGB channels of the term
    */
   switch (rgb_factor) {
   case PIPE_BLENDFACTOR_ONE:
      term[0] = factor[0]; /* R */
      term[1] = factor[1]; /* G */
      term[2] = factor[2]; /* B */
      break;
   case PIPE_BLENDFACTOR_SRC_COLOR:
      term[0] = factor[0] * src[0]; /* R */
      term[1] = factor[1] * src[1]; /* G */
      term[2] = factor[2] * src[2]; /* B */
      break;
   case PIPE_BLENDFACTOR_SRC_ALPHA:
      term[0] = factor[0] * src[3]; /* R */
      term[1] = factor[1] * src[3]; /* G */
      term[2] = factor[2] * src[3]; /* B */
      break;
   case PIPE_BLENDFACTOR_DST_COLOR:
      term[0] = factor[0] * dst[0]; /* R */
      term[1] = factor[1] * dst[1]; /* G */
      term[2] = factor[2] * dst[2]; /* B */
      break;
   case PIPE_BLENDFACTOR_DST_ALPHA:
      term[0] = factor[0] * dst[3]; /* R */
      term[1] = factor[1] * dst[3]; /* G */
      term[2] = factor[2] * dst[3]; /* B */
      break;
   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
      temp = MIN2(src[3], 1.0f - dst[3]);
      term[0] = factor[0] * temp; /* R */
      term[1] = factor[1] * temp; /* G */
      term[2] = factor[2] * temp; /* B */
      break;
   case PIPE_BLENDFACTOR_CONST_COLOR:
      term[0] = factor[0] * con[0]; /* R */
      term[1] = factor[1] * con[1]; /* G */
      term[2] = factor[2] * con[2]; /* B */
      break;
   case PIPE_BLENDFACTOR_CONST_ALPHA:
      term[0] = factor[0] * con[3]; /* R */
      term[1] = factor[1] * con[3]; /* G */
      term[2] = factor[2] * con[3]; /* B */
      break;
   case PIPE_BLENDFACTOR_SRC1_COLOR:
      assert(0); /* to do */
      break;
   case PIPE_BLENDFACTOR_SRC1_ALPHA:
      assert(0); /* to do */
      break;
   case PIPE_BLENDFACTOR_ZERO:
      term[0] = 0.0f; /* R */
      term[1] = 0.0f; /* G */
      term[2] = 0.0f; /* B */
      break;
   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
      term[0] = factor[0] * (1.0f - src[0]); /* R */
      term[1] = factor[1] * (1.0f - src[1]); /* G */
      term[2] = factor[2] * (1.0f - src[2]); /* B */
      break;
   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
      term[0] = factor[0] * (1.0f - src[3]); /* R */
      term[1] = factor[1] * (1.0f - src[3]); /* G */
      term[2] = factor[2] * (1.0f - src[3]); /* B */
      break;
   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
      term[0] = factor[0] * (1.0f - dst[3]); /* R */
      term[1] = factor[1] * (1.0f - dst[3]); /* G */
      term[2] = factor[2] * (1.0f - dst[3]); /* B */
      break;
   case PIPE_BLENDFACTOR_INV_DST_COLOR:
      term[0] = factor[0] * (1.0f - dst[0]); /* R */
      term[1] = factor[1] * (1.0f - dst[1]); /* G */
      term[2] = factor[2] * (1.0f - dst[2]); /* B */
      break;
   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
      term[0] = factor[0] * (1.0f - con[0]); /* R */
      term[1] = factor[1] * (1.0f - con[1]); /* G */
      term[2] = factor[2] * (1.0f - con[2]); /* B */
      break;
   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
      term[0] = factor[0] * (1.0f - con[3]); /* R */
      term[1] = factor[1] * (1.0f - con[3]); /* G */
      term[2] = factor[2] * (1.0f - con[3]); /* B */
      break;
   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
      assert(0); /* to do */
      break;
   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
      assert(0); /* to do */
      break;
   default:
      assert(0);
      break;
   }

   /*
    * Compute the alpha channel of the term
    */
   switch (alpha_factor) {
   case PIPE_BLENDFACTOR_ONE:
      term[3] = factor[3]; /* A */
      break;
   case PIPE_BLENDFACTOR_SRC_COLOR:
   case PIPE_BLENDFACTOR_SRC_ALPHA:
      term[3] = factor[3] * src[3]; /* A */
      break;
   case PIPE_BLENDFACTOR_DST_COLOR:
   case PIPE_BLENDFACTOR_DST_ALPHA:
      term[3] = factor[3] * dst[3]; /* A */
      break;
   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
      /* The saturate factor is 1 for the alpha channel, so the term is the
       * unscaled alpha of the colour being scaled. Using src[3] here was
       * only right when computing the source term. */
      term[3] = factor[3]; /* A */
      break;
   case PIPE_BLENDFACTOR_CONST_COLOR:
   case PIPE_BLENDFACTOR_CONST_ALPHA:
      term[3] = factor[3] * con[3]; /* A */
      break;
   case PIPE_BLENDFACTOR_ZERO:
      term[3] = 0.0f; /* A */
      break;
   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
      term[3] = factor[3] * (1.0f - src[3]); /* A */
      break;
   case PIPE_BLENDFACTOR_INV_DST_COLOR:
   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
      term[3] = factor[3] * (1.0f - dst[3]); /* A */
      break;
   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
      term[3] = factor[3] * (1.0f - con[3]); /* A */
      break;
   default:
      assert(0);
      break;
   }
}
/**
 * Reference blend of a single RGBA pixel.
 *
 * The source and destination terms are obtained from
 * compute_blend_ref_term() and then combined with rt[0].rgb_func for the
 * RGB channels and rt[0].alpha_func for the alpha channel. Add and the two
 * subtract functions saturate through ADD_SAT / SUB_SAT.
 *
 * @param blend  blend state (only rt[0] is used)
 * @param src    RGBA source colour (4 doubles)
 * @param dst    RGBA destination colour (4 doubles)
 * @param con    RGBA blend constant colour (4 doubles)
 * @param res    receives the blended RGBA colour (4 doubles)
 */
static void
compute_blend_ref(const struct pipe_blend_state *blend,
                  const double *src,
                  const double *dst,
                  const double *con,
                  double *res)
{
   double s_term[4];
   double d_term[4];
   unsigned ch;

   compute_blend_ref_term(blend->rt[0].rgb_src_factor,
                          blend->rt[0].alpha_src_factor,
                          src, src, dst, con, s_term);
   compute_blend_ref_term(blend->rt[0].rgb_dst_factor,
                          blend->rt[0].alpha_dst_factor,
                          dst, src, dst, con, d_term);

   /* Channels 0..2 (R, G, B) share the RGB function. */
   switch (blend->rt[0].rgb_func) {
   case PIPE_BLEND_ADD:
      for (ch = 0; ch < 3; ++ch) {
         ADD_SAT(res[ch], s_term[ch], d_term[ch]);
      }
      break;
   case PIPE_BLEND_SUBTRACT:
      for (ch = 0; ch < 3; ++ch) {
         SUB_SAT(res[ch], s_term[ch], d_term[ch]);
      }
      break;
   case PIPE_BLEND_REVERSE_SUBTRACT:
      for (ch = 0; ch < 3; ++ch) {
         SUB_SAT(res[ch], d_term[ch], s_term[ch]);
      }
      break;
   case PIPE_BLEND_MIN:
      for (ch = 0; ch < 3; ++ch) {
         res[ch] = MIN2(s_term[ch], d_term[ch]);
      }
      break;
   case PIPE_BLEND_MAX:
      for (ch = 0; ch < 3; ++ch) {
         res[ch] = MAX2(s_term[ch], d_term[ch]);
      }
      break;
   default:
      assert(0);
   }

   /* Channel 3 (A) uses the alpha function. */
   switch (blend->rt[0].alpha_func) {
   case PIPE_BLEND_ADD:
      ADD_SAT(res[3], s_term[3], d_term[3]);
      break;
   case PIPE_BLEND_SUBTRACT:
      SUB_SAT(res[3], s_term[3], d_term[3]);
      break;
   case PIPE_BLEND_REVERSE_SUBTRACT:
      SUB_SAT(res[3], d_term[3], s_term[3]);
      break;
   case PIPE_BLEND_MIN:
      res[3] = MIN2(s_term[3], d_term[3]);
      break;
   case PIPE_BLEND_MAX:
      res[3] = MAX2(s_term[3], d_term[3]);
      break;
   default:
      assert(0);
   }
}
PIPE_ALIGN_STACK
static boolean
test_one(unsigned verbose,
FILE *fp,
const struct pipe_blend_state *blend,
enum vector_mode mode,
struct lp_type type)
{
LLVMModuleRef module = NULL;
LLVMValueRef func = NULL;
LLVMExecutionEngineRef engine = NULL;
LLVMModuleProviderRef provider = NULL;
LLVMPassManagerRef pass = NULL;
char *error = NULL;
blend_test_ptr_t blend_test_ptr;
boolean success;
const unsigned n = LP_TEST_NUM_SAMPLES;
int64_t cycles[LP_TEST_NUM_SAMPLES];
double cycles_avg = 0.0;
unsigned i, j;
if(verbose >= 1)
dump_blend_type(stdout, blend, mode, type);
module = LLVMModuleCreateWithName("test");
func = add_blend_test(module, blend, mode, type);
if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
LLVMDumpModule(module);
abort();
}
LLVMDisposeMessage(error);
provider = LLVMCreateModuleProviderForExistingModule(module);
if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
if(verbose < 1)
dump_blend_type(stderr, blend, mode, type);
fprintf(stderr, "%s\n", error);
LLVMDisposeMessage(error);
abort();
}
#if 0
pass = LLVMCreatePassManager();
LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
/* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
* but there are more on SVN. */
LLVMAddConstantPropagationPass(pass);
LLVMAddInstructionCombiningPass(pass);
LLVMAddPromoteMemoryToRegisterPass(pass);
LLVMAddGVNPass(pass);
LLVMAddCFGSimplificationPass(pass);
LLVMRunPassManager(pass, module);
#else
(void)pass;
#endif
if(verbose >= 2)
LLVMDumpModule(module);
blend_test_ptr = (blend_test_ptr_t)LLVMGetPointerToGlobal(engine, func);
if(verbose >= 2)
lp_disassemble(blend_test_ptr);
success = TRUE;
for(i = 0; i < n && success; ++i) {
if(mode == AoS) {
PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
int64_t start_counter = 0;
int64_t end_counter = 0;
random_vec(type, src);
random_vec(type, dst);
random_vec(type, con);
{
double fsrc[LP_MAX_VECTOR_LENGTH];
double fdst[LP_MAX_VECTOR_LENGTH];
double fcon[LP_MAX_VECTOR_LENGTH];
double fref[LP_MAX_VECTOR_LENGTH];
read_vec(type, src, fsrc);
read_vec(type, dst, fdst);
read_vec(type, con, fcon);
for(j = 0; j < type.length; j += 4)
compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
write_vec(type, ref, fref);
}
start_counter = rdtsc();
blend_test_ptr(src, dst, con, res);
end_counter = rdtsc();
cycles[i] = end_counter - start_counter;
if(!compare_vec(type, res, ref)) {
success = FALSE;
if(verbose < 1)
dump_blend_type(stderr, blend, mode, type);
fprintf(stderr, "MISMATCH\n");
fprintf(stderr, " Src: ");
dump_vec(stderr, type, src);
fprintf(stderr, "\n");
fprintf(stderr, " Dst: ");
dump_vec(stderr, type, dst);
fprintf(stderr, "\n");
fprintf(stderr, " Con: ");
dump_vec(stderr, type, con);
fprintf(stderr, "\n");
fprintf(stderr, " Res: ");
dump_vec(stderr, type, res);
fprintf(stderr, "\n");
fprintf(stderr, " Ref: ");
dump_vec(stderr, type, ref);
fprintf(stderr, "\n");
}
}
if(mode == SoA) {
const unsigned stride = type.length*type.width/8;
PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
int64_t start_counter = 0;
int64_t end_counter = 0;
boolean mismatch;
for(j = 0; j < 4; ++j) {
random_vec(type, src + j*stride);
random_vec(type, dst + j*stride);
random_vec(type, con + j*stride);
}
{
double fsrc[4];
double fdst[4];
double fcon[4];
double fref[4];
unsigned k;
for(k = 0; k < type.length; ++k) {
for(j = 0; j < 4; ++j) {
fsrc[j] = read_elem(type, src + j*stride, k);
fdst[j] = read_elem(type, dst + j*stride, k);
fcon[j] = read_elem(type, con + j*stride, k);
}
compute_blend_ref(blend, fsrc, fdst, fcon, fref);
for(j = 0; j < 4; ++j)
write_elem(type, ref + j*stride, k, fref[j]);
}
}
start_counter = rdtsc();
blend_test_ptr(src, dst, con, res);
end_counter = rdtsc();
cycles[i] = end_counter - start_counter;
mismatch = FALSE;
for (j = 0; j < 4; ++j)
if(!compare_vec(type, res + j*stride, ref + j*stride))
mismatch = TRUE;
if (mismatch) {
success = FALSE;
if(verbose < 1)
dump_blend_type(stderr, blend, mode, type);
fprintf(stderr, "MISMATCH\n");
for(j = 0; j < 4; ++j) {
char channel = "RGBA"[j];
fprintf(stderr, " Src%c: ", channel);
dump_vec(stderr, type, src + j*stride);
fprintf(stderr, "\n");
fprintf(stderr, " Dst%c: ", channel);
dump_vec(stderr, type, dst + j*stride);
fprintf(stderr, "\n");
fprintf(stderr, " Con%c: ", channel);
dump_vec(stderr, type, con + j*stride);
fprintf(stderr, "\n");
fprintf(stderr, " Res%c: ", channel);
dump_vec(stderr, type, res + j*stride);
fprintf(stderr, "\n");
fprintf(stderr, " Ref%c: ", channel);
dump_vec(stderr, type, ref + j*stride);
fprintf(stderr, "\n");
}
}
}
}
/*
* Unfortunately the output of cycle counter is not very reliable as it comes
* -- sometimes we get outliers (due IRQs perhaps?) which are
* better removed to avoid random or biased data.
*/
{
double sum = 0.0, sum2 = 0.0;
double avg, std;
unsigned m;
for(i = 0; i < n; ++i) {
sum += cycles[i];
sum2 += cycles[i]*cycles[i];
}
avg = sum/n;
std = sqrtf((sum2 - n*avg*avg)/n);
m = 0;
sum = 0.0;
for(i = 0; i < n; ++i) {
if(fabs(cycles[i] - avg) <= 4.0*std) {
sum += cycles[i];
++m;
}
}
cycles_avg = sum/m;
}
if(fp)
write_tsv_row(fp, blend, mode, type, cycles_avg, success);
if (!success) {
if(verbose < 2)
LLVMDumpModule(module);
LLVMWriteBitcodeToFile(module, "blend.bc");
fprintf(stderr, "blend.bc written\n");
fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n");
abort();
}
LLVMFreeMachineCodeForFunction(engine, func);
LLVMDisposeExecutionEngine(engine);
if(pass)
LLVMDisposePassManager(pass);
return success;
}
/**
 * Blend factors exercised by test_all() and test_some().
 *
 * The SRC1 factors are compiled out with #if 0: the reference
 * implementation, compute_blend_ref_term(), only asserts for them.
 *
 * Table order matters to test_all(), which pairs each source factor only
 * with the destination factors found at or before it in this table.
 */
const unsigned
blend_factors[] = {
PIPE_BLENDFACTOR_ZERO,
PIPE_BLENDFACTOR_ONE,
PIPE_BLENDFACTOR_SRC_COLOR,
PIPE_BLENDFACTOR_SRC_ALPHA,
PIPE_BLENDFACTOR_DST_COLOR,
PIPE_BLENDFACTOR_DST_ALPHA,
PIPE_BLENDFACTOR_CONST_COLOR,
PIPE_BLENDFACTOR_CONST_ALPHA,
#if 0
PIPE_BLENDFACTOR_SRC1_COLOR,
PIPE_BLENDFACTOR_SRC1_ALPHA,
#endif
PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE,
PIPE_BLENDFACTOR_INV_SRC_COLOR,
PIPE_BLENDFACTOR_INV_SRC_ALPHA,
PIPE_BLENDFACTOR_INV_DST_COLOR,
PIPE_BLENDFACTOR_INV_DST_ALPHA,
PIPE_BLENDFACTOR_INV_CONST_COLOR,
PIPE_BLENDFACTOR_INV_CONST_ALPHA,
#if 0
PIPE_BLENDFACTOR_INV_SRC1_COLOR,
PIPE_BLENDFACTOR_INV_SRC1_ALPHA,
#endif
};
/**
 * Blend functions exercised by test_all() and test_some(); each one has a
 * matching case in compute_blend_ref().
 */
const unsigned
blend_funcs[] = {
PIPE_BLEND_ADD,
PIPE_BLEND_SUBTRACT,
PIPE_BLEND_REVERSE_SUBTRACT,
PIPE_BLEND_MIN,
PIPE_BLEND_MAX
};
/**
 * Vector types exercised by test_all() and test_some(). The initialisers are
 * positional; the column comment below names the field each value sets.
 */
const struct lp_type blend_types[] = {
/* float, fixed, sign, norm, width, len */
{ TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */
{ FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */
};
/*
 * Element counts of blend_funcs, blend_factors and blend_types, used as
 * loop bounds and as moduli when picking random entries.
 */
const unsigned num_funcs   = sizeof blend_funcs   / sizeof *blend_funcs;
const unsigned num_factors = sizeof blend_factors / sizeof *blend_factors;
const unsigned num_types   = sizeof blend_types   / sizeof *blend_types;
boolean
test_all(unsigned verbose, FILE *fp)
{
const unsigned *rgb_func;
const unsigned *rgb_src_factor;
const unsigned *rgb_dst_factor;
const unsigned *alpha_func;
const unsigned *alpha_src_factor;
const unsigned *alpha_dst_factor;
struct pipe_blend_state blend;
enum vector_mode mode;
const struct lp_type *type;
bool success = TRUE;
for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) {
for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) {
for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
for(mode = 0; mode < 2; ++mode) {
for(type = blend_types; type < &blend_types[num_types]; ++type) {
if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
continue;
memset(&blend, 0, sizeof blend);
blend.rt[0].blend_enable = 1;
blend.rt[0].rgb_func = *rgb_func;
blend.rt[0].rgb_src_factor = *rgb_src_factor;
blend.rt[0].rgb_dst_factor = *rgb_dst_factor;
blend.rt[0].alpha_func = *alpha_func;
blend.rt[0].alpha_src_factor = *alpha_src_factor;
blend.rt[0].alpha_dst_factor = *alpha_dst_factor;
blend.rt[0].colormask = PIPE_MASK_RGBA;
if(!test_one(verbose, fp, &blend, mode, *type))
success = FALSE;
}
}
}
}
}
}
}
}
return success;
}
/**
 * Run test_one() on @p n randomly chosen blend state / vector layout /
 * vector type combinations.
 *
 * All choices are drawn with rand(). Destination factors are redrawn until
 * they differ from PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE.
 *
 * @param verbose  verbosity level forwarded to test_one()
 * @param fp       TSV report stream forwarded to test_one()
 * @param n        number of random combinations to test
 * @return TRUE when every test_one() call returned TRUE
 */
boolean
test_some(unsigned verbose, FILE *fp, unsigned long n)
{
   struct pipe_blend_state blend;
   unsigned long iter;
   bool success = TRUE;

   for (iter = 0; iter < n; ++iter) {
      unsigned rgb_func, alpha_func;
      unsigned rgb_src, alpha_src;
      unsigned rgb_dst, alpha_dst;
      unsigned type_index;
      enum vector_mode mode;

      /* The order of the rand() calls below determines which cases a given
       * seed produces, so it must not change. */
      rgb_func   = blend_funcs[rand() % num_funcs];
      alpha_func = blend_funcs[rand() % num_funcs];
      rgb_src    = blend_factors[rand() % num_factors];
      alpha_src  = blend_factors[rand() % num_factors];

      for (;;) {
         rgb_dst = blend_factors[rand() % num_factors];
         if (rgb_dst != PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
            break;
      }

      for (;;) {
         alpha_dst = blend_factors[rand() % num_factors];
         if (alpha_dst != PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
            break;
      }

      mode = rand() & 1;
      type_index = rand() % num_types;

      memset(&blend, 0, sizeof blend);
      blend.rt[0].blend_enable      = 1;
      blend.rt[0].rgb_func          = rgb_func;
      blend.rt[0].rgb_src_factor    = rgb_src;
      blend.rt[0].rgb_dst_factor    = rgb_dst;
      blend.rt[0].alpha_func        = alpha_func;
      blend.rt[0].alpha_src_factor  = alpha_src;
      blend.rt[0].alpha_dst_factor  = alpha_dst;
      blend.rt[0].colormask         = PIPE_MASK_RGBA;

      if (!test_one(verbose, fp, &blend, mode, blend_types[type_index]))
         success = FALSE;
   }

   return success;
}