#version 450 core
// Enables float16_t, f16vec*, f16mat* and the hf/HF literal suffix used throughout this file.
#extension GL_AMD_gpu_shader_half_float: enable
// Enables the i64vec3 / u64vec3 types used by the conversion tests in typeCast().
#extension GL_ARB_gpu_shader_int64: enable
// Shader entry point. The body is empty; none of the functions defined in
// the visible source are called from here.
void main()
{
}
// Half float literals
// Uses both the lower-case (hf) and upper-case (HF) literal suffixes, in a
// scalar constant and inside a vector constructor.
void literal()
{
    const float16_t f16c = 0.000001hf;
    const f16vec2 f16cv = f16vec2(-0.25HF, 0.03HF);

    f16vec2 f16v;
    f16v.x = f16c;   // scalar constant assigned to a single component
    f16v += f16cv;   // component-wise add of the vector constant
}
// Block memory layout
// Structure of half-float members, embedded in the B1 (std140) and B2 (std430)
// blocks below. Offsets in the comments are relative to the start of the struct.
struct S
{
    float16_t x;   // rule 1: align = 2, takes offsets 0-1
    f16vec2   y;   // rule 2: align = 4, takes offsets 4-7
    f16vec3   z;   // rule 3: align = 8, takes offsets 8-13
};
// Uniform block with column-major matrices and std140 layout. The comment on
// each member names the std140 layout rule applied and the resulting byte
// offsets within the block.
layout(column_major, std140) uniform B1
{
    float16_t a;      // rule 1: align = 2, takes offsets 0-1
    f16vec2   b;      // rule 2: align = 4, takes offsets 4-7
    f16vec3   c;      // rule 3: align = 8, takes offsets 8-13
    float16_t d[2];   // rule 4: align = 16, array stride = 16,
                      //         takes offsets 16-47
    f16mat2x3 e;      // rule 5: align = 16, matrix stride = 16,
                      //         takes offsets 48-79
    f16mat2x3 f[2];   // rule 6: align = 16, matrix stride = 16,
                      //         array stride = 32, f[0] takes
                      //         offsets 80-111, f[1] takes offsets
                      //         112-143
    S         g;      // rule 9: align = 16, g.x takes offsets
                      //         144-145, g.y takes offsets 148-151,
                      //         g.z takes offsets 152-157
    S         h[2];   // rule 10: align = 16, array stride = 16, h[0]
                      //          takes offsets 160-175, h[1] takes
                      //          offsets 176-191
};
// Storage block with row-major matrices and std430 layout. The comment on
// each member names the layout rule applied and the resulting byte offsets
// within the block. Unlike std140, array and struct alignments here are not
// rounded up to 16 bytes.
layout(row_major, std430) buffer B2
{
    float16_t o;      // rule 1: align = 2, takes offsets 0-1
    f16vec2   p;      // rule 2: align = 4, takes offsets 4-7
    f16vec3   q;      // rule 3: align = 8, takes offsets 8-13
    float16_t r[2];   // rule 4: align = 2, array stride = 2, takes
                      //         offsets 14-17
    f16mat2x3 s;      // rule 7: align = 4, matrix stride = 4, takes
                      //         offsets 20-31
    f16mat2x3 t[2];   // rule 8: align = 4, matrix stride = 4, array
                      //         stride = 12, t[0] takes offsets
                      //         32-43, t[1] takes offsets 44-55
    S         u;      // rule 9: align = 8, u.x takes offsets
                      //         56-57, u.y takes offsets 60-63, u.z
                      //         takes offsets 64-69
    S         v[2];   // rule 10: align = 8, array stride = 16, v[0]
                      //          takes offsets 72-87, v[1] takes
                      //          offsets 88-103
};
// Specialization constant
// Three specialization constants (half, float, double), each with its own
// constant_id and a default value.
layout(constant_id = 100) const float16_t sf16 = 0.125hf;
layout(constant_id = 101) const float     sf   = 0.25;
layout(constant_id = 102) const double    sd   = 0.5lf;

// Constants derived from the specialization constants through explicit
// constructor conversions, one per direction between half and float/double.
const float     f16_to_f = float(sf16);
// Convert straight to double so that the half -> double conversion is the one
// exercised, rather than half -> float followed by an implicit widening.
const double    f16_to_d = double(sf16);
const float16_t f_to_f16 = float16_t(sf);
const float16_t d_to_f16 = float16_t(sd);
// Applies arithmetic, relational and vector/matrix operators to half-float
// scalar, vector and matrix operands.
// NOTE(review): every operand is an uninitialized local, so the computed
// values are undefined; presumably only compilation/codegen coverage matters
// here - confirm.
void operators()
{
    float16_t f16;
    f16vec2   f16v;
    f16mat2x2 f16m;
    bool      b;

    // Arithmetic
    f16v += f16v;
    f16v -= f16v;
    f16v *= f16v;
    f16v /= f16v;
    f16v++;
    f16v--;
    ++f16m;
    --f16m;
    f16v = -f16v;
    f16m = -f16m;

    f16 = f16v.x + f16v.y;
    f16 = f16v.x - f16v.y;
    f16 = f16v.x * f16v.y;
    f16 = f16v.x / f16v.y;

    // Relational
    b = (f16v.x != f16);
    b = (f16v.y == f16);
    b = (f16v.x >  f16);
    b = (f16v.y <  f16);
    b = (f16v.x >= f16);
    b = (f16v.y <= f16);

    // Vector/matrix operations
    f16v = f16v * f16;     // vector * scalar
    f16m = f16m * f16;     // matrix * scalar
    f16v = f16m * f16v;    // matrix * column vector
    f16v = f16v * f16m;    // row vector * matrix
    f16m = f16m * f16m;    // matrix * matrix
}
// Constructor conversions between f16vec3 and every other 3-component basic
// type, in both directions: bool, float, double, int, uint, int64, uint64.
// NOTE(review): the source vectors are uninitialized locals; presumably only
// the conversions themselves are of interest - confirm.
void typeCast()
{
    bvec3   bv;
    vec3    fv;
    dvec3   dv;
    ivec3   iv;
    uvec3   uv;
    i64vec3 i64v;
    u64vec3 u64v;

    f16vec3 f16v;

    f16v = f16vec3(bv);     // bool -> float16
    bv   = bvec3(f16v);     // float16 -> bool

    f16v = f16vec3(fv);     // float -> float16
    fv   = vec3(f16v);      // float16 -> float

    f16v = f16vec3(dv);     // double -> float16
    // Source operand must be the half vector; converting dv to dvec3 would be
    // a double -> double no-op and leave this direction uncovered.
    dv   = dvec3(f16v);     // float16 -> double

    f16v = f16vec3(iv);     // int -> float16
    iv   = ivec3(f16v);     // float16 -> int

    f16v = f16vec3(uv);     // uint -> float16
    uv   = uvec3(f16v);     // float16 -> uint

    f16v = f16vec3(i64v);   // int64 -> float16
    i64v = i64vec3(f16v);   // float16 -> int64

    f16v = f16vec3(u64v);   // uint64 -> float16
    u64v = u64vec3(f16v);   // float16 -> uint64
}
// Calls the angle and trigonometry built-ins with f16vec4 arguments,
// including both the one- and two-argument forms of atan.
// NOTE(review): the inputs are uninitialized locals - presumably a coverage
// test only; confirm.
void builtinAngleTrigFuncs()
{
    f16vec4 f16v1, f16v2;

    f16v2 = radians(f16v1);
    f16v2 = degrees(f16v1);

    f16v2 = sin(f16v1);
    f16v2 = cos(f16v1);
    f16v2 = tan(f16v1);
    f16v2 = asin(f16v1);
    f16v2 = acos(f16v1);
    f16v2 = atan(f16v1, f16v2);   // two-argument form
    f16v2 = atan(f16v1);          // one-argument form

    f16v2 = sinh(f16v1);
    f16v2 = cosh(f16v1);
    f16v2 = tanh(f16v1);
    f16v2 = asinh(f16v1);
    f16v2 = acosh(f16v1);
    f16v2 = atanh(f16v1);
}
// Calls the exponential built-ins (pow, exp, log, exp2, log2, sqrt,
// inversesqrt) with f16vec2 arguments.
// NOTE(review): the inputs are uninitialized locals - presumably a coverage
// test only; confirm.
void builtinExpFuncs()
{
    f16vec2 f16v1, f16v2;

    f16v2 = pow(f16v1, f16v2);
    f16v2 = exp(f16v1);
    f16v2 = log(f16v1);
    f16v2 = exp2(f16v1);
    f16v2 = log2(f16v1);
    f16v2 = sqrt(f16v1);
    f16v2 = inversesqrt(f16v1);
}
// Calls the common built-ins with half-float arguments. Where a built-in has
// both an all-vector form and a mixed vector/scalar form, both are called.
// NOTE(review): the inputs are uninitialized locals - presumably a coverage
// test only; confirm.
void builtinCommonFuncs()
{
    f16vec3   f16v1, f16v2, f16v3;
    float16_t f16;
    bool      b;
    bvec3     bv;
    ivec3     iv;

    // Sign, rounding and fractional part
    f16v2 = abs(f16v1);
    f16v2 = sign(f16v1);
    f16v2 = floor(f16v1);
    f16v2 = trunc(f16v1);
    f16v2 = round(f16v1);
    f16v2 = roundEven(f16v1);
    f16v2 = ceil(f16v1);
    f16v2 = fract(f16v1);

    // Modulus
    f16v2 = mod(f16v1, f16v2);
    f16v2 = mod(f16v1, f16);
    f16v3 = modf(f16v1, f16v2);   // second argument is written by modf

    // Min / max / clamp
    f16v3 = min(f16v1, f16v2);
    f16v3 = min(f16v1, f16);
    f16v3 = max(f16v1, f16v2);
    f16v3 = max(f16v1, f16);
    f16v3 = clamp(f16v1, f16, f16v2.x);
    f16v3 = clamp(f16v1, f16v2, f16vec3(f16));

    // Interpolation and thresholds
    f16v3 = mix(f16v1, f16v2, f16);
    f16v3 = mix(f16v1, f16v2, f16v3);
    f16v3 = mix(f16v1, f16v2, bv);   // boolean selector form
    f16v3 = step(f16v1, f16v2);
    f16v3 = step(f16, f16v3);
    f16v3 = smoothstep(f16v1, f16v2, f16v3);
    f16v3 = smoothstep(f16, f16v1.x, f16v2);

    // Classification
    b  = isnan(f16);
    bv = isinf(f16v1);

    // Fused multiply-add and exponent manipulation
    f16v3 = fma(f16v1, f16v2, f16v3);
    f16v2 = frexp(f16v1, iv);   // iv is written by frexp
    f16v2 = ldexp(f16v1, iv);
}
// Passes an f16vec2 through packFloat2x16 into a uint, then converts that
// uint back with unpackFloat2x16.
// NOTE(review): f16v is an uninitialized local - presumably a coverage test
// only; confirm.
void builtinPackUnpackFuncs()
{
    uint    u;
    f16vec2 f16v;

    u    = packFloat2x16(f16v);
    f16v = unpackFloat2x16(u);
}
// Calls the geometric built-ins with f16vec3 arguments; length, distance and
// dot assign to a float16_t scalar, the rest assign to f16vec3.
// NOTE(review): the inputs are uninitialized locals - presumably a coverage
// test only; confirm.
void builtinGeometryFuncs()
{
    float16_t f16;
    f16vec3   f16v1, f16v2, f16v3;

    // Scalar results
    f16 = length(f16v1);
    f16 = distance(f16v1, f16v2);
    f16 = dot(f16v1, f16v2);

    // Vector results
    f16v3 = cross(f16v1, f16v2);
    f16v2 = normalize(f16v1);
    f16v3 = faceforward(f16v1, f16v2, f16v3);
    f16v3 = reflect(f16v1, f16v2);
    f16v3 = refract(f16v1, f16v2, f16);
}
// Calls the matrix built-ins with half-float matrices of several shapes
// (2x3, 3x2, 3x3, 4x4).
// NOTE(review): the inputs are uninitialized locals - presumably a coverage
// test only; confirm.
void builtinMatrixFuncs()
{
    f16mat2x3 f16m1, f16m2, f16m3;
    f16mat3x2 f16m4;
    f16mat3   f16m5;
    f16mat4   f16m6, f16m7;

    f16vec3   f16v1;
    f16vec2   f16v2;

    float16_t f16;

    f16m3 = matrixCompMult(f16m1, f16m2);
    f16m1 = outerProduct(f16v1, f16v2);   // vec3 x vec2 -> mat2x3
    f16m4 = transpose(f16m1);             // mat2x3 -> mat3x2
    f16   = determinant(f16m5);
    f16m6 = inverse(f16m7);
}
// Calls the six component-wise vector relational built-ins on f16vec3
// operands, each assigning its result to a bvec3.
// NOTE(review): the inputs are uninitialized locals - presumably a coverage
// test only; confirm.
void builtinVecRelFuncs()
{
    f16vec3 f16v1, f16v2;
    bvec3   bv;

    bv = lessThan(f16v1, f16v2);
    bv = lessThanEqual(f16v1, f16v2);
    bv = greaterThan(f16v1, f16v2);
    bv = greaterThanEqual(f16v1, f16v2);
    bv = equal(f16v1, f16v2);
    bv = notEqual(f16v1, f16v2);
}
// Half-float fragment input read by builtinFragProcFuncs().
in f16vec3 if16v;

// Calls the fragment-processing built-ins (derivatives, fwidth variants and
// interpolateAt* functions) on the half-float input if16v, using scalar,
// two-component and three-component operands.
void builtinFragProcFuncs()
{
    f16vec3 f16v;

    // Derivative
    f16v.x  = dFdx(if16v.x);
    f16v.y  = dFdy(if16v.y);
    f16v.xy = dFdxFine(if16v.xy);
    f16v.xy = dFdyFine(if16v.xy);
    f16v    = dFdxCoarse(if16v);
    // y-direction counterpart of the line above, so both coarse derivatives
    // are covered (the x variant was previously called twice).
    f16v    = dFdyCoarse(if16v);

    f16v.x  = fwidth(if16v.x);
    f16v.xy = fwidthFine(if16v.xy);
    f16v    = fwidthCoarse(if16v);

    // Interpolation
    f16v.x  = interpolateAtCentroid(if16v.x);
    f16v.xy = interpolateAtSample(if16v.xy, 1);
    f16v    = interpolateAtOffset(if16v, f16vec2(0.5hf));
}