Implement dynamic indexing of temporaries.
Previously only dynamic indexing of uniforms was supported.
Since dynamically indexing temporaries is essentially a gather operation within
the register file, it is slow. We optimize the common case of using the loop index
variable as the relative address, where the index value is the same
for all shader invocations running in lock-step across SIMD lanes.
Bug chromium:845103
Bug skia:7846
Change-Id: Idb36b512dd560d740ac9088691b633ff3a1561c1
Reviewed-on: https://swiftshader-review.googlesource.com/18968
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/OpenGL/compiler/OutputASM.cpp b/src/OpenGL/compiler/OutputASM.cpp
index cbfbf56..d7213cb 100644
--- a/src/OpenGL/compiler/OutputASM.cpp
+++ b/src/OpenGL/compiler/OutputASM.cpp
@@ -1831,6 +1831,11 @@
return false;
}
+ if(loop.isDeterministic())
+ {
+ deterministicVariables.insert(loop.index->getId());
+ }
+
bool unroll = (loop.iterations <= 4);
TIntermNode *init = node->getInit();
@@ -1916,6 +1921,11 @@
}
}
+ if(loop.isDeterministic())
+ {
+ deterministicVariables.erase(loop.index->getId());
+ }
+
return false;
}
@@ -2651,10 +2661,12 @@
sw::Shader::SourceParameter relativeRegister;
source(relativeRegister, right);
+ int indexId = right->getAsSymbolNode() ? right->getAsSymbolNode()->getId() : 0;
+
rel.index = relativeRegister.index;
rel.type = relativeRegister.type;
rel.scale = scale;
- rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);
+ rel.dynamic = (right->getQualifier() != EvqUniform) && (deterministicVariables.count(indexId) == 0);
}
}
else if(rel.index != registerIndex(&address)) // Move the previous index register to the address register
diff --git a/src/OpenGL/compiler/OutputASM.h b/src/OpenGL/compiler/OutputASM.h
index 4480e2a..1aa791e 100644
--- a/src/OpenGL/compiler/OutputASM.h
+++ b/src/OpenGL/compiler/OutputASM.h
@@ -374,6 +374,8 @@
TQualifier outputQualifier;
+ std::set<int> deterministicVariables;
+
TParseContext &mContext;
};
diff --git a/src/Reactor/Reactor.hpp b/src/Reactor/Reactor.hpp
index a6d364f..f07dfa9 100644
--- a/src/Reactor/Reactor.hpp
+++ b/src/Reactor/Reactor.hpp
@@ -1822,6 +1822,16 @@
// RValue<Bool> operator!=(RValue<Int4> lhs, RValue<Int4> rhs);
// RValue<Bool> operator==(RValue<Int4> lhs, RValue<Int4> rhs);
+ inline RValue<Int4> operator+(RValue<Int> lhs, RValue<Int4> rhs)   // Mixed add: scalar Int on the left, Int4 vector on the right
+ {
+ return Int4(lhs) + rhs;   // Widen the scalar to an Int4 first (presumably replicated into every lane -- confirm against the Int4 constructor), then add as Int4
+ }
+
+ inline RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int> rhs)   // Mixed add: Int4 vector on the left, scalar Int on the right
+ {
+ return lhs + Int4(rhs);   // Widen the scalar to an Int4 first (presumably replicated into every lane -- confirm against the Int4 constructor), then add as Int4
+ }
+
RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y);
RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y);
RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y);
diff --git a/src/Shader/PixelProgram.cpp b/src/Shader/PixelProgram.cpp
index 0f0f6bd..3cedbce 100644
--- a/src/Shader/PixelProgram.cpp
+++ b/src/Shader/PixelProgram.cpp
@@ -366,14 +366,23 @@
if(dst.z) pDst.z = r[dst.index].z;
if(dst.w) pDst.w = r[dst.index].w;
}
+ else if(!dst.rel.dynamic)
+ {
+ Int a = dst.index + relativeAddress(dst.rel);
+
+ if(dst.x) pDst.x = r[a].x;
+ if(dst.y) pDst.y = r[a].y;
+ if(dst.z) pDst.z = r[a].z;
+ if(dst.w) pDst.w = r[a].w;
+ }
else
{
- Int a = relativeAddress(dst);
+ Int4 a = dst.index + dynamicAddress(dst.rel);
- if(dst.x) pDst.x = r[dst.index + a].x;
- if(dst.y) pDst.y = r[dst.index + a].y;
- if(dst.z) pDst.z = r[dst.index + a].z;
- if(dst.w) pDst.w = r[dst.index + a].w;
+ if(dst.x) pDst.x = r[a].x;
+ if(dst.y) pDst.y = r[a].y;
+ if(dst.z) pDst.z = r[a].z;
+ if(dst.w) pDst.w = r[a].w;
}
break;
case Shader::PARAMETER_COLOROUT:
@@ -384,9 +393,18 @@
if(dst.z) pDst.z = oC[dst.index].z;
if(dst.w) pDst.w = oC[dst.index].w;
}
+ else if(!dst.rel.dynamic)
+ {
+ Int a = dst.index + relativeAddress(dst.rel);
+
+ if(dst.x) pDst.x = oC[a].x;
+ if(dst.y) pDst.y = oC[a].y;
+ if(dst.z) pDst.z = oC[a].z;
+ if(dst.w) pDst.w = oC[a].w;
+ }
else
{
- Int a = relativeAddress(dst) + dst.index;
+ Int4 a = dst.index + dynamicAddress(dst.rel);
if(dst.x) pDst.x = oC[a].x;
if(dst.y) pDst.y = oC[a].y;
@@ -460,14 +478,23 @@
if(dst.z) r[dst.index].z = d.z;
if(dst.w) r[dst.index].w = d.w;
}
+ else if(!dst.rel.dynamic)
+ {
+ Int a = dst.index + relativeAddress(dst.rel);
+
+ if(dst.x) r[a].x = d.x;
+ if(dst.y) r[a].y = d.y;
+ if(dst.z) r[a].z = d.z;
+ if(dst.w) r[a].w = d.w;
+ }
else
{
- Int a = relativeAddress(dst);
+ Int4 a = dst.index + dynamicAddress(dst.rel);
- if(dst.x) r[dst.index + a].x = d.x;
- if(dst.y) r[dst.index + a].y = d.y;
- if(dst.z) r[dst.index + a].z = d.z;
- if(dst.w) r[dst.index + a].w = d.w;
+ if(dst.x) r.scatter_x(a, d.x);
+ if(dst.y) r.scatter_y(a, d.y);
+ if(dst.z) r.scatter_z(a, d.z);
+ if(dst.w) r.scatter_w(a, d.w);
}
break;
case Shader::PARAMETER_COLOROUT:
@@ -475,20 +502,30 @@
{
broadcastColor0 = (dst.index == 0) && broadcastColor0;
- if(dst.x) { oC[dst.index].x = d.x; }
- if(dst.y) { oC[dst.index].y = d.y; }
- if(dst.z) { oC[dst.index].z = d.z; }
- if(dst.w) { oC[dst.index].w = d.w; }
+ if(dst.x) oC[dst.index].x = d.x;
+ if(dst.y) oC[dst.index].y = d.y;
+ if(dst.z) oC[dst.index].z = d.z;
+ if(dst.w) oC[dst.index].w = d.w;
+ }
+ else if(!dst.rel.dynamic)
+ {
+ broadcastColor0 = false;
+ Int a = dst.index + relativeAddress(dst.rel);
+
+ if(dst.x) oC[a].x = d.x;
+ if(dst.y) oC[a].y = d.y;
+ if(dst.z) oC[a].z = d.z;
+ if(dst.w) oC[a].w = d.w;
}
else
{
broadcastColor0 = false;
- Int a = relativeAddress(dst) + dst.index;
+ Int4 a = dst.index + dynamicAddress(dst.rel);
- if(dst.x) { oC[a].x = d.x; }
- if(dst.y) { oC[a].y = d.y; }
- if(dst.z) { oC[a].z = d.z; }
- if(dst.w) { oC[a].w = d.w; }
+ if(dst.x) oC.scatter_x(a, d.x);
+ if(dst.y) oC.scatter_y(a, d.y);
+ if(dst.z) oC.scatter_z(a, d.z);
+ if(dst.w) oC.scatter_w(a, d.w);
}
break;
case Shader::PARAMETER_PREDICATE:
@@ -826,25 +863,27 @@
{
reg = r[i];
}
+ else if(!src.rel.dynamic)
+ {
+ reg = r[i + relativeAddress(src.rel, src.bufferIndex)];
+ }
else
{
- Int a = relativeAddress(src, src.bufferIndex);
-
- reg = r[i + a];
+ reg = r[i + dynamicAddress(src.rel)];
}
break;
case Shader::PARAMETER_INPUT:
+ if(src.rel.type == Shader::PARAMETER_VOID) // Not relative
{
- if(src.rel.type == Shader::PARAMETER_VOID) // Not relative
- {
- reg = v[i];
- }
- else
- {
- Int a = relativeAddress(src, src.bufferIndex);
-
- reg = v[i + a];
- }
+ reg = v[i];
+ }
+ else if(!src.rel.dynamic)
+ {
+ reg = v[i + relativeAddress(src.rel, src.bufferIndex)];
+ }
+ else
+ {
+ reg = v[i + dynamicAddress(src.rel)];
}
break;
case Shader::PARAMETER_CONST:
@@ -883,11 +922,13 @@
{
reg = oC[i];
}
+ else if(!src.rel.dynamic)
+ {
+ reg = oC[i + relativeAddress(src.rel, src.bufferIndex)];
+ }
else
{
- Int a = relativeAddress(src, src.bufferIndex);
-
- reg = oC[i + a];
+ reg = oC[i + dynamicAddress(src.rel)];
}
break;
case Shader::PARAMETER_DEPTHOUT:
@@ -995,20 +1036,9 @@
}
}
}
- else if(src.rel.type == Shader::PARAMETER_LOOP)
+ else if(!src.rel.dynamic || src.rel.type == Shader::PARAMETER_LOOP)
{
- Int loopCounter = aL[loopDepth];
-
- c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, loopCounter));
-
- c.x = c.x.xxxx;
- c.y = c.y.yyyy;
- c.z = c.z.zzzz;
- c.w = c.w.wwww;
- }
- else
- {
- Int a = relativeAddress(src, src.bufferIndex);
+ Int a = relativeAddress(src.rel, src.bufferIndex);
c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));
@@ -1017,31 +1047,69 @@
c.z = c.z.zzzz;
c.w = c.w.wwww;
}
+ else
+ {
+ int component = src.rel.swizzle & 0x03;
+ Float4 a;
+
+ switch(src.rel.type)
+ {
+ case Shader::PARAMETER_TEMP: a = r[src.rel.index][component]; break;
+ case Shader::PARAMETER_INPUT: a = v[src.rel.index][component]; break;
+ case Shader::PARAMETER_OUTPUT: a = oC[src.rel.index][component]; break;
+ case Shader::PARAMETER_CONST: a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break;
+ case Shader::PARAMETER_MISCTYPE:
+ switch(src.rel.index)
+ {
+ case Shader::VPosIndex: a = vPos.x; break;
+ case Shader::VFaceIndex: a = vFace.x; break;
+ default: ASSERT(false);
+ }
+ break;
+ default: ASSERT(false);
+ }
+
+ Int4 index = Int4(i) + As<Int4>(a) * Int4(src.rel.scale);
+
+ index = Min(As<UInt4>(index), UInt4(VERTEX_UNIFORM_VECTORS)); // Clamp to constant register range, c[VERTEX_UNIFORM_VECTORS] = {0, 0, 0, 0}
+
+ Int index0 = Extract(index, 0);
+ Int index1 = Extract(index, 1);
+ Int index2 = Extract(index, 2);
+ Int index3 = Extract(index, 3);
+
+ c.x = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index0), 16);
+ c.y = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index1), 16);
+ c.z = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index2), 16);
+ c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index3), 16);
+
+ transpose4x4(c.x, c.y, c.z, c.w);
+ }
return c;
}
- Int PixelProgram::relativeAddress(const Shader::Parameter &var, int bufferIndex)
+ Int PixelProgram::relativeAddress(const Shader::Relative &rel, int bufferIndex)
{
- ASSERT(var.rel.deterministic);
+ ASSERT(!rel.dynamic);
- if(var.rel.type == Shader::PARAMETER_TEMP)
+ if(rel.type == Shader::PARAMETER_TEMP)
{
- return As<Int>(Extract(r[var.rel.index].x, 0)) * var.rel.scale;
+ return As<Int>(Extract(r[rel.index].x, 0)) * rel.scale;
}
- else if(var.rel.type == Shader::PARAMETER_INPUT)
+ else if(rel.type == Shader::PARAMETER_INPUT)
{
- return As<Int>(Extract(v[var.rel.index].x, 0)) * var.rel.scale;
+ return As<Int>(Extract(v[rel.index].x, 0)) * rel.scale;
}
- else if(var.rel.type == Shader::PARAMETER_OUTPUT)
+ else if(rel.type == Shader::PARAMETER_OUTPUT)
{
- return As<Int>(Extract(oC[var.rel.index].x, 0)) * var.rel.scale;
+ return As<Int>(Extract(oC[rel.index].x, 0)) * rel.scale;
}
- else if(var.rel.type == Shader::PARAMETER_CONST)
+ else if(rel.type == Shader::PARAMETER_CONST)
{
- return *Pointer<Int>(uniformAddress(bufferIndex, var.rel.index)) * var.rel.scale;
+ return *Pointer<Int>(uniformAddress(bufferIndex, rel.index)) * rel.scale;
}
- else if(var.rel.type == Shader::PARAMETER_LOOP)
+ else if(rel.type == Shader::PARAMETER_LOOP)
{
return aL[loopDepth];
}
@@ -1050,6 +1118,30 @@
return 0;
}
+ Int4 PixelProgram::dynamicAddress(const Shader::Relative &rel)   // Returns the relative-address offset as an Int4; callers use it on the rel.dynamic path instead of relativeAddress()
+ {
+ int component = rel.swizzle & 0x03;   // Low two bits of the swizzle select which component of the index register is read
+ Float4 a;   // Holds the raw index value; reinterpreted as Int4 in the return statement
+
+ switch(rel.type)
+ {
+ case Shader::PARAMETER_TEMP: a = r[rel.index][component]; break;   // Index taken from a temporary register
+ case Shader::PARAMETER_INPUT: a = v[rel.index][component]; break;   // Index taken from an input register
+ case Shader::PARAMETER_OUTPUT: a = oC[rel.index][component]; break;   // Index taken from a color output register
+ case Shader::PARAMETER_MISCTYPE:
+ switch(rel.index)
+ {
+ case Shader::VPosIndex: a = vPos.x; break;   // 'component' is not applied here; .x is always used
+ case Shader::VFaceIndex: a = vFace.x; break;   // 'component' is not applied here; .x is always used
+ default: ASSERT(false);   // No other MISCTYPE index source is handled
+ }
+ break;
+ default: ASSERT(false);   // Any other rel.type (e.g. PARAMETER_CONST, PARAMETER_LOOP) is not handled by this function
+ }
+
+ return As<Int4>(a) * Int4(rel.scale);   // As<Int4> presumably reinterprets the bits rather than converting float-to-int (confirm in Reactor); result is multiplied by rel.scale
+ }
+
Float4 PixelProgram::linearToSRGB(const Float4 &x) // Approximates x^(1.0/2.2)
{
Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x));
diff --git a/src/Shader/PixelProgram.hpp b/src/Shader/PixelProgram.hpp
index ef6c2c0..240938d 100644
--- a/src/Shader/PixelProgram.hpp
+++ b/src/Shader/PixelProgram.hpp
@@ -94,7 +94,8 @@
Vector4f readConstant(const Src &src, unsigned int offset = 0);
RValue<Pointer<Byte>> uniformAddress(int bufferIndex, unsigned int index);
RValue<Pointer<Byte>> uniformAddress(int bufferIndex, unsigned int index, Int& offset);
- Int relativeAddress(const Shader::Parameter &var, int bufferIndex = -1);
+ Int relativeAddress(const Shader::Relative &rel, int bufferIndex = -1);
+ Int4 dynamicAddress(const Shader::Relative &rel);
Float4 linearToSRGB(const Float4 &x);
diff --git a/src/Shader/Shader.hpp b/src/Shader/Shader.hpp
index 6d431f5..9e4a810 100644
--- a/src/Shader/Shader.hpp
+++ b/src/Shader/Shader.hpp
@@ -402,7 +402,7 @@
unsigned int index;
unsigned int swizzle : 8;
unsigned int scale;
- bool deterministic; // Equal accross shader instances run in lockstep (e.g. unrollable loop couters)
+ bool dynamic; // Varies between concurrent shader instances
};
struct Parameter
@@ -433,7 +433,7 @@
rel.index = 0;
rel.swizzle = 0;
rel.scale = 1;
- rel.deterministic = false;
+ rel.dynamic = true;
}
std::string string(ShaderType shaderType, unsigned short version) const;
diff --git a/src/Shader/VertexProgram.cpp b/src/Shader/VertexProgram.cpp
index 8dbd600..ad4e37b 100644
--- a/src/Shader/VertexProgram.cpp
+++ b/src/Shader/VertexProgram.cpp
@@ -59,7 +59,7 @@
{
}
- void VertexProgram::pipeline(UInt& index)
+ void VertexProgram::pipeline(UInt &index)
{
if(!state.preTransformed)
{
@@ -71,7 +71,7 @@
}
}
- void VertexProgram::program(UInt& index)
+ void VertexProgram::program(UInt &index)
{
// shader->print("VertexShader-%0.8X.txt", state.shaderID);
@@ -372,14 +372,23 @@
if(dst.z) pDst.z = r[dst.index].z;
if(dst.w) pDst.w = r[dst.index].w;
}
+ else if(!dst.rel.dynamic)
+ {
+ Int a = dst.index + relativeAddress(dst.rel);
+
+ if(dst.x) pDst.x = r[a].x;
+ if(dst.y) pDst.y = r[a].y;
+ if(dst.z) pDst.z = r[a].z;
+ if(dst.w) pDst.w = r[a].w;
+ }
else
{
- Int a = relativeAddress(dst);
+ Int4 a = dst.index + dynamicAddress(dst.rel);
- if(dst.x) pDst.x = r[dst.index + a].x;
- if(dst.y) pDst.y = r[dst.index + a].y;
- if(dst.z) pDst.z = r[dst.index + a].z;
- if(dst.w) pDst.w = r[dst.index + a].w;
+ if(dst.x) pDst.x = r[a].x;
+ if(dst.y) pDst.y = r[a].y;
+ if(dst.z) pDst.z = r[a].z;
+ if(dst.w) pDst.w = r[a].w;
}
break;
case Shader::PARAMETER_ADDR: pDst = a0; break;
@@ -417,24 +426,30 @@
if(dst.z) pDst.z = o[T0 + dst.index].z;
if(dst.w) pDst.w = o[T0 + dst.index].w;
}
+ else if(dst.rel.type == Shader::PARAMETER_VOID) // Not relative
+ {
+ if(dst.x) pDst.x = o[dst.index].x;
+ if(dst.y) pDst.y = o[dst.index].y;
+ if(dst.z) pDst.z = o[dst.index].z;
+ if(dst.w) pDst.w = o[dst.index].w;
+ }
+ else if(!dst.rel.dynamic)
+ {
+ Int a = dst.index + relativeAddress(dst.rel);
+
+ if(dst.x) pDst.x = o[a].x;
+ if(dst.y) pDst.y = o[a].y;
+ if(dst.z) pDst.z = o[a].z;
+ if(dst.w) pDst.w = o[a].w;
+ }
else
{
- if(dst.rel.type == Shader::PARAMETER_VOID) // Not relative
- {
- if(dst.x) pDst.x = o[dst.index].x;
- if(dst.y) pDst.y = o[dst.index].y;
- if(dst.z) pDst.z = o[dst.index].z;
- if(dst.w) pDst.w = o[dst.index].w;
- }
- else
- {
- Int a = relativeAddress(dst);
+ Int4 a = dst.index + dynamicAddress(dst.rel);
- if(dst.x) pDst.x = o[dst.index + a].x;
- if(dst.y) pDst.y = o[dst.index + a].y;
- if(dst.z) pDst.z = o[dst.index + a].z;
- if(dst.w) pDst.w = o[dst.index + a].w;
- }
+ if(dst.x) pDst.x = o[a].x;
+ if(dst.y) pDst.y = o[a].y;
+ if(dst.z) pDst.z = o[a].z;
+ if(dst.w) pDst.w = o[a].w;
}
break;
case Shader::PARAMETER_LABEL: break;
@@ -499,14 +514,23 @@
if(dst.z) r[dst.index].z = d.z;
if(dst.w) r[dst.index].w = d.w;
}
+ else if(!dst.rel.dynamic)
+ {
+ Int a = dst.index + relativeAddress(dst.rel);
+
+ if(dst.x) r[a].x = d.x;
+ if(dst.y) r[a].y = d.y;
+ if(dst.z) r[a].z = d.z;
+ if(dst.w) r[a].w = d.w;
+ }
else
{
- Int a = relativeAddress(dst);
+ Int4 a = dst.index + dynamicAddress(dst.rel);
- if(dst.x) r[dst.index + a].x = d.x;
- if(dst.y) r[dst.index + a].y = d.y;
- if(dst.z) r[dst.index + a].z = d.z;
- if(dst.w) r[dst.index + a].w = d.w;
+ if(dst.x) r.scatter_x(a, d.x);
+ if(dst.y) r.scatter_y(a, d.y);
+ if(dst.z) r.scatter_z(a, d.z);
+ if(dst.w) r.scatter_w(a, d.w);
}
break;
case Shader::PARAMETER_ADDR:
@@ -548,24 +572,30 @@
if(dst.z) o[T0 + dst.index].z = d.z;
if(dst.w) o[T0 + dst.index].w = d.w;
}
+ else if(dst.rel.type == Shader::PARAMETER_VOID) // Not relative
+ {
+ if(dst.x) o[dst.index].x = d.x;
+ if(dst.y) o[dst.index].y = d.y;
+ if(dst.z) o[dst.index].z = d.z;
+ if(dst.w) o[dst.index].w = d.w;
+ }
+ else if(!dst.rel.dynamic)
+ {
+ Int a = dst.index + relativeAddress(dst.rel);
+
+ if(dst.x) o[a].x = d.x;
+ if(dst.y) o[a].y = d.y;
+ if(dst.z) o[a].z = d.z;
+ if(dst.w) o[a].w = d.w;
+ }
else
{
- if(dst.rel.type == Shader::PARAMETER_VOID) // Not relative
- {
- if(dst.x) o[dst.index].x = d.x;
- if(dst.y) o[dst.index].y = d.y;
- if(dst.z) o[dst.index].z = d.z;
- if(dst.w) o[dst.index].w = d.w;
- }
- else
- {
- Int a = relativeAddress(dst);
+ Int4 a = dst.index + dynamicAddress(dst.rel);
- if(dst.x) o[dst.index + a].x = d.x;
- if(dst.y) o[dst.index + a].y = d.y;
- if(dst.z) o[dst.index + a].z = d.z;
- if(dst.w) o[dst.index + a].w = d.w;
- }
+ if(dst.x) o.scatter_x(a, d.x);
+ if(dst.y) o.scatter_y(a, d.y);
+ if(dst.z) o.scatter_z(a, d.z);
+ if(dst.w) o.scatter_w(a, d.w);
}
break;
case Shader::PARAMETER_LABEL: break;
@@ -663,9 +693,13 @@
{
reg = r[i];
}
+ else if(!src.rel.dynamic)
+ {
+ reg = r[i + relativeAddress(src.rel, src.bufferIndex)];
+ }
else
{
- reg = r[i + relativeAddress(src, src.bufferIndex)];
+ reg = r[i + dynamicAddress(src.rel)];
}
break;
case Shader::PARAMETER_CONST:
@@ -676,9 +710,13 @@
{
reg = v[i];
}
+ else if(!src.rel.dynamic)
+ {
+ reg = v[i + relativeAddress(src.rel, src.bufferIndex)];
+ }
else
{
- reg = v[i + relativeAddress(src, src.bufferIndex)];
+ reg = v[i + dynamicAddress(src.rel)];
}
break;
case Shader::PARAMETER_VOID: return r[0]; // Dummy
@@ -708,9 +746,13 @@
{
reg = o[i];
}
+ else if(!src.rel.dynamic)
+ {
+ reg = o[i + relativeAddress(src.rel, src.bufferIndex)];
+ }
else
{
- reg = o[i + relativeAddress(src, src.bufferIndex)];
+ reg = o[i + dynamicAddress(src.rel)];
}
break;
case Shader::PARAMETER_MISCTYPE:
@@ -786,7 +828,7 @@
}
}
- RValue<Pointer<Byte>> VertexProgram::uniformAddress(int bufferIndex, unsigned int index, Int& offset)
+ RValue<Pointer<Byte>> VertexProgram::uniformAddress(int bufferIndex, unsigned int index, Int &offset)
{
return uniformAddress(bufferIndex, index) + offset * sizeof(float4);
}
@@ -826,11 +868,11 @@
}
}
}
- else if(src.rel.type == Shader::PARAMETER_LOOP)
+ else if(!src.rel.dynamic || src.rel.type == Shader::PARAMETER_LOOP)
{
- Int loopCounter = aL[loopDepth];
+ Int a = relativeAddress(src.rel, src.bufferIndex);
- c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, loopCounter));
+ c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));
c.x = c.x.xxxx;
c.y = c.y.yyyy;
@@ -839,85 +881,68 @@
}
else
{
- if(src.rel.deterministic)
+ int component = src.rel.swizzle & 0x03;
+ Float4 a;
+
+ switch(src.rel.type)
{
- Int a = relativeAddress(src, src.bufferIndex);
-
- c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));
-
- c.x = c.x.xxxx;
- c.y = c.y.yyyy;
- c.z = c.z.zzzz;
- c.w = c.w.wwww;
- }
- else
- {
- int component = src.rel.swizzle & 0x03;
- Float4 a;
-
- switch(src.rel.type)
+ case Shader::PARAMETER_ADDR: a = a0[component]; break;
+ case Shader::PARAMETER_TEMP: a = r[src.rel.index][component]; break;
+ case Shader::PARAMETER_INPUT: a = v[src.rel.index][component]; break;
+ case Shader::PARAMETER_OUTPUT: a = o[src.rel.index][component]; break;
+ case Shader::PARAMETER_CONST: a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break;
+ case Shader::PARAMETER_MISCTYPE:
+ switch(src.rel.index)
{
- case Shader::PARAMETER_ADDR: a = a0[component]; break;
- case Shader::PARAMETER_TEMP: a = r[src.rel.index][component]; break;
- case Shader::PARAMETER_INPUT: a = v[src.rel.index][component]; break;
- case Shader::PARAMETER_OUTPUT: a = o[src.rel.index][component]; break;
- case Shader::PARAMETER_CONST: a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break;
- case Shader::PARAMETER_MISCTYPE:
- if(src.rel.index == Shader::InstanceIDIndex)
- {
- a = As<Float4>(Int4(instanceID)); break;
- }
- else if(src.rel.index == Shader::VertexIDIndex)
- {
- a = As<Float4>(vertexID); break;
- }
- else ASSERT(false);
- break;
+ case Shader::InstanceIDIndex: a = As<Float4>(Int4(instanceID)); break;
+ case Shader::VertexIDIndex: a = As<Float4>(vertexID); break;
default: ASSERT(false);
}
-
- Int4 index = Int4(i) + As<Int4>(a) * Int4(src.rel.scale);
-
- index = Min(As<UInt4>(index), UInt4(VERTEX_UNIFORM_VECTORS)); // Clamp to constant register range, c[VERTEX_UNIFORM_VECTORS] = {0, 0, 0, 0}
-
- Int index0 = Extract(index, 0);
- Int index1 = Extract(index, 1);
- Int index2 = Extract(index, 2);
- Int index3 = Extract(index, 3);
-
- c.x = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index0), 16);
- c.y = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index1), 16);
- c.z = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index2), 16);
- c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index3), 16);
-
- transpose4x4(c.x, c.y, c.z, c.w);
+ break;
+ default: ASSERT(false);
}
+
+ Int4 index = Int4(i) + As<Int4>(a) * Int4(src.rel.scale);
+
+ index = Min(As<UInt4>(index), UInt4(VERTEX_UNIFORM_VECTORS)); // Clamp to constant register range, c[VERTEX_UNIFORM_VECTORS] = {0, 0, 0, 0}
+
+ Int index0 = Extract(index, 0);
+ Int index1 = Extract(index, 1);
+ Int index2 = Extract(index, 2);
+ Int index3 = Extract(index, 3);
+
+ c.x = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index0), 16);
+ c.y = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index1), 16);
+ c.z = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index2), 16);
+ c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index3), 16);
+
+ transpose4x4(c.x, c.y, c.z, c.w);
}
return c;
}
- Int VertexProgram::relativeAddress(const Shader::Parameter &var, int bufferIndex)
+ Int VertexProgram::relativeAddress(const Shader::Relative &rel, int bufferIndex)
{
- ASSERT(var.rel.deterministic);
+ ASSERT(!rel.dynamic);
- if(var.rel.type == Shader::PARAMETER_TEMP)
+ if(rel.type == Shader::PARAMETER_TEMP)
{
- return As<Int>(Extract(r[var.rel.index].x, 0)) * var.rel.scale;
+ return As<Int>(Extract(r[rel.index].x, 0)) * rel.scale;
}
- else if(var.rel.type == Shader::PARAMETER_INPUT)
+ else if(rel.type == Shader::PARAMETER_INPUT)
{
- return As<Int>(Extract(v[var.rel.index].x, 0)) * var.rel.scale;
+ return As<Int>(Extract(v[rel.index].x, 0)) * rel.scale;
}
- else if(var.rel.type == Shader::PARAMETER_OUTPUT)
+ else if(rel.type == Shader::PARAMETER_OUTPUT)
{
- return As<Int>(Extract(o[var.rel.index].x, 0)) * var.rel.scale;
+ return As<Int>(Extract(o[rel.index].x, 0)) * rel.scale;
}
- else if(var.rel.type == Shader::PARAMETER_CONST)
+ else if(rel.type == Shader::PARAMETER_CONST)
{
- return *Pointer<Int>(uniformAddress(bufferIndex, var.rel.index)) * var.rel.scale;
+ return *Pointer<Int>(uniformAddress(bufferIndex, rel.index)) * rel.scale;
}
- else if(var.rel.type == Shader::PARAMETER_LOOP)
+ else if(rel.type == Shader::PARAMETER_LOOP)
{
return aL[loopDepth];
}
@@ -926,6 +951,31 @@
return 0;
}
+ Int4 VertexProgram::dynamicAddress(const Shader::Relative &rel)   // Returns the relative-address offset as an Int4; callers use it on the rel.dynamic path instead of relativeAddress()
+ {
+ int component = rel.swizzle & 0x03;   // Low two bits of the swizzle select which component of the index register is read
+ Float4 a;   // Holds the raw index value; reinterpreted as Int4 in the return statement
+
+ switch(rel.type)
+ {
+ case Shader::PARAMETER_ADDR: a = a0[component]; break;   // Index taken from the a0 address register
+ case Shader::PARAMETER_TEMP: a = r[rel.index][component]; break;   // Index taken from a temporary register
+ case Shader::PARAMETER_INPUT: a = v[rel.index][component]; break;   // Index taken from an input register
+ case Shader::PARAMETER_OUTPUT: a = o[rel.index][component]; break;   // Index taken from an output register
+ case Shader::PARAMETER_MISCTYPE:
+ switch(rel.index)
+ {
+ case Shader::InstanceIDIndex: a = As<Float>(instanceID); break;   // NOTE(review): readConstant() writes this as As<Float4>(Int4(instanceID)); presumably equivalent via scalar-to-Float4 assignment -- confirm
+ case Shader::VertexIDIndex: a = As<Float4>(vertexID); break;   // 'component' is not applied to the MISCTYPE sources
+ default: ASSERT(false);   // No other MISCTYPE index source is handled
+ }
+ break;
+ default: ASSERT(false);   // Any other rel.type (e.g. PARAMETER_CONST, PARAMETER_LOOP) is not handled by this function
+ }
+
+ return As<Int4>(a) * Int4(rel.scale);   // As<Int4> presumably reinterprets the bits rather than converting float-to-int (confirm in Reactor); result is multiplied by rel.scale
+ }
+
Int4 VertexProgram::enableMask(const Shader::Instruction *instruction)
{
Int4 enable = instruction->analysisBranch ? Int4(enableStack[enableIndex]) : Int4(0xFFFFFFFF);
diff --git a/src/Shader/VertexProgram.hpp b/src/Shader/VertexProgram.hpp
index fcd2a93..3c4199c 100644
--- a/src/Shader/VertexProgram.hpp
+++ b/src/Shader/VertexProgram.hpp
@@ -70,8 +70,9 @@
Vector4f fetchRegister(const Src &src, unsigned int offset = 0);
Vector4f readConstant(const Src &src, unsigned int offset = 0);
RValue<Pointer<Byte>> uniformAddress(int bufferIndex, unsigned int index);
- RValue<Pointer<Byte>> uniformAddress(int bufferIndex, unsigned int index, Int& offset);
- Int relativeAddress(const Shader::Parameter &var, int bufferIndex = -1);
+ RValue<Pointer<Byte>> uniformAddress(int bufferIndex, unsigned int index, Int &offset);
+ Int relativeAddress(const Shader::Relative &rel, int bufferIndex = -1);
+ Int4 dynamicAddress(const Shader::Relative &rel);
Int4 enableMask(const Shader::Instruction *instruction);
void M3X2(Vector4f &dst, Vector4f &src0, Src &src1);