// SwiftShader Software Renderer
//
// Copyright(c) 2005-2011 TransGaming Inc.
//
// All rights reserved. No part of this software may be copied, distributed, transmitted,
// transcribed, stored in a retrieval system, translated into any human or computer
// language by any means, or disclosed to third parties without the explicit written
// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
// or implied, including but not limited to any patent rights, are granted to you.
//
| 11 | |
| 12 | #include "QuadRasterizer.hpp" |
| 13 | |
| 14 | #include "Math.hpp" |
| 15 | #include "Primitive.hpp" |
| 16 | #include "Renderer.hpp" |
| 17 | #include "Constants.hpp" |
| 18 | #include "Debug.hpp" |
| 19 | |
| 20 | namespace sw |
| 21 | { |
// Globals that are only declared here; their definitions are not part of this file.

// Enables the per-row scan in rasterize() that skips leading quads before quad() is called.
extern bool veryEarlyDepthTest;
// Selects the CmpLE comparison instead of CmpNLT in that scan.
extern bool complementaryDepthBuffer;

// Row pairs are interleaved over this many clusters: rasterize() steps y by 2 * clusterCount.
// The masking in generate() and the log2() shifts in rasterize() only make sense for a
// power of two -- NOTE(review): confirm where the value is set.
extern int clusterCount;
| 26 | |
| 27 | QuadRasterizer::QuadRasterizer(const PixelProcessor::State &state, const PixelShader *pixelShader) : PixelRoutine(state, pixelShader) |
| 28 | { |
| 29 | } |
| 30 | |
| 31 | QuadRasterizer::~QuadRasterizer() |
| 32 | { |
| 33 | } |
| 34 | |
| 35 | void QuadRasterizer::generate() |
| 36 | { |
| 37 | Function<Void, Pointer<Byte>, Int, Int, Pointer<Byte>> function; |
| 38 | { |
| 39 | #if PERF_PROFILE |
| 40 | Long pixelTime = Ticks(); |
| 41 | #endif |
| 42 | |
| 43 | Pointer<Byte> primitive(function.arg(0)); |
| 44 | Int count(function.arg(1)); |
| 45 | Int cluster(function.arg(2)); |
| 46 | Pointer<Byte> data(function.arg(3)); |
| 47 | |
| 48 | Registers r; |
| 49 | r.constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants)); |
| 50 | r.cluster = cluster; |
| 51 | r.data = data; |
| 52 | |
| 53 | Do |
| 54 | { |
| 55 | r.primitive = primitive; |
| 56 | |
| 57 | Int yMin = *Pointer<Int>(primitive + OFFSET(Primitive,yMin)); |
| 58 | Int yMax = *Pointer<Int>(primitive + OFFSET(Primitive,yMax)); |
| 59 | |
| 60 | Int cluster2 = r.cluster + r.cluster; |
| 61 | yMin += clusterCount * 2 - 2 - cluster2; |
| 62 | yMin &= -clusterCount * 2; |
| 63 | yMin += cluster2; |
| 64 | |
| 65 | If(yMin < yMax) |
| 66 | { |
| 67 | rasterize(r, yMin, yMax); |
| 68 | } |
| 69 | |
| 70 | primitive += sizeof(Primitive) * state.multiSample; |
| 71 | count--; |
| 72 | } |
| 73 | Until(count == 0) |
| 74 | |
| 75 | if(state.occlusionEnabled) |
| 76 | { |
| 77 | UInt clusterOcclusion = *Pointer<UInt>(data + OFFSET(DrawData,occlusion) + 4 * cluster); |
| 78 | clusterOcclusion += r.occlusion; |
| 79 | *Pointer<UInt>(data + OFFSET(DrawData,occlusion) + 4 * cluster) = clusterOcclusion; |
| 80 | } |
| 81 | |
| 82 | #if PERF_PROFILE |
| 83 | r.cycles[PERF_PIXEL] = Ticks() - pixelTime; |
| 84 | |
| 85 | for(int i = 0; i < PERF_TIMERS; i++) |
| 86 | { |
| 87 | *Pointer<Long>(data + OFFSET(DrawData,cycles[i]) + 8 * cluster) += r.cycles[i]; |
| 88 | } |
| 89 | #endif |
| 90 | |
| 91 | Return(); |
| 92 | } |
| 93 | |
| 94 | routine = function(L"PixelRoutine_%0.16llX", state.shaderHash); |
| 95 | } |
| 96 | |
| 97 | void QuadRasterizer::rasterize(Registers &r, Int &yMin, Int &yMax) |
| 98 | { |
| 99 | Pointer<Byte> cBuffer[4]; |
| 100 | Pointer<Byte> zBuffer; |
| 101 | Pointer<Byte> sBuffer; |
| 102 | |
| 103 | for(int index = 0; index < 4; index++) |
| 104 | { |
| 105 | if(state.colorWriteActive(index)) |
| 106 | { |
| 107 | cBuffer[index] = *Pointer<Pointer<Byte>>(r.data + OFFSET(DrawData,colorBuffer[index])) + yMin * *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])); |
| 108 | } |
| 109 | } |
| 110 | |
| 111 | if(state.depthTestActive) |
| 112 | { |
| 113 | zBuffer = *Pointer<Pointer<Byte>>(r.data + OFFSET(DrawData,depthBuffer)) + yMin * *Pointer<Int>(r.data + OFFSET(DrawData,depthPitchB)); |
| 114 | } |
| 115 | |
| 116 | if(state.stencilActive) |
| 117 | { |
| 118 | sBuffer = *Pointer<Pointer<Byte>>(r.data + OFFSET(DrawData,stencilBuffer)) + yMin * *Pointer<Int>(r.data + OFFSET(DrawData,stencilPitchB)); |
| 119 | } |
| 120 | |
| 121 | Int y = yMin; |
| 122 | |
| 123 | Do |
| 124 | { |
| 125 | Int x0; |
| 126 | Int x1; |
| 127 | Int x2; |
| 128 | |
| 129 | x0 = Int(*Pointer<Short>(r.primitive + OFFSET(Primitive,outline->left) + (y + 0) * sizeof(Primitive::Span))); |
| 130 | x2 = Int(*Pointer<Short>(r.primitive + OFFSET(Primitive,outline->left) + (y + 1) * sizeof(Primitive::Span))); |
| 131 | x0 = IfThenElse(x0 < x2, x0, x2); |
| 132 | |
| 133 | for(unsigned int q = 1; q < state.multiSample; q++) |
| 134 | { |
| 135 | Int x0q = Int(*Pointer<Short>(r.primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left) + (y + 0) * sizeof(Primitive::Span))); |
| 136 | Int x2q = Int(*Pointer<Short>(r.primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left) + (y + 1) * sizeof(Primitive::Span))); |
| 137 | x0q = IfThenElse(x0q < x2q, x0q, x2q); |
| 138 | |
| 139 | x0 = IfThenElse(x0q < x0, x0q, x0); |
| 140 | } |
| 141 | |
| 142 | x0 &= 0xFFFFFFFE; |
| 143 | |
| 144 | x1 = Int(*Pointer<Short>(r.primitive + OFFSET(Primitive,outline->right) + (y + 0) * sizeof(Primitive::Span))); |
| 145 | x2 = Int(*Pointer<Short>(r.primitive + OFFSET(Primitive,outline->right) + (y + 1) * sizeof(Primitive::Span))); |
| 146 | x1 = IfThenElse(x1 > x2, x1, x2); |
| 147 | |
| 148 | for(unsigned int q = 1; q < state.multiSample; q++) |
| 149 | { |
| 150 | Int x1q = Int(*Pointer<Short>(r.primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right) + (y + 0) * sizeof(Primitive::Span))); |
| 151 | Int x2q = Int(*Pointer<Short>(r.primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right) + (y + 1) * sizeof(Primitive::Span))); |
| 152 | x1q = IfThenElse(x1q > x2q, x1q, x2q); |
| 153 | |
| 154 | x1 = IfThenElse(x1q > x1, x1q, x1); |
| 155 | } |
| 156 | |
| 157 | Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(r.primitive + OFFSET(Primitive,yQuad), 16); |
| 158 | |
| 159 | if(state.depthTestActive || state.pixelFogActive()) |
| 160 | { |
| 161 | for(unsigned int q = 0; q < state.multiSample; q++) |
| 162 | { |
| 163 | Float4 y = yyyy; |
| 164 | |
| 165 | if(state.multiSample > 1) |
| 166 | { |
| 167 | y -= *Pointer<Float4>(r.constants + OFFSET(Constants,Y) + q * sizeof(float4)); |
| 168 | } |
| 169 | |
| 170 | r.Dz[q] = *Pointer<Float4>(r.primitive + OFFSET(Primitive,z.C), 16) + y * *Pointer<Float4>(r.primitive + OFFSET(Primitive,z.B), 16); |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | if(veryEarlyDepthTest && state.multiSample == 1) |
| 175 | { |
| 176 | if(!state.stencilActive && state.depthTestActive && (state.depthCompareMode == Context::DEPTH_LESSEQUAL || state.depthCompareMode == Context::DEPTH_LESS)) // FIXME: Both modes ok? |
| 177 | { |
| 178 | Float4 xxxx = Float4(Float(x0)) + *Pointer<Float4>(r.primitive + OFFSET(Primitive,xQuad), 16); |
| 179 | |
| 180 | Pointer<Byte> buffer; |
| 181 | Int pitch; |
| 182 | |
| 183 | if(!state.quadLayoutDepthBuffer) |
| 184 | { |
| 185 | buffer = zBuffer + 4 * x0; |
| 186 | pitch = *Pointer<Int>(r.data + OFFSET(DrawData,depthPitchB)); |
| 187 | } |
| 188 | else |
| 189 | { |
| 190 | buffer = zBuffer + 8 * x0; |
| 191 | } |
| 192 | |
| 193 | For(Int x = x0, x < x1, x += 2) |
| 194 | { |
| 195 | Float4 z = interpolate(xxxx, r.Dz[0], z, r.primitive + OFFSET(Primitive,z), false, false); |
| 196 | |
| 197 | Float4 zValue; |
| 198 | |
| 199 | if(!state.quadLayoutDepthBuffer) |
| 200 | { |
| 201 | // FIXME: Properly optimizes? |
| 202 | zValue.xy = *Pointer<Float4>(buffer); |
| 203 | zValue.zw = *Pointer<Float4>(buffer + pitch - 8); |
| 204 | } |
| 205 | else |
| 206 | { |
| 207 | zValue = *Pointer<Float4>(buffer, 16); |
| 208 | } |
| 209 | |
| 210 | Int4 zTest; |
| 211 | |
| 212 | if(complementaryDepthBuffer) |
| 213 | { |
| 214 | zTest = CmpLE(zValue, z); |
| 215 | } |
| 216 | else |
| 217 | { |
| 218 | zTest = CmpNLT(zValue, z); |
| 219 | } |
| 220 | |
| 221 | Int zMask = SignMask(zTest); |
| 222 | |
| 223 | If(zMask == 0) |
| 224 | { |
| 225 | x0 += 2; |
| 226 | } |
| 227 | Else |
| 228 | { |
| 229 | x = x1; |
| 230 | } |
| 231 | |
| 232 | xxxx += Float4(2); |
| 233 | |
| 234 | if(!state.quadLayoutDepthBuffer) |
| 235 | { |
| 236 | buffer += 8; |
| 237 | } |
| 238 | else |
| 239 | { |
| 240 | buffer += 16; |
| 241 | } |
| 242 | } |
| 243 | } |
| 244 | } |
| 245 | |
| 246 | If(x0 < x1) |
| 247 | { |
| 248 | if(state.perspective) |
| 249 | { |
| 250 | r.Dw = *Pointer<Float4>(r.primitive + OFFSET(Primitive,w.C), 16) + yyyy * *Pointer<Float4>(r.primitive + OFFSET(Primitive,w.B), 16); |
| 251 | } |
| 252 | |
| 253 | for(int interpolant = 0; interpolant < 11; interpolant++) |
| 254 | { |
| 255 | int componentCount = interpolant < 10 ? 4 : 1; // Fog only has one component |
| 256 | |
| 257 | for(int component = 0; component < componentCount; component++) |
| 258 | { |
| 259 | if(state.interpolant[interpolant].component & (1 << component)) |
| 260 | { |
| 261 | r.Dv[interpolant][component] = *Pointer<Float4>(r.primitive + OFFSET(Primitive,V[interpolant][component].C), 16); |
| 262 | |
| 263 | if(!(state.interpolant[interpolant].flat & (1 << component))) |
| 264 | { |
| 265 | r.Dv[interpolant][component] += yyyy * *Pointer<Float4>(r.primitive + OFFSET(Primitive,V[interpolant][component].B), 16); |
| 266 | } |
| 267 | } |
| 268 | } |
| 269 | } |
| 270 | |
| 271 | Short4 xLeft[4]; |
| 272 | Short4 xRight[4]; |
| 273 | |
| 274 | for(unsigned int q = 0; q < state.multiSample; q++) |
| 275 | { |
| 276 | xLeft[q] = *Pointer<Short4>(r.primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline) + y * sizeof(Primitive::Span)); |
| 277 | xRight[q] = xLeft[q]; |
| 278 | |
| 279 | xLeft[q] = Swizzle(xLeft[q], 0xA0) - Short4(1, 2, 1, 2); |
| 280 | xRight[q] = Swizzle(xRight[q], 0xF5) - Short4(0, 1, 0, 1); |
| 281 | } |
| 282 | |
| 283 | For(Int x = x0, x < x1, x += 2) |
| 284 | { |
| 285 | Short4 xxxx = Short4(x); |
| 286 | Int cMask[4]; |
| 287 | |
| 288 | for(unsigned int q = 0; q < state.multiSample; q++) |
| 289 | { |
| 290 | Short4 mask = CmpGT(xxxx, xLeft[q]) & CmpGT(xRight[q], xxxx); |
| 291 | cMask[q] = SignMask(Pack(mask, mask)) & 0x0000000F; |
| 292 | } |
| 293 | |
| 294 | quad(r, cBuffer, zBuffer, sBuffer, cMask, x, y); |
| 295 | } |
| 296 | } |
| 297 | |
| 298 | for(int index = 0; index < 4; index++) |
| 299 | { |
| 300 | if(state.colorWriteActive(index)) |
| 301 | { |
| 302 | cBuffer[index] += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index])) << (1 + log2(clusterCount)); // FIXME: Precompute |
| 303 | } |
| 304 | } |
| 305 | |
| 306 | if(state.depthTestActive) |
| 307 | { |
| 308 | zBuffer += *Pointer<Int>(r.data + OFFSET(DrawData,depthPitchB)) << (1 + log2(clusterCount)); // FIXME: Precompute |
| 309 | } |
| 310 | |
| 311 | if(state.stencilActive) |
| 312 | { |
| 313 | sBuffer += *Pointer<Int>(r.data + OFFSET(DrawData,stencilPitchB)) << (1 + log2(clusterCount)); // FIXME: Precompute |
| 314 | } |
| 315 | |
| 316 | y += 2 * clusterCount; |
| 317 | } |
| 318 | Until(y >= yMax) |
| 319 | } |
| 320 | } |