blob: bee130a8a6ae513b7be90664f63c96310ce70588 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#include "Renderer.hpp"
16
17#include "Clipper.hpp"
18#include "Surface.hpp"
19#include "Primitive.hpp"
20#include "Polygon.hpp"
21#include "WSI/FrameBuffer.hpp"
22#include "Device/SwiftConfig.hpp"
23#include "Reactor/Reactor.hpp"
24#include "Pipeline/Constants.hpp"
25#include "System/MutexLock.hpp"
26#include "System/CPUID.hpp"
27#include "System/Memory.hpp"
28#include "System/Resource.hpp"
29#include "System/Half.hpp"
30#include "System/Math.hpp"
31#include "System/Timer.hpp"
32#include "Vulkan/VkDebug.hpp"
33
34#undef max
35
// Handed to the SwiftConfig constructor whenever a Renderer is created.
bool disableServer = true;

#ifndef NDEBUG
// Debug-only window on the primitive count: Renderer::draw() returns early
// for draws with fewer than minPrimitives or more than maxPrimitives.
unsigned int minPrimitives = 1;
unsigned int maxPrimitives = 1 << 21;
#endif
42
43namespace sw
44{
45 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates
46 extern bool booleanFaceRegister;
47 extern bool fullPixelPositionRegister;
48 extern bool leadingVertexFirst; // Flat shading uses first vertex, else last
49 extern bool secondaryColor; // Specular lighting is applied after texturing
50 extern bool colorsDefaultToZero;
51
52 extern bool forceWindowed;
53 extern bool complementaryDepthBuffer;
54 extern bool postBlendSRGB;
55 extern bool exactColorRounding;
56 extern TransparencyAntialiasing transparencyAntialiasing;
57 extern bool forceClearRegisters;
58
59 extern bool precacheVertex;
60 extern bool precacheSetup;
61 extern bool precachePixel;
62
63 static const int batchSize = 128;
64 AtomicInt threadCount(1);
65 AtomicInt Renderer::unitCount(1);
66 AtomicInt Renderer::clusterCount(1);
67
68 TranscendentalPrecision logPrecision = ACCURATE;
69 TranscendentalPrecision expPrecision = ACCURATE;
70 TranscendentalPrecision rcpPrecision = ACCURATE;
71 TranscendentalPrecision rsqPrecision = ACCURATE;
72 bool perspectiveCorrection = true;
73
74 static void setGlobalRenderingSettings(Conventions conventions, bool exactColorRounding)
75 {
76 static bool initialized = false;
77
78 if(!initialized)
79 {
80 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
81 sw::booleanFaceRegister = conventions.booleanFaceRegister;
82 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
83 sw::leadingVertexFirst = conventions.leadingVertexFirst;
84 sw::secondaryColor = conventions.secondaryColor;
85 sw::colorsDefaultToZero = conventions.colorsDefaultToZero;
86 sw::exactColorRounding = exactColorRounding;
87 initialized = true;
88 }
89 }
90
91 struct Parameters
92 {
93 Renderer *renderer;
94 int threadIndex;
95 };
96
97 DrawCall::DrawCall()
98 {
99 queries = 0;
100
101 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
102 vsDirtyConstI = 16;
103 vsDirtyConstB = 16;
104
105 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
106 psDirtyConstI = 16;
107 psDirtyConstB = 16;
108
109 references = -1;
110
111 data = (DrawData*)allocate(sizeof(DrawData));
112 data->constants = &constants;
113 }
114
115 DrawCall::~DrawCall()
116 {
117 delete queries;
118
119 deallocate(data);
120 }
121
122 Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
123 {
124 setGlobalRenderingSettings(conventions, exactColorRounding);
125
126 setRenderTarget(0, nullptr);
127 clipper = new Clipper;
128 blitter = new Blitter;
129
130 updateClipPlanes = true;
131
132 #if PERF_HUD
133 resetTimers();
134 #endif
135
136 for(int i = 0; i < 16; i++)
137 {
138 vertexTask[i] = nullptr;
139
140 worker[i] = nullptr;
141 resume[i] = nullptr;
142 suspend[i] = nullptr;
143 }
144
145 threadsAwake = 0;
146 resumeApp = new Event();
147
148 currentDraw = 0;
149 nextDraw = 0;
150
151 qHead = 0;
152 qSize = 0;
153
154 for(int i = 0; i < 16; i++)
155 {
156 triangleBatch[i] = nullptr;
157 primitiveBatch[i] = nullptr;
158 }
159
160 for(int draw = 0; draw < DRAW_COUNT; draw++)
161 {
162 drawCall[draw] = new DrawCall();
163 drawList[draw] = drawCall[draw];
164 }
165
166 for(int unit = 0; unit < 16; unit++)
167 {
168 primitiveProgress[unit].init();
169 }
170
171 for(int cluster = 0; cluster < 16; cluster++)
172 {
173 pixelProgress[cluster].init();
174 }
175
176 clipFlags = 0;
177
178 swiftConfig = new SwiftConfig(disableServer);
179 updateConfiguration(true);
180
181 sync = new Resource(0);
182 }
183
184 Renderer::~Renderer()
185 {
186 sync->destruct();
187
188 delete clipper;
189 clipper = nullptr;
190
191 delete blitter;
192 blitter = nullptr;
193
194 terminateThreads();
195 delete resumeApp;
196
197 for(int draw = 0; draw < DRAW_COUNT; draw++)
198 {
199 delete drawCall[draw];
200 }
201
202 delete swiftConfig;
203 }
204
205 // This object has to be mem aligned
206 void* Renderer::operator new(size_t size)
207 {
208 ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class
209 return sw::allocate(sizeof(Renderer), 16);
210 }
211
212 void Renderer::operator delete(void * mem)
213 {
214 sw::deallocate(mem);
215 }
216
217 void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
218 {
219 #ifndef NDEBUG
220 if(count < minPrimitives || count > maxPrimitives)
221 {
222 return;
223 }
224 #endif
225
226 context->drawType = drawType;
227
228 updateConfiguration();
229 updateClipper();
230
231 int ms = context->getMultiSampleCount();
232 unsigned int oldMultiSampleMask = context->multiSampleMask;
233 context->multiSampleMask = context->sampleMask & ((unsigned)0xFFFFFFFF >> (32 - ms));
234
235 if(!context->multiSampleMask)
236 {
237 return;
238 }
239
240 sync->lock(sw::PRIVATE);
241
242 if(update || oldMultiSampleMask != context->multiSampleMask)
243 {
244 vertexState = VertexProcessor::update(drawType);
245 setupState = SetupProcessor::update();
246 pixelState = PixelProcessor::update();
247
248 vertexRoutine = VertexProcessor::routine(vertexState);
249 setupRoutine = SetupProcessor::routine(setupState);
250 pixelRoutine = PixelProcessor::routine(pixelState);
251 }
252
253 int batch = batchSize / ms;
254
255 int (Renderer::*setupPrimitives)(int batch, int count);
256
257 if(context->isDrawTriangle())
258 {
259 setupPrimitives = &Renderer::setupTriangles;
260 }
261 else if(context->isDrawLine())
262 {
263 setupPrimitives = &Renderer::setupLines;
264 }
265 else // Point draw
266 {
267 setupPrimitives = &Renderer::setupPoints;
268 }
269
270 DrawCall *draw = nullptr;
271
272 do
273 {
274 for(int i = 0; i < DRAW_COUNT; i++)
275 {
276 if(drawCall[i]->references == -1)
277 {
278 draw = drawCall[i];
279 drawList[nextDraw & DRAW_COUNT_BITS] = draw;
280
281 break;
282 }
283 }
284
285 if(!draw)
286 {
287 resumeApp->wait();
288 }
289 }
290 while(!draw);
291
292 DrawData *data = draw->data;
293
294 if(queries.size() != 0)
295 {
296 draw->queries = new std::list<Query*>();
297 bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
298 for(auto &query : queries)
299 {
300 if(includePrimitivesWrittenQueries || (query->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
301 {
302 ++query->reference; // Atomic
303 draw->queries->push_back(query);
304 }
305 }
306 }
307
308 draw->drawType = drawType;
309 draw->batchSize = batch;
310
311 vertexRoutine->bind();
312 setupRoutine->bind();
313 pixelRoutine->bind();
314
315 draw->vertexRoutine = vertexRoutine;
316 draw->setupRoutine = setupRoutine;
317 draw->pixelRoutine = pixelRoutine;
318 draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
319 draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
320 draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
321 draw->setupPrimitives = setupPrimitives;
322 draw->setupState = setupState;
323
324 for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
325 {
326 draw->vertexStream[i] = context->input[i].resource;
327 data->input[i] = context->input[i].buffer;
328 data->stride[i] = context->input[i].stride;
329
330 if(draw->vertexStream[i])
331 {
332 draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
333 }
334 }
335
336 if(context->indexBuffer)
337 {
338 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
339 }
340
341 draw->indexBuffer = context->indexBuffer;
342
343 for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
344 {
345 draw->texture[sampler] = 0;
346 }
347
348 for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
349 {
350 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
351 {
352 draw->texture[sampler] = context->texture[sampler];
353 draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE); // If the texure is both read and written, use the same read/write lock as render targets
354
355 data->mipmap[sampler] = context->sampler[sampler].getTextureData();
356 }
357 }
358
359 if(context->pixelShader)
360 {
361 if(draw->psDirtyConstF)
362 {
363 memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
364 draw->psDirtyConstF = 0;
365 }
366
367 if(draw->psDirtyConstI)
368 {
369 memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
370 draw->psDirtyConstI = 0;
371 }
372
373 if(draw->psDirtyConstB)
374 {
375 memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
376 draw->psDirtyConstB = 0;
377 }
378
379 PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
380 }
381 else
382 {
383 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
384 {
385 draw->pUniformBuffers[i] = nullptr;
386 }
387 }
388
389 for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
390 {
391 if(vertexState.sampler[sampler].textureType != TEXTURE_NULL)
392 {
393 draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
394 draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
395
396 data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
397 }
398 }
399
400 if(draw->vsDirtyConstF)
401 {
402 memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
403 draw->vsDirtyConstF = 0;
404 }
405
406 if(draw->vsDirtyConstI)
407 {
408 memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
409 draw->vsDirtyConstI = 0;
410 }
411
412 if(draw->vsDirtyConstB)
413 {
414 memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
415 draw->vsDirtyConstB = 0;
416 }
417
418 if(context->vertexShader->isInstanceIdDeclared())
419 {
420 data->instanceID = context->instanceID;
421 }
422
423 VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
424 VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
425
426 if(pixelState.stencilActive)
427 {
428 data->stencil[0] = stencil;
429 data->stencil[1] = stencilCCW;
430 }
431
432 if(setupState.isDrawPoint)
433 {
434 data->pointSizeMin = pointSizeMin;
435 data->pointSizeMax = pointSizeMax;
436 }
437
438 data->lineWidth = context->lineWidth;
439
440 data->factor = factor;
441
442 if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
443 {
444 float ref = context->alphaReference * (1.0f / 255.0f);
445 float margin = sw::min(ref, 1.0f - ref);
446
447 if(ms == 4)
448 {
449 data->a2c0 = replicate(ref - margin * 0.6f);
450 data->a2c1 = replicate(ref - margin * 0.2f);
451 data->a2c2 = replicate(ref + margin * 0.2f);
452 data->a2c3 = replicate(ref + margin * 0.6f);
453 }
454 else if(ms == 2)
455 {
456 data->a2c0 = replicate(ref - margin * 0.3f);
457 data->a2c1 = replicate(ref + margin * 0.3f);
458 }
459 else ASSERT(false);
460 }
461
462 if(pixelState.occlusionEnabled)
463 {
464 for(int cluster = 0; cluster < clusterCount; cluster++)
465 {
466 data->occlusion[cluster] = 0;
467 }
468 }
469
470 #if PERF_PROFILE
471 for(int cluster = 0; cluster < clusterCount; cluster++)
472 {
473 for(int i = 0; i < PERF_TIMERS; i++)
474 {
475 data->cycles[i][cluster] = 0;
476 }
477 }
478 #endif
479
480 // Viewport
481 {
482 float W = 0.5f * viewport.width;
483 float H = 0.5f * viewport.height;
484 float X0 = viewport.x + W;
485 float Y0 = viewport.y + H;
486 float N = viewport.minDepth;
487 float F = viewport.maxDepth;
488 float Z = F - N;
489
490 if(context->isDrawTriangle())
491 {
492 N += context->depthBias;
493 }
494
495 if(complementaryDepthBuffer)
496 {
497 Z = -Z;
498 N = 1 - N;
499 }
500
501 data->Wx16 = replicate(W * 16);
502 data->Hx16 = replicate(H * 16);
503 data->X0x16 = replicate(X0 * 16 - 8);
504 data->Y0x16 = replicate(Y0 * 16 - 8);
505 data->halfPixelX = replicate(0.5f / W);
506 data->halfPixelY = replicate(0.5f / H);
507 data->viewportHeight = abs(viewport.height);
508 data->slopeDepthBias = context->slopeDepthBias;
509 data->depthRange = Z;
510 data->depthNear = N;
511 draw->clipFlags = clipFlags;
512
513 if(clipFlags)
514 {
515 if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
516 if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
517 if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
518 if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
519 if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
520 if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
521 }
522 }
523
524 // Target
525 {
526 for(int index = 0; index < RENDERTARGETS; index++)
527 {
528 draw->renderTarget[index] = context->renderTarget[index];
529
530 if(draw->renderTarget[index])
531 {
532 unsigned int layer = context->renderTargetLayer[index];
533 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED);
534 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
535 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
536 }
537 }
538
539 draw->depthBuffer = context->depthBuffer;
540 draw->stencilBuffer = context->stencilBuffer;
541
542 if(draw->depthBuffer)
543 {
544 unsigned int layer = context->depthBufferLayer;
545 data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED);
546 data->depthPitchB = context->depthBuffer->getInternalPitchB();
547 data->depthSliceB = context->depthBuffer->getInternalSliceB();
548 }
549
550 if(draw->stencilBuffer)
551 {
552 unsigned int layer = context->stencilBufferLayer;
553 data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, layer, MANAGED);
554 data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
555 data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
556 }
557 }
558
559 // Scissor
560 {
561 data->scissorX0 = scissor.x0;
562 data->scissorX1 = scissor.x1;
563 data->scissorY0 = scissor.y0;
564 data->scissorY1 = scissor.y1;
565 }
566
567 draw->primitive = 0;
568 draw->count = count;
569
570 draw->references = (count + batch - 1) / batch;
571
572 schedulerMutex.lock();
573 ++nextDraw; // Atomic
574 schedulerMutex.unlock();
575
576 #ifndef NDEBUG
577 if(threadCount == 1) // Use main thread for draw execution
578 {
579 threadsAwake = 1;
580 task[0].type = Task::RESUME;
581
582 taskLoop(0);
583 }
584 else
585 #endif
586 {
587 if(!threadsAwake)
588 {
589 suspend[0]->wait();
590
591 threadsAwake = 1;
592 task[0].type = Task::RESUME;
593
594 resume[0]->signal();
595 }
596 }
597 }
598
599 void Renderer::clear(void *value, VkFormat format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask)
600 {
601 blitter->clear(value, format, dest, clearRect, rgbaMask);
602 }
603
604 void Renderer::blit(Surface *source, const SliceRectF &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil, bool sRGBconversion)
605 {
606 blitter->blit(source, sRect, dest, dRect, {filter, isStencil, sRGBconversion});
607 }
608
609 void Renderer::blit3D(Surface *source, Surface *dest)
610 {
611 blitter->blit3D(source, dest);
612 }
613
614 void Renderer::threadFunction(void *parameters)
615 {
616 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
617 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
618
619 if(logPrecision < IEEE)
620 {
621 CPUID::setFlushToZero(true);
622 CPUID::setDenormalsAreZero(true);
623 }
624
625 renderer->threadLoop(threadIndex);
626 }
627
628 void Renderer::threadLoop(int threadIndex)
629 {
630 while(!exitThreads)
631 {
632 taskLoop(threadIndex);
633
634 suspend[threadIndex]->signal();
635 resume[threadIndex]->wait();
636 }
637 }
638
639 void Renderer::taskLoop(int threadIndex)
640 {
641 while(task[threadIndex].type != Task::SUSPEND)
642 {
643 scheduleTask(threadIndex);
644 executeTask(threadIndex);
645 }
646 }
647
648 void Renderer::findAvailableTasks()
649 {
650 // Find pixel tasks
651 for(int cluster = 0; cluster < clusterCount; cluster++)
652 {
653 if(!pixelProgress[cluster].executing)
654 {
655 for(int unit = 0; unit < unitCount; unit++)
656 {
657 if(primitiveProgress[unit].references > 0) // Contains processed primitives
658 {
659 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
660 {
661 if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive) // Previous primitives have been rendered
662 {
663 Task &task = taskQueue[qHead];
664 task.type = Task::PIXELS;
665 task.primitiveUnit = unit;
666 task.pixelCluster = cluster;
667
668 pixelProgress[cluster].executing = true;
669
670 // Commit to the task queue
671 qHead = (qHead + 1) & TASK_COUNT_BITS;
672 qSize++;
673
674 break;
675 }
676 }
677 }
678 }
679 }
680 }
681
682 // Find primitive tasks
683 if(currentDraw == nextDraw)
684 {
685 return; // No more primitives to process
686 }
687
688 for(int unit = 0; unit < unitCount; unit++)
689 {
690 DrawCall *draw = drawList[currentDraw & DRAW_COUNT_BITS];
691
692 int primitive = draw->primitive;
693 int count = draw->count;
694
695 if(primitive >= count)
696 {
697 ++currentDraw; // Atomic
698
699 if(currentDraw == nextDraw)
700 {
701 return; // No more primitives to process
702 }
703
704 draw = drawList[currentDraw & DRAW_COUNT_BITS];
705 }
706
707 if(!primitiveProgress[unit].references) // Task not already being executed and not still in use by a pixel unit
708 {
709 primitive = draw->primitive;
710 count = draw->count;
711 int batch = draw->batchSize;
712
713 primitiveProgress[unit].drawCall = currentDraw;
714 primitiveProgress[unit].firstPrimitive = primitive;
715 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
716
717 draw->primitive += batch;
718
719 Task &task = taskQueue[qHead];
720 task.type = Task::PRIMITIVES;
721 task.primitiveUnit = unit;
722
723 primitiveProgress[unit].references = -1;
724
725 // Commit to the task queue
726 qHead = (qHead + 1) & TASK_COUNT_BITS;
727 qSize++;
728 }
729 }
730 }
731
732 void Renderer::scheduleTask(int threadIndex)
733 {
734 schedulerMutex.lock();
735
736 int curThreadsAwake = threadsAwake;
737
738 if((int)qSize < threadCount - curThreadsAwake + 1)
739 {
740 findAvailableTasks();
741 }
742
743 if(qSize != 0)
744 {
745 task[threadIndex] = taskQueue[(qHead - qSize) & TASK_COUNT_BITS];
746 qSize--;
747
748 if(curThreadsAwake != threadCount)
749 {
750 int wakeup = qSize - curThreadsAwake + 1;
751
752 for(int i = 0; i < threadCount && wakeup > 0; i++)
753 {
754 if(task[i].type == Task::SUSPEND)
755 {
756 suspend[i]->wait();
757 task[i].type = Task::RESUME;
758 resume[i]->signal();
759
760 ++threadsAwake; // Atomic
761 wakeup--;
762 }
763 }
764 }
765 }
766 else
767 {
768 task[threadIndex].type = Task::SUSPEND;
769
770 --threadsAwake; // Atomic
771 }
772
773 schedulerMutex.unlock();
774 }
775
776 void Renderer::executeTask(int threadIndex)
777 {
778 #if PERF_HUD
779 int64_t startTick = Timer::ticks();
780 #endif
781
782 switch(task[threadIndex].type)
783 {
784 case Task::PRIMITIVES:
785 {
786 int unit = task[threadIndex].primitiveUnit;
787
788 int input = primitiveProgress[unit].firstPrimitive;
789 int count = primitiveProgress[unit].primitiveCount;
790 DrawCall *draw = drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
791 int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;
792
793 processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
794
795 #if PERF_HUD
796 int64_t time = Timer::ticks();
797 vertexTime[threadIndex] += time - startTick;
798 startTick = time;
799 #endif
800
801 int visible = 0;
802
803 if(!draw->setupState.rasterizerDiscard)
804 {
805 visible = (this->*setupPrimitives)(unit, count);
806 }
807
808 primitiveProgress[unit].visible = visible;
809 primitiveProgress[unit].references = clusterCount;
810
811 #if PERF_HUD
812 setupTime[threadIndex] += Timer::ticks() - startTick;
813 #endif
814 }
815 break;
816 case Task::PIXELS:
817 {
818 int unit = task[threadIndex].primitiveUnit;
819 int visible = primitiveProgress[unit].visible;
820
821 if(visible > 0)
822 {
823 int cluster = task[threadIndex].pixelCluster;
824 Primitive *primitive = primitiveBatch[unit];
825 DrawCall *draw = drawList[pixelProgress[cluster].drawCall & DRAW_COUNT_BITS];
826 DrawData *data = draw->data;
827 PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
828
829 pixelRoutine(primitive, visible, cluster, data);
830 }
831
832 finishRendering(task[threadIndex]);
833
834 #if PERF_HUD
835 pixelTime[threadIndex] += Timer::ticks() - startTick;
836 #endif
837 }
838 break;
839 case Task::RESUME:
840 break;
841 case Task::SUSPEND:
842 break;
843 default:
844 ASSERT(false);
845 }
846 }
847
848 void Renderer::synchronize()
849 {
850 sync->lock(sw::PUBLIC);
851 sync->unlock();
852 }
853
854 void Renderer::finishRendering(Task &pixelTask)
855 {
856 int unit = pixelTask.primitiveUnit;
857 int cluster = pixelTask.pixelCluster;
858
859 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
860 DrawData &data = *draw.data;
861 int primitive = primitiveProgress[unit].firstPrimitive;
862 int count = primitiveProgress[unit].primitiveCount;
863 int processedPrimitives = primitive + count;
864
865 pixelProgress[cluster].processedPrimitives = processedPrimitives;
866
867 if(pixelProgress[cluster].processedPrimitives >= draw.count)
868 {
869 ++pixelProgress[cluster].drawCall; // Atomic
870 pixelProgress[cluster].processedPrimitives = 0;
871 }
872
873 int ref = primitiveProgress[unit].references--; // Atomic
874
875 if(ref == 0)
876 {
877 ref = draw.references--; // Atomic
878
879 if(ref == 0)
880 {
881 #if PERF_PROFILE
882 for(int cluster = 0; cluster < clusterCount; cluster++)
883 {
884 for(int i = 0; i < PERF_TIMERS; i++)
885 {
886 profiler.cycles[i] += data.cycles[i][cluster];
887 }
888 }
889 #endif
890
891 if(draw.queries)
892 {
893 for(auto &query : *(draw.queries))
894 {
895 switch(query->type)
896 {
897 case Query::FRAGMENTS_PASSED:
898 for(int cluster = 0; cluster < clusterCount; cluster++)
899 {
900 query->data += data.occlusion[cluster];
901 }
902 break;
903 case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
904 query->data += processedPrimitives;
905 break;
906 default:
907 break;
908 }
909
910 --query->reference; // Atomic
911 }
912
913 delete draw.queries;
914 draw.queries = 0;
915 }
916
917 for(int i = 0; i < RENDERTARGETS; i++)
918 {
919 if(draw.renderTarget[i])
920 {
921 draw.renderTarget[i]->unlockInternal();
922 }
923 }
924
925 if(draw.depthBuffer)
926 {
927 draw.depthBuffer->unlockInternal();
928 }
929
930 if(draw.stencilBuffer)
931 {
932 draw.stencilBuffer->unlockStencil();
933 }
934
935 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
936 {
937 if(draw.texture[i])
938 {
939 draw.texture[i]->unlock();
940 }
941 }
942
943 for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
944 {
945 if(draw.vertexStream[i])
946 {
947 draw.vertexStream[i]->unlock();
948 }
949 }
950
951 if(draw.indexBuffer)
952 {
953 draw.indexBuffer->unlock();
954 }
955
956 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
957 {
958 if(draw.pUniformBuffers[i])
959 {
960 draw.pUniformBuffers[i]->unlock();
961 }
962 if(draw.vUniformBuffers[i])
963 {
964 draw.vUniformBuffers[i]->unlock();
965 }
966 }
967
968 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
969 {
970 if(draw.transformFeedbackBuffers[i])
971 {
972 draw.transformFeedbackBuffers[i]->unlock();
973 }
974 }
975
976 draw.vertexRoutine->unbind();
977 draw.setupRoutine->unbind();
978 draw.pixelRoutine->unbind();
979
980 sync->unlock();
981
982 draw.references = -1;
983 resumeApp->signal();
984 }
985 }
986
987 pixelProgress[cluster].executing = false;
988 }
989
990 void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
991 {
992 Triangle *triangle = triangleBatch[unit];
993 int primitiveDrawCall = primitiveProgress[unit].drawCall;
994 DrawCall *draw = drawList[primitiveDrawCall & DRAW_COUNT_BITS];
995 DrawData *data = draw->data;
996 VertexTask *task = vertexTask[thread];
997
998 const void *indices = data->indices;
999 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
1000
1001 if(task->vertexCache.drawCall != primitiveDrawCall)
1002 {
1003 task->vertexCache.clear();
1004 task->vertexCache.drawCall = primitiveDrawCall;
1005 }
1006
1007 unsigned int batch[128][3]; // FIXME: Adjust to dynamic batch size
1008
1009 switch(draw->drawType)
1010 {
1011 case DRAW_POINTLIST:
1012 {
1013 unsigned int index = start;
1014
1015 for(unsigned int i = 0; i < triangleCount; i++)
1016 {
1017 batch[i][0] = index;
1018 batch[i][1] = index;
1019 batch[i][2] = index;
1020
1021 index += 1;
1022 }
1023 }
1024 break;
1025 case DRAW_LINELIST:
1026 {
1027 unsigned int index = 2 * start;
1028
1029 for(unsigned int i = 0; i < triangleCount; i++)
1030 {
1031 batch[i][0] = index + 0;
1032 batch[i][1] = index + 1;
1033 batch[i][2] = index + 1;
1034
1035 index += 2;
1036 }
1037 }
1038 break;
1039 case DRAW_LINESTRIP:
1040 {
1041 unsigned int index = start;
1042
1043 for(unsigned int i = 0; i < triangleCount; i++)
1044 {
1045 batch[i][0] = index + 0;
1046 batch[i][1] = index + 1;
1047 batch[i][2] = index + 1;
1048
1049 index += 1;
1050 }
1051 }
1052 break;
1053 case DRAW_TRIANGLELIST:
1054 {
1055 unsigned int index = 3 * start;
1056
1057 for(unsigned int i = 0; i < triangleCount; i++)
1058 {
1059 batch[i][0] = index + 0;
1060 batch[i][1] = index + 1;
1061 batch[i][2] = index + 2;
1062
1063 index += 3;
1064 }
1065 }
1066 break;
1067 case DRAW_TRIANGLESTRIP:
1068 {
1069 unsigned int index = start;
1070
1071 for(unsigned int i = 0; i < triangleCount; i++)
1072 {
1073 if(leadingVertexFirst)
1074 {
1075 batch[i][0] = index + 0;
1076 batch[i][1] = index + (index & 1) + 1;
1077 batch[i][2] = index + (~index & 1) + 1;
1078 }
1079 else
1080 {
1081 batch[i][0] = index + (index & 1);
1082 batch[i][1] = index + (~index & 1);
1083 batch[i][2] = index + 2;
1084 }
1085
1086 index += 1;
1087 }
1088 }
1089 break;
1090 case DRAW_TRIANGLEFAN:
1091 {
1092 unsigned int index = start;
1093
1094 for(unsigned int i = 0; i < triangleCount; i++)
1095 {
1096 if(leadingVertexFirst)
1097 {
1098 batch[i][0] = index + 1;
1099 batch[i][1] = index + 2;
1100 batch[i][2] = 0;
1101 }
1102 else
1103 {
1104 batch[i][0] = 0;
1105 batch[i][1] = index + 1;
1106 batch[i][2] = index + 2;
1107 }
1108
1109 index += 1;
1110 }
1111 }
1112 break;
1113 case DRAW_INDEXEDPOINTLIST16:
1114 {
1115 const unsigned short *index = (const unsigned short*)indices + start;
1116
1117 for(unsigned int i = 0; i < triangleCount; i++)
1118 {
1119 batch[i][0] = *index;
1120 batch[i][1] = *index;
1121 batch[i][2] = *index;
1122
1123 index += 1;
1124 }
1125 }
1126 break;
1127 case DRAW_INDEXEDPOINTLIST32:
1128 {
1129 const unsigned int *index = (const unsigned int*)indices + start;
1130
1131 for(unsigned int i = 0; i < triangleCount; i++)
1132 {
1133 batch[i][0] = *index;
1134 batch[i][1] = *index;
1135 batch[i][2] = *index;
1136
1137 index += 1;
1138 }
1139 }
1140 break;
1141 case DRAW_INDEXEDLINELIST16:
1142 {
1143 const unsigned short *index = (const unsigned short*)indices + 2 * start;
1144
1145 for(unsigned int i = 0; i < triangleCount; i++)
1146 {
1147 batch[i][0] = index[0];
1148 batch[i][1] = index[1];
1149 batch[i][2] = index[1];
1150
1151 index += 2;
1152 }
1153 }
1154 break;
1155 case DRAW_INDEXEDLINELIST32:
1156 {
1157 const unsigned int *index = (const unsigned int*)indices + 2 * start;
1158
1159 for(unsigned int i = 0; i < triangleCount; i++)
1160 {
1161 batch[i][0] = index[0];
1162 batch[i][1] = index[1];
1163 batch[i][2] = index[1];
1164
1165 index += 2;
1166 }
1167 }
1168 break;
1169 case DRAW_INDEXEDLINESTRIP16:
1170 {
1171 const unsigned short *index = (const unsigned short*)indices + start;
1172
1173 for(unsigned int i = 0; i < triangleCount; i++)
1174 {
1175 batch[i][0] = index[0];
1176 batch[i][1] = index[1];
1177 batch[i][2] = index[1];
1178
1179 index += 1;
1180 }
1181 }
1182 break;
1183 case DRAW_INDEXEDLINESTRIP32:
1184 {
1185 const unsigned int *index = (const unsigned int*)indices + start;
1186
1187 for(unsigned int i = 0; i < triangleCount; i++)
1188 {
1189 batch[i][0] = index[0];
1190 batch[i][1] = index[1];
1191 batch[i][2] = index[1];
1192
1193 index += 1;
1194 }
1195 }
1196 break;
1197 case DRAW_INDEXEDTRIANGLELIST16:
1198 {
1199 const unsigned short *index = (const unsigned short*)indices + 3 * start;
1200
1201 for(unsigned int i = 0; i < triangleCount; i++)
1202 {
1203 batch[i][0] = index[0];
1204 batch[i][1] = index[1];
1205 batch[i][2] = index[2];
1206
1207 index += 3;
1208 }
1209 }
1210 break;
1211 case DRAW_INDEXEDTRIANGLELIST32:
1212 {
1213 const unsigned int *index = (const unsigned int*)indices + 3 * start;
1214
1215 for(unsigned int i = 0; i < triangleCount; i++)
1216 {
1217 batch[i][0] = index[0];
1218 batch[i][1] = index[1];
1219 batch[i][2] = index[2];
1220
1221 index += 3;
1222 }
1223 }
1224 break;
1225 case DRAW_INDEXEDTRIANGLESTRIP16:
1226 {
1227 const unsigned short *index = (const unsigned short*)indices + start;
1228
1229 for(unsigned int i = 0; i < triangleCount; i++)
1230 {
1231 batch[i][0] = index[0];
1232 batch[i][1] = index[((start + i) & 1) + 1];
1233 batch[i][2] = index[(~(start + i) & 1) + 1];
1234
1235 index += 1;
1236 }
1237 }
1238 break;
1239 case DRAW_INDEXEDTRIANGLESTRIP32:
1240 {
1241 const unsigned int *index = (const unsigned int*)indices + start;
1242
1243 for(unsigned int i = 0; i < triangleCount; i++)
1244 {
1245 batch[i][0] = index[0];
1246 batch[i][1] = index[((start + i) & 1) + 1];
1247 batch[i][2] = index[(~(start + i) & 1) + 1];
1248
1249 index += 1;
1250 }
1251 }
1252 break;
1253 case DRAW_INDEXEDTRIANGLEFAN16:
1254 {
1255 const unsigned short *index = (const unsigned short*)indices;
1256
1257 for(unsigned int i = 0; i < triangleCount; i++)
1258 {
1259 batch[i][0] = index[start + i + 1];
1260 batch[i][1] = index[start + i + 2];
1261 batch[i][2] = index[0];
1262 }
1263 }
1264 break;
1265 case DRAW_INDEXEDTRIANGLEFAN32:
1266 {
1267 const unsigned int *index = (const unsigned int*)indices;
1268
1269 for(unsigned int i = 0; i < triangleCount; i++)
1270 {
1271 batch[i][0] = index[start + i + 1];
1272 batch[i][1] = index[start + i + 2];
1273 batch[i][2] = index[0];
1274 }
1275 }
1276 break;
1277 default:
1278 ASSERT(false);
1279 return;
1280 }
1281
1282 task->primitiveStart = start;
1283 task->vertexCount = triangleCount * 3;
1284 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1285 }
1286
1287 int Renderer::setupTriangles(int unit, int count)
1288 {
1289 Triangle *triangle = triangleBatch[unit];
1290 Primitive *primitive = primitiveBatch[unit];
1291
1292 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
1293 SetupProcessor::State &state = draw.setupState;
1294 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1295
1296 int ms = state.multiSample;
1297 int pos = state.positionRegister;
1298 const DrawData *data = draw.data;
1299 int visible = 0;
1300
1301 for(int i = 0; i < count; i++, triangle++)
1302 {
1303 Vertex &v0 = triangle->v0;
1304 Vertex &v1 = triangle->v1;
1305 Vertex &v2 = triangle->v2;
1306
1307 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1308 {
1309 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1310
1311 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1312
1313 if(clipFlagsOr != Clipper::CLIP_FINITE)
1314 {
1315 if(!clipper->clip(polygon, clipFlagsOr, draw))
1316 {
1317 continue;
1318 }
1319 }
1320
1321 if(setupRoutine(primitive, triangle, &polygon, data))
1322 {
1323 primitive += ms;
1324 visible++;
1325 }
1326 }
1327 }
1328
1329 return visible;
1330 }
1331
1332 int Renderer::setupLines(int unit, int count)
1333 {
1334 Triangle *triangle = triangleBatch[unit];
1335 Primitive *primitive = primitiveBatch[unit];
1336 int visible = 0;
1337
1338 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
1339 SetupProcessor::State &state = draw.setupState;
1340
1341 int ms = state.multiSample;
1342
1343 for(int i = 0; i < count; i++)
1344 {
1345 if(setupLine(*primitive, *triangle, draw))
1346 {
1347 primitive += ms;
1348 visible++;
1349 }
1350
1351 triangle++;
1352 }
1353
1354 return visible;
1355 }
1356
1357 int Renderer::setupPoints(int unit, int count)
1358 {
1359 Triangle *triangle = triangleBatch[unit];
1360 Primitive *primitive = primitiveBatch[unit];
1361 int visible = 0;
1362
1363 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
1364 SetupProcessor::State &state = draw.setupState;
1365
1366 int ms = state.multiSample;
1367
1368 for(int i = 0; i < count; i++)
1369 {
1370 if(setupPoint(*primitive, *triangle, draw))
1371 {
1372 primitive += ms;
1373 visible++;
1374 }
1375
1376 triangle++;
1377 }
1378
1379 return visible;
1380 }
1381
1382 bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1383 {
1384 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1385 const SetupProcessor::State &state = draw.setupState;
1386 const DrawData &data = *draw.data;
1387
1388 float lineWidth = data.lineWidth;
1389
1390 Vertex &v0 = triangle.v0;
1391 Vertex &v1 = triangle.v1;
1392
1393 int pos = state.positionRegister;
1394
1395 const float4 &P0 = v0.v[pos];
1396 const float4 &P1 = v1.v[pos];
1397
1398 if(P0.w <= 0 && P1.w <= 0)
1399 {
1400 return false;
1401 }
1402
1403 const float W = data.Wx16[0] * (1.0f / 16.0f);
1404 const float H = data.Hx16[0] * (1.0f / 16.0f);
1405
1406 float dx = W * (P1.x / P1.w - P0.x / P0.w);
1407 float dy = H * (P1.y / P1.w - P0.y / P0.w);
1408
1409 if(dx == 0 && dy == 0)
1410 {
1411 return false;
1412 }
1413
1414 if(state.multiSample > 1) // Rectangle
1415 {
1416 float4 P[4];
1417 int C[4];
1418
1419 P[0] = P0;
1420 P[1] = P1;
1421 P[2] = P1;
1422 P[3] = P0;
1423
1424 float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
1425
1426 dx *= scale;
1427 dy *= scale;
1428
1429 float dx0h = dx * P0.w / H;
1430 float dy0w = dy * P0.w / W;
1431
1432 float dx1h = dx * P1.w / H;
1433 float dy1w = dy * P1.w / W;
1434
1435 P[0].x += -dy0w;
1436 P[0].y += +dx0h;
1437 C[0] = clipper->computeClipFlags(P[0]);
1438
1439 P[1].x += -dy1w;
1440 P[1].y += +dx1h;
1441 C[1] = clipper->computeClipFlags(P[1]);
1442
1443 P[2].x += +dy1w;
1444 P[2].y += -dx1h;
1445 C[2] = clipper->computeClipFlags(P[2]);
1446
1447 P[3].x += +dy0w;
1448 P[3].y += -dx0h;
1449 C[3] = clipper->computeClipFlags(P[3]);
1450
1451 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1452 {
1453 Polygon polygon(P, 4);
1454
1455 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1456
1457 if(clipFlagsOr != Clipper::CLIP_FINITE)
1458 {
1459 if(!clipper->clip(polygon, clipFlagsOr, draw))
1460 {
1461 return false;
1462 }
1463 }
1464
1465 return setupRoutine(&primitive, &triangle, &polygon, &data);
1466 }
1467 }
1468 else // Diamond test convention
1469 {
1470 float4 P[8];
1471 int C[8];
1472
1473 P[0] = P0;
1474 P[1] = P0;
1475 P[2] = P0;
1476 P[3] = P0;
1477 P[4] = P1;
1478 P[5] = P1;
1479 P[6] = P1;
1480 P[7] = P1;
1481
1482 float dx0 = lineWidth * 0.5f * P0.w / W;
1483 float dy0 = lineWidth * 0.5f * P0.w / H;
1484
1485 float dx1 = lineWidth * 0.5f * P1.w / W;
1486 float dy1 = lineWidth * 0.5f * P1.w / H;
1487
1488 P[0].x += -dx0;
1489 C[0] = clipper->computeClipFlags(P[0]);
1490
1491 P[1].y += +dy0;
1492 C[1] = clipper->computeClipFlags(P[1]);
1493
1494 P[2].x += +dx0;
1495 C[2] = clipper->computeClipFlags(P[2]);
1496
1497 P[3].y += -dy0;
1498 C[3] = clipper->computeClipFlags(P[3]);
1499
1500 P[4].x += -dx1;
1501 C[4] = clipper->computeClipFlags(P[4]);
1502
1503 P[5].y += +dy1;
1504 C[5] = clipper->computeClipFlags(P[5]);
1505
1506 P[6].x += +dx1;
1507 C[6] = clipper->computeClipFlags(P[6]);
1508
1509 P[7].y += -dy1;
1510 C[7] = clipper->computeClipFlags(P[7]);
1511
1512 if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1513 {
1514 float4 L[6];
1515
1516 if(dx > -dy)
1517 {
1518 if(dx > dy) // Right
1519 {
1520 L[0] = P[0];
1521 L[1] = P[1];
1522 L[2] = P[5];
1523 L[3] = P[6];
1524 L[4] = P[7];
1525 L[5] = P[3];
1526 }
1527 else // Down
1528 {
1529 L[0] = P[0];
1530 L[1] = P[4];
1531 L[2] = P[5];
1532 L[3] = P[6];
1533 L[4] = P[2];
1534 L[5] = P[3];
1535 }
1536 }
1537 else
1538 {
1539 if(dx > dy) // Up
1540 {
1541 L[0] = P[0];
1542 L[1] = P[1];
1543 L[2] = P[2];
1544 L[3] = P[6];
1545 L[4] = P[7];
1546 L[5] = P[4];
1547 }
1548 else // Left
1549 {
1550 L[0] = P[1];
1551 L[1] = P[2];
1552 L[2] = P[3];
1553 L[3] = P[7];
1554 L[4] = P[4];
1555 L[5] = P[5];
1556 }
1557 }
1558
1559 Polygon polygon(L, 6);
1560
1561 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1562
1563 if(clipFlagsOr != Clipper::CLIP_FINITE)
1564 {
1565 if(!clipper->clip(polygon, clipFlagsOr, draw))
1566 {
1567 return false;
1568 }
1569 }
1570
1571 return setupRoutine(&primitive, &triangle, &polygon, &data);
1572 }
1573 }
1574
1575 return false;
1576 }
1577
1578 bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1579 {
1580 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1581 const SetupProcessor::State &state = draw.setupState;
1582 const DrawData &data = *draw.data;
1583
1584 Vertex &v = triangle.v0;
1585
1586 float pSize;
1587
1588 int pts = state.pointSizeRegister;
1589
1590 if(state.pointSizeRegister != Unused)
1591 {
1592 pSize = v.v[pts].y;
1593 }
1594 else
1595 {
1596 pSize = 1.0f;
1597 }
1598
1599 pSize = clamp(pSize, data.pointSizeMin, data.pointSizeMax);
1600
1601 float4 P[4];
1602 int C[4];
1603
1604 int pos = state.positionRegister;
1605
1606 P[0] = v.v[pos];
1607 P[1] = v.v[pos];
1608 P[2] = v.v[pos];
1609 P[3] = v.v[pos];
1610
1611 const float X = pSize * P[0].w * data.halfPixelX[0];
1612 const float Y = pSize * P[0].w * data.halfPixelY[0];
1613
1614 P[0].x -= X;
1615 P[0].y += Y;
1616 C[0] = clipper->computeClipFlags(P[0]);
1617
1618 P[1].x += X;
1619 P[1].y += Y;
1620 C[1] = clipper->computeClipFlags(P[1]);
1621
1622 P[2].x += X;
1623 P[2].y -= Y;
1624 C[2] = clipper->computeClipFlags(P[2]);
1625
1626 P[3].x -= X;
1627 P[3].y -= Y;
1628 C[3] = clipper->computeClipFlags(P[3]);
1629
1630 triangle.v1 = triangle.v0;
1631 triangle.v2 = triangle.v0;
1632
1633 triangle.v1.X += iround(16 * 0.5f * pSize);
1634 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1635
1636 Polygon polygon(P, 4);
1637
1638 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1639 {
1640 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1641
1642 if(clipFlagsOr != Clipper::CLIP_FINITE)
1643 {
1644 if(!clipper->clip(polygon, clipFlagsOr, draw))
1645 {
1646 return false;
1647 }
1648 }
1649
1650 return setupRoutine(&primitive, &triangle, &polygon, &data);
1651 }
1652
1653 return false;
1654 }
1655
1656 void Renderer::initializeThreads()
1657 {
1658 unitCount = ceilPow2(threadCount);
1659 clusterCount = ceilPow2(threadCount);
1660
1661 for(int i = 0; i < unitCount; i++)
1662 {
1663 triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1664 primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1665 }
1666
1667 for(int i = 0; i < threadCount; i++)
1668 {
1669 vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1670 vertexTask[i]->vertexCache.drawCall = -1;
1671
1672 task[i].type = Task::SUSPEND;
1673
1674 resume[i] = new Event();
1675 suspend[i] = new Event();
1676
1677 Parameters parameters;
1678 parameters.threadIndex = i;
1679 parameters.renderer = this;
1680
1681 exitThreads = false;
1682 worker[i] = new Thread(threadFunction, &parameters);
1683
1684 suspend[i]->wait();
1685 suspend[i]->signal();
1686 }
1687 }
1688
1689 void Renderer::terminateThreads()
1690 {
1691 while(threadsAwake != 0)
1692 {
1693 Thread::sleep(1);
1694 }
1695
1696 for(int thread = 0; thread < threadCount; thread++)
1697 {
1698 if(worker[thread])
1699 {
1700 exitThreads = true;
1701 resume[thread]->signal();
1702 worker[thread]->join();
1703
1704 delete worker[thread];
1705 worker[thread] = 0;
1706 delete resume[thread];
1707 resume[thread] = 0;
1708 delete suspend[thread];
1709 suspend[thread] = 0;
1710 }
1711
1712 deallocate(vertexTask[thread]);
1713 vertexTask[thread] = 0;
1714 }
1715
1716 for(int i = 0; i < 16; i++)
1717 {
1718 deallocate(triangleBatch[i]);
1719 triangleBatch[i] = 0;
1720
1721 deallocate(primitiveBatch[i]);
1722 primitiveBatch[i] = 0;
1723 }
1724 }
1725
1726 void Renderer::loadConstants(const VertexShader *vertexShader)
1727 {
1728 size_t count = vertexShader->getLength();
1729
1730 for(size_t i = 0; i < count; i++)
1731 {
1732 const Shader::Instruction *instruction = vertexShader->getInstruction(i);
1733
1734 if(instruction->opcode == Shader::OPCODE_DEF)
1735 {
1736 int index = instruction->dst.index;
1737 float value[4];
1738
1739 value[0] = instruction->src[0].value[0];
1740 value[1] = instruction->src[0].value[1];
1741 value[2] = instruction->src[0].value[2];
1742 value[3] = instruction->src[0].value[3];
1743
1744 setVertexShaderConstantF(index, value);
1745 }
1746 else if(instruction->opcode == Shader::OPCODE_DEFI)
1747 {
1748 int index = instruction->dst.index;
1749 int integer[4];
1750
1751 integer[0] = instruction->src[0].integer[0];
1752 integer[1] = instruction->src[0].integer[1];
1753 integer[2] = instruction->src[0].integer[2];
1754 integer[3] = instruction->src[0].integer[3];
1755
1756 setVertexShaderConstantI(index, integer);
1757 }
1758 else if(instruction->opcode == Shader::OPCODE_DEFB)
1759 {
1760 int index = instruction->dst.index;
1761 int boolean = instruction->src[0].boolean[0];
1762
1763 setVertexShaderConstantB(index, &boolean);
1764 }
1765 }
1766 }
1767
1768 void Renderer::loadConstants(const PixelShader *pixelShader)
1769 {
1770 if(!pixelShader) return;
1771
1772 size_t count = pixelShader->getLength();
1773
1774 for(size_t i = 0; i < count; i++)
1775 {
1776 const Shader::Instruction *instruction = pixelShader->getInstruction(i);
1777
1778 if(instruction->opcode == Shader::OPCODE_DEF)
1779 {
1780 int index = instruction->dst.index;
1781 float value[4];
1782
1783 value[0] = instruction->src[0].value[0];
1784 value[1] = instruction->src[0].value[1];
1785 value[2] = instruction->src[0].value[2];
1786 value[3] = instruction->src[0].value[3];
1787
1788 setPixelShaderConstantF(index, value);
1789 }
1790 else if(instruction->opcode == Shader::OPCODE_DEFI)
1791 {
1792 int index = instruction->dst.index;
1793 int integer[4];
1794
1795 integer[0] = instruction->src[0].integer[0];
1796 integer[1] = instruction->src[0].integer[1];
1797 integer[2] = instruction->src[0].integer[2];
1798 integer[3] = instruction->src[0].integer[3];
1799
1800 setPixelShaderConstantI(index, integer);
1801 }
1802 else if(instruction->opcode == Shader::OPCODE_DEFB)
1803 {
1804 int index = instruction->dst.index;
1805 int boolean = instruction->src[0].boolean[0];
1806
1807 setPixelShaderConstantB(index, &boolean);
1808 }
1809 }
1810 }
1811
1812 void Renderer::setIndexBuffer(Resource *indexBuffer)
1813 {
1814 context->indexBuffer = indexBuffer;
1815 }
1816
1817 void Renderer::setMultiSampleMask(unsigned int mask)
1818 {
1819 context->sampleMask = mask;
1820 }
1821
1822 void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
1823 {
1824 sw::transparencyAntialiasing = transparencyAntialiasing;
1825 }
1826
1827 bool Renderer::isReadWriteTexture(int sampler)
1828 {
1829 for(int index = 0; index < RENDERTARGETS; index++)
1830 {
1831 if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
1832 {
1833 return true;
1834 }
1835 }
1836
1837 if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
1838 {
1839 return true;
1840 }
1841
1842 return false;
1843 }
1844
1845 void Renderer::updateClipper()
1846 {
1847 if(updateClipPlanes)
1848 {
1849 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
1850 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
1851 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
1852 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
1853 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
1854 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
1855
1856 updateClipPlanes = false;
1857 }
1858 }
1859
1860 void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
1861 {
1862 ASSERT(sampler < TOTAL_IMAGE_UNITS);
1863
1864 context->texture[sampler] = resource;
1865 }
1866
1867 void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
1868 {
1869 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
1870
1871 context->sampler[sampler].setTextureLevel(face, level, surface, type);
1872 }
1873
1874 void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
1875 {
1876 if(type == SAMPLER_PIXEL)
1877 {
1878 PixelProcessor::setTextureFilter(sampler, textureFilter);
1879 }
1880 else
1881 {
1882 VertexProcessor::setTextureFilter(sampler, textureFilter);
1883 }
1884 }
1885
1886 void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
1887 {
1888 if(type == SAMPLER_PIXEL)
1889 {
1890 PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
1891 }
1892 else
1893 {
1894 VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
1895 }
1896 }
1897
1898 void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
1899 {
1900 if(type == SAMPLER_PIXEL)
1901 {
1902 PixelProcessor::setGatherEnable(sampler, enable);
1903 }
1904 else
1905 {
1906 VertexProcessor::setGatherEnable(sampler, enable);
1907 }
1908 }
1909
1910 void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
1911 {
1912 if(type == SAMPLER_PIXEL)
1913 {
1914 PixelProcessor::setAddressingModeU(sampler, addressMode);
1915 }
1916 else
1917 {
1918 VertexProcessor::setAddressingModeU(sampler, addressMode);
1919 }
1920 }
1921
1922 void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
1923 {
1924 if(type == SAMPLER_PIXEL)
1925 {
1926 PixelProcessor::setAddressingModeV(sampler, addressMode);
1927 }
1928 else
1929 {
1930 VertexProcessor::setAddressingModeV(sampler, addressMode);
1931 }
1932 }
1933
1934 void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
1935 {
1936 if(type == SAMPLER_PIXEL)
1937 {
1938 PixelProcessor::setAddressingModeW(sampler, addressMode);
1939 }
1940 else
1941 {
1942 VertexProcessor::setAddressingModeW(sampler, addressMode);
1943 }
1944 }
1945
1946 void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
1947 {
1948 if(type == SAMPLER_PIXEL)
1949 {
1950 PixelProcessor::setReadSRGB(sampler, sRGB);
1951 }
1952 else
1953 {
1954 VertexProcessor::setReadSRGB(sampler, sRGB);
1955 }
1956 }
1957
1958 void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
1959 {
1960 if(type == SAMPLER_PIXEL)
1961 {
1962 PixelProcessor::setMipmapLOD(sampler, bias);
1963 }
1964 else
1965 {
1966 VertexProcessor::setMipmapLOD(sampler, bias);
1967 }
1968 }
1969
1970 void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
1971 {
1972 if(type == SAMPLER_PIXEL)
1973 {
1974 PixelProcessor::setBorderColor(sampler, borderColor);
1975 }
1976 else
1977 {
1978 VertexProcessor::setBorderColor(sampler, borderColor);
1979 }
1980 }
1981
1982 void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
1983 {
1984 if(type == SAMPLER_PIXEL)
1985 {
1986 PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
1987 }
1988 else
1989 {
1990 VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
1991 }
1992 }
1993
1994 void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering)
1995 {
1996 if(type == SAMPLER_PIXEL)
1997 {
1998 PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
1999 }
2000 else
2001 {
2002 VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
2003 }
2004 }
2005
2006 void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2007 {
2008 if(type == SAMPLER_PIXEL)
2009 {
2010 PixelProcessor::setSwizzleR(sampler, swizzleR);
2011 }
2012 else
2013 {
2014 VertexProcessor::setSwizzleR(sampler, swizzleR);
2015 }
2016 }
2017
2018 void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2019 {
2020 if(type == SAMPLER_PIXEL)
2021 {
2022 PixelProcessor::setSwizzleG(sampler, swizzleG);
2023 }
2024 else
2025 {
2026 VertexProcessor::setSwizzleG(sampler, swizzleG);
2027 }
2028 }
2029
2030 void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2031 {
2032 if(type == SAMPLER_PIXEL)
2033 {
2034 PixelProcessor::setSwizzleB(sampler, swizzleB);
2035 }
2036 else
2037 {
2038 VertexProcessor::setSwizzleB(sampler, swizzleB);
2039 }
2040 }
2041
2042 void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2043 {
2044 if(type == SAMPLER_PIXEL)
2045 {
2046 PixelProcessor::setSwizzleA(sampler, swizzleA);
2047 }
2048 else
2049 {
2050 VertexProcessor::setSwizzleA(sampler, swizzleA);
2051 }
2052 }
2053
2054 void Renderer::setCompareFunc(SamplerType type, int sampler, CompareFunc compFunc)
2055 {
2056 if(type == SAMPLER_PIXEL)
2057 {
2058 PixelProcessor::setCompareFunc(sampler, compFunc);
2059 }
2060 else
2061 {
2062 VertexProcessor::setCompareFunc(sampler, compFunc);
2063 }
2064 }
2065
2066 void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel)
2067 {
2068 if(type == SAMPLER_PIXEL)
2069 {
2070 PixelProcessor::setBaseLevel(sampler, baseLevel);
2071 }
2072 else
2073 {
2074 VertexProcessor::setBaseLevel(sampler, baseLevel);
2075 }
2076 }
2077
2078 void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel)
2079 {
2080 if(type == SAMPLER_PIXEL)
2081 {
2082 PixelProcessor::setMaxLevel(sampler, maxLevel);
2083 }
2084 else
2085 {
2086 VertexProcessor::setMaxLevel(sampler, maxLevel);
2087 }
2088 }
2089
2090 void Renderer::setMinLod(SamplerType type, int sampler, float minLod)
2091 {
2092 if(type == SAMPLER_PIXEL)
2093 {
2094 PixelProcessor::setMinLod(sampler, minLod);
2095 }
2096 else
2097 {
2098 VertexProcessor::setMinLod(sampler, minLod);
2099 }
2100 }
2101
2102 void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod)
2103 {
2104 if(type == SAMPLER_PIXEL)
2105 {
2106 PixelProcessor::setMaxLod(sampler, maxLod);
2107 }
2108 else
2109 {
2110 VertexProcessor::setMaxLod(sampler, maxLod);
2111 }
2112 }
2113
2114 void Renderer::setLineWidth(float width)
2115 {
2116 context->lineWidth = width;
2117 }
2118
2119 void Renderer::setDepthBias(float bias)
2120 {
2121 context->depthBias = bias;
2122 }
2123
2124 void Renderer::setSlopeDepthBias(float slopeBias)
2125 {
2126 context->slopeDepthBias = slopeBias;
2127 }
2128
2129 void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
2130 {
2131 context->rasterizerDiscard = rasterizerDiscard;
2132 }
2133
2134 void Renderer::setPixelShader(const PixelShader *shader)
2135 {
2136 context->pixelShader = shader;
2137
2138 loadConstants(shader);
2139 }
2140
2141 void Renderer::setVertexShader(const VertexShader *shader)
2142 {
2143 context->vertexShader = shader;
2144
2145 loadConstants(shader);
2146 }
2147
2148 void Renderer::setPixelShaderConstantF(unsigned int index, const float value[4], unsigned int count)
2149 {
2150 for(unsigned int i = 0; i < DRAW_COUNT; i++)
2151 {
2152 if(drawCall[i]->psDirtyConstF < index + count)
2153 {
2154 drawCall[i]->psDirtyConstF = index + count;
2155 }
2156 }
2157
2158 for(unsigned int i = 0; i < count; i++)
2159 {
2160 PixelProcessor::setFloatConstant(index + i, value);
2161 value += 4;
2162 }
2163 }
2164
2165 void Renderer::setPixelShaderConstantI(unsigned int index, const int value[4], unsigned int count)
2166 {
2167 for(unsigned int i = 0; i < DRAW_COUNT; i++)
2168 {
2169 if(drawCall[i]->psDirtyConstI < index + count)
2170 {
2171 drawCall[i]->psDirtyConstI = index + count;
2172 }
2173 }
2174
2175 for(unsigned int i = 0; i < count; i++)
2176 {
2177 PixelProcessor::setIntegerConstant(index + i, value);
2178 value += 4;
2179 }
2180 }
2181
2182 void Renderer::setPixelShaderConstantB(unsigned int index, const int *boolean, unsigned int count)
2183 {
2184 for(unsigned int i = 0; i < DRAW_COUNT; i++)
2185 {
2186 if(drawCall[i]->psDirtyConstB < index + count)
2187 {
2188 drawCall[i]->psDirtyConstB = index + count;
2189 }
2190 }
2191
2192 for(unsigned int i = 0; i < count; i++)
2193 {
2194 PixelProcessor::setBooleanConstant(index + i, *boolean);
2195 boolean++;
2196 }
2197 }
2198
2199 void Renderer::setVertexShaderConstantF(unsigned int index, const float value[4], unsigned int count)
2200 {
2201 for(unsigned int i = 0; i < DRAW_COUNT; i++)
2202 {
2203 if(drawCall[i]->vsDirtyConstF < index + count)
2204 {
2205 drawCall[i]->vsDirtyConstF = index + count;
2206 }
2207 }
2208
2209 for(unsigned int i = 0; i < count; i++)
2210 {
2211 VertexProcessor::setFloatConstant(index + i, value);
2212 value += 4;
2213 }
2214 }
2215
2216 void Renderer::setVertexShaderConstantI(unsigned int index, const int value[4], unsigned int count)
2217 {
2218 for(unsigned int i = 0; i < DRAW_COUNT; i++)
2219 {
2220 if(drawCall[i]->vsDirtyConstI < index + count)
2221 {
2222 drawCall[i]->vsDirtyConstI = index + count;
2223 }
2224 }
2225
2226 for(unsigned int i = 0; i < count; i++)
2227 {
2228 VertexProcessor::setIntegerConstant(index + i, value);
2229 value += 4;
2230 }
2231 }
2232
2233 void Renderer::setVertexShaderConstantB(unsigned int index, const int *boolean, unsigned int count)
2234 {
2235 for(unsigned int i = 0; i < DRAW_COUNT; i++)
2236 {
2237 if(drawCall[i]->vsDirtyConstB < index + count)
2238 {
2239 drawCall[i]->vsDirtyConstB = index + count;
2240 }
2241 }
2242
2243 for(unsigned int i = 0; i < count; i++)
2244 {
2245 VertexProcessor::setBooleanConstant(index + i, *boolean);
2246 boolean++;
2247 }
2248 }
2249
2250 void Renderer::addQuery(Query *query)
2251 {
2252 queries.push_back(query);
2253 }
2254
2255 void Renderer::removeQuery(Query *query)
2256 {
2257 queries.remove(query);
2258 }
2259
2260 #if PERF_HUD
2261 int Renderer::getThreadCount()
2262 {
2263 return threadCount;
2264 }
2265
2266 int64_t Renderer::getVertexTime(int thread)
2267 {
2268 return vertexTime[thread];
2269 }
2270
2271 int64_t Renderer::getSetupTime(int thread)
2272 {
2273 return setupTime[thread];
2274 }
2275
2276 int64_t Renderer::getPixelTime(int thread)
2277 {
2278 return pixelTime[thread];
2279 }
2280
2281 void Renderer::resetTimers()
2282 {
2283 for(int thread = 0; thread < threadCount; thread++)
2284 {
2285 vertexTime[thread] = 0;
2286 setupTime[thread] = 0;
2287 pixelTime[thread] = 0;
2288 }
2289 }
2290 #endif
2291
2292 void Renderer::setContext(const sw::Context& context)
2293 {
2294 *(this->context) = context;
2295 }
2296
2297 void Renderer::setViewport(const VkViewport &viewport)
2298 {
2299 this->viewport = viewport;
2300 }
2301
2302 void Renderer::setScissor(const Rect &scissor)
2303 {
2304 this->scissor = scissor;
2305 }
2306
2307 void Renderer::setClipFlags(int flags)
2308 {
2309 clipFlags = flags << 8; // Bottom 8 bits used by legacy frustum
2310 }
2311
2312 void Renderer::setClipPlane(unsigned int index, const float plane[4])
2313 {
2314 if(index < MAX_CLIP_PLANES)
2315 {
2316 userPlane[index] = plane;
2317 }
2318 else ASSERT(false);
2319
2320 updateClipPlanes = true;
2321 }
2322
2323 void Renderer::updateConfiguration(bool initialUpdate)
2324 {
2325 bool newConfiguration = swiftConfig->hasNewConfiguration();
2326
2327 if(newConfiguration || initialUpdate)
2328 {
2329 terminateThreads();
2330
2331 SwiftConfig::Configuration configuration = {};
2332 swiftConfig->getConfiguration(configuration);
2333
2334 precacheVertex = !newConfiguration && configuration.precache;
2335 precacheSetup = !newConfiguration && configuration.precache;
2336 precachePixel = !newConfiguration && configuration.precache;
2337
2338 VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2339 PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2340 SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2341
2342 switch(configuration.textureSampleQuality)
2343 {
2344 case 0: Sampler::setFilterQuality(FILTER_POINT); break;
2345 case 1: Sampler::setFilterQuality(FILTER_LINEAR); break;
2346 case 2: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2347 default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2348 }
2349
2350 switch(configuration.mipmapQuality)
2351 {
2352 case 0: Sampler::setMipmapQuality(MIPMAP_POINT); break;
2353 case 1: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2354 default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2355 }
2356
2357 setPerspectiveCorrection(configuration.perspectiveCorrection);
2358
2359 switch(configuration.transcendentalPrecision)
2360 {
2361 case 0:
2362 logPrecision = APPROXIMATE;
2363 expPrecision = APPROXIMATE;
2364 rcpPrecision = APPROXIMATE;
2365 rsqPrecision = APPROXIMATE;
2366 break;
2367 case 1:
2368 logPrecision = PARTIAL;
2369 expPrecision = PARTIAL;
2370 rcpPrecision = PARTIAL;
2371 rsqPrecision = PARTIAL;
2372 break;
2373 case 2:
2374 logPrecision = ACCURATE;
2375 expPrecision = ACCURATE;
2376 rcpPrecision = ACCURATE;
2377 rsqPrecision = ACCURATE;
2378 break;
2379 case 3:
2380 logPrecision = WHQL;
2381 expPrecision = WHQL;
2382 rcpPrecision = WHQL;
2383 rsqPrecision = WHQL;
2384 break;
2385 case 4:
2386 logPrecision = IEEE;
2387 expPrecision = IEEE;
2388 rcpPrecision = IEEE;
2389 rsqPrecision = IEEE;
2390 break;
2391 default:
2392 logPrecision = ACCURATE;
2393 expPrecision = ACCURATE;
2394 rcpPrecision = ACCURATE;
2395 rsqPrecision = ACCURATE;
2396 break;
2397 }
2398
2399 switch(configuration.transparencyAntialiasing)
2400 {
2401 case 0: transparencyAntialiasing = TRANSPARENCY_NONE; break;
2402 case 1: transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2403 default: transparencyAntialiasing = TRANSPARENCY_NONE; break;
2404 }
2405
2406 switch(configuration.threadCount)
2407 {
2408 case -1: threadCount = CPUID::coreCount(); break;
2409 case 0: threadCount = CPUID::processAffinity(); break;
2410 default: threadCount = configuration.threadCount; break;
2411 }
2412
2413 CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2414 CPUID::setEnableSSSE3(configuration.enableSSSE3);
2415 CPUID::setEnableSSE3(configuration.enableSSE3);
2416 CPUID::setEnableSSE2(configuration.enableSSE2);
2417 CPUID::setEnableSSE(configuration.enableSSE);
2418
2419 for(int pass = 0; pass < 10; pass++)
2420 {
2421 optimization[pass] = configuration.optimization[pass];
2422 }
2423
2424 forceWindowed = configuration.forceWindowed;
2425 complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2426 postBlendSRGB = configuration.postBlendSRGB;
2427 exactColorRounding = configuration.exactColorRounding;
2428 forceClearRegisters = configuration.forceClearRegisters;
2429
2430 #ifndef NDEBUG
2431 minPrimitives = configuration.minPrimitives;
2432 maxPrimitives = configuration.maxPrimitives;
2433 #endif
2434 }
2435
2436 if(!initialUpdate && !worker[0])
2437 {
2438 initializeThreads();
2439 }
2440 }
2441}