blob: 2c28286b5ad5eb28a2f88963f61eb4bf27f8316c [file] [log] [blame]
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file context.h
*
* @brief Definitions for SWR_CONTEXT and DRAW_CONTEXT
* The SWR_CONTEXT is our global context and contains the DC ring,
* thread state, etc.
*
* The DRAW_CONTEXT contains all state associated with a draw operation.
*
******************************************************************************/
#pragma once
#include <condition_variable>
#include <algorithm>
#include "core/api.h"
#include "core/utils.h"
#include "core/arena.h"
#include "core/fifo.hpp"
#include "core/knobs.h"
#include "common/simdintrin.h"
#include "core/threads.h"
#include "ringbuffer.h"
// x.8 fixed point precision values
#define FIXED_POINT_SHIFT 8
#define FIXED_POINT_SCALE 256
// x.16 fixed point precision values
#define FIXED_POINT16_SHIFT 16
#define FIXED_POINT16_SCALE 65536
struct SWR_CONTEXT;
struct DRAW_CONTEXT;
struct TRI_FLAGS
{
uint32_t frontFacing : 1;
uint32_t yMajor : 1;
uint32_t coverageMask : (SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM);
uint32_t reserved : 32 - 1 - 1 - (SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM);
float pointSize;
uint32_t primID;
uint32_t renderTargetArrayIndex;
};
//////////////////////////////////////////////////////////////////////////
/// SWR_TRIANGLE_DESC
/////////////////////////////////////////////////////////////////////////
struct SWR_TRIANGLE_DESC
{
float I[3];
float J[3];
float Z[3];
float OneOverW[3];
float recipDet;
float *pRecipW;
float *pAttribs;
float *pPerspAttribs;
float *pSamplePos;
float *pUserClipBuffer;
uint64_t coverageMask[SWR_MAX_NUM_MULTISAMPLES];
uint64_t anyCoveredSamples;
TRI_FLAGS triFlags;
};
struct TRIANGLE_WORK_DESC
{
float *pTriBuffer;
float *pAttribs;
float *pUserClipBuffer;
uint32_t numAttribs;
TRI_FLAGS triFlags;
};
union CLEAR_FLAGS
{
struct
{
uint32_t mask : 3;
};
uint32_t bits;
};
struct CLEAR_DESC
{
CLEAR_FLAGS flags;
float clearRTColor[4]; // RGBA_32F
float clearDepth; // [0..1]
uint8_t clearStencil;
};
struct DISCARD_INVALIDATE_TILES_DESC
{
uint32_t attachmentMask;
SWR_RECT rect;
SWR_TILE_STATE newTileState;
bool createNewTiles;
bool fullTilesOnly;
};
struct SYNC_DESC
{
PFN_CALLBACK_FUNC pfnCallbackFunc;
uint64_t userData;
uint64_t userData2;
uint64_t userData3;
};
struct QUERY_DESC
{
SWR_STATS* pStats;
};
struct STORE_TILES_DESC
{
SWR_RENDERTARGET_ATTACHMENT attachment;
SWR_TILE_STATE postStoreTileState;
};
struct COMPUTE_DESC
{
uint32_t threadGroupCountX;
uint32_t threadGroupCountY;
uint32_t threadGroupCountZ;
};
typedef void(*PFN_WORK_FUNC)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc);
enum WORK_TYPE
{
SYNC,
DRAW,
CLEAR,
DISCARDINVALIDATETILES,
STORETILES,
QUERYSTATS,
};
struct BE_WORK
{
WORK_TYPE type;
PFN_WORK_FUNC pfnWork;
union
{
SYNC_DESC sync;
TRIANGLE_WORK_DESC tri;
CLEAR_DESC clear;
DISCARD_INVALIDATE_TILES_DESC discardInvalidateTiles;
STORE_TILES_DESC storeTiles;
QUERY_DESC queryStats;
} desc;
};
struct DRAW_WORK
{
DRAW_CONTEXT* pDC;
union
{
uint32_t numIndices; // DrawIndexed: Number of indices for draw.
uint32_t numVerts; // Draw: Number of verts (triangles, lines, etc)
};
union
{
const int32_t* pIB; // DrawIndexed: App supplied indices
uint32_t startVertex; // Draw: Starting vertex in VB to render from.
};
int32_t baseVertex;
uint32_t numInstances; // Number of instances
uint32_t startInstance; // Instance offset
uint32_t startPrimID; // starting primitiveID for this draw batch
uint32_t startVertexID; // starting VertexID for this draw batch (only needed for non-indexed draws)
SWR_FORMAT type; // index buffer type
};
typedef void(*PFN_FE_WORK_FUNC)(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pDesc);
struct FE_WORK
{
WORK_TYPE type;
PFN_FE_WORK_FUNC pfnWork;
union
{
SYNC_DESC sync;
DRAW_WORK draw;
CLEAR_DESC clear;
DISCARD_INVALIDATE_TILES_DESC discardInvalidateTiles;
STORE_TILES_DESC storeTiles;
QUERY_DESC queryStats;
} desc;
};
struct GUARDBAND
{
float left, right, top, bottom;
};
struct PA_STATE;
// function signature for pipeline stages that execute after primitive assembly
typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[],
uint32_t primMask, simdscalari primID);
OSALIGNLINE(struct) API_STATE
{
// Vertex Buffers
SWR_VERTEX_BUFFER_STATE vertexBuffers[KNOB_NUM_STREAMS];
// Index Buffer
SWR_INDEX_BUFFER_STATE indexBuffer;
// FS - Fetch Shader State
PFN_FETCH_FUNC pfnFetchFunc;
// VS - Vertex Shader State
PFN_VERTEX_FUNC pfnVertexFunc;
// GS - Geometry Shader State
PFN_GS_FUNC pfnGsFunc;
SWR_GS_STATE gsState;
// CS - Compute Shader
PFN_CS_FUNC pfnCsFunc;
uint32_t totalThreadsInGroup;
// FE - Frontend State
SWR_FRONTEND_STATE frontendState;
// SOS - Streamout Shader State
PFN_SO_FUNC pfnSoFunc[MAX_SO_STREAMS];
// Streamout state
SWR_STREAMOUT_STATE soState;
mutable SWR_STREAMOUT_BUFFER soBuffer[MAX_SO_STREAMS];
// Tessellation State
PFN_HS_FUNC pfnHsFunc;
PFN_DS_FUNC pfnDsFunc;
SWR_TS_STATE tsState;
// Specifies which VS outputs are sent to PS.
// Does not include position
uint32_t linkageMask;
uint32_t linkageCount;
uint8_t linkageMap[MAX_ATTRIBUTES];
// attrib mask, specifies the total set of attributes used
// by the frontend (vs, so, gs)
uint32_t feAttribMask;
PRIMITIVE_TOPOLOGY topology;
bool forceFront;
// RS - Rasterizer State
SWR_RASTSTATE rastState;
// floating point multisample offsets
float samplePos[SWR_MAX_NUM_MULTISAMPLES * 2];
GUARDBAND gbState;
SWR_VIEWPORT vp[KNOB_NUM_VIEWPORTS_SCISSORS];
SWR_VIEWPORT_MATRIX vpMatrix[KNOB_NUM_VIEWPORTS_SCISSORS];
BBOX scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
BBOX scissorInFixedPoint;
// Backend state
SWR_BACKEND_STATE backendState;
// PS - Pixel shader state
SWR_PS_STATE psState;
SWR_DEPTH_STENCIL_STATE depthStencilState;
// OM - Output Merger State
SWR_BLEND_STATE blendState;
PFN_BLEND_JIT_FUNC pfnBlendFunc[SWR_NUM_RENDERTARGETS];
// Stats are incremented when this is true.
bool enableStats;
struct
{
uint32_t colorHottileEnable : 8;
uint32_t depthHottileEnable: 1;
uint32_t stencilHottileEnable : 1;
};
PFN_QUANTIZE_DEPTH pfnQuantizeDepth;
};
class MacroTileMgr;
class DispatchQueue;
struct RenderOutputBuffers
{
uint8_t* pColor[SWR_NUM_RENDERTARGETS];
uint8_t* pDepth;
uint8_t* pStencil;
};
// Plane equation A/B/C coeffs used to evaluate I/J barycentric coords
struct BarycentricCoeffs
{
simdscalar vIa;
simdscalar vIb;
simdscalar vIc;
simdscalar vJa;
simdscalar vJb;
simdscalar vJc;
simdscalar vZa;
simdscalar vZb;
simdscalar vZc;
simdscalar vRecipDet;
simdscalar vAOneOverW;
simdscalar vBOneOverW;
simdscalar vCOneOverW;
};
// pipeline function pointer types
typedef void(*PFN_BACKEND_FUNC)(DRAW_CONTEXT*, uint32_t, uint32_t, uint32_t, SWR_TRIANGLE_DESC&, RenderOutputBuffers&);
typedef void(*PFN_OUTPUT_MERGER)(SWR_PS_CONTEXT &, uint8_t* (&)[SWR_NUM_RENDERTARGETS], uint32_t, const SWR_BLEND_STATE*,
const PFN_BLEND_JIT_FUNC (&)[SWR_NUM_RENDERTARGETS], simdscalar&, simdscalar);
typedef void(*PFN_CALC_PIXEL_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT &);
typedef void(*PFN_CALC_SAMPLE_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT&);
typedef void(*PFN_CALC_CENTROID_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT &, const uint64_t *const, const uint32_t,
const simdscalar, const simdscalar);
struct BACKEND_FUNCS
{
PFN_BACKEND_FUNC pfnBackend;
PFN_CALC_PIXEL_BARYCENTRICS pfnCalcPixelBarycentrics;
PFN_CALC_SAMPLE_BARYCENTRICS pfnCalcSampleBarycentrics;
PFN_CALC_CENTROID_BARYCENTRICS pfnCalcCentroidBarycentrics;
PFN_OUTPUT_MERGER pfnOutputMerger;
};
// Draw State
struct DRAW_STATE
{
API_STATE state;
void* pPrivateState; // Its required the driver sets this up for each draw.
// pipeline function pointers, filled in by API thread when setting up the draw
BACKEND_FUNCS backendFuncs;
PFN_PROCESS_PRIMS pfnProcessPrims;
CachingArena* pArena; // This should only be used by API thread.
};
// Draw Context
// The api thread sets up a draw context that exists for the life of the draw.
// This draw context maintains all of the state needed for the draw operation.
struct DRAW_CONTEXT
{
SWR_CONTEXT* pContext;
uint64_t drawId;
MacroTileMgr* pTileMgr;
DispatchQueue* pDispatch; // Queue for thread groups. (isCompute)
uint64_t dependency;
DRAW_STATE* pState;
CachingArena* pArena;
bool isCompute; // Is this DC a compute context?
bool cleanupState; // True if this is the last draw using an entry in the state ring.
volatile bool doneFE; // Is FE work done for this draw?
volatile OSALIGNLINE(uint32_t) FeLock;
volatile int64_t threadsDone;
OSALIGNLINE(FE_WORK) FeWork;
uint8_t* pSpillFill[KNOB_MAX_NUM_THREADS]; // Scratch space used for spill fills.
};
static_assert((sizeof(DRAW_CONTEXT) & 63) == 0, "Invalid size for DRAW_CONTEXT");
INLINE const API_STATE& GetApiState(const DRAW_CONTEXT* pDC)
{
SWR_ASSERT(pDC != nullptr);
SWR_ASSERT(pDC->pState != nullptr);
return pDC->pState->state;
}
INLINE void* GetPrivateState(const DRAW_CONTEXT* pDC)
{
SWR_ASSERT(pDC != nullptr);
SWR_ASSERT(pDC->pState != nullptr);
return pDC->pState->pPrivateState;
}
class HotTileMgr;
struct SWR_CONTEXT
{
// Draw Context Ring
// Each draw needs its own state in order to support mulitple draws in flight across multiple threads.
// We maintain N draw contexts configured as a ring. The size of the ring limits the maximum number
// of draws that can be in flight at any given time.
//
// Description:
// 1. State - When an application first sets state we'll request a new draw context to use.
// a. If there are no available draw contexts then we'll have to wait until one becomes free.
// b. If one is available then set pCurDrawContext to point to it and mark it in use.
// c. All state calls set state on pCurDrawContext.
// 2. Draw - Creates submits a work item that is associated with current draw context.
// a. Set pPrevDrawContext = pCurDrawContext
// b. Set pCurDrawContext to NULL.
// 3. State - When an applications sets state after draw
// a. Same as step 1.
// b. State is copied from prev draw context to current.
RingBuffer<DRAW_CONTEXT> dcRing;
DRAW_CONTEXT *pCurDrawContext; // This points to DC entry in ring for an unsubmitted draw.
DRAW_CONTEXT *pPrevDrawContext; // This points to DC entry for the previous context submitted that we can copy state from.
// Draw State Ring
// When draw are very large (lots of primitives) then the API thread will break these up.
// These split draws all have identical state. So instead of storing the state directly
// in the Draw Context (DC) we instead store it in a Draw State (DS). This allows multiple DCs
// to reference a single entry in the DS ring.
RingBuffer<DRAW_STATE> dsRing;
uint32_t curStateId; // Current index to the next available entry in the DS ring.
uint32_t NumWorkerThreads;
uint32_t NumFEThreads;
uint32_t NumBEThreads;
THREAD_POOL threadPool; // Thread pool associated with this context
std::condition_variable FifosNotEmpty;
std::mutex WaitLock;
DRIVER_TYPE driverType;
uint32_t privateStateSize;
HotTileMgr *pHotTileMgr;
// tile load/store functions, passed in at create context time
PFN_LOAD_TILE pfnLoadTile;
PFN_STORE_TILE pfnStoreTile;
PFN_CLEAR_TILE pfnClearTile;
// Global Stats
SWR_STATS stats[KNOB_MAX_NUM_THREADS];
// Scratch space for workers.
uint8_t* pScratch[KNOB_MAX_NUM_THREADS];
CachingAllocator cachingArenaAllocator;
};
void WaitForDependencies(SWR_CONTEXT *pContext, uint64_t drawId);
void WakeAllThreads(SWR_CONTEXT *pContext);
#define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pContext->stats[workerId].name += count; }
#define SET_STAT(name, count) if (GetApiState(pDC).enableStats) { pContext->stats[workerId].name = count; }