Add init-once to threaded backend
To keep this CL simple, I haven't enabled DAA for init-once yet.
The current init-once is only enabled for draw path, and it simply
generates the dev path in the init-once phase.
Bug: skia:
Change-Id: Ie9a9ef9fc453acbdeb48b06b93d578c626961e3f
Reviewed-on: https://skia-review.googlesource.com/87784
Commit-Queue: Yuqian Li <liyuqian@google.com>
Reviewed-by: Herb Derby <herb@google.com>
diff --git a/BUILD.gn b/BUILD.gn
index ea71c8a..d45f692 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -1490,13 +1490,7 @@
test_lib("samples") {
public_include_dirs = [ "samplecode" ]
include_dirs = [ "experimental" ]
- sources = samples_sources + [
- # Relocating these files here, so that clients don't try to build them while they're
- # still in active development. Clang's thread safety analysis gets tripped up by
- # conditional locks.
- "src/core/SkThreadedBMPDevice.cpp",
- "src/core/SkThreadedBMPDevice.h",
- ]
+ sources = samples_sources
deps = [
":experimental_sksg",
":experimental_svg_model",
diff --git a/gn/core.gni b/gn/core.gni
index 81218b8..72e12c2 100644
--- a/gn/core.gni
+++ b/gn/core.gni
@@ -313,6 +313,10 @@
"$_src/core/SkTextToPathIter.h",
"$_src/core/SkTime.cpp",
"$_src/core/SkTDPQueue.h",
+
+ # Clang's false-positive thread-safety warnings have been resolved in SkTaskGroup2D.cpp
+ "$_src/core/SkThreadedBMPDevice.cpp",
+ "$_src/core/SkThreadedBMPDevice.h",
"$_src/core/SkThreadID.cpp",
"$_src/core/SkTLList.h",
"$_src/core/SkTLS.cpp",
diff --git a/src/core/SkBlitter.cpp b/src/core/SkBlitter.cpp
index f4de215..7a3e904 100644
--- a/src/core/SkBlitter.cpp
+++ b/src/core/SkBlitter.cpp
@@ -80,11 +80,13 @@
}
void SkBlitter::blitCoverageDeltas(SkCoverageDeltaList* deltas, const SkIRect& clip,
- bool isEvenOdd, bool isInverse, bool isConvex) {
- int runSize = clip.width() + 1; // +1 so we can set runs[clip.width()] = 0
- void* storage = this->allocBlitMemory(runSize * (sizeof(int16_t) + sizeof(SkAlpha)));
- int16_t* runs = reinterpret_cast<int16_t*>(storage);
- SkAlpha* alphas = reinterpret_cast<SkAlpha*>(runs + runSize);
+ bool isEvenOdd, bool isInverse, bool isConvex,
+ SkArenaAlloc* alloc) {
+ // We cannot use blitter to allocate the storage because the same blitter might be used across
+ // many threads.
+ int runSize = clip.width() + 1; // +1 so we can set runs[clip.width()] = 0
+ int16_t* runs = alloc->makeArrayDefault<int16_t>(runSize);
+ SkAlpha* alphas = alloc->makeArrayDefault<SkAlpha>(runSize);
runs[clip.width()] = 0; // we must set the last run to 0 so blitAntiH can stop there
bool canUseMask = !deltas->forceRLE() &&
diff --git a/src/core/SkBlitter.h b/src/core/SkBlitter.h
index c280ac3..357eb46 100644
--- a/src/core/SkBlitter.h
+++ b/src/core/SkBlitter.h
@@ -36,7 +36,8 @@
// For example, one may avoid some virtual blitAntiH calls by directly calling
// SkBlitRow::Color32.
virtual void blitCoverageDeltas(SkCoverageDeltaList* deltas, const SkIRect& clip,
- bool isEvenOdd, bool isInverse, bool isConvex);
+ bool isEvenOdd, bool isInverse, bool isConvex,
+ SkArenaAlloc* alloc);
/// Blit a horizontal run of one or more pixels.
virtual void blitH(int x, int y, int width) = 0;
diff --git a/src/core/SkDraw.cpp b/src/core/SkDraw.cpp
index d2d2307..478617f 100644
--- a/src/core/SkDraw.cpp
+++ b/src/core/SkDraw.cpp
@@ -34,6 +34,7 @@
#include "SkStrokeRec.h"
#include "SkTemplates.h"
#include "SkTextMapStateProc.h"
+#include "SkThreadedBMPDevice.h"
#include "SkTLazy.h"
#include "SkUtils.h"
@@ -951,12 +952,18 @@
}
void SkDraw::drawDevPath(const SkPath& devPath, const SkPaint& paint, bool drawCoverage,
- SkBlitter* customBlitter, bool doFill) const {
+ SkBlitter* customBlitter, bool doFill, SkInitOnceData* iData) const {
SkBlitter* blitter = nullptr;
SkAutoBlitterChoose blitterStorage;
+ SkAutoBlitterChoose* blitterStoragePtr = &blitterStorage;
+ if (iData) {
+ // we're in the threaded init-once phase; the blitter has to be allocated in the thread
+ // allocator so it will remain valid later during the draw phase.
+ blitterStoragePtr = iData->fAlloc->make<SkAutoBlitterChoose>();
+ }
if (nullptr == customBlitter) {
- blitterStorage.choose(fDst, *fMatrix, paint, drawCoverage);
- blitter = blitterStorage.get();
+ blitterStoragePtr->choose(fDst, *fMatrix, paint, drawCoverage);
+ blitter = blitterStoragePtr->get();
} else {
blitter = customBlitter;
}
@@ -1009,12 +1016,31 @@
}
}
}
- proc(devPath, *fRC, blitter);
+
+ if (iData == nullptr) {
+ proc(devPath, *fRC, blitter); // proceed directly if we're not in threaded init-once
+ } else if (true || !doFill || !paint.isAntiAlias()) {
+ // TODO remove true in the if statement above so we can proceed to DAA.
+
+ // We're in threaded init-once but we can't use DAA. Hence we'll stop here and hand all the
+ // remaining work to draw phase. This is a simple example of how to add init-once to
+ // existing drawXXX commands: simply send in SkInitOnceData, do as much init work as
+ // possible, and finally wrap the remaining work into iData->fElement->fDrawFn.
+ iData->fElement->setDrawFn([proc, devPath, blitter](SkArenaAlloc* alloc,
+ const SkThreadedBMPDevice::DrawState& ds, const SkIRect& tileBounds) {
+ SkThreadedBMPDevice::TileDraw tileDraw(ds, tileBounds);
+ proc(devPath, *tileDraw.fRC, blitter);
+ });
+ } else {
+ // We can use DAA to do scan conversion in the init-once phase.
+ // TODO To be implemented
+ }
}
void SkDraw::drawPath(const SkPath& origSrcPath, const SkPaint& origPaint,
const SkMatrix* prePathMatrix, bool pathIsMutable,
- bool drawCoverage, SkBlitter* customBlitter) const {
+ bool drawCoverage, SkBlitter* customBlitter,
+ SkInitOnceData* iData) const {
SkDEBUGCODE(this->validate();)
// nothing to draw
@@ -1024,17 +1050,21 @@
SkPath* pathPtr = (SkPath*)&origSrcPath;
bool doFill = true;
- SkPath tmpPath;
+ SkPath tmpPathStorage;
+ SkPath* tmpPath = &tmpPathStorage;
SkMatrix tmpMatrix;
const SkMatrix* matrix = fMatrix;
- tmpPath.setIsVolatile(true);
+ if (iData) {
+ tmpPath = iData->fAlloc->make<SkPath>();
+ }
+ tmpPath->setIsVolatile(true);
if (prePathMatrix) {
if (origPaint.getPathEffect() || origPaint.getStyle() != SkPaint::kFill_Style) {
SkPath* result = pathPtr;
if (!pathIsMutable) {
- result = &tmpPath;
+ result = tmpPath;
pathIsMutable = true;
}
pathPtr->transform(*prePathMatrix, result);
@@ -1079,18 +1109,18 @@
if (this->computeConservativeLocalClipBounds(&cullRect)) {
cullRectPtr = &cullRect;
}
- doFill = paint->getFillPath(*pathPtr, &tmpPath, cullRectPtr,
+ doFill = paint->getFillPath(*pathPtr, tmpPath, cullRectPtr,
ComputeResScaleForStroking(*fMatrix));
- pathPtr = &tmpPath;
+ pathPtr = tmpPath;
}
// avoid possibly allocating a new path in transform if we can
- SkPath* devPathPtr = pathIsMutable ? pathPtr : &tmpPath;
+ SkPath* devPathPtr = pathIsMutable ? pathPtr : tmpPath;
// transform the path into device space
pathPtr->transform(*matrix, devPathPtr);
- this->drawDevPath(*devPathPtr, *paint, drawCoverage, customBlitter, doFill);
+ this->drawDevPath(*devPathPtr, *paint, drawCoverage, customBlitter, doFill, iData);
}
void SkDraw::drawBitmapAsMask(const SkBitmap& bitmap, const SkPaint& paint) const {
diff --git a/src/core/SkDraw.h b/src/core/SkDraw.h
index e4b2285..8598475 100644
--- a/src/core/SkDraw.h
+++ b/src/core/SkDraw.h
@@ -29,6 +29,7 @@
struct SkDrawProcs;
struct SkRect;
class SkRRect;
+struct SkInitOnceData;
class SkDraw {
public:
@@ -127,11 +128,11 @@
void drawPath(const SkPath&, const SkPaint&, const SkMatrix* preMatrix,
bool pathIsMutable, bool drawCoverage,
- SkBlitter* customBlitter = nullptr) const;
+ SkBlitter* customBlitter = nullptr, SkInitOnceData* iData = nullptr) const;
void drawLine(const SkPoint[2], const SkPaint&) const;
void drawDevPath(const SkPath& devPath, const SkPaint& paint, bool drawCoverage,
- SkBlitter* customBlitter, bool doFill) const;
+ SkBlitter* customBlitter, bool doFill, SkInitOnceData* iData = nullptr) const;
/**
* Return the current clip bounds, in local coordinates, with slop to account
* for antialiasing or hairlines (i.e. device-bounds outset by 1, and then
@@ -156,6 +157,8 @@
#else
void validate() const {}
#endif
+
+ friend class SkThreadedBMPDevice; // to access private method drawPath
};
#endif
diff --git a/src/core/SkScan_DAAPath.cpp b/src/core/SkScan_DAAPath.cpp
index 948acce..dca2bd2 100644
--- a/src/core/SkScan_DAAPath.cpp
+++ b/src/core/SkScan_DAAPath.cpp
@@ -352,6 +352,6 @@
} else {
SkCoverageDeltaList deltaList(&alloc, clippedIR.fTop, clippedIR.fBottom, forceRLE);
gen_alpha_deltas(path, clipBounds, deltaList, blitter, skipRect, containedInClip);
- blitter->blitCoverageDeltas(&deltaList, clipBounds, isEvenOdd, isInverse, isConvex);
+ blitter->blitCoverageDeltas(&deltaList, clipBounds, isEvenOdd, isInverse, isConvex, &alloc);
}
}
diff --git a/src/core/SkTaskGroup2D.cpp b/src/core/SkTaskGroup2D.cpp
index fe8a5a8..4060527 100644
--- a/src/core/SkTaskGroup2D.cpp
+++ b/src/core/SkTaskGroup2D.cpp
@@ -24,39 +24,35 @@
}
void SkSpinningTaskGroup2D::work(int threadId) {
- int& nextColumn = fRowData[threadId].fNextColumn;
+ int workCol = 0;
+ int initCol = 0;
while (true) {
- SkASSERT(nextColumn <= fWidth);
- if (this->isFinishing() && nextColumn >= fWidth) {
+ SkASSERT(workCol <= fWidth);
+ if (this->isFinishing() && workCol >= fWidth) {
return;
}
- if (nextColumn < fWidth) {
- fWork(threadId, nextColumn);
- nextColumn++;
+ // Note that row = threadId
+ if (workCol < fWidth && fKernel->work2D(threadId, workCol, threadId)) {
+ workCol++;
+ } else {
+ // Initialize something if we can't work
+ this->initAnUninitializedColumn(initCol, threadId);
}
}
}
-SkFlexibleTaskGroup2D::SkFlexibleTaskGroup2D(Work2D&& w, int h, SkExecutor* x, int t)
- : SkTaskGroup2D(std::move(w), h, x, t), fRowData(h), fThreadData(t) {
- for (int i = 0; i < t; ++i) {
- fThreadData[i].fRowIndex = i;
- }
-}
-
-
void SkFlexibleTaskGroup2D::work(int threadId) {
- int failCnt = 0;
- int& rowIndex = fThreadData[threadId].fRowIndex;
+ int row = threadId;
+ int initCol = 0;
+ int numRowsCompleted = 0;
+ std::vector<bool> completedRows(fHeight, false);
- // This loop looks for work to do as long as
- // either 1. isFinishing is false
- // or 2. isFinishing is true but some rows still have unfinished tasks
- while (true) {
- RowData& rowData = fRowData[rowIndex];
- bool processed = false;
+ // Only keep fHeight - numRowsCompleted number of threads looping. When rows are about to
+ // complete, this strategy keeps the contention low.
+ while (threadId >= numRowsCompleted) {
+ RowData& rowData = fRowData[row];
// The Android roller somehow gets a false-positive compile warning/error about the try-lock
// and unlock process. Hence we disable -Wthread-safety-analysis to bypass it.
@@ -65,15 +61,16 @@
#pragma clang diagnostic ignored "-Wthread-safety-analysis"
#endif
if (rowData.fMutex.try_lock()) {
- if (rowData.fNextColumn < fWidth) {
- fWork(rowIndex, rowData.fNextColumn);
+ while (rowData.fNextColumn < fWidth &&
+ fKernel->work2D(row, rowData.fNextColumn, threadId)) {
rowData.fNextColumn++;
- processed = true;
- } else {
- // isFinishing can never go from true to false. Once it's true, we count how many
- // times that a row is out of work. If that count reaches fHeight, then we're out of
- // work for the whole group.
- failCnt += this->isFinishing();
+ }
+ // isFinishing can never go from true to false. Once it's true, we count how many rows
+ // are completed (out of work). If that count reaches fHeight, then we're out of work
+ // for the whole group and we can stop.
+ if (rowData.fNextColumn == fWidth && this->isFinishing()) {
+ numRowsCompleted += (completedRows[row] == false);
+ completedRows[row] = true; // so we won't count this row twice
}
rowData.fMutex.unlock();
}
@@ -81,11 +78,9 @@
#pragma clang diagnostic pop
#endif
- if (!processed) {
- if (failCnt >= fHeight) {
- return;
- }
- rowIndex = (rowIndex + 1) % fHeight;
- }
+ // By reaching here, we're either unable to acquire the row, or out of work, or blocked by
+ // initialization
+ row = (row + 1) % fHeight; // Move to the next row
+ this->initAnUninitializedColumn(initCol, threadId); // Initialize something
}
}
diff --git a/src/core/SkTaskGroup2D.h b/src/core/SkTaskGroup2D.h
index b55b96a..851db61 100644
--- a/src/core/SkTaskGroup2D.h
+++ b/src/core/SkTaskGroup2D.h
@@ -13,22 +13,43 @@
#include <mutex>
#include <vector>
-// A 2D grid (height rows x width columns) of tasks.
-//
-// The task on row i and column j is abstracted as Work2D(i, j). We guarantee that the task on the
-// same row will be executed in order (i.e., Work2D(1, 1) is guaranteed to finish before calling
-// Work2D(1, 2)). Tasks in different rows can happen in any order.
+// The interface for doing work on a 2D grid with possible initialization on columns.
+class SkWorkKernel2D {
+public:
+ // Return false iff the column needs initialization and such initialization is not finished yet.
+ virtual bool work2D(int row, int column, int thread) = 0;
+
+ // Return false if no initialization is done for this column (e.g., it's already initialized; or
+ // maybe some other thread is initializing the column).
+ virtual bool initColumn(int column, int thread) = 0;
+
+ virtual ~SkWorkKernel2D() {}
+};
+
+// A 2D grid (height rows x width columns) of tasks to be executed on a given executor with
+// threadCnt number of threads.
//
// The height (number of rows) is fixed. The width (number of columns) may be dynamically expanded.
//
-// The tasks will eventually be executed on the executor with threadCnt number of hardware threads.
+// The task on row i and column j is abstracted as work2D(i, j, t). Parameter t is the thread id and
+// it shouldn't affect the work to be done. It's only used to allow some variables that are not
+// thread safe and should be used exclusively by one thread (e.g., thread allocators). We guarantee
+// that the task on the same row will be executed in order (i.e., work2D(1, 1, t) is guaranteed to
+// finish before calling work2D(1, 2, t)). Tasks in different rows can happen in any order.
+//
+// There are also width number of init calls, one per column. work2D(i, j, t) may return false if
+// column j requires initialization but it's not initialized yet. In that case, a thread t needs to
+// call initColumn(j, t) once to unblock all rows that depend on the initialization of column j.
+// (Again, t shouldn't affect the init work to be done; it's just for some non-thread-safe
+// variables). The init calls have no order requirement so we can call them in any order.
+//
+// Multiple threads may try to init the same column j at the same time. InitFn is expected to handle
+// this gracefully (e.g., let only one thread do the init and return immediately for other threads).
class SkTaskGroup2D {
public:
- using Work2D = std::function<void(int, int)>;
-
- SkTaskGroup2D(Work2D&& work, int height, SkExecutor* executor, int threadCnt)
- : fWork(work), fHeight(height), fThreadCnt(threadCnt), fIsFinishing(false), fWidth(0)
- , fThreadsGroup(new SkTaskGroup(*executor)) {}
+ SkTaskGroup2D(SkWorkKernel2D* kernel, int height, SkExecutor* executor, int threadCnt)
+ : fKernel(kernel), fHeight(height), fThreadCnt(threadCnt), fIsFinishing(false)
+ , fWidth(0), fThreadsGroup(new SkTaskGroup(*executor)) {}
virtual ~SkTaskGroup2D() {}
@@ -47,9 +68,19 @@
// Finish all tasks on the threadId and then return.
virtual void work(int threadId) = 0;
- Work2D fWork; // fWork(i, j) is the task to be done on row i and column j
- const int fHeight;
- const int fThreadCnt;
+ // Initialize a column that needs to be initialized. The parameter initCol is not thread safe
+ // and must be accessed exclusively by the working thread, which advances it to the next
+ // column that may need initialization.
+ void initAnUninitializedColumn(int& initCol, int threadId) {
+ bool didSomeInit = false;
+ while (initCol < fWidth && !didSomeInit) {
+ didSomeInit = fKernel->initColumn(initCol++, threadId);
+ }
+ }
+
+ SkWorkKernel2D* fKernel;
+ const int fHeight;
+ const int fThreadCnt;
std::atomic<bool> fIsFinishing;
std::atomic<int> fWidth;
@@ -60,28 +91,19 @@
// A simple spinning task group that assumes height equals threadCnt.
class SkSpinningTaskGroup2D final : public SkTaskGroup2D {
public:
- SkSpinningTaskGroup2D(Work2D&& w, int h, SkExecutor* x, int t)
- : SkTaskGroup2D(std::move(w), h, x, t), fRowData(h) {
+ SkSpinningTaskGroup2D(SkWorkKernel2D* kernel, int h, SkExecutor* x, int t)
+ : SkTaskGroup2D(kernel, h, x, t) {
SkASSERT(h == t); // height must be equal to threadCnt
}
protected:
void work(int threadId) override;
-
-private:
- // alignas(MAX_CACHE_LINE) to avoid false sharing by cache lines
- struct alignas(MAX_CACHE_LINE) RowData {
- RowData() : fNextColumn(0) {}
-
- int fNextColumn; // next column index to be executed
- };
-
- std::vector<RowData> fRowData;
};
class SkFlexibleTaskGroup2D final : public SkTaskGroup2D {
public:
- SkFlexibleTaskGroup2D(Work2D&&, int, SkExecutor*, int);
+ SkFlexibleTaskGroup2D(SkWorkKernel2D* kernel, int h, SkExecutor* x, int t)
+ : SkTaskGroup2D(kernel, h, x, t), fRowData(h) {}
protected:
void work(int threadId) override;
@@ -91,18 +113,11 @@
struct alignas(MAX_CACHE_LINE) RowData {
RowData() : fNextColumn(0) {}
- int fNextColumn; // next column index to be executed
+ int fNextColumn; // next column index to work
std::mutex fMutex; // the mutex for the thread to acquire
};
- struct alignas(MAX_CACHE_LINE) ThreadData {
- ThreadData() : fRowIndex(0) {}
-
- int fRowIndex; // the row that the current thread is working on
- };
-
std::vector<RowData> fRowData;
- std::vector<ThreadData> fThreadData;
};
#endif//SkTaskGroup2D_DEFINED
diff --git a/src/core/SkThreadedBMPDevice.cpp b/src/core/SkThreadedBMPDevice.cpp
index a61e9b2..ad3814c 100644
--- a/src/core/SkThreadedBMPDevice.cpp
+++ b/src/core/SkThreadedBMPDevice.cpp
@@ -12,23 +12,30 @@
#include "SkTaskGroup.h"
#include "SkVertices.h"
+// Calling initColumn(j, k) initializes the j-th element on the k-th thread. It returns false if
+// it's already initialized.
+bool SkThreadedBMPDevice::DrawQueue::initColumn(int column, int thread) {
+ return fElements[column].tryInitOnce(&fThreadAllocs[thread]);
+}
+
+// Calling work2D(i, j, k) draws the j-th element in the i-th tile on the k-th thread. If the
+// needs to be initialized, drawFn will return false without drawing.
+bool SkThreadedBMPDevice::DrawQueue::work2D(int row, int column, int thread) {
+ return fElements[column].tryDraw(fDevice->fTileBounds[row], &fThreadAllocs[thread]);
+}
+
void SkThreadedBMPDevice::DrawQueue::reset() {
if (fTasks) {
fTasks->finish();
}
+ fThreadAllocs.reset(fDevice->fThreadCnt);
fSize = 0;
// using TaskGroup2D = SkSpinningTaskGroup2D;
using TaskGroup2D = SkFlexibleTaskGroup2D;
- auto draw2D = [this](int row, int column){
- SkThreadedBMPDevice::DrawElement& element = fElements[column];
- if (!SkIRect::Intersects(fDevice->fTileBounds[row], element.fDrawBounds)) {
- return;
- }
- element.fDrawFn(nullptr, element.fDS, fDevice->fTileBounds[row]);
- };
- fTasks.reset(new TaskGroup2D(draw2D, fDevice->fTileCnt, fDevice->fExecutor,
+
+ fTasks.reset(new TaskGroup2D(this, fDevice->fTileCnt, fDevice->fExecutor,
fDevice->fThreadCnt));
fTasks->start();
}
@@ -149,9 +156,16 @@
const SkMatrix* prePathMatrix, bool pathIsMutable) {
SkRect drawBounds = path.isInverseFillType() ? SkRectPriv::MakeLargest()
: get_fast_bounds(path.getBounds(), paint);
- fQueue.push(drawBounds, [=](SkArenaAlloc*, const DrawState& ds, const SkIRect& tileBounds) {
- TileDraw(ds, tileBounds).drawPath(path, paint, prePathMatrix, false);
- });
+ if (path.countVerbs() < 100) { // when path is small, init-once has too much overhead
+ fQueue.push(drawBounds, [=](SkArenaAlloc*, const DrawState& ds, const SkIRect& tileBounds) {
+ TileDraw(ds, tileBounds).drawPath(path, paint, prePathMatrix, false);
+ });
+ } else {
+ fQueue.push(drawBounds, [=](SkArenaAlloc* alloc, DrawElement* elem) {
+ SkInitOnceData data = {alloc, elem};
+ elem->getDraw().drawPath(path, paint, prePathMatrix, false, false, nullptr, &data);
+ });
+ }
}
void SkThreadedBMPDevice::drawBitmap(const SkBitmap& bitmap, SkScalar x, SkScalar y,
diff --git a/src/core/SkThreadedBMPDevice.h b/src/core/SkThreadedBMPDevice.h
index 143657f..a33715e 100644
--- a/src/core/SkThreadedBMPDevice.h
+++ b/src/core/SkThreadedBMPDevice.h
@@ -43,6 +43,7 @@
void flush() override;
private:
+ // We store DrawState inside DrawElement because fInitFn and fDrawFn both want to use it
struct DrawState {
SkPixmap fDst;
SkMatrix fMatrix;
@@ -59,16 +60,59 @@
private: SkRasterClip fTileRC;
};
- struct DrawElement {
+ class DrawElement {
+ public:
+ using InitFn = std::function<void(SkArenaAlloc* threadAlloc, DrawElement* element)>;
using DrawFn = std::function<void(SkArenaAlloc* threadAlloc, const DrawState& ds,
const SkIRect& tileBounds)>;
- DrawFn fDrawFn;
- DrawState fDS;
- SkIRect fDrawBounds;
+ DrawElement() {}
+ DrawElement(SkThreadedBMPDevice* device, DrawFn&& drawFn, const SkRect& rawDrawBounds)
+ : fInitialized(true)
+ , fDrawFn(std::move(drawFn))
+ , fDS(device)
+ , fDrawBounds(device->transformDrawBounds(rawDrawBounds)) {}
+ DrawElement(SkThreadedBMPDevice* device, InitFn&& initFn, const SkRect& rawDrawBounds)
+ : fInitialized(false)
+ , fInitFn(std::move(initFn))
+ , fDS(device)
+ , fDrawBounds(device->transformDrawBounds(rawDrawBounds)) {}
+
+ SK_ALWAYS_INLINE bool tryInitOnce(SkArenaAlloc* alloc) {
+ if (fInitialized) {
+ return false;
+ }
+ std::call_once(fNeedInit, [this, alloc]{
+ fInitFn(alloc, this);
+ fInitialized = true;
+ });
+ return true;
+ }
+
+ SK_ALWAYS_INLINE bool tryDraw(const SkIRect& tileBounds, SkArenaAlloc* alloc) {
+ if (!SkIRect::Intersects(tileBounds, fDrawBounds)) {
+ return true;
+ }
+ if (fInitialized) {
+ fDrawFn(alloc, fDS, tileBounds);
+ return true;
+ }
+ return false;
+ }
+
+ SkDraw getDraw() const { return fDS.getDraw(); }
+ void setDrawFn(DrawFn&& fn) { fDrawFn = std::move(fn); }
+
+ private:
+ std::atomic<bool> fInitialized;
+ std::once_flag fNeedInit;
+ InitFn fInitFn;
+ DrawFn fDrawFn;
+ DrawState fDS;
+ SkIRect fDrawBounds;
};
- class DrawQueue {
+ class DrawQueue : public SkWorkKernel2D {
public:
static constexpr int MAX_QUEUE_SIZE = 100000;
@@ -79,25 +123,29 @@
// will start new tasks.
void finish() { fTasks->finish(); }
- SK_ALWAYS_INLINE void push(const SkRect& rawDrawBounds,
- DrawElement::DrawFn&& drawFn) {
+ // Push a draw command into the queue. If Fn is DrawFn, we're pushing an element without
+ // the need of initialization. If Fn is InitFn, we're pushing an element with init-once
+ // and the InitFn will generate the DrawFn during initialization.
+ template<typename Fn>
+ SK_ALWAYS_INLINE void push(const SkRect& rawDrawBounds, Fn&& fn) {
if (fSize == MAX_QUEUE_SIZE) {
this->reset();
}
SkASSERT(fSize < MAX_QUEUE_SIZE);
-
- DrawElement* element = &fElements[fSize++];
- element->fDS = DrawState(fDevice);
- element->fDrawFn = std::move(drawFn);
- element->fDrawBounds = fDevice->transformDrawBounds(rawDrawBounds);
+ new (&fElements[fSize++]) DrawElement(fDevice, std::move(fn), rawDrawBounds);
fTasks->addColumn();
}
+ // SkWorkKernel2D
+ bool initColumn(int column, int thread) override;
+ bool work2D(int row, int column, int thread) override;
+
private:
- SkThreadedBMPDevice* fDevice;
- std::unique_ptr<SkTaskGroup2D> fTasks;
- DrawElement fElements[MAX_QUEUE_SIZE];
- int fSize;
+ SkThreadedBMPDevice* fDevice;
+ std::unique_ptr<SkTaskGroup2D> fTasks;
+ SkTArray<SkSTArenaAlloc<8 << 10>> fThreadAllocs; // 8k stack size
+ DrawElement fElements[MAX_QUEUE_SIZE];
+ int fSize;
};
SkIRect transformDrawBounds(const SkRect& drawBounds) const;
@@ -117,7 +165,17 @@
DrawQueue fQueue;
+ friend struct SkInitOnceData; // to access DrawElement
+ friend class SkDraw; // to access DrawState
+
typedef SkBitmapDevice INHERITED;
};
+// Passed to SkDraw::drawXXX to enable threaded draw with init-once. The goal is to reuse as much
+// code as possible from SkDraw. (See SkDraw::drawPath and SkDraw::drawDevPath for an example.)
+struct SkInitOnceData {
+ SkArenaAlloc* fAlloc;
+ SkThreadedBMPDevice::DrawElement* fElement;
+};
+
#endif // SkThreadedBMPDevice_DEFINED