Merge commit 'f5ecac92' into manualmerge
Conflicts:
driver/rsdBcc.cpp
Change-Id: I04a06c157165a77f3c5edffa637b9e42a02ef617
diff --git a/driver/rsdBcc.cpp b/driver/rsdBcc.cpp
index 09981d2..ddcaac8 100644
--- a/driver/rsdBcc.cpp
+++ b/driver/rsdBcc.cpp
@@ -343,16 +343,39 @@
RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
if ((dc->mWorkers.mCount >= 1) && isThreadable && !dc->mInForEach) {
+ const size_t targetByteChunk = 16 * 1024;
dc->mInForEach = true;
if (mtls->fep.dimY > 1) {
- mtls->mSliceSize = mtls->fep.dimY / (dc->mWorkers.mCount * 4);
+ uint32_t s1 = mtls->fep.dimY / ((dc->mWorkers.mCount + 1) * 4);
+ uint32_t s2 = 0;
+
+ // This chooses our slice size to rate limit atomic ops to
+ // one per 16k bytes of reads/writes.
+ if (mtls->fep.yStrideOut) {
+ s2 = targetByteChunk / mtls->fep.yStrideOut;
+ } else {
+ s2 = targetByteChunk / mtls->fep.yStrideIn;
+ }
+ mtls->mSliceSize = rsMin(s1, s2);
+
if(mtls->mSliceSize < 1) {
mtls->mSliceSize = 1;
}
rsdLaunchThreads(mrsc, wc_xy, mtls);
} else {
- mtls->mSliceSize = mtls->fep.dimX / (dc->mWorkers.mCount * 4);
+ uint32_t s1 = mtls->fep.dimX / ((dc->mWorkers.mCount + 1) * 4);
+ uint32_t s2 = 0;
+
+ // This chooses our slice size to rate limit atomic ops to
+ // one per 16k bytes of reads/writes.
+ if (mtls->fep.eStrideOut) {
+ s2 = targetByteChunk / mtls->fep.eStrideOut;
+ } else {
+ s2 = targetByteChunk / mtls->fep.eStrideIn;
+ }
+ mtls->mSliceSize = rsMin(s1, s2);
+
if(mtls->mSliceSize < 1) {
mtls->mSliceSize = 1;
}