| /* |
| * Copyright (C) 2013 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "rsCpuIntrinsic.h" |
| #include "rsCpuIntrinsicInlines.h" |
| |
| using namespace android; |
| using namespace android::renderscript; |
| |
| namespace android { |
| namespace renderscript { |
| |
| |
| class RsdCpuScriptIntrinsicHistogram : public RsdCpuScriptIntrinsic { |
| public: |
| void populateScript(Script *) override; |
| void invokeFreeChildren() override; |
| |
| void setGlobalVar(uint32_t slot, const void *data, size_t dataLength) override; |
| void setGlobalObj(uint32_t slot, ObjectBase *data) override; |
| |
| ~RsdCpuScriptIntrinsicHistogram() override; |
| RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e); |
| |
| protected: |
| void preLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen, |
| Allocation * aout, const void * usr, |
| uint32_t usrLen, const RsScriptCall *sc); |
| void postLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen, |
| Allocation * aout, const void * usr, |
| uint32_t usrLen, const RsScriptCall *sc); |
| |
| |
| float mDot[4]; |
| int mDotI[4]; |
| int *mSums; |
| ObjectBaseRef<Allocation> mAllocOut; |
| |
| static void kernelP1U4(const RsExpandKernelDriverInfo *info, |
| uint32_t xstart, uint32_t xend, |
| uint32_t outstep); |
| static void kernelP1U3(const RsExpandKernelDriverInfo *info, |
| uint32_t xstart, uint32_t xend, |
| uint32_t outstep); |
| static void kernelP1U2(const RsExpandKernelDriverInfo *info, |
| uint32_t xstart, uint32_t xend, |
| uint32_t outstep); |
| static void kernelP1U1(const RsExpandKernelDriverInfo *info, |
| uint32_t xstart, uint32_t xend, |
| uint32_t outstep); |
| |
| static void kernelP1L4(const RsExpandKernelDriverInfo *info, |
| uint32_t xstart, uint32_t xend, |
| uint32_t outstep); |
| static void kernelP1L3(const RsExpandKernelDriverInfo *info, |
| uint32_t xstart, uint32_t xend, |
| uint32_t outstep); |
| static void kernelP1L2(const RsExpandKernelDriverInfo *info, |
| uint32_t xstart, uint32_t xend, |
| uint32_t outstep); |
| static void kernelP1L1(const RsExpandKernelDriverInfo *info, |
| uint32_t xstart, uint32_t xend, |
| uint32_t outstep); |
| |
| }; |
| |
| } |
| } |
| |
| void RsdCpuScriptIntrinsicHistogram::setGlobalObj(uint32_t slot, ObjectBase *data) { |
| rsAssert(slot == 1); |
| mAllocOut.set(static_cast<Allocation *>(data)); |
| } |
| |
| void RsdCpuScriptIntrinsicHistogram::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { |
| rsAssert(slot == 0); |
| rsAssert(dataLength == 16); |
| memcpy(mDot, data, 16); |
| mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f); |
| mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f); |
| mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f); |
| mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f); |
| } |
| |
| |
| |
| void |
| RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot, |
| const Allocation ** ains, |
| uint32_t inLen, Allocation * aout, |
| const void * usr, uint32_t usrLen, |
| const RsScriptCall *sc) { |
| |
| const uint32_t threads = mCtx->getThreadCount(); |
| uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize(); |
| |
| switch (slot) { |
| case 0: |
| switch(vSize) { |
| case 1: |
| mRootPtr = &kernelP1U1; |
| break; |
| case 2: |
| mRootPtr = &kernelP1U2; |
| break; |
| case 3: |
| mRootPtr = &kernelP1U3; |
| vSize = 4; |
| break; |
| case 4: |
| mRootPtr = &kernelP1U4; |
| break; |
| } |
| break; |
| case 1: |
| switch(ains[0]->getType()->getElement()->getVectorSize()) { |
| case 1: |
| mRootPtr = &kernelP1L1; |
| break; |
| case 2: |
| mRootPtr = &kernelP1L2; |
| break; |
| case 3: |
| mRootPtr = &kernelP1L3; |
| break; |
| case 4: |
| mRootPtr = &kernelP1L4; |
| break; |
| } |
| break; |
| } |
| memset(mSums, 0, 256 * sizeof(int32_t) * threads * vSize); |
| } |
| |
| void |
| RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot, |
| const Allocation ** ains, |
| uint32_t inLen, Allocation * aout, |
| const void * usr, uint32_t usrLen, |
| const RsScriptCall *sc) { |
| |
| unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr; |
| uint32_t threads = mCtx->getThreadCount(); |
| uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize(); |
| |
| if (vSize == 3) vSize = 4; |
| |
| for (uint32_t ct=0; ct < (256 * vSize); ct++) { |
| o[ct] = mSums[ct]; |
| for (uint32_t t=1; t < threads; t++) { |
| o[ct] += mSums[ct + (256 * vSize * t)]; |
| } |
| } |
| } |
| |
| void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelDriverInfo *info, |
| uint32_t xstart, uint32_t xend, |
| uint32_t outstep) { |
| |
| RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; |
| uchar *in = (uchar *)info->inPtr[0]; |
| int * sums = &cp->mSums[256 * 4 * info->lid]; |
| |
| for (uint32_t x = xstart; x < xend; x++) { |
| sums[(in[0] << 2) ] ++; |
| sums[(in[1] << 2) + 1] ++; |
| sums[(in[2] << 2) + 2] ++; |
| sums[(in[3] << 2) + 3] ++; |
| in += info->inStride[0]; |
| } |
| } |
| |
| void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelDriverInfo *info, |
| uint32_t xstart, uint32_t xend, |
| uint32_t outstep) { |
| |
| RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; |
| uchar *in = (uchar *)info->inPtr[0]; |
| int * sums = &cp->mSums[256 * 4 * info->lid]; |
| |
| for (uint32_t x = xstart; x < xend; x++) { |
| sums[(in[0] << 2) ] ++; |
| sums[(in[1] << 2) + 1] ++; |
| sums[(in[2] << 2) + 2] ++; |
| in += info->inStride[0]; |
| } |
| } |
| |
| void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelDriverInfo *info, |
| uint32_t xstart, uint32_t xend, |
| uint32_t outstep) { |
| |
| RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; |
| uchar *in = (uchar *)info->inPtr[0]; |
| int * sums = &cp->mSums[256 * 2 * info->lid]; |
| |
| for (uint32_t x = xstart; x < xend; x++) { |
| sums[(in[0] << 1) ] ++; |
| sums[(in[1] << 1) + 1] ++; |
| in += info->inStride[0]; |
| } |
| } |
| |
| void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelDriverInfo *info, |
| uint32_t xstart, uint32_t xend, |
| uint32_t outstep) { |
| |
| RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; |
| uchar *in = (uchar *)info->inPtr[0]; |
| int * sums = &cp->mSums[256 * info->lid]; |
| |
| for (uint32_t x = xstart; x < xend; x++) { |
| int t = (cp->mDotI[0] * in[0]) + |
| (cp->mDotI[1] * in[1]) + |
| (cp->mDotI[2] * in[2]) + |
| (cp->mDotI[3] * in[3]); |
| sums[(t + 0x7f) >> 8] ++; |
| in += info->inStride[0]; |
| } |
| } |
| |
| void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelDriverInfo *info, |
| uint32_t xstart, uint32_t xend, |
| uint32_t outstep) { |
| |
| RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; |
| uchar *in = (uchar *)info->inPtr[0]; |
| int * sums = &cp->mSums[256 * info->lid]; |
| |
| for (uint32_t x = xstart; x < xend; x++) { |
| int t = (cp->mDotI[0] * in[0]) + |
| (cp->mDotI[1] * in[1]) + |
| (cp->mDotI[2] * in[2]); |
| sums[(t + 0x7f) >> 8] ++; |
| in += info->inStride[0]; |
| } |
| } |
| |
| void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelDriverInfo *info, |
| uint32_t xstart, uint32_t xend, |
| uint32_t outstep) { |
| |
| RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; |
| uchar *in = (uchar *)info->inPtr[0]; |
| int * sums = &cp->mSums[256 * info->lid]; |
| |
| for (uint32_t x = xstart; x < xend; x++) { |
| int t = (cp->mDotI[0] * in[0]) + |
| (cp->mDotI[1] * in[1]); |
| sums[(t + 0x7f) >> 8] ++; |
| in += info->inStride[0]; |
| } |
| } |
| |
| void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelDriverInfo *info, |
| uint32_t xstart, uint32_t xend, |
| uint32_t outstep) { |
| |
| RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; |
| uchar *in = (uchar *)info->inPtr[0]; |
| int * sums = &cp->mSums[256 * info->lid]; |
| |
| for (uint32_t x = xstart; x < xend; x++) { |
| int t = (cp->mDotI[0] * in[0]); |
| sums[(t + 0x7f) >> 8] ++; |
| in += info->inStride[0]; |
| } |
| } |
| |
| void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelDriverInfo *info, |
| uint32_t xstart, uint32_t xend, |
| uint32_t outstep) { |
| |
| RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; |
| uchar *in = (uchar *)info->inPtr[0]; |
| int * sums = &cp->mSums[256 * info->lid]; |
| |
| for (uint32_t x = xstart; x < xend; x++) { |
| sums[in[0]] ++; |
| in += info->inStride[0]; |
| } |
| } |
| |
| |
| RsdCpuScriptIntrinsicHistogram::RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, |
| const Script *s, const Element *e) |
| : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_HISTOGRAM) { |
| |
| mRootPtr = nullptr; |
| mSums = new int[256 * 4 * mCtx->getThreadCount()]; |
| mDot[0] = 0.299f; |
| mDot[1] = 0.587f; |
| mDot[2] = 0.114f; |
| mDot[3] = 0; |
| mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f); |
| mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f); |
| mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f); |
| mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f); |
| } |
| |
| RsdCpuScriptIntrinsicHistogram::~RsdCpuScriptIntrinsicHistogram() { |
| if (mSums) { |
| delete []mSums; |
| } |
| } |
| |
| void RsdCpuScriptIntrinsicHistogram::populateScript(Script *s) { |
| s->mHal.info.exportedVariableCount = 2; |
| } |
| |
| void RsdCpuScriptIntrinsicHistogram::invokeFreeChildren() { |
| } |
| |
| |
| RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) { |
| |
| return new RsdCpuScriptIntrinsicHistogram(ctx, s, e); |
| } |