blob: 477918774691b391b1f0fe8e4071b6a02349d31c [file] [log] [blame]
Jason Sams2282e282013-06-17 16:52:01 -07001/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsCpuIntrinsic.h"
18#include "rsCpuIntrinsicInlines.h"
19
20using namespace android;
21using namespace android::renderscript;
22
23namespace android {
24namespace renderscript {
25
26
27class RsdCpuScriptIntrinsicHistogram : public RsdCpuScriptIntrinsic {
28public:
29 virtual void populateScript(Script *);
30 virtual void invokeFreeChildren();
31
32 virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength);
33 virtual void setGlobalObj(uint32_t slot, ObjectBase *data);
34
35 virtual ~RsdCpuScriptIntrinsicHistogram();
36 RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
37
38protected:
Chris Wailesf3712132014-07-16 15:18:30 -070039 void preLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen,
Jason Sams2282e282013-06-17 16:52:01 -070040 Allocation * aout, const void * usr,
41 uint32_t usrLen, const RsScriptCall *sc);
Chris Wailesf3712132014-07-16 15:18:30 -070042 void postLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen,
Jason Sams2282e282013-06-17 16:52:01 -070043 Allocation * aout, const void * usr,
44 uint32_t usrLen, const RsScriptCall *sc);
45
46
47 float mDot[4];
48 int mDotI[4];
49 int *mSums;
50 ObjectBaseRef<Allocation> mAllocOut;
51
Chris Wailes80ef6932014-07-08 11:22:18 -070052 static void kernelP1U4(const RsExpandKernelParams *p,
Chris Wailes9ed79102014-07-25 15:53:28 -070053 uint32_t xstart, uint32_t xend,
54 uint32_t outstep);
Chris Wailes80ef6932014-07-08 11:22:18 -070055 static void kernelP1U3(const RsExpandKernelParams *p,
Chris Wailes9ed79102014-07-25 15:53:28 -070056 uint32_t xstart, uint32_t xend,
57 uint32_t outstep);
Chris Wailes80ef6932014-07-08 11:22:18 -070058 static void kernelP1U2(const RsExpandKernelParams *p,
Chris Wailes9ed79102014-07-25 15:53:28 -070059 uint32_t xstart, uint32_t xend,
60 uint32_t outstep);
Chris Wailes80ef6932014-07-08 11:22:18 -070061 static void kernelP1U1(const RsExpandKernelParams *p,
Chris Wailes9ed79102014-07-25 15:53:28 -070062 uint32_t xstart, uint32_t xend,
63 uint32_t outstep);
Jason Sams2282e282013-06-17 16:52:01 -070064
Chris Wailes80ef6932014-07-08 11:22:18 -070065 static void kernelP1L4(const RsExpandKernelParams *p,
Jason Sams75adb822013-10-22 11:43:54 -070066 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -070067 uint32_t outstep);
Chris Wailes80ef6932014-07-08 11:22:18 -070068 static void kernelP1L3(const RsExpandKernelParams *p,
Jason Sams75adb822013-10-22 11:43:54 -070069 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -070070 uint32_t outstep);
Chris Wailes80ef6932014-07-08 11:22:18 -070071 static void kernelP1L2(const RsExpandKernelParams *p,
Jason Sams75adb822013-10-22 11:43:54 -070072 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -070073 uint32_t outstep);
Chris Wailes80ef6932014-07-08 11:22:18 -070074 static void kernelP1L1(const RsExpandKernelParams *p,
Jason Sams75adb822013-10-22 11:43:54 -070075 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -070076 uint32_t outstep);
Jason Sams75adb822013-10-22 11:43:54 -070077
Jason Sams2282e282013-06-17 16:52:01 -070078};
79
80}
81}
82
83void RsdCpuScriptIntrinsicHistogram::setGlobalObj(uint32_t slot, ObjectBase *data) {
84 rsAssert(slot == 1);
85 mAllocOut.set(static_cast<Allocation *>(data));
86}
87
88void RsdCpuScriptIntrinsicHistogram::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
89 rsAssert(slot == 0);
90 rsAssert(dataLength == 16);
91 memcpy(mDot, data, 16);
92 mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
93 mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
94 mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
95 mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
96}
97
98
99
Chris Wailesf3712132014-07-16 15:18:30 -0700100void
101RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot,
102 const Allocation ** ains,
103 uint32_t inLen, Allocation * aout,
104 const void * usr, uint32_t usrLen,
105 const RsScriptCall *sc) {
Jason Sams2282e282013-06-17 16:52:01 -0700106
107 const uint32_t threads = mCtx->getThreadCount();
Jason Sams75adb822013-10-22 11:43:54 -0700108 uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
Jason Sams2282e282013-06-17 16:52:01 -0700109
110 switch (slot) {
111 case 0:
Jason Sams75adb822013-10-22 11:43:54 -0700112 switch(vSize) {
Jason Samsb68ba7e2013-06-18 16:29:39 -0700113 case 1:
Jason Sams2282e282013-06-17 16:52:01 -0700114 mRootPtr = &kernelP1U1;
Jason Samsb68ba7e2013-06-18 16:29:39 -0700115 break;
116 case 2:
117 mRootPtr = &kernelP1U2;
118 break;
119 case 3:
120 mRootPtr = &kernelP1U3;
Jason Sams75adb822013-10-22 11:43:54 -0700121 vSize = 4;
Jason Samsb68ba7e2013-06-18 16:29:39 -0700122 break;
123 case 4:
Jason Sams2282e282013-06-17 16:52:01 -0700124 mRootPtr = &kernelP1U4;
Jason Samsb68ba7e2013-06-18 16:29:39 -0700125 break;
Jason Sams2282e282013-06-17 16:52:01 -0700126 }
127 break;
128 case 1:
Chris Wailesf3712132014-07-16 15:18:30 -0700129 switch(ains[0]->getType()->getElement()->getVectorSize()) {
Jason Sams75adb822013-10-22 11:43:54 -0700130 case 1:
131 mRootPtr = &kernelP1L1;
132 break;
133 case 2:
134 mRootPtr = &kernelP1L2;
135 break;
136 case 3:
137 mRootPtr = &kernelP1L3;
138 break;
139 case 4:
140 mRootPtr = &kernelP1L4;
141 break;
142 }
Jason Sams2282e282013-06-17 16:52:01 -0700143 break;
144 }
Jason Sams75adb822013-10-22 11:43:54 -0700145 memset(mSums, 0, 256 * sizeof(int32_t) * threads * vSize);
Jason Sams2282e282013-06-17 16:52:01 -0700146}
147
Chris Wailesf3712132014-07-16 15:18:30 -0700148void
149RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot,
150 const Allocation ** ains,
151 uint32_t inLen, Allocation * aout,
152 const void * usr, uint32_t usrLen,
153 const RsScriptCall *sc) {
Jason Sams2282e282013-06-17 16:52:01 -0700154
155 unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr;
156 uint32_t threads = mCtx->getThreadCount();
157 uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
158
Jason Sams75adb822013-10-22 11:43:54 -0700159 if (vSize == 3) vSize = 4;
160
Jason Sams2282e282013-06-17 16:52:01 -0700161 for (uint32_t ct=0; ct < (256 * vSize); ct++) {
162 o[ct] = mSums[ct];
163 for (uint32_t t=1; t < threads; t++) {
Tim Murray6de1d832013-11-13 17:13:37 -0800164 o[ct] += mSums[ct + (256 * vSize * t)];
Jason Sams2282e282013-06-17 16:52:01 -0700165 }
166 }
167}
168
Chris Wailes80ef6932014-07-08 11:22:18 -0700169void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelParams *p,
Jason Sams2282e282013-06-17 16:52:01 -0700170 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700171 uint32_t outstep) {
Jason Sams2282e282013-06-17 16:52:01 -0700172
173 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
Chris Wailesf3712132014-07-16 15:18:30 -0700174 uchar *in = (uchar *)p->ins[0];
Jason Sams2282e282013-06-17 16:52:01 -0700175 int * sums = &cp->mSums[256 * 4 * p->lid];
176
177 for (uint32_t x = xstart; x < xend; x++) {
Jason Samsb68ba7e2013-06-18 16:29:39 -0700178 sums[(in[0] << 2) ] ++;
179 sums[(in[1] << 2) + 1] ++;
180 sums[(in[2] << 2) + 2] ++;
181 sums[(in[3] << 2) + 3] ++;
Chris Wailesf3712132014-07-16 15:18:30 -0700182 in += p->inEStrides[0];
Jason Samsb68ba7e2013-06-18 16:29:39 -0700183 }
184}
185
Chris Wailes80ef6932014-07-08 11:22:18 -0700186void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelParams *p,
Jason Samsb68ba7e2013-06-18 16:29:39 -0700187 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700188 uint32_t outstep) {
Jason Samsb68ba7e2013-06-18 16:29:39 -0700189
190 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
Chris Wailesf3712132014-07-16 15:18:30 -0700191 uchar *in = (uchar *)p->ins[0];
Jason Samsb68ba7e2013-06-18 16:29:39 -0700192 int * sums = &cp->mSums[256 * 4 * p->lid];
193
194 for (uint32_t x = xstart; x < xend; x++) {
195 sums[(in[0] << 2) ] ++;
196 sums[(in[1] << 2) + 1] ++;
197 sums[(in[2] << 2) + 2] ++;
Chris Wailesf3712132014-07-16 15:18:30 -0700198 in += p->inEStrides[0];
Jason Samsb68ba7e2013-06-18 16:29:39 -0700199 }
200}
201
Chris Wailes80ef6932014-07-08 11:22:18 -0700202void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelParams *p,
Jason Samsb68ba7e2013-06-18 16:29:39 -0700203 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700204 uint32_t outstep) {
Jason Samsb68ba7e2013-06-18 16:29:39 -0700205
206 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
Chris Wailesf3712132014-07-16 15:18:30 -0700207 uchar *in = (uchar *)p->ins[0];
Jason Samsb68ba7e2013-06-18 16:29:39 -0700208 int * sums = &cp->mSums[256 * 2 * p->lid];
209
210 for (uint32_t x = xstart; x < xend; x++) {
Jason Sams75adb822013-10-22 11:43:54 -0700211 sums[(in[0] << 1) ] ++;
212 sums[(in[1] << 1) + 1] ++;
Chris Wailesf3712132014-07-16 15:18:30 -0700213 in += p->inEStrides[0];
Jason Sams2282e282013-06-17 16:52:01 -0700214 }
215}
216
Chris Wailes80ef6932014-07-08 11:22:18 -0700217void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelParams *p,
Jason Sams75adb822013-10-22 11:43:54 -0700218 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700219 uint32_t outstep) {
Jason Sams2282e282013-06-17 16:52:01 -0700220
221 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
Chris Wailesf3712132014-07-16 15:18:30 -0700222 uchar *in = (uchar *)p->ins[0];
Jason Sams2282e282013-06-17 16:52:01 -0700223 int * sums = &cp->mSums[256 * p->lid];
224
225 for (uint32_t x = xstart; x < xend; x++) {
Jason Samsb68ba7e2013-06-18 16:29:39 -0700226 int t = (cp->mDotI[0] * in[0]) +
227 (cp->mDotI[1] * in[1]) +
228 (cp->mDotI[2] * in[2]) +
229 (cp->mDotI[3] * in[3]);
Jason Sams75adb822013-10-22 11:43:54 -0700230 sums[(t + 0x7f) >> 8] ++;
Chris Wailesf3712132014-07-16 15:18:30 -0700231 in += p->inEStrides[0];
Jason Sams75adb822013-10-22 11:43:54 -0700232 }
233}
234
Chris Wailes80ef6932014-07-08 11:22:18 -0700235void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelParams *p,
Jason Sams75adb822013-10-22 11:43:54 -0700236 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700237 uint32_t outstep) {
Jason Sams75adb822013-10-22 11:43:54 -0700238
239 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
Chris Wailesf3712132014-07-16 15:18:30 -0700240 uchar *in = (uchar *)p->ins[0];
Jason Sams75adb822013-10-22 11:43:54 -0700241 int * sums = &cp->mSums[256 * p->lid];
242
243 for (uint32_t x = xstart; x < xend; x++) {
244 int t = (cp->mDotI[0] * in[0]) +
245 (cp->mDotI[1] * in[1]) +
246 (cp->mDotI[2] * in[2]);
247 sums[(t + 0x7f) >> 8] ++;
Chris Wailesf3712132014-07-16 15:18:30 -0700248 in += p->inEStrides[0];
Jason Sams75adb822013-10-22 11:43:54 -0700249 }
250}
251
Chris Wailes80ef6932014-07-08 11:22:18 -0700252void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelParams *p,
Jason Sams75adb822013-10-22 11:43:54 -0700253 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700254 uint32_t outstep) {
Jason Sams75adb822013-10-22 11:43:54 -0700255
256 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
Chris Wailesf3712132014-07-16 15:18:30 -0700257 uchar *in = (uchar *)p->ins[0];
Jason Sams75adb822013-10-22 11:43:54 -0700258 int * sums = &cp->mSums[256 * p->lid];
259
260 for (uint32_t x = xstart; x < xend; x++) {
261 int t = (cp->mDotI[0] * in[0]) +
262 (cp->mDotI[1] * in[1]);
263 sums[(t + 0x7f) >> 8] ++;
Chris Wailesf3712132014-07-16 15:18:30 -0700264 in += p->inEStrides[0];
Jason Sams75adb822013-10-22 11:43:54 -0700265 }
266}
267
Chris Wailes80ef6932014-07-08 11:22:18 -0700268void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelParams *p,
Jason Sams75adb822013-10-22 11:43:54 -0700269 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700270 uint32_t outstep) {
Jason Sams75adb822013-10-22 11:43:54 -0700271
272 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
Chris Wailesf3712132014-07-16 15:18:30 -0700273 uchar *in = (uchar *)p->ins[0];
Jason Sams75adb822013-10-22 11:43:54 -0700274 int * sums = &cp->mSums[256 * p->lid];
275
276 for (uint32_t x = xstart; x < xend; x++) {
277 int t = (cp->mDotI[0] * in[0]);
278 sums[(t + 0x7f) >> 8] ++;
Chris Wailesf3712132014-07-16 15:18:30 -0700279 in += p->inEStrides[0];
Jason Sams2282e282013-06-17 16:52:01 -0700280 }
281}
282
Chris Wailes80ef6932014-07-08 11:22:18 -0700283void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelParams *p,
Jason Sams2282e282013-06-17 16:52:01 -0700284 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -0700285 uint32_t outstep) {
Jason Sams2282e282013-06-17 16:52:01 -0700286
Jason Sams75adb822013-10-22 11:43:54 -0700287 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
Chris Wailesf3712132014-07-16 15:18:30 -0700288 uchar *in = (uchar *)p->ins[0];
Jason Sams75adb822013-10-22 11:43:54 -0700289 int * sums = &cp->mSums[256 * p->lid];
290
291 for (uint32_t x = xstart; x < xend; x++) {
292 sums[in[0]] ++;
Chris Wailesf3712132014-07-16 15:18:30 -0700293 in += p->inEStrides[0];
Jason Sams75adb822013-10-22 11:43:54 -0700294 }
Jason Sams2282e282013-06-17 16:52:01 -0700295}
296
297
298RsdCpuScriptIntrinsicHistogram::RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx,
299 const Script *s, const Element *e)
Tim Murray6de1d832013-11-13 17:13:37 -0800300 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_HISTOGRAM) {
Jason Sams2282e282013-06-17 16:52:01 -0700301
Chris Wailes44bef6f2014-08-12 13:51:10 -0700302 mRootPtr = nullptr;
Jason Sams2282e282013-06-17 16:52:01 -0700303 mSums = new int[256 * 4 * mCtx->getThreadCount()];
304 mDot[0] = 0.299f;
305 mDot[1] = 0.587f;
306 mDot[2] = 0.114f;
307 mDot[3] = 0;
308 mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
309 mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
310 mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
311 mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
312}
313
314RsdCpuScriptIntrinsicHistogram::~RsdCpuScriptIntrinsicHistogram() {
315 if (mSums) {
316 delete []mSums;
317 }
318}
319
320void RsdCpuScriptIntrinsicHistogram::populateScript(Script *s) {
321 s->mHal.info.exportedVariableCount = 2;
322}
323
324void RsdCpuScriptIntrinsicHistogram::invokeFreeChildren() {
325}
326
327
328RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
329
330 return new RsdCpuScriptIntrinsicHistogram(ctx, s, e);
331}