blob: 0c6836ee953de183565d1e7dda9354efa72ef170 [file] [log] [blame]
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17
18#include "rsCpuIntrinsic.h"
19#include "rsCpuIntrinsicInlines.h"
20
Jason Sams7c4b8882013-01-04 10:50:05 -080021namespace android {
22namespace renderscript {
23
24
25class RsdCpuScriptIntrinsic3DLUT : public RsdCpuScriptIntrinsic {
26public:
Stephen Hinesc060f142015-05-13 19:26:09 -070027 void populateScript(Script *) override;
28 void invokeFreeChildren() override;
Jason Sams7c4b8882013-01-04 10:50:05 -080029
Stephen Hinesc060f142015-05-13 19:26:09 -070030 void setGlobalObj(uint32_t slot, ObjectBase *data) override;
Jason Sams7c4b8882013-01-04 10:50:05 -080031
Stephen Hinesc060f142015-05-13 19:26:09 -070032 ~RsdCpuScriptIntrinsic3DLUT() override;
Jason Sams7c4b8882013-01-04 10:50:05 -080033 RsdCpuScriptIntrinsic3DLUT(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
34
35protected:
36 ObjectBaseRef<Allocation> mLUT;
37
David Grossb0abb142015-03-12 15:23:03 -070038 static void kernel(const RsExpandKernelDriverInfo *info,
Jason Sams7c4b8882013-01-04 10:50:05 -080039 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -070040 uint32_t outstep);
Jason Sams7c4b8882013-01-04 10:50:05 -080041};
42
Jason Sams7c4b8882013-01-04 10:50:05 -080043void RsdCpuScriptIntrinsic3DLUT::setGlobalObj(uint32_t slot, ObjectBase *data) {
44 rsAssert(slot == 0);
45 mLUT.set(static_cast<Allocation *>(data));
46}
47
// Externally defined, C-linkage 3D LUT routine. In this file it is only called
// from the kernel when ARCH_ARM_USE_INTRINSICS is defined and gArchUseSIMD is
// set. NOTE(review): presumably implemented in architecture-specific assembly;
// confirm against the build files.
//   dst / in       : output and input pixel buffers
//   count          : number of pixels to process
//   lut            : base address of the lookup-table data
//   pitchy, pitchz : byte strides between LUT rows and LUT slices
//   dimx/dimy/dimz : the kernel passes (dimension - 1) for each LUT axis
extern "C" void rsdIntrinsic3DLUT_K(void *dst, const void *in, size_t count,
                                    const void *lut,
                                    int32_t pitchy, int32_t pitchz,
                                    int dimx, int dimy, int dimz);
Jason Sams7c4b8882013-01-04 10:50:05 -080052
53
David Grossb0abb142015-03-12 15:23:03 -070054void RsdCpuScriptIntrinsic3DLUT::kernel(const RsExpandKernelDriverInfo *info,
Chris Wailes9ed79102014-07-25 15:53:28 -070055 uint32_t xstart, uint32_t xend,
56 uint32_t outstep) {
David Grossb0abb142015-03-12 15:23:03 -070057 RsdCpuScriptIntrinsic3DLUT *cp = (RsdCpuScriptIntrinsic3DLUT *)info->usr;
Jason Sams7c4b8882013-01-04 10:50:05 -080058
David Grossb0abb142015-03-12 15:23:03 -070059 uchar4 *out = (uchar4 *)info->outPtr[0];
60 uchar4 *in = (uchar4 *)info->inPtr[0];
Jason Sams7c4b8882013-01-04 10:50:05 -080061 uint32_t x1 = xstart;
62 uint32_t x2 = xend;
63
64 const uchar *bp = (const uchar *)cp->mLUT->mHal.drvState.lod[0].mallocPtr;
65
66 int4 dims = {
synergy dev8994abb2013-12-05 00:24:37 -080067 static_cast<int>(cp->mLUT->mHal.drvState.lod[0].dimX - 1),
68 static_cast<int>(cp->mLUT->mHal.drvState.lod[0].dimY - 1),
69 static_cast<int>(cp->mLUT->mHal.drvState.lod[0].dimZ - 1),
Stephen Hinesd533c4c2013-03-06 02:55:32 -080070 -1
Jason Sams7c4b8882013-01-04 10:50:05 -080071 };
Stephen Hinesd533c4c2013-03-06 02:55:32 -080072 const float4 m = (float4)(1.f / 255.f) * convert_float4(dims);
Jason Sams7c4b8882013-01-04 10:50:05 -080073 const int4 coordMul = convert_int4(m * (float4)0x8000);
74 const size_t stride_y = cp->mLUT->mHal.drvState.lod[0].stride;
75 const size_t stride_z = stride_y * cp->mLUT->mHal.drvState.lod[0].dimY;
76
77 //ALOGE("strides %zu %zu", stride_y, stride_z);
78
Jason Sams074424a2014-05-22 13:30:03 -070079#if defined(ARCH_ARM_USE_INTRINSICS)
Simon Hosie07e46652014-04-09 17:31:07 -070080 if (gArchUseSIMD) {
81 int32_t len = x2 - x1;
82 if(len > 0) {
83 rsdIntrinsic3DLUT_K(out, in, len,
84 bp, stride_y, stride_z,
85 dims.x, dims.y, dims.z);
86 x1 += len;
87 out += len;
88 in += len;
Jason Sams7c4b8882013-01-04 10:50:05 -080089 }
Simon Hosie07e46652014-04-09 17:31:07 -070090 }
Jason Sams7c4b8882013-01-04 10:50:05 -080091#endif
92
Simon Hosie07e46652014-04-09 17:31:07 -070093 while (x1 < x2) {
Jason Sams7c4b8882013-01-04 10:50:05 -080094 int4 baseCoord = convert_int4(*in) * coordMul;
95 int4 coord1 = baseCoord >> (int4)15;
96 //int4 coord2 = min(coord1 + 1, gDims - 1);
97
98 int4 weight2 = baseCoord & 0x7fff;
99 int4 weight1 = (int4)0x8000 - weight2;
100
101 //ALOGE("coord1 %08x %08x %08x %08x", coord1.x, coord1.y, coord1.z, coord1.w);
102 const uchar *bp2 = bp + (coord1.x * 4) + (coord1.y * stride_y) + (coord1.z * stride_z);
103 const uchar4 *pt_00 = (const uchar4 *)&bp2[0];
104 const uchar4 *pt_10 = (const uchar4 *)&bp2[stride_y];
105 const uchar4 *pt_01 = (const uchar4 *)&bp2[stride_z];
106 const uchar4 *pt_11 = (const uchar4 *)&bp2[stride_y + stride_z];
107
108 uint4 v000 = convert_uint4(pt_00[0]);
109 uint4 v100 = convert_uint4(pt_00[1]);
110 uint4 v010 = convert_uint4(pt_10[0]);
111 uint4 v110 = convert_uint4(pt_10[1]);
112 uint4 v001 = convert_uint4(pt_01[0]);
113 uint4 v101 = convert_uint4(pt_01[1]);
114 uint4 v011 = convert_uint4(pt_11[0]);
115 uint4 v111 = convert_uint4(pt_11[1]);
116
117 uint4 yz00 = ((v000 * weight1.x) + (v100 * weight2.x)) >> (int4)7;
118 uint4 yz10 = ((v010 * weight1.x) + (v110 * weight2.x)) >> (int4)7;
119 uint4 yz01 = ((v001 * weight1.x) + (v101 * weight2.x)) >> (int4)7;
120 uint4 yz11 = ((v011 * weight1.x) + (v111 * weight2.x)) >> (int4)7;
121
Stephen Hines5e3fb0b2013-01-10 01:45:46 -0800122 uint4 z0 = ((yz00 * weight1.y) + (yz10 * weight2.y)) >> (int4)15;
123 uint4 z1 = ((yz01 * weight1.y) + (yz11 * weight2.y)) >> (int4)15;
Jason Sams7c4b8882013-01-04 10:50:05 -0800124
Stephen Hines5e3fb0b2013-01-10 01:45:46 -0800125 uint4 v = ((z0 * weight1.z) + (z1 * weight2.z)) >> (int4)15;
Jason Sams7c4b8882013-01-04 10:50:05 -0800126 uint4 v2 = (v + 0x7f) >> (int4)8;
127
128 uchar4 ret = convert_uchar4(v2);
Tim Murray0b575de2013-03-15 15:56:43 -0700129 ret.w = in->w;
Jason Sams7c4b8882013-01-04 10:50:05 -0800130
131 #if 0
132 if (!x1) {
133 ALOGE("in %08x %08x %08x %08x", in->r, in->g, in->b, in->a);
134 ALOGE("baseCoord %08x %08x %08x %08x", baseCoord.x, baseCoord.y, baseCoord.z, baseCoord.w);
135 ALOGE("coord1 %08x %08x %08x %08x", coord1.x, coord1.y, coord1.z, coord1.w);
136 ALOGE("weight1 %08x %08x %08x %08x", weight1.x, weight1.y, weight1.z, weight1.w);
137 ALOGE("weight2 %08x %08x %08x %08x", weight2.x, weight2.y, weight2.z, weight2.w);
138
139 ALOGE("v000 %08x %08x %08x %08x", v000.x, v000.y, v000.z, v000.w);
140 ALOGE("v100 %08x %08x %08x %08x", v100.x, v100.y, v100.z, v100.w);
141 ALOGE("yz00 %08x %08x %08x %08x", yz00.x, yz00.y, yz00.z, yz00.w);
142 ALOGE("z0 %08x %08x %08x %08x", z0.x, z0.y, z0.z, z0.w);
143
144 ALOGE("v %08x %08x %08x %08x", v.x, v.y, v.z, v.w);
145 ALOGE("v2 %08x %08x %08x %08x", v2.x, v2.y, v2.z, v2.w);
146 }
147 #endif
148 *out = ret;
149
150
151 in++;
152 out++;
153 x1++;
154 }
Jason Sams7c4b8882013-01-04 10:50:05 -0800155}
156
Chris Wailesf3712132014-07-16 15:18:30 -0700157RsdCpuScriptIntrinsic3DLUT::RsdCpuScriptIntrinsic3DLUT(
158 RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) :
159 RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_3DLUT) {
Jason Sams7c4b8882013-01-04 10:50:05 -0800160
161 mRootPtr = &kernel;
162}
163
164RsdCpuScriptIntrinsic3DLUT::~RsdCpuScriptIntrinsic3DLUT() {
165}
166
167void RsdCpuScriptIntrinsic3DLUT::populateScript(Script *s) {
168 s->mHal.info.exportedVariableCount = 1;
169}
170
171void RsdCpuScriptIntrinsic3DLUT::invokeFreeChildren() {
172 mLUT.clear();
173}
174
Jason Sams7c4b8882013-01-04 10:50:05 -0800175RsdCpuScriptImpl * rsdIntrinsic_3DLUT(RsdCpuReferenceImpl *ctx,
176 const Script *s, const Element *e) {
177
178 return new RsdCpuScriptIntrinsic3DLUT(ctx, s, e);
179}
Chih-Hung Hsieh462de212016-11-16 11:33:57 -0800180
181} // namespace renderscript
182} // namespace android