Jason Sams | 7c4b888 | 2013-01-04 10:50:05 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2012 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | |
| 18 | #include "rsCpuIntrinsic.h" |
| 19 | #include "rsCpuIntrinsicInlines.h" |
| 20 | |
| 21 | using namespace android; |
| 22 | using namespace android::renderscript; |
| 23 | |
| 24 | namespace android { |
| 25 | namespace renderscript { |
| 26 | |
| 27 | |
| 28 | class RsdCpuScriptIntrinsic3DLUT : public RsdCpuScriptIntrinsic { |
| 29 | public: |
| 30 | virtual void populateScript(Script *); |
| 31 | virtual void invokeFreeChildren(); |
| 32 | |
| 33 | virtual void setGlobalObj(uint32_t slot, ObjectBase *data); |
| 34 | |
| 35 | virtual ~RsdCpuScriptIntrinsic3DLUT(); |
| 36 | RsdCpuScriptIntrinsic3DLUT(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e); |
| 37 | |
| 38 | protected: |
| 39 | ObjectBaseRef<Allocation> mLUT; |
| 40 | |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 41 | static void kernel(const RsExpandKernelDriverInfo *info, |
Jason Sams | 7c4b888 | 2013-01-04 10:50:05 -0800 | [diff] [blame] | 42 | uint32_t xstart, uint32_t xend, |
Chris Wailes | 9ed7910 | 2014-07-25 15:53:28 -0700 | [diff] [blame] | 43 | uint32_t outstep); |
Jason Sams | 7c4b888 | 2013-01-04 10:50:05 -0800 | [diff] [blame] | 44 | }; |
| 45 | |
| 46 | } |
| 47 | } |
| 48 | |
| 49 | |
| 50 | void RsdCpuScriptIntrinsic3DLUT::setGlobalObj(uint32_t slot, ObjectBase *data) { |
| 51 | rsAssert(slot == 0); |
| 52 | mLUT.set(static_cast<Allocation *>(data)); |
| 53 | } |
| 54 | |
// Externally defined (C linkage) 3D LUT routine; only called from the
// ARCH_ARM_USE_INTRINSICS path of RsdCpuScriptIntrinsic3DLUT::kernel().
// That caller passes the LUT row stride as pitchy, the plane stride as pitchz
// and each LUT dimension minus one as dimx / dimy / dimz.
extern "C" void rsdIntrinsic3DLUT_K(
        void *dst,          // destination pixels
        void const *in,     // source pixels
        size_t count,       // number of pixels to process
        void const *lut,    // base address of the LUT data
        int32_t pitchy,
        int32_t pitchz,
        int dimx,
        int dimy,
        int dimz);
Jason Sams | 7c4b888 | 2013-01-04 10:50:05 -0800 | [diff] [blame] | 59 | |
| 60 | |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 61 | void RsdCpuScriptIntrinsic3DLUT::kernel(const RsExpandKernelDriverInfo *info, |
Chris Wailes | 9ed7910 | 2014-07-25 15:53:28 -0700 | [diff] [blame] | 62 | uint32_t xstart, uint32_t xend, |
| 63 | uint32_t outstep) { |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 64 | RsdCpuScriptIntrinsic3DLUT *cp = (RsdCpuScriptIntrinsic3DLUT *)info->usr; |
Jason Sams | 7c4b888 | 2013-01-04 10:50:05 -0800 | [diff] [blame] | 65 | |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 66 | uchar4 *out = (uchar4 *)info->outPtr[0]; |
| 67 | uchar4 *in = (uchar4 *)info->inPtr[0]; |
Jason Sams | 7c4b888 | 2013-01-04 10:50:05 -0800 | [diff] [blame] | 68 | uint32_t x1 = xstart; |
| 69 | uint32_t x2 = xend; |
| 70 | |
| 71 | const uchar *bp = (const uchar *)cp->mLUT->mHal.drvState.lod[0].mallocPtr; |
| 72 | |
| 73 | int4 dims = { |
synergy dev | 8994abb | 2013-12-05 00:24:37 -0800 | [diff] [blame] | 74 | static_cast<int>(cp->mLUT->mHal.drvState.lod[0].dimX - 1), |
| 75 | static_cast<int>(cp->mLUT->mHal.drvState.lod[0].dimY - 1), |
| 76 | static_cast<int>(cp->mLUT->mHal.drvState.lod[0].dimZ - 1), |
Stephen Hines | d533c4c | 2013-03-06 02:55:32 -0800 | [diff] [blame] | 77 | -1 |
Jason Sams | 7c4b888 | 2013-01-04 10:50:05 -0800 | [diff] [blame] | 78 | }; |
Stephen Hines | d533c4c | 2013-03-06 02:55:32 -0800 | [diff] [blame] | 79 | const float4 m = (float4)(1.f / 255.f) * convert_float4(dims); |
Jason Sams | 7c4b888 | 2013-01-04 10:50:05 -0800 | [diff] [blame] | 80 | const int4 coordMul = convert_int4(m * (float4)0x8000); |
| 81 | const size_t stride_y = cp->mLUT->mHal.drvState.lod[0].stride; |
| 82 | const size_t stride_z = stride_y * cp->mLUT->mHal.drvState.lod[0].dimY; |
| 83 | |
| 84 | //ALOGE("strides %zu %zu", stride_y, stride_z); |
| 85 | |
Jason Sams | 074424a | 2014-05-22 13:30:03 -0700 | [diff] [blame] | 86 | #if defined(ARCH_ARM_USE_INTRINSICS) |
Simon Hosie | 07e4665 | 2014-04-09 17:31:07 -0700 | [diff] [blame] | 87 | if (gArchUseSIMD) { |
| 88 | int32_t len = x2 - x1; |
| 89 | if(len > 0) { |
| 90 | rsdIntrinsic3DLUT_K(out, in, len, |
| 91 | bp, stride_y, stride_z, |
| 92 | dims.x, dims.y, dims.z); |
| 93 | x1 += len; |
| 94 | out += len; |
| 95 | in += len; |
Jason Sams | 7c4b888 | 2013-01-04 10:50:05 -0800 | [diff] [blame] | 96 | } |
Simon Hosie | 07e4665 | 2014-04-09 17:31:07 -0700 | [diff] [blame] | 97 | } |
Jason Sams | 7c4b888 | 2013-01-04 10:50:05 -0800 | [diff] [blame] | 98 | #endif |
| 99 | |
Simon Hosie | 07e4665 | 2014-04-09 17:31:07 -0700 | [diff] [blame] | 100 | while (x1 < x2) { |
Jason Sams | 7c4b888 | 2013-01-04 10:50:05 -0800 | [diff] [blame] | 101 | int4 baseCoord = convert_int4(*in) * coordMul; |
| 102 | int4 coord1 = baseCoord >> (int4)15; |
| 103 | //int4 coord2 = min(coord1 + 1, gDims - 1); |
| 104 | |
| 105 | int4 weight2 = baseCoord & 0x7fff; |
| 106 | int4 weight1 = (int4)0x8000 - weight2; |
| 107 | |
| 108 | //ALOGE("coord1 %08x %08x %08x %08x", coord1.x, coord1.y, coord1.z, coord1.w); |
| 109 | const uchar *bp2 = bp + (coord1.x * 4) + (coord1.y * stride_y) + (coord1.z * stride_z); |
| 110 | const uchar4 *pt_00 = (const uchar4 *)&bp2[0]; |
| 111 | const uchar4 *pt_10 = (const uchar4 *)&bp2[stride_y]; |
| 112 | const uchar4 *pt_01 = (const uchar4 *)&bp2[stride_z]; |
| 113 | const uchar4 *pt_11 = (const uchar4 *)&bp2[stride_y + stride_z]; |
| 114 | |
| 115 | uint4 v000 = convert_uint4(pt_00[0]); |
| 116 | uint4 v100 = convert_uint4(pt_00[1]); |
| 117 | uint4 v010 = convert_uint4(pt_10[0]); |
| 118 | uint4 v110 = convert_uint4(pt_10[1]); |
| 119 | uint4 v001 = convert_uint4(pt_01[0]); |
| 120 | uint4 v101 = convert_uint4(pt_01[1]); |
| 121 | uint4 v011 = convert_uint4(pt_11[0]); |
| 122 | uint4 v111 = convert_uint4(pt_11[1]); |
| 123 | |
| 124 | uint4 yz00 = ((v000 * weight1.x) + (v100 * weight2.x)) >> (int4)7; |
| 125 | uint4 yz10 = ((v010 * weight1.x) + (v110 * weight2.x)) >> (int4)7; |
| 126 | uint4 yz01 = ((v001 * weight1.x) + (v101 * weight2.x)) >> (int4)7; |
| 127 | uint4 yz11 = ((v011 * weight1.x) + (v111 * weight2.x)) >> (int4)7; |
| 128 | |
Stephen Hines | 5e3fb0b | 2013-01-10 01:45:46 -0800 | [diff] [blame] | 129 | uint4 z0 = ((yz00 * weight1.y) + (yz10 * weight2.y)) >> (int4)15; |
| 130 | uint4 z1 = ((yz01 * weight1.y) + (yz11 * weight2.y)) >> (int4)15; |
Jason Sams | 7c4b888 | 2013-01-04 10:50:05 -0800 | [diff] [blame] | 131 | |
Stephen Hines | 5e3fb0b | 2013-01-10 01:45:46 -0800 | [diff] [blame] | 132 | uint4 v = ((z0 * weight1.z) + (z1 * weight2.z)) >> (int4)15; |
Jason Sams | 7c4b888 | 2013-01-04 10:50:05 -0800 | [diff] [blame] | 133 | uint4 v2 = (v + 0x7f) >> (int4)8; |
| 134 | |
| 135 | uchar4 ret = convert_uchar4(v2); |
Tim Murray | 0b575de | 2013-03-15 15:56:43 -0700 | [diff] [blame] | 136 | ret.w = in->w; |
Jason Sams | 7c4b888 | 2013-01-04 10:50:05 -0800 | [diff] [blame] | 137 | |
| 138 | #if 0 |
| 139 | if (!x1) { |
| 140 | ALOGE("in %08x %08x %08x %08x", in->r, in->g, in->b, in->a); |
| 141 | ALOGE("baseCoord %08x %08x %08x %08x", baseCoord.x, baseCoord.y, baseCoord.z, baseCoord.w); |
| 142 | ALOGE("coord1 %08x %08x %08x %08x", coord1.x, coord1.y, coord1.z, coord1.w); |
| 143 | ALOGE("weight1 %08x %08x %08x %08x", weight1.x, weight1.y, weight1.z, weight1.w); |
| 144 | ALOGE("weight2 %08x %08x %08x %08x", weight2.x, weight2.y, weight2.z, weight2.w); |
| 145 | |
| 146 | ALOGE("v000 %08x %08x %08x %08x", v000.x, v000.y, v000.z, v000.w); |
| 147 | ALOGE("v100 %08x %08x %08x %08x", v100.x, v100.y, v100.z, v100.w); |
| 148 | ALOGE("yz00 %08x %08x %08x %08x", yz00.x, yz00.y, yz00.z, yz00.w); |
| 149 | ALOGE("z0 %08x %08x %08x %08x", z0.x, z0.y, z0.z, z0.w); |
| 150 | |
| 151 | ALOGE("v %08x %08x %08x %08x", v.x, v.y, v.z, v.w); |
| 152 | ALOGE("v2 %08x %08x %08x %08x", v2.x, v2.y, v2.z, v2.w); |
| 153 | } |
| 154 | #endif |
| 155 | *out = ret; |
| 156 | |
| 157 | |
| 158 | in++; |
| 159 | out++; |
| 160 | x1++; |
| 161 | } |
Jason Sams | 7c4b888 | 2013-01-04 10:50:05 -0800 | [diff] [blame] | 162 | } |
| 163 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame] | 164 | RsdCpuScriptIntrinsic3DLUT::RsdCpuScriptIntrinsic3DLUT( |
| 165 | RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) : |
| 166 | RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_3DLUT) { |
Jason Sams | 7c4b888 | 2013-01-04 10:50:05 -0800 | [diff] [blame] | 167 | |
| 168 | mRootPtr = &kernel; |
| 169 | } |
| 170 | |
| 171 | RsdCpuScriptIntrinsic3DLUT::~RsdCpuScriptIntrinsic3DLUT() { |
| 172 | } |
| 173 | |
| 174 | void RsdCpuScriptIntrinsic3DLUT::populateScript(Script *s) { |
| 175 | s->mHal.info.exportedVariableCount = 1; |
| 176 | } |
| 177 | |
| 178 | void RsdCpuScriptIntrinsic3DLUT::invokeFreeChildren() { |
| 179 | mLUT.clear(); |
| 180 | } |
| 181 | |
| 182 | |
| 183 | RsdCpuScriptImpl * rsdIntrinsic_3DLUT(RsdCpuReferenceImpl *ctx, |
| 184 | const Script *s, const Element *e) { |
| 185 | |
| 186 | return new RsdCpuScriptIntrinsic3DLUT(ctx, s, e); |
| 187 | } |