blob: 20e0f2e36d21bae29235b2df5791c62ab519d62c [file] [log] [blame]
Matthieu Delahaye60498fe2014-02-18 13:21:06 -06001#include "rsCpuIntrinsicInterPred.h"
2
3void RsdCpuScriptIntrinsicInterPred::setGlobalObj(uint32_t slot,
4 ObjectBase *data) {
5 Allocation *alloc = static_cast<Allocation *>(data);
6 if (slot == 0) mRef = (uint8_t *)alloc->mHal.state.userProvidedPtr;
7 if (slot == 1) mParam = (uint8_t *)alloc->mHal.state.userProvidedPtr;
8}
9
10void RsdCpuScriptIntrinsicInterPred::setGlobalVar(uint32_t slot,
11 const void *data,
12 size_t dataLength) {
13 mFriParamCount = ((int32_t *)data)[0];
14 mSecParamCount = ((int32_t *)data)[1];
15 mParamOffset = ((int32_t *)data)[2];
16}
17
18void RsdCpuScriptIntrinsicInterPred::kernel(const RsForEachStubParamStruct *p,
19 uint32_t xstart, uint32_t xend,
20 uint32_t instep, uint32_t outstep) {
21 RsdCpuScriptIntrinsicInterPred *cp = (RsdCpuScriptIntrinsicInterPred *)p->usr;
22 cp->mCount++;
23 const int vp9_convolve_mode[2][2] = {{24, 16}, {8, 0}};
24 uint8_t *ref_base = cp->mRef;
25 INTER_PRED_PARAM *fri_param = (INTER_PRED_PARAM *)cp->mParam;
26 INTER_PRED_PARAM *sec_param = (INTER_PRED_PARAM *)(cp->mParam + cp->mParamOffset);
27 int32_t fri_count = cp->mFriParamCount;
28 int32_t sec_count = cp->mSecParamCount;
29 int mode_num;
30 uint8_t *src;
31 uint8_t *dst;
32 const int16_t *filter_x;
33 const int16_t *filter_y;
34 for (int i = 0; i < fri_count; i++) {
35
36 mode_num = vp9_convolve_mode[(fri_param[i].x_step_q4 == 16)]
37 [(fri_param[i].y_step_q4 == 16)];
38 src = ref_base + fri_param[i].src_mv;
39 dst = ref_base + fri_param[i].dst_mv;
40
41 filter_x = inter_pred_filters + fri_param[i].filter_x_mv;
42 filter_y = inter_pred_filters + fri_param[i].filter_y_mv;
43
44 cp->mSwitchConvolve[fri_param[i].pred_mode + mode_num](
45 src, fri_param[i].src_stride,
46 dst, fri_param[i].dst_stride,
47 filter_x, fri_param[i].x_step_q4,
48 filter_y, fri_param[i].y_step_q4,
49 fri_param[i].w, fri_param[i].h
50 );
51 }
52
53 for (int i = 0; i < sec_count; i++) {
54 mode_num = vp9_convolve_mode[(sec_param[i].x_step_q4 == 16)]
55 [(sec_param[i].y_step_q4 == 16)];
56 src = ref_base + sec_param[i].src_mv;
57 dst = ref_base + sec_param[i].dst_mv;
58
59 filter_x = inter_pred_filters + sec_param[i].filter_x_mv;
60 filter_y = inter_pred_filters + sec_param[i].filter_y_mv;
61
62 cp->mSwitchConvolve[sec_param[i].pred_mode + mode_num + 1](
63 src, sec_param[i].src_stride,
64 dst, sec_param[i].dst_stride,
65 filter_x, sec_param[i].x_step_q4,
66 filter_y, sec_param[i].y_step_q4,
67 sec_param[i].w, sec_param[i].h
68 );
69 }
70
71}
72
73RsdCpuScriptIntrinsicInterPred::RsdCpuScriptIntrinsicInterPred(RsdCpuReferenceImpl *ctx,
74 const Script *s, const Element *e)
75 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_INTER_PRED) {
76 mRootPtr = &kernel;
77 mCount = 0;
78 mParamOffset = 0;
79 mFriParamCount = 0;
80 mSecParamCount = 0;
81 mRef = NULL;
82 mParam = NULL;
83
84#if defined(ARCH_ARM_HAVE_VFP)
85 mSwitchConvolve[0] = vp9_convolve_copy_neon;
86 mSwitchConvolve[1] = vp9_convolve_avg_neon;
87 mSwitchConvolve[2] = vp9_convolve8_vert_neon;
88 mSwitchConvolve[3] = vp9_convolve8_avg_vert_neon;
89 mSwitchConvolve[4] = vp9_convolve8_horiz_neon;
90 mSwitchConvolve[5] = vp9_convolve8_avg_horiz_neon;
91 mSwitchConvolve[6] = vp9_convolve8_neon;
92 mSwitchConvolve[7] = vp9_convolve8_avg_neon;
93
94 mSwitchConvolve[8] = vp9_convolve8_vert_neon;
95 mSwitchConvolve[9] = vp9_convolve8_avg_vert_neon;
96 mSwitchConvolve[10] = vp9_convolve8_vert_neon;
97 mSwitchConvolve[11] = vp9_convolve8_avg_vert_neon;
98 mSwitchConvolve[12] = vp9_convolve8_neon;
99 mSwitchConvolve[13] = vp9_convolve8_avg_neon;
100 mSwitchConvolve[14] = vp9_convolve8_neon;
101 mSwitchConvolve[15] = vp9_convolve8_avg_neon;
102
103 mSwitchConvolve[16] = vp9_convolve8_horiz_neon;
104 mSwitchConvolve[17] = vp9_convolve8_avg_horiz_neon;
105 mSwitchConvolve[18] = vp9_convolve8_neon;
106 mSwitchConvolve[19] = vp9_convolve8_avg_neon;
107 mSwitchConvolve[20] = vp9_convolve8_horiz_neon;
108 mSwitchConvolve[21] = vp9_convolve8_avg_horiz_neon;
109 mSwitchConvolve[22] = vp9_convolve8_neon;
110 mSwitchConvolve[23] = vp9_convolve8_avg_neon;
111
112 mSwitchConvolve[24] = vp9_convolve8_neon;
113 mSwitchConvolve[25] = vp9_convolve8_avg_neon;
114 mSwitchConvolve[26] = vp9_convolve8_neon;
115 mSwitchConvolve[27] = vp9_convolve8_avg_neon;
116 mSwitchConvolve[28] = vp9_convolve8_neon;
117 mSwitchConvolve[29] = vp9_convolve8_avg_neon;
118 mSwitchConvolve[30] = vp9_convolve8_neon;
119 mSwitchConvolve[31] = vp9_convolve8_avg_neon;
120#else
121 mSwitchConvolve[0] = vp9_convolve_copy_c;
122 mSwitchConvolve[1] = vp9_convolve_avg_c;
123 mSwitchConvolve[2] = vp9_convolve8_vert_c;
124 mSwitchConvolve[3] = vp9_convolve8_avg_vert_c;
125 mSwitchConvolve[4] = vp9_convolve8_horiz_c;
126 mSwitchConvolve[5] = vp9_convolve8_avg_horiz_c;
127 mSwitchConvolve[6] = vp9_convolve8_c;
128 mSwitchConvolve[7] = vp9_convolve8_avg_c;
129
130 mSwitchConvolve[8] = vp9_convolve8_vert_c;
131 mSwitchConvolve[9] = vp9_convolve8_avg_vert_c;
132 mSwitchConvolve[10] = vp9_convolve8_vert_c;
133 mSwitchConvolve[11] = vp9_convolve8_avg_vert_c;
134 mSwitchConvolve[12] = vp9_convolve8_c;
135 mSwitchConvolve[13] = vp9_convolve8_avg_c;
136 mSwitchConvolve[14] = vp9_convolve8_c;
137 mSwitchConvolve[15] = vp9_convolve8_avg_c;
138
139 mSwitchConvolve[16] = vp9_convolve8_horiz_c;
140 mSwitchConvolve[17] = vp9_convolve8_avg_horiz_c;
141 mSwitchConvolve[18] = vp9_convolve8_c;
142 mSwitchConvolve[19] = vp9_convolve8_avg_c;
143 mSwitchConvolve[20] = vp9_convolve8_horiz_c;
144 mSwitchConvolve[21] = vp9_convolve8_avg_horiz_c;
145 mSwitchConvolve[22] = vp9_convolve8_c;
146 mSwitchConvolve[23] = vp9_convolve8_avg_c;
147
148 mSwitchConvolve[24] = vp9_convolve8_c;
149 mSwitchConvolve[25] = vp9_convolve8_avg_c;
150 mSwitchConvolve[26] = vp9_convolve8_c;
151 mSwitchConvolve[27] = vp9_convolve8_avg_c;
152 mSwitchConvolve[28] = vp9_convolve8_c;
153 mSwitchConvolve[29] = vp9_convolve8_avg_c;
154 mSwitchConvolve[30] = vp9_convolve8_c;
155 mSwitchConvolve[31] = vp9_convolve8_avg_c;
156#endif
157}
158
159RsdCpuScriptIntrinsicInterPred::~RsdCpuScriptIntrinsicInterPred() {
160}
161
162void RsdCpuScriptIntrinsicInterPred::populateScript(Script *s) {
163 s->mHal.info.exportedVariableCount = 3;
164}
165
166void RsdCpuScriptIntrinsicInterPred::invokeFreeChildren() {
167}
168
169
170RsdCpuScriptImpl * rsdIntrinsic_InterPred(RsdCpuReferenceImpl *ctx,
171 const Script *s, const Element *e) {
172 return new RsdCpuScriptIntrinsicInterPred(ctx, s, e);
173}