blob: 186fb7688ada7c0b26cbaecb04a75f5ac0993f98 [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 1998-2003 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26
27
28#include "vis_proto.h"
29#include "mlib_image.h"
30#include "mlib_ImageColormap.h"
31#include "mlib_ImageAffine.h"
32#include "mlib_v_ImageFilters.h"
33
34/***************************************************************/
/*
 * MLIB_LIMIT: the functions in this file keep their per-row scratch buffer
 * on the stack and switch to mlib_malloc() only when max_xsize is larger
 * than this value.
 * MLIB_SHIFT: X and Y are shifted right by this amount to obtain the integer
 * source coordinate (see NEXT_PIXEL_4BC).
 */
35#define MLIB_LIMIT 512
36#define MLIB_SHIFT 16
37
38/***************************************************************/
/*
 * DECLAREVAR(): locals shared by every kernel in this file.
 *
 * Expands DECLAREVAR0() (defined outside this file) and then adds the values
 * copied out of `param` (warp table, source stride, filter kind, maximum row
 * width) plus the working pointers and counters used by the macros below.
 * MLIB_TYPE must be defined as the source/destination index type before the
 * macro is expanded.
 */
#undef DECLAREVAR
#define DECLAREVAR() \
  DECLAREVAR0(); \
  mlib_s32 *warp_tbl = param->warp_tbl; \
  mlib_s32 srcYStride = param->srcYStride; \
  mlib_s32 filter = param->filter; \
  mlib_s32 max_xsize = param->max_xsize; \
  mlib_s32 xSrc; \
  mlib_s32 ySrc; \
  MLIB_TYPE *srcIndexPtr; \
  MLIB_TYPE *dstIndexPtr; \
  mlib_d64 *dstPixelPtr; \
  mlib_s32 i
51
52/***************************************************************/
/*
 * DECLAREVAR_U8(): scratch variables for the kernels whose colormap entries
 * are read as mlib_f32 values.
 *
 * hi_rowRC / lo_rowRC hold the sixteen colormap entries of the current 4x4
 * neighbourhood, vRC the per-entry products, sum0..sum3 the column sums,
 * dNK the horizontal partial products for the pixel in slot K (0 or 1) and
 * res the packed pair written to the row buffer.
 */
#define DECLAREVAR_U8() \
  mlib_s32 cols; \
  mlib_s32 filterposx, filterposy; \
  mlib_d64 *xPtr; \
  mlib_d64 yFilter; \
  mlib_d64 xFilter0, xFilter1, xFilter2, xFilter3; \
  mlib_f32 hi_row00, lo_row00, hi_row01, lo_row01; \
  mlib_f32 hi_row10, lo_row10, hi_row11, lo_row11; \
  mlib_f32 hi_row20, lo_row20, hi_row21, lo_row21; \
  mlib_f32 hi_row30, lo_row30, hi_row31, lo_row31; \
  mlib_d64 v00, v01, v02, v03; \
  mlib_d64 v10, v11, v12, v13; \
  mlib_d64 v20, v21, v22, v23; \
  mlib_d64 v30, v31, v32, v33; \
  mlib_d64 sum0, sum1, sum2, sum3; \
  mlib_d64 d00, d10, d20, d30; \
  mlib_d64 d01, d11, d21, d31; \
  mlib_d64 d0, d1, d2, d3; \
  mlib_d64 res
70
71/***************************************************************/
/*
 * DECLAREVAR_S16(): scratch variables for the kernels whose colormap entries
 * are read as mlib_d64 values.
 *
 * rowRC holds the sixteen colormap entries of the current 4x4 neighbourhood,
 * uNN are short-lived halves of a 16x16 multiply, vRC the per-entry
 * products, sum0..sum3 the column sums and d0..d3 the horizontal products.
 * f_x01000100 is the constant multiplier used right before the final pack.
 */
#define DECLAREVAR_S16() \
  mlib_s32 cols; \
  mlib_s32 filterposx, filterposy; \
  mlib_d64 *xPtr, *yPtr; \
  mlib_d64 xFilter0, xFilter1, xFilter2, xFilter3; \
  mlib_d64 yFilter0, yFilter1, yFilter2, yFilter3; \
  mlib_d64 row00, row01, row02, row03; \
  mlib_d64 row10, row11, row12, row13; \
  mlib_d64 row20, row21, row22, row23; \
  mlib_d64 row30, row31, row32, row33; \
  mlib_d64 u00, u01, u10, u11, u20, u21, u30, u31; \
  mlib_d64 v00, v01, v02, v03; \
  mlib_d64 v10, v11, v12, v13; \
  mlib_d64 v20, v21, v22, v23; \
  mlib_d64 v30, v31, v32, v33; \
  mlib_d64 sum0, sum1, sum2, sum3; \
  mlib_d64 d0, d1, d2, d3; \
  mlib_d64 res; \
  mlib_f32 f_x01000100 = vis_to_float(0x01000100)
89
90/***************************************************************/
/*
 * CLIP(): per-row setup, to be expanded directly inside the `j` row loop.
 *
 * Advances dstData to the next destination row, fetches the row's clipping
 * edges and starting X/Y, expands PREPARE_DELTAS (defined outside this
 * file), and skips the row with `continue` when it is empty. Otherwise it
 * points dstIndexPtr at the first destination index of the row and resets
 * dstPixelPtr to the start of the scratch row buffer.
 */
#undef CLIP
#define CLIP() \
  dstData += dstYStride; \
  X = xStarts[j]; \
  Y = yStarts[j]; \
  xLeft = leftEdges[j]; \
  xRight = rightEdges[j]; \
  PREPARE_DELTAS \
  if (xRight < xLeft) { \
    continue; \
  } \
  dstIndexPtr = (MLIB_TYPE *)dstData + xLeft; \
  dstPixelPtr = dstRowPtr
103
104/***************************************************************/
/*
 * FADD_4BC_U8(): combine the horizontal partial products of two pixels.
 *
 * Slot 0 (d00, d10, d20, d30) is summed into d0, slot 1 (d01, d11, d21, d31)
 * into d2, and both sums are packed into `res` with vis_fpack16_pair.
 * d1 and d3 are used as temporaries.
 */
#define FADD_4BC_U8() \
  d2 = vis_fpadd16(d01, d11); \
  d3 = vis_fpadd16(d21, d31); \
  d0 = vis_fpadd16(d00, d10); \
  d1 = vis_fpadd16(d20, d30); \
  d2 = vis_fpadd16(d2, d3); \
  d0 = vis_fpadd16(d0, d1); \
  res = vis_fpack16_pair(d0, d2)
113
114/***************************************************************/
/*
 * LOAD_BC_U8_4CH_1PIXEL(mlib_filters_u8, mlib_filters_u8_4): fetch everything
 * needed for one output pixel.
 *
 * Looks up the sixteen colormap entries of the 4x4 neighbourhood that starts
 * at srcIndexPtr (stepping srcIndexPtr down three rows), then selects the
 * filter coefficients from the current X/Y and finally advances X and Y by
 * dX/dY. filterposy is a byte offset into mlib_filters_u8; the x table is
 * addressed with 4*filterposx and supplies four mlib_d64 values.
 */
#define LOAD_BC_U8_4CH_1PIXEL(mlib_filters_u8, mlib_filters_u8_4) \
  hi_row00 = flut[srcIndexPtr[0]]; \
  lo_row00 = flut[srcIndexPtr[1]]; \
  hi_row01 = flut[srcIndexPtr[2]]; \
  lo_row01 = flut[srcIndexPtr[3]]; \
  srcIndexPtr += srcYStride; \
  hi_row10 = flut[srcIndexPtr[0]]; \
  lo_row10 = flut[srcIndexPtr[1]]; \
  hi_row11 = flut[srcIndexPtr[2]]; \
  lo_row11 = flut[srcIndexPtr[3]]; \
  srcIndexPtr += srcYStride; \
  hi_row20 = flut[srcIndexPtr[0]]; \
  lo_row20 = flut[srcIndexPtr[1]]; \
  hi_row21 = flut[srcIndexPtr[2]]; \
  lo_row21 = flut[srcIndexPtr[3]]; \
  srcIndexPtr += srcYStride; \
  hi_row30 = flut[srcIndexPtr[0]]; \
  lo_row30 = flut[srcIndexPtr[1]]; \
  hi_row31 = flut[srcIndexPtr[2]]; \
  lo_row31 = flut[srcIndexPtr[3]]; \
  filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \
  filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \
  X += dX; \
  Y += dY; \
  yFilter = *((mlib_d64 *) ((mlib_u8 *)mlib_filters_u8 + filterposy)); \
  xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_u8_4 + 4*filterposx)); \
  xFilter0 = xPtr[0]; \
  xFilter1 = xPtr[1]; \
  xFilter2 = xPtr[2]; \
  xFilter3 = xPtr[3]
145
146/***************************************************************/
/*
 * NEXT_PIXEL_4BC(): point srcIndexPtr at the top-left sample of the 4x4
 * neighbourhood for the current X/Y, i.e. one row above and one column to
 * the left of the integer source position.
 */
#define NEXT_PIXEL_4BC() \
  ySrc = (Y >> MLIB_SHIFT) - 1; \
  xSrc = (X >> MLIB_SHIFT) - 1; \
  srcIndexPtr = (MLIB_TYPE *)lineAddr[ySrc] + xSrc
151
152/***************************************************************/
/*
 * RESULT_4BC_U8_1PIXEL(ind): finish one output pixel from the 4x4
 * neighbourhood already held in hi_rowRC / lo_rowRC, without loading
 * anything for a following pixel.
 *
 *  - Vertical pass: each colormap entry is multiplied by a coefficient taken
 *    from yFilter (rows 0 and 1 use vis_read_hi(yFilter), rows 2 and 3 use
 *    vis_read_lo(yFilter); the "au"/"al" multiply variants alternate between
 *    the rows of each pair) and the four rows are accumulated into
 *    sum0..sum3, one sum per source column.
 *  - Horizontal pass: each column sum is multiplied by xFilter0..xFilter3
 *    using the vis_fmul8sux16 + vis_fmul8ulx16 pair, and the products are
 *    left in d0<ind>, d1<ind>, d2<ind>, d3<ind> for FADD_4BC_U8 to add and
 *    pack.
 *
 * Reads yFilter, xFilter0..3 and the sixteen row variables; overwrites
 * v00..v33 and sum0..sum3. The statement order interleaves multiplies and
 * adds and is presumably hand-scheduled - keep it as is.
 */
153#define RESULT_4BC_U8_1PIXEL(ind) \
154 v00 = vis_fmul8x16au(hi_row00, vis_read_hi(yFilter)); \
155 v01 = vis_fmul8x16au(lo_row00, vis_read_hi(yFilter)); \
156 v02 = vis_fmul8x16au(hi_row01, vis_read_hi(yFilter)); \
157 v03 = vis_fmul8x16au(lo_row01, vis_read_hi(yFilter)); \
158 v10 = vis_fmul8x16al(hi_row10, vis_read_hi(yFilter)); \
159 v11 = vis_fmul8x16al(lo_row10, vis_read_hi(yFilter)); \
160 sum0 = vis_fpadd16(v00, v10); \
161 v12 = vis_fmul8x16al(hi_row11, vis_read_hi(yFilter)); \
162 sum1 = vis_fpadd16(v01, v11); \
163 v13 = vis_fmul8x16al(lo_row11, vis_read_hi(yFilter)); \
164 sum2 = vis_fpadd16(v02, v12); \
165 v20 = vis_fmul8x16au(hi_row20, vis_read_lo(yFilter)); \
166 sum3 = vis_fpadd16(v03, v13); \
167 v21 = vis_fmul8x16au(lo_row20, vis_read_lo(yFilter)); \
168 sum0 = vis_fpadd16(sum0, v20); \
169 v22 = vis_fmul8x16au(hi_row21, vis_read_lo(yFilter)); \
170 sum1 = vis_fpadd16(sum1, v21); \
171 v23 = vis_fmul8x16au(lo_row21, vis_read_lo(yFilter)); \
172 sum2 = vis_fpadd16(sum2, v22); \
173 v30 = vis_fmul8x16al(hi_row30, vis_read_lo(yFilter)); \
174 sum3 = vis_fpadd16(sum3, v23); \
175 v31 = vis_fmul8x16al(lo_row30, vis_read_lo(yFilter)); \
176 sum0 = vis_fpadd16(sum0, v30); \
177 v32 = vis_fmul8x16al(hi_row31, vis_read_lo(yFilter)); \
178 sum1 = vis_fpadd16(sum1, v31); \
179 v33 = vis_fmul8x16al(lo_row31, vis_read_lo(yFilter)); \
180 sum2 = vis_fpadd16(sum2, v32); \
181 v00 = vis_fmul8sux16(sum0, xFilter0); \
182 sum3 = vis_fpadd16(sum3, v33); \
183 v01 = vis_fmul8ulx16(sum0, xFilter0); \
184 v10 = vis_fmul8sux16(sum1, xFilter1); \
185 d0##ind = vis_fpadd16(v00, v01); \
186 v11 = vis_fmul8ulx16(sum1, xFilter1); \
187 v20 = vis_fmul8sux16(sum2, xFilter2); \
188 d1##ind = vis_fpadd16(v10, v11); \
189 v21 = vis_fmul8ulx16(sum2, xFilter2); \
190 v30 = vis_fmul8sux16(sum3, xFilter3); \
191 d2##ind = vis_fpadd16(v20, v21); \
192 v31 = vis_fmul8ulx16(sum3, xFilter3); \
193 d3##ind = vis_fpadd16(v30, v31)
194
195/***************************************************************/
/*
 * BC_U8_4CH(ind, mlib_filters_u8, mlib_filters_u8_4): software-pipelined
 * step of the U8 bicubic kernel.
 *
 * Computes the horizontal partial products d0<ind>..d3<ind> of the pixel
 * whose 4x4 neighbourhood and filters are currently loaded (same arithmetic
 * as RESULT_4BC_U8_1PIXEL) while, interleaved with that work, it
 *   - reloads hi_rowRC / lo_rowRC from srcIndexPtr for the next pixel,
 *   - derives that pixel's filter offsets from X/Y, then advances X and Y,
 *   - reloads yFilter and xFilter0..3 for the next pixel, and
 *   - points srcIndexPtr at the neighbourhood of the pixel after that.
 *
 * ind                 slot suffix (0 or 1) of the dN<ind> outputs.
 * mlib_filters_u8     table indexed by the byte offset filterposy.
 * mlib_filters_u8_4   table indexed by 4*filterposx, four mlib_d64 per entry.
 *
 * Ordering rule: a row variable or filter may only be reloaded after its
 * last use for the current pixel. In particular both halves of the
 * sum3 * xFilter3 product (vis_fmul8sux16 and vis_fmul8ulx16) must be
 * computed before xFilter3 is overwritten, otherwise the result mixes the
 * current and the next pixel's coefficients.
 */
#define BC_U8_4CH(ind, mlib_filters_u8, mlib_filters_u8_4) \
  v00 = vis_fmul8x16au(hi_row00, vis_read_hi(yFilter)); \
  v01 = vis_fmul8x16au(lo_row00, vis_read_hi(yFilter)); \
  v02 = vis_fmul8x16au(hi_row01, vis_read_hi(yFilter)); \
  v03 = vis_fmul8x16au(lo_row01, vis_read_hi(yFilter)); \
  hi_row00 = flut[srcIndexPtr[0]]; \
  filterposy = (Y >> FILTER_SHIFT); \
  v10 = vis_fmul8x16al(hi_row10, vis_read_hi(yFilter)); \
  lo_row00 = flut[srcIndexPtr[1]]; \
  v11 = vis_fmul8x16al(lo_row10, vis_read_hi(yFilter)); \
  sum0 = vis_fpadd16(v00, v10); \
  hi_row01 = flut[srcIndexPtr[2]]; \
  v12 = vis_fmul8x16al(hi_row11, vis_read_hi(yFilter)); \
  lo_row01 = flut[srcIndexPtr[3]]; \
  filterposx = (X >> FILTER_SHIFT); \
  v13 = vis_fmul8x16al(lo_row11, vis_read_hi(yFilter)); \
  srcIndexPtr += srcYStride; \
  hi_row10 = flut[srcIndexPtr[0]]; \
  v20 = vis_fmul8x16au(hi_row20, vis_read_lo(yFilter)); \
  sum1 = vis_fpadd16(v01, v11); \
  lo_row10 = flut[srcIndexPtr[1]]; \
  X += dX; \
  hi_row11 = flut[srcIndexPtr[2]]; \
  v21 = vis_fmul8x16au(lo_row20, vis_read_lo(yFilter)); \
  sum2 = vis_fpadd16(v02, v12); \
  lo_row11 = flut[srcIndexPtr[3]]; \
  v22 = vis_fmul8x16au(hi_row21, vis_read_lo(yFilter)); \
  srcIndexPtr += srcYStride; \
  hi_row20 = flut[srcIndexPtr[0]]; \
  v23 = vis_fmul8x16au(lo_row21, vis_read_lo(yFilter)); \
  sum3 = vis_fpadd16(v03, v13); \
  Y += dY; \
  xSrc = (X >> MLIB_SHIFT)-1; \
  v30 = vis_fmul8x16al(hi_row30, vis_read_lo(yFilter)); \
  sum0 = vis_fpadd16(sum0, v20); \
  lo_row20 = flut[srcIndexPtr[1]]; \
  ySrc = (Y >> MLIB_SHIFT)-1; \
  hi_row21 = flut[srcIndexPtr[2]]; \
  v31 = vis_fmul8x16al(lo_row30, vis_read_lo(yFilter)); \
  sum1 = vis_fpadd16(sum1, v21); \
  filterposy &= FILTER_MASK; \
  lo_row21 = flut[srcIndexPtr[3]]; \
  v32 = vis_fmul8x16al(hi_row31, vis_read_lo(yFilter)); \
  srcIndexPtr += srcYStride; \
  filterposx &= FILTER_MASK; \
  v33 = vis_fmul8x16al(lo_row31, vis_read_lo(yFilter)); \
  sum2 = vis_fpadd16(sum2, v22); \
  hi_row30 = flut[srcIndexPtr[0]]; \
  sum3 = vis_fpadd16(sum3, v23); \
  sum0 = vis_fpadd16(sum0, v30); \
  lo_row30 = flut[srcIndexPtr[1]]; \
  sum1 = vis_fpadd16(sum1, v31); \
  v00 = vis_fmul8sux16(sum0, xFilter0); \
  hi_row31 = flut[srcIndexPtr[2]]; \
  sum2 = vis_fpadd16(sum2, v32); \
  v01 = vis_fmul8ulx16(sum0, xFilter0); \
  sum3 = vis_fpadd16(sum3, v33); \
  lo_row31 = flut[srcIndexPtr[3]]; \
  v10 = vis_fmul8sux16(sum1, xFilter1); \
  d0##ind = vis_fpadd16(v00, v01); \
  yFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_u8 + filterposy)); \
  v11 = vis_fmul8ulx16(sum1, xFilter1); \
  xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_u8_4+4*filterposx)); \
  xFilter0 = xPtr[0]; \
  v20 = vis_fmul8sux16(sum2, xFilter2); \
  d1##ind = vis_fpadd16(v10, v11); \
  xFilter1 = xPtr[1]; \
  v21 = vis_fmul8ulx16(sum2, xFilter2); \
  xFilter2 = xPtr[2]; \
  v30 = vis_fmul8sux16(sum3, xFilter3); \
  d2##ind = vis_fpadd16(v20, v21); \
  v31 = vis_fmul8ulx16(sum3, xFilter3); /* last use of current xFilter3 */ \
  xFilter3 = xPtr[3]; /* reload only after both halves are done */ \
  srcIndexPtr = (MLIB_TYPE *)lineAddr[ySrc] + xSrc; \
  d3##ind = vis_fpadd16(v30, v31)
271
272/***************************************************************/
/*
 * LOAD_BC_S16_4CH_1PIXEL(mlib_filters_s16_4): fetch everything needed for
 * one output pixel of the S16 kernels.
 *
 * Derives the filter offsets from the current X/Y, advances X and Y by
 * dX/dY, loads four mlib_d64 coefficients each for the vertical and the
 * horizontal direction (both from the same table, addressed with
 * filterpos*4 bytes), and looks up the sixteen colormap entries of the 4x4
 * neighbourhood that starts at srcIndexPtr (stepping srcIndexPtr down three
 * rows).
 */
#define LOAD_BC_S16_4CH_1PIXEL(mlib_filters_s16_4) \
  filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \
  filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \
  X += dX; \
  Y += dY; \
  yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
  xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_4 + filterposx*4)); \
  yFilter0 = yPtr[0]; \
  yFilter1 = yPtr[1]; \
  yFilter2 = yPtr[2]; \
  yFilter3 = yPtr[3]; \
  xFilter0 = xPtr[0]; \
  xFilter1 = xPtr[1]; \
  xFilter2 = xPtr[2]; \
  xFilter3 = xPtr[3]; \
  row00 = flut[srcIndexPtr[0]]; \
  row01 = flut[srcIndexPtr[1]]; \
  row02 = flut[srcIndexPtr[2]]; \
  row03 = flut[srcIndexPtr[3]]; \
  srcIndexPtr += srcYStride; \
  row10 = flut[srcIndexPtr[0]]; \
  row11 = flut[srcIndexPtr[1]]; \
  row12 = flut[srcIndexPtr[2]]; \
  row13 = flut[srcIndexPtr[3]]; \
  srcIndexPtr += srcYStride; \
  row20 = flut[srcIndexPtr[0]]; \
  row21 = flut[srcIndexPtr[1]]; \
  row22 = flut[srcIndexPtr[2]]; \
  row23 = flut[srcIndexPtr[3]]; \
  srcIndexPtr += srcYStride; \
  row30 = flut[srcIndexPtr[0]]; \
  row31 = flut[srcIndexPtr[1]]; \
  row32 = flut[srcIndexPtr[2]]; \
  row33 = flut[srcIndexPtr[3]]
307
308/***************************************************************/
/*
 * RESULT_4BC_S16_1PIXEL(): compute one finished output pixel (`res`) from
 * the 4x4 neighbourhood already held in row00..row33, without loading
 * anything for a following pixel.
 *
 *  - Vertical pass: rowRC is multiplied by yFilterR; every product is built
 *    from a vis_fmul8sux16 / vis_fmul8ulx16 pair (held in the u?? temporaries,
 *    which are reused row after row) added into vRC. The four rows are then
 *    accumulated per column into sum0..sum3.
 *  - Horizontal pass: sumC is multiplied by xFilterC the same way, giving
 *    d0..d3, which are added together into d0.
 *  - Pack: the two halves of d0 go through
 *    vis_fmuld8sux16(f_x01000100, ...) and vis_fpackfix_pair to form `res`.
 *
 * Overwrites u00..u31, v00..v33, sum0..sum3 and d0..d3. The u?? reuse makes
 * the statement order significant - do not reorder.
 */
309#define RESULT_4BC_S16_1PIXEL() \
310 u00 = vis_fmul8sux16(row00, yFilter0); \
311 u01 = vis_fmul8ulx16(row00, yFilter0); \
312 u10 = vis_fmul8sux16(row01, yFilter0); \
313 u11 = vis_fmul8ulx16(row01, yFilter0); \
314 v00 = vis_fpadd16(u00, u01); \
315 u20 = vis_fmul8sux16(row02, yFilter0); \
316 v01 = vis_fpadd16(u10, u11); \
317 u21 = vis_fmul8ulx16(row02, yFilter0); \
318 u30 = vis_fmul8sux16(row03, yFilter0); \
319 u31 = vis_fmul8ulx16(row03, yFilter0); \
320 v02 = vis_fpadd16(u20, u21); \
321 u00 = vis_fmul8sux16(row10, yFilter1); \
322 u01 = vis_fmul8ulx16(row10, yFilter1); \
323 v03 = vis_fpadd16(u30, u31); \
324 u10 = vis_fmul8sux16(row11, yFilter1); \
325 u11 = vis_fmul8ulx16(row11, yFilter1); \
326 v10 = vis_fpadd16(u00, u01); \
327 u20 = vis_fmul8sux16(row12, yFilter1); \
328 v11 = vis_fpadd16(u10, u11); \
329 u21 = vis_fmul8ulx16(row12, yFilter1); \
330 u30 = vis_fmul8sux16(row13, yFilter1); \
331 u31 = vis_fmul8ulx16(row13, yFilter1); \
332 u00 = vis_fmul8sux16(row20, yFilter2); \
333 v12 = vis_fpadd16(u20, u21); \
334 u01 = vis_fmul8ulx16(row20, yFilter2); \
335 v13 = vis_fpadd16(u30, u31); \
336 u10 = vis_fmul8sux16(row21, yFilter2); \
337 u11 = vis_fmul8ulx16(row21, yFilter2); \
338 v20 = vis_fpadd16(u00, u01); \
339 u20 = vis_fmul8sux16(row22, yFilter2); \
340 sum0 = vis_fpadd16(v00, v10); \
341 u21 = vis_fmul8ulx16(row22, yFilter2); \
342 u30 = vis_fmul8sux16(row23, yFilter2); \
343 u31 = vis_fmul8ulx16(row23, yFilter2); \
344 u00 = vis_fmul8sux16(row30, yFilter3); \
345 u01 = vis_fmul8ulx16(row30, yFilter3); \
346 v21 = vis_fpadd16(u10, u11); \
347 sum1 = vis_fpadd16(v01, v11); \
348 u10 = vis_fmul8sux16(row31, yFilter3); \
349 sum2 = vis_fpadd16(v02, v12); \
350 sum3 = vis_fpadd16(v03, v13); \
351 v22 = vis_fpadd16(u20, u21); \
352 u11 = vis_fmul8ulx16(row31, yFilter3); \
353 sum0 = vis_fpadd16(sum0, v20); \
354 u20 = vis_fmul8sux16(row32, yFilter3); \
355 u21 = vis_fmul8ulx16(row32, yFilter3); \
356 v23 = vis_fpadd16(u30, u31); \
357 v30 = vis_fpadd16(u00, u01); \
358 sum1 = vis_fpadd16(sum1, v21); \
359 u30 = vis_fmul8sux16(row33, yFilter3); \
360 u31 = vis_fmul8ulx16(row33, yFilter3); \
361 v31 = vis_fpadd16(u10, u11); \
362 sum2 = vis_fpadd16(sum2, v22); \
363 sum3 = vis_fpadd16(sum3, v23); \
364 v32 = vis_fpadd16(u20, u21); \
365 sum0 = vis_fpadd16(sum0, v30); \
366 v33 = vis_fpadd16(u30, u31); \
367 v00 = vis_fmul8sux16(sum0, xFilter0); \
368 sum1 = vis_fpadd16(sum1, v31); \
369 sum2 = vis_fpadd16(sum2, v32); \
370 v01 = vis_fmul8ulx16(sum0, xFilter0); \
371 v10 = vis_fmul8sux16(sum1, xFilter1); \
372 sum3 = vis_fpadd16(sum3, v33); \
373 v11 = vis_fmul8ulx16(sum1, xFilter1); \
374 d0 = vis_fpadd16(v00, v01); \
375 v20 = vis_fmul8sux16(sum2, xFilter2); \
376 v21 = vis_fmul8ulx16(sum2, xFilter2); \
377 d1 = vis_fpadd16(v10, v11); \
378 v30 = vis_fmul8sux16(sum3, xFilter3); \
379 v31 = vis_fmul8ulx16(sum3, xFilter3); \
380 d2 = vis_fpadd16(v20, v21); \
381 d3 = vis_fpadd16(v30, v31); \
382 d0 = vis_fpadd16(d0, d1); \
383 d2 = vis_fpadd16(d2, d3); \
384 d0 = vis_fpadd16(d0, d2); \
385 d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \
386 d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \
387 res = vis_fpackfix_pair(d2, d3)
388
389/***************************************************************/
/*
 * BC_S16_4CH(mlib_filters_s16_4): software-pipelined step of the S16 bicubic
 * kernel.
 *
 * Computes the horizontal products d0..d3 of the pixel whose neighbourhood
 * (row00..row33) and filters are currently loaded - the same vertical and
 * horizontal arithmetic as RESULT_4BC_S16_1PIXEL, but stopping before the
 * final add/pack, which FADD_4BC_S16 performs. Interleaved with that work
 * it
 *   - reloads row00..row33 from srcIndexPtr for the next pixel,
 *   - derives that pixel's filter offsets from X/Y, then advances X and Y,
 *   - reloads yFilter0..3 and xFilter0..3 for the next pixel, and
 *   - points srcIndexPtr at the neighbourhood of the pixel after that.
 *
 * Every row variable and filter is reloaded only after its last use for the
 * current pixel, and the u?? temporaries are reused row after row, so the
 * statement order must be preserved.
 */
390#define BC_S16_4CH(mlib_filters_s16_4) \
391 u00 = vis_fmul8sux16(row00, yFilter0); \
392 u01 = vis_fmul8ulx16(row00, yFilter0); \
393 u10 = vis_fmul8sux16(row01, yFilter0); \
394 u11 = vis_fmul8ulx16(row01, yFilter0); \
395 v00 = vis_fpadd16(u00, u01); \
396 u20 = vis_fmul8sux16(row02, yFilter0); \
397 v01 = vis_fpadd16(u10, u11); \
398 u21 = vis_fmul8ulx16(row02, yFilter0); \
399 u30 = vis_fmul8sux16(row03, yFilter0); \
400 u31 = vis_fmul8ulx16(row03, yFilter0); \
401 v02 = vis_fpadd16(u20, u21); \
402 row00 = flut[srcIndexPtr[0]]; \
403 u00 = vis_fmul8sux16(row10, yFilter1); \
404 u01 = vis_fmul8ulx16(row10, yFilter1); \
405 filterposy = (Y >> FILTER_SHIFT); \
406 v03 = vis_fpadd16(u30, u31); \
407 row01 = flut[srcIndexPtr[1]]; \
408 u10 = vis_fmul8sux16(row11, yFilter1); \
409 u11 = vis_fmul8ulx16(row11, yFilter1); \
410 v10 = vis_fpadd16(u00, u01); \
411 row02 = flut[srcIndexPtr[2]]; \
412 u20 = vis_fmul8sux16(row12, yFilter1); \
413 v11 = vis_fpadd16(u10, u11); \
414 u21 = vis_fmul8ulx16(row12, yFilter1); \
415 u30 = vis_fmul8sux16(row13, yFilter1); \
416 row03 = flut[srcIndexPtr[3]]; \
417 u31 = vis_fmul8ulx16(row13, yFilter1); \
418 u00 = vis_fmul8sux16(row20, yFilter2); \
419 filterposx = (X >> FILTER_SHIFT); \
420 srcIndexPtr += srcYStride; \
421 v12 = vis_fpadd16(u20, u21); \
422 u01 = vis_fmul8ulx16(row20, yFilter2); \
423 v13 = vis_fpadd16(u30, u31); \
424 row10 = flut[srcIndexPtr[0]]; \
425 u10 = vis_fmul8sux16(row21, yFilter2); \
426 X += dX; \
427 u11 = vis_fmul8ulx16(row21, yFilter2); \
428 v20 = vis_fpadd16(u00, u01); \
429 row11 = flut[srcIndexPtr[1]]; \
430 u20 = vis_fmul8sux16(row22, yFilter2); \
431 sum0 = vis_fpadd16(v00, v10); \
432 u21 = vis_fmul8ulx16(row22, yFilter2); \
433 row12 = flut[srcIndexPtr[2]]; \
434 u30 = vis_fmul8sux16(row23, yFilter2); \
435 u31 = vis_fmul8ulx16(row23, yFilter2); \
436 row13 = flut[srcIndexPtr[3]]; \
437 u00 = vis_fmul8sux16(row30, yFilter3); \
438 srcIndexPtr += srcYStride; \
439 u01 = vis_fmul8ulx16(row30, yFilter3); \
440 v21 = vis_fpadd16(u10, u11); \
441 Y += dY; \
442 xSrc = (X >> MLIB_SHIFT)-1; \
443 sum1 = vis_fpadd16(v01, v11); \
444 row20 = flut[srcIndexPtr[0]]; \
445 u10 = vis_fmul8sux16(row31, yFilter3); \
446 sum2 = vis_fpadd16(v02, v12); \
447 sum3 = vis_fpadd16(v03, v13); \
448 ySrc = (Y >> MLIB_SHIFT)-1; \
449 row21 = flut[srcIndexPtr[1]]; \
450 v22 = vis_fpadd16(u20, u21); \
451 u11 = vis_fmul8ulx16(row31, yFilter3); \
452 sum0 = vis_fpadd16(sum0, v20); \
453 u20 = vis_fmul8sux16(row32, yFilter3); \
454 row22 = flut[srcIndexPtr[2]]; \
455 u21 = vis_fmul8ulx16(row32, yFilter3); \
456 v23 = vis_fpadd16(u30, u31); \
457 v30 = vis_fpadd16(u00, u01); \
458 filterposy &= FILTER_MASK; \
459 sum1 = vis_fpadd16(sum1, v21); \
460 u30 = vis_fmul8sux16(row33, yFilter3); \
461 row23 = flut[srcIndexPtr[3]]; \
462 u31 = vis_fmul8ulx16(row33, yFilter3); \
463 srcIndexPtr += srcYStride; \
464 filterposx &= FILTER_MASK; \
465 v31 = vis_fpadd16(u10, u11); \
466 row30 = flut[srcIndexPtr[0]]; \
467 sum2 = vis_fpadd16(sum2, v22); \
468 sum3 = vis_fpadd16(sum3, v23); \
469 row31 = flut[srcIndexPtr[1]]; \
470 v32 = vis_fpadd16(u20, u21); \
471 sum0 = vis_fpadd16(sum0, v30); \
472 row32 = flut[srcIndexPtr[2]]; \
473 v33 = vis_fpadd16(u30, u31); \
474 row33 = flut[srcIndexPtr[3]]; \
475 v00 = vis_fmul8sux16(sum0, xFilter0); \
476 yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \
477 sum1 = vis_fpadd16(sum1, v31); \
478 yFilter0 = yPtr[0]; \
479 sum2 = vis_fpadd16(sum2, v32); \
480 v01 = vis_fmul8ulx16(sum0, xFilter0); \
481 yFilter1 = yPtr[1]; \
482 v10 = vis_fmul8sux16(sum1, xFilter1); \
483 sum3 = vis_fpadd16(sum3, v33); \
484 yFilter2 = yPtr[2]; \
485 v11 = vis_fmul8ulx16(sum1, xFilter1); \
486 d0 = vis_fpadd16(v00, v01); \
487 yFilter3 = yPtr[3]; \
488 xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_4 + filterposx*4)); \
489 v20 = vis_fmul8sux16(sum2, xFilter2); \
490 xFilter0 = xPtr[0]; \
491 v21 = vis_fmul8ulx16(sum2, xFilter2); \
492 d1 = vis_fpadd16(v10, v11); \
493 xFilter1 = xPtr[1]; \
494 v30 = vis_fmul8sux16(sum3, xFilter3); \
495 v31 = vis_fmul8ulx16(sum3, xFilter3); \
496 d2 = vis_fpadd16(v20, v21); \
497 xFilter2 = xPtr[2]; \
498 d3 = vis_fpadd16(v30, v31); \
499 xFilter3 = xPtr[3]; \
500 srcIndexPtr = (MLIB_TYPE *)lineAddr[ySrc] + xSrc
501
502/***************************************************************/
/*
 * FADD_4BC_S16(): turn the four horizontal products d0..d3 left by
 * BC_S16_4CH into one packed output pixel.
 *
 * The products are summed into d0; its high and low halves are then passed
 * through vis_fmuld8sux16 with the f_x01000100 constant and packed into
 * `res` by vis_fpackfix_pair. d2 and d3 are reused as temporaries.
 */
#define FADD_4BC_S16() \
  d2 = vis_fpadd16(d2, d3); \
  d0 = vis_fpadd16(d0, d1); \
  d0 = vis_fpadd16(d0, d2); \
  d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \
  d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \
  res = vis_fpackfix_pair(d2, d3)
510
511/***************************************************************/
/*
 * Index type of the source and destination images for the functions that
 * follow; DECLAREVAR, CLIP and NEXT_PIXEL_4BC use it for srcIndexPtr and
 * dstIndexPtr.
 */
512#undef MLIB_TYPE
513#define MLIB_TYPE mlib_u8
514
515/***************************************************************/
/*
 * Filter addressing for the U8-colormap kernel that follows.
 * (X >> FILTER_SHIFT) & FILTER_MASK keeps bits 8..15 of the coordinate and
 * leaves them multiplied by 8, i.e. a byte offset in steps of 8 (the x table
 * is addressed with four times that offset).
 */
516#undef FILTER_SHIFT
517#define FILTER_SHIFT 5
518#undef FILTER_MASK
519#define FILTER_MASK (((1 << 8) - 1) << 3)
520
521/***************************************************************/
522mlib_status mlib_ImageAffineIndex_U8_U8_3CH_BC(mlib_affine_param *param,
523 const void *colormap)
524{
525 DECLAREVAR();
526 DECLAREVAR_U8();
527 mlib_f32 *flut = (mlib_f32 *)mlib_ImageGetLutNormalTable(colormap) -
528 mlib_ImageGetLutOffset(colormap);
529 mlib_d64 dstRowData[MLIB_LIMIT/2];
530 mlib_d64 *dstRowPtr = dstRowData;
531 const mlib_s16 *mlib_filters_table_u8, *mlib_filters_table_u8_4;
532
533 if (filter == MLIB_BICUBIC) {
534 mlib_filters_table_u8 = mlib_filters_u8_bc;
535 mlib_filters_table_u8_4 = mlib_filters_u8_bc_4;
536 } else {
537 mlib_filters_table_u8 = mlib_filters_u8_bc2;
538 mlib_filters_table_u8_4 = mlib_filters_u8_bc2_4;
539 }
540
541 if (max_xsize > MLIB_LIMIT) {
542 dstRowPtr = mlib_malloc(sizeof(mlib_d64) * ((max_xsize + 1) >> 1));
543
544 if (dstRowPtr == NULL) return MLIB_FAILURE;
545 }
546
547 vis_write_gsr(3 << 3);
548
549 for (j = yStart; j <= yFinish; j++) {
550
551 CLIP();
552
553 cols = xRight - xLeft + 1;
554
555 i = 0;
556
557 if (i <= cols - 6) {
558
559 NEXT_PIXEL_4BC();
560 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
561
562 NEXT_PIXEL_4BC();
563
564 BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
565 BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
566 FADD_4BC_U8();
567
568 BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
569 BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
570
571#pragma pipeloop(0)
572 for (; i <= cols-8; i += 2) {
573 *dstPixelPtr++ = res;
574
575 FADD_4BC_U8();
576 BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
577 BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
578 }
579
580 *dstPixelPtr++ = res;
581
582 FADD_4BC_U8();
583 *dstPixelPtr++ = res;
584
585 RESULT_4BC_U8_1PIXEL(0);
586 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
587 RESULT_4BC_U8_1PIXEL(1);
588 FADD_4BC_U8();
589
590 *dstPixelPtr++ = res;
591 i += 6;
592 }
593
594 if (i <= cols-4) {
595 NEXT_PIXEL_4BC();
596 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
597
598 NEXT_PIXEL_4BC();
599
600 BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
601 BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
602 FADD_4BC_U8();
603 *dstPixelPtr++ = res;
604
605 RESULT_4BC_U8_1PIXEL(0);
606 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
607 RESULT_4BC_U8_1PIXEL(1);
608 FADD_4BC_U8();
609
610 *dstPixelPtr++ = res;
611 i += 4;
612 }
613
614 if (i <= cols-2) {
615 NEXT_PIXEL_4BC();
616 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
617 RESULT_4BC_U8_1PIXEL(0);
618
619 NEXT_PIXEL_4BC();
620 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
621 RESULT_4BC_U8_1PIXEL(1);
622 FADD_4BC_U8();
623
624 *dstPixelPtr++ = res;
625 i += 2;
626 }
627
628 if (i < cols) {
629 NEXT_PIXEL_4BC();
630 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
631 RESULT_4BC_U8_1PIXEL(0);
632
633 d0 = vis_fpadd16(d00, d10);
634 d1 = vis_fpadd16(d20, d30);
635 d0 = vis_fpadd16(d0, d1);
636 res = vis_fpack16_pair(d0, d0);
637 *dstPixelPtr++ = res;
638 }
639
640 mlib_ImageColorTrue2IndexLine_U8_U8_3_in_4((mlib_u8 *)dstRowPtr,
641 dstIndexPtr,
642 xRight - xLeft + 1,
643 colormap);
644 }
645
646 if (dstRowPtr != dstRowData) mlib_free(dstRowPtr);
647
648 return MLIB_SUCCESS;
649}
650
651/***************************************************************/
/*
 * Filter addressing for the S16-colormap kernel that follows.
 * (X >> FILTER_SHIFT) & FILTER_MASK keeps bits 7..15 of the coordinate and
 * leaves them multiplied by 8; the S16 macros multiply that offset by 4 when
 * addressing the filter table.
 */
652#undef FILTER_SHIFT
653#define FILTER_SHIFT 4
654#undef FILTER_MASK
655#define FILTER_MASK (((1 << 9) - 1) << 3)
656
657/***************************************************************/
658mlib_status mlib_ImageAffineIndex_U8_S16_3CH_BC(mlib_affine_param *param,
659 const void *colormap)
660{
661 DECLAREVAR();
662 DECLAREVAR_S16();
663 mlib_d64 *flut = (mlib_d64 *)mlib_ImageGetLutNormalTable(colormap) -
664 mlib_ImageGetLutOffset(colormap);
665 mlib_d64 dstRowData[MLIB_LIMIT];
666 mlib_d64 *dstRowPtr = dstRowData;
667 const mlib_s16 *mlib_filters_table_s16_4;
668
669 if (filter == MLIB_BICUBIC) {
670 mlib_filters_table_s16_4 = mlib_filters_s16_bc_4;
671 } else {
672 mlib_filters_table_s16_4 = mlib_filters_s16_bc2_4;
673 }
674
675 if (max_xsize > MLIB_LIMIT) {
676 dstRowPtr = mlib_malloc(sizeof(mlib_d64) * max_xsize);
677
678 if (dstRowPtr == NULL) return MLIB_FAILURE;
679 }
680
681 for (j = yStart; j <= yFinish; j++) {
682
683 CLIP();
684
685 vis_write_gsr(10 << 3);
686
687 cols = xRight - xLeft + 1;
688 i = 0;
689
690 if (i <= cols - 4) {
691
692 NEXT_PIXEL_4BC();
693 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
694
695 NEXT_PIXEL_4BC();
696
697 BC_S16_4CH(mlib_filters_table_s16_4);
698 FADD_4BC_S16();
699
700 BC_S16_4CH(mlib_filters_table_s16_4);
701
702#pragma pipeloop(0)
703
704 for (; i < cols-4; i++) {
705 *dstPixelPtr++ = res;
706
707 FADD_4BC_S16();
708 BC_S16_4CH(mlib_filters_table_s16_4);
709 }
710
711 *dstPixelPtr++ = res;
712
713 FADD_4BC_S16();
714 *dstPixelPtr++ = res;
715
716 RESULT_4BC_S16_1PIXEL();
717 *dstPixelPtr++ = res;
718
719 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
720 RESULT_4BC_S16_1PIXEL();
721 *dstPixelPtr++ = res;
722 i += 4;
723 }
724
725#pragma pipeloop(0)
726 for (; i < cols; i++) {
727 NEXT_PIXEL_4BC();
728 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
729 RESULT_4BC_S16_1PIXEL();
730 *dstPixelPtr++ = res;
731 }
732
733 mlib_ImageColorTrue2IndexLine_S16_U8_3_in_4((mlib_s16 *)dstRowPtr,
734 dstIndexPtr,
735 xRight - xLeft + 1,
736 colormap);
737 }
738
739 if (dstRowPtr != dstRowData) mlib_free(dstRowPtr);
740
741 return MLIB_SUCCESS;
742}
743
744/***************************************************************/
/*
 * Filter addressing for the U8-colormap kernel that follows.
 * (X >> FILTER_SHIFT) & FILTER_MASK keeps bits 8..15 of the coordinate and
 * leaves them multiplied by 8, i.e. a byte offset in steps of 8 (the x table
 * is addressed with four times that offset).
 */
745#undef FILTER_SHIFT
746#define FILTER_SHIFT 5
747#undef FILTER_MASK
748#define FILTER_MASK (((1 << 8) - 1) << 3)
749
750/***************************************************************/
751mlib_status mlib_ImageAffineIndex_U8_U8_4CH_BC(mlib_affine_param *param,
752 const void *colormap)
753{
754 DECLAREVAR();
755 DECLAREVAR_U8();
756 mlib_f32 *flut = (mlib_f32 *)mlib_ImageGetLutNormalTable(colormap) -
757 mlib_ImageGetLutOffset(colormap);
758 mlib_d64 dstRowData[MLIB_LIMIT/2];
759 mlib_d64 *dstRowPtr = dstRowData;
760 const mlib_s16 *mlib_filters_table_u8, *mlib_filters_table_u8_4;
761
762 if (filter == MLIB_BICUBIC) {
763 mlib_filters_table_u8 = mlib_filters_u8_bc;
764 mlib_filters_table_u8_4 = mlib_filters_u8_bc_4;
765 } else {
766 mlib_filters_table_u8 = mlib_filters_u8_bc2;
767 mlib_filters_table_u8_4 = mlib_filters_u8_bc2_4;
768 }
769
770 if (max_xsize > MLIB_LIMIT) {
771 dstRowPtr = mlib_malloc(sizeof(mlib_d64) * ((max_xsize + 1) >> 1));
772
773 if (dstRowPtr == NULL) return MLIB_FAILURE;
774 }
775
776 vis_write_gsr(3 << 3);
777
778 for (j = yStart; j <= yFinish; j++) {
779
780 CLIP();
781
782 cols = xRight - xLeft + 1;
783
784 i = 0;
785
786 if (i <= cols - 6) {
787
788 NEXT_PIXEL_4BC();
789 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
790
791 NEXT_PIXEL_4BC();
792
793 BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
794 BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
795 FADD_4BC_U8();
796
797 BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
798 BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
799
800#pragma pipeloop(0)
801 for (; i <= cols-8; i += 2) {
802 *dstPixelPtr++ = res;
803
804 FADD_4BC_U8();
805 BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
806 BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
807 }
808
809 *dstPixelPtr++ = res;
810
811 FADD_4BC_U8();
812 *dstPixelPtr++ = res;
813
814 RESULT_4BC_U8_1PIXEL(0);
815 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
816 RESULT_4BC_U8_1PIXEL(1);
817 FADD_4BC_U8();
818
819 *dstPixelPtr++ = res;
820 i += 6;
821 }
822
823 if (i <= cols-4) {
824 NEXT_PIXEL_4BC();
825 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
826
827 NEXT_PIXEL_4BC();
828
829 BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
830 BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
831 FADD_4BC_U8();
832 *dstPixelPtr++ = res;
833
834 RESULT_4BC_U8_1PIXEL(0);
835 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
836 RESULT_4BC_U8_1PIXEL(1);
837 FADD_4BC_U8();
838
839 *dstPixelPtr++ = res;
840 i += 4;
841 }
842
843 if (i <= cols-2) {
844 NEXT_PIXEL_4BC();
845 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
846 RESULT_4BC_U8_1PIXEL(0);
847
848 NEXT_PIXEL_4BC();
849 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
850 RESULT_4BC_U8_1PIXEL(1);
851 FADD_4BC_U8();
852
853 *dstPixelPtr++ = res;
854 i += 2;
855 }
856
857 if (i < cols) {
858 NEXT_PIXEL_4BC();
859 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
860 RESULT_4BC_U8_1PIXEL(0);
861
862 d0 = vis_fpadd16(d00, d10);
863 d1 = vis_fpadd16(d20, d30);
864 d0 = vis_fpadd16(d0, d1);
865 res = vis_fpack16_pair(d0, d0);
866 *dstPixelPtr++ = res;
867 }
868
869 mlib_ImageColorTrue2IndexLine_U8_U8_4((mlib_u8 *)dstRowPtr,
870 dstIndexPtr,
871 xRight - xLeft + 1,
872 colormap);
873 }
874
875 if (dstRowPtr != dstRowData) mlib_free(dstRowPtr);
876
877 return MLIB_SUCCESS;
878}
879
880/***************************************************************/
/*
 * Filter addressing for the S16-colormap kernel that follows.
 * (X >> FILTER_SHIFT) & FILTER_MASK keeps bits 7..15 of the coordinate and
 * leaves them multiplied by 8; the S16 macros multiply that offset by 4 when
 * addressing the filter table.
 */
881#undef FILTER_SHIFT
882#define FILTER_SHIFT 4
883#undef FILTER_MASK
884#define FILTER_MASK (((1 << 9) - 1) << 3)
885
886/***************************************************************/
/*
 * Affine-transform worker for an indexed image. Judging by the name it
 * handles U8 indices, an S16 LUT with 4 channels and the BC filters
 * (NOTE(review): naming convention inferred -- confirm).
 *
 * For every row j in [yStart, yFinish] it fills a temporary row buffer with
 * results produced by the NEXT_PIXEL_4BC, BC_S16_4CH, FADD_4BC_S16 and
 * RESULT_4BC_S16_1PIXEL macros (defined outside this chunk), then passes
 * that buffer to mlib_ImageColorTrue2IndexLine_S16_U8_4() together with
 * dstIndexPtr, the row width and the colormap.
 *
 * param    - affine parameters; unpacked into locals by DECLAREVAR().
 * colormap - colormap handle; supplies the LUT table and offset and is
 *            forwarded to the color-to-index line routine.
 *
 * Returns MLIB_FAILURE if the temporary row buffer cannot be allocated,
 * MLIB_SUCCESS otherwise.
 */
887mlib_status mlib_ImageAffineIndex_U8_S16_4CH_BC(mlib_affine_param *param,
888 const void *colormap)
889{
890 DECLAREVAR();
891 DECLAREVAR_S16();
    /* LUT base pointer moved back by the LUT offset.
     * NOTE(review): presumably so the macros can index it with raw index
     * values -- the indexing itself is not visible here. */
892 mlib_d64 *flut = (mlib_d64 *)mlib_ImageGetLutNormalTable(colormap) -
893 mlib_ImageGetLutOffset(colormap);
    /* On-stack row buffer; replaced by a heap buffer for wide rows below. */
894 mlib_d64 dstRowData[MLIB_LIMIT];
895 mlib_d64 *dstRowPtr = dstRowData;
896 const mlib_s16 *mlib_filters_table_s16_4;
897
    /* Any filter other than MLIB_BICUBIC uses the "bc2" coefficient table. */
898 if (filter == MLIB_BICUBIC) {
899 mlib_filters_table_s16_4 = mlib_filters_s16_bc_4;
900 } else {
901 mlib_filters_table_s16_4 = mlib_filters_s16_bc2_4;
902 }
903
    /* One mlib_d64 per pixel: allocate when the widest row exceeds the
     * stack buffer. */
904 if (max_xsize > MLIB_LIMIT) {
905 dstRowPtr = mlib_malloc(sizeof(mlib_d64) * max_xsize);
906
907 if (dstRowPtr == NULL) return MLIB_FAILURE;
908 }
909
910 for (j = yStart; j <= yFinish; j++) {
911
912 CLIP();
913
    /* GSR is rewritten on every row.
     * NOTE(review): presumably sets the scale factor used by the pack
     * step and is repeated because the conversion call at the end of the
     * row may change GSR -- confirm. */
914 vis_write_gsr(10 << 3);
915
    /* Number of destination pixels in this row. */
916 cols = xRight - xLeft + 1;
917 i = 0;
918
    /* Software-pipelined path for rows of at least 4 pixels: the prologue
     * primes the macro state, the loop stores cols-4 results and the
     * epilogue stores the last 4, leaving i == cols. */
919 if (i <= cols - 4) {
920
921 NEXT_PIXEL_4BC();
922 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
923
924 NEXT_PIXEL_4BC();
925
926 BC_S16_4CH(mlib_filters_table_s16_4);
927 FADD_4BC_S16();
928
929 BC_S16_4CH(mlib_filters_table_s16_4);
930
931#pragma pipeloop(0)
932
933 for (; i < cols-4; i++) {
934 *dstPixelPtr++ = res;
935
936 FADD_4BC_S16();
937 BC_S16_4CH(mlib_filters_table_s16_4);
938 }
939
    /* Epilogue: drain the results still in flight (4 stores). */
940 *dstPixelPtr++ = res;
941
942 FADD_4BC_S16();
943 *dstPixelPtr++ = res;
944
945 RESULT_4BC_S16_1PIXEL();
946 *dstPixelPtr++ = res;
947
948 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
949 RESULT_4BC_S16_1PIXEL();
950 *dstPixelPtr++ = res;
951 i += 4;
952 }
953
    /* Non-pipelined path: only iterates when the row has fewer than 4
     * pixels, since the pipelined path above ends with i == cols. */
954#pragma pipeloop(0)
955 for (; i < cols; i++) {
956 NEXT_PIXEL_4BC();
957 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
958 RESULT_4BC_S16_1PIXEL();
959 *dstPixelPtr++ = res;
960 }
961
    /* Convert the buffered row to indices written at dstIndexPtr. */
962 mlib_ImageColorTrue2IndexLine_S16_U8_4((mlib_s16 *)dstRowPtr,
963 dstIndexPtr,
964 xRight - xLeft + 1,
965 colormap);
966 }
967
    /* Release the row buffer only if it was heap-allocated. */
968 if (dstRowPtr != dstRowData) mlib_free(dstRowPtr);
969
970 return MLIB_SUCCESS;
971}
972
973/***************************************************************/
/*
 * From here on MLIB_TYPE is mlib_s16. The DECLAREVAR() macro uses
 * MLIB_TYPE for the type of srcIndexPtr, so the routines that follow
 * declare it as an mlib_s16 pointer.
 */
974#undef MLIB_TYPE
975#define MLIB_TYPE mlib_s16
976
977/***************************************************************/
/*
 * Filter addressing constants for the routine that follows.
 * FILTER_MASK evaluates to 0x7F8: an 8-bit field shifted left by 3.
 * NOTE(review): presumably consumed by the pixel macros (defined outside
 * this chunk) to turn a fractional coordinate into an offset into the
 * filter table -- confirm against the macro definitions.
 */
978#undef FILTER_SHIFT
979#define FILTER_SHIFT 5
980#undef FILTER_MASK
981#define FILTER_MASK (((1 << 8) - 1) << 3)
982
983/***************************************************************/
/*
 * Affine-transform worker for an indexed image. Judging by the name it
 * handles S16 indices, a U8 LUT with 3 channels and the BC filters
 * (NOTE(review): naming convention inferred -- confirm).
 *
 * For every row j in [yStart, yFinish] it fills a temporary row buffer with
 * packed results produced by the NEXT_PIXEL_4BC, BC_U8_4CH, FADD_4BC_U8 and
 * RESULT_4BC_U8_1PIXEL macros (defined outside this chunk), then passes
 * that buffer to mlib_ImageColorTrue2IndexLine_U8_S16_3_in_4() together
 * with dstIndexPtr, the row width and the colormap.
 *
 * param    - affine parameters; unpacked into locals by DECLAREVAR().
 * colormap - colormap handle; supplies the LUT table and offset and is
 *            forwarded to the color-to-index line routine.
 *
 * Returns MLIB_FAILURE if the temporary row buffer cannot be allocated,
 * MLIB_SUCCESS otherwise.
 */
984mlib_status mlib_ImageAffineIndex_S16_U8_3CH_BC(mlib_affine_param *param,
985 const void *colormap)
986{
987 DECLAREVAR();
988 DECLAREVAR_U8();
    /* LUT base pointer moved back by the LUT offset.
     * NOTE(review): presumably so the macros can index it with raw index
     * values -- the indexing itself is not visible here. */
989 mlib_f32 *flut = (mlib_f32 *)mlib_ImageGetLutNormalTable(colormap) -
990 mlib_ImageGetLutOffset(colormap);
    /* On-stack row buffer of MLIB_LIMIT/2 doubles; every store in the row
     * loop advances i by 2, i.e. one mlib_d64 per two pixels. */
991 mlib_d64 dstRowData[MLIB_LIMIT/2];
992 mlib_d64 *dstRowPtr = dstRowData;
993 const mlib_s16 *mlib_filters_table_u8, *mlib_filters_table_u8_4;
994
    /* Any filter other than MLIB_BICUBIC uses the "bc2" coefficient tables. */
995 if (filter == MLIB_BICUBIC) {
996 mlib_filters_table_u8 = mlib_filters_u8_bc;
997 mlib_filters_table_u8_4 = mlib_filters_u8_bc_4;
998 } else {
999 mlib_filters_table_u8 = mlib_filters_u8_bc2;
1000 mlib_filters_table_u8_4 = mlib_filters_u8_bc2_4;
1001 }
1002
    /* NOTE(review): presumably converts the stride from bytes to mlib_s16
     * elements, matching MLIB_TYPE == mlib_s16 -- confirm. */
1003 srcYStride >>= 1;
1004
    /* Wide rows: heap buffer with one mlib_d64 per pixel pair, rounded up. */
1005 if (max_xsize > MLIB_LIMIT) {
1006 dstRowPtr = mlib_malloc(sizeof(mlib_d64) * ((max_xsize + 1) >> 1));
1007
1008 if (dstRowPtr == NULL) return MLIB_FAILURE;
1009 }
1010
    /* GSR is written once, before the row loop.
     * NOTE(review): presumably sets the scale factor used by the pack
     * step -- confirm. */
1011 vis_write_gsr(3 << 3);
1012
1013 for (j = yStart; j <= yFinish; j++) {
1014
1015 CLIP();
1016
    /* Number of destination pixels in this row. */
1017 cols = xRight - xLeft + 1;
1018
1019 i = 0;
1020
    /* Software-pipelined path for rows of at least 6 pixels. Each store
     * covers two pixels; the loop stores one pair per iteration and the
     * epilogue stores three more pairs. Afterwards at most one pixel is
     * left (cols - i is 0 or 1). */
1021 if (i <= cols - 6) {
1022
1023 NEXT_PIXEL_4BC();
1024 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
1025
1026 NEXT_PIXEL_4BC();
1027
1028 BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
1029 BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
1030 FADD_4BC_U8();
1031
1032 BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
1033 BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
1034
1035#pragma pipeloop(0)
1036 for (; i <= cols-8; i += 2) {
1037 *dstPixelPtr++ = res;
1038
1039 FADD_4BC_U8();
1040 BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
1041 BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
1042 }
1043
    /* Epilogue: drain the results still in flight (3 stores, 6 pixels). */
1044 *dstPixelPtr++ = res;
1045
1046 FADD_4BC_U8();
1047 *dstPixelPtr++ = res;
1048
1049 RESULT_4BC_U8_1PIXEL(0);
1050 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
1051 RESULT_4BC_U8_1PIXEL(1);
1052 FADD_4BC_U8();
1053
1054 *dstPixelPtr++ = res;
1055 i += 6;
1056 }
1057
    /* Reached with work to do only when the pipelined path was skipped:
     * handles 4 pixels (2 stores) for rows of 4 or 5 pixels. */
1058 if (i <= cols-4) {
1059 NEXT_PIXEL_4BC();
1060 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
1061
1062 NEXT_PIXEL_4BC();
1063
1064 BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
1065 BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
1066 FADD_4BC_U8();
1067 *dstPixelPtr++ = res;
1068
1069 RESULT_4BC_U8_1PIXEL(0);
1070 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
1071 RESULT_4BC_U8_1PIXEL(1);
1072 FADD_4BC_U8();
1073
1074 *dstPixelPtr++ = res;
1075 i += 4;
1076 }
1077
    /* Handles one pixel pair (1 store) for rows of 2 or 3 pixels. */
1078 if (i <= cols-2) {
1079 NEXT_PIXEL_4BC();
1080 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
1081 RESULT_4BC_U8_1PIXEL(0);
1082
1083 NEXT_PIXEL_4BC();
1084 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
1085 RESULT_4BC_U8_1PIXEL(1);
1086 FADD_4BC_U8();
1087
1088 *dstPixelPtr++ = res;
1089 i += 2;
1090 }
1091
    /* Odd trailing pixel: sum d00, d10, d20 and d30 and pack the sum into
     * both halves of the stored value (both arguments of the pack are d0). */
1092 if (i < cols) {
1093 NEXT_PIXEL_4BC();
1094 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
1095 RESULT_4BC_U8_1PIXEL(0);
1096
1097 d0 = vis_fpadd16(d00, d10);
1098 d1 = vis_fpadd16(d20, d30);
1099 d0 = vis_fpadd16(d0, d1);
1100 res = vis_fpack16_pair(d0, d0);
1101 *dstPixelPtr++ = res;
1102 }
1103
    /* Convert the buffered row to indices written at dstIndexPtr. */
1104 mlib_ImageColorTrue2IndexLine_U8_S16_3_in_4((mlib_u8 *)dstRowPtr,
1105 dstIndexPtr,
1106 xRight - xLeft + 1,
1107 colormap);
1108 }
1109
    /* Release the row buffer only if it was heap-allocated. */
1110 if (dstRowPtr != dstRowData) mlib_free(dstRowPtr);
1111
1112 return MLIB_SUCCESS;
1113}
1114
1115/***************************************************************/
/*
 * Filter addressing constants for the routine that follows.
 * FILTER_MASK evaluates to 0xFF8: a 9-bit field shifted left by 3.
 * NOTE(review): presumably consumed by the pixel macros (defined outside
 * this chunk) to turn a fractional coordinate into an offset into the
 * filter table -- confirm against the macro definitions.
 */
1116#undef FILTER_SHIFT
1117#define FILTER_SHIFT 4
1118#undef FILTER_MASK
1119#define FILTER_MASK (((1 << 9) - 1) << 3)
1120
1121/***************************************************************/
/*
 * Affine-transform worker for an indexed image. Judging by the name it
 * handles S16 indices, an S16 LUT with 3 channels and the BC filters
 * (NOTE(review): naming convention inferred -- confirm).
 *
 * For every row j in [yStart, yFinish] it fills a temporary row buffer with
 * results produced by the NEXT_PIXEL_4BC, BC_S16_4CH, FADD_4BC_S16 and
 * RESULT_4BC_S16_1PIXEL macros (defined outside this chunk), then passes
 * that buffer to mlib_ImageColorTrue2IndexLine_S16_S16_3_in_4() together
 * with dstIndexPtr, the row width and the colormap.
 *
 * param    - affine parameters; unpacked into locals by DECLAREVAR().
 * colormap - colormap handle; supplies the LUT table and offset and is
 *            forwarded to the color-to-index line routine.
 *
 * Returns MLIB_FAILURE if the temporary row buffer cannot be allocated,
 * MLIB_SUCCESS otherwise.
 */
1122mlib_status mlib_ImageAffineIndex_S16_S16_3CH_BC(mlib_affine_param *param,
1123 const void *colormap)
1124{
1125 DECLAREVAR();
1126 DECLAREVAR_S16();
    /* LUT base pointer moved back by the LUT offset.
     * NOTE(review): presumably so the macros can index it with raw index
     * values -- the indexing itself is not visible here. */
1127 mlib_d64 *flut = (mlib_d64 *)mlib_ImageGetLutNormalTable(colormap) -
1128 mlib_ImageGetLutOffset(colormap);
    /* On-stack row buffer; replaced by a heap buffer for wide rows below. */
1129 mlib_d64 dstRowData[MLIB_LIMIT];
1130 mlib_d64 *dstRowPtr = dstRowData;
1131 const mlib_s16 *mlib_filters_table_s16_4;
1132
    /* Any filter other than MLIB_BICUBIC uses the "bc2" coefficient table. */
1133 if (filter == MLIB_BICUBIC) {
1134 mlib_filters_table_s16_4 = mlib_filters_s16_bc_4;
1135 } else {
1136 mlib_filters_table_s16_4 = mlib_filters_s16_bc2_4;
1137 }
1138
    /* NOTE(review): presumably converts the stride from bytes to mlib_s16
     * elements, matching MLIB_TYPE == mlib_s16 -- confirm. */
1139 srcYStride >>= 1;
1140
    /* One mlib_d64 per pixel: allocate when the widest row exceeds the
     * stack buffer. */
1141 if (max_xsize > MLIB_LIMIT) {
1142 dstRowPtr = mlib_malloc(sizeof(mlib_d64) * max_xsize);
1143
1144 if (dstRowPtr == NULL) return MLIB_FAILURE;
1145 }
1146
1147 for (j = yStart; j <= yFinish; j++) {
1148
1149 CLIP();
1150
    /* GSR is rewritten on every row.
     * NOTE(review): presumably sets the scale factor used by the pack
     * step and is repeated because the conversion call at the end of the
     * row may change GSR -- confirm. */
1151 vis_write_gsr(10 << 3);
1152
    /* Number of destination pixels in this row. */
1153 cols = xRight - xLeft + 1;
1154 i = 0;
1155
    /* Software-pipelined path for rows of at least 4 pixels: the prologue
     * primes the macro state, the loop stores cols-4 results and the
     * epilogue stores the last 4, leaving i == cols. */
1156 if (i <= cols - 4) {
1157
1158 NEXT_PIXEL_4BC();
1159 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
1160
1161 NEXT_PIXEL_4BC();
1162
1163 BC_S16_4CH(mlib_filters_table_s16_4);
1164 FADD_4BC_S16();
1165
1166 BC_S16_4CH(mlib_filters_table_s16_4);
1167
1168#pragma pipeloop(0)
1169
1170 for (; i < cols-4; i++) {
1171 *dstPixelPtr++ = res;
1172
1173 FADD_4BC_S16();
1174 BC_S16_4CH(mlib_filters_table_s16_4);
1175 }
1176
    /* Epilogue: drain the results still in flight (4 stores). */
1177 *dstPixelPtr++ = res;
1178
1179 FADD_4BC_S16();
1180 *dstPixelPtr++ = res;
1181
1182 RESULT_4BC_S16_1PIXEL();
1183 *dstPixelPtr++ = res;
1184
1185 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
1186 RESULT_4BC_S16_1PIXEL();
1187 *dstPixelPtr++ = res;
1188 i += 4;
1189 }
1190
    /* Non-pipelined path: only iterates when the row has fewer than 4
     * pixels, since the pipelined path above ends with i == cols. */
1191#pragma pipeloop(0)
1192 for (; i < cols; i++) {
1193 NEXT_PIXEL_4BC();
1194 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
1195 RESULT_4BC_S16_1PIXEL();
1196 *dstPixelPtr++ = res;
1197 }
1198
    /* Convert the buffered row to indices written at dstIndexPtr. */
1199 mlib_ImageColorTrue2IndexLine_S16_S16_3_in_4((mlib_s16 *)dstRowPtr,
1200 dstIndexPtr,
1201 xRight - xLeft + 1,
1202 colormap);
1203 }
1204
    /* Release the row buffer only if it was heap-allocated. */
1205 if (dstRowPtr != dstRowData) mlib_free(dstRowPtr);
1206
1207 return MLIB_SUCCESS;
1208}
1209
1210/***************************************************************/
/*
 * Filter addressing constants for the routine that follows.
 * FILTER_MASK evaluates to 0x7F8: an 8-bit field shifted left by 3.
 * NOTE(review): presumably consumed by the pixel macros (defined outside
 * this chunk) to turn a fractional coordinate into an offset into the
 * filter table -- confirm against the macro definitions.
 */
1211#undef FILTER_SHIFT
1212#define FILTER_SHIFT 5
1213#undef FILTER_MASK
1214#define FILTER_MASK (((1 << 8) - 1) << 3)
1215
1216/***************************************************************/
/*
 * Affine-transform worker for an indexed image. Judging by the name it
 * handles S16 indices, a U8 LUT with 4 channels and the BC filters
 * (NOTE(review): naming convention inferred -- confirm).
 *
 * For every row j in [yStart, yFinish] it fills a temporary row buffer with
 * packed results produced by the NEXT_PIXEL_4BC, BC_U8_4CH, FADD_4BC_U8 and
 * RESULT_4BC_U8_1PIXEL macros (defined outside this chunk), then passes
 * that buffer to mlib_ImageColorTrue2IndexLine_U8_S16_4() together with
 * dstIndexPtr, the row width and the colormap.
 *
 * param    - affine parameters; unpacked into locals by DECLAREVAR().
 * colormap - colormap handle; supplies the LUT table and offset and is
 *            forwarded to the color-to-index line routine.
 *
 * Returns MLIB_FAILURE if the temporary row buffer cannot be allocated,
 * MLIB_SUCCESS otherwise.
 */
1217mlib_status mlib_ImageAffineIndex_S16_U8_4CH_BC(mlib_affine_param *param,
1218 const void *colormap)
1219{
1220 DECLAREVAR();
1221 DECLAREVAR_U8();
    /* LUT base pointer moved back by the LUT offset.
     * NOTE(review): presumably so the macros can index it with raw index
     * values -- the indexing itself is not visible here. */
1222 mlib_f32 *flut = (mlib_f32 *)mlib_ImageGetLutNormalTable(colormap) -
1223 mlib_ImageGetLutOffset(colormap);
    /* On-stack row buffer of MLIB_LIMIT/2 doubles; every store in the row
     * loop advances i by 2, i.e. one mlib_d64 per two pixels. */
1224 mlib_d64 dstRowData[MLIB_LIMIT/2];
1225 mlib_d64 *dstRowPtr = dstRowData;
1226 const mlib_s16 *mlib_filters_table_u8, *mlib_filters_table_u8_4;
1227
    /* Any filter other than MLIB_BICUBIC uses the "bc2" coefficient tables. */
1228 if (filter == MLIB_BICUBIC) {
1229 mlib_filters_table_u8 = mlib_filters_u8_bc;
1230 mlib_filters_table_u8_4 = mlib_filters_u8_bc_4;
1231 } else {
1232 mlib_filters_table_u8 = mlib_filters_u8_bc2;
1233 mlib_filters_table_u8_4 = mlib_filters_u8_bc2_4;
1234 }
1235
    /* NOTE(review): presumably converts the stride from bytes to mlib_s16
     * elements, matching MLIB_TYPE == mlib_s16 -- confirm. */
1236 srcYStride >>= 1;
1237
    /* Wide rows: heap buffer with one mlib_d64 per pixel pair, rounded up. */
1238 if (max_xsize > MLIB_LIMIT) {
1239 dstRowPtr = mlib_malloc(sizeof(mlib_d64) * ((max_xsize + 1) >> 1));
1240
1241 if (dstRowPtr == NULL) return MLIB_FAILURE;
1242 }
1243
    /* GSR is written once, before the row loop.
     * NOTE(review): presumably sets the scale factor used by the pack
     * step -- confirm. */
1244 vis_write_gsr(3 << 3);
1245
1246 for (j = yStart; j <= yFinish; j++) {
1247
1248 CLIP();
1249
    /* Number of destination pixels in this row. */
1250 cols = xRight - xLeft + 1;
1251
1252 i = 0;
1253
    /* Software-pipelined path for rows of at least 6 pixels. Each store
     * covers two pixels; the loop stores one pair per iteration and the
     * epilogue stores three more pairs. Afterwards at most one pixel is
     * left (cols - i is 0 or 1). */
1254 if (i <= cols - 6) {
1255
1256 NEXT_PIXEL_4BC();
1257 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
1258
1259 NEXT_PIXEL_4BC();
1260
1261 BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
1262 BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
1263 FADD_4BC_U8();
1264
1265 BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
1266 BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
1267
1268#pragma pipeloop(0)
1269 for (; i <= cols-8; i += 2) {
1270 *dstPixelPtr++ = res;
1271
1272 FADD_4BC_U8();
1273 BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
1274 BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
1275 }
1276
    /* Epilogue: drain the results still in flight (3 stores, 6 pixels). */
1277 *dstPixelPtr++ = res;
1278
1279 FADD_4BC_U8();
1280 *dstPixelPtr++ = res;
1281
1282 RESULT_4BC_U8_1PIXEL(0);
1283 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
1284 RESULT_4BC_U8_1PIXEL(1);
1285 FADD_4BC_U8();
1286
1287 *dstPixelPtr++ = res;
1288 i += 6;
1289 }
1290
    /* Reached with work to do only when the pipelined path was skipped:
     * handles 4 pixels (2 stores) for rows of 4 or 5 pixels. */
1291 if (i <= cols-4) {
1292 NEXT_PIXEL_4BC();
1293 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
1294
1295 NEXT_PIXEL_4BC();
1296
1297 BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4);
1298 BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4);
1299 FADD_4BC_U8();
1300 *dstPixelPtr++ = res;
1301
1302 RESULT_4BC_U8_1PIXEL(0);
1303 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
1304 RESULT_4BC_U8_1PIXEL(1);
1305 FADD_4BC_U8();
1306
1307 *dstPixelPtr++ = res;
1308 i += 4;
1309 }
1310
    /* Handles one pixel pair (1 store) for rows of 2 or 3 pixels. */
1311 if (i <= cols-2) {
1312 NEXT_PIXEL_4BC();
1313 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
1314 RESULT_4BC_U8_1PIXEL(0);
1315
1316 NEXT_PIXEL_4BC();
1317 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
1318 RESULT_4BC_U8_1PIXEL(1);
1319 FADD_4BC_U8();
1320
1321 *dstPixelPtr++ = res;
1322 i += 2;
1323 }
1324
    /* Odd trailing pixel: sum d00, d10, d20 and d30 and pack the sum into
     * both halves of the stored value (both arguments of the pack are d0). */
1325 if (i < cols) {
1326 NEXT_PIXEL_4BC();
1327 LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4);
1328 RESULT_4BC_U8_1PIXEL(0);
1329
1330 d0 = vis_fpadd16(d00, d10);
1331 d1 = vis_fpadd16(d20, d30);
1332 d0 = vis_fpadd16(d0, d1);
1333 res = vis_fpack16_pair(d0, d0);
1334 *dstPixelPtr++ = res;
1335 }
1336
    /* Convert the buffered row to indices written at dstIndexPtr. */
1337 mlib_ImageColorTrue2IndexLine_U8_S16_4((mlib_u8 *)dstRowPtr,
1338 dstIndexPtr,
1339 xRight - xLeft + 1,
1340 colormap);
1341 }
1342
    /* Release the row buffer only if it was heap-allocated. */
1343 if (dstRowPtr != dstRowData) mlib_free(dstRowPtr);
1344
1345 return MLIB_SUCCESS;
1346}
1347
1348/***************************************************************/
/*
 * Filter addressing constants for the routine that follows.
 * FILTER_MASK evaluates to 0xFF8: a 9-bit field shifted left by 3.
 * NOTE(review): presumably consumed by the pixel macros (defined outside
 * this chunk) to turn a fractional coordinate into an offset into the
 * filter table -- confirm against the macro definitions.
 */
1349#undef FILTER_SHIFT
1350#define FILTER_SHIFT 4
1351#undef FILTER_MASK
1352#define FILTER_MASK (((1 << 9) - 1) << 3)
1353
1354/***************************************************************/
/*
 * Affine-transform worker for an indexed image. Judging by the name it
 * handles S16 indices, an S16 LUT with 4 channels and the BC filters
 * (NOTE(review): naming convention inferred -- confirm).
 *
 * For every row j in [yStart, yFinish] it fills a temporary row buffer with
 * results produced by the NEXT_PIXEL_4BC, BC_S16_4CH, FADD_4BC_S16 and
 * RESULT_4BC_S16_1PIXEL macros (defined outside this chunk), then passes
 * that buffer to mlib_ImageColorTrue2IndexLine_S16_S16_4() together with
 * dstIndexPtr, the row width and the colormap.
 *
 * param    - affine parameters; unpacked into locals by DECLAREVAR().
 * colormap - colormap handle; supplies the LUT table and offset and is
 *            forwarded to the color-to-index line routine.
 *
 * Returns MLIB_FAILURE if the temporary row buffer cannot be allocated,
 * MLIB_SUCCESS otherwise.
 */
1355mlib_status mlib_ImageAffineIndex_S16_S16_4CH_BC(mlib_affine_param *param,
1356 const void *colormap)
1357{
1358 DECLAREVAR();
1359 DECLAREVAR_S16();
    /* LUT base pointer moved back by the LUT offset.
     * NOTE(review): presumably so the macros can index it with raw index
     * values -- the indexing itself is not visible here. */
1360 mlib_d64 *flut = (mlib_d64 *)mlib_ImageGetLutNormalTable(colormap) -
1361 mlib_ImageGetLutOffset(colormap);
    /* On-stack row buffer; replaced by a heap buffer for wide rows below. */
1362 mlib_d64 dstRowData[MLIB_LIMIT];
1363 mlib_d64 *dstRowPtr = dstRowData;
1364 const mlib_s16 *mlib_filters_table_s16_4;
1365
    /* Any filter other than MLIB_BICUBIC uses the "bc2" coefficient table. */
1366 if (filter == MLIB_BICUBIC) {
1367 mlib_filters_table_s16_4 = mlib_filters_s16_bc_4;
1368 } else {
1369 mlib_filters_table_s16_4 = mlib_filters_s16_bc2_4;
1370 }
1371
    /* NOTE(review): presumably converts the stride from bytes to mlib_s16
     * elements, matching MLIB_TYPE == mlib_s16 -- confirm. */
1372 srcYStride >>= 1;
1373
    /* One mlib_d64 per pixel: allocate when the widest row exceeds the
     * stack buffer. */
1374 if (max_xsize > MLIB_LIMIT) {
1375 dstRowPtr = mlib_malloc(sizeof(mlib_d64) * max_xsize);
1376
1377 if (dstRowPtr == NULL) return MLIB_FAILURE;
1378 }
1379
1380 for (j = yStart; j <= yFinish; j++) {
1381
1382 CLIP();
1383
    /* GSR is rewritten on every row.
     * NOTE(review): presumably sets the scale factor used by the pack
     * step and is repeated because the conversion call at the end of the
     * row may change GSR -- confirm. */
1384 vis_write_gsr(10 << 3);
1385
    /* Number of destination pixels in this row. */
1386 cols = xRight - xLeft + 1;
1387 i = 0;
1388
    /* Software-pipelined path for rows of at least 4 pixels: the prologue
     * primes the macro state, the loop stores cols-4 results and the
     * epilogue stores the last 4, leaving i == cols. */
1389 if (i <= cols - 4) {
1390
1391 NEXT_PIXEL_4BC();
1392 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
1393
1394 NEXT_PIXEL_4BC();
1395
1396 BC_S16_4CH(mlib_filters_table_s16_4);
1397 FADD_4BC_S16();
1398
1399 BC_S16_4CH(mlib_filters_table_s16_4);
1400
1401#pragma pipeloop(0)
1402
1403 for (; i < cols-4; i++) {
1404 *dstPixelPtr++ = res;
1405
1406 FADD_4BC_S16();
1407 BC_S16_4CH(mlib_filters_table_s16_4);
1408 }
1409
    /* Epilogue: drain the results still in flight (4 stores). */
1410 *dstPixelPtr++ = res;
1411
1412 FADD_4BC_S16();
1413 *dstPixelPtr++ = res;
1414
1415 RESULT_4BC_S16_1PIXEL();
1416 *dstPixelPtr++ = res;
1417
1418 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
1419 RESULT_4BC_S16_1PIXEL();
1420 *dstPixelPtr++ = res;
1421 i += 4;
1422 }
1423
    /* Non-pipelined path: only iterates when the row has fewer than 4
     * pixels, since the pipelined path above ends with i == cols. */
1424#pragma pipeloop(0)
1425 for (; i < cols; i++) {
1426 NEXT_PIXEL_4BC();
1427 LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4);
1428 RESULT_4BC_S16_1PIXEL();
1429 *dstPixelPtr++ = res;
1430 }
1431
    /* Convert the buffered row to indices written at dstIndexPtr. */
1432 mlib_ImageColorTrue2IndexLine_S16_S16_4((mlib_s16 *)dstRowPtr,
1433 dstIndexPtr,
1434 xRight - xLeft + 1,
1435 colormap);
1436 }
1437
    /* Release the row buffer only if it was heap-allocated. */
1438 if (dstRowPtr != dstRowData) mlib_free(dstRowPtr);
1439
1440 return MLIB_SUCCESS;
1441}
1442
1443/***************************************************************/