blob: 2c49a6bde1b863f79ed0f4a8ed4c5888163a3e36 [file] [log] [blame]
/*
 * jsimd.c
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright 2009 D. R. Commander
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 *
 * This file contains the interface between the "normal" portions
 * of the library and the SIMD implementations.
 */
13
14#define JPEG_INTERNALS
15#include "jinclude.h"
16#include "jpeglib.h"
Pierre Ossman2ae181c2009-03-09 13:21:27 +000017#include "jsimd.h"
Pierre Ossman59a39382009-03-09 13:15:56 +000018#include "jdct.h"
Pierre Ossman2ae181c2009-03-09 13:21:27 +000019#include "jsimddct.h"
20#include "simd/jsimd.h"
Pierre Ossman59a39382009-03-09 13:15:56 +000021
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment. This macro allows us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) evaluates to non-zero when the address held in
 * ptr is a multiple of (1 << order) bytes.  When the library is built
 * without WITH_SIMD it is always 0 and ptr is not evaluated.
 *
 * Both arguments are parenthesised so that arbitrary expressions may be
 * passed, and the pointer is converted to size_t rather than unsigned so
 * the cast does not narrow the address on platforms with 64-bit pointers.
 */
#ifdef WITH_SIMD
#define IS_ALIGNED(ptr, order) \
  ((((size_t) (ptr)) & ((((size_t) 1) << (order)) - 1)) == 0)
#else
#define IS_ALIGNED(ptr, order) (0)
#endif

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
33
Pierre Ossman59a39382009-03-09 13:15:56 +000034static unsigned int simd_support = ~0;
35
36/*
37 * Check what SIMD accelerations are supported.
38 *
39 * FIXME: This code is racy under a multi-threaded environment.
40 */
41LOCAL(void)
42init_simd (void)
43{
DRC59c1a252009-04-03 11:27:17 +000044#ifdef WITH_SIMD
45 char *env = NULL;
46#endif
Pierre Ossman59a39382009-03-09 13:15:56 +000047 if (simd_support != ~0)
48 return;
49
Pierre Ossman2ae181c2009-03-09 13:21:27 +000050#ifdef WITH_SIMD
51 simd_support = jpeg_simd_cpu_support();
DRC59c1a252009-04-03 11:27:17 +000052 if((env=getenv("JSIMD_FORCEMMX"))!=NULL && !strcmp(env, "1"))
53 simd_support = JSIMD_MMX;
54 else if((env=getenv("JSIMD_FORCESSE2"))!=NULL && !strcmp(env, "1"))
55 simd_support = JSIMD_SSE2;
Pierre Ossman2ae181c2009-03-09 13:21:27 +000056#else
Pierre Ossman59a39382009-03-09 13:15:56 +000057 simd_support = JSIMD_NONE;
Pierre Ossman2ae181c2009-03-09 13:21:27 +000058#endif
Pierre Ossman59a39382009-03-09 13:15:56 +000059}
60
61GLOBAL(int)
62jsimd_can_rgb_ycc (void)
63{
64 init_simd();
65
Pierre Ossman5eb84ff2009-03-09 13:25:30 +000066 /* The code is optimised for these values only */
67 if (BITS_IN_JSAMPLE != 8)
68 return 0;
69 if (sizeof(JDIMENSION) != 4)
70 return 0;
71 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
72 return 0;
73
Pierre Ossmaneea72152009-03-09 13:34:17 +000074 if ((simd_support & JSIMD_SSE2) &&
75 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
76 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +000077 if (simd_support & JSIMD_MMX)
78 return 1;
79
Pierre Ossman59a39382009-03-09 13:15:56 +000080 return 0;
81}
82
83GLOBAL(int)
84jsimd_can_ycc_rgb (void)
85{
86 init_simd();
87
Pierre Ossman5eb84ff2009-03-09 13:25:30 +000088 /* The code is optimised for these values only */
89 if (BITS_IN_JSAMPLE != 8)
90 return 0;
91 if (sizeof(JDIMENSION) != 4)
92 return 0;
93 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
94 return 0;
95
Pierre Ossmaneea72152009-03-09 13:34:17 +000096 if ((simd_support & JSIMD_SSE2) &&
97 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
98 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +000099 if (simd_support & JSIMD_MMX)
100 return 1;
101
Pierre Ossman59a39382009-03-09 13:15:56 +0000102 return 0;
103}
104
105GLOBAL(void)
106jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
107 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
108 JDIMENSION output_row, int num_rows)
109{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000110#ifdef WITH_SIMD
DRCf25c0712009-04-03 12:00:51 +0000111 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
112 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
113 switch(cinfo->in_color_space)
114 {
115 case JCS_EXT_RGB:
116 sse2fct=jsimd_extrgb_ycc_convert_sse2;
117 mmxfct=jsimd_extrgb_ycc_convert_mmx;
118 break;
119 case JCS_EXT_RGBX:
120 sse2fct=jsimd_extrgbx_ycc_convert_sse2;
121 mmxfct=jsimd_extrgbx_ycc_convert_mmx;
122 break;
123 case JCS_EXT_BGR:
124 sse2fct=jsimd_extbgr_ycc_convert_sse2;
125 mmxfct=jsimd_extbgr_ycc_convert_mmx;
126 break;
127 case JCS_EXT_BGRX:
128 sse2fct=jsimd_extbgrx_ycc_convert_sse2;
129 mmxfct=jsimd_extbgrx_ycc_convert_mmx;
130 break;
131 case JCS_EXT_XBGR:
132 sse2fct=jsimd_extxbgr_ycc_convert_sse2;
133 mmxfct=jsimd_extxbgr_ycc_convert_mmx;
134 break;
135 case JCS_EXT_XRGB:
136 sse2fct=jsimd_extxrgb_ycc_convert_sse2;
137 mmxfct=jsimd_extxrgb_ycc_convert_mmx;
138 break;
139 default:
140 sse2fct=jsimd_rgb_ycc_convert_sse2;
141 mmxfct=jsimd_rgb_ycc_convert_mmx;
142 break;
143 }
Pierre Ossmaneea72152009-03-09 13:34:17 +0000144 if ((simd_support & JSIMD_SSE2) &&
145 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
DRCf25c0712009-04-03 12:00:51 +0000146 sse2fct(cinfo->image_width, input_buf,
Pierre Ossmaneea72152009-03-09 13:34:17 +0000147 output_buf, output_row, num_rows);
148 else if (simd_support & JSIMD_MMX)
DRCf25c0712009-04-03 12:00:51 +0000149 mmxfct(cinfo->image_width, input_buf,
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000150 output_buf, output_row, num_rows);
151#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000152}
153
154GLOBAL(void)
155jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
156 JSAMPIMAGE input_buf, JDIMENSION input_row,
157 JSAMPARRAY output_buf, int num_rows)
158{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000159#ifdef WITH_SIMD
DRCf25c0712009-04-03 12:00:51 +0000160 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
161 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
162 switch(cinfo->out_color_space)
163 {
164 case JCS_EXT_RGB:
165 sse2fct=jsimd_ycc_extrgb_convert_sse2;
166 mmxfct=jsimd_ycc_extrgb_convert_mmx;
167 break;
168 case JCS_EXT_RGBX:
169 sse2fct=jsimd_ycc_extrgbx_convert_sse2;
170 mmxfct=jsimd_ycc_extrgbx_convert_mmx;
171 break;
172 case JCS_EXT_BGR:
173 sse2fct=jsimd_ycc_extbgr_convert_sse2;
174 mmxfct=jsimd_ycc_extbgr_convert_mmx;
175 break;
176 case JCS_EXT_BGRX:
177 sse2fct=jsimd_ycc_extbgrx_convert_sse2;
178 mmxfct=jsimd_ycc_extbgrx_convert_mmx;
179 break;
180 case JCS_EXT_XBGR:
181 sse2fct=jsimd_ycc_extxbgr_convert_sse2;
182 mmxfct=jsimd_ycc_extxbgr_convert_mmx;
183 break;
184 case JCS_EXT_XRGB:
185 sse2fct=jsimd_ycc_extxrgb_convert_sse2;
186 mmxfct=jsimd_ycc_extxrgb_convert_mmx;
187 break;
188 default:
189 sse2fct=jsimd_ycc_rgb_convert_sse2;
190 mmxfct=jsimd_ycc_rgb_convert_mmx;
191 break;
192 }
Pierre Ossmaneea72152009-03-09 13:34:17 +0000193 if ((simd_support & JSIMD_SSE2) &&
194 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
DRCf25c0712009-04-03 12:00:51 +0000195 sse2fct(cinfo->output_width, input_buf,
Pierre Ossmaneea72152009-03-09 13:34:17 +0000196 input_row, output_buf, num_rows);
197 else if (simd_support & JSIMD_MMX)
DRCf25c0712009-04-03 12:00:51 +0000198 mmxfct(cinfo->output_width, input_buf,
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000199 input_row, output_buf, num_rows);
200#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000201}
202
203GLOBAL(int)
204jsimd_can_h2v2_downsample (void)
205{
206 init_simd();
207
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000208 /* The code is optimised for these values only */
209 if (BITS_IN_JSAMPLE != 8)
210 return 0;
211 if (sizeof(JDIMENSION) != 4)
212 return 0;
213
Pierre Ossmaneea72152009-03-09 13:34:17 +0000214 if (simd_support & JSIMD_SSE2)
215 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000216 if (simd_support & JSIMD_MMX)
217 return 1;
218
Pierre Ossman59a39382009-03-09 13:15:56 +0000219 return 0;
220}
221
222GLOBAL(int)
223jsimd_can_h2v1_downsample (void)
224{
225 init_simd();
226
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000227 /* The code is optimised for these values only */
228 if (BITS_IN_JSAMPLE != 8)
229 return 0;
230 if (sizeof(JDIMENSION) != 4)
231 return 0;
232
Pierre Ossmaneea72152009-03-09 13:34:17 +0000233 if (simd_support & JSIMD_SSE2)
234 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000235 if (simd_support & JSIMD_MMX)
236 return 1;
237
Pierre Ossman59a39382009-03-09 13:15:56 +0000238 return 0;
239}
240
241GLOBAL(void)
242jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
243 JSAMPARRAY input_data, JSAMPARRAY output_data)
244{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000245#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000246 if (simd_support & JSIMD_SSE2)
247 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
248 compptr->v_samp_factor, compptr->width_in_blocks,
249 input_data, output_data);
250 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000251 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
252 compptr->v_samp_factor, compptr->width_in_blocks,
253 input_data, output_data);
254#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000255}
256
257GLOBAL(void)
258jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
259 JSAMPARRAY input_data, JSAMPARRAY output_data)
260{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000261#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000262 if (simd_support & JSIMD_SSE2)
263 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
264 compptr->v_samp_factor, compptr->width_in_blocks,
265 input_data, output_data);
266 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000267 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
268 compptr->v_samp_factor, compptr->width_in_blocks,
269 input_data, output_data);
270#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000271}
272
273GLOBAL(int)
274jsimd_can_h2v2_upsample (void)
275{
276 init_simd();
277
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000278 /* The code is optimised for these values only */
279 if (BITS_IN_JSAMPLE != 8)
280 return 0;
281 if (sizeof(JDIMENSION) != 4)
282 return 0;
283
Pierre Ossmaneea72152009-03-09 13:34:17 +0000284 if (simd_support & JSIMD_SSE2)
285 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000286 if (simd_support & JSIMD_MMX)
287 return 1;
288
Pierre Ossman59a39382009-03-09 13:15:56 +0000289 return 0;
290}
291
292GLOBAL(int)
293jsimd_can_h2v1_upsample (void)
294{
295 init_simd();
296
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000297 /* The code is optimised for these values only */
298 if (BITS_IN_JSAMPLE != 8)
299 return 0;
300 if (sizeof(JDIMENSION) != 4)
301 return 0;
302
Pierre Ossmaneea72152009-03-09 13:34:17 +0000303 if (simd_support & JSIMD_SSE2)
304 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000305 if (simd_support & JSIMD_MMX)
306 return 1;
307
Pierre Ossman59a39382009-03-09 13:15:56 +0000308 return 0;
309}
310
311GLOBAL(void)
312jsimd_h2v2_upsample (j_decompress_ptr cinfo,
313 jpeg_component_info * compptr,
314 JSAMPARRAY input_data,
315 JSAMPARRAY * output_data_ptr)
316{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000317#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000318 if (simd_support & JSIMD_SSE2)
319 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor,
320 cinfo->output_width, input_data, output_data_ptr);
321 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000322 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor,
323 cinfo->output_width, input_data, output_data_ptr);
324#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000325}
326
327GLOBAL(void)
328jsimd_h2v1_upsample (j_decompress_ptr cinfo,
329 jpeg_component_info * compptr,
330 JSAMPARRAY input_data,
331 JSAMPARRAY * output_data_ptr)
332{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000333#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000334 if (simd_support & JSIMD_SSE2)
335 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor,
336 cinfo->output_width, input_data, output_data_ptr);
337 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000338 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor,
339 cinfo->output_width, input_data, output_data_ptr);
340#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000341}
342
343GLOBAL(int)
344jsimd_can_h2v2_fancy_upsample (void)
345{
346 init_simd();
347
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000348 /* The code is optimised for these values only */
349 if (BITS_IN_JSAMPLE != 8)
350 return 0;
351 if (sizeof(JDIMENSION) != 4)
352 return 0;
353
Pierre Ossmaneea72152009-03-09 13:34:17 +0000354 if ((simd_support & JSIMD_SSE2) &&
355 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
356 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000357 if (simd_support & JSIMD_MMX)
358 return 1;
359
Pierre Ossman59a39382009-03-09 13:15:56 +0000360 return 0;
361}
362
363GLOBAL(int)
364jsimd_can_h2v1_fancy_upsample (void)
365{
366 init_simd();
367
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000368 /* The code is optimised for these values only */
369 if (BITS_IN_JSAMPLE != 8)
370 return 0;
371 if (sizeof(JDIMENSION) != 4)
372 return 0;
373
Pierre Ossmaneea72152009-03-09 13:34:17 +0000374 if ((simd_support & JSIMD_SSE2) &&
375 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
376 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000377 if (simd_support & JSIMD_MMX)
378 return 1;
379
Pierre Ossman59a39382009-03-09 13:15:56 +0000380 return 0;
381}
382
383GLOBAL(void)
384jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
385 jpeg_component_info * compptr,
386 JSAMPARRAY input_data,
387 JSAMPARRAY * output_data_ptr)
388{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000389#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000390 if ((simd_support & JSIMD_SSE2) &&
391 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
392 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
393 compptr->downsampled_width, input_data, output_data_ptr);
394 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000395 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
396 compptr->downsampled_width, input_data, output_data_ptr);
397#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000398}
399
400GLOBAL(void)
401jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
402 jpeg_component_info * compptr,
403 JSAMPARRAY input_data,
404 JSAMPARRAY * output_data_ptr)
405{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000406#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000407 if ((simd_support & JSIMD_SSE2) &&
408 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
409 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
410 compptr->downsampled_width, input_data, output_data_ptr);
411 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000412 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
413 compptr->downsampled_width, input_data, output_data_ptr);
414#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000415}
416
417GLOBAL(int)
418jsimd_can_h2v2_merged_upsample (void)
419{
420 init_simd();
421
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000422 /* The code is optimised for these values only */
423 if (BITS_IN_JSAMPLE != 8)
424 return 0;
425 if (sizeof(JDIMENSION) != 4)
426 return 0;
427
Pierre Ossmaneea72152009-03-09 13:34:17 +0000428 if ((simd_support & JSIMD_SSE2) &&
429 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
430 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000431 if (simd_support & JSIMD_MMX)
432 return 1;
433
Pierre Ossman59a39382009-03-09 13:15:56 +0000434 return 0;
435}
436
437GLOBAL(int)
438jsimd_can_h2v1_merged_upsample (void)
439{
440 init_simd();
441
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000442 /* The code is optimised for these values only */
443 if (BITS_IN_JSAMPLE != 8)
444 return 0;
445 if (sizeof(JDIMENSION) != 4)
446 return 0;
447
Pierre Ossmaneea72152009-03-09 13:34:17 +0000448 if ((simd_support & JSIMD_SSE2) &&
449 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
450 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000451 if (simd_support & JSIMD_MMX)
452 return 1;
453
Pierre Ossman59a39382009-03-09 13:15:56 +0000454 return 0;
455}
456
457GLOBAL(void)
458jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
459 JSAMPIMAGE input_buf,
460 JDIMENSION in_row_group_ctr,
461 JSAMPARRAY output_buf)
462{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000463#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000464 if ((simd_support & JSIMD_SSE2) &&
465 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
466 jsimd_h2v2_merged_upsample_sse2(cinfo->output_width, input_buf,
467 in_row_group_ctr, output_buf);
468 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000469 jsimd_h2v2_merged_upsample_mmx(cinfo->output_width, input_buf,
470 in_row_group_ctr, output_buf);
471#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000472}
473
474GLOBAL(void)
475jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
476 JSAMPIMAGE input_buf,
477 JDIMENSION in_row_group_ctr,
478 JSAMPARRAY output_buf)
479{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000480#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000481 if ((simd_support & JSIMD_SSE2) &&
482 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
483 jsimd_h2v1_merged_upsample_sse2(cinfo->output_width, input_buf,
484 in_row_group_ctr, output_buf);
485 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000486 jsimd_h2v1_merged_upsample_mmx(cinfo->output_width, input_buf,
487 in_row_group_ctr, output_buf);
488#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000489}
490
491GLOBAL(int)
492jsimd_can_convsamp (void)
493{
494 init_simd();
495
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000496 /* The code is optimised for these values only */
497 if (DCTSIZE != 8)
498 return 0;
499 if (BITS_IN_JSAMPLE != 8)
500 return 0;
501 if (sizeof(JDIMENSION) != 4)
502 return 0;
503 if (sizeof(DCTELEM) != 2)
504 return 0;
505
Pierre Ossmaneea72152009-03-09 13:34:17 +0000506 if (simd_support & JSIMD_SSE2)
507 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000508 if (simd_support & JSIMD_MMX)
509 return 1;
510
Pierre Ossman59a39382009-03-09 13:15:56 +0000511 return 0;
512}
513
514GLOBAL(int)
515jsimd_can_convsamp_float (void)
516{
517 init_simd();
518
Pierre Ossman65d03172009-03-09 13:28:10 +0000519 /* The code is optimised for these values only */
520 if (DCTSIZE != 8)
521 return 0;
522 if (BITS_IN_JSAMPLE != 8)
523 return 0;
524 if (sizeof(JDIMENSION) != 4)
525 return 0;
526 if (sizeof(FAST_FLOAT) != 4)
527 return 0;
528
Pierre Ossmaneea72152009-03-09 13:34:17 +0000529 if (simd_support & JSIMD_SSE2)
530 return 1;
Pierre Ossman018fc422009-03-09 13:31:56 +0000531 if (simd_support & JSIMD_SSE)
532 return 1;
Pierre Ossman65d03172009-03-09 13:28:10 +0000533 if (simd_support & JSIMD_3DNOW)
534 return 1;
535
Pierre Ossman59a39382009-03-09 13:15:56 +0000536 return 0;
537}
538
539GLOBAL(void)
540jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
541 DCTELEM * workspace)
542{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000543#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000544 if (simd_support & JSIMD_SSE2)
545 jsimd_convsamp_sse2(sample_data, start_col, workspace);
546 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000547 jsimd_convsamp_mmx(sample_data, start_col, workspace);
548#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000549}
550
551GLOBAL(void)
552jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
553 FAST_FLOAT * workspace)
554{
Pierre Ossman65d03172009-03-09 13:28:10 +0000555#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000556 if (simd_support & JSIMD_SSE2)
557 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
558 else if (simd_support & JSIMD_SSE)
Pierre Ossman018fc422009-03-09 13:31:56 +0000559 jsimd_convsamp_float_sse(sample_data, start_col, workspace);
560 else if (simd_support & JSIMD_3DNOW)
Pierre Ossman65d03172009-03-09 13:28:10 +0000561 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
562#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000563}
564
565GLOBAL(int)
566jsimd_can_fdct_islow (void)
567{
568 init_simd();
569
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000570 /* The code is optimised for these values only */
571 if (DCTSIZE != 8)
572 return 0;
573 if (sizeof(DCTELEM) != 2)
574 return 0;
575
Pierre Ossmaneea72152009-03-09 13:34:17 +0000576 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
577 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000578 if (simd_support & JSIMD_MMX)
579 return 1;
580
Pierre Ossman59a39382009-03-09 13:15:56 +0000581 return 0;
582}
583
584GLOBAL(int)
585jsimd_can_fdct_ifast (void)
586{
587 init_simd();
588
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000589 /* The code is optimised for these values only */
590 if (DCTSIZE != 8)
591 return 0;
592 if (sizeof(DCTELEM) != 2)
593 return 0;
594
Pierre Ossmaneea72152009-03-09 13:34:17 +0000595 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
596 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000597 if (simd_support & JSIMD_MMX)
598 return 1;
599
Pierre Ossman59a39382009-03-09 13:15:56 +0000600 return 0;
601}
602
603GLOBAL(int)
604jsimd_can_fdct_float (void)
605{
606 init_simd();
607
Pierre Ossman65d03172009-03-09 13:28:10 +0000608 /* The code is optimised for these values only */
609 if (DCTSIZE != 8)
610 return 0;
611 if (sizeof(FAST_FLOAT) != 4)
612 return 0;
613
Pierre Ossman018fc422009-03-09 13:31:56 +0000614 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
615 return 1;
Pierre Ossman65d03172009-03-09 13:28:10 +0000616 if (simd_support & JSIMD_3DNOW)
617 return 1;
618
Pierre Ossman59a39382009-03-09 13:15:56 +0000619 return 0;
620}
621
622GLOBAL(void)
623jsimd_fdct_islow (DCTELEM * data)
624{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000625#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000626 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
627 jsimd_fdct_islow_sse2(data);
628 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000629 jsimd_fdct_islow_mmx(data);
630#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000631}
632
633GLOBAL(void)
634jsimd_fdct_ifast (DCTELEM * data)
635{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000636#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000637 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
638 jsimd_fdct_ifast_sse2(data);
639 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000640 jsimd_fdct_ifast_mmx(data);
641#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000642}
643
644GLOBAL(void)
645jsimd_fdct_float (FAST_FLOAT * data)
646{
Pierre Ossman65d03172009-03-09 13:28:10 +0000647#ifdef WITH_SIMD
Pierre Ossman018fc422009-03-09 13:31:56 +0000648 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
649 jsimd_fdct_float_sse(data);
650 else if (simd_support & JSIMD_3DNOW)
Pierre Ossman65d03172009-03-09 13:28:10 +0000651 jsimd_fdct_float_3dnow(data);
652#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000653}
654
655GLOBAL(int)
656jsimd_can_quantize (void)
657{
658 init_simd();
659
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000660 /* The code is optimised for these values only */
661 if (DCTSIZE != 8)
662 return 0;
663 if (sizeof(JCOEF) != 2)
664 return 0;
665 if (sizeof(DCTELEM) != 2)
666 return 0;
667
Pierre Ossmaneea72152009-03-09 13:34:17 +0000668 if (simd_support & JSIMD_SSE2)
669 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000670 if (simd_support & JSIMD_MMX)
671 return 1;
672
Pierre Ossman59a39382009-03-09 13:15:56 +0000673 return 0;
674}
675
676GLOBAL(int)
677jsimd_can_quantize_float (void)
678{
679 init_simd();
680
Pierre Ossman65d03172009-03-09 13:28:10 +0000681 /* The code is optimised for these values only */
682 if (DCTSIZE != 8)
683 return 0;
684 if (sizeof(JCOEF) != 2)
685 return 0;
686 if (sizeof(FAST_FLOAT) != 4)
687 return 0;
688
Pierre Ossmaneea72152009-03-09 13:34:17 +0000689 if (simd_support & JSIMD_SSE2)
690 return 1;
Pierre Ossman018fc422009-03-09 13:31:56 +0000691 if (simd_support & JSIMD_SSE)
692 return 1;
Pierre Ossman65d03172009-03-09 13:28:10 +0000693 if (simd_support & JSIMD_3DNOW)
694 return 1;
695
Pierre Ossman59a39382009-03-09 13:15:56 +0000696 return 0;
697}
698
699GLOBAL(void)
700jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
701 DCTELEM * workspace)
702{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000703#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000704 if (simd_support & JSIMD_SSE2)
705 jsimd_quantize_sse2(coef_block, divisors, workspace);
706 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000707 jsimd_quantize_mmx(coef_block, divisors, workspace);
708#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000709}
710
711GLOBAL(void)
712jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
713 FAST_FLOAT * workspace)
714{
Pierre Ossman65d03172009-03-09 13:28:10 +0000715#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000716 if (simd_support & JSIMD_SSE2)
717 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
718 else if (simd_support & JSIMD_SSE)
Pierre Ossman018fc422009-03-09 13:31:56 +0000719 jsimd_quantize_float_sse(coef_block, divisors, workspace);
720 else if (simd_support & JSIMD_3DNOW)
Pierre Ossman65d03172009-03-09 13:28:10 +0000721 jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
722#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000723}
724
725GLOBAL(int)
726jsimd_can_idct_2x2 (void)
727{
728 init_simd();
729
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000730 /* The code is optimised for these values only */
731 if (DCTSIZE != 8)
732 return 0;
733 if (sizeof(JCOEF) != 2)
734 return 0;
735 if (BITS_IN_JSAMPLE != 8)
736 return 0;
737 if (sizeof(JDIMENSION) != 4)
738 return 0;
739 if (sizeof(ISLOW_MULT_TYPE) != 2)
740 return 0;
741
Pierre Ossmaneea72152009-03-09 13:34:17 +0000742 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
743 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000744 if (simd_support & JSIMD_MMX)
745 return 1;
746
Pierre Ossman59a39382009-03-09 13:15:56 +0000747 return 0;
748}
749
750GLOBAL(int)
751jsimd_can_idct_4x4 (void)
752{
753 init_simd();
754
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000755 /* The code is optimised for these values only */
756 if (DCTSIZE != 8)
757 return 0;
758 if (sizeof(JCOEF) != 2)
759 return 0;
760 if (BITS_IN_JSAMPLE != 8)
761 return 0;
762 if (sizeof(JDIMENSION) != 4)
763 return 0;
764 if (sizeof(ISLOW_MULT_TYPE) != 2)
765 return 0;
766
Pierre Ossmaneea72152009-03-09 13:34:17 +0000767 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
768 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000769 if (simd_support & JSIMD_MMX)
770 return 1;
771
Pierre Ossman59a39382009-03-09 13:15:56 +0000772 return 0;
773}
774
775GLOBAL(void)
776jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
777 JCOEFPTR coef_block, JSAMPARRAY output_buf,
778 JDIMENSION output_col)
779{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000780#if WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000781 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
782 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
783 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000784 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
785#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000786}
787
788GLOBAL(void)
789jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
790 JCOEFPTR coef_block, JSAMPARRAY output_buf,
791 JDIMENSION output_col)
792{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000793#if WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000794 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
795 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
796 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000797 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
798#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000799}
800
801GLOBAL(int)
802jsimd_can_idct_islow (void)
803{
804 init_simd();
805
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000806 /* The code is optimised for these values only */
807 if (DCTSIZE != 8)
808 return 0;
809 if (sizeof(JCOEF) != 2)
810 return 0;
811 if (BITS_IN_JSAMPLE != 8)
812 return 0;
813 if (sizeof(JDIMENSION) != 4)
814 return 0;
815 if (sizeof(ISLOW_MULT_TYPE) != 2)
816 return 0;
817
Pierre Ossmaneea72152009-03-09 13:34:17 +0000818 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
819 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000820 if (simd_support & JSIMD_MMX)
821 return 1;
822
Pierre Ossman59a39382009-03-09 13:15:56 +0000823 return 0;
824}
825
826GLOBAL(int)
827jsimd_can_idct_ifast (void)
828{
829 init_simd();
830
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000831 /* The code is optimised for these values only */
832 if (DCTSIZE != 8)
833 return 0;
834 if (sizeof(JCOEF) != 2)
835 return 0;
836 if (BITS_IN_JSAMPLE != 8)
837 return 0;
838 if (sizeof(JDIMENSION) != 4)
839 return 0;
840 if (sizeof(IFAST_MULT_TYPE) != 2)
841 return 0;
842 if (IFAST_SCALE_BITS != 2)
843 return 0;
844
Pierre Ossmaneea72152009-03-09 13:34:17 +0000845 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
846 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000847 if (simd_support & JSIMD_MMX)
848 return 1;
849
Pierre Ossman59a39382009-03-09 13:15:56 +0000850 return 0;
851}
852
853GLOBAL(int)
854jsimd_can_idct_float (void)
855{
856 init_simd();
857
Pierre Ossman65d03172009-03-09 13:28:10 +0000858 if (DCTSIZE != 8)
859 return 0;
860 if (sizeof(JCOEF) != 2)
861 return 0;
862 if (BITS_IN_JSAMPLE != 8)
863 return 0;
864 if (sizeof(JDIMENSION) != 4)
865 return 0;
866 if (sizeof(FAST_FLOAT) != 4)
867 return 0;
868 if (sizeof(FLOAT_MULT_TYPE) != 4)
869 return 0;
870
Pierre Ossmaneea72152009-03-09 13:34:17 +0000871 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
872 return 1;
Pierre Ossman018fc422009-03-09 13:31:56 +0000873 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
874 return 1;
Pierre Ossman65d03172009-03-09 13:28:10 +0000875 if (simd_support & JSIMD_3DNOW)
876 return 1;
877
Pierre Ossman59a39382009-03-09 13:15:56 +0000878 return 0;
879}
880
881GLOBAL(void)
882jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
883 JCOEFPTR coef_block, JSAMPARRAY output_buf,
884 JDIMENSION output_col)
885{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000886#if WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000887 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
888 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col);
889 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000890 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, output_col);
891#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000892}
893
894GLOBAL(void)
895jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
896 JCOEFPTR coef_block, JSAMPARRAY output_buf,
897 JDIMENSION output_col)
898{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000899#if WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000900 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
901 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col);
902 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000903 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, output_col);
904#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000905}
906
907GLOBAL(void)
908jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
909 JCOEFPTR coef_block, JSAMPARRAY output_buf,
910 JDIMENSION output_col)
911{
Pierre Ossman65d03172009-03-09 13:28:10 +0000912#if WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000913 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
914 jsimd_idct_float_sse2(compptr->dct_table, coef_block,
915 output_buf, output_col);
916 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
Pierre Ossman018fc422009-03-09 13:31:56 +0000917 jsimd_idct_float_sse(compptr->dct_table, coef_block,
918 output_buf, output_col);
919 else if (simd_support & JSIMD_3DNOW)
Pierre Ossman65d03172009-03-09 13:28:10 +0000920 jsimd_idct_float_3dnow(compptr->dct_table, coef_block,
921 output_buf, output_col);
922#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000923}
924