/*
 * jsimd.c
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright 2009 D. R. Commander
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 *
 * This file contains the interface between the "normal" portions
 * of the library and the SIMD implementations.
 */
13
14#define JPEG_INTERNALS
15#include "jinclude.h"
16#include "jpeglib.h"
Pierre Ossman2ae181c2009-03-09 13:21:27 +000017#include "jsimd.h"
Pierre Ossman59a39382009-03-09 13:15:56 +000018#include "jdct.h"
Pierre Ossman2ae181c2009-03-09 13:21:27 +000019#include "jsimddct.h"
20#include "simd/jsimd.h"
Pierre Ossman59a39382009-03-09 13:15:56 +000021
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment. This macro allows us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is true when the address held in ptr is a multiple
 * of (1 << order).  The address is converted through size_t rather than
 * unsigned so that the cast does not narrow the pointer on platforms where
 * pointers are wider than unsigned int, and both arguments are parenthesised
 * so that expressions can be passed safely.
 *
 * Without WITH_SIMD the macro is the constant 0, so every alignment-gated
 * SIMD path is reported as unavailable.
 */
#ifdef WITH_SIMD
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & ((((size_t)1) << (order)) - 1)) == 0)
#else
#define IS_ALIGNED(ptr, order) (0)
#endif

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
33
Pierre Ossman59a39382009-03-09 13:15:56 +000034static unsigned int simd_support = ~0;
35
36/*
37 * Check what SIMD accelerations are supported.
38 *
39 * FIXME: This code is racy under a multi-threaded environment.
40 */
41LOCAL(void)
42init_simd (void)
43{
DRC59c1a252009-04-03 11:27:17 +000044#ifdef WITH_SIMD
45 char *env = NULL;
46#endif
Pierre Ossman59a39382009-03-09 13:15:56 +000047 if (simd_support != ~0)
48 return;
49
Pierre Ossman2ae181c2009-03-09 13:21:27 +000050#ifdef WITH_SIMD
51 simd_support = jpeg_simd_cpu_support();
DRCcdc8ac32009-06-25 20:38:31 +000052 #ifndef __x86_64__
DRC59c1a252009-04-03 11:27:17 +000053 if((env=getenv("JSIMD_FORCEMMX"))!=NULL && !strcmp(env, "1"))
54 simd_support = JSIMD_MMX;
55 else if((env=getenv("JSIMD_FORCESSE2"))!=NULL && !strcmp(env, "1"))
56 simd_support = JSIMD_SSE2;
DRCcdc8ac32009-06-25 20:38:31 +000057 #endif
Pierre Ossman2ae181c2009-03-09 13:21:27 +000058#else
Pierre Ossman59a39382009-03-09 13:15:56 +000059 simd_support = JSIMD_NONE;
Pierre Ossman2ae181c2009-03-09 13:21:27 +000060#endif
Pierre Ossman59a39382009-03-09 13:15:56 +000061}
62
63GLOBAL(int)
64jsimd_can_rgb_ycc (void)
65{
66 init_simd();
67
Pierre Ossman5eb84ff2009-03-09 13:25:30 +000068 /* The code is optimised for these values only */
69 if (BITS_IN_JSAMPLE != 8)
70 return 0;
71 if (sizeof(JDIMENSION) != 4)
72 return 0;
73 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
74 return 0;
75
Pierre Ossmaneea72152009-03-09 13:34:17 +000076 if ((simd_support & JSIMD_SSE2) &&
77 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
78 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +000079 if (simd_support & JSIMD_MMX)
80 return 1;
81
Pierre Ossman59a39382009-03-09 13:15:56 +000082 return 0;
83}
84
85GLOBAL(int)
86jsimd_can_ycc_rgb (void)
87{
88 init_simd();
89
Pierre Ossman5eb84ff2009-03-09 13:25:30 +000090 /* The code is optimised for these values only */
91 if (BITS_IN_JSAMPLE != 8)
92 return 0;
93 if (sizeof(JDIMENSION) != 4)
94 return 0;
95 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
96 return 0;
97
Pierre Ossmaneea72152009-03-09 13:34:17 +000098 if ((simd_support & JSIMD_SSE2) &&
99 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
100 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000101 if (simd_support & JSIMD_MMX)
102 return 1;
103
Pierre Ossman59a39382009-03-09 13:15:56 +0000104 return 0;
105}
106
107GLOBAL(void)
108jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
109 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
110 JDIMENSION output_row, int num_rows)
111{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000112#ifdef WITH_SIMD
DRCf25c0712009-04-03 12:00:51 +0000113 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
DRCcdc8ac32009-06-25 20:38:31 +0000114 #ifndef __x86_64__
DRCf25c0712009-04-03 12:00:51 +0000115 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
DRCcdc8ac32009-06-25 20:38:31 +0000116 #endif
DRCf25c0712009-04-03 12:00:51 +0000117 switch(cinfo->in_color_space)
118 {
119 case JCS_EXT_RGB:
120 sse2fct=jsimd_extrgb_ycc_convert_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000121 #ifndef __x86_64__
DRCf25c0712009-04-03 12:00:51 +0000122 mmxfct=jsimd_extrgb_ycc_convert_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000123 #endif
DRCf25c0712009-04-03 12:00:51 +0000124 break;
125 case JCS_EXT_RGBX:
126 sse2fct=jsimd_extrgbx_ycc_convert_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000127 #ifndef __x86_64__
DRCf25c0712009-04-03 12:00:51 +0000128 mmxfct=jsimd_extrgbx_ycc_convert_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000129 #endif
DRCf25c0712009-04-03 12:00:51 +0000130 break;
131 case JCS_EXT_BGR:
132 sse2fct=jsimd_extbgr_ycc_convert_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000133 #ifndef __x86_64__
DRCf25c0712009-04-03 12:00:51 +0000134 mmxfct=jsimd_extbgr_ycc_convert_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000135 #endif
DRCf25c0712009-04-03 12:00:51 +0000136 break;
137 case JCS_EXT_BGRX:
138 sse2fct=jsimd_extbgrx_ycc_convert_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000139 #ifndef __x86_64__
DRCf25c0712009-04-03 12:00:51 +0000140 mmxfct=jsimd_extbgrx_ycc_convert_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000141 #endif
DRCf25c0712009-04-03 12:00:51 +0000142 break;
143 case JCS_EXT_XBGR:
144 sse2fct=jsimd_extxbgr_ycc_convert_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000145 #ifndef __x86_64__
DRCf25c0712009-04-03 12:00:51 +0000146 mmxfct=jsimd_extxbgr_ycc_convert_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000147 #endif
DRCf25c0712009-04-03 12:00:51 +0000148 break;
149 case JCS_EXT_XRGB:
150 sse2fct=jsimd_extxrgb_ycc_convert_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000151 #ifndef __x86_64__
DRCf25c0712009-04-03 12:00:51 +0000152 mmxfct=jsimd_extxrgb_ycc_convert_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000153 #endif
DRCf25c0712009-04-03 12:00:51 +0000154 break;
155 default:
156 sse2fct=jsimd_rgb_ycc_convert_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000157 #ifndef __x86_64__
DRCf25c0712009-04-03 12:00:51 +0000158 mmxfct=jsimd_rgb_ycc_convert_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000159 #endif
DRCf25c0712009-04-03 12:00:51 +0000160 break;
161 }
Pierre Ossmaneea72152009-03-09 13:34:17 +0000162 if ((simd_support & JSIMD_SSE2) &&
163 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
DRCf25c0712009-04-03 12:00:51 +0000164 sse2fct(cinfo->image_width, input_buf,
Pierre Ossmaneea72152009-03-09 13:34:17 +0000165 output_buf, output_row, num_rows);
DRCcdc8ac32009-06-25 20:38:31 +0000166 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +0000167 else if (simd_support & JSIMD_MMX)
DRCf25c0712009-04-03 12:00:51 +0000168 mmxfct(cinfo->image_width, input_buf,
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000169 output_buf, output_row, num_rows);
DRCcdc8ac32009-06-25 20:38:31 +0000170 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000171#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000172}
173
174GLOBAL(void)
175jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
176 JSAMPIMAGE input_buf, JDIMENSION input_row,
177 JSAMPARRAY output_buf, int num_rows)
178{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000179#ifdef WITH_SIMD
DRCf25c0712009-04-03 12:00:51 +0000180 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
DRCcdc8ac32009-06-25 20:38:31 +0000181 #ifndef __x86_64__
DRCf25c0712009-04-03 12:00:51 +0000182 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
DRCcdc8ac32009-06-25 20:38:31 +0000183 #endif
DRCf25c0712009-04-03 12:00:51 +0000184 switch(cinfo->out_color_space)
185 {
186 case JCS_EXT_RGB:
187 sse2fct=jsimd_ycc_extrgb_convert_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000188 #ifndef __x86_64__
DRCf25c0712009-04-03 12:00:51 +0000189 mmxfct=jsimd_ycc_extrgb_convert_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000190 #endif
DRCf25c0712009-04-03 12:00:51 +0000191 break;
192 case JCS_EXT_RGBX:
193 sse2fct=jsimd_ycc_extrgbx_convert_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000194 #ifndef __x86_64__
DRCf25c0712009-04-03 12:00:51 +0000195 mmxfct=jsimd_ycc_extrgbx_convert_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000196 #endif
DRCf25c0712009-04-03 12:00:51 +0000197 break;
198 case JCS_EXT_BGR:
199 sse2fct=jsimd_ycc_extbgr_convert_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000200 #ifndef __x86_64__
DRCf25c0712009-04-03 12:00:51 +0000201 mmxfct=jsimd_ycc_extbgr_convert_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000202 #endif
DRCf25c0712009-04-03 12:00:51 +0000203 break;
204 case JCS_EXT_BGRX:
205 sse2fct=jsimd_ycc_extbgrx_convert_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000206 #ifndef __x86_64__
DRCf25c0712009-04-03 12:00:51 +0000207 mmxfct=jsimd_ycc_extbgrx_convert_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000208 #endif
DRCf25c0712009-04-03 12:00:51 +0000209 break;
210 case JCS_EXT_XBGR:
211 sse2fct=jsimd_ycc_extxbgr_convert_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000212 #ifndef __x86_64__
DRCf25c0712009-04-03 12:00:51 +0000213 mmxfct=jsimd_ycc_extxbgr_convert_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000214 #endif
DRCf25c0712009-04-03 12:00:51 +0000215 break;
216 case JCS_EXT_XRGB:
217 sse2fct=jsimd_ycc_extxrgb_convert_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000218 #ifndef __x86_64__
DRCf25c0712009-04-03 12:00:51 +0000219 mmxfct=jsimd_ycc_extxrgb_convert_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000220 #endif
DRCf25c0712009-04-03 12:00:51 +0000221 break;
222 default:
223 sse2fct=jsimd_ycc_rgb_convert_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000224 #ifndef __x86_64__
DRCf25c0712009-04-03 12:00:51 +0000225 mmxfct=jsimd_ycc_rgb_convert_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000226 #endif
DRCf25c0712009-04-03 12:00:51 +0000227 break;
228 }
Pierre Ossmaneea72152009-03-09 13:34:17 +0000229 if ((simd_support & JSIMD_SSE2) &&
230 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
DRCf25c0712009-04-03 12:00:51 +0000231 sse2fct(cinfo->output_width, input_buf,
Pierre Ossmaneea72152009-03-09 13:34:17 +0000232 input_row, output_buf, num_rows);
DRCcdc8ac32009-06-25 20:38:31 +0000233 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +0000234 else if (simd_support & JSIMD_MMX)
DRCf25c0712009-04-03 12:00:51 +0000235 mmxfct(cinfo->output_width, input_buf,
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000236 input_row, output_buf, num_rows);
DRCcdc8ac32009-06-25 20:38:31 +0000237 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000238#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000239}
240
241GLOBAL(int)
242jsimd_can_h2v2_downsample (void)
243{
244 init_simd();
245
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000246 /* The code is optimised for these values only */
247 if (BITS_IN_JSAMPLE != 8)
248 return 0;
249 if (sizeof(JDIMENSION) != 4)
250 return 0;
251
Pierre Ossmaneea72152009-03-09 13:34:17 +0000252 if (simd_support & JSIMD_SSE2)
253 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000254 #ifndef __x86_64__
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000255 if (simd_support & JSIMD_MMX)
256 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000257 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000258
Pierre Ossman59a39382009-03-09 13:15:56 +0000259 return 0;
260}
261
262GLOBAL(int)
263jsimd_can_h2v1_downsample (void)
264{
265 init_simd();
266
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000267 /* The code is optimised for these values only */
268 if (BITS_IN_JSAMPLE != 8)
269 return 0;
270 if (sizeof(JDIMENSION) != 4)
271 return 0;
272
Pierre Ossmaneea72152009-03-09 13:34:17 +0000273 if (simd_support & JSIMD_SSE2)
274 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000275 #ifndef __x86_64__
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000276 if (simd_support & JSIMD_MMX)
277 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000278 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000279
Pierre Ossman59a39382009-03-09 13:15:56 +0000280 return 0;
281}
282
283GLOBAL(void)
284jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
285 JSAMPARRAY input_data, JSAMPARRAY output_data)
286{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000287#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000288 if (simd_support & JSIMD_SSE2)
289 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
290 compptr->v_samp_factor, compptr->width_in_blocks,
291 input_data, output_data);
DRCcdc8ac32009-06-25 20:38:31 +0000292 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +0000293 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000294 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
295 compptr->v_samp_factor, compptr->width_in_blocks,
296 input_data, output_data);
DRCcdc8ac32009-06-25 20:38:31 +0000297 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000298#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000299}
300
301GLOBAL(void)
302jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
303 JSAMPARRAY input_data, JSAMPARRAY output_data)
304{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000305#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000306 if (simd_support & JSIMD_SSE2)
307 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
308 compptr->v_samp_factor, compptr->width_in_blocks,
309 input_data, output_data);
DRCcdc8ac32009-06-25 20:38:31 +0000310 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +0000311 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000312 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
313 compptr->v_samp_factor, compptr->width_in_blocks,
314 input_data, output_data);
DRCcdc8ac32009-06-25 20:38:31 +0000315 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000316#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000317}
318
319GLOBAL(int)
320jsimd_can_h2v2_upsample (void)
321{
322 init_simd();
323
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000324 /* The code is optimised for these values only */
325 if (BITS_IN_JSAMPLE != 8)
326 return 0;
327 if (sizeof(JDIMENSION) != 4)
328 return 0;
329
Pierre Ossmaneea72152009-03-09 13:34:17 +0000330 if (simd_support & JSIMD_SSE2)
331 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000332 #ifndef __x86_64__
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000333 if (simd_support & JSIMD_MMX)
334 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000335 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000336
Pierre Ossman59a39382009-03-09 13:15:56 +0000337 return 0;
338}
339
340GLOBAL(int)
341jsimd_can_h2v1_upsample (void)
342{
343 init_simd();
344
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000345 /* The code is optimised for these values only */
346 if (BITS_IN_JSAMPLE != 8)
347 return 0;
348 if (sizeof(JDIMENSION) != 4)
349 return 0;
350
Pierre Ossmaneea72152009-03-09 13:34:17 +0000351 if (simd_support & JSIMD_SSE2)
352 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000353 #ifndef __x86_64__
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000354 if (simd_support & JSIMD_MMX)
355 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000356 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000357
Pierre Ossman59a39382009-03-09 13:15:56 +0000358 return 0;
359}
360
361GLOBAL(void)
362jsimd_h2v2_upsample (j_decompress_ptr cinfo,
363 jpeg_component_info * compptr,
364 JSAMPARRAY input_data,
365 JSAMPARRAY * output_data_ptr)
366{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000367#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000368 if (simd_support & JSIMD_SSE2)
369 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor,
370 cinfo->output_width, input_data, output_data_ptr);
DRCcdc8ac32009-06-25 20:38:31 +0000371 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +0000372 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000373 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor,
374 cinfo->output_width, input_data, output_data_ptr);
DRCcdc8ac32009-06-25 20:38:31 +0000375 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000376#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000377}
378
379GLOBAL(void)
380jsimd_h2v1_upsample (j_decompress_ptr cinfo,
381 jpeg_component_info * compptr,
382 JSAMPARRAY input_data,
383 JSAMPARRAY * output_data_ptr)
384{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000385#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000386 if (simd_support & JSIMD_SSE2)
387 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor,
388 cinfo->output_width, input_data, output_data_ptr);
DRCcdc8ac32009-06-25 20:38:31 +0000389 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +0000390 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000391 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor,
392 cinfo->output_width, input_data, output_data_ptr);
DRCcdc8ac32009-06-25 20:38:31 +0000393 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000394#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000395}
396
397GLOBAL(int)
398jsimd_can_h2v2_fancy_upsample (void)
399{
400 init_simd();
401
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000402 /* The code is optimised for these values only */
403 if (BITS_IN_JSAMPLE != 8)
404 return 0;
405 if (sizeof(JDIMENSION) != 4)
406 return 0;
407
Pierre Ossmaneea72152009-03-09 13:34:17 +0000408 if ((simd_support & JSIMD_SSE2) &&
409 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
410 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000411 #ifndef __x86_64__
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000412 if (simd_support & JSIMD_MMX)
413 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000414 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000415
Pierre Ossman59a39382009-03-09 13:15:56 +0000416 return 0;
417}
418
419GLOBAL(int)
420jsimd_can_h2v1_fancy_upsample (void)
421{
422 init_simd();
423
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000424 /* The code is optimised for these values only */
425 if (BITS_IN_JSAMPLE != 8)
426 return 0;
427 if (sizeof(JDIMENSION) != 4)
428 return 0;
429
Pierre Ossmaneea72152009-03-09 13:34:17 +0000430 if ((simd_support & JSIMD_SSE2) &&
431 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
432 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000433 #ifndef __x86_64__
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000434 if (simd_support & JSIMD_MMX)
435 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000436 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000437
Pierre Ossman59a39382009-03-09 13:15:56 +0000438 return 0;
439}
440
441GLOBAL(void)
442jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
443 jpeg_component_info * compptr,
444 JSAMPARRAY input_data,
445 JSAMPARRAY * output_data_ptr)
446{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000447#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000448 if ((simd_support & JSIMD_SSE2) &&
449 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
450 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
451 compptr->downsampled_width, input_data, output_data_ptr);
DRCcdc8ac32009-06-25 20:38:31 +0000452 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +0000453 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000454 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
455 compptr->downsampled_width, input_data, output_data_ptr);
DRCcdc8ac32009-06-25 20:38:31 +0000456 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000457#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000458}
459
460GLOBAL(void)
461jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
462 jpeg_component_info * compptr,
463 JSAMPARRAY input_data,
464 JSAMPARRAY * output_data_ptr)
465{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000466#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000467 if ((simd_support & JSIMD_SSE2) &&
468 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
469 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
470 compptr->downsampled_width, input_data, output_data_ptr);
DRCcdc8ac32009-06-25 20:38:31 +0000471 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +0000472 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000473 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
474 compptr->downsampled_width, input_data, output_data_ptr);
DRCcdc8ac32009-06-25 20:38:31 +0000475 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000476#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000477}
478
479GLOBAL(int)
480jsimd_can_h2v2_merged_upsample (void)
481{
482 init_simd();
483
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000484 /* The code is optimised for these values only */
485 if (BITS_IN_JSAMPLE != 8)
486 return 0;
487 if (sizeof(JDIMENSION) != 4)
488 return 0;
489
Pierre Ossmaneea72152009-03-09 13:34:17 +0000490 if ((simd_support & JSIMD_SSE2) &&
491 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
492 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000493 #ifndef __x86_64__
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000494 if (simd_support & JSIMD_MMX)
495 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000496 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000497
Pierre Ossman59a39382009-03-09 13:15:56 +0000498 return 0;
499}
500
501GLOBAL(int)
502jsimd_can_h2v1_merged_upsample (void)
503{
504 init_simd();
505
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000506 /* The code is optimised for these values only */
507 if (BITS_IN_JSAMPLE != 8)
508 return 0;
509 if (sizeof(JDIMENSION) != 4)
510 return 0;
511
Pierre Ossmaneea72152009-03-09 13:34:17 +0000512 if ((simd_support & JSIMD_SSE2) &&
513 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
514 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000515 #ifndef __x86_64__
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000516 if (simd_support & JSIMD_MMX)
517 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000518 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000519
Pierre Ossman59a39382009-03-09 13:15:56 +0000520 return 0;
521}
522
523GLOBAL(void)
524jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
525 JSAMPIMAGE input_buf,
526 JDIMENSION in_row_group_ctr,
527 JSAMPARRAY output_buf)
528{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000529#ifdef WITH_SIMD
DRC720e1612009-04-05 21:51:25 +0000530 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
DRCcdc8ac32009-06-25 20:38:31 +0000531 #ifndef __x86_64__
DRC720e1612009-04-05 21:51:25 +0000532 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
DRCcdc8ac32009-06-25 20:38:31 +0000533 #endif
DRC720e1612009-04-05 21:51:25 +0000534 switch(cinfo->out_color_space)
535 {
536 case JCS_EXT_RGB:
537 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000538 #ifndef __x86_64__
DRC720e1612009-04-05 21:51:25 +0000539 mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000540 #endif
DRC720e1612009-04-05 21:51:25 +0000541 break;
542 case JCS_EXT_RGBX:
543 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000544 #ifndef __x86_64__
DRC720e1612009-04-05 21:51:25 +0000545 mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000546 #endif
DRC720e1612009-04-05 21:51:25 +0000547 break;
548 case JCS_EXT_BGR:
549 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000550 #ifndef __x86_64__
DRC720e1612009-04-05 21:51:25 +0000551 mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000552 #endif
DRC720e1612009-04-05 21:51:25 +0000553 break;
554 case JCS_EXT_BGRX:
555 sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000556 #ifndef __x86_64__
DRC720e1612009-04-05 21:51:25 +0000557 mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000558 #endif
DRC720e1612009-04-05 21:51:25 +0000559 break;
560 case JCS_EXT_XBGR:
561 sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000562 #ifndef __x86_64__
DRC720e1612009-04-05 21:51:25 +0000563 mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000564 #endif
DRC720e1612009-04-05 21:51:25 +0000565 break;
566 case JCS_EXT_XRGB:
567 sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000568 #ifndef __x86_64__
DRC720e1612009-04-05 21:51:25 +0000569 mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000570 #endif
DRC720e1612009-04-05 21:51:25 +0000571 break;
572 default:
573 sse2fct=jsimd_h2v2_merged_upsample_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000574 #ifndef __x86_64__
DRC720e1612009-04-05 21:51:25 +0000575 mmxfct=jsimd_h2v2_merged_upsample_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000576 #endif
DRC720e1612009-04-05 21:51:25 +0000577 break;
578 }
Pierre Ossmaneea72152009-03-09 13:34:17 +0000579 if ((simd_support & JSIMD_SSE2) &&
580 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
DRC720e1612009-04-05 21:51:25 +0000581 sse2fct(cinfo->output_width, input_buf,
Pierre Ossmaneea72152009-03-09 13:34:17 +0000582 in_row_group_ctr, output_buf);
DRCcdc8ac32009-06-25 20:38:31 +0000583 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +0000584 else if (simd_support & JSIMD_MMX)
DRC720e1612009-04-05 21:51:25 +0000585 mmxfct(cinfo->output_width, input_buf,
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000586 in_row_group_ctr, output_buf);
DRCcdc8ac32009-06-25 20:38:31 +0000587 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000588#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000589}
590
591GLOBAL(void)
592jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
593 JSAMPIMAGE input_buf,
594 JDIMENSION in_row_group_ctr,
595 JSAMPARRAY output_buf)
596{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000597#ifdef WITH_SIMD
DRC720e1612009-04-05 21:51:25 +0000598 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
DRCcdc8ac32009-06-25 20:38:31 +0000599 #ifndef __x86_64__
DRC720e1612009-04-05 21:51:25 +0000600 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
DRCcdc8ac32009-06-25 20:38:31 +0000601 #endif
DRC720e1612009-04-05 21:51:25 +0000602 switch(cinfo->out_color_space)
603 {
604 case JCS_EXT_RGB:
605 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000606 #ifndef __x86_64__
DRC720e1612009-04-05 21:51:25 +0000607 mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000608 #endif
DRC720e1612009-04-05 21:51:25 +0000609 break;
610 case JCS_EXT_RGBX:
611 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000612 #ifndef __x86_64__
DRC720e1612009-04-05 21:51:25 +0000613 mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000614 #endif
DRC720e1612009-04-05 21:51:25 +0000615 break;
616 case JCS_EXT_BGR:
617 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000618 #ifndef __x86_64__
DRC720e1612009-04-05 21:51:25 +0000619 mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000620 #endif
DRC720e1612009-04-05 21:51:25 +0000621 break;
622 case JCS_EXT_BGRX:
623 sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000624 #ifndef __x86_64__
DRC720e1612009-04-05 21:51:25 +0000625 mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000626 #endif
DRC720e1612009-04-05 21:51:25 +0000627 break;
628 case JCS_EXT_XBGR:
629 sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000630 #ifndef __x86_64__
DRC720e1612009-04-05 21:51:25 +0000631 mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000632 #endif
DRC720e1612009-04-05 21:51:25 +0000633 break;
634 case JCS_EXT_XRGB:
635 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000636 #ifndef __x86_64__
DRC720e1612009-04-05 21:51:25 +0000637 mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000638 #endif
DRC720e1612009-04-05 21:51:25 +0000639 break;
640 default:
641 sse2fct=jsimd_h2v1_merged_upsample_sse2;
DRCcdc8ac32009-06-25 20:38:31 +0000642 #ifndef __x86_64__
DRC720e1612009-04-05 21:51:25 +0000643 mmxfct=jsimd_h2v1_merged_upsample_mmx;
DRCcdc8ac32009-06-25 20:38:31 +0000644 #endif
DRC720e1612009-04-05 21:51:25 +0000645 break;
646 }
Pierre Ossmaneea72152009-03-09 13:34:17 +0000647 if ((simd_support & JSIMD_SSE2) &&
648 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
DRC720e1612009-04-05 21:51:25 +0000649 sse2fct(cinfo->output_width, input_buf,
Pierre Ossmaneea72152009-03-09 13:34:17 +0000650 in_row_group_ctr, output_buf);
DRCcdc8ac32009-06-25 20:38:31 +0000651 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +0000652 else if (simd_support & JSIMD_MMX)
DRC720e1612009-04-05 21:51:25 +0000653 mmxfct(cinfo->output_width, input_buf,
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000654 in_row_group_ctr, output_buf);
DRCcdc8ac32009-06-25 20:38:31 +0000655 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000656#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000657}
658
659GLOBAL(int)
660jsimd_can_convsamp (void)
661{
662 init_simd();
663
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000664 /* The code is optimised for these values only */
665 if (DCTSIZE != 8)
666 return 0;
667 if (BITS_IN_JSAMPLE != 8)
668 return 0;
669 if (sizeof(JDIMENSION) != 4)
670 return 0;
671 if (sizeof(DCTELEM) != 2)
672 return 0;
673
Pierre Ossmaneea72152009-03-09 13:34:17 +0000674 if (simd_support & JSIMD_SSE2)
675 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000676 #ifndef __x86_64__
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000677 if (simd_support & JSIMD_MMX)
678 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000679 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000680
Pierre Ossman59a39382009-03-09 13:15:56 +0000681 return 0;
682}
683
684GLOBAL(int)
685jsimd_can_convsamp_float (void)
686{
687 init_simd();
688
Pierre Ossman65d03172009-03-09 13:28:10 +0000689 /* The code is optimised for these values only */
690 if (DCTSIZE != 8)
691 return 0;
692 if (BITS_IN_JSAMPLE != 8)
693 return 0;
694 if (sizeof(JDIMENSION) != 4)
695 return 0;
696 if (sizeof(FAST_FLOAT) != 4)
697 return 0;
698
Pierre Ossmaneea72152009-03-09 13:34:17 +0000699 if (simd_support & JSIMD_SSE2)
700 return 1;
Pierre Ossman018fc422009-03-09 13:31:56 +0000701 if (simd_support & JSIMD_SSE)
702 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000703 #ifndef __x86_64__
Pierre Ossman65d03172009-03-09 13:28:10 +0000704 if (simd_support & JSIMD_3DNOW)
705 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000706 #endif
Pierre Ossman65d03172009-03-09 13:28:10 +0000707
Pierre Ossman59a39382009-03-09 13:15:56 +0000708 return 0;
709}
710
711GLOBAL(void)
712jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
713 DCTELEM * workspace)
714{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000715#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000716 if (simd_support & JSIMD_SSE2)
717 jsimd_convsamp_sse2(sample_data, start_col, workspace);
DRCcdc8ac32009-06-25 20:38:31 +0000718 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +0000719 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000720 jsimd_convsamp_mmx(sample_data, start_col, workspace);
DRCcdc8ac32009-06-25 20:38:31 +0000721 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000722#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000723}
724
725GLOBAL(void)
726jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
727 FAST_FLOAT * workspace)
728{
Pierre Ossman65d03172009-03-09 13:28:10 +0000729#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000730 if (simd_support & JSIMD_SSE2)
731 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
DRCcdc8ac32009-06-25 20:38:31 +0000732 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +0000733 else if (simd_support & JSIMD_SSE)
Pierre Ossman018fc422009-03-09 13:31:56 +0000734 jsimd_convsamp_float_sse(sample_data, start_col, workspace);
735 else if (simd_support & JSIMD_3DNOW)
Pierre Ossman65d03172009-03-09 13:28:10 +0000736 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
DRCcdc8ac32009-06-25 20:38:31 +0000737 #endif
Pierre Ossman65d03172009-03-09 13:28:10 +0000738#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000739}
740
741GLOBAL(int)
742jsimd_can_fdct_islow (void)
743{
744 init_simd();
745
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000746 /* The code is optimised for these values only */
747 if (DCTSIZE != 8)
748 return 0;
749 if (sizeof(DCTELEM) != 2)
750 return 0;
751
Pierre Ossmaneea72152009-03-09 13:34:17 +0000752 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
753 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000754 #ifndef __x86_64__
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000755 if (simd_support & JSIMD_MMX)
756 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000757 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000758
Pierre Ossman59a39382009-03-09 13:15:56 +0000759 return 0;
760}
761
762GLOBAL(int)
763jsimd_can_fdct_ifast (void)
764{
765 init_simd();
766
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000767 /* The code is optimised for these values only */
768 if (DCTSIZE != 8)
769 return 0;
770 if (sizeof(DCTELEM) != 2)
771 return 0;
772
Pierre Ossmaneea72152009-03-09 13:34:17 +0000773 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
774 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000775 #ifndef __x86_64__
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000776 if (simd_support & JSIMD_MMX)
777 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000778 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000779
Pierre Ossman59a39382009-03-09 13:15:56 +0000780 return 0;
781}
782
783GLOBAL(int)
784jsimd_can_fdct_float (void)
785{
786 init_simd();
787
Pierre Ossman65d03172009-03-09 13:28:10 +0000788 /* The code is optimised for these values only */
789 if (DCTSIZE != 8)
790 return 0;
791 if (sizeof(FAST_FLOAT) != 4)
792 return 0;
793
Pierre Ossman018fc422009-03-09 13:31:56 +0000794 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
795 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000796 #ifndef __x86_64__
Pierre Ossman65d03172009-03-09 13:28:10 +0000797 if (simd_support & JSIMD_3DNOW)
798 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000799 #endif
Pierre Ossman65d03172009-03-09 13:28:10 +0000800
Pierre Ossman59a39382009-03-09 13:15:56 +0000801 return 0;
802}
803
804GLOBAL(void)
805jsimd_fdct_islow (DCTELEM * data)
806{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000807#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000808 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
809 jsimd_fdct_islow_sse2(data);
DRCcdc8ac32009-06-25 20:38:31 +0000810 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +0000811 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000812 jsimd_fdct_islow_mmx(data);
DRCcdc8ac32009-06-25 20:38:31 +0000813 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000814#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000815}
816
817GLOBAL(void)
818jsimd_fdct_ifast (DCTELEM * data)
819{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000820#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000821 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
822 jsimd_fdct_ifast_sse2(data);
DRCcdc8ac32009-06-25 20:38:31 +0000823 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +0000824 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000825 jsimd_fdct_ifast_mmx(data);
DRCcdc8ac32009-06-25 20:38:31 +0000826 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000827#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000828}
829
830GLOBAL(void)
831jsimd_fdct_float (FAST_FLOAT * data)
832{
Pierre Ossman65d03172009-03-09 13:28:10 +0000833#ifdef WITH_SIMD
Pierre Ossman018fc422009-03-09 13:31:56 +0000834 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
835 jsimd_fdct_float_sse(data);
DRCcdc8ac32009-06-25 20:38:31 +0000836 #ifndef __x86_64__
Pierre Ossman018fc422009-03-09 13:31:56 +0000837 else if (simd_support & JSIMD_3DNOW)
Pierre Ossman65d03172009-03-09 13:28:10 +0000838 jsimd_fdct_float_3dnow(data);
DRCcdc8ac32009-06-25 20:38:31 +0000839 #endif
Pierre Ossman65d03172009-03-09 13:28:10 +0000840#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000841}
842
843GLOBAL(int)
844jsimd_can_quantize (void)
845{
846 init_simd();
847
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000848 /* The code is optimised for these values only */
849 if (DCTSIZE != 8)
850 return 0;
851 if (sizeof(JCOEF) != 2)
852 return 0;
853 if (sizeof(DCTELEM) != 2)
854 return 0;
855
Pierre Ossmaneea72152009-03-09 13:34:17 +0000856 if (simd_support & JSIMD_SSE2)
857 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000858 #ifndef __x86_64__
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000859 if (simd_support & JSIMD_MMX)
860 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000861 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000862
Pierre Ossman59a39382009-03-09 13:15:56 +0000863 return 0;
864}
865
866GLOBAL(int)
867jsimd_can_quantize_float (void)
868{
869 init_simd();
870
Pierre Ossman65d03172009-03-09 13:28:10 +0000871 /* The code is optimised for these values only */
872 if (DCTSIZE != 8)
873 return 0;
874 if (sizeof(JCOEF) != 2)
875 return 0;
876 if (sizeof(FAST_FLOAT) != 4)
877 return 0;
878
Pierre Ossmaneea72152009-03-09 13:34:17 +0000879 if (simd_support & JSIMD_SSE2)
880 return 1;
Pierre Ossman018fc422009-03-09 13:31:56 +0000881 if (simd_support & JSIMD_SSE)
882 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000883 #ifndef __x86_64__
Pierre Ossman65d03172009-03-09 13:28:10 +0000884 if (simd_support & JSIMD_3DNOW)
885 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000886 #endif
Pierre Ossman65d03172009-03-09 13:28:10 +0000887
Pierre Ossman59a39382009-03-09 13:15:56 +0000888 return 0;
889}
890
891GLOBAL(void)
892jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
893 DCTELEM * workspace)
894{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000895#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000896 if (simd_support & JSIMD_SSE2)
897 jsimd_quantize_sse2(coef_block, divisors, workspace);
DRCcdc8ac32009-06-25 20:38:31 +0000898 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +0000899 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000900 jsimd_quantize_mmx(coef_block, divisors, workspace);
DRCcdc8ac32009-06-25 20:38:31 +0000901 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000902#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000903}
904
905GLOBAL(void)
906jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
907 FAST_FLOAT * workspace)
908{
Pierre Ossman65d03172009-03-09 13:28:10 +0000909#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000910 if (simd_support & JSIMD_SSE2)
911 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
DRCcdc8ac32009-06-25 20:38:31 +0000912 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +0000913 else if (simd_support & JSIMD_SSE)
Pierre Ossman018fc422009-03-09 13:31:56 +0000914 jsimd_quantize_float_sse(coef_block, divisors, workspace);
915 else if (simd_support & JSIMD_3DNOW)
Pierre Ossman65d03172009-03-09 13:28:10 +0000916 jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
DRCcdc8ac32009-06-25 20:38:31 +0000917 #endif
Pierre Ossman65d03172009-03-09 13:28:10 +0000918#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000919}
920
921GLOBAL(int)
922jsimd_can_idct_2x2 (void)
923{
924 init_simd();
925
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000926 /* The code is optimised for these values only */
927 if (DCTSIZE != 8)
928 return 0;
929 if (sizeof(JCOEF) != 2)
930 return 0;
931 if (BITS_IN_JSAMPLE != 8)
932 return 0;
933 if (sizeof(JDIMENSION) != 4)
934 return 0;
935 if (sizeof(ISLOW_MULT_TYPE) != 2)
936 return 0;
937
Pierre Ossmaneea72152009-03-09 13:34:17 +0000938 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
939 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000940 #ifndef __x86_64__
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000941 if (simd_support & JSIMD_MMX)
942 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000943 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000944
Pierre Ossman59a39382009-03-09 13:15:56 +0000945 return 0;
946}
947
948GLOBAL(int)
949jsimd_can_idct_4x4 (void)
950{
951 init_simd();
952
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000953 /* The code is optimised for these values only */
954 if (DCTSIZE != 8)
955 return 0;
956 if (sizeof(JCOEF) != 2)
957 return 0;
958 if (BITS_IN_JSAMPLE != 8)
959 return 0;
960 if (sizeof(JDIMENSION) != 4)
961 return 0;
962 if (sizeof(ISLOW_MULT_TYPE) != 2)
963 return 0;
964
Pierre Ossmaneea72152009-03-09 13:34:17 +0000965 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
966 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000967 #ifndef __x86_64__
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000968 if (simd_support & JSIMD_MMX)
969 return 1;
DRCcdc8ac32009-06-25 20:38:31 +0000970 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000971
Pierre Ossman59a39382009-03-09 13:15:56 +0000972 return 0;
973}
974
975GLOBAL(void)
976jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
977 JCOEFPTR coef_block, JSAMPARRAY output_buf,
978 JDIMENSION output_col)
979{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000980#if WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000981 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
982 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
DRCcdc8ac32009-06-25 20:38:31 +0000983 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +0000984 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000985 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
DRCcdc8ac32009-06-25 20:38:31 +0000986 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000987#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000988}
989
990GLOBAL(void)
991jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
992 JCOEFPTR coef_block, JSAMPARRAY output_buf,
993 JDIMENSION output_col)
994{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000995#if WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000996 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
997 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
DRCcdc8ac32009-06-25 20:38:31 +0000998 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +0000999 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +00001000 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
DRCcdc8ac32009-06-25 20:38:31 +00001001 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +00001002#endif
Pierre Ossman59a39382009-03-09 13:15:56 +00001003}
1004
1005GLOBAL(int)
1006jsimd_can_idct_islow (void)
1007{
1008 init_simd();
1009
Pierre Ossman5eb84ff2009-03-09 13:25:30 +00001010 /* The code is optimised for these values only */
1011 if (DCTSIZE != 8)
1012 return 0;
1013 if (sizeof(JCOEF) != 2)
1014 return 0;
1015 if (BITS_IN_JSAMPLE != 8)
1016 return 0;
1017 if (sizeof(JDIMENSION) != 4)
1018 return 0;
1019 if (sizeof(ISLOW_MULT_TYPE) != 2)
1020 return 0;
1021
Pierre Ossmaneea72152009-03-09 13:34:17 +00001022 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
1023 return 1;
DRCcdc8ac32009-06-25 20:38:31 +00001024 #ifndef __x86_64__
Pierre Ossman5eb84ff2009-03-09 13:25:30 +00001025 if (simd_support & JSIMD_MMX)
1026 return 1;
DRCcdc8ac32009-06-25 20:38:31 +00001027 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +00001028
Pierre Ossman59a39382009-03-09 13:15:56 +00001029 return 0;
1030}
1031
1032GLOBAL(int)
1033jsimd_can_idct_ifast (void)
1034{
1035 init_simd();
1036
Pierre Ossman5eb84ff2009-03-09 13:25:30 +00001037 /* The code is optimised for these values only */
1038 if (DCTSIZE != 8)
1039 return 0;
1040 if (sizeof(JCOEF) != 2)
1041 return 0;
1042 if (BITS_IN_JSAMPLE != 8)
1043 return 0;
1044 if (sizeof(JDIMENSION) != 4)
1045 return 0;
1046 if (sizeof(IFAST_MULT_TYPE) != 2)
1047 return 0;
1048 if (IFAST_SCALE_BITS != 2)
1049 return 0;
1050
Pierre Ossmaneea72152009-03-09 13:34:17 +00001051 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1052 return 1;
DRCcdc8ac32009-06-25 20:38:31 +00001053 #ifndef __x86_64__
Pierre Ossman5eb84ff2009-03-09 13:25:30 +00001054 if (simd_support & JSIMD_MMX)
1055 return 1;
DRCcdc8ac32009-06-25 20:38:31 +00001056 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +00001057
Pierre Ossman59a39382009-03-09 13:15:56 +00001058 return 0;
1059}
1060
1061GLOBAL(int)
1062jsimd_can_idct_float (void)
1063{
1064 init_simd();
1065
Pierre Ossman65d03172009-03-09 13:28:10 +00001066 if (DCTSIZE != 8)
1067 return 0;
1068 if (sizeof(JCOEF) != 2)
1069 return 0;
1070 if (BITS_IN_JSAMPLE != 8)
1071 return 0;
1072 if (sizeof(JDIMENSION) != 4)
1073 return 0;
1074 if (sizeof(FAST_FLOAT) != 4)
1075 return 0;
1076 if (sizeof(FLOAT_MULT_TYPE) != 4)
1077 return 0;
1078
Pierre Ossmaneea72152009-03-09 13:34:17 +00001079 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1080 return 1;
DRCcdc8ac32009-06-25 20:38:31 +00001081 #ifndef __x86_64__
Pierre Ossman018fc422009-03-09 13:31:56 +00001082 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1083 return 1;
Pierre Ossman65d03172009-03-09 13:28:10 +00001084 if (simd_support & JSIMD_3DNOW)
1085 return 1;
DRCcdc8ac32009-06-25 20:38:31 +00001086 #endif
Pierre Ossman65d03172009-03-09 13:28:10 +00001087
Pierre Ossman59a39382009-03-09 13:15:56 +00001088 return 0;
1089}
1090
1091GLOBAL(void)
1092jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1093 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1094 JDIMENSION output_col)
1095{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +00001096#if WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +00001097 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
1098 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col);
DRCcdc8ac32009-06-25 20:38:31 +00001099 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +00001100 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +00001101 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, output_col);
DRCcdc8ac32009-06-25 20:38:31 +00001102 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +00001103#endif
Pierre Ossman59a39382009-03-09 13:15:56 +00001104}
1105
1106GLOBAL(void)
1107jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1108 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1109 JDIMENSION output_col)
1110{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +00001111#if WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +00001112 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1113 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col);
DRCcdc8ac32009-06-25 20:38:31 +00001114 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +00001115 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +00001116 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, output_col);
DRCcdc8ac32009-06-25 20:38:31 +00001117 #endif
Pierre Ossman5eb84ff2009-03-09 13:25:30 +00001118#endif
Pierre Ossman59a39382009-03-09 13:15:56 +00001119}
1120
1121GLOBAL(void)
1122jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1123 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1124 JDIMENSION output_col)
1125{
Pierre Ossman65d03172009-03-09 13:28:10 +00001126#if WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +00001127 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1128 jsimd_idct_float_sse2(compptr->dct_table, coef_block,
1129 output_buf, output_col);
DRCcdc8ac32009-06-25 20:38:31 +00001130 #ifndef __x86_64__
Pierre Ossmaneea72152009-03-09 13:34:17 +00001131 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
Pierre Ossman018fc422009-03-09 13:31:56 +00001132 jsimd_idct_float_sse(compptr->dct_table, coef_block,
1133 output_buf, output_col);
1134 else if (simd_support & JSIMD_3DNOW)
Pierre Ossman65d03172009-03-09 13:28:10 +00001135 jsimd_idct_float_3dnow(compptr->dct_table, coef_block,
1136 output_buf, output_col);
DRCcdc8ac32009-06-25 20:38:31 +00001137 #endif
Pierre Ossman65d03172009-03-09 13:28:10 +00001138#endif
Pierre Ossman59a39382009-03-09 13:15:56 +00001139}
1140