/*
 * jsimd.c
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 *
 * This file contains the interface between the "normal" portions
 * of the library and the SIMD implementations.
 */
12
13#define JPEG_INTERNALS
14#include "jinclude.h"
15#include "jpeglib.h"
Pierre Ossman2ae181c2009-03-09 13:21:27 +000016#include "jsimd.h"
Pierre Ossman59a39382009-03-09 13:15:56 +000017#include "jdct.h"
Pierre Ossman2ae181c2009-03-09 13:21:27 +000018#include "jsimddct.h"
19#include "simd/jsimd.h"
Pierre Ossman59a39382009-03-09 13:15:56 +000020
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment. This macro allows us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is non-zero when the low `order` bits of the
 * address are all zero, i.e. when ptr sits on a (1 << order)-byte boundary.
 * The address is converted to size_t rather than unsigned so that the
 * conversion does not truncate pointers on targets where pointers are wider
 * than unsigned int.  Both arguments are parenthesised so that arbitrary
 * expressions can be passed safely.
 *
 * Without WITH_SIMD the macro is a constant 0 and its arguments are never
 * evaluated (so the jconst_* symbols need not exist in that build).
 */
#ifdef WITH_SIMD
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & ((((size_t)1) << (order)) - 1)) == 0)
#else
#define IS_ALIGNED(ptr, order) (0)
#endif

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
32
Pierre Ossman59a39382009-03-09 13:15:56 +000033static unsigned int simd_support = ~0;
34
35/*
36 * Check what SIMD accelerations are supported.
37 *
38 * FIXME: This code is racy under a multi-threaded environment.
39 */
40LOCAL(void)
41init_simd (void)
42{
43 if (simd_support != ~0)
44 return;
45
Pierre Ossman2ae181c2009-03-09 13:21:27 +000046#ifdef WITH_SIMD
47 simd_support = jpeg_simd_cpu_support();
48#else
Pierre Ossman59a39382009-03-09 13:15:56 +000049 simd_support = JSIMD_NONE;
Pierre Ossman2ae181c2009-03-09 13:21:27 +000050#endif
Pierre Ossman59a39382009-03-09 13:15:56 +000051}
52
53GLOBAL(int)
54jsimd_can_rgb_ycc (void)
55{
56 init_simd();
57
Pierre Ossman5eb84ff2009-03-09 13:25:30 +000058 /* The code is optimised for these values only */
59 if (BITS_IN_JSAMPLE != 8)
60 return 0;
61 if (sizeof(JDIMENSION) != 4)
62 return 0;
63 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
64 return 0;
65
Pierre Ossmaneea72152009-03-09 13:34:17 +000066 if ((simd_support & JSIMD_SSE2) &&
67 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
68 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +000069 if (simd_support & JSIMD_MMX)
70 return 1;
71
Pierre Ossman59a39382009-03-09 13:15:56 +000072 return 0;
73}
74
75GLOBAL(int)
76jsimd_can_ycc_rgb (void)
77{
78 init_simd();
79
Pierre Ossman5eb84ff2009-03-09 13:25:30 +000080 /* The code is optimised for these values only */
81 if (BITS_IN_JSAMPLE != 8)
82 return 0;
83 if (sizeof(JDIMENSION) != 4)
84 return 0;
85 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
86 return 0;
87
Pierre Ossmaneea72152009-03-09 13:34:17 +000088 if ((simd_support & JSIMD_SSE2) &&
89 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
90 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +000091 if (simd_support & JSIMD_MMX)
92 return 1;
93
Pierre Ossman59a39382009-03-09 13:15:56 +000094 return 0;
95}
96
97GLOBAL(void)
98jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
99 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
100 JDIMENSION output_row, int num_rows)
101{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000102#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000103 if ((simd_support & JSIMD_SSE2) &&
104 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
105 jsimd_rgb_ycc_convert_sse2(cinfo->image_width, input_buf,
106 output_buf, output_row, num_rows);
107 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000108 jsimd_rgb_ycc_convert_mmx(cinfo->image_width, input_buf,
109 output_buf, output_row, num_rows);
110#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000111}
112
113GLOBAL(void)
114jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
115 JSAMPIMAGE input_buf, JDIMENSION input_row,
116 JSAMPARRAY output_buf, int num_rows)
117{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000118#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000119 if ((simd_support & JSIMD_SSE2) &&
120 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
121 jsimd_ycc_rgb_convert_sse2(cinfo->output_width, input_buf,
122 input_row, output_buf, num_rows);
123 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000124 jsimd_ycc_rgb_convert_mmx(cinfo->output_width, input_buf,
125 input_row, output_buf, num_rows);
126#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000127}
128
129GLOBAL(int)
130jsimd_can_h2v2_downsample (void)
131{
132 init_simd();
133
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000134 /* The code is optimised for these values only */
135 if (BITS_IN_JSAMPLE != 8)
136 return 0;
137 if (sizeof(JDIMENSION) != 4)
138 return 0;
139
Pierre Ossmaneea72152009-03-09 13:34:17 +0000140 if (simd_support & JSIMD_SSE2)
141 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000142 if (simd_support & JSIMD_MMX)
143 return 1;
144
Pierre Ossman59a39382009-03-09 13:15:56 +0000145 return 0;
146}
147
148GLOBAL(int)
149jsimd_can_h2v1_downsample (void)
150{
151 init_simd();
152
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000153 /* The code is optimised for these values only */
154 if (BITS_IN_JSAMPLE != 8)
155 return 0;
156 if (sizeof(JDIMENSION) != 4)
157 return 0;
158
Pierre Ossmaneea72152009-03-09 13:34:17 +0000159 if (simd_support & JSIMD_SSE2)
160 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000161 if (simd_support & JSIMD_MMX)
162 return 1;
163
Pierre Ossman59a39382009-03-09 13:15:56 +0000164 return 0;
165}
166
167GLOBAL(void)
168jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
169 JSAMPARRAY input_data, JSAMPARRAY output_data)
170{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000171#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000172 if (simd_support & JSIMD_SSE2)
173 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
174 compptr->v_samp_factor, compptr->width_in_blocks,
175 input_data, output_data);
176 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000177 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
178 compptr->v_samp_factor, compptr->width_in_blocks,
179 input_data, output_data);
180#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000181}
182
183GLOBAL(void)
184jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
185 JSAMPARRAY input_data, JSAMPARRAY output_data)
186{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000187#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000188 if (simd_support & JSIMD_SSE2)
189 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
190 compptr->v_samp_factor, compptr->width_in_blocks,
191 input_data, output_data);
192 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000193 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
194 compptr->v_samp_factor, compptr->width_in_blocks,
195 input_data, output_data);
196#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000197}
198
199GLOBAL(int)
200jsimd_can_h2v2_upsample (void)
201{
202 init_simd();
203
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000204 /* The code is optimised for these values only */
205 if (BITS_IN_JSAMPLE != 8)
206 return 0;
207 if (sizeof(JDIMENSION) != 4)
208 return 0;
209
Pierre Ossmaneea72152009-03-09 13:34:17 +0000210 if (simd_support & JSIMD_SSE2)
211 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000212 if (simd_support & JSIMD_MMX)
213 return 1;
214
Pierre Ossman59a39382009-03-09 13:15:56 +0000215 return 0;
216}
217
218GLOBAL(int)
219jsimd_can_h2v1_upsample (void)
220{
221 init_simd();
222
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000223 /* The code is optimised for these values only */
224 if (BITS_IN_JSAMPLE != 8)
225 return 0;
226 if (sizeof(JDIMENSION) != 4)
227 return 0;
228
Pierre Ossmaneea72152009-03-09 13:34:17 +0000229 if (simd_support & JSIMD_SSE2)
230 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000231 if (simd_support & JSIMD_MMX)
232 return 1;
233
Pierre Ossman59a39382009-03-09 13:15:56 +0000234 return 0;
235}
236
237GLOBAL(void)
238jsimd_h2v2_upsample (j_decompress_ptr cinfo,
239 jpeg_component_info * compptr,
240 JSAMPARRAY input_data,
241 JSAMPARRAY * output_data_ptr)
242{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000243#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000244 if (simd_support & JSIMD_SSE2)
245 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor,
246 cinfo->output_width, input_data, output_data_ptr);
247 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000248 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor,
249 cinfo->output_width, input_data, output_data_ptr);
250#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000251}
252
253GLOBAL(void)
254jsimd_h2v1_upsample (j_decompress_ptr cinfo,
255 jpeg_component_info * compptr,
256 JSAMPARRAY input_data,
257 JSAMPARRAY * output_data_ptr)
258{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000259#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000260 if (simd_support & JSIMD_SSE2)
261 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor,
262 cinfo->output_width, input_data, output_data_ptr);
263 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000264 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor,
265 cinfo->output_width, input_data, output_data_ptr);
266#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000267}
268
269GLOBAL(int)
270jsimd_can_h2v2_fancy_upsample (void)
271{
272 init_simd();
273
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000274 /* The code is optimised for these values only */
275 if (BITS_IN_JSAMPLE != 8)
276 return 0;
277 if (sizeof(JDIMENSION) != 4)
278 return 0;
279
Pierre Ossmaneea72152009-03-09 13:34:17 +0000280 if ((simd_support & JSIMD_SSE2) &&
281 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
282 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000283 if (simd_support & JSIMD_MMX)
284 return 1;
285
Pierre Ossman59a39382009-03-09 13:15:56 +0000286 return 0;
287}
288
289GLOBAL(int)
290jsimd_can_h2v1_fancy_upsample (void)
291{
292 init_simd();
293
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000294 /* The code is optimised for these values only */
295 if (BITS_IN_JSAMPLE != 8)
296 return 0;
297 if (sizeof(JDIMENSION) != 4)
298 return 0;
299
Pierre Ossmaneea72152009-03-09 13:34:17 +0000300 if ((simd_support & JSIMD_SSE2) &&
301 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
302 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000303 if (simd_support & JSIMD_MMX)
304 return 1;
305
Pierre Ossman59a39382009-03-09 13:15:56 +0000306 return 0;
307}
308
309GLOBAL(void)
310jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
311 jpeg_component_info * compptr,
312 JSAMPARRAY input_data,
313 JSAMPARRAY * output_data_ptr)
314{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000315#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000316 if ((simd_support & JSIMD_SSE2) &&
317 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
318 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
319 compptr->downsampled_width, input_data, output_data_ptr);
320 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000321 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
322 compptr->downsampled_width, input_data, output_data_ptr);
323#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000324}
325
326GLOBAL(void)
327jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
328 jpeg_component_info * compptr,
329 JSAMPARRAY input_data,
330 JSAMPARRAY * output_data_ptr)
331{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000332#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000333 if ((simd_support & JSIMD_SSE2) &&
334 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
335 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
336 compptr->downsampled_width, input_data, output_data_ptr);
337 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000338 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
339 compptr->downsampled_width, input_data, output_data_ptr);
340#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000341}
342
343GLOBAL(int)
344jsimd_can_h2v2_merged_upsample (void)
345{
346 init_simd();
347
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000348 /* The code is optimised for these values only */
349 if (BITS_IN_JSAMPLE != 8)
350 return 0;
351 if (sizeof(JDIMENSION) != 4)
352 return 0;
353
Pierre Ossmaneea72152009-03-09 13:34:17 +0000354 if ((simd_support & JSIMD_SSE2) &&
355 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
356 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000357 if (simd_support & JSIMD_MMX)
358 return 1;
359
Pierre Ossman59a39382009-03-09 13:15:56 +0000360 return 0;
361}
362
363GLOBAL(int)
364jsimd_can_h2v1_merged_upsample (void)
365{
366 init_simd();
367
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000368 /* The code is optimised for these values only */
369 if (BITS_IN_JSAMPLE != 8)
370 return 0;
371 if (sizeof(JDIMENSION) != 4)
372 return 0;
373
Pierre Ossmaneea72152009-03-09 13:34:17 +0000374 if ((simd_support & JSIMD_SSE2) &&
375 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
376 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000377 if (simd_support & JSIMD_MMX)
378 return 1;
379
Pierre Ossman59a39382009-03-09 13:15:56 +0000380 return 0;
381}
382
383GLOBAL(void)
384jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
385 JSAMPIMAGE input_buf,
386 JDIMENSION in_row_group_ctr,
387 JSAMPARRAY output_buf)
388{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000389#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000390 if ((simd_support & JSIMD_SSE2) &&
391 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
392 jsimd_h2v2_merged_upsample_sse2(cinfo->output_width, input_buf,
393 in_row_group_ctr, output_buf);
394 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000395 jsimd_h2v2_merged_upsample_mmx(cinfo->output_width, input_buf,
396 in_row_group_ctr, output_buf);
397#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000398}
399
400GLOBAL(void)
401jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
402 JSAMPIMAGE input_buf,
403 JDIMENSION in_row_group_ctr,
404 JSAMPARRAY output_buf)
405{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000406#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000407 if ((simd_support & JSIMD_SSE2) &&
408 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
409 jsimd_h2v1_merged_upsample_sse2(cinfo->output_width, input_buf,
410 in_row_group_ctr, output_buf);
411 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000412 jsimd_h2v1_merged_upsample_mmx(cinfo->output_width, input_buf,
413 in_row_group_ctr, output_buf);
414#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000415}
416
417GLOBAL(int)
418jsimd_can_convsamp (void)
419{
420 init_simd();
421
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000422 /* The code is optimised for these values only */
423 if (DCTSIZE != 8)
424 return 0;
425 if (BITS_IN_JSAMPLE != 8)
426 return 0;
427 if (sizeof(JDIMENSION) != 4)
428 return 0;
429 if (sizeof(DCTELEM) != 2)
430 return 0;
431
Pierre Ossmaneea72152009-03-09 13:34:17 +0000432 if (simd_support & JSIMD_SSE2)
433 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000434 if (simd_support & JSIMD_MMX)
435 return 1;
436
Pierre Ossman59a39382009-03-09 13:15:56 +0000437 return 0;
438}
439
440GLOBAL(int)
441jsimd_can_convsamp_float (void)
442{
443 init_simd();
444
Pierre Ossman65d03172009-03-09 13:28:10 +0000445 /* The code is optimised for these values only */
446 if (DCTSIZE != 8)
447 return 0;
448 if (BITS_IN_JSAMPLE != 8)
449 return 0;
450 if (sizeof(JDIMENSION) != 4)
451 return 0;
452 if (sizeof(FAST_FLOAT) != 4)
453 return 0;
454
Pierre Ossmaneea72152009-03-09 13:34:17 +0000455 if (simd_support & JSIMD_SSE2)
456 return 1;
Pierre Ossman018fc422009-03-09 13:31:56 +0000457 if (simd_support & JSIMD_SSE)
458 return 1;
Pierre Ossman65d03172009-03-09 13:28:10 +0000459 if (simd_support & JSIMD_3DNOW)
460 return 1;
461
Pierre Ossman59a39382009-03-09 13:15:56 +0000462 return 0;
463}
464
465GLOBAL(void)
466jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
467 DCTELEM * workspace)
468{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000469#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000470 if (simd_support & JSIMD_SSE2)
471 jsimd_convsamp_sse2(sample_data, start_col, workspace);
472 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000473 jsimd_convsamp_mmx(sample_data, start_col, workspace);
474#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000475}
476
477GLOBAL(void)
478jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
479 FAST_FLOAT * workspace)
480{
Pierre Ossman65d03172009-03-09 13:28:10 +0000481#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000482 if (simd_support & JSIMD_SSE2)
483 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
484 else if (simd_support & JSIMD_SSE)
Pierre Ossman018fc422009-03-09 13:31:56 +0000485 jsimd_convsamp_float_sse(sample_data, start_col, workspace);
486 else if (simd_support & JSIMD_3DNOW)
Pierre Ossman65d03172009-03-09 13:28:10 +0000487 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
488#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000489}
490
491GLOBAL(int)
492jsimd_can_fdct_islow (void)
493{
494 init_simd();
495
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000496 /* The code is optimised for these values only */
497 if (DCTSIZE != 8)
498 return 0;
499 if (sizeof(DCTELEM) != 2)
500 return 0;
501
Pierre Ossmaneea72152009-03-09 13:34:17 +0000502 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
503 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000504 if (simd_support & JSIMD_MMX)
505 return 1;
506
Pierre Ossman59a39382009-03-09 13:15:56 +0000507 return 0;
508}
509
510GLOBAL(int)
511jsimd_can_fdct_ifast (void)
512{
513 init_simd();
514
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000515 /* The code is optimised for these values only */
516 if (DCTSIZE != 8)
517 return 0;
518 if (sizeof(DCTELEM) != 2)
519 return 0;
520
Pierre Ossmaneea72152009-03-09 13:34:17 +0000521 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
522 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000523 if (simd_support & JSIMD_MMX)
524 return 1;
525
Pierre Ossman59a39382009-03-09 13:15:56 +0000526 return 0;
527}
528
529GLOBAL(int)
530jsimd_can_fdct_float (void)
531{
532 init_simd();
533
Pierre Ossman65d03172009-03-09 13:28:10 +0000534 /* The code is optimised for these values only */
535 if (DCTSIZE != 8)
536 return 0;
537 if (sizeof(FAST_FLOAT) != 4)
538 return 0;
539
Pierre Ossman018fc422009-03-09 13:31:56 +0000540 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
541 return 1;
Pierre Ossman65d03172009-03-09 13:28:10 +0000542 if (simd_support & JSIMD_3DNOW)
543 return 1;
544
Pierre Ossman59a39382009-03-09 13:15:56 +0000545 return 0;
546}
547
548GLOBAL(void)
549jsimd_fdct_islow (DCTELEM * data)
550{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000551#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000552 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
553 jsimd_fdct_islow_sse2(data);
554 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000555 jsimd_fdct_islow_mmx(data);
556#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000557}
558
559GLOBAL(void)
560jsimd_fdct_ifast (DCTELEM * data)
561{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000562#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000563 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
564 jsimd_fdct_ifast_sse2(data);
565 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000566 jsimd_fdct_ifast_mmx(data);
567#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000568}
569
570GLOBAL(void)
571jsimd_fdct_float (FAST_FLOAT * data)
572{
Pierre Ossman65d03172009-03-09 13:28:10 +0000573#ifdef WITH_SIMD
Pierre Ossman018fc422009-03-09 13:31:56 +0000574 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
575 jsimd_fdct_float_sse(data);
576 else if (simd_support & JSIMD_3DNOW)
Pierre Ossman65d03172009-03-09 13:28:10 +0000577 jsimd_fdct_float_3dnow(data);
578#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000579}
580
581GLOBAL(int)
582jsimd_can_quantize (void)
583{
584 init_simd();
585
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000586 /* The code is optimised for these values only */
587 if (DCTSIZE != 8)
588 return 0;
589 if (sizeof(JCOEF) != 2)
590 return 0;
591 if (sizeof(DCTELEM) != 2)
592 return 0;
593
Pierre Ossmaneea72152009-03-09 13:34:17 +0000594 if (simd_support & JSIMD_SSE2)
595 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000596 if (simd_support & JSIMD_MMX)
597 return 1;
598
Pierre Ossman59a39382009-03-09 13:15:56 +0000599 return 0;
600}
601
602GLOBAL(int)
603jsimd_can_quantize_float (void)
604{
605 init_simd();
606
Pierre Ossman65d03172009-03-09 13:28:10 +0000607 /* The code is optimised for these values only */
608 if (DCTSIZE != 8)
609 return 0;
610 if (sizeof(JCOEF) != 2)
611 return 0;
612 if (sizeof(FAST_FLOAT) != 4)
613 return 0;
614
Pierre Ossmaneea72152009-03-09 13:34:17 +0000615 if (simd_support & JSIMD_SSE2)
616 return 1;
Pierre Ossman018fc422009-03-09 13:31:56 +0000617 if (simd_support & JSIMD_SSE)
618 return 1;
Pierre Ossman65d03172009-03-09 13:28:10 +0000619 if (simd_support & JSIMD_3DNOW)
620 return 1;
621
Pierre Ossman59a39382009-03-09 13:15:56 +0000622 return 0;
623}
624
625GLOBAL(void)
626jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
627 DCTELEM * workspace)
628{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000629#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000630 if (simd_support & JSIMD_SSE2)
631 jsimd_quantize_sse2(coef_block, divisors, workspace);
632 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000633 jsimd_quantize_mmx(coef_block, divisors, workspace);
634#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000635}
636
637GLOBAL(void)
638jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
639 FAST_FLOAT * workspace)
640{
Pierre Ossman65d03172009-03-09 13:28:10 +0000641#ifdef WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000642 if (simd_support & JSIMD_SSE2)
643 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
644 else if (simd_support & JSIMD_SSE)
Pierre Ossman018fc422009-03-09 13:31:56 +0000645 jsimd_quantize_float_sse(coef_block, divisors, workspace);
646 else if (simd_support & JSIMD_3DNOW)
Pierre Ossman65d03172009-03-09 13:28:10 +0000647 jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
648#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000649}
650
651GLOBAL(int)
652jsimd_can_idct_2x2 (void)
653{
654 init_simd();
655
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000656 /* The code is optimised for these values only */
657 if (DCTSIZE != 8)
658 return 0;
659 if (sizeof(JCOEF) != 2)
660 return 0;
661 if (BITS_IN_JSAMPLE != 8)
662 return 0;
663 if (sizeof(JDIMENSION) != 4)
664 return 0;
665 if (sizeof(ISLOW_MULT_TYPE) != 2)
666 return 0;
667
Pierre Ossmaneea72152009-03-09 13:34:17 +0000668 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
669 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000670 if (simd_support & JSIMD_MMX)
671 return 1;
672
Pierre Ossman59a39382009-03-09 13:15:56 +0000673 return 0;
674}
675
676GLOBAL(int)
677jsimd_can_idct_4x4 (void)
678{
679 init_simd();
680
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000681 /* The code is optimised for these values only */
682 if (DCTSIZE != 8)
683 return 0;
684 if (sizeof(JCOEF) != 2)
685 return 0;
686 if (BITS_IN_JSAMPLE != 8)
687 return 0;
688 if (sizeof(JDIMENSION) != 4)
689 return 0;
690 if (sizeof(ISLOW_MULT_TYPE) != 2)
691 return 0;
692
Pierre Ossmaneea72152009-03-09 13:34:17 +0000693 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
694 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000695 if (simd_support & JSIMD_MMX)
696 return 1;
697
Pierre Ossman59a39382009-03-09 13:15:56 +0000698 return 0;
699}
700
701GLOBAL(void)
702jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
703 JCOEFPTR coef_block, JSAMPARRAY output_buf,
704 JDIMENSION output_col)
705{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000706#if WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000707 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
708 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
709 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000710 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
711#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000712}
713
714GLOBAL(void)
715jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
716 JCOEFPTR coef_block, JSAMPARRAY output_buf,
717 JDIMENSION output_col)
718{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000719#if WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000720 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
721 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
722 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000723 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
724#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000725}
726
727GLOBAL(int)
728jsimd_can_idct_islow (void)
729{
730 init_simd();
731
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000732 /* The code is optimised for these values only */
733 if (DCTSIZE != 8)
734 return 0;
735 if (sizeof(JCOEF) != 2)
736 return 0;
737 if (BITS_IN_JSAMPLE != 8)
738 return 0;
739 if (sizeof(JDIMENSION) != 4)
740 return 0;
741 if (sizeof(ISLOW_MULT_TYPE) != 2)
742 return 0;
743
Pierre Ossmaneea72152009-03-09 13:34:17 +0000744 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
745 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000746 if (simd_support & JSIMD_MMX)
747 return 1;
748
Pierre Ossman59a39382009-03-09 13:15:56 +0000749 return 0;
750}
751
752GLOBAL(int)
753jsimd_can_idct_ifast (void)
754{
755 init_simd();
756
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000757 /* The code is optimised for these values only */
758 if (DCTSIZE != 8)
759 return 0;
760 if (sizeof(JCOEF) != 2)
761 return 0;
762 if (BITS_IN_JSAMPLE != 8)
763 return 0;
764 if (sizeof(JDIMENSION) != 4)
765 return 0;
766 if (sizeof(IFAST_MULT_TYPE) != 2)
767 return 0;
768 if (IFAST_SCALE_BITS != 2)
769 return 0;
770
Pierre Ossmaneea72152009-03-09 13:34:17 +0000771 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
772 return 1;
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000773 if (simd_support & JSIMD_MMX)
774 return 1;
775
Pierre Ossman59a39382009-03-09 13:15:56 +0000776 return 0;
777}
778
779GLOBAL(int)
780jsimd_can_idct_float (void)
781{
782 init_simd();
783
Pierre Ossman65d03172009-03-09 13:28:10 +0000784 if (DCTSIZE != 8)
785 return 0;
786 if (sizeof(JCOEF) != 2)
787 return 0;
788 if (BITS_IN_JSAMPLE != 8)
789 return 0;
790 if (sizeof(JDIMENSION) != 4)
791 return 0;
792 if (sizeof(FAST_FLOAT) != 4)
793 return 0;
794 if (sizeof(FLOAT_MULT_TYPE) != 4)
795 return 0;
796
Pierre Ossmaneea72152009-03-09 13:34:17 +0000797 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
798 return 1;
Pierre Ossman018fc422009-03-09 13:31:56 +0000799 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
800 return 1;
Pierre Ossman65d03172009-03-09 13:28:10 +0000801 if (simd_support & JSIMD_3DNOW)
802 return 1;
803
Pierre Ossman59a39382009-03-09 13:15:56 +0000804 return 0;
805}
806
807GLOBAL(void)
808jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
809 JCOEFPTR coef_block, JSAMPARRAY output_buf,
810 JDIMENSION output_col)
811{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000812#if WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000813 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
814 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col);
815 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000816 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, output_col);
817#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000818}
819
820GLOBAL(void)
821jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
822 JCOEFPTR coef_block, JSAMPARRAY output_buf,
823 JDIMENSION output_col)
824{
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000825#if WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000826 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
827 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col);
828 else if (simd_support & JSIMD_MMX)
Pierre Ossman5eb84ff2009-03-09 13:25:30 +0000829 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, output_col);
830#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000831}
832
833GLOBAL(void)
834jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
835 JCOEFPTR coef_block, JSAMPARRAY output_buf,
836 JDIMENSION output_col)
837{
Pierre Ossman65d03172009-03-09 13:28:10 +0000838#if WITH_SIMD
Pierre Ossmaneea72152009-03-09 13:34:17 +0000839 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
840 jsimd_idct_float_sse2(compptr->dct_table, coef_block,
841 output_buf, output_col);
842 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
Pierre Ossman018fc422009-03-09 13:31:56 +0000843 jsimd_idct_float_sse(compptr->dct_table, coef_block,
844 output_buf, output_col);
845 else if (simd_support & JSIMD_3DNOW)
Pierre Ossman65d03172009-03-09 13:28:10 +0000846 jsimd_idct_float_3dnow(compptr->dct_table, coef_block,
847 output_buf, output_col);
848#endif
Pierre Ossman59a39382009-03-09 13:15:56 +0000849}
850