/*
 * jsimd_i386.c
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright 2009 D. R. Commander
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 *
 * This file contains the interface between the "normal" portions
 * of the library and the SIMD implementations when running on a
 * 32-bit x86 architecture.
 */
14
15#define JPEG_INTERNALS
16#include "../jinclude.h"
17#include "../jpeglib.h"
18#include "../jsimd.h"
19#include "../jdct.h"
20#include "../jsimddct.h"
21#include "jsimd.h"
22
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment. This macro allows us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is nonzero when the address of `ptr` is a
 * multiple of 2^order bytes.  Both arguments are parenthesized so that an
 * expression such as `base + n` is evaluated as pointer arithmetic before
 * it is converted to an integer, and the conversion goes through size_t
 * so that the address is not narrowed to a smaller integer type.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & (((size_t)1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */

/*
 * Bitmask of SIMD extensions that may be used.  The all-ones value is the
 * "not probed yet" sentinel that init_simd() tests for.
 */
static unsigned int simd_support = ~0U;
32
33/*
34 * Check what SIMD accelerations are supported.
35 *
36 * FIXME: This code is racy under a multi-threaded environment.
37 */
38LOCAL(void)
39init_simd (void)
40{
41 char *env = NULL;
42
43 if (simd_support != ~0)
44 return;
45
46 simd_support = jpeg_simd_cpu_support();
47
48 /* Force different settings through environment variables */
49 env = getenv("JSIMD_FORCEMMX");
50 if ((env != NULL) && (strcmp(env, "1") == 0))
51 simd_support &= JSIMD_MMX;
52 env = getenv("JSIMD_FORCE3DNOW");
53 if ((env != NULL) && (strcmp(env, "1") == 0))
54 simd_support &= JSIMD_3DNOW|JSIMD_MMX;
55 env = getenv("JSIMD_FORCESSE");
56 if ((env != NULL) && (strcmp(env, "1") == 0))
57 simd_support &= JSIMD_SSE|JSIMD_MMX;
58 env = getenv("JSIMD_FORCESSE2");
59 if ((env != NULL) && (strcmp(env, "1") == 0))
60 simd_support &= JSIMD_SSE2;
61}
62
63GLOBAL(int)
64jsimd_can_rgb_ycc (void)
65{
66 init_simd();
67
68 /* The code is optimised for these values only */
69 if (BITS_IN_JSAMPLE != 8)
70 return 0;
71 if (sizeof(JDIMENSION) != 4)
72 return 0;
73 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
74 return 0;
75
76 if ((simd_support & JSIMD_SSE2) &&
77 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
78 return 1;
79 if (simd_support & JSIMD_MMX)
80 return 1;
81
82 return 0;
83}
84
85GLOBAL(int)
86jsimd_can_ycc_rgb (void)
87{
88 init_simd();
89
90 /* The code is optimised for these values only */
91 if (BITS_IN_JSAMPLE != 8)
92 return 0;
93 if (sizeof(JDIMENSION) != 4)
94 return 0;
95 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
96 return 0;
97
98 if ((simd_support & JSIMD_SSE2) &&
99 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
100 return 1;
101 if (simd_support & JSIMD_MMX)
102 return 1;
103
104 return 0;
105}
106
107GLOBAL(void)
108jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
109 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
110 JDIMENSION output_row, int num_rows)
111{
112 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
113 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
114
115 switch(cinfo->in_color_space)
116 {
117 case JCS_EXT_RGB:
118 sse2fct=jsimd_extrgb_ycc_convert_sse2;
119 mmxfct=jsimd_extrgb_ycc_convert_mmx;
120 break;
121 case JCS_EXT_RGBX:
122 sse2fct=jsimd_extrgbx_ycc_convert_sse2;
123 mmxfct=jsimd_extrgbx_ycc_convert_mmx;
124 break;
125 case JCS_EXT_BGR:
126 sse2fct=jsimd_extbgr_ycc_convert_sse2;
127 mmxfct=jsimd_extbgr_ycc_convert_mmx;
128 break;
129 case JCS_EXT_BGRX:
130 sse2fct=jsimd_extbgrx_ycc_convert_sse2;
131 mmxfct=jsimd_extbgrx_ycc_convert_mmx;
132 break;
133 case JCS_EXT_XBGR:
134 sse2fct=jsimd_extxbgr_ycc_convert_sse2;
135 mmxfct=jsimd_extxbgr_ycc_convert_mmx;
136 break;
137 case JCS_EXT_XRGB:
138 sse2fct=jsimd_extxrgb_ycc_convert_sse2;
139 mmxfct=jsimd_extxrgb_ycc_convert_mmx;
140 break;
141 default:
142 sse2fct=jsimd_rgb_ycc_convert_sse2;
143 mmxfct=jsimd_rgb_ycc_convert_mmx;
144 break;
145 }
146
147 if ((simd_support & JSIMD_SSE2) &&
148 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
149 sse2fct(cinfo->image_width, input_buf,
150 output_buf, output_row, num_rows);
151 else if (simd_support & JSIMD_MMX)
152 mmxfct(cinfo->image_width, input_buf,
153 output_buf, output_row, num_rows);
154}
155
156GLOBAL(void)
157jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
158 JSAMPIMAGE input_buf, JDIMENSION input_row,
159 JSAMPARRAY output_buf, int num_rows)
160{
161 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
162 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
163
164 switch(cinfo->out_color_space)
165 {
166 case JCS_EXT_RGB:
167 sse2fct=jsimd_ycc_extrgb_convert_sse2;
168 mmxfct=jsimd_ycc_extrgb_convert_mmx;
169 break;
170 case JCS_EXT_RGBX:
171 sse2fct=jsimd_ycc_extrgbx_convert_sse2;
172 mmxfct=jsimd_ycc_extrgbx_convert_mmx;
173 break;
174 case JCS_EXT_BGR:
175 sse2fct=jsimd_ycc_extbgr_convert_sse2;
176 mmxfct=jsimd_ycc_extbgr_convert_mmx;
177 break;
178 case JCS_EXT_BGRX:
179 sse2fct=jsimd_ycc_extbgrx_convert_sse2;
180 mmxfct=jsimd_ycc_extbgrx_convert_mmx;
181 break;
182 case JCS_EXT_XBGR:
183 sse2fct=jsimd_ycc_extxbgr_convert_sse2;
184 mmxfct=jsimd_ycc_extxbgr_convert_mmx;
185 break;
186 case JCS_EXT_XRGB:
187 sse2fct=jsimd_ycc_extxrgb_convert_sse2;
188 mmxfct=jsimd_ycc_extxrgb_convert_mmx;
189 break;
190 default:
191 sse2fct=jsimd_ycc_rgb_convert_sse2;
192 mmxfct=jsimd_ycc_rgb_convert_mmx;
193 break;
194 }
195
196 if ((simd_support & JSIMD_SSE2) &&
197 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
198 sse2fct(cinfo->output_width, input_buf,
199 input_row, output_buf, num_rows);
200 else if (simd_support & JSIMD_MMX)
201 mmxfct(cinfo->output_width, input_buf,
202 input_row, output_buf, num_rows);
203}
204
205GLOBAL(int)
206jsimd_can_h2v2_downsample (void)
207{
208 init_simd();
209
210 /* The code is optimised for these values only */
211 if (BITS_IN_JSAMPLE != 8)
212 return 0;
213 if (sizeof(JDIMENSION) != 4)
214 return 0;
215
216 if (simd_support & JSIMD_SSE2)
217 return 1;
218 if (simd_support & JSIMD_MMX)
219 return 1;
220
221 return 0;
222}
223
224GLOBAL(int)
225jsimd_can_h2v1_downsample (void)
226{
227 init_simd();
228
229 /* The code is optimised for these values only */
230 if (BITS_IN_JSAMPLE != 8)
231 return 0;
232 if (sizeof(JDIMENSION) != 4)
233 return 0;
234
235 if (simd_support & JSIMD_SSE2)
236 return 1;
237 if (simd_support & JSIMD_MMX)
238 return 1;
239
240 return 0;
241}
242
243GLOBAL(void)
244jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
245 JSAMPARRAY input_data, JSAMPARRAY output_data)
246{
247 if (simd_support & JSIMD_SSE2)
248 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
249 compptr->v_samp_factor, compptr->width_in_blocks,
250 input_data, output_data);
251 else if (simd_support & JSIMD_MMX)
252 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
253 compptr->v_samp_factor, compptr->width_in_blocks,
254 input_data, output_data);
255}
256
257GLOBAL(void)
258jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
259 JSAMPARRAY input_data, JSAMPARRAY output_data)
260{
261 if (simd_support & JSIMD_SSE2)
262 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
263 compptr->v_samp_factor, compptr->width_in_blocks,
264 input_data, output_data);
265 else if (simd_support & JSIMD_MMX)
266 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
267 compptr->v_samp_factor, compptr->width_in_blocks,
268 input_data, output_data);
269}
270
271GLOBAL(int)
272jsimd_can_h2v2_upsample (void)
273{
274 init_simd();
275
276 /* The code is optimised for these values only */
277 if (BITS_IN_JSAMPLE != 8)
278 return 0;
279 if (sizeof(JDIMENSION) != 4)
280 return 0;
281
282 if (simd_support & JSIMD_SSE2)
283 return 1;
284 if (simd_support & JSIMD_MMX)
285 return 1;
286
287 return 0;
288}
289
290GLOBAL(int)
291jsimd_can_h2v1_upsample (void)
292{
293 init_simd();
294
295 /* The code is optimised for these values only */
296 if (BITS_IN_JSAMPLE != 8)
297 return 0;
298 if (sizeof(JDIMENSION) != 4)
299 return 0;
300
301 if (simd_support & JSIMD_SSE2)
302 return 1;
303 if (simd_support & JSIMD_MMX)
304 return 1;
305
306 return 0;
307}
308
309GLOBAL(void)
310jsimd_h2v2_upsample (j_decompress_ptr cinfo,
311 jpeg_component_info * compptr,
312 JSAMPARRAY input_data,
313 JSAMPARRAY * output_data_ptr)
314{
315 if (simd_support & JSIMD_SSE2)
316 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor,
317 cinfo->output_width, input_data, output_data_ptr);
318 else if (simd_support & JSIMD_MMX)
319 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor,
320 cinfo->output_width, input_data, output_data_ptr);
321}
322
323GLOBAL(void)
324jsimd_h2v1_upsample (j_decompress_ptr cinfo,
325 jpeg_component_info * compptr,
326 JSAMPARRAY input_data,
327 JSAMPARRAY * output_data_ptr)
328{
329 if (simd_support & JSIMD_SSE2)
330 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor,
331 cinfo->output_width, input_data, output_data_ptr);
332 else if (simd_support & JSIMD_MMX)
333 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor,
334 cinfo->output_width, input_data, output_data_ptr);
335}
336
337GLOBAL(int)
338jsimd_can_h2v2_fancy_upsample (void)
339{
340 init_simd();
341
342 /* The code is optimised for these values only */
343 if (BITS_IN_JSAMPLE != 8)
344 return 0;
345 if (sizeof(JDIMENSION) != 4)
346 return 0;
347
348 if ((simd_support & JSIMD_SSE2) &&
349 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
350 return 1;
351 if (simd_support & JSIMD_MMX)
352 return 1;
353
354 return 0;
355}
356
357GLOBAL(int)
358jsimd_can_h2v1_fancy_upsample (void)
359{
360 init_simd();
361
362 /* The code is optimised for these values only */
363 if (BITS_IN_JSAMPLE != 8)
364 return 0;
365 if (sizeof(JDIMENSION) != 4)
366 return 0;
367
368 if ((simd_support & JSIMD_SSE2) &&
369 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
370 return 1;
371 if (simd_support & JSIMD_MMX)
372 return 1;
373
374 return 0;
375}
376
377GLOBAL(void)
378jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
379 jpeg_component_info * compptr,
380 JSAMPARRAY input_data,
381 JSAMPARRAY * output_data_ptr)
382{
383 if ((simd_support & JSIMD_SSE2) &&
384 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
385 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
386 compptr->downsampled_width, input_data, output_data_ptr);
387 else if (simd_support & JSIMD_MMX)
388 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
389 compptr->downsampled_width, input_data, output_data_ptr);
390}
391
392GLOBAL(void)
393jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
394 jpeg_component_info * compptr,
395 JSAMPARRAY input_data,
396 JSAMPARRAY * output_data_ptr)
397{
398 if ((simd_support & JSIMD_SSE2) &&
399 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
400 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
401 compptr->downsampled_width, input_data, output_data_ptr);
402 else if (simd_support & JSIMD_MMX)
403 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
404 compptr->downsampled_width, input_data, output_data_ptr);
405}
406
407GLOBAL(int)
408jsimd_can_h2v2_merged_upsample (void)
409{
410 init_simd();
411
412 /* The code is optimised for these values only */
413 if (BITS_IN_JSAMPLE != 8)
414 return 0;
415 if (sizeof(JDIMENSION) != 4)
416 return 0;
417
418 if ((simd_support & JSIMD_SSE2) &&
419 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
420 return 1;
421 if (simd_support & JSIMD_MMX)
422 return 1;
423
424 return 0;
425}
426
427GLOBAL(int)
428jsimd_can_h2v1_merged_upsample (void)
429{
430 init_simd();
431
432 /* The code is optimised for these values only */
433 if (BITS_IN_JSAMPLE != 8)
434 return 0;
435 if (sizeof(JDIMENSION) != 4)
436 return 0;
437
438 if ((simd_support & JSIMD_SSE2) &&
439 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
440 return 1;
441 if (simd_support & JSIMD_MMX)
442 return 1;
443
444 return 0;
445}
446
447GLOBAL(void)
448jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
449 JSAMPIMAGE input_buf,
450 JDIMENSION in_row_group_ctr,
451 JSAMPARRAY output_buf)
452{
453 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
454 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
455
456 switch(cinfo->out_color_space)
457 {
458 case JCS_EXT_RGB:
459 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
460 mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx;
461 break;
462 case JCS_EXT_RGBX:
463 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
464 mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx;
465 break;
466 case JCS_EXT_BGR:
467 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
468 mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx;
469 break;
470 case JCS_EXT_BGRX:
471 sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
472 mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx;
473 break;
474 case JCS_EXT_XBGR:
475 sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
476 mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx;
477 break;
478 case JCS_EXT_XRGB:
479 sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
480 mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx;
481 break;
482 default:
483 sse2fct=jsimd_h2v2_merged_upsample_sse2;
484 mmxfct=jsimd_h2v2_merged_upsample_mmx;
485 break;
486 }
487
488 if ((simd_support & JSIMD_SSE2) &&
489 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
490 sse2fct(cinfo->output_width, input_buf,
491 in_row_group_ctr, output_buf);
492 else if (simd_support & JSIMD_MMX)
493 mmxfct(cinfo->output_width, input_buf,
494 in_row_group_ctr, output_buf);
495}
496
497GLOBAL(void)
498jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
499 JSAMPIMAGE input_buf,
500 JDIMENSION in_row_group_ctr,
501 JSAMPARRAY output_buf)
502{
503 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
504 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
505
506 switch(cinfo->out_color_space)
507 {
508 case JCS_EXT_RGB:
509 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
510 mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx;
511 break;
512 case JCS_EXT_RGBX:
513 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
514 mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx;
515 break;
516 case JCS_EXT_BGR:
517 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
518 mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx;
519 break;
520 case JCS_EXT_BGRX:
521 sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
522 mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx;
523 break;
524 case JCS_EXT_XBGR:
525 sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
526 mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx;
527 break;
528 case JCS_EXT_XRGB:
529 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
530 mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx;
531 break;
532 default:
533 sse2fct=jsimd_h2v1_merged_upsample_sse2;
534 mmxfct=jsimd_h2v1_merged_upsample_mmx;
535 break;
536 }
537
538 if ((simd_support & JSIMD_SSE2) &&
539 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
540 sse2fct(cinfo->output_width, input_buf,
541 in_row_group_ctr, output_buf);
542 else if (simd_support & JSIMD_MMX)
543 mmxfct(cinfo->output_width, input_buf,
544 in_row_group_ctr, output_buf);
545}
546
547GLOBAL(int)
548jsimd_can_convsamp (void)
549{
550 init_simd();
551
552 /* The code is optimised for these values only */
553 if (DCTSIZE != 8)
554 return 0;
555 if (BITS_IN_JSAMPLE != 8)
556 return 0;
557 if (sizeof(JDIMENSION) != 4)
558 return 0;
559 if (sizeof(DCTELEM) != 2)
560 return 0;
561
562 if (simd_support & JSIMD_SSE2)
563 return 1;
564 if (simd_support & JSIMD_MMX)
565 return 1;
566
567 return 0;
568}
569
570GLOBAL(int)
571jsimd_can_convsamp_float (void)
572{
573 init_simd();
574
575 /* The code is optimised for these values only */
576 if (DCTSIZE != 8)
577 return 0;
578 if (BITS_IN_JSAMPLE != 8)
579 return 0;
580 if (sizeof(JDIMENSION) != 4)
581 return 0;
582 if (sizeof(FAST_FLOAT) != 4)
583 return 0;
584
585 if (simd_support & JSIMD_SSE2)
586 return 1;
587 if (simd_support & JSIMD_SSE)
588 return 1;
589 if (simd_support & JSIMD_3DNOW)
590 return 1;
591
592 return 0;
593}
594
595GLOBAL(void)
596jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
597 DCTELEM * workspace)
598{
599 if (simd_support & JSIMD_SSE2)
600 jsimd_convsamp_sse2(sample_data, start_col, workspace);
601 else if (simd_support & JSIMD_MMX)
602 jsimd_convsamp_mmx(sample_data, start_col, workspace);
603}
604
605GLOBAL(void)
606jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
607 FAST_FLOAT * workspace)
608{
609 if (simd_support & JSIMD_SSE2)
610 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
611 else if (simd_support & JSIMD_SSE)
612 jsimd_convsamp_float_sse(sample_data, start_col, workspace);
613 else if (simd_support & JSIMD_3DNOW)
614 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
615}
616
617GLOBAL(int)
618jsimd_can_fdct_islow (void)
619{
620 init_simd();
621
622 /* The code is optimised for these values only */
623 if (DCTSIZE != 8)
624 return 0;
625 if (sizeof(DCTELEM) != 2)
626 return 0;
627
628 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
629 return 1;
630 if (simd_support & JSIMD_MMX)
631 return 1;
632
633 return 0;
634}
635
636GLOBAL(int)
637jsimd_can_fdct_ifast (void)
638{
639 init_simd();
640
641 /* The code is optimised for these values only */
642 if (DCTSIZE != 8)
643 return 0;
644 if (sizeof(DCTELEM) != 2)
645 return 0;
646
647 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
648 return 1;
649 if (simd_support & JSIMD_MMX)
650 return 1;
651
652 return 0;
653}
654
655GLOBAL(int)
656jsimd_can_fdct_float (void)
657{
658 init_simd();
659
660 /* The code is optimised for these values only */
661 if (DCTSIZE != 8)
662 return 0;
663 if (sizeof(FAST_FLOAT) != 4)
664 return 0;
665
666 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
667 return 1;
668 if (simd_support & JSIMD_3DNOW)
669 return 1;
670
671 return 0;
672}
673
674GLOBAL(void)
675jsimd_fdct_islow (DCTELEM * data)
676{
677 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
678 jsimd_fdct_islow_sse2(data);
679 else if (simd_support & JSIMD_MMX)
680 jsimd_fdct_islow_mmx(data);
681}
682
683GLOBAL(void)
684jsimd_fdct_ifast (DCTELEM * data)
685{
686 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
687 jsimd_fdct_ifast_sse2(data);
688 else if (simd_support & JSIMD_MMX)
689 jsimd_fdct_ifast_mmx(data);
690}
691
692GLOBAL(void)
693jsimd_fdct_float (FAST_FLOAT * data)
694{
695 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
696 jsimd_fdct_float_sse(data);
697 else if (simd_support & JSIMD_3DNOW)
698 jsimd_fdct_float_3dnow(data);
699}
700
701GLOBAL(int)
702jsimd_can_quantize (void)
703{
704 init_simd();
705
706 /* The code is optimised for these values only */
707 if (DCTSIZE != 8)
708 return 0;
709 if (sizeof(JCOEF) != 2)
710 return 0;
711 if (sizeof(DCTELEM) != 2)
712 return 0;
713
714 if (simd_support & JSIMD_SSE2)
715 return 1;
716 if (simd_support & JSIMD_MMX)
717 return 1;
718
719 return 0;
720}
721
722GLOBAL(int)
723jsimd_can_quantize_float (void)
724{
725 init_simd();
726
727 /* The code is optimised for these values only */
728 if (DCTSIZE != 8)
729 return 0;
730 if (sizeof(JCOEF) != 2)
731 return 0;
732 if (sizeof(FAST_FLOAT) != 4)
733 return 0;
734
735 if (simd_support & JSIMD_SSE2)
736 return 1;
737 if (simd_support & JSIMD_SSE)
738 return 1;
739 if (simd_support & JSIMD_3DNOW)
740 return 1;
741
742 return 0;
743}
744
745GLOBAL(void)
746jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
747 DCTELEM * workspace)
748{
749 if (simd_support & JSIMD_SSE2)
750 jsimd_quantize_sse2(coef_block, divisors, workspace);
751 else if (simd_support & JSIMD_MMX)
752 jsimd_quantize_mmx(coef_block, divisors, workspace);
753}
754
755GLOBAL(void)
756jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
757 FAST_FLOAT * workspace)
758{
759 if (simd_support & JSIMD_SSE2)
760 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
761 else if (simd_support & JSIMD_SSE)
762 jsimd_quantize_float_sse(coef_block, divisors, workspace);
763 else if (simd_support & JSIMD_3DNOW)
764 jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
765}
766
767GLOBAL(int)
768jsimd_can_idct_2x2 (void)
769{
770 init_simd();
771
772 /* The code is optimised for these values only */
773 if (DCTSIZE != 8)
774 return 0;
775 if (sizeof(JCOEF) != 2)
776 return 0;
777 if (BITS_IN_JSAMPLE != 8)
778 return 0;
779 if (sizeof(JDIMENSION) != 4)
780 return 0;
781 if (sizeof(ISLOW_MULT_TYPE) != 2)
782 return 0;
783
784 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
785 return 1;
786 if (simd_support & JSIMD_MMX)
787 return 1;
788
789 return 0;
790}
791
792GLOBAL(int)
793jsimd_can_idct_4x4 (void)
794{
795 init_simd();
796
797 /* The code is optimised for these values only */
798 if (DCTSIZE != 8)
799 return 0;
800 if (sizeof(JCOEF) != 2)
801 return 0;
802 if (BITS_IN_JSAMPLE != 8)
803 return 0;
804 if (sizeof(JDIMENSION) != 4)
805 return 0;
806 if (sizeof(ISLOW_MULT_TYPE) != 2)
807 return 0;
808
809 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
810 return 1;
811 if (simd_support & JSIMD_MMX)
812 return 1;
813
814 return 0;
815}
816
817GLOBAL(void)
818jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
819 JCOEFPTR coef_block, JSAMPARRAY output_buf,
820 JDIMENSION output_col)
821{
822 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
823 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
824 else if (simd_support & JSIMD_MMX)
825 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
826}
827
828GLOBAL(void)
829jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
830 JCOEFPTR coef_block, JSAMPARRAY output_buf,
831 JDIMENSION output_col)
832{
833 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
834 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
835 else if (simd_support & JSIMD_MMX)
836 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
837}
838
839GLOBAL(int)
840jsimd_can_idct_islow (void)
841{
842 init_simd();
843
844 /* The code is optimised for these values only */
845 if (DCTSIZE != 8)
846 return 0;
847 if (sizeof(JCOEF) != 2)
848 return 0;
849 if (BITS_IN_JSAMPLE != 8)
850 return 0;
851 if (sizeof(JDIMENSION) != 4)
852 return 0;
853 if (sizeof(ISLOW_MULT_TYPE) != 2)
854 return 0;
855
856 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
857 return 1;
858 if (simd_support & JSIMD_MMX)
859 return 1;
860
861 return 0;
862}
863
864GLOBAL(int)
865jsimd_can_idct_ifast (void)
866{
867 init_simd();
868
869 /* The code is optimised for these values only */
870 if (DCTSIZE != 8)
871 return 0;
872 if (sizeof(JCOEF) != 2)
873 return 0;
874 if (BITS_IN_JSAMPLE != 8)
875 return 0;
876 if (sizeof(JDIMENSION) != 4)
877 return 0;
878 if (sizeof(IFAST_MULT_TYPE) != 2)
879 return 0;
880 if (IFAST_SCALE_BITS != 2)
881 return 0;
882
883 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
884 return 1;
885 if (simd_support & JSIMD_MMX)
886 return 1;
887
888 return 0;
889}
890
891GLOBAL(int)
892jsimd_can_idct_float (void)
893{
894 init_simd();
895
896 if (DCTSIZE != 8)
897 return 0;
898 if (sizeof(JCOEF) != 2)
899 return 0;
900 if (BITS_IN_JSAMPLE != 8)
901 return 0;
902 if (sizeof(JDIMENSION) != 4)
903 return 0;
904 if (sizeof(FAST_FLOAT) != 4)
905 return 0;
906 if (sizeof(FLOAT_MULT_TYPE) != 4)
907 return 0;
908
909 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
910 return 1;
911 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
912 return 1;
913 if (simd_support & JSIMD_3DNOW)
914 return 1;
915
916 return 0;
917}
918
919GLOBAL(void)
920jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
921 JCOEFPTR coef_block, JSAMPARRAY output_buf,
922 JDIMENSION output_col)
923{
924 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
925 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col);
926 else if (simd_support & JSIMD_MMX)
927 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, output_col);
928}
929
930GLOBAL(void)
931jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
932 JCOEFPTR coef_block, JSAMPARRAY output_buf,
933 JDIMENSION output_col)
934{
935 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
936 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col);
937 else if (simd_support & JSIMD_MMX)
938 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, output_col);
939}
940
941GLOBAL(void)
942jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
943 JCOEFPTR coef_block, JSAMPARRAY output_buf,
944 JDIMENSION output_col)
945{
946 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
947 jsimd_idct_float_sse2(compptr->dct_table, coef_block,
948 output_buf, output_col);
949 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
950 jsimd_idct_float_sse(compptr->dct_table, coef_block,
951 output_buf, output_col);
952 else if (simd_support & JSIMD_3DNOW)
953 jsimd_idct_float_3dnow(compptr->dct_table, coef_block,
954 output_buf, output_col);
955}
956