blob: f77c5ef0deaad64f30b68a096027503dc54375b0 [file] [log] [blame]
/*
 * jsimd_i386.c
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright 2009-2011 D. R. Commander
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 * For conditions of distribution and use, see copyright notice in jsimdext.inc
 *
 * This file contains the interface between the "normal" portions
 * of the library and the SIMD implementations when running on a
 * 32-bit x86 architecture.
 */
15
16#define JPEG_INTERNALS
17#include "../jinclude.h"
18#include "../jpeglib.h"
19#include "../jsimd.h"
20#include "../jdct.h"
21#include "../jsimddct.h"
22#include "jsimd.h"
23
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment. This macro allows us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) evaluates to nonzero when the address held in
 * `ptr` is a multiple of 2^order bytes.  Both arguments are fully
 * parenthesized so that arbitrary expressions may be passed, and the
 * address is converted to size_t rather than unsigned so that no address
 * bits are discarded when pointers are wider than unsigned int.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & ((((size_t)1) << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */

/*
 * Bitmask of JSIMD_* capabilities.  The all-ones value is the initial
 * state, before any capability mask has been stored here.
 */
static unsigned int simd_support = ~0;
33
34/*
35 * Check what SIMD accelerations are supported.
36 *
37 * FIXME: This code is racy under a multi-threaded environment.
38 */
39LOCAL(void)
40init_simd (void)
41{
42 char *env = NULL;
43
44 if (simd_support != ~0)
45 return;
46
47 simd_support = jpeg_simd_cpu_support();
48
49 /* Force different settings through environment variables */
50 env = getenv("JSIMD_FORCEMMX");
51 if ((env != NULL) && (strcmp(env, "1") == 0))
52 simd_support &= JSIMD_MMX;
53 env = getenv("JSIMD_FORCE3DNOW");
54 if ((env != NULL) && (strcmp(env, "1") == 0))
55 simd_support &= JSIMD_3DNOW|JSIMD_MMX;
56 env = getenv("JSIMD_FORCESSE");
57 if ((env != NULL) && (strcmp(env, "1") == 0))
58 simd_support &= JSIMD_SSE|JSIMD_MMX;
59 env = getenv("JSIMD_FORCESSE2");
60 if ((env != NULL) && (strcmp(env, "1") == 0))
61 simd_support &= JSIMD_SSE2;
62}
63
64GLOBAL(int)
65jsimd_can_rgb_ycc (void)
66{
67 init_simd();
68
69 /* The code is optimised for these values only */
70 if (BITS_IN_JSAMPLE != 8)
71 return 0;
72 if (sizeof(JDIMENSION) != 4)
73 return 0;
74 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
75 return 0;
76
77 if ((simd_support & JSIMD_SSE2) &&
78 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
79 return 1;
80 if (simd_support & JSIMD_MMX)
81 return 1;
82
83 return 0;
84}
85
86GLOBAL(int)
hbono@chromium.org98626972011-08-03 03:13:08 +000087jsimd_can_rgb_gray (void)
88{
89 init_simd();
90
91 /* The code is optimised for these values only */
92 if (BITS_IN_JSAMPLE != 8)
93 return 0;
94 if (sizeof(JDIMENSION) != 4)
95 return 0;
96 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
97 return 0;
98
99 if ((simd_support & JSIMD_SSE2) &&
100 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
101 return 1;
102 if (simd_support & JSIMD_MMX)
103 return 1;
104
105 return 0;
106}
107
108GLOBAL(int)
hbono@chromium.orgf0c4f332010-11-01 05:14:55 +0000109jsimd_can_ycc_rgb (void)
110{
111 init_simd();
112
113 /* The code is optimised for these values only */
114 if (BITS_IN_JSAMPLE != 8)
115 return 0;
116 if (sizeof(JDIMENSION) != 4)
117 return 0;
118 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
119 return 0;
120
121 if ((simd_support & JSIMD_SSE2) &&
122 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
123 return 1;
124 if (simd_support & JSIMD_MMX)
125 return 1;
126
127 return 0;
128}
129
130GLOBAL(void)
131jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
132 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
133 JDIMENSION output_row, int num_rows)
134{
135 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
136 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
137
138 switch(cinfo->in_color_space)
139 {
140 case JCS_EXT_RGB:
141 sse2fct=jsimd_extrgb_ycc_convert_sse2;
142 mmxfct=jsimd_extrgb_ycc_convert_mmx;
143 break;
144 case JCS_EXT_RGBX:
145 sse2fct=jsimd_extrgbx_ycc_convert_sse2;
146 mmxfct=jsimd_extrgbx_ycc_convert_mmx;
147 break;
148 case JCS_EXT_BGR:
149 sse2fct=jsimd_extbgr_ycc_convert_sse2;
150 mmxfct=jsimd_extbgr_ycc_convert_mmx;
151 break;
152 case JCS_EXT_BGRX:
153 sse2fct=jsimd_extbgrx_ycc_convert_sse2;
154 mmxfct=jsimd_extbgrx_ycc_convert_mmx;
155 break;
156 case JCS_EXT_XBGR:
157 sse2fct=jsimd_extxbgr_ycc_convert_sse2;
158 mmxfct=jsimd_extxbgr_ycc_convert_mmx;
159 break;
160 case JCS_EXT_XRGB:
161 sse2fct=jsimd_extxrgb_ycc_convert_sse2;
162 mmxfct=jsimd_extxrgb_ycc_convert_mmx;
163 break;
164 default:
165 sse2fct=jsimd_rgb_ycc_convert_sse2;
166 mmxfct=jsimd_rgb_ycc_convert_mmx;
167 break;
168 }
169
170 if ((simd_support & JSIMD_SSE2) &&
171 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
172 sse2fct(cinfo->image_width, input_buf,
173 output_buf, output_row, num_rows);
174 else if (simd_support & JSIMD_MMX)
175 mmxfct(cinfo->image_width, input_buf,
176 output_buf, output_row, num_rows);
177}
178
179GLOBAL(void)
hbono@chromium.org98626972011-08-03 03:13:08 +0000180jsimd_rgb_gray_convert (j_compress_ptr cinfo,
181 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
182 JDIMENSION output_row, int num_rows)
183{
184 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
185 void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
186
187 switch(cinfo->in_color_space)
188 {
189 case JCS_EXT_RGB:
190 sse2fct=jsimd_extrgb_gray_convert_sse2;
191 mmxfct=jsimd_extrgb_gray_convert_mmx;
192 break;
193 case JCS_EXT_RGBX:
194 sse2fct=jsimd_extrgbx_gray_convert_sse2;
195 mmxfct=jsimd_extrgbx_gray_convert_mmx;
196 break;
197 case JCS_EXT_BGR:
198 sse2fct=jsimd_extbgr_gray_convert_sse2;
199 mmxfct=jsimd_extbgr_gray_convert_mmx;
200 break;
201 case JCS_EXT_BGRX:
202 sse2fct=jsimd_extbgrx_gray_convert_sse2;
203 mmxfct=jsimd_extbgrx_gray_convert_mmx;
204 break;
205 case JCS_EXT_XBGR:
206 sse2fct=jsimd_extxbgr_gray_convert_sse2;
207 mmxfct=jsimd_extxbgr_gray_convert_mmx;
208 break;
209 case JCS_EXT_XRGB:
210 sse2fct=jsimd_extxrgb_gray_convert_sse2;
211 mmxfct=jsimd_extxrgb_gray_convert_mmx;
212 break;
213 default:
214 sse2fct=jsimd_rgb_gray_convert_sse2;
215 mmxfct=jsimd_rgb_gray_convert_mmx;
216 break;
217 }
218
219 if ((simd_support & JSIMD_SSE2) &&
220 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
221 sse2fct(cinfo->image_width, input_buf,
222 output_buf, output_row, num_rows);
223 else if (simd_support & JSIMD_MMX)
224 mmxfct(cinfo->image_width, input_buf,
225 output_buf, output_row, num_rows);
226}
227
228GLOBAL(void)
hbono@chromium.orgf0c4f332010-11-01 05:14:55 +0000229jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
230 JSAMPIMAGE input_buf, JDIMENSION input_row,
231 JSAMPARRAY output_buf, int num_rows)
232{
233 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
234 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
235
236 switch(cinfo->out_color_space)
237 {
238 case JCS_EXT_RGB:
239 sse2fct=jsimd_ycc_extrgb_convert_sse2;
240 mmxfct=jsimd_ycc_extrgb_convert_mmx;
241 break;
242 case JCS_EXT_RGBX:
243 sse2fct=jsimd_ycc_extrgbx_convert_sse2;
244 mmxfct=jsimd_ycc_extrgbx_convert_mmx;
245 break;
246 case JCS_EXT_BGR:
247 sse2fct=jsimd_ycc_extbgr_convert_sse2;
248 mmxfct=jsimd_ycc_extbgr_convert_mmx;
249 break;
250 case JCS_EXT_BGRX:
251 sse2fct=jsimd_ycc_extbgrx_convert_sse2;
252 mmxfct=jsimd_ycc_extbgrx_convert_mmx;
253 break;
254 case JCS_EXT_XBGR:
255 sse2fct=jsimd_ycc_extxbgr_convert_sse2;
256 mmxfct=jsimd_ycc_extxbgr_convert_mmx;
257 break;
258 case JCS_EXT_XRGB:
259 sse2fct=jsimd_ycc_extxrgb_convert_sse2;
260 mmxfct=jsimd_ycc_extxrgb_convert_mmx;
261 break;
262 default:
263 sse2fct=jsimd_ycc_rgb_convert_sse2;
264 mmxfct=jsimd_ycc_rgb_convert_mmx;
265 break;
266 }
267
268 if ((simd_support & JSIMD_SSE2) &&
269 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
270 sse2fct(cinfo->output_width, input_buf,
271 input_row, output_buf, num_rows);
272 else if (simd_support & JSIMD_MMX)
273 mmxfct(cinfo->output_width, input_buf,
274 input_row, output_buf, num_rows);
275}
276
277GLOBAL(int)
278jsimd_can_h2v2_downsample (void)
279{
280 init_simd();
281
282 /* The code is optimised for these values only */
283 if (BITS_IN_JSAMPLE != 8)
284 return 0;
285 if (sizeof(JDIMENSION) != 4)
286 return 0;
287
288 if (simd_support & JSIMD_SSE2)
289 return 1;
290 if (simd_support & JSIMD_MMX)
291 return 1;
292
293 return 0;
294}
295
296GLOBAL(int)
297jsimd_can_h2v1_downsample (void)
298{
299 init_simd();
300
301 /* The code is optimised for these values only */
302 if (BITS_IN_JSAMPLE != 8)
303 return 0;
304 if (sizeof(JDIMENSION) != 4)
305 return 0;
306
307 if (simd_support & JSIMD_SSE2)
308 return 1;
309 if (simd_support & JSIMD_MMX)
310 return 1;
311
312 return 0;
313}
314
315GLOBAL(void)
316jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
317 JSAMPARRAY input_data, JSAMPARRAY output_data)
318{
319 if (simd_support & JSIMD_SSE2)
320 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
321 compptr->v_samp_factor, compptr->width_in_blocks,
322 input_data, output_data);
323 else if (simd_support & JSIMD_MMX)
324 jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
325 compptr->v_samp_factor, compptr->width_in_blocks,
326 input_data, output_data);
327}
328
329GLOBAL(void)
330jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
331 JSAMPARRAY input_data, JSAMPARRAY output_data)
332{
333 if (simd_support & JSIMD_SSE2)
334 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
335 compptr->v_samp_factor, compptr->width_in_blocks,
336 input_data, output_data);
337 else if (simd_support & JSIMD_MMX)
338 jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
339 compptr->v_samp_factor, compptr->width_in_blocks,
340 input_data, output_data);
341}
342
343GLOBAL(int)
344jsimd_can_h2v2_upsample (void)
345{
346 init_simd();
347
348 /* The code is optimised for these values only */
349 if (BITS_IN_JSAMPLE != 8)
350 return 0;
351 if (sizeof(JDIMENSION) != 4)
352 return 0;
353
354 if (simd_support & JSIMD_SSE2)
355 return 1;
356 if (simd_support & JSIMD_MMX)
357 return 1;
358
359 return 0;
360}
361
362GLOBAL(int)
363jsimd_can_h2v1_upsample (void)
364{
365 init_simd();
366
367 /* The code is optimised for these values only */
368 if (BITS_IN_JSAMPLE != 8)
369 return 0;
370 if (sizeof(JDIMENSION) != 4)
371 return 0;
372
373 if (simd_support & JSIMD_SSE2)
374 return 1;
375 if (simd_support & JSIMD_MMX)
376 return 1;
377
378 return 0;
379}
380
381GLOBAL(void)
382jsimd_h2v2_upsample (j_decompress_ptr cinfo,
383 jpeg_component_info * compptr,
384 JSAMPARRAY input_data,
385 JSAMPARRAY * output_data_ptr)
386{
387 if (simd_support & JSIMD_SSE2)
388 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor,
389 cinfo->output_width, input_data, output_data_ptr);
390 else if (simd_support & JSIMD_MMX)
391 jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor,
392 cinfo->output_width, input_data, output_data_ptr);
393}
394
395GLOBAL(void)
396jsimd_h2v1_upsample (j_decompress_ptr cinfo,
397 jpeg_component_info * compptr,
398 JSAMPARRAY input_data,
399 JSAMPARRAY * output_data_ptr)
400{
401 if (simd_support & JSIMD_SSE2)
402 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor,
403 cinfo->output_width, input_data, output_data_ptr);
404 else if (simd_support & JSIMD_MMX)
405 jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor,
406 cinfo->output_width, input_data, output_data_ptr);
407}
408
409GLOBAL(int)
410jsimd_can_h2v2_fancy_upsample (void)
411{
412 init_simd();
413
414 /* The code is optimised for these values only */
415 if (BITS_IN_JSAMPLE != 8)
416 return 0;
417 if (sizeof(JDIMENSION) != 4)
418 return 0;
419
420 if ((simd_support & JSIMD_SSE2) &&
421 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
422 return 1;
423 if (simd_support & JSIMD_MMX)
424 return 1;
425
426 return 0;
427}
428
429GLOBAL(int)
430jsimd_can_h2v1_fancy_upsample (void)
431{
432 init_simd();
433
434 /* The code is optimised for these values only */
435 if (BITS_IN_JSAMPLE != 8)
436 return 0;
437 if (sizeof(JDIMENSION) != 4)
438 return 0;
439
440 if ((simd_support & JSIMD_SSE2) &&
441 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
442 return 1;
443 if (simd_support & JSIMD_MMX)
444 return 1;
445
446 return 0;
447}
448
449GLOBAL(void)
450jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
451 jpeg_component_info * compptr,
452 JSAMPARRAY input_data,
453 JSAMPARRAY * output_data_ptr)
454{
455 if ((simd_support & JSIMD_SSE2) &&
456 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
457 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
458 compptr->downsampled_width, input_data, output_data_ptr);
459 else if (simd_support & JSIMD_MMX)
460 jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
461 compptr->downsampled_width, input_data, output_data_ptr);
462}
463
464GLOBAL(void)
465jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
466 jpeg_component_info * compptr,
467 JSAMPARRAY input_data,
468 JSAMPARRAY * output_data_ptr)
469{
470 if ((simd_support & JSIMD_SSE2) &&
471 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
472 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
473 compptr->downsampled_width, input_data, output_data_ptr);
474 else if (simd_support & JSIMD_MMX)
475 jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
476 compptr->downsampled_width, input_data, output_data_ptr);
477}
478
479GLOBAL(int)
480jsimd_can_h2v2_merged_upsample (void)
481{
482 init_simd();
483
484 /* The code is optimised for these values only */
485 if (BITS_IN_JSAMPLE != 8)
486 return 0;
487 if (sizeof(JDIMENSION) != 4)
488 return 0;
489
490 if ((simd_support & JSIMD_SSE2) &&
491 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
492 return 1;
493 if (simd_support & JSIMD_MMX)
494 return 1;
495
496 return 0;
497}
498
499GLOBAL(int)
500jsimd_can_h2v1_merged_upsample (void)
501{
502 init_simd();
503
504 /* The code is optimised for these values only */
505 if (BITS_IN_JSAMPLE != 8)
506 return 0;
507 if (sizeof(JDIMENSION) != 4)
508 return 0;
509
510 if ((simd_support & JSIMD_SSE2) &&
511 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
512 return 1;
513 if (simd_support & JSIMD_MMX)
514 return 1;
515
516 return 0;
517}
518
519GLOBAL(void)
520jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
521 JSAMPIMAGE input_buf,
522 JDIMENSION in_row_group_ctr,
523 JSAMPARRAY output_buf)
524{
525 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
526 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
527
528 switch(cinfo->out_color_space)
529 {
530 case JCS_EXT_RGB:
531 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
532 mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx;
533 break;
534 case JCS_EXT_RGBX:
535 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
536 mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx;
537 break;
538 case JCS_EXT_BGR:
539 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
540 mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx;
541 break;
542 case JCS_EXT_BGRX:
543 sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
544 mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx;
545 break;
546 case JCS_EXT_XBGR:
547 sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
548 mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx;
549 break;
550 case JCS_EXT_XRGB:
551 sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
552 mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx;
553 break;
554 default:
555 sse2fct=jsimd_h2v2_merged_upsample_sse2;
556 mmxfct=jsimd_h2v2_merged_upsample_mmx;
557 break;
558 }
559
560 if ((simd_support & JSIMD_SSE2) &&
561 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
562 sse2fct(cinfo->output_width, input_buf,
563 in_row_group_ctr, output_buf);
564 else if (simd_support & JSIMD_MMX)
565 mmxfct(cinfo->output_width, input_buf,
566 in_row_group_ctr, output_buf);
567}
568
569GLOBAL(void)
570jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
571 JSAMPIMAGE input_buf,
572 JDIMENSION in_row_group_ctr,
573 JSAMPARRAY output_buf)
574{
575 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
576 void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
577
578 switch(cinfo->out_color_space)
579 {
580 case JCS_EXT_RGB:
581 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
582 mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx;
583 break;
584 case JCS_EXT_RGBX:
585 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
586 mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx;
587 break;
588 case JCS_EXT_BGR:
589 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
590 mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx;
591 break;
592 case JCS_EXT_BGRX:
593 sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
594 mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx;
595 break;
596 case JCS_EXT_XBGR:
597 sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
598 mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx;
599 break;
600 case JCS_EXT_XRGB:
601 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
602 mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx;
603 break;
604 default:
605 sse2fct=jsimd_h2v1_merged_upsample_sse2;
606 mmxfct=jsimd_h2v1_merged_upsample_mmx;
607 break;
608 }
609
610 if ((simd_support & JSIMD_SSE2) &&
611 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
612 sse2fct(cinfo->output_width, input_buf,
613 in_row_group_ctr, output_buf);
614 else if (simd_support & JSIMD_MMX)
615 mmxfct(cinfo->output_width, input_buf,
616 in_row_group_ctr, output_buf);
617}
618
619GLOBAL(int)
620jsimd_can_convsamp (void)
621{
622 init_simd();
623
624 /* The code is optimised for these values only */
625 if (DCTSIZE != 8)
626 return 0;
627 if (BITS_IN_JSAMPLE != 8)
628 return 0;
629 if (sizeof(JDIMENSION) != 4)
630 return 0;
631 if (sizeof(DCTELEM) != 2)
632 return 0;
633
634 if (simd_support & JSIMD_SSE2)
635 return 1;
636 if (simd_support & JSIMD_MMX)
637 return 1;
638
639 return 0;
640}
641
642GLOBAL(int)
643jsimd_can_convsamp_float (void)
644{
645 init_simd();
646
647 /* The code is optimised for these values only */
648 if (DCTSIZE != 8)
649 return 0;
650 if (BITS_IN_JSAMPLE != 8)
651 return 0;
652 if (sizeof(JDIMENSION) != 4)
653 return 0;
654 if (sizeof(FAST_FLOAT) != 4)
655 return 0;
656
657 if (simd_support & JSIMD_SSE2)
658 return 1;
659 if (simd_support & JSIMD_SSE)
660 return 1;
661 if (simd_support & JSIMD_3DNOW)
662 return 1;
663
664 return 0;
665}
666
667GLOBAL(void)
668jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
669 DCTELEM * workspace)
670{
671 if (simd_support & JSIMD_SSE2)
672 jsimd_convsamp_sse2(sample_data, start_col, workspace);
673 else if (simd_support & JSIMD_MMX)
674 jsimd_convsamp_mmx(sample_data, start_col, workspace);
675}
676
677GLOBAL(void)
678jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
679 FAST_FLOAT * workspace)
680{
681 if (simd_support & JSIMD_SSE2)
682 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
683 else if (simd_support & JSIMD_SSE)
684 jsimd_convsamp_float_sse(sample_data, start_col, workspace);
685 else if (simd_support & JSIMD_3DNOW)
686 jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
687}
688
689GLOBAL(int)
690jsimd_can_fdct_islow (void)
691{
692 init_simd();
693
694 /* The code is optimised for these values only */
695 if (DCTSIZE != 8)
696 return 0;
697 if (sizeof(DCTELEM) != 2)
698 return 0;
699
700 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
701 return 1;
702 if (simd_support & JSIMD_MMX)
703 return 1;
704
705 return 0;
706}
707
708GLOBAL(int)
709jsimd_can_fdct_ifast (void)
710{
711 init_simd();
712
713 /* The code is optimised for these values only */
714 if (DCTSIZE != 8)
715 return 0;
716 if (sizeof(DCTELEM) != 2)
717 return 0;
718
719 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
720 return 1;
721 if (simd_support & JSIMD_MMX)
722 return 1;
723
724 return 0;
725}
726
727GLOBAL(int)
728jsimd_can_fdct_float (void)
729{
730 init_simd();
731
732 /* The code is optimised for these values only */
733 if (DCTSIZE != 8)
734 return 0;
735 if (sizeof(FAST_FLOAT) != 4)
736 return 0;
737
738 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
739 return 1;
740 if (simd_support & JSIMD_3DNOW)
741 return 1;
742
743 return 0;
744}
745
746GLOBAL(void)
747jsimd_fdct_islow (DCTELEM * data)
748{
749 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
750 jsimd_fdct_islow_sse2(data);
751 else if (simd_support & JSIMD_MMX)
752 jsimd_fdct_islow_mmx(data);
753}
754
755GLOBAL(void)
756jsimd_fdct_ifast (DCTELEM * data)
757{
758 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
759 jsimd_fdct_ifast_sse2(data);
760 else if (simd_support & JSIMD_MMX)
761 jsimd_fdct_ifast_mmx(data);
762}
763
764GLOBAL(void)
765jsimd_fdct_float (FAST_FLOAT * data)
766{
767 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
768 jsimd_fdct_float_sse(data);
769 else if (simd_support & JSIMD_3DNOW)
770 jsimd_fdct_float_3dnow(data);
771}
772
773GLOBAL(int)
774jsimd_can_quantize (void)
775{
776 init_simd();
777
778 /* The code is optimised for these values only */
779 if (DCTSIZE != 8)
780 return 0;
781 if (sizeof(JCOEF) != 2)
782 return 0;
783 if (sizeof(DCTELEM) != 2)
784 return 0;
785
786 if (simd_support & JSIMD_SSE2)
787 return 1;
788 if (simd_support & JSIMD_MMX)
789 return 1;
790
791 return 0;
792}
793
794GLOBAL(int)
795jsimd_can_quantize_float (void)
796{
797 init_simd();
798
799 /* The code is optimised for these values only */
800 if (DCTSIZE != 8)
801 return 0;
802 if (sizeof(JCOEF) != 2)
803 return 0;
804 if (sizeof(FAST_FLOAT) != 4)
805 return 0;
806
807 if (simd_support & JSIMD_SSE2)
808 return 1;
809 if (simd_support & JSIMD_SSE)
810 return 1;
811 if (simd_support & JSIMD_3DNOW)
812 return 1;
813
814 return 0;
815}
816
817GLOBAL(void)
818jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
819 DCTELEM * workspace)
820{
821 if (simd_support & JSIMD_SSE2)
822 jsimd_quantize_sse2(coef_block, divisors, workspace);
823 else if (simd_support & JSIMD_MMX)
824 jsimd_quantize_mmx(coef_block, divisors, workspace);
825}
826
827GLOBAL(void)
828jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
829 FAST_FLOAT * workspace)
830{
831 if (simd_support & JSIMD_SSE2)
832 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
833 else if (simd_support & JSIMD_SSE)
834 jsimd_quantize_float_sse(coef_block, divisors, workspace);
835 else if (simd_support & JSIMD_3DNOW)
836 jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
837}
838
839GLOBAL(int)
840jsimd_can_idct_2x2 (void)
841{
842 init_simd();
843
844 /* The code is optimised for these values only */
845 if (DCTSIZE != 8)
846 return 0;
847 if (sizeof(JCOEF) != 2)
848 return 0;
849 if (BITS_IN_JSAMPLE != 8)
850 return 0;
851 if (sizeof(JDIMENSION) != 4)
852 return 0;
853 if (sizeof(ISLOW_MULT_TYPE) != 2)
854 return 0;
855
856 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
857 return 1;
858 if (simd_support & JSIMD_MMX)
859 return 1;
860
861 return 0;
862}
863
864GLOBAL(int)
865jsimd_can_idct_4x4 (void)
866{
867 init_simd();
868
869 /* The code is optimised for these values only */
870 if (DCTSIZE != 8)
871 return 0;
872 if (sizeof(JCOEF) != 2)
873 return 0;
874 if (BITS_IN_JSAMPLE != 8)
875 return 0;
876 if (sizeof(JDIMENSION) != 4)
877 return 0;
878 if (sizeof(ISLOW_MULT_TYPE) != 2)
879 return 0;
880
881 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
882 return 1;
883 if (simd_support & JSIMD_MMX)
884 return 1;
885
886 return 0;
887}
888
889GLOBAL(void)
890jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
891 JCOEFPTR coef_block, JSAMPARRAY output_buf,
892 JDIMENSION output_col)
893{
894 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
895 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
896 else if (simd_support & JSIMD_MMX)
897 jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
898}
899
900GLOBAL(void)
901jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
902 JCOEFPTR coef_block, JSAMPARRAY output_buf,
903 JDIMENSION output_col)
904{
905 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
906 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
907 else if (simd_support & JSIMD_MMX)
908 jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
909}
910
911GLOBAL(int)
912jsimd_can_idct_islow (void)
913{
914 init_simd();
915
916 /* The code is optimised for these values only */
917 if (DCTSIZE != 8)
918 return 0;
919 if (sizeof(JCOEF) != 2)
920 return 0;
921 if (BITS_IN_JSAMPLE != 8)
922 return 0;
923 if (sizeof(JDIMENSION) != 4)
924 return 0;
925 if (sizeof(ISLOW_MULT_TYPE) != 2)
926 return 0;
927
928 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
929 return 1;
930 if (simd_support & JSIMD_MMX)
931 return 1;
932
933 return 0;
934}
935
936GLOBAL(int)
937jsimd_can_idct_ifast (void)
938{
939 init_simd();
940
941 /* The code is optimised for these values only */
942 if (DCTSIZE != 8)
943 return 0;
944 if (sizeof(JCOEF) != 2)
945 return 0;
946 if (BITS_IN_JSAMPLE != 8)
947 return 0;
948 if (sizeof(JDIMENSION) != 4)
949 return 0;
950 if (sizeof(IFAST_MULT_TYPE) != 2)
951 return 0;
952 if (IFAST_SCALE_BITS != 2)
953 return 0;
954
955 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
956 return 1;
957 if (simd_support & JSIMD_MMX)
958 return 1;
959
960 return 0;
961}
962
963GLOBAL(int)
964jsimd_can_idct_float (void)
965{
966 init_simd();
967
968 if (DCTSIZE != 8)
969 return 0;
970 if (sizeof(JCOEF) != 2)
971 return 0;
972 if (BITS_IN_JSAMPLE != 8)
973 return 0;
974 if (sizeof(JDIMENSION) != 4)
975 return 0;
976 if (sizeof(FAST_FLOAT) != 4)
977 return 0;
978 if (sizeof(FLOAT_MULT_TYPE) != 4)
979 return 0;
980
981 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
982 return 1;
983 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
984 return 1;
985 if (simd_support & JSIMD_3DNOW)
986 return 1;
987
988 return 0;
989}
990
991GLOBAL(void)
992jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
993 JCOEFPTR coef_block, JSAMPARRAY output_buf,
994 JDIMENSION output_col)
995{
996 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
997 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col);
998 else if (simd_support & JSIMD_MMX)
999 jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf, output_col);
1000}
1001
1002GLOBAL(void)
1003jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1004 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1005 JDIMENSION output_col)
1006{
1007 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
1008 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col);
1009 else if (simd_support & JSIMD_MMX)
1010 jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf, output_col);
1011}
1012
1013GLOBAL(void)
1014jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1015 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1016 JDIMENSION output_col)
1017{
1018 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
1019 jsimd_idct_float_sse2(compptr->dct_table, coef_block,
1020 output_buf, output_col);
1021 else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
1022 jsimd_idct_float_sse(compptr->dct_table, coef_block,
1023 output_buf, output_col);
1024 else if (simd_support & JSIMD_3DNOW)
1025 jsimd_idct_float_3dnow(compptr->dct_table, coef_block,
1026 output_buf, output_col);
1027}
1028