blob: 4bff6c2a3b798ebf5e48b8522dd2e6fea6c0df14 [file] [log] [blame]
Pierre Ossmanba82ddf2009-06-29 11:20:42 +00001/*
2 * jsimd_x86_64.c
3 *
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright 2009 D. R. Commander
6 *
7 * Based on the x86 SIMD extension for IJG JPEG library,
8 * Copyright (C) 1999-2006, MIYASAKA Masaru.
9 *
10 * This file contains the interface between the "normal" portions
11 * of the library and the SIMD implementations when running on a
12 * x86_64 architecture.
13 */
14
15#define JPEG_INTERNALS
16#include "../jinclude.h"
17#include "../jpeglib.h"
18#include "../jsimd.h"
19#include "../jdct.h"
20#include "../jsimddct.h"
DRCc06073a2010-09-06 17:37:12 +000021#include "jsimd.h"
Pierre Ossmanba82ddf2009-06-29 11:20:42 +000022
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment. This macro allows us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is non-zero when the address `ptr` is a
 * multiple of 2^order bytes.  Both arguments are parenthesised so that
 * an expression argument (e.g. "table + 1") is evaluated in full before
 * the cast and shift are applied, and the mask is built in size_t so
 * that it has the same width as the converted pointer.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & ((((size_t)1) << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
30
31GLOBAL(int)
32jsimd_can_rgb_ycc (void)
33{
34 /* The code is optimised for these values only */
35 if (BITS_IN_JSAMPLE != 8)
36 return 0;
37 if (sizeof(JDIMENSION) != 4)
38 return 0;
39 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
40 return 0;
41
42 if (!IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
43 return 0;
44
45 return 1;
46}
47
48GLOBAL(int)
49jsimd_can_ycc_rgb (void)
50{
51 /* The code is optimised for these values only */
52 if (BITS_IN_JSAMPLE != 8)
53 return 0;
54 if (sizeof(JDIMENSION) != 4)
55 return 0;
56 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
57 return 0;
58
59 if (!IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
60 return 0;
61
62 return 1;
63}
64
65GLOBAL(void)
66jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
67 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
68 JDIMENSION output_row, int num_rows)
69{
70 void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
71
72 switch(cinfo->in_color_space)
73 {
74 case JCS_EXT_RGB:
75 sse2fct=jsimd_extrgb_ycc_convert_sse2;
76 break;
77 case JCS_EXT_RGBX:
78 sse2fct=jsimd_extrgbx_ycc_convert_sse2;
79 break;
80 case JCS_EXT_BGR:
81 sse2fct=jsimd_extbgr_ycc_convert_sse2;
82 break;
83 case JCS_EXT_BGRX:
84 sse2fct=jsimd_extbgrx_ycc_convert_sse2;
85 break;
86 case JCS_EXT_XBGR:
87 sse2fct=jsimd_extxbgr_ycc_convert_sse2;
88 break;
89 case JCS_EXT_XRGB:
90 sse2fct=jsimd_extxrgb_ycc_convert_sse2;
91 break;
92 default:
93 sse2fct=jsimd_rgb_ycc_convert_sse2;
94 break;
95 }
96
97 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
98}
99
100GLOBAL(void)
101jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
102 JSAMPIMAGE input_buf, JDIMENSION input_row,
103 JSAMPARRAY output_buf, int num_rows)
104{
105 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
106
107 switch(cinfo->out_color_space)
108 {
109 case JCS_EXT_RGB:
110 sse2fct=jsimd_ycc_extrgb_convert_sse2;
111 break;
112 case JCS_EXT_RGBX:
113 sse2fct=jsimd_ycc_extrgbx_convert_sse2;
114 break;
115 case JCS_EXT_BGR:
116 sse2fct=jsimd_ycc_extbgr_convert_sse2;
117 break;
118 case JCS_EXT_BGRX:
119 sse2fct=jsimd_ycc_extbgrx_convert_sse2;
120 break;
121 case JCS_EXT_XBGR:
122 sse2fct=jsimd_ycc_extxbgr_convert_sse2;
123 break;
124 case JCS_EXT_XRGB:
125 sse2fct=jsimd_ycc_extxrgb_convert_sse2;
126 break;
127 default:
128 sse2fct=jsimd_ycc_rgb_convert_sse2;
129 break;
130 }
131
132 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
133}
134
135GLOBAL(int)
136jsimd_can_h2v2_downsample (void)
137{
138 /* The code is optimised for these values only */
139 if (BITS_IN_JSAMPLE != 8)
140 return 0;
141 if (sizeof(JDIMENSION) != 4)
142 return 0;
143
144 return 1;
145}
146
147GLOBAL(int)
148jsimd_can_h2v1_downsample (void)
149{
150 /* The code is optimised for these values only */
151 if (BITS_IN_JSAMPLE != 8)
152 return 0;
153 if (sizeof(JDIMENSION) != 4)
154 return 0;
155
156 return 1;
157}
158
159GLOBAL(void)
160jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
161 JSAMPARRAY input_data, JSAMPARRAY output_data)
162{
163 jsimd_h2v2_downsample_sse2(cinfo->image_width,
164 cinfo->max_v_samp_factor,
165 compptr->v_samp_factor,
166 compptr->width_in_blocks,
167 input_data, output_data);
168}
169
170GLOBAL(void)
171jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
172 JSAMPARRAY input_data, JSAMPARRAY output_data)
173{
174 jsimd_h2v1_downsample_sse2(cinfo->image_width,
175 cinfo->max_v_samp_factor,
176 compptr->v_samp_factor,
177 compptr->width_in_blocks,
178 input_data, output_data);
179}
180
181GLOBAL(int)
182jsimd_can_h2v2_upsample (void)
183{
184 /* The code is optimised for these values only */
185 if (BITS_IN_JSAMPLE != 8)
186 return 0;
187 if (sizeof(JDIMENSION) != 4)
188 return 0;
189
190 return 1;
191}
192
193GLOBAL(int)
194jsimd_can_h2v1_upsample (void)
195{
196 /* The code is optimised for these values only */
197 if (BITS_IN_JSAMPLE != 8)
198 return 0;
199 if (sizeof(JDIMENSION) != 4)
200 return 0;
201
202 return 1;
203}
204
205GLOBAL(void)
206jsimd_h2v2_upsample (j_decompress_ptr cinfo,
207 jpeg_component_info * compptr,
208 JSAMPARRAY input_data,
209 JSAMPARRAY * output_data_ptr)
210{
211 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor,
212 cinfo->output_width,
213 input_data, output_data_ptr);
214}
215
216GLOBAL(void)
217jsimd_h2v1_upsample (j_decompress_ptr cinfo,
218 jpeg_component_info * compptr,
219 JSAMPARRAY input_data,
220 JSAMPARRAY * output_data_ptr)
221{
222 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor,
223 cinfo->output_width,
224 input_data, output_data_ptr);
225}
226
227GLOBAL(int)
228jsimd_can_h2v2_fancy_upsample (void)
229{
230 /* The code is optimised for these values only */
231 if (BITS_IN_JSAMPLE != 8)
232 return 0;
233 if (sizeof(JDIMENSION) != 4)
234 return 0;
235
236 if (!IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
237 return 0;
238
239 return 1;
240}
241
242GLOBAL(int)
243jsimd_can_h2v1_fancy_upsample (void)
244{
245 /* The code is optimised for these values only */
246 if (BITS_IN_JSAMPLE != 8)
247 return 0;
248 if (sizeof(JDIMENSION) != 4)
249 return 0;
250
251 if (!IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
252 return 0;
253
254 return 1;
255}
256
257GLOBAL(void)
258jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
259 jpeg_component_info * compptr,
260 JSAMPARRAY input_data,
261 JSAMPARRAY * output_data_ptr)
262{
DRC30959712010-08-07 16:06:56 +0000263 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
Pierre Ossmanba82ddf2009-06-29 11:20:42 +0000264 compptr->downsampled_width,
265 input_data, output_data_ptr);
266}
267
268GLOBAL(void)
269jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
270 jpeg_component_info * compptr,
271 JSAMPARRAY input_data,
272 JSAMPARRAY * output_data_ptr)
273{
274 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
275 compptr->downsampled_width,
276 input_data, output_data_ptr);
277}
278
279GLOBAL(int)
280jsimd_can_h2v2_merged_upsample (void)
281{
282 /* The code is optimised for these values only */
283 if (BITS_IN_JSAMPLE != 8)
284 return 0;
285 if (sizeof(JDIMENSION) != 4)
286 return 0;
287
288 if (!IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
289 return 0;
290
291 return 1;
292}
293
294GLOBAL(int)
295jsimd_can_h2v1_merged_upsample (void)
296{
297 /* The code is optimised for these values only */
298 if (BITS_IN_JSAMPLE != 8)
299 return 0;
300 if (sizeof(JDIMENSION) != 4)
301 return 0;
302
303 if (!IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
304 return 0;
305
306 return 1;
307}
308
309GLOBAL(void)
310jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
311 JSAMPIMAGE input_buf,
312 JDIMENSION in_row_group_ctr,
313 JSAMPARRAY output_buf)
314{
315 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
316
317 switch(cinfo->out_color_space)
318 {
319 case JCS_EXT_RGB:
320 sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
321 break;
322 case JCS_EXT_RGBX:
323 sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
324 break;
325 case JCS_EXT_BGR:
326 sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
327 break;
328 case JCS_EXT_BGRX:
329 sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
330 break;
331 case JCS_EXT_XBGR:
332 sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
333 break;
334 case JCS_EXT_XRGB:
335 sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
336 break;
337 default:
338 sse2fct=jsimd_h2v2_merged_upsample_sse2;
339 break;
340 }
341
342 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
343}
344
345GLOBAL(void)
346jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
347 JSAMPIMAGE input_buf,
348 JDIMENSION in_row_group_ctr,
349 JSAMPARRAY output_buf)
350{
351 void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
352
353 switch(cinfo->out_color_space)
354 {
355 case JCS_EXT_RGB:
356 sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
357 break;
358 case JCS_EXT_RGBX:
359 sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
360 break;
361 case JCS_EXT_BGR:
362 sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
363 break;
364 case JCS_EXT_BGRX:
365 sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
366 break;
367 case JCS_EXT_XBGR:
368 sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
369 break;
370 case JCS_EXT_XRGB:
371 sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
372 break;
373 default:
374 sse2fct=jsimd_h2v1_merged_upsample_sse2;
375 break;
376 }
377
378 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
379}
380
381GLOBAL(int)
382jsimd_can_convsamp (void)
383{
384 /* The code is optimised for these values only */
385 if (DCTSIZE != 8)
386 return 0;
387 if (BITS_IN_JSAMPLE != 8)
388 return 0;
389 if (sizeof(JDIMENSION) != 4)
390 return 0;
391 if (sizeof(DCTELEM) != 2)
392 return 0;
393
394 return 1;
395}
396
397GLOBAL(int)
398jsimd_can_convsamp_float (void)
399{
400 /* The code is optimised for these values only */
401 if (DCTSIZE != 8)
402 return 0;
403 if (BITS_IN_JSAMPLE != 8)
404 return 0;
405 if (sizeof(JDIMENSION) != 4)
406 return 0;
407 if (sizeof(FAST_FLOAT) != 4)
408 return 0;
409
410 return 1;
411}
412
413GLOBAL(void)
414jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
415 DCTELEM * workspace)
416{
417 jsimd_convsamp_sse2(sample_data, start_col, workspace);
418}
419
420GLOBAL(void)
421jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
422 FAST_FLOAT * workspace)
423{
424 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
425}
426
427GLOBAL(int)
428jsimd_can_fdct_islow (void)
429{
430 /* The code is optimised for these values only */
431 if (DCTSIZE != 8)
432 return 0;
433 if (sizeof(DCTELEM) != 2)
434 return 0;
435
436 if (!IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
437 return 0;
438
439 return 1;
440}
441
442GLOBAL(int)
443jsimd_can_fdct_ifast (void)
444{
445 /* The code is optimised for these values only */
446 if (DCTSIZE != 8)
447 return 0;
448 if (sizeof(DCTELEM) != 2)
449 return 0;
450
451 if (!IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
452 return 0;
453
454 return 1;
455}
456
457GLOBAL(int)
458jsimd_can_fdct_float (void)
459{
460 /* The code is optimised for these values only */
461 if (DCTSIZE != 8)
462 return 0;
463 if (sizeof(FAST_FLOAT) != 4)
464 return 0;
465
466 if (!IS_ALIGNED_SSE(jconst_fdct_float_sse))
467 return 0;
468
469 return 1;
470}
471
472GLOBAL(void)
473jsimd_fdct_islow (DCTELEM * data)
474{
475 jsimd_fdct_islow_sse2(data);
476}
477
478GLOBAL(void)
479jsimd_fdct_ifast (DCTELEM * data)
480{
481 jsimd_fdct_ifast_sse2(data);
482}
483
484GLOBAL(void)
485jsimd_fdct_float (FAST_FLOAT * data)
486{
487 jsimd_fdct_float_sse(data);
488}
489
490GLOBAL(int)
491jsimd_can_quantize (void)
492{
493 /* The code is optimised for these values only */
494 if (DCTSIZE != 8)
495 return 0;
496 if (sizeof(JCOEF) != 2)
497 return 0;
498 if (sizeof(DCTELEM) != 2)
499 return 0;
500
501 return 1;
502}
503
504GLOBAL(int)
505jsimd_can_quantize_float (void)
506{
507 /* The code is optimised for these values only */
508 if (DCTSIZE != 8)
509 return 0;
510 if (sizeof(JCOEF) != 2)
511 return 0;
512 if (sizeof(FAST_FLOAT) != 4)
513 return 0;
514
515 return 1;
516}
517
518GLOBAL(void)
519jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
520 DCTELEM * workspace)
521{
522 jsimd_quantize_sse2(coef_block, divisors, workspace);
523}
524
525GLOBAL(void)
526jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
527 FAST_FLOAT * workspace)
528{
529 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
530}
531
532GLOBAL(int)
533jsimd_can_idct_2x2 (void)
534{
535 /* The code is optimised for these values only */
536 if (DCTSIZE != 8)
537 return 0;
538 if (sizeof(JCOEF) != 2)
539 return 0;
540 if (BITS_IN_JSAMPLE != 8)
541 return 0;
542 if (sizeof(JDIMENSION) != 4)
543 return 0;
544 if (sizeof(ISLOW_MULT_TYPE) != 2)
545 return 0;
546
547 if (!IS_ALIGNED_SSE(jconst_idct_red_sse2))
548 return 0;
549
550 return 1;
551}
552
553GLOBAL(int)
554jsimd_can_idct_4x4 (void)
555{
556 /* The code is optimised for these values only */
557 if (DCTSIZE != 8)
558 return 0;
559 if (sizeof(JCOEF) != 2)
560 return 0;
561 if (BITS_IN_JSAMPLE != 8)
562 return 0;
563 if (sizeof(JDIMENSION) != 4)
564 return 0;
565 if (sizeof(ISLOW_MULT_TYPE) != 2)
566 return 0;
567
568 if (!IS_ALIGNED_SSE(jconst_idct_red_sse2))
569 return 0;
570
571 return 1;
572}
573
574GLOBAL(void)
575jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
576 JCOEFPTR coef_block, JSAMPARRAY output_buf,
577 JDIMENSION output_col)
578{
579 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
580}
581
582GLOBAL(void)
583jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
584 JCOEFPTR coef_block, JSAMPARRAY output_buf,
585 JDIMENSION output_col)
586{
587 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
588}
589
590GLOBAL(int)
591jsimd_can_idct_islow (void)
592{
593 /* The code is optimised for these values only */
594 if (DCTSIZE != 8)
595 return 0;
596 if (sizeof(JCOEF) != 2)
597 return 0;
598 if (BITS_IN_JSAMPLE != 8)
599 return 0;
600 if (sizeof(JDIMENSION) != 4)
601 return 0;
602 if (sizeof(ISLOW_MULT_TYPE) != 2)
603 return 0;
604
605 if (!IS_ALIGNED_SSE(jconst_idct_islow_sse2))
606 return 0;
607
608 return 1;
609}
610
611GLOBAL(int)
612jsimd_can_idct_ifast (void)
613{
614 /* The code is optimised for these values only */
615 if (DCTSIZE != 8)
616 return 0;
617 if (sizeof(JCOEF) != 2)
618 return 0;
619 if (BITS_IN_JSAMPLE != 8)
620 return 0;
621 if (sizeof(JDIMENSION) != 4)
622 return 0;
623 if (sizeof(IFAST_MULT_TYPE) != 2)
624 return 0;
625 if (IFAST_SCALE_BITS != 2)
626 return 0;
627
628 if (!IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
629 return 0;
630
631 return 1;
632}
633
634GLOBAL(int)
635jsimd_can_idct_float (void)
636{
637 if (DCTSIZE != 8)
638 return 0;
639 if (sizeof(JCOEF) != 2)
640 return 0;
641 if (BITS_IN_JSAMPLE != 8)
642 return 0;
643 if (sizeof(JDIMENSION) != 4)
644 return 0;
645 if (sizeof(FAST_FLOAT) != 4)
646 return 0;
647 if (sizeof(FLOAT_MULT_TYPE) != 4)
648 return 0;
649
650 if (!IS_ALIGNED_SSE(jconst_idct_float_sse2))
651 return 0;
652
653 return 1;
654}
655
656GLOBAL(void)
657jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
658 JCOEFPTR coef_block, JSAMPARRAY output_buf,
659 JDIMENSION output_col)
660{
661 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf, output_col);
662}
663
664GLOBAL(void)
665jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
666 JCOEFPTR coef_block, JSAMPARRAY output_buf,
667 JDIMENSION output_col)
668{
669 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, output_col);
670}
671
672GLOBAL(void)
673jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
674 JCOEFPTR coef_block, JSAMPARRAY output_buf,
675 JDIMENSION output_col)
676{
677 jsimd_idct_float_sse2(compptr->dct_table, coef_block,
678 output_buf, output_col);
679}
680