blob: 05e0ca27289cbfbe469bc358eb8a8b4a6f5b2657 [file] [log] [blame]
/*
 * jsimd_arm.c
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright 2009-2011 D. R. Commander
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 * For conditions of distribution and use, see copyright notice in jsimdext.inc
 *
 * This file contains the interface between the "normal" portions
 * of the library and the SIMD implementations when running on
 * the ARM architecture.
 *
 * Based on the stubs from 'jsimd_none.c'
 */
17
18#define JPEG_INTERNALS
19#include "../jinclude.h"
20#include "../jpeglib.h"
21#include "../jsimd.h"
22#include "../jdct.h"
23#include "../jsimddct.h"
24#include "jsimd.h"
25
26#include <stdio.h>
27#include <string.h>
28#include <ctype.h>
29
30static unsigned int simd_support = ~0;
31
DRC4346f912011-06-14 22:16:50 +000032#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
DRC321e0682011-05-03 08:47:43 +000033
34#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
35
/*
 * Test whether 'feature' appears as a separate, whitespace-delimited word
 * in a "Features" line.
 *
 * buffer  - one NUL-terminated line; it must begin with the literal text
 *           "Features", otherwise the line is ignored.
 * feature - the word to look for; an empty string never matches.
 *
 * Returns 1 if the word is present, 0 otherwise.
 */
LOCAL(int)
check_feature (const char *buffer, const char *feature)
{
  const char *list_start;
  const char *p;
  size_t feature_len = strlen(feature);

  if (feature_len == 0)
    return 0;
  if (strncmp(buffer, "Features", 8) != 0)
    return 0;
  buffer += 8;
  /* <ctype.h> functions require a value representable as unsigned char */
  while (isspace((unsigned char) *buffer))
    buffer++;

  /* Word boundaries are judged relative to the fixed start of the list.
   * Comparing against a moving cursor would let a match that is merely the
   * tail of a longer word (e.g. "xneon") pass the left-boundary test once
   * the cursor reaches it. */
  list_start = buffer;
  for (p = strstr(list_start, feature); p != NULL;
       p = strstr(p + 1, feature)) {
    int starts_word = (p == list_start) || isspace((unsigned char) p[-1]);
    int ends_word = (p[feature_len] == 0) ||
                    isspace((unsigned char) p[feature_len]);

    if (starts_word && ends_word)
      return 1;
  }
  return 0;
}
63
64LOCAL(int)
65parse_proc_cpuinfo (int bufsize)
66{
67 char *buffer = (char *)malloc(bufsize);
68 FILE *fd;
69 simd_support = 0;
70
71 if (!buffer)
72 return 0;
73
74 fd = fopen("/proc/cpuinfo", "r");
75 if (fd) {
76 while (fgets(buffer, bufsize, fd)) {
77 if (!strchr(buffer, '\n') && !feof(fd)) {
78 /* "impossible" happened - insufficient size of the buffer! */
79 fclose(fd);
80 free(buffer);
81 return 0;
82 }
83 if (check_feature(buffer, "neon"))
84 simd_support |= JSIMD_ARM_NEON;
85 }
86 fclose(fd);
87 }
88 free(buffer);
89 return 1;
90}
91
92#endif
93
94/*
95 * Check what SIMD accelerations are supported.
96 *
97 * FIXME: This code is racy under a multi-threaded environment.
98 */
99LOCAL(void)
100init_simd (void)
101{
102 char *env = NULL;
DRC4346f912011-06-14 22:16:50 +0000103#if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
DRC321e0682011-05-03 08:47:43 +0000104 int bufsize = 1024; /* an initial guess for the line buffer size limit */
DRC4346f912011-06-14 22:16:50 +0000105#endif
DRC321e0682011-05-03 08:47:43 +0000106
107 if (simd_support != ~0)
108 return;
109
110 simd_support = 0;
111
DRC4346f912011-06-14 22:16:50 +0000112#if defined(__ARM_NEON__)
113 simd_support |= JSIMD_ARM_NEON;
114#elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
115 /* We still have a chance to use NEON regardless of globally used
116 * -mcpu/-mfpu options passed to gcc by performing runtime detection via
117 * /proc/cpuinfo parsing on linux/android */
DRC321e0682011-05-03 08:47:43 +0000118 while (!parse_proc_cpuinfo(bufsize)) {
119 bufsize *= 2;
120 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
121 break;
122 }
123#endif
124
125 /* Force different settings through environment variables */
126 env = getenv("JSIMD_FORCE_ARM_NEON");
127 if ((env != NULL) && (strcmp(env, "1") == 0))
128 simd_support &= JSIMD_ARM_NEON;
129 env = getenv("JSIMD_FORCE_NO_SIMD");
130 if ((env != NULL) && (strcmp(env, "1") == 0))
131 simd_support = 0;
132}
133
134GLOBAL(int)
135jsimd_can_rgb_ycc (void)
136{
137 init_simd();
138
DRC7a9376c2011-08-12 19:27:20 +0000139 /* The code is optimised for these values only */
140 if (BITS_IN_JSAMPLE != 8)
141 return 0;
142 if (sizeof(JDIMENSION) != 4)
143 return 0;
144 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
145 return 0;
146
147 if (simd_support & JSIMD_ARM_NEON)
148 return 1;
149
DRC321e0682011-05-03 08:47:43 +0000150 return 0;
151}
152
153GLOBAL(int)
154jsimd_can_rgb_gray (void)
155{
156 init_simd();
157
158 return 0;
159}
160
161GLOBAL(int)
162jsimd_can_ycc_rgb (void)
163{
164 init_simd();
165
166 /* The code is optimised for these values only */
167 if (BITS_IN_JSAMPLE != 8)
168 return 0;
169 if (sizeof(JDIMENSION) != 4)
170 return 0;
171 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
172 return 0;
173 if (simd_support & JSIMD_ARM_NEON)
174 return 1;
175
176 return 0;
177}
178
179GLOBAL(void)
180jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
181 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
182 JDIMENSION output_row, int num_rows)
183{
DRC7a9376c2011-08-12 19:27:20 +0000184 void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
185
186 switch(cinfo->in_color_space)
187 {
188 case JCS_EXT_RGB:
189 neonfct=jsimd_extrgb_ycc_convert_neon;
190 break;
191 case JCS_EXT_RGBX:
192 neonfct=jsimd_extrgbx_ycc_convert_neon;
193 break;
194 case JCS_EXT_BGR:
195 neonfct=jsimd_extbgr_ycc_convert_neon;
196 break;
197 case JCS_EXT_BGRX:
198 neonfct=jsimd_extbgrx_ycc_convert_neon;
199 break;
200 case JCS_EXT_XBGR:
201 neonfct=jsimd_extxbgr_ycc_convert_neon;
202 break;
203 case JCS_EXT_XRGB:
204 neonfct=jsimd_extxrgb_ycc_convert_neon;
205 break;
206 default:
207 neonfct=jsimd_extrgb_ycc_convert_neon;
208 break;
209 }
210
211 if (simd_support & JSIMD_ARM_NEON)
212 neonfct(cinfo->image_width, input_buf,
213 output_buf, output_row, num_rows);
DRC321e0682011-05-03 08:47:43 +0000214}
215
216GLOBAL(void)
217jsimd_rgb_gray_convert (j_compress_ptr cinfo,
218 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
219 JDIMENSION output_row, int num_rows)
220{
221}
222
223GLOBAL(void)
224jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
225 JSAMPIMAGE input_buf, JDIMENSION input_row,
226 JSAMPARRAY output_buf, int num_rows)
227{
228 void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
229
230 switch(cinfo->out_color_space)
231 {
232 case JCS_EXT_RGB:
233 neonfct=jsimd_ycc_extrgb_convert_neon;
234 break;
235 case JCS_EXT_RGBX:
236 neonfct=jsimd_ycc_extrgbx_convert_neon;
237 break;
238 case JCS_EXT_BGR:
239 neonfct=jsimd_ycc_extbgr_convert_neon;
240 break;
241 case JCS_EXT_BGRX:
242 neonfct=jsimd_ycc_extbgrx_convert_neon;
243 break;
244 case JCS_EXT_XBGR:
245 neonfct=jsimd_ycc_extxbgr_convert_neon;
246 break;
247 case JCS_EXT_XRGB:
248 neonfct=jsimd_ycc_extxrgb_convert_neon;
249 break;
250 default:
251 neonfct=jsimd_ycc_extrgb_convert_neon;
252 break;
253 }
254
255 if (simd_support & JSIMD_ARM_NEON)
256 neonfct(cinfo->output_width, input_buf,
257 input_row, output_buf, num_rows);
258}
259
260GLOBAL(int)
261jsimd_can_h2v2_downsample (void)
262{
263 init_simd();
264
265 return 0;
266}
267
268GLOBAL(int)
269jsimd_can_h2v1_downsample (void)
270{
271 init_simd();
272
273 return 0;
274}
275
276GLOBAL(void)
277jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
278 JSAMPARRAY input_data, JSAMPARRAY output_data)
279{
280}
281
282GLOBAL(void)
283jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
284 JSAMPARRAY input_data, JSAMPARRAY output_data)
285{
286}
287
288GLOBAL(int)
289jsimd_can_h2v2_upsample (void)
290{
291 init_simd();
292
293 return 0;
294}
295
296GLOBAL(int)
297jsimd_can_h2v1_upsample (void)
298{
299 init_simd();
300
301 return 0;
302}
303
304GLOBAL(void)
305jsimd_h2v2_upsample (j_decompress_ptr cinfo,
306 jpeg_component_info * compptr,
307 JSAMPARRAY input_data,
308 JSAMPARRAY * output_data_ptr)
309{
310}
311
312GLOBAL(void)
313jsimd_h2v1_upsample (j_decompress_ptr cinfo,
314 jpeg_component_info * compptr,
315 JSAMPARRAY input_data,
316 JSAMPARRAY * output_data_ptr)
317{
318}
319
320GLOBAL(int)
321jsimd_can_h2v2_fancy_upsample (void)
322{
323 init_simd();
324
325 return 0;
326}
327
328GLOBAL(int)
329jsimd_can_h2v1_fancy_upsample (void)
330{
331 init_simd();
332
333 return 0;
334}
335
336GLOBAL(void)
337jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
338 jpeg_component_info * compptr,
339 JSAMPARRAY input_data,
340 JSAMPARRAY * output_data_ptr)
341{
342}
343
344GLOBAL(void)
345jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
346 jpeg_component_info * compptr,
347 JSAMPARRAY input_data,
348 JSAMPARRAY * output_data_ptr)
349{
350}
351
352GLOBAL(int)
353jsimd_can_h2v2_merged_upsample (void)
354{
355 init_simd();
356
357 return 0;
358}
359
360GLOBAL(int)
361jsimd_can_h2v1_merged_upsample (void)
362{
363 init_simd();
364
365 return 0;
366}
367
368GLOBAL(void)
369jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
370 JSAMPIMAGE input_buf,
371 JDIMENSION in_row_group_ctr,
372 JSAMPARRAY output_buf)
373{
374}
375
376GLOBAL(void)
377jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
378 JSAMPIMAGE input_buf,
379 JDIMENSION in_row_group_ctr,
380 JSAMPARRAY output_buf)
381{
382}
383
384GLOBAL(int)
385jsimd_can_convsamp (void)
386{
387 init_simd();
388
DRCb7400542011-08-10 23:31:13 +0000389 /* The code is optimised for these values only */
390 if (DCTSIZE != 8)
391 return 0;
392 if (BITS_IN_JSAMPLE != 8)
393 return 0;
394 if (sizeof(JDIMENSION) != 4)
395 return 0;
396 if (sizeof(DCTELEM) != 2)
397 return 0;
398
399 if (simd_support & JSIMD_ARM_NEON)
400 return 1;
401
DRC321e0682011-05-03 08:47:43 +0000402 return 0;
403}
404
405GLOBAL(int)
406jsimd_can_convsamp_float (void)
407{
408 init_simd();
409
410 return 0;
411}
412
413GLOBAL(void)
414jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
415 DCTELEM * workspace)
416{
DRCb7400542011-08-10 23:31:13 +0000417 if (simd_support & JSIMD_ARM_NEON)
418 jsimd_convsamp_neon(sample_data, start_col, workspace);
DRC321e0682011-05-03 08:47:43 +0000419}
420
421GLOBAL(void)
422jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
423 FAST_FLOAT * workspace)
424{
425}
426
427GLOBAL(int)
428jsimd_can_fdct_islow (void)
429{
430 init_simd();
431
432 return 0;
433}
434
435GLOBAL(int)
436jsimd_can_fdct_ifast (void)
437{
438 init_simd();
439
DRCb7400542011-08-10 23:31:13 +0000440 /* The code is optimised for these values only */
441 if (DCTSIZE != 8)
442 return 0;
443 if (sizeof(DCTELEM) != 2)
444 return 0;
445
446 if (simd_support & JSIMD_ARM_NEON)
447 return 1;
448
DRC321e0682011-05-03 08:47:43 +0000449 return 0;
450}
451
452GLOBAL(int)
453jsimd_can_fdct_float (void)
454{
455 init_simd();
456
457 return 0;
458}
459
460GLOBAL(void)
461jsimd_fdct_islow (DCTELEM * data)
462{
463}
464
465GLOBAL(void)
466jsimd_fdct_ifast (DCTELEM * data)
467{
DRCb7400542011-08-10 23:31:13 +0000468 if (simd_support & JSIMD_ARM_NEON)
469 jsimd_fdct_ifast_neon(data);
DRC321e0682011-05-03 08:47:43 +0000470}
471
472GLOBAL(void)
473jsimd_fdct_float (FAST_FLOAT * data)
474{
475}
476
477GLOBAL(int)
478jsimd_can_quantize (void)
479{
480 init_simd();
481
482 return 0;
483}
484
485GLOBAL(int)
486jsimd_can_quantize_float (void)
487{
488 init_simd();
489
490 return 0;
491}
492
493GLOBAL(void)
494jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
495 DCTELEM * workspace)
496{
497}
498
499GLOBAL(void)
500jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
501 FAST_FLOAT * workspace)
502{
503}
504
505GLOBAL(int)
506jsimd_can_idct_2x2 (void)
507{
508 init_simd();
509
DRC8c60d222011-06-17 21:12:58 +0000510 /* The code is optimised for these values only */
511 if (DCTSIZE != 8)
512 return 0;
513 if (sizeof(JCOEF) != 2)
514 return 0;
515 if (BITS_IN_JSAMPLE != 8)
516 return 0;
517 if (sizeof(JDIMENSION) != 4)
518 return 0;
519 if (sizeof(ISLOW_MULT_TYPE) != 2)
520 return 0;
521
522 if ((simd_support & JSIMD_ARM_NEON))
523 return 1;
524
DRC321e0682011-05-03 08:47:43 +0000525 return 0;
526}
527
528GLOBAL(int)
529jsimd_can_idct_4x4 (void)
530{
531 init_simd();
532
DRC8c60d222011-06-17 21:12:58 +0000533 /* The code is optimised for these values only */
534 if (DCTSIZE != 8)
535 return 0;
536 if (sizeof(JCOEF) != 2)
537 return 0;
538 if (BITS_IN_JSAMPLE != 8)
539 return 0;
540 if (sizeof(JDIMENSION) != 4)
541 return 0;
542 if (sizeof(ISLOW_MULT_TYPE) != 2)
543 return 0;
544
545 if ((simd_support & JSIMD_ARM_NEON))
546 return 1;
547
DRC321e0682011-05-03 08:47:43 +0000548 return 0;
549}
550
551GLOBAL(void)
552jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
553 JCOEFPTR coef_block, JSAMPARRAY output_buf,
554 JDIMENSION output_col)
555{
DRC8c60d222011-06-17 21:12:58 +0000556 if ((simd_support & JSIMD_ARM_NEON))
557 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
DRC321e0682011-05-03 08:47:43 +0000558}
559
560GLOBAL(void)
561jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
562 JCOEFPTR coef_block, JSAMPARRAY output_buf,
563 JDIMENSION output_col)
564{
DRC8c60d222011-06-17 21:12:58 +0000565 if ((simd_support & JSIMD_ARM_NEON))
566 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
DRC321e0682011-05-03 08:47:43 +0000567}
568
569GLOBAL(int)
570jsimd_can_idct_islow (void)
571{
572 init_simd();
573
574 return 0;
575}
576
577GLOBAL(int)
578jsimd_can_idct_ifast (void)
579{
580 init_simd();
581
582 /* The code is optimised for these values only */
583 if (DCTSIZE != 8)
584 return 0;
585 if (sizeof(JCOEF) != 2)
586 return 0;
587 if (BITS_IN_JSAMPLE != 8)
588 return 0;
589 if (sizeof(JDIMENSION) != 4)
590 return 0;
591 if (sizeof(IFAST_MULT_TYPE) != 2)
592 return 0;
593 if (IFAST_SCALE_BITS != 2)
594 return 0;
595
596 if ((simd_support & JSIMD_ARM_NEON))
597 return 1;
598
599 return 0;
600}
601
602GLOBAL(int)
603jsimd_can_idct_float (void)
604{
605 init_simd();
606
607 return 0;
608}
609
610GLOBAL(void)
611jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
612 JCOEFPTR coef_block, JSAMPARRAY output_buf,
613 JDIMENSION output_col)
614{
615}
616
617GLOBAL(void)
618jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
619 JCOEFPTR coef_block, JSAMPARRAY output_buf,
620 JDIMENSION output_col)
621{
622 if ((simd_support & JSIMD_ARM_NEON))
623 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, output_col);
624}
625
626GLOBAL(void)
627jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
628 JCOEFPTR coef_block, JSAMPARRAY output_buf,
629 JDIMENSION output_col)
630{
631}
632