hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1 | Index: jdmarker.c |
hbono@chromium.org | 920a8a9 | 2010-11-22 09:17:38 +0000 | [diff] [blame] | 2 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 3 | --- jdmarker.c (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 4 | +++ jdmarker.c (working copy) |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 5 | @@ -910,7 +910,7 @@ |
hbono@chromium.org | 538d9fd | 2011-08-15 06:52:21 +0000 | [diff] [blame] | 6 | } |
| 7 | |
| 8 | if (cinfo->marker->discarded_bytes != 0) { |
| 9 | - WARNMS2(cinfo, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c); |
| 10 | + TRACEMS2(cinfo, 1, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c); |
| 11 | cinfo->marker->discarded_bytes = 0; |
| 12 | } |
| 13 | |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 14 | @@ -944,7 +944,144 @@ |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 15 | return TRUE; |
| 16 | } |
hbono@chromium.org | 920a8a9 | 2010-11-22 09:17:38 +0000 | [diff] [blame] | 17 | |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 18 | +#ifdef MOTION_JPEG_SUPPORTED |
hbono@chromium.org | 920a8a9 | 2010-11-22 09:17:38 +0000 | [diff] [blame] | 19 | |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 20 | +/* The default Huffman tables used by motion JPEG frames. When a motion JPEG |
| 21 | + * frame does not have DHT tables, we should use the Huffman tables suggested by |
| 22 | + * the JPEG standard. Each of these tables represents a member of the JHUFF_TBL |
| 23 | + * struct so we can just copy it to the corresponding JHUFF_TBL member. |
| 24 | + */ |
| 25 | +/* DC table 0 */ |
| 26 | +LOCAL(const unsigned char) mjpg_dc0_bits[] = { |
| 27 | + 0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01, 0x01, |
| 28 | + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 |
| 29 | +}; |
| 30 | + |
| 31 | +LOCAL(const unsigned char) mjpg_dc0_huffval[] = { |
| 32 | + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
| 33 | + 0x08, 0x09, 0x0A, 0x0B |
| 34 | +}; |
| 35 | + |
| 36 | +/* DC table 1 */ |
| 37 | +LOCAL(const unsigned char) mjpg_dc1_bits[] = { |
| 38 | + 0x00, 0x03, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, |
| 39 | + 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 |
| 40 | +}; |
| 41 | + |
| 42 | +LOCAL(const unsigned char) mjpg_dc1_huffval[] = { |
| 43 | + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
| 44 | + 0x08, 0x09, 0x0A, 0x0B |
| 45 | +}; |
| 46 | + |
| 47 | +/* AC table 0 */ |
| 48 | +LOCAL(const unsigned char) mjpg_ac0_bits[] = { |
| 49 | + 0x00, 0x02, 0x01, 0x03, 0x03, 0x02, 0x04, 0x03, |
| 50 | + 0x05, 0x05, 0x04, 0x04, 0x00, 0x00, 0x01, 0x7D |
| 51 | +}; |
| 52 | + |
| 53 | +LOCAL(const unsigned char) mjpg_ac0_huffval[] = { |
| 54 | + 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, |
| 55 | + 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, |
| 56 | + 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xA1, 0x08, |
| 57 | + 0x23, 0x42, 0xB1, 0xC1, 0x15, 0x52, 0xD1, 0xF0, |
| 58 | + 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0A, 0x16, |
| 59 | + 0x17, 0x18, 0x19, 0x1A, 0x25, 0x26, 0x27, 0x28, |
| 60 | + 0x29, 0x2A, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, |
| 61 | + 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, |
| 62 | + 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, |
| 63 | + 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, |
| 64 | + 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, |
| 65 | + 0x7A, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, |
| 66 | + 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, |
| 67 | + 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, |
| 68 | + 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, |
| 69 | + 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, 0xC4, 0xC5, |
| 70 | + 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, |
| 71 | + 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xE1, 0xE2, |
| 72 | + 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, |
| 73 | + 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, |
| 74 | + 0xF9, 0xFA |
| 75 | +}; |
| 76 | + |
| 77 | +/* AC table 1 */ |
| 78 | +LOCAL(const unsigned char) mjpg_ac1_bits[] = { |
| 79 | + 0x00, 0x02, 0x01, 0x02, 0x04, 0x04, 0x03, 0x04, |
| 80 | + 0x07, 0x05, 0x04, 0x04, 0x00, 0x01, 0x02, 0x77 |
| 81 | +}; |
| 82 | + |
| 83 | +LOCAL(const unsigned char) mjpg_ac1_huffval[] = { |
| 84 | + 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, |
| 85 | + 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, |
| 86 | + 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, |
| 87 | + 0xA1, 0xB1, 0xC1, 0x09, 0x23, 0x33, 0x52, 0xF0, |
| 88 | + 0x15, 0x62, 0x72, 0xD1, 0x0A, 0x16, 0x24, 0x34, |
| 89 | + 0xE1, 0x25, 0xF1, 0x17, 0x18, 0x19, 0x1A, 0x26, |
| 90 | + 0x27, 0x28, 0x29, 0x2A, 0x35, 0x36, 0x37, 0x38, |
| 91 | + 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, |
| 92 | + 0x49, 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, |
| 93 | + 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, |
| 94 | + 0x69, 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, |
| 95 | + 0x79, 0x7A, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| 96 | + 0x88, 0x89, 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, |
| 97 | + 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5, |
| 98 | + 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, |
| 99 | + 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, |
| 100 | + 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, |
| 101 | + 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, |
| 102 | + 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, |
| 103 | + 0xEA, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, |
| 104 | + 0xF9, 0xFA |
| 105 | +}; |
| 106 | + |
| 107 | +/* Loads the default Huffman tables used by motion JPEG frames. This function |
| 108 | + * just copies the Huffman tables suggested in the JPEG standard when we have |
| 109 | + * not loaded them. |
| 110 | + */ |
| 111 | +LOCAL(void) |
| 112 | +mjpg_load_huff_tables (j_decompress_ptr cinfo) |
| 113 | +{ |
| 114 | + JHUFF_TBL *htblptr; |
| 115 | + |
| 116 | + if (! cinfo->dc_huff_tbl_ptrs[0]) { |
| 117 | + htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); |
| 118 | + MEMZERO(htblptr, SIZEOF(JHUFF_TBL)); |
| 119 | + MEMCOPY(&htblptr->bits[1], mjpg_dc0_bits, SIZEOF(mjpg_dc0_bits)); |
| 120 | + MEMCOPY(&htblptr->huffval[0], mjpg_dc0_huffval, SIZEOF(mjpg_dc0_huffval)); |
| 121 | + cinfo->dc_huff_tbl_ptrs[0] = htblptr; |
| 122 | + } |
| 123 | + |
| 124 | + if (! cinfo->dc_huff_tbl_ptrs[1]) { |
| 125 | + htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); |
| 126 | + MEMZERO(htblptr, SIZEOF(JHUFF_TBL)); |
| 127 | + MEMCOPY(&htblptr->bits[1], mjpg_dc1_bits, SIZEOF(mjpg_dc1_bits)); |
| 128 | + MEMCOPY(&htblptr->huffval[0], mjpg_dc1_huffval, SIZEOF(mjpg_dc1_huffval)); |
| 129 | + cinfo->dc_huff_tbl_ptrs[1] = htblptr; |
| 130 | + } |
| 131 | + |
| 132 | + if (! cinfo->ac_huff_tbl_ptrs[0]) { |
| 133 | + htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); |
| 134 | + MEMZERO(htblptr, SIZEOF(JHUFF_TBL)); |
| 135 | + MEMCOPY(&htblptr->bits[1], mjpg_ac0_bits, SIZEOF(mjpg_ac0_bits)); |
| 136 | + MEMCOPY(&htblptr->huffval[0], mjpg_ac0_huffval, SIZEOF(mjpg_ac0_huffval)); |
| 137 | + cinfo->ac_huff_tbl_ptrs[0] = htblptr; |
| 138 | + } |
| 139 | + |
| 140 | + if (! cinfo->ac_huff_tbl_ptrs[1]) { |
| 141 | + htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); |
| 142 | + MEMZERO(htblptr, SIZEOF(JHUFF_TBL)); |
| 143 | + MEMCOPY(&htblptr->bits[1], mjpg_ac1_bits, SIZEOF(mjpg_ac1_bits)); |
| 144 | + MEMCOPY(&htblptr->huffval[0], mjpg_ac1_huffval, SIZEOF(mjpg_ac1_huffval)); |
| 145 | + cinfo->ac_huff_tbl_ptrs[1] = htblptr; |
| 146 | + } |
| 147 | +} |
| 148 | + |
| 149 | +#else |
| 150 | + |
| 151 | +#define mjpg_load_huff_tables(cinfo) |
| 152 | + |
| 153 | +#endif /* MOTION_JPEG_SUPPORTED */ |
| 154 | + |
| 155 | + |
hbono@chromium.org | 920a8a9 | 2010-11-22 09:17:38 +0000 | [diff] [blame] | 156 | /* |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 157 | * Read markers until SOS or EOI. |
| 158 | * |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 159 | @@ -1013,6 +1150,7 @@ |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 160 | break; |
| 161 | |
| 162 | case M_SOS: |
| 163 | + mjpg_load_huff_tables(cinfo); |
| 164 | if (! get_sos(cinfo)) |
| 165 | return JPEG_SUSPENDED; |
| 166 | cinfo->unread_marker = 0; /* processed the marker */ |
hbono@chromium.org | 5c4dda9 | 2011-08-03 05:14:47 +0000 | [diff] [blame] | 167 | Index: jmorecfg.h |
| 168 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 169 | --- jmorecfg.h (revision 829) |
hbono@chromium.org | 5c4dda9 | 2011-08-03 05:14:47 +0000 | [diff] [blame] | 170 | +++ jmorecfg.h (working copy) |
| 171 | @@ -153,14 +153,18 @@ |
| 172 | /* INT16 must hold at least the values -32768..32767. */ |
| 173 | |
| 174 | #ifndef XMD_H /* X11/xmd.h correctly defines INT16 */ |
| 175 | +#ifndef _BASETSD_H_ /* basetsd.h correctly defines INT16 */ |
| 176 | typedef short INT16; |
| 177 | #endif |
| 178 | +#endif |
| 179 | |
| 180 | /* INT32 must hold at least signed 32-bit values. */ |
| 181 | |
| 182 | #ifndef XMD_H /* X11/xmd.h correctly defines INT32 */ |
| 183 | +#ifndef _BASETSD_H_ /* basetsd.h correctly defines INT32 */ |
| 184 | typedef long INT32; |
| 185 | #endif |
| 186 | +#endif |
| 187 | |
| 188 | /* Datatype used for image dimensions. The JPEG standard only supports |
| 189 | * images up to 64K*64K due to 16-bit fields in SOF markers. Therefore |
| 190 | @@ -210,11 +214,13 @@ |
| 191 | * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol. |
| 192 | */ |
| 193 | |
| 194 | +#ifndef FAR |
| 195 | #ifdef NEED_FAR_POINTERS |
| 196 | #define FAR far |
| 197 | #else |
| 198 | #define FAR |
| 199 | #endif |
| 200 | +#endif |
| 201 | |
| 202 | |
| 203 | /* |
hbono@chromium.org | 920a8a9 | 2010-11-22 09:17:38 +0000 | [diff] [blame] | 204 | Index: jpeglib.h |
| 205 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 206 | --- jpeglib.h (revision 829) |
hbono@chromium.org | 920a8a9 | 2010-11-22 09:17:38 +0000 | [diff] [blame] | 207 | +++ jpeglib.h (working copy) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 208 | @@ -15,6 +15,10 @@ |
| 209 | #ifndef JPEGLIB_H |
| 210 | #define JPEGLIB_H |
hbono@chromium.org | 920a8a9 | 2010-11-22 09:17:38 +0000 | [diff] [blame] | 211 | |
| 212 | +/* Begin chromium edits */ |
| 213 | +#include "jpeglibmangler.h" |
| 214 | +/* End chromium edits */ |
| 215 | + |
| 216 | /* |
| 217 | * First we include the configuration files that record how this |
| 218 | * installation of the JPEG library is set up. jconfig.h can be |
| 219 | Index: jpeglibmangler.h |
| 220 | =================================================================== |
| 221 | --- jpeglibmangler.h (revision 0) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 222 | +++ jpeglibmangler.h (revision 0) |
hbono@chromium.org | 313e025 | 2011-04-28 09:03:50 +0000 | [diff] [blame] | 223 | @@ -0,0 +1,113 @@ |
hbono@chromium.org | 920a8a9 | 2010-11-22 09:17:38 +0000 | [diff] [blame] | 224 | +// Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 225 | +// Use of this source code is governed by a BSD-style license that can be |
| 226 | +// found in the LICENSE file. |
| 227 | + |
| 228 | +#ifndef THIRD_PARTY_LIBJPEG_TURBO_JPEGLIBMANGLER_H_ |
| 229 | +#define THIRD_PARTY_LIBJPEG_TURBO_JPEGLIBMANGLER_H_ |
| 230 | + |
| 231 | +// Mangle all externally visible function names so we can build our own libjpeg |
| 232 | +// without system libraries trying to use it. |
| 233 | + |
| 234 | +#define jpeg_make_c_derived_tbl chromium_jpeg_make_c_derived_tbl |
| 235 | +#define jpeg_gen_optimal_table chromium_jpeg_gen_optimal_table |
| 236 | +#define jpeg_make_d_derived_tbl chromium_jpeg_make_d_derived_tbl |
| 237 | +#define jpeg_fill_bit_buffer chromium_jpeg_fill_bit_buffer |
| 238 | +#define jpeg_huff_decode chromium_jpeg_huff_decode |
| 239 | +#define jpeg_fdct_islow chromium_jpeg_fdct_islow |
| 240 | +#define jpeg_fdct_ifast chromium_jpeg_fdct_ifast |
| 241 | +#define jpeg_fdct_float chromium_jpeg_fdct_float |
| 242 | +#define jpeg_idct_islow chromium_jpeg_idct_islow |
| 243 | +#define jpeg_idct_ifast chromium_jpeg_idct_ifast |
| 244 | +#define jpeg_idct_float chromium_jpeg_idct_float |
| 245 | +#define jpeg_idct_4x4 chromium_jpeg_idct_4x4 |
| 246 | +#define jpeg_idct_2x2 chromium_jpeg_idct_2x2 |
| 247 | +#define jpeg_idct_1x1 chromium_jpeg_idct_1x1 |
| 248 | +#define jinit_compress_master chromium_jinit_compress_master |
| 249 | +#define jinit_c_master_control chromium_jinit_c_master_control |
| 250 | +#define jinit_c_main_controller chromium_jinit_c_main_controller |
| 251 | +#define jinit_c_prep_controller chromium_jinit_c_prep_controller |
| 252 | +#define jinit_c_coef_controller chromium_jinit_c_coef_controller |
| 253 | +#define jinit_color_converter chromium_jinit_color_converter |
| 254 | +#define jinit_downsampler chromium_jinit_downsampler |
| 255 | +#define jinit_forward_dct chromium_jinit_forward_dct |
| 256 | +#define jinit_huff_encoder chromium_jinit_huff_encoder |
| 257 | +#define jinit_phuff_encoder chromium_jinit_phuff_encoder |
| 258 | +#define jinit_marker_writer chromium_jinit_marker_writer |
| 259 | +#define jinit_master_decompress chromium_jinit_master_decompress |
| 260 | +#define jinit_d_main_controller chromium_jinit_d_main_controller |
| 261 | +#define jinit_d_coef_controller chromium_jinit_d_coef_controller |
| 262 | +#define jinit_d_post_controller chromium_jinit_d_post_controller |
| 263 | +#define jinit_input_controller chromium_jinit_input_controller |
| 264 | +#define jinit_marker_reader chromium_jinit_marker_reader |
| 265 | +#define jinit_huff_decoder chromium_jinit_huff_decoder |
| 266 | +#define jinit_phuff_decoder chromium_jinit_phuff_decoder |
| 267 | +#define jinit_inverse_dct chromium_jinit_inverse_dct |
| 268 | +#define jinit_upsampler chromium_jinit_upsampler |
| 269 | +#define jinit_color_deconverter chromium_jinit_color_deconverter |
| 270 | +#define jinit_1pass_quantizer chromium_jinit_1pass_quantizer |
| 271 | +#define jinit_2pass_quantizer chromium_jinit_2pass_quantizer |
| 272 | +#define jinit_merged_upsampler chromium_jinit_merged_upsampler |
| 273 | +#define jinit_memory_mgr chromium_jinit_memory_mgr |
| 274 | +#define jdiv_round_up chromium_jdiv_round_up |
| 275 | +#define jround_up chromium_jround_up |
| 276 | +#define jcopy_sample_rows chromium_jcopy_sample_rows |
| 277 | +#define jcopy_block_row chromium_jcopy_block_row |
| 278 | +#define jzero_far chromium_jzero_far |
| 279 | +#define jpeg_std_error chromium_jpeg_std_error |
| 280 | +#define jpeg_CreateCompress chromium_jpeg_CreateCompress |
| 281 | +#define jpeg_CreateDecompress chromium_jpeg_CreateDecompress |
| 282 | +#define jpeg_destroy_compress chromium_jpeg_destroy_compress |
| 283 | +#define jpeg_destroy_decompress chromium_jpeg_destroy_decompress |
| 284 | +#define jpeg_stdio_dest chromium_jpeg_stdio_dest |
| 285 | +#define jpeg_stdio_src chromium_jpeg_stdio_src |
| 286 | +#define jpeg_set_defaults chromium_jpeg_set_defaults |
| 287 | +#define jpeg_set_colorspace chromium_jpeg_set_colorspace |
| 288 | +#define jpeg_default_colorspace chromium_jpeg_default_colorspace |
| 289 | +#define jpeg_set_quality chromium_jpeg_set_quality |
| 290 | +#define jpeg_set_linear_quality chromium_jpeg_set_linear_quality |
| 291 | +#define jpeg_add_quant_table chromium_jpeg_add_quant_table |
| 292 | +#define jpeg_quality_scaling chromium_jpeg_quality_scaling |
| 293 | +#define jpeg_simple_progression chromium_jpeg_simple_progression |
| 294 | +#define jpeg_suppress_tables chromium_jpeg_suppress_tables |
| 295 | +#define jpeg_alloc_quant_table chromium_jpeg_alloc_quant_table |
| 296 | +#define jpeg_alloc_huff_table chromium_jpeg_alloc_huff_table |
| 297 | +#define jpeg_start_compress chromium_jpeg_start_compress |
| 298 | +#define jpeg_write_scanlines chromium_jpeg_write_scanlines |
| 299 | +#define jpeg_finish_compress chromium_jpeg_finish_compress |
| 300 | +#define jpeg_write_raw_data chromium_jpeg_write_raw_data |
| 301 | +#define jpeg_write_marker chromium_jpeg_write_marker |
| 302 | +#define jpeg_write_m_header chromium_jpeg_write_m_header |
| 303 | +#define jpeg_write_m_byte chromium_jpeg_write_m_byte |
| 304 | +#define jpeg_write_tables chromium_jpeg_write_tables |
| 305 | +#define jpeg_read_header chromium_jpeg_read_header |
| 306 | +#define jpeg_start_decompress chromium_jpeg_start_decompress |
| 307 | +#define jpeg_read_scanlines chromium_jpeg_read_scanlines |
| 308 | +#define jpeg_finish_decompress chromium_jpeg_finish_decompress |
| 309 | +#define jpeg_read_raw_data chromium_jpeg_read_raw_data |
| 310 | +#define jpeg_has_multiple_scans chromium_jpeg_has_multiple_scans |
| 311 | +#define jpeg_start_output chromium_jpeg_start_output |
| 312 | +#define jpeg_finish_output chromium_jpeg_finish_output |
| 313 | +#define jpeg_input_complete chromium_jpeg_input_complete |
| 314 | +#define jpeg_new_colormap chromium_jpeg_new_colormap |
| 315 | +#define jpeg_consume_input chromium_jpeg_consume_input |
| 316 | +#define jpeg_calc_output_dimensions chromium_jpeg_calc_output_dimensions |
| 317 | +#define jpeg_save_markers chromium_jpeg_save_markers |
| 318 | +#define jpeg_set_marker_processor chromium_jpeg_set_marker_processor |
| 319 | +#define jpeg_read_coefficients chromium_jpeg_read_coefficients |
| 320 | +#define jpeg_write_coefficients chromium_jpeg_write_coefficients |
| 321 | +#define jpeg_copy_critical_parameters chromium_jpeg_copy_critical_parameters |
| 322 | +#define jpeg_abort_compress chromium_jpeg_abort_compress |
| 323 | +#define jpeg_abort_decompress chromium_jpeg_abort_decompress |
| 324 | +#define jpeg_abort chromium_jpeg_abort |
| 325 | +#define jpeg_destroy chromium_jpeg_destroy |
| 326 | +#define jpeg_resync_to_restart chromium_jpeg_resync_to_restart |
| 327 | +#define jpeg_get_small chromium_jpeg_get_small |
| 328 | +#define jpeg_free_small chromium_jpeg_free_small |
| 329 | +#define jpeg_get_large chromium_jpeg_get_large |
| 330 | +#define jpeg_free_large chromium_jpeg_free_large |
| 331 | +#define jpeg_mem_available chromium_jpeg_mem_available |
| 332 | +#define jpeg_open_backing_store chromium_jpeg_open_backing_store |
| 333 | +#define jpeg_mem_init chromium_jpeg_mem_init |
| 334 | +#define jpeg_mem_term chromium_jpeg_mem_term |
| 335 | + |
| 336 | +#endif // THIRD_PARTY_LIBJPEG_TURBO_JPEGLIBMANGLER_H_ |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 337 | Index: simd/jcgrass2-64.asm |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 338 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 339 | --- simd/jcgrass2-64.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 340 | +++ simd/jcgrass2-64.asm (working copy) |
| 341 | @@ -30,7 +30,7 @@ |
| 342 | SECTION SEG_CONST |
hbono@chromium.org | 321292e | 2011-02-17 04:45:42 +0000 | [diff] [blame] | 343 | |
| 344 | alignz 16 |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 345 | - global EXTN(jconst_rgb_gray_convert_sse2) |
| 346 | + global EXTN(jconst_rgb_gray_convert_sse2) PRIVATE |
hbono@chromium.org | 321292e | 2011-02-17 04:45:42 +0000 | [diff] [blame] | 347 | |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 348 | EXTN(jconst_rgb_gray_convert_sse2): |
hbono@chromium.org | 321292e | 2011-02-17 04:45:42 +0000 | [diff] [blame] | 349 | |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 350 | Index: simd/jiss2fst.asm |
| 351 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 352 | --- simd/jiss2fst.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 353 | +++ simd/jiss2fst.asm (working copy) |
| 354 | @@ -59,7 +59,7 @@ |
| 355 | %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
| 356 | |
| 357 | alignz 16 |
| 358 | - global EXTN(jconst_idct_ifast_sse2) |
| 359 | + global EXTN(jconst_idct_ifast_sse2) PRIVATE |
| 360 | |
| 361 | EXTN(jconst_idct_ifast_sse2): |
| 362 | |
| 363 | @@ -92,7 +92,7 @@ |
| 364 | %define WK_NUM 2 |
| 365 | |
| 366 | align 16 |
| 367 | - global EXTN(jsimd_idct_ifast_sse2) |
| 368 | + global EXTN(jsimd_idct_ifast_sse2) PRIVATE |
| 369 | |
| 370 | EXTN(jsimd_idct_ifast_sse2): |
| 371 | push ebp |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 372 | Index: simd/jcclrss2-64.asm |
| 373 | =================================================================== |
| 374 | --- simd/jcclrss2-64.asm (revision 829) |
| 375 | +++ simd/jcclrss2-64.asm (working copy) |
| 376 | @@ -37,7 +37,7 @@ |
| 377 | |
| 378 | align 16 |
| 379 | |
| 380 | - global EXTN(jsimd_rgb_ycc_convert_sse2) |
| 381 | + global EXTN(jsimd_rgb_ycc_convert_sse2) PRIVATE |
| 382 | |
| 383 | EXTN(jsimd_rgb_ycc_convert_sse2): |
| 384 | push rbp |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 385 | Index: simd/jiss2red-64.asm |
| 386 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 387 | --- simd/jiss2red-64.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 388 | +++ simd/jiss2red-64.asm (working copy) |
| 389 | @@ -73,7 +73,7 @@ |
| 390 | SECTION SEG_CONST |
| 391 | |
| 392 | alignz 16 |
| 393 | - global EXTN(jconst_idct_red_sse2) |
| 394 | + global EXTN(jconst_idct_red_sse2) PRIVATE |
| 395 | |
| 396 | EXTN(jconst_idct_red_sse2): |
| 397 | |
| 398 | @@ -114,7 +114,7 @@ |
| 399 | %define WK_NUM 2 |
| 400 | |
| 401 | align 16 |
| 402 | - global EXTN(jsimd_idct_4x4_sse2) |
| 403 | + global EXTN(jsimd_idct_4x4_sse2) PRIVATE |
| 404 | |
| 405 | EXTN(jsimd_idct_4x4_sse2): |
| 406 | push rbp |
| 407 | @@ -413,7 +413,7 @@ |
| 408 | ; r13 = JDIMENSION output_col |
| 409 | |
| 410 | align 16 |
| 411 | - global EXTN(jsimd_idct_2x2_sse2) |
| 412 | + global EXTN(jsimd_idct_2x2_sse2) PRIVATE |
| 413 | |
| 414 | EXTN(jsimd_idct_2x2_sse2): |
| 415 | push rbp |
hbono@chromium.org | 321292e | 2011-02-17 04:45:42 +0000 | [diff] [blame] | 416 | Index: simd/ji3dnflt.asm |
| 417 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 418 | --- simd/ji3dnflt.asm (revision 829) |
hbono@chromium.org | 321292e | 2011-02-17 04:45:42 +0000 | [diff] [blame] | 419 | +++ simd/ji3dnflt.asm (working copy) |
| 420 | @@ -27,7 +27,7 @@ |
| 421 | SECTION SEG_CONST |
| 422 | |
| 423 | alignz 16 |
| 424 | - global EXTN(jconst_idct_float_3dnow) |
| 425 | + global EXTN(jconst_idct_float_3dnow) PRIVATE |
| 426 | |
| 427 | EXTN(jconst_idct_float_3dnow): |
| 428 | |
| 429 | @@ -63,7 +63,7 @@ |
| 430 | ; FAST_FLOAT workspace[DCTSIZE2] |
| 431 | |
| 432 | align 16 |
| 433 | - global EXTN(jsimd_idct_float_3dnow) |
| 434 | + global EXTN(jsimd_idct_float_3dnow) PRIVATE |
| 435 | |
| 436 | EXTN(jsimd_idct_float_3dnow): |
| 437 | push ebp |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 438 | Index: simd/jsimdcpu.asm |
| 439 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 440 | --- simd/jsimdcpu.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 441 | +++ simd/jsimdcpu.asm (working copy) |
| 442 | @@ -29,7 +29,7 @@ |
| 443 | ; |
| 444 | |
| 445 | align 16 |
| 446 | - global EXTN(jpeg_simd_cpu_support) |
| 447 | + global EXTN(jpeg_simd_cpu_support) PRIVATE |
| 448 | |
| 449 | EXTN(jpeg_simd_cpu_support): |
| 450 | push ebx |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 451 | Index: simd/jdmerss2-64.asm |
| 452 | =================================================================== |
| 453 | --- simd/jdmerss2-64.asm (revision 829) |
| 454 | +++ simd/jdmerss2-64.asm (working copy) |
| 455 | @@ -35,7 +35,7 @@ |
| 456 | SECTION SEG_CONST |
| 457 | |
| 458 | alignz 16 |
| 459 | - global EXTN(jconst_merged_upsample_sse2) |
| 460 | + global EXTN(jconst_merged_upsample_sse2) PRIVATE |
| 461 | |
| 462 | EXTN(jconst_merged_upsample_sse2): |
| 463 | |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 464 | Index: simd/jdsammmx.asm |
| 465 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 466 | --- simd/jdsammmx.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 467 | +++ simd/jdsammmx.asm (working copy) |
| 468 | @@ -22,7 +22,7 @@ |
| 469 | SECTION SEG_CONST |
| 470 | |
| 471 | alignz 16 |
| 472 | - global EXTN(jconst_fancy_upsample_mmx) |
| 473 | + global EXTN(jconst_fancy_upsample_mmx) PRIVATE |
| 474 | |
| 475 | EXTN(jconst_fancy_upsample_mmx): |
| 476 | |
| 477 | @@ -58,7 +58,7 @@ |
| 478 | %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr |
| 479 | |
| 480 | align 16 |
| 481 | - global EXTN(jsimd_h2v1_fancy_upsample_mmx) |
| 482 | + global EXTN(jsimd_h2v1_fancy_upsample_mmx) PRIVATE |
| 483 | |
| 484 | EXTN(jsimd_h2v1_fancy_upsample_mmx): |
| 485 | push ebp |
| 486 | @@ -216,7 +216,7 @@ |
| 487 | %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
| 488 | |
| 489 | align 16 |
| 490 | - global EXTN(jsimd_h2v2_fancy_upsample_mmx) |
| 491 | + global EXTN(jsimd_h2v2_fancy_upsample_mmx) PRIVATE |
| 492 | |
| 493 | EXTN(jsimd_h2v2_fancy_upsample_mmx): |
| 494 | push ebp |
| 495 | @@ -542,7 +542,7 @@ |
| 496 | %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr |
| 497 | |
| 498 | align 16 |
| 499 | - global EXTN(jsimd_h2v1_upsample_mmx) |
| 500 | + global EXTN(jsimd_h2v1_upsample_mmx) PRIVATE |
| 501 | |
| 502 | EXTN(jsimd_h2v1_upsample_mmx): |
| 503 | push ebp |
| 504 | @@ -643,7 +643,7 @@ |
| 505 | %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr |
| 506 | |
| 507 | align 16 |
| 508 | - global EXTN(jsimd_h2v2_upsample_mmx) |
| 509 | + global EXTN(jsimd_h2v2_upsample_mmx) PRIVATE |
| 510 | |
| 511 | EXTN(jsimd_h2v2_upsample_mmx): |
| 512 | push ebp |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 513 | Index: simd/jdmrgmmx.asm |
| 514 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 515 | --- simd/jdmrgmmx.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 516 | +++ simd/jdmrgmmx.asm (working copy) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 517 | @@ -40,7 +40,7 @@ |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 518 | %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
| 519 | |
| 520 | align 16 |
| 521 | - global EXTN(jsimd_h2v1_merged_upsample_mmx) |
| 522 | + global EXTN(jsimd_h2v1_merged_upsample_mmx) PRIVATE |
| 523 | |
| 524 | EXTN(jsimd_h2v1_merged_upsample_mmx): |
| 525 | push ebp |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 526 | @@ -409,7 +409,7 @@ |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 527 | %define output_buf(b) (b)+20 ; JSAMPARRAY output_buf |
| 528 | |
| 529 | align 16 |
| 530 | - global EXTN(jsimd_h2v2_merged_upsample_mmx) |
| 531 | + global EXTN(jsimd_h2v2_merged_upsample_mmx) PRIVATE |
| 532 | |
| 533 | EXTN(jsimd_h2v2_merged_upsample_mmx): |
| 534 | push ebp |
| 535 | Index: simd/jdsamss2.asm |
| 536 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 537 | --- simd/jdsamss2.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 538 | +++ simd/jdsamss2.asm (working copy) |
| 539 | @@ -22,7 +22,7 @@ |
| 540 | SECTION SEG_CONST |
| 541 | |
| 542 | alignz 16 |
| 543 | - global EXTN(jconst_fancy_upsample_sse2) |
| 544 | + global EXTN(jconst_fancy_upsample_sse2) PRIVATE |
| 545 | |
| 546 | EXTN(jconst_fancy_upsample_sse2): |
| 547 | |
| 548 | @@ -58,7 +58,7 @@ |
| 549 | %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr |
| 550 | |
| 551 | align 16 |
| 552 | - global EXTN(jsimd_h2v1_fancy_upsample_sse2) |
| 553 | + global EXTN(jsimd_h2v1_fancy_upsample_sse2) PRIVATE |
| 554 | |
| 555 | EXTN(jsimd_h2v1_fancy_upsample_sse2): |
| 556 | push ebp |
| 557 | @@ -214,7 +214,7 @@ |
| 558 | %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
| 559 | |
| 560 | align 16 |
| 561 | - global EXTN(jsimd_h2v2_fancy_upsample_sse2) |
| 562 | + global EXTN(jsimd_h2v2_fancy_upsample_sse2) PRIVATE |
| 563 | |
| 564 | EXTN(jsimd_h2v2_fancy_upsample_sse2): |
| 565 | push ebp |
| 566 | @@ -538,7 +538,7 @@ |
| 567 | %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr |
| 568 | |
| 569 | align 16 |
| 570 | - global EXTN(jsimd_h2v1_upsample_sse2) |
| 571 | + global EXTN(jsimd_h2v1_upsample_sse2) PRIVATE |
| 572 | |
| 573 | EXTN(jsimd_h2v1_upsample_sse2): |
| 574 | push ebp |
| 575 | @@ -637,7 +637,7 @@ |
| 576 | %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr |
| 577 | |
| 578 | align 16 |
| 579 | - global EXTN(jsimd_h2v2_upsample_sse2) |
| 580 | + global EXTN(jsimd_h2v2_upsample_sse2) PRIVATE |
| 581 | |
| 582 | EXTN(jsimd_h2v2_upsample_sse2): |
| 583 | push ebp |
| 584 | Index: simd/jiss2flt-64.asm |
| 585 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 586 | --- simd/jiss2flt-64.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 587 | +++ simd/jiss2flt-64.asm (working copy) |
| 588 | @@ -38,7 +38,7 @@ |
| 589 | SECTION SEG_CONST |
| 590 | |
| 591 | alignz 16 |
| 592 | - global EXTN(jconst_idct_float_sse2) |
| 593 | + global EXTN(jconst_idct_float_sse2) PRIVATE |
| 594 | |
| 595 | EXTN(jconst_idct_float_sse2): |
| 596 | |
| 597 | @@ -74,7 +74,7 @@ |
| 598 | ; FAST_FLOAT workspace[DCTSIZE2] |
| 599 | |
| 600 | align 16 |
| 601 | - global EXTN(jsimd_idct_float_sse2) |
| 602 | + global EXTN(jsimd_idct_float_sse2) PRIVATE |
| 603 | |
| 604 | EXTN(jsimd_idct_float_sse2): |
| 605 | push rbp |
| 606 | Index: simd/jfss2int-64.asm |
| 607 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 608 | --- simd/jfss2int-64.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 609 | +++ simd/jfss2int-64.asm (working copy) |
| 610 | @@ -67,7 +67,7 @@ |
| 611 | SECTION SEG_CONST |
| 612 | |
| 613 | alignz 16 |
| 614 | - global EXTN(jconst_fdct_islow_sse2) |
| 615 | + global EXTN(jconst_fdct_islow_sse2) PRIVATE |
| 616 | |
| 617 | EXTN(jconst_fdct_islow_sse2): |
| 618 | |
| 619 | @@ -101,7 +101,7 @@ |
| 620 | %define WK_NUM 6 |
| 621 | |
| 622 | align 16 |
| 623 | - global EXTN(jsimd_fdct_islow_sse2) |
| 624 | + global EXTN(jsimd_fdct_islow_sse2) PRIVATE |
| 625 | |
| 626 | EXTN(jsimd_fdct_islow_sse2): |
| 627 | push rbp |
| 628 | Index: simd/jcqnts2f.asm |
| 629 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 630 | --- simd/jcqnts2f.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 631 | +++ simd/jcqnts2f.asm (working copy) |
| 632 | @@ -35,7 +35,7 @@ |
| 633 | %define workspace ebp+16 ; FAST_FLOAT * workspace |
| 634 | |
| 635 | align 16 |
| 636 | - global EXTN(jsimd_convsamp_float_sse2) |
| 637 | + global EXTN(jsimd_convsamp_float_sse2) PRIVATE |
| 638 | |
| 639 | EXTN(jsimd_convsamp_float_sse2): |
| 640 | push ebp |
| 641 | @@ -115,7 +115,7 @@ |
| 642 | %define workspace ebp+16 ; FAST_FLOAT * workspace |
| 643 | |
| 644 | align 16 |
| 645 | - global EXTN(jsimd_quantize_float_sse2) |
| 646 | + global EXTN(jsimd_quantize_float_sse2) PRIVATE |
| 647 | |
| 648 | EXTN(jsimd_quantize_float_sse2): |
| 649 | push ebp |
| 650 | Index: simd/jdmrgss2.asm |
| 651 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 652 | --- simd/jdmrgss2.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 653 | +++ simd/jdmrgss2.asm (working copy) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 654 | @@ -40,7 +40,7 @@ |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 655 | %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
| 656 | |
| 657 | align 16 |
| 658 | - global EXTN(jsimd_h2v1_merged_upsample_sse2) |
| 659 | + global EXTN(jsimd_h2v1_merged_upsample_sse2) PRIVATE |
| 660 | |
| 661 | EXTN(jsimd_h2v1_merged_upsample_sse2): |
| 662 | push ebp |
hbono@chromium.org | 0ec930e | 2012-01-18 07:01:04 +0000 | [diff] [blame] | 663 | @@ -560,7 +560,7 @@ |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 664 | %define output_buf(b) (b)+20 ; JSAMPARRAY output_buf |
| 665 | |
| 666 | align 16 |
| 667 | - global EXTN(jsimd_h2v2_merged_upsample_sse2) |
| 668 | + global EXTN(jsimd_h2v2_merged_upsample_sse2) PRIVATE |
| 669 | |
| 670 | EXTN(jsimd_h2v2_merged_upsample_sse2): |
| 671 | push ebp |
| 672 | Index: simd/jfmmxint.asm |
| 673 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 674 | --- simd/jfmmxint.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 675 | +++ simd/jfmmxint.asm (working copy) |
| 676 | @@ -66,7 +66,7 @@ |
| 677 | SECTION SEG_CONST |
| 678 | |
| 679 | alignz 16 |
| 680 | - global EXTN(jconst_fdct_islow_mmx) |
| 681 | + global EXTN(jconst_fdct_islow_mmx) PRIVATE |
| 682 | |
| 683 | EXTN(jconst_fdct_islow_mmx): |
| 684 | |
| 685 | @@ -101,7 +101,7 @@ |
| 686 | %define WK_NUM 2 |
| 687 | |
| 688 | align 16 |
| 689 | - global EXTN(jsimd_fdct_islow_mmx) |
| 690 | + global EXTN(jsimd_fdct_islow_mmx) PRIVATE |
| 691 | |
| 692 | EXTN(jsimd_fdct_islow_mmx): |
| 693 | push ebp |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 694 | Index: simd/jcgryss2-64.asm |
| 695 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 696 | --- simd/jcgryss2-64.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 697 | +++ simd/jcgryss2-64.asm (working copy) |
| 698 | @@ -37,7 +37,7 @@ |
| 699 | |
| 700 | align 16 |
| 701 | |
| 702 | - global EXTN(jsimd_rgb_gray_convert_sse2) |
| 703 | + global EXTN(jsimd_rgb_gray_convert_sse2) PRIVATE |
| 704 | |
| 705 | EXTN(jsimd_rgb_gray_convert_sse2): |
| 706 | push rbp |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 707 | Index: simd/jcqnts2i.asm |
| 708 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 709 | --- simd/jcqnts2i.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 710 | +++ simd/jcqnts2i.asm (working copy) |
| 711 | @@ -35,7 +35,7 @@ |
| 712 | %define workspace ebp+16 ; DCTELEM * workspace |
| 713 | |
| 714 | align 16 |
| 715 | - global EXTN(jsimd_convsamp_sse2) |
| 716 | + global EXTN(jsimd_convsamp_sse2) PRIVATE |
| 717 | |
| 718 | EXTN(jsimd_convsamp_sse2): |
| 719 | push ebp |
| 720 | @@ -117,7 +117,7 @@ |
| 721 | %define workspace ebp+16 ; DCTELEM * workspace |
| 722 | |
| 723 | align 16 |
| 724 | - global EXTN(jsimd_quantize_sse2) |
| 725 | + global EXTN(jsimd_quantize_sse2) PRIVATE |
| 726 | |
| 727 | EXTN(jsimd_quantize_sse2): |
| 728 | push ebp |
| 729 | Index: simd/jiss2fst-64.asm |
| 730 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 731 | --- simd/jiss2fst-64.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 732 | +++ simd/jiss2fst-64.asm (working copy) |
| 733 | @@ -60,7 +60,7 @@ |
| 734 | %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
| 735 | |
| 736 | alignz 16 |
| 737 | - global EXTN(jconst_idct_ifast_sse2) |
| 738 | + global EXTN(jconst_idct_ifast_sse2) PRIVATE |
| 739 | |
| 740 | EXTN(jconst_idct_ifast_sse2): |
| 741 | |
| 742 | @@ -93,7 +93,7 @@ |
| 743 | %define WK_NUM 2 |
| 744 | |
| 745 | align 16 |
| 746 | - global EXTN(jsimd_idct_ifast_sse2) |
| 747 | + global EXTN(jsimd_idct_ifast_sse2) PRIVATE |
| 748 | |
| 749 | EXTN(jsimd_idct_ifast_sse2): |
| 750 | push rbp |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 751 | Index: simd/jiss2flt.asm |
| 752 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 753 | --- simd/jiss2flt.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 754 | +++ simd/jiss2flt.asm (working copy) |
| 755 | @@ -37,7 +37,7 @@ |
| 756 | SECTION SEG_CONST |
| 757 | |
| 758 | alignz 16 |
| 759 | - global EXTN(jconst_idct_float_sse2) |
| 760 | + global EXTN(jconst_idct_float_sse2) PRIVATE |
| 761 | |
| 762 | EXTN(jconst_idct_float_sse2): |
| 763 | |
| 764 | @@ -73,7 +73,7 @@ |
| 765 | ; FAST_FLOAT workspace[DCTSIZE2] |
| 766 | |
| 767 | align 16 |
| 768 | - global EXTN(jsimd_idct_float_sse2) |
| 769 | + global EXTN(jsimd_idct_float_sse2) PRIVATE |
| 770 | |
| 771 | EXTN(jsimd_idct_float_sse2): |
| 772 | push ebp |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 773 | Index: simd/jiss2int.asm |
| 774 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 775 | --- simd/jiss2int.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 776 | +++ simd/jiss2int.asm (working copy) |
| 777 | @@ -66,7 +66,7 @@ |
| 778 | SECTION SEG_CONST |
| 779 | |
| 780 | alignz 16 |
| 781 | - global EXTN(jconst_idct_islow_sse2) |
| 782 | + global EXTN(jconst_idct_islow_sse2) PRIVATE |
| 783 | |
| 784 | EXTN(jconst_idct_islow_sse2): |
| 785 | |
| 786 | @@ -105,7 +105,7 @@ |
| 787 | %define WK_NUM 12 |
| 788 | |
| 789 | align 16 |
| 790 | - global EXTN(jsimd_idct_islow_sse2) |
| 791 | + global EXTN(jsimd_idct_islow_sse2) PRIVATE |
| 792 | |
| 793 | EXTN(jsimd_idct_islow_sse2): |
| 794 | push ebp |
| 795 | Index: simd/jfsseflt-64.asm |
| 796 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 797 | --- simd/jfsseflt-64.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 798 | +++ simd/jfsseflt-64.asm (working copy) |
| 799 | @@ -38,7 +38,7 @@ |
| 800 | SECTION SEG_CONST |
| 801 | |
| 802 | alignz 16 |
| 803 | - global EXTN(jconst_fdct_float_sse) |
| 804 | + global EXTN(jconst_fdct_float_sse) PRIVATE |
| 805 | |
| 806 | EXTN(jconst_fdct_float_sse): |
| 807 | |
| 808 | @@ -65,7 +65,7 @@ |
| 809 | %define WK_NUM 2 |
| 810 | |
| 811 | align 16 |
| 812 | - global EXTN(jsimd_fdct_float_sse) |
| 813 | + global EXTN(jsimd_fdct_float_sse) PRIVATE |
| 814 | |
| 815 | EXTN(jsimd_fdct_float_sse): |
| 816 | push rbp |
| 817 | Index: simd/jccolss2-64.asm |
| 818 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 819 | --- simd/jccolss2-64.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 820 | +++ simd/jccolss2-64.asm (working copy) |
| 821 | @@ -34,7 +34,7 @@ |
| 822 | SECTION SEG_CONST |
| 823 | |
| 824 | alignz 16 |
| 825 | - global EXTN(jconst_rgb_ycc_convert_sse2) |
| 826 | + global EXTN(jconst_rgb_ycc_convert_sse2) PRIVATE |
| 827 | |
| 828 | EXTN(jconst_rgb_ycc_convert_sse2): |
| 829 | |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 830 | Index: simd/jcsamss2-64.asm |
| 831 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 832 | --- simd/jcsamss2-64.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 833 | +++ simd/jcsamss2-64.asm (working copy) |
| 834 | @@ -41,7 +41,7 @@ |
| 835 | ; r15 = JSAMPARRAY output_data |
| 836 | |
| 837 | align 16 |
| 838 | - global EXTN(jsimd_h2v1_downsample_sse2) |
| 839 | + global EXTN(jsimd_h2v1_downsample_sse2) PRIVATE |
| 840 | |
| 841 | EXTN(jsimd_h2v1_downsample_sse2): |
| 842 | push rbp |
| 843 | @@ -185,7 +185,7 @@ |
| 844 | ; r15 = JSAMPARRAY output_data |
| 845 | |
| 846 | align 16 |
| 847 | - global EXTN(jsimd_h2v2_downsample_sse2) |
| 848 | + global EXTN(jsimd_h2v2_downsample_sse2) PRIVATE |
| 849 | |
| 850 | EXTN(jsimd_h2v2_downsample_sse2): |
| 851 | push rbp |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 852 | Index: simd/jdclrss2-64.asm |
| 853 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 854 | --- simd/jdclrss2-64.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 855 | +++ simd/jdclrss2-64.asm (working copy) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 856 | @@ -39,7 +39,7 @@ |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 857 | %define WK_NUM 2 |
| 858 | |
| 859 | align 16 |
| 860 | - global EXTN(jsimd_ycc_rgb_convert_sse2) |
| 861 | + global EXTN(jsimd_ycc_rgb_convert_sse2) PRIVATE |
| 862 | |
| 863 | EXTN(jsimd_ycc_rgb_convert_sse2): |
| 864 | push rbp |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 865 | Index: simd/jdcolmmx.asm |
| 866 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 867 | --- simd/jdcolmmx.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 868 | +++ simd/jdcolmmx.asm (working copy) |
| 869 | @@ -35,7 +35,7 @@ |
| 870 | SECTION SEG_CONST |
| 871 | |
| 872 | alignz 16 |
| 873 | - global EXTN(jconst_ycc_rgb_convert_mmx) |
| 874 | + global EXTN(jconst_ycc_rgb_convert_mmx) PRIVATE |
| 875 | |
| 876 | EXTN(jconst_ycc_rgb_convert_mmx): |
| 877 | |
hbono@chromium.org | 321292e | 2011-02-17 04:45:42 +0000 | [diff] [blame] | 878 | Index: simd/jcclrmmx.asm |
| 879 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 880 | --- simd/jcclrmmx.asm (revision 829) |
hbono@chromium.org | 321292e | 2011-02-17 04:45:42 +0000 | [diff] [blame] | 881 | +++ simd/jcclrmmx.asm (working copy) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 882 | @@ -40,7 +40,7 @@ |
hbono@chromium.org | 321292e | 2011-02-17 04:45:42 +0000 | [diff] [blame] | 883 | %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
| 884 | |
| 885 | align 16 |
| 886 | - global EXTN(jsimd_rgb_ycc_convert_mmx) |
| 887 | + global EXTN(jsimd_rgb_ycc_convert_mmx) PRIVATE |
| 888 | |
| 889 | EXTN(jsimd_rgb_ycc_convert_mmx): |
| 890 | push ebp |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 891 | Index: simd/jfsseflt.asm |
| 892 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 893 | --- simd/jfsseflt.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 894 | +++ simd/jfsseflt.asm (working copy) |
| 895 | @@ -37,7 +37,7 @@ |
| 896 | SECTION SEG_CONST |
| 897 | |
| 898 | alignz 16 |
| 899 | - global EXTN(jconst_fdct_float_sse) |
| 900 | + global EXTN(jconst_fdct_float_sse) PRIVATE |
| 901 | |
| 902 | EXTN(jconst_fdct_float_sse): |
| 903 | |
| 904 | @@ -65,7 +65,7 @@ |
| 905 | %define WK_NUM 2 |
| 906 | |
| 907 | align 16 |
| 908 | - global EXTN(jsimd_fdct_float_sse) |
| 909 | + global EXTN(jsimd_fdct_float_sse) PRIVATE |
| 910 | |
| 911 | EXTN(jsimd_fdct_float_sse): |
| 912 | push ebp |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 913 | Index: simd/jdmrgss2-64.asm |
| 914 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 915 | --- simd/jdmrgss2-64.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 916 | +++ simd/jdmrgss2-64.asm (working copy) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 917 | @@ -39,7 +39,7 @@ |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 918 | %define WK_NUM 3 |
| 919 | |
| 920 | align 16 |
| 921 | - global EXTN(jsimd_h2v1_merged_upsample_sse2) |
| 922 | + global EXTN(jsimd_h2v1_merged_upsample_sse2) PRIVATE |
| 923 | |
| 924 | EXTN(jsimd_h2v1_merged_upsample_sse2): |
| 925 | push rbp |
hbono@chromium.org | 0ec930e | 2012-01-18 07:01:04 +0000 | [diff] [blame] | 926 | @@ -543,7 +543,7 @@ |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 927 | ; r13 = JSAMPARRAY output_buf |
| 928 | |
| 929 | align 16 |
| 930 | - global EXTN(jsimd_h2v2_merged_upsample_sse2) |
| 931 | + global EXTN(jsimd_h2v2_merged_upsample_sse2) PRIVATE |
| 932 | |
| 933 | EXTN(jsimd_h2v2_merged_upsample_sse2): |
| 934 | push rbp |
hbono@chromium.org | c6beb74 | 2011-11-29 05:16:26 +0000 | [diff] [blame] | 935 | Index: simd/jdcolss2.asm |
hbono@chromium.org | 321292e | 2011-02-17 04:45:42 +0000 | [diff] [blame] | 936 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 937 | --- simd/jdcolss2.asm (revision 829) |
hbono@chromium.org | c6beb74 | 2011-11-29 05:16:26 +0000 | [diff] [blame] | 938 | +++ simd/jdcolss2.asm (working copy) |
| 939 | @@ -35,7 +35,7 @@ |
| 940 | SECTION SEG_CONST |
hbono@chromium.org | 321292e | 2011-02-17 04:45:42 +0000 | [diff] [blame] | 941 | |
hbono@chromium.org | c6beb74 | 2011-11-29 05:16:26 +0000 | [diff] [blame] | 942 | alignz 16 |
| 943 | - global EXTN(jconst_ycc_rgb_convert_sse2) |
| 944 | + global EXTN(jconst_ycc_rgb_convert_sse2) PRIVATE |
hbono@chromium.org | 321292e | 2011-02-17 04:45:42 +0000 | [diff] [blame] | 945 | |
hbono@chromium.org | c6beb74 | 2011-11-29 05:16:26 +0000 | [diff] [blame] | 946 | EXTN(jconst_ycc_rgb_convert_sse2): |
hbono@chromium.org | 321292e | 2011-02-17 04:45:42 +0000 | [diff] [blame] | 947 | |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 948 | Index: simd/jdmermmx.asm |
| 949 | =================================================================== |
| 950 | --- simd/jdmermmx.asm (revision 829) |
| 951 | +++ simd/jdmermmx.asm (working copy) |
| 952 | @@ -35,7 +35,7 @@ |
| 953 | SECTION SEG_CONST |
| 954 | |
| 955 | alignz 16 |
| 956 | - global EXTN(jconst_merged_upsample_mmx) |
| 957 | + global EXTN(jconst_merged_upsample_mmx) PRIVATE |
| 958 | |
| 959 | EXTN(jconst_merged_upsample_mmx): |
| 960 | |
| 961 | Index: simd/jcclrss2.asm |
| 962 | =================================================================== |
| 963 | --- simd/jcclrss2.asm (revision 829) |
| 964 | +++ simd/jcclrss2.asm (working copy) |
| 965 | @@ -38,7 +38,7 @@ |
| 966 | |
| 967 | align 16 |
| 968 | |
| 969 | - global EXTN(jsimd_rgb_ycc_convert_sse2) |
| 970 | + global EXTN(jsimd_rgb_ycc_convert_sse2) PRIVATE |
| 971 | |
| 972 | EXTN(jsimd_rgb_ycc_convert_sse2): |
| 973 | push ebp |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 974 | Index: simd/jiss2red.asm |
| 975 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 976 | --- simd/jiss2red.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 977 | +++ simd/jiss2red.asm (working copy) |
| 978 | @@ -72,7 +72,7 @@ |
| 979 | SECTION SEG_CONST |
| 980 | |
| 981 | alignz 16 |
| 982 | - global EXTN(jconst_idct_red_sse2) |
| 983 | + global EXTN(jconst_idct_red_sse2) PRIVATE |
| 984 | |
| 985 | EXTN(jconst_idct_red_sse2): |
| 986 | |
| 987 | @@ -113,7 +113,7 @@ |
| 988 | %define WK_NUM 2 |
| 989 | |
| 990 | align 16 |
| 991 | - global EXTN(jsimd_idct_4x4_sse2) |
| 992 | + global EXTN(jsimd_idct_4x4_sse2) PRIVATE |
| 993 | |
| 994 | EXTN(jsimd_idct_4x4_sse2): |
| 995 | push ebp |
| 996 | @@ -424,7 +424,7 @@ |
| 997 | %define output_col(b) (b)+20 ; JDIMENSION output_col |
| 998 | |
| 999 | align 16 |
| 1000 | - global EXTN(jsimd_idct_2x2_sse2) |
| 1001 | + global EXTN(jsimd_idct_2x2_sse2) PRIVATE |
| 1002 | |
| 1003 | EXTN(jsimd_idct_2x2_sse2): |
| 1004 | push ebp |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1005 | Index: simd/jdmerss2.asm |
| 1006 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1007 | --- simd/jdmerss2.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1008 | +++ simd/jdmerss2.asm (working copy) |
| 1009 | @@ -35,7 +35,7 @@ |
| 1010 | SECTION SEG_CONST |
| 1011 | |
| 1012 | alignz 16 |
| 1013 | - global EXTN(jconst_merged_upsample_sse2) |
| 1014 | + global EXTN(jconst_merged_upsample_sse2) PRIVATE |
| 1015 | |
| 1016 | EXTN(jconst_merged_upsample_sse2): |
| 1017 | |
| 1018 | Index: simd/jfss2fst-64.asm |
| 1019 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1020 | --- simd/jfss2fst-64.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1021 | +++ simd/jfss2fst-64.asm (working copy) |
| 1022 | @@ -53,7 +53,7 @@ |
| 1023 | %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
| 1024 | |
| 1025 | alignz 16 |
| 1026 | - global EXTN(jconst_fdct_ifast_sse2) |
| 1027 | + global EXTN(jconst_fdct_ifast_sse2) PRIVATE |
| 1028 | |
| 1029 | EXTN(jconst_fdct_ifast_sse2): |
| 1030 | |
| 1031 | @@ -80,7 +80,7 @@ |
| 1032 | %define WK_NUM 2 |
| 1033 | |
| 1034 | align 16 |
| 1035 | - global EXTN(jsimd_fdct_ifast_sse2) |
| 1036 | + global EXTN(jsimd_fdct_ifast_sse2) PRIVATE |
| 1037 | |
| 1038 | EXTN(jsimd_fdct_ifast_sse2): |
| 1039 | push rbp |
hbono@chromium.org | c6beb74 | 2011-11-29 05:16:26 +0000 | [diff] [blame] | 1040 | Index: simd/jcqntmmx.asm |
| 1041 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1042 | --- simd/jcqntmmx.asm (revision 829) |
hbono@chromium.org | c6beb74 | 2011-11-29 05:16:26 +0000 | [diff] [blame] | 1043 | +++ simd/jcqntmmx.asm (working copy) |
| 1044 | @@ -35,7 +35,7 @@ |
| 1045 | %define workspace ebp+16 ; DCTELEM * workspace |
| 1046 | |
| 1047 | align 16 |
| 1048 | - global EXTN(jsimd_convsamp_mmx) |
| 1049 | + global EXTN(jsimd_convsamp_mmx) PRIVATE |
| 1050 | |
| 1051 | EXTN(jsimd_convsamp_mmx): |
| 1052 | push ebp |
| 1053 | @@ -140,7 +140,7 @@ |
| 1054 | %define workspace ebp+16 ; DCTELEM * workspace |
| 1055 | |
| 1056 | align 16 |
| 1057 | - global EXTN(jsimd_quantize_mmx) |
| 1058 | + global EXTN(jsimd_quantize_mmx) PRIVATE |
| 1059 | |
| 1060 | EXTN(jsimd_quantize_mmx): |
| 1061 | push ebp |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1062 | Index: simd/jimmxfst.asm |
| 1063 | =================================================================== |
| 1064 | --- simd/jimmxfst.asm (revision 829) |
| 1065 | +++ simd/jimmxfst.asm (working copy) |
| 1066 | @@ -59,7 +59,7 @@ |
| 1067 | %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
| 1068 | |
| 1069 | alignz 16 |
| 1070 | - global EXTN(jconst_idct_ifast_mmx) |
| 1071 | + global EXTN(jconst_idct_ifast_mmx) PRIVATE |
| 1072 | |
| 1073 | EXTN(jconst_idct_ifast_mmx): |
| 1074 | |
| 1075 | @@ -94,7 +94,7 @@ |
| 1076 | ; JCOEF workspace[DCTSIZE2] |
| 1077 | |
| 1078 | align 16 |
| 1079 | - global EXTN(jsimd_idct_ifast_mmx) |
| 1080 | + global EXTN(jsimd_idct_ifast_mmx) PRIVATE |
| 1081 | |
| 1082 | EXTN(jsimd_idct_ifast_mmx): |
| 1083 | push ebp |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1084 | Index: simd/jfss2fst.asm |
| 1085 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1086 | --- simd/jfss2fst.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1087 | +++ simd/jfss2fst.asm (working copy) |
| 1088 | @@ -52,7 +52,7 @@ |
| 1089 | %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
| 1090 | |
| 1091 | alignz 16 |
| 1092 | - global EXTN(jconst_fdct_ifast_sse2) |
| 1093 | + global EXTN(jconst_fdct_ifast_sse2) PRIVATE |
| 1094 | |
| 1095 | EXTN(jconst_fdct_ifast_sse2): |
| 1096 | |
| 1097 | @@ -80,7 +80,7 @@ |
| 1098 | %define WK_NUM 2 |
| 1099 | |
| 1100 | align 16 |
| 1101 | - global EXTN(jsimd_fdct_ifast_sse2) |
| 1102 | + global EXTN(jsimd_fdct_ifast_sse2) PRIVATE |
| 1103 | |
| 1104 | EXTN(jsimd_fdct_ifast_sse2): |
| 1105 | push ebp |
| 1106 | Index: simd/jcgrammx.asm |
| 1107 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1108 | --- simd/jcgrammx.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1109 | +++ simd/jcgrammx.asm (working copy) |
| 1110 | @@ -33,7 +33,7 @@ |
| 1111 | SECTION SEG_CONST |
| 1112 | |
| 1113 | alignz 16 |
| 1114 | - global EXTN(jconst_rgb_gray_convert_mmx) |
| 1115 | + global EXTN(jconst_rgb_gray_convert_mmx) PRIVATE |
| 1116 | |
| 1117 | EXTN(jconst_rgb_gray_convert_mmx): |
| 1118 | |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1119 | Index: simd/jdcolss2-64.asm |
| 1120 | =================================================================== |
| 1121 | --- simd/jdcolss2-64.asm (revision 829) |
| 1122 | +++ simd/jdcolss2-64.asm (working copy) |
| 1123 | @@ -35,7 +35,7 @@ |
| 1124 | SECTION SEG_CONST |
| 1125 | |
| 1126 | alignz 16 |
| 1127 | - global EXTN(jconst_ycc_rgb_convert_sse2) |
| 1128 | + global EXTN(jconst_ycc_rgb_convert_sse2) PRIVATE |
| 1129 | |
| 1130 | EXTN(jconst_ycc_rgb_convert_sse2): |
| 1131 | |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1132 | Index: simd/jf3dnflt.asm |
| 1133 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1134 | --- simd/jf3dnflt.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1135 | +++ simd/jf3dnflt.asm (working copy) |
| 1136 | @@ -27,7 +27,7 @@ |
| 1137 | SECTION SEG_CONST |
| 1138 | |
| 1139 | alignz 16 |
| 1140 | - global EXTN(jconst_fdct_float_3dnow) |
| 1141 | + global EXTN(jconst_fdct_float_3dnow) PRIVATE |
| 1142 | |
| 1143 | EXTN(jconst_fdct_float_3dnow): |
| 1144 | |
| 1145 | @@ -55,7 +55,7 @@ |
| 1146 | %define WK_NUM 2 |
| 1147 | |
| 1148 | align 16 |
| 1149 | - global EXTN(jsimd_fdct_float_3dnow) |
| 1150 | + global EXTN(jsimd_fdct_float_3dnow) PRIVATE |
| 1151 | |
| 1152 | EXTN(jsimd_fdct_float_3dnow): |
| 1153 | push ebp |
| 1154 | Index: simd/jdsamss2-64.asm |
| 1155 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1156 | --- simd/jdsamss2-64.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1157 | +++ simd/jdsamss2-64.asm (working copy) |
| 1158 | @@ -23,7 +23,7 @@ |
| 1159 | SECTION SEG_CONST |
| 1160 | |
| 1161 | alignz 16 |
| 1162 | - global EXTN(jconst_fancy_upsample_sse2) |
| 1163 | + global EXTN(jconst_fancy_upsample_sse2) PRIVATE |
| 1164 | |
| 1165 | EXTN(jconst_fancy_upsample_sse2): |
| 1166 | |
| 1167 | @@ -59,7 +59,7 @@ |
| 1168 | ; r13 = JSAMPARRAY * output_data_ptr |
| 1169 | |
| 1170 | align 16 |
| 1171 | - global EXTN(jsimd_h2v1_fancy_upsample_sse2) |
| 1172 | + global EXTN(jsimd_h2v1_fancy_upsample_sse2) PRIVATE |
| 1173 | |
| 1174 | EXTN(jsimd_h2v1_fancy_upsample_sse2): |
| 1175 | push rbp |
| 1176 | @@ -201,7 +201,7 @@ |
| 1177 | %define WK_NUM 4 |
| 1178 | |
| 1179 | align 16 |
| 1180 | - global EXTN(jsimd_h2v2_fancy_upsample_sse2) |
| 1181 | + global EXTN(jsimd_h2v2_fancy_upsample_sse2) PRIVATE |
| 1182 | |
| 1183 | EXTN(jsimd_h2v2_fancy_upsample_sse2): |
| 1184 | push rbp |
| 1185 | @@ -498,7 +498,7 @@ |
| 1186 | ; r13 = JSAMPARRAY * output_data_ptr |
| 1187 | |
| 1188 | align 16 |
| 1189 | - global EXTN(jsimd_h2v1_upsample_sse2) |
| 1190 | + global EXTN(jsimd_h2v1_upsample_sse2) PRIVATE |
| 1191 | |
| 1192 | EXTN(jsimd_h2v1_upsample_sse2): |
| 1193 | push rbp |
| 1194 | @@ -587,7 +587,7 @@ |
| 1195 | ; r13 = JSAMPARRAY * output_data_ptr |
| 1196 | |
| 1197 | align 16 |
| 1198 | - global EXTN(jsimd_h2v2_upsample_sse2) |
| 1199 | + global EXTN(jsimd_h2v2_upsample_sse2) PRIVATE |
| 1200 | |
| 1201 | EXTN(jsimd_h2v2_upsample_sse2): |
| 1202 | push rbp |
| 1203 | Index: simd/jcgrass2.asm |
| 1204 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1205 | --- simd/jcgrass2.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1206 | +++ simd/jcgrass2.asm (working copy) |
| 1207 | @@ -30,7 +30,7 @@ |
| 1208 | SECTION SEG_CONST |
| 1209 | |
| 1210 | alignz 16 |
| 1211 | - global EXTN(jconst_rgb_gray_convert_sse2) |
| 1212 | + global EXTN(jconst_rgb_gray_convert_sse2) PRIVATE |
| 1213 | |
| 1214 | EXTN(jconst_rgb_gray_convert_sse2): |
| 1215 | |
| 1216 | Index: simd/jcsammmx.asm |
| 1217 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1218 | --- simd/jcsammmx.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1219 | +++ simd/jcsammmx.asm (working copy) |
| 1220 | @@ -40,7 +40,7 @@ |
| 1221 | %define output_data(b) (b)+28 ; JSAMPARRAY output_data |
| 1222 | |
| 1223 | align 16 |
| 1224 | - global EXTN(jsimd_h2v1_downsample_mmx) |
| 1225 | + global EXTN(jsimd_h2v1_downsample_mmx) PRIVATE |
| 1226 | |
| 1227 | EXTN(jsimd_h2v1_downsample_mmx): |
| 1228 | push ebp |
| 1229 | @@ -182,7 +182,7 @@ |
| 1230 | %define output_data(b) (b)+28 ; JSAMPARRAY output_data |
| 1231 | |
| 1232 | align 16 |
| 1233 | - global EXTN(jsimd_h2v2_downsample_mmx) |
| 1234 | + global EXTN(jsimd_h2v2_downsample_mmx) PRIVATE |
| 1235 | |
| 1236 | EXTN(jsimd_h2v2_downsample_mmx): |
| 1237 | push ebp |
thakis@chromium.org | 7a076b5 | 2014-08-18 22:56:54 +0000 | [diff] [blame] | 1238 | Index: simd/jsimd_arm_neon.S |
| 1239 | =================================================================== |
| 1240 | --- simd/jsimd_arm_neon.S (revision 272637) |
| 1241 | +++ simd/jsimd_arm_neon.S (working copy) |
| 1242 | @@ -41,11 +41,9 @@ |
| 1243 | /* Supplementary macro for setting function attributes */ |
| 1244 | .macro asm_function fname |
| 1245 | #ifdef __APPLE__ |
| 1246 | - .func _\fname |
| 1247 | .globl _\fname |
| 1248 | _\fname: |
| 1249 | #else |
| 1250 | - .func \fname |
| 1251 | .global \fname |
| 1252 | #ifdef __ELF__ |
| 1253 | .hidden \fname |
| 1254 | @@ -670,7 +668,6 @@ |
| 1255 | .unreq ROW6R |
| 1256 | .unreq ROW7L |
| 1257 | .unreq ROW7R |
| 1258 | -.endfunc |
| 1259 | |
| 1260 | |
| 1261 | /*****************************************************************************/ |
| 1262 | @@ -895,7 +892,6 @@ |
| 1263 | .unreq TMP2 |
| 1264 | .unreq TMP3 |
| 1265 | .unreq TMP4 |
| 1266 | -.endfunc |
| 1267 | |
| 1268 | |
| 1269 | /*****************************************************************************/ |
| 1270 | @@ -1108,7 +1104,6 @@ |
| 1271 | .unreq TMP2 |
| 1272 | .unreq TMP3 |
| 1273 | .unreq TMP4 |
| 1274 | -.endfunc |
| 1275 | |
| 1276 | .purgem idct_helper |
| 1277 | |
| 1278 | @@ -1263,7 +1258,6 @@ |
| 1279 | .unreq OUTPUT_COL |
| 1280 | .unreq TMP1 |
| 1281 | .unreq TMP2 |
| 1282 | -.endfunc |
| 1283 | |
| 1284 | .purgem idct_helper |
| 1285 | |
| 1286 | @@ -1547,7 +1541,6 @@ |
| 1287 | .unreq U |
| 1288 | .unreq V |
| 1289 | .unreq N |
| 1290 | -.endfunc |
| 1291 | |
| 1292 | .purgem do_yuv_to_rgb |
| 1293 | .purgem do_yuv_to_rgb_stage1 |
| 1294 | @@ -1858,7 +1851,6 @@ |
| 1295 | .unreq U |
| 1296 | .unreq V |
| 1297 | .unreq N |
| 1298 | -.endfunc |
| 1299 | |
| 1300 | .purgem do_rgb_to_yuv |
| 1301 | .purgem do_rgb_to_yuv_stage1 |
| 1302 | @@ -1940,7 +1932,6 @@ |
| 1303 | .unreq TMP2 |
| 1304 | .unreq TMP3 |
| 1305 | .unreq TMP4 |
| 1306 | -.endfunc |
| 1307 | |
| 1308 | |
| 1309 | /*****************************************************************************/ |
| 1310 | @@ -2064,7 +2055,6 @@ |
| 1311 | |
| 1312 | .unreq DATA |
| 1313 | .unreq TMP |
| 1314 | -.endfunc |
| 1315 | |
| 1316 | |
| 1317 | /*****************************************************************************/ |
| 1318 | @@ -2166,7 +2156,6 @@ |
| 1319 | .unreq CORRECTION |
| 1320 | .unreq SHIFT |
| 1321 | .unreq LOOP_COUNT |
| 1322 | -.endfunc |
| 1323 | |
| 1324 | |
| 1325 | /*****************************************************************************/ |
| 1326 | @@ -2401,7 +2390,6 @@ |
| 1327 | .unreq WIDTH |
| 1328 | .unreq TMP |
| 1329 | |
| 1330 | -.endfunc |
| 1331 | |
| 1332 | .purgem upsample16 |
| 1333 | .purgem upsample32 |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1334 | Index: simd/jsimd_i386.c |
| 1335 | =================================================================== |
| 1336 | --- simd/jsimd_i386.c (revision 829) |
| 1337 | +++ simd/jsimd_i386.c (working copy) |
| 1338 | @@ -61,6 +61,7 @@ |
| 1339 | simd_support &= JSIMD_SSE2; |
| 1340 | } |
| 1341 | |
| 1342 | +#ifndef JPEG_DECODE_ONLY |
| 1343 | GLOBAL(int) |
| 1344 | jsimd_can_rgb_ycc (void) |
| 1345 | { |
| 1346 | @@ -82,6 +83,7 @@ |
| 1347 | |
| 1348 | return 0; |
| 1349 | } |
| 1350 | +#endif |
| 1351 | |
| 1352 | GLOBAL(int) |
| 1353 | jsimd_can_rgb_gray (void) |
| 1354 | @@ -127,6 +129,7 @@ |
| 1355 | return 0; |
| 1356 | } |
| 1357 | |
| 1358 | +#ifndef JPEG_DECODE_ONLY |
| 1359 | GLOBAL(void) |
| 1360 | jsimd_rgb_ycc_convert (j_compress_ptr cinfo, |
| 1361 | JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
| 1362 | @@ -179,6 +182,7 @@ |
| 1363 | mmxfct(cinfo->image_width, input_buf, |
| 1364 | output_buf, output_row, num_rows); |
| 1365 | } |
| 1366 | +#endif |
| 1367 | |
| 1368 | GLOBAL(void) |
| 1369 | jsimd_rgb_gray_convert (j_compress_ptr cinfo, |
| 1370 | @@ -286,6 +290,7 @@ |
| 1371 | input_row, output_buf, num_rows); |
| 1372 | } |
| 1373 | |
| 1374 | +#ifndef JPEG_DECODE_ONLY |
| 1375 | GLOBAL(int) |
| 1376 | jsimd_can_h2v2_downsample (void) |
| 1377 | { |
| 1378 | @@ -351,6 +356,7 @@ |
| 1379 | compptr->v_samp_factor, compptr->width_in_blocks, |
| 1380 | input_data, output_data); |
| 1381 | } |
| 1382 | +#endif |
| 1383 | |
| 1384 | GLOBAL(int) |
| 1385 | jsimd_can_h2v2_upsample (void) |
| 1386 | @@ -636,6 +642,7 @@ |
| 1387 | in_row_group_ctr, output_buf); |
| 1388 | } |
| 1389 | |
| 1390 | +#ifndef JPEG_DECODE_ONLY |
| 1391 | GLOBAL(int) |
| 1392 | jsimd_can_convsamp (void) |
| 1393 | { |
| 1394 | @@ -855,6 +862,7 @@ |
| 1395 | else if (simd_support & JSIMD_3DNOW) |
| 1396 | jsimd_quantize_float_3dnow(coef_block, divisors, workspace); |
| 1397 | } |
| 1398 | +#endif |
| 1399 | |
| 1400 | GLOBAL(int) |
| 1401 | jsimd_can_idct_2x2 (void) |
| 1402 | @@ -1045,4 +1053,3 @@ |
| 1403 | jsimd_idct_float_3dnow(compptr->dct_table, coef_block, |
| 1404 | output_buf, output_col); |
| 1405 | } |
| 1406 | - |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1407 | Index: simd/jcqnts2f-64.asm |
| 1408 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1409 | --- simd/jcqnts2f-64.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1410 | +++ simd/jcqnts2f-64.asm (working copy) |
| 1411 | @@ -36,7 +36,7 @@ |
| 1412 | ; r12 = FAST_FLOAT * workspace |
| 1413 | |
| 1414 | align 16 |
| 1415 | - global EXTN(jsimd_convsamp_float_sse2) |
| 1416 | + global EXTN(jsimd_convsamp_float_sse2) PRIVATE |
| 1417 | |
| 1418 | EXTN(jsimd_convsamp_float_sse2): |
| 1419 | push rbp |
| 1420 | @@ -110,7 +110,7 @@ |
| 1421 | ; r12 = FAST_FLOAT * workspace |
| 1422 | |
| 1423 | align 16 |
| 1424 | - global EXTN(jsimd_quantize_float_sse2) |
| 1425 | + global EXTN(jsimd_quantize_float_sse2) PRIVATE |
| 1426 | |
| 1427 | EXTN(jsimd_quantize_float_sse2): |
| 1428 | push rbp |
| 1429 | Index: simd/jcqnt3dn.asm |
| 1430 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1431 | --- simd/jcqnt3dn.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1432 | +++ simd/jcqnt3dn.asm (working copy) |
| 1433 | @@ -35,7 +35,7 @@ |
| 1434 | %define workspace ebp+16 ; FAST_FLOAT * workspace |
| 1435 | |
| 1436 | align 16 |
| 1437 | - global EXTN(jsimd_convsamp_float_3dnow) |
| 1438 | + global EXTN(jsimd_convsamp_float_3dnow) PRIVATE |
| 1439 | |
| 1440 | EXTN(jsimd_convsamp_float_3dnow): |
| 1441 | push ebp |
| 1442 | @@ -138,7 +138,7 @@ |
| 1443 | %define workspace ebp+16 ; FAST_FLOAT * workspace |
| 1444 | |
| 1445 | align 16 |
| 1446 | - global EXTN(jsimd_quantize_float_3dnow) |
| 1447 | + global EXTN(jsimd_quantize_float_3dnow) PRIVATE |
| 1448 | |
| 1449 | EXTN(jsimd_quantize_float_3dnow): |
| 1450 | push ebp |
| 1451 | Index: simd/jcsamss2.asm |
| 1452 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1453 | --- simd/jcsamss2.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1454 | +++ simd/jcsamss2.asm (working copy) |
| 1455 | @@ -40,7 +40,7 @@ |
| 1456 | %define output_data(b) (b)+28 ; JSAMPARRAY output_data |
| 1457 | |
| 1458 | align 16 |
| 1459 | - global EXTN(jsimd_h2v1_downsample_sse2) |
| 1460 | + global EXTN(jsimd_h2v1_downsample_sse2) PRIVATE |
| 1461 | |
| 1462 | EXTN(jsimd_h2v1_downsample_sse2): |
| 1463 | push ebp |
| 1464 | @@ -195,7 +195,7 @@ |
| 1465 | %define output_data(b) (b)+28 ; JSAMPARRAY output_data |
| 1466 | |
| 1467 | align 16 |
| 1468 | - global EXTN(jsimd_h2v2_downsample_sse2) |
| 1469 | + global EXTN(jsimd_h2v2_downsample_sse2) PRIVATE |
| 1470 | |
| 1471 | EXTN(jsimd_h2v2_downsample_sse2): |
| 1472 | push ebp |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1473 | Index: simd/jsimd_x86_64.c |
| 1474 | =================================================================== |
| 1475 | --- simd/jsimd_x86_64.c (revision 829) |
| 1476 | +++ simd/jsimd_x86_64.c (working copy) |
| 1477 | @@ -29,6 +29,7 @@ |
| 1478 | |
| 1479 | #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ |
| 1480 | |
| 1481 | +#ifndef JPEG_DECODE_ONLY |
| 1482 | GLOBAL(int) |
| 1483 | jsimd_can_rgb_ycc (void) |
| 1484 | { |
| 1485 | @@ -45,6 +46,7 @@ |
| 1486 | |
| 1487 | return 1; |
| 1488 | } |
| 1489 | +#endif |
| 1490 | |
| 1491 | GLOBAL(int) |
| 1492 | jsimd_can_rgb_gray (void) |
| 1493 | @@ -80,6 +82,7 @@ |
| 1494 | return 1; |
| 1495 | } |
| 1496 | |
| 1497 | +#ifndef JPEG_DECODE_ONLY |
| 1498 | GLOBAL(void) |
| 1499 | jsimd_rgb_ycc_convert (j_compress_ptr cinfo, |
| 1500 | JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
| 1501 | @@ -118,6 +121,7 @@ |
| 1502 | |
| 1503 | sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); |
| 1504 | } |
| 1505 | +#endif |
| 1506 | |
| 1507 | GLOBAL(void) |
| 1508 | jsimd_rgb_gray_convert (j_compress_ptr cinfo, |
| 1509 | @@ -197,6 +201,7 @@ |
| 1510 | sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); |
| 1511 | } |
| 1512 | |
| 1513 | +#ifndef JPEG_DECODE_ONLY |
| 1514 | GLOBAL(int) |
| 1515 | jsimd_can_h2v2_downsample (void) |
| 1516 | { |
| 1517 | @@ -242,6 +247,7 @@ |
| 1518 | compptr->width_in_blocks, |
| 1519 | input_data, output_data); |
| 1520 | } |
| 1521 | +#endif |
| 1522 | |
| 1523 | GLOBAL(int) |
| 1524 | jsimd_can_h2v2_upsample (void) |
| 1525 | @@ -451,6 +457,7 @@ |
| 1526 | sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); |
| 1527 | } |
| 1528 | |
| 1529 | +#ifndef JPEG_DECODE_ONLY |
| 1530 | GLOBAL(int) |
| 1531 | jsimd_can_convsamp (void) |
| 1532 | { |
| 1533 | @@ -601,6 +608,7 @@ |
| 1534 | { |
| 1535 | jsimd_quantize_float_sse2(coef_block, divisors, workspace); |
| 1536 | } |
| 1537 | +#endif |
| 1538 | |
| 1539 | GLOBAL(int) |
| 1540 | jsimd_can_idct_2x2 (void) |
| 1541 | @@ -750,4 +758,3 @@ |
| 1542 | jsimd_idct_float_sse2(compptr->dct_table, coef_block, |
| 1543 | output_buf, output_col); |
| 1544 | } |
| 1545 | - |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1546 | Index: simd/jimmxint.asm |
| 1547 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1548 | --- simd/jimmxint.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1549 | +++ simd/jimmxint.asm (working copy) |
| 1550 | @@ -66,7 +66,7 @@ |
| 1551 | SECTION SEG_CONST |
| 1552 | |
| 1553 | alignz 16 |
| 1554 | - global EXTN(jconst_idct_islow_mmx) |
| 1555 | + global EXTN(jconst_idct_islow_mmx) PRIVATE |
| 1556 | |
| 1557 | EXTN(jconst_idct_islow_mmx): |
| 1558 | |
| 1559 | @@ -107,7 +107,7 @@ |
| 1560 | ; JCOEF workspace[DCTSIZE2] |
| 1561 | |
| 1562 | align 16 |
| 1563 | - global EXTN(jsimd_idct_islow_mmx) |
| 1564 | + global EXTN(jsimd_idct_islow_mmx) PRIVATE |
| 1565 | |
| 1566 | EXTN(jsimd_idct_islow_mmx): |
| 1567 | push ebp |
| 1568 | Index: simd/jcgrymmx.asm |
| 1569 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1570 | --- simd/jcgrymmx.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1571 | +++ simd/jcgrymmx.asm (working copy) |
| 1572 | @@ -41,7 +41,7 @@ |
| 1573 | %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
| 1574 | |
| 1575 | align 16 |
| 1576 | - global EXTN(jsimd_rgb_gray_convert_mmx) |
| 1577 | + global EXTN(jsimd_rgb_gray_convert_mmx) PRIVATE |
| 1578 | |
| 1579 | EXTN(jsimd_rgb_gray_convert_mmx): |
| 1580 | push ebp |
| 1581 | Index: simd/jfss2int.asm |
| 1582 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1583 | --- simd/jfss2int.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1584 | +++ simd/jfss2int.asm (working copy) |
| 1585 | @@ -66,7 +66,7 @@ |
| 1586 | SECTION SEG_CONST |
| 1587 | |
| 1588 | alignz 16 |
| 1589 | - global EXTN(jconst_fdct_islow_sse2) |
| 1590 | + global EXTN(jconst_fdct_islow_sse2) PRIVATE |
| 1591 | |
| 1592 | EXTN(jconst_fdct_islow_sse2): |
| 1593 | |
| 1594 | @@ -101,7 +101,7 @@ |
| 1595 | %define WK_NUM 6 |
| 1596 | |
| 1597 | align 16 |
| 1598 | - global EXTN(jsimd_fdct_islow_sse2) |
| 1599 | + global EXTN(jsimd_fdct_islow_sse2) PRIVATE |
| 1600 | |
| 1601 | EXTN(jsimd_fdct_islow_sse2): |
| 1602 | push ebp |
| 1603 | Index: simd/jcgryss2.asm |
| 1604 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1605 | --- simd/jcgryss2.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1606 | +++ simd/jcgryss2.asm (working copy) |
| 1607 | @@ -39,7 +39,7 @@ |
| 1608 | |
| 1609 | align 16 |
| 1610 | |
| 1611 | - global EXTN(jsimd_rgb_gray_convert_sse2) |
| 1612 | + global EXTN(jsimd_rgb_gray_convert_sse2) PRIVATE |
| 1613 | |
| 1614 | EXTN(jsimd_rgb_gray_convert_sse2): |
| 1615 | push ebp |
| 1616 | Index: simd/jccolmmx.asm |
| 1617 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1618 | --- simd/jccolmmx.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1619 | +++ simd/jccolmmx.asm (working copy) |
| 1620 | @@ -37,7 +37,7 @@ |
| 1621 | SECTION SEG_CONST |
| 1622 | |
| 1623 | alignz 16 |
| 1624 | - global EXTN(jconst_rgb_ycc_convert_mmx) |
| 1625 | + global EXTN(jconst_rgb_ycc_convert_mmx) PRIVATE |
| 1626 | |
| 1627 | EXTN(jconst_rgb_ycc_convert_mmx): |
| 1628 | |
| 1629 | Index: simd/jimmxred.asm |
| 1630 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1631 | --- simd/jimmxred.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1632 | +++ simd/jimmxred.asm (working copy) |
| 1633 | @@ -72,7 +72,7 @@ |
| 1634 | SECTION SEG_CONST |
| 1635 | |
| 1636 | alignz 16 |
| 1637 | - global EXTN(jconst_idct_red_mmx) |
| 1638 | + global EXTN(jconst_idct_red_mmx) PRIVATE |
| 1639 | |
| 1640 | EXTN(jconst_idct_red_mmx): |
| 1641 | |
| 1642 | @@ -115,7 +115,7 @@ |
| 1643 | ; JCOEF workspace[DCTSIZE2] |
| 1644 | |
| 1645 | align 16 |
| 1646 | - global EXTN(jsimd_idct_4x4_mmx) |
| 1647 | + global EXTN(jsimd_idct_4x4_mmx) PRIVATE |
| 1648 | |
| 1649 | EXTN(jsimd_idct_4x4_mmx): |
| 1650 | push ebp |
| 1651 | @@ -503,7 +503,7 @@ |
| 1652 | %define output_col(b) (b)+20 ; JDIMENSION output_col |
| 1653 | |
| 1654 | align 16 |
| 1655 | - global EXTN(jsimd_idct_2x2_mmx) |
| 1656 | + global EXTN(jsimd_idct_2x2_mmx) PRIVATE |
| 1657 | |
| 1658 | EXTN(jsimd_idct_2x2_mmx): |
| 1659 | push ebp |
| 1660 | Index: simd/jsimdext.inc |
| 1661 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1662 | --- simd/jsimdext.inc (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1663 | +++ simd/jsimdext.inc (working copy) |
hbono@chromium.org | c6beb74 | 2011-11-29 05:16:26 +0000 | [diff] [blame] | 1664 | @@ -73,6 +73,9 @@ |
| 1665 | ; * *BSD family Unix using elf format |
| 1666 | ; * Unix System V, including Solaris x86, UnixWare and SCO Unix |
| 1667 | |
| 1668 | +; PIC is the default on Linux |
| 1669 | +%define PIC |
| 1670 | + |
| 1671 | ; mark stack as non-executable |
| 1672 | section .note.GNU-stack noalloc noexec nowrite progbits |
| 1673 | |
hbono@chromium.org | 0ec930e | 2012-01-18 07:01:04 +0000 | [diff] [blame] | 1674 | @@ -375,4 +378,14 @@ |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1675 | ; |
| 1676 | %include "jsimdcfg.inc" |
| 1677 | |
| 1678 | +; Begin chromium edits |
| 1679 | +%ifdef MACHO ; ----(nasm -fmacho -DMACHO ...)-------- |
| 1680 | +%define PRIVATE :private_extern |
| 1681 | +%elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------ |
| 1682 | +%define PRIVATE :hidden |
| 1683 | +%else |
| 1684 | +%define PRIVATE |
| 1685 | +%endif |
| 1686 | +; End chromium edits |
| 1687 | + |
| 1688 | ; -------------------------------------------------------------------------- |
| 1689 | Index: simd/jdclrmmx.asm |
| 1690 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1691 | --- simd/jdclrmmx.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1692 | +++ simd/jdclrmmx.asm (working copy) |
| 1693 | @@ -40,7 +40,7 @@ |
| 1694 | %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
| 1695 | |
| 1696 | align 16 |
| 1697 | - global EXTN(jsimd_ycc_rgb_convert_mmx) |
| 1698 | + global EXTN(jsimd_ycc_rgb_convert_mmx) PRIVATE |
| 1699 | |
| 1700 | EXTN(jsimd_ycc_rgb_convert_mmx): |
| 1701 | push ebp |
| 1702 | Index: simd/jccolss2.asm |
| 1703 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1704 | --- simd/jccolss2.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1705 | +++ simd/jccolss2.asm (working copy) |
| 1706 | @@ -34,7 +34,7 @@ |
| 1707 | SECTION SEG_CONST |
| 1708 | |
| 1709 | alignz 16 |
| 1710 | - global EXTN(jconst_rgb_ycc_convert_sse2) |
| 1711 | + global EXTN(jconst_rgb_ycc_convert_sse2) PRIVATE |
| 1712 | |
| 1713 | EXTN(jconst_rgb_ycc_convert_sse2): |
| 1714 | |
| 1715 | Index: simd/jisseflt.asm |
| 1716 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1717 | --- simd/jisseflt.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1718 | +++ simd/jisseflt.asm (working copy) |
| 1719 | @@ -37,7 +37,7 @@ |
| 1720 | SECTION SEG_CONST |
| 1721 | |
| 1722 | alignz 16 |
| 1723 | - global EXTN(jconst_idct_float_sse) |
| 1724 | + global EXTN(jconst_idct_float_sse) PRIVATE |
| 1725 | |
| 1726 | EXTN(jconst_idct_float_sse): |
| 1727 | |
| 1728 | @@ -73,7 +73,7 @@ |
| 1729 | ; FAST_FLOAT workspace[DCTSIZE2] |
| 1730 | |
| 1731 | align 16 |
| 1732 | - global EXTN(jsimd_idct_float_sse) |
| 1733 | + global EXTN(jsimd_idct_float_sse) PRIVATE |
| 1734 | |
| 1735 | EXTN(jsimd_idct_float_sse): |
| 1736 | push ebp |
| 1737 | Index: simd/jcqnts2i-64.asm |
| 1738 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1739 | --- simd/jcqnts2i-64.asm (revision 829) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1740 | +++ simd/jcqnts2i-64.asm (working copy) |
| 1741 | @@ -36,7 +36,7 @@ |
| 1742 | ; r12 = DCTELEM * workspace |
| 1743 | |
| 1744 | align 16 |
| 1745 | - global EXTN(jsimd_convsamp_sse2) |
| 1746 | + global EXTN(jsimd_convsamp_sse2) PRIVATE |
| 1747 | |
| 1748 | EXTN(jsimd_convsamp_sse2): |
| 1749 | push rbp |
| 1750 | @@ -112,7 +112,7 @@ |
| 1751 | ; r12 = DCTELEM * workspace |
| 1752 | |
| 1753 | align 16 |
| 1754 | - global EXTN(jsimd_quantize_sse2) |
| 1755 | + global EXTN(jsimd_quantize_sse2) PRIVATE |
| 1756 | |
| 1757 | EXTN(jsimd_quantize_sse2): |
| 1758 | push rbp |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 1759 | Index: simd/jdclrss2.asm |
| 1760 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1761 | --- simd/jdclrss2.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 1762 | +++ simd/jdclrss2.asm (working copy) |
hbono@chromium.org | 9862697 | 2011-08-03 03:13:08 +0000 | [diff] [blame] | 1763 | @@ -40,7 +40,7 @@ |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 1764 | %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
| 1765 | |
| 1766 | align 16 |
| 1767 | - global EXTN(jsimd_ycc_rgb_convert_sse2) |
| 1768 | + global EXTN(jsimd_ycc_rgb_convert_sse2) PRIVATE |
| 1769 | |
| 1770 | EXTN(jsimd_ycc_rgb_convert_sse2): |
| 1771 | push ebp |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 1772 | Index: simd/jcqntsse.asm |
| 1773 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1774 | --- simd/jcqntsse.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 1775 | +++ simd/jcqntsse.asm (working copy) |
| 1776 | @@ -35,7 +35,7 @@ |
| 1777 | %define workspace ebp+16 ; FAST_FLOAT * workspace |
| 1778 | |
| 1779 | align 16 |
| 1780 | - global EXTN(jsimd_convsamp_float_sse) |
| 1781 | + global EXTN(jsimd_convsamp_float_sse) PRIVATE |
| 1782 | |
| 1783 | EXTN(jsimd_convsamp_float_sse): |
| 1784 | push ebp |
| 1785 | @@ -138,7 +138,7 @@ |
| 1786 | %define workspace ebp+16 ; FAST_FLOAT * workspace |
| 1787 | |
| 1788 | align 16 |
| 1789 | - global EXTN(jsimd_quantize_float_sse) |
| 1790 | + global EXTN(jsimd_quantize_float_sse) PRIVATE |
| 1791 | |
| 1792 | EXTN(jsimd_quantize_float_sse): |
| 1793 | push ebp |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 1794 | Index: simd/jiss2int-64.asm |
| 1795 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1796 | --- simd/jiss2int-64.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 1797 | +++ simd/jiss2int-64.asm (working copy) |
| 1798 | @@ -67,7 +67,7 @@ |
| 1799 | SECTION SEG_CONST |
| 1800 | |
| 1801 | alignz 16 |
| 1802 | - global EXTN(jconst_idct_islow_sse2) |
| 1803 | + global EXTN(jconst_idct_islow_sse2) PRIVATE |
| 1804 | |
| 1805 | EXTN(jconst_idct_islow_sse2): |
| 1806 | |
| 1807 | @@ -106,7 +106,7 @@ |
| 1808 | %define WK_NUM 12 |
| 1809 | |
| 1810 | align 16 |
| 1811 | - global EXTN(jsimd_idct_islow_sse2) |
| 1812 | + global EXTN(jsimd_idct_islow_sse2) PRIVATE |
| 1813 | |
| 1814 | EXTN(jsimd_idct_islow_sse2): |
| 1815 | push rbp |
| 1816 | Index: simd/jfmmxfst.asm |
| 1817 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1818 | --- simd/jfmmxfst.asm (revision 829) |
hbono@chromium.org | 6863548 | 2011-02-07 06:02:41 +0000 | [diff] [blame] | 1819 | +++ simd/jfmmxfst.asm (working copy) |
| 1820 | @@ -52,7 +52,7 @@ |
| 1821 | %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
| 1822 | |
| 1823 | alignz 16 |
| 1824 | - global EXTN(jconst_fdct_ifast_mmx) |
| 1825 | + global EXTN(jconst_fdct_ifast_mmx) PRIVATE |
| 1826 | |
| 1827 | EXTN(jconst_fdct_ifast_mmx): |
| 1828 | |
| 1829 | @@ -80,7 +80,7 @@ |
| 1830 | %define WK_NUM 2 |
| 1831 | |
| 1832 | align 16 |
| 1833 | - global EXTN(jsimd_fdct_ifast_mmx) |
| 1834 | + global EXTN(jsimd_fdct_ifast_mmx) PRIVATE |
| 1835 | |
| 1836 | EXTN(jsimd_fdct_ifast_mmx): |
| 1837 | push ebp |
hbono@chromium.org | 538d9fd | 2011-08-15 06:52:21 +0000 | [diff] [blame] | 1838 | Index: jdarith.c |
| 1839 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1840 | --- jdarith.c (revision 829) |
hbono@chromium.org | 538d9fd | 2011-08-15 06:52:21 +0000 | [diff] [blame] | 1841 | +++ jdarith.c (working copy) |
| 1842 | @@ -150,8 +150,8 @@ |
| 1843 | */ |
| 1844 | sv = *st; |
| 1845 | qe = jpeg_aritab[sv & 0x7F]; /* => Qe_Value */ |
| 1846 | - nl = qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */ |
| 1847 | - nm = qe & 0xFF; qe >>= 8; /* Next_Index_MPS */ |
| 1848 | + nl = (unsigned char) qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */ |
| 1849 | + nm = (unsigned char) qe & 0xFF; qe >>= 8; /* Next_Index_MPS */ |
| 1850 | |
| 1851 | /* Decode & estimation procedures per sections D.2.4 & D.2.5 */ |
| 1852 | temp = e->a - qe; |
| 1853 | Index: jdhuff.c |
| 1854 | =================================================================== |
hbono@chromium.org | df5ffdd | 2012-05-11 07:46:03 +0000 | [diff] [blame] | 1855 | --- jdhuff.c (revision 829) |
hbono@chromium.org | 538d9fd | 2011-08-15 06:52:21 +0000 | [diff] [blame] | 1856 | +++ jdhuff.c (working copy) |
| 1857 | @@ -742,7 +742,7 @@ |
| 1858 | * this module, since we'll just re-assign them on the next call.) |
| 1859 | */ |
| 1860 | |
| 1861 | -#define BUFSIZE (DCTSIZE2 * 2) |
| 1862 | +#define BUFSIZE (DCTSIZE2 * 2u) |
| 1863 | |
| 1864 | METHODDEF(boolean) |
| 1865 | decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) |
noel@chromium.org | 841fff8 | 2014-05-23 23:38:59 +0000 | [diff] [blame] | 1866 | Index: jchuff.c |
| 1867 | =================================================================== |
| 1868 | --- jchuff.c (revision 1219) |
| 1869 | +++ jchuff.c (revision 1220) |
| 1870 | @@ -22,8 +22,36 @@ |
| 1871 | #include "jchuff.h" /* Declarations shared with jcphuff.c */ |
| 1872 | #include <limits.h> |
| 1873 | |
| 1874 | +/* |
| 1875 | + * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be |
| 1876 | + * used for bit counting rather than the lookup table. This will reduce the |
| 1877 | + * memory footprint by 64k, which is important for some mobile applications |
| 1878 | + * that create many isolated instances of libjpeg-turbo (web browsers, for |
| 1879 | + * instance.) This may improve performance on some mobile platforms as well. |
| 1880 | + * This feature is enabled by default only on ARM processors, because some x86 |
| 1881 | + * chips have a slow implementation of bsr, and the use of clz/bsr cannot be |
| 1882 | + * shown to have a significant performance impact even on the x86 chips that |
| 1883 | + * have a fast implementation of it. When building for ARMv6, you can |
| 1884 | + * explicitly disable the use of clz/bsr by adding -mthumb to the compiler |
| 1885 | + * flags (this defines __thumb__). |
| 1886 | + */ |
| 1887 | + |
| 1888 | +/* NOTE: Both GCC and Clang define __GNUC__ */ |
| 1889 | +#if defined __GNUC__ && defined __arm__ |
| 1890 | +#if !defined __thumb__ || defined __thumb2__ |
| 1891 | +#define USE_CLZ_INTRINSIC |
| 1892 | +#endif |
| 1893 | +#endif |
| 1894 | + |
| 1895 | +#ifdef USE_CLZ_INTRINSIC |
| 1896 | +#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) |
| 1897 | +#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) |
| 1898 | +#else |
| 1899 | static unsigned char jpeg_nbits_table[65536]; |
| 1900 | static int jpeg_nbits_table_init = 0; |
| 1901 | +#define JPEG_NBITS(x) (jpeg_nbits_table[x]) |
| 1902 | +#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x) |
| 1903 | +#endif |
| 1904 | |
| 1905 | #ifndef min |
| 1906 | #define min(a,b) ((a)<(b)?(a):(b)) |
| 1907 | @@ -272,6 +300,7 @@ |
| 1908 | dtbl->ehufsi[i] = huffsize[p]; |
| 1909 | } |
| 1910 | |
| 1911 | +#ifndef USE_CLZ_INTRINSIC |
| 1912 | if(!jpeg_nbits_table_init) { |
| 1913 | for(i = 0; i < 65536; i++) { |
| 1914 | int nbits = 0, temp = i; |
| 1915 | @@ -280,6 +309,7 @@ |
| 1916 | } |
| 1917 | jpeg_nbits_table_init = 1; |
| 1918 | } |
| 1919 | +#endif |
| 1920 | } |
| 1921 | |
| 1922 | |
| 1923 | @@ -482,7 +512,7 @@ |
| 1924 | temp2 += temp3; |
| 1925 | |
| 1926 | /* Find the number of bits needed for the magnitude of the coefficient */ |
| 1927 | - nbits = jpeg_nbits_table[temp]; |
| 1928 | + nbits = JPEG_NBITS(temp); |
| 1929 | |
| 1930 | /* Emit the Huffman-coded symbol for the number of bits */ |
| 1931 | code = dctbl->ehufco[nbits]; |
| 1932 | @@ -516,7 +546,7 @@ |
| 1933 | temp ^= temp3; \ |
| 1934 | temp -= temp3; \ |
| 1935 | temp2 += temp3; \ |
| 1936 | - nbits = jpeg_nbits_table[temp]; \ |
| 1937 | + nbits = JPEG_NBITS_NONZERO(temp); \ |
| 1938 | /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \ |
| 1939 | while (r > 15) { \ |
| 1940 | EMIT_BITS(code_0xf0, size_0xf0) \ |
rmcilroy@chromium.org | 2ed5319 | 2014-08-29 12:32:19 +0000 | [diff] [blame] | 1941 | Index: simd/jsimd_arm64.c |
| 1942 | =================================================================== |
| 1943 | --- /dev/null |
| 1944 | +++ simd/jsimd_arm64.c |
| 1945 | @@ -0,0 +1,544 @@ |
| 1946 | +/* |
| 1947 | + * jsimd_arm64.c |
| 1948 | + * |
| 1949 | + * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 1950 | + * Copyright 2009-2011, 2013-2014 D. R. Commander |
| 1951 | + * |
| 1952 | + * Based on the x86 SIMD extension for IJG JPEG library, |
| 1953 | + * Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 1954 | + * For conditions of distribution and use, see copyright notice in jsimdext.inc |
| 1955 | + * |
| 1956 | + * This file contains the interface between the "normal" portions |
| 1957 | + * of the library and the SIMD implementations when running on a |
| 1958 | + * 64-bit ARM architecture. |
| 1959 | + */ |
| 1960 | + |
| 1961 | +#define JPEG_INTERNALS |
| 1962 | +#include "../jinclude.h" |
| 1963 | +#include "../jpeglib.h" |
| 1964 | +#include "../jsimd.h" |
| 1965 | +#include "../jdct.h" |
| 1966 | +#include "../jsimddct.h" |
| 1967 | +#include "jsimd.h" |
| 1968 | + |
| 1969 | +#include <stdio.h> |
| 1970 | +#include <string.h> |
| 1971 | +#include <ctype.h> |
| 1972 | + |
| 1973 | +static unsigned int simd_support = ~0; |
| 1974 | + |
| 1975 | +/* |
| 1976 | + * Check what SIMD accelerations are supported. |
| 1977 | + * |
| 1978 | + * FIXME: This code is racy under a multi-threaded environment. |
| 1979 | + */ |
| 1980 | + |
| 1981 | +/* |
| 1982 | + * ARMv8 architectures support NEON extensions by default. |
| 1983 | + * It is no longer optional as it was with ARMv7. |
| 1984 | + */ |
| 1985 | + |
| 1986 | + |
| 1987 | +LOCAL(void) |
| 1988 | +init_simd (void) |
| 1989 | +{ |
| 1990 | + char *env = NULL; |
| 1991 | + |
| 1992 | + if (simd_support != ~0U) |
| 1993 | + return; |
| 1994 | + |
| 1995 | + simd_support = 0; |
| 1996 | + |
| 1997 | + simd_support |= JSIMD_ARM_NEON; |
| 1998 | + |
| 1999 | + /* Force different settings through environment variables */ |
| 2000 | + env = getenv("JSIMD_FORCENEON"); |
| 2001 | + if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 2002 | + simd_support &= JSIMD_ARM_NEON; |
| 2003 | + env = getenv("JSIMD_FORCENONE"); |
| 2004 | + if ((env != NULL) && (strcmp(env, "1") == 0)) |
| 2005 | + simd_support = 0; |
| 2006 | +} |
| 2007 | + |
| 2008 | +GLOBAL(int) |
| 2009 | +jsimd_can_rgb_ycc (void) |
| 2010 | +{ |
| 2011 | + init_simd(); |
| 2012 | + |
| 2013 | + return 0; |
| 2014 | +} |
| 2015 | + |
| 2016 | +GLOBAL(int) |
| 2017 | +jsimd_can_rgb_gray (void) |
| 2018 | +{ |
| 2019 | + init_simd(); |
| 2020 | + |
| 2021 | + return 0; |
| 2022 | +} |
| 2023 | + |
| 2024 | +GLOBAL(int) |
| 2025 | +jsimd_can_ycc_rgb (void) |
| 2026 | +{ |
| 2027 | + init_simd(); |
| 2028 | + |
| 2029 | + /* The code is optimised for these values only */ |
| 2030 | + if (BITS_IN_JSAMPLE != 8) |
| 2031 | + return 0; |
| 2032 | + if (sizeof(JDIMENSION) != 4) |
| 2033 | + return 0; |
| 2034 | + if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
| 2035 | + return 0; |
| 2036 | + |
| 2037 | + if (simd_support & JSIMD_ARM_NEON) |
| 2038 | + return 1; |
| 2039 | + |
| 2040 | + return 0; |
| 2041 | +} |
| 2042 | + |
| 2043 | +GLOBAL(int) |
| 2044 | +jsimd_can_ycc_rgb565 (void) |
| 2045 | +{ |
| 2046 | + init_simd(); |
| 2047 | + |
| 2048 | + /* The code is optimised for these values only */ |
| 2049 | + if (BITS_IN_JSAMPLE != 8) |
| 2050 | + return 0; |
| 2051 | + if (sizeof(JDIMENSION) != 4) |
| 2052 | + return 0; |
| 2053 | + |
| 2054 | + if (simd_support & JSIMD_ARM_NEON) |
| 2055 | + return 1; |
| 2056 | + |
| 2057 | + return 0; |
| 2058 | +} |
| 2059 | + |
| 2060 | +GLOBAL(void) |
| 2061 | +jsimd_rgb_ycc_convert (j_compress_ptr cinfo, |
| 2062 | + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
| 2063 | + JDIMENSION output_row, int num_rows) |
| 2064 | +{ |
| 2065 | +} |
| 2066 | + |
| 2067 | +GLOBAL(void) |
| 2068 | +jsimd_rgb_gray_convert (j_compress_ptr cinfo, |
| 2069 | + JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
| 2070 | + JDIMENSION output_row, int num_rows) |
| 2071 | +{ |
| 2072 | +} |
| 2073 | + |
| 2074 | +GLOBAL(void) |
| 2075 | +jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, |
| 2076 | + JSAMPIMAGE input_buf, JDIMENSION input_row, |
| 2077 | + JSAMPARRAY output_buf, int num_rows) |
| 2078 | +{ |
| 2079 | + void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); |
| 2080 | + |
| 2081 | + switch(cinfo->out_color_space) { |
| 2082 | + case JCS_EXT_RGB: |
| 2083 | + neonfct=jsimd_ycc_extrgb_convert_neon; |
| 2084 | + break; |
| 2085 | + case JCS_EXT_RGBX: |
| 2086 | + case JCS_EXT_RGBA: |
| 2087 | + neonfct=jsimd_ycc_extrgbx_convert_neon; |
| 2088 | + break; |
| 2089 | + case JCS_EXT_BGR: |
| 2090 | + neonfct=jsimd_ycc_extbgr_convert_neon; |
| 2091 | + break; |
| 2092 | + case JCS_EXT_BGRX: |
| 2093 | + case JCS_EXT_BGRA: |
| 2094 | + neonfct=jsimd_ycc_extbgrx_convert_neon; |
| 2095 | + break; |
| 2096 | + case JCS_EXT_XBGR: |
| 2097 | + case JCS_EXT_ABGR: |
| 2098 | + neonfct=jsimd_ycc_extxbgr_convert_neon; |
| 2099 | + break; |
| 2100 | + case JCS_EXT_XRGB: |
| 2101 | + case JCS_EXT_ARGB: |
| 2102 | + neonfct=jsimd_ycc_extxrgb_convert_neon; |
| 2103 | + break; |
| 2104 | + default: |
| 2105 | + neonfct=jsimd_ycc_extrgb_convert_neon; |
| 2106 | + break; |
| 2107 | + } |
| 2108 | + |
| 2109 | + if (simd_support & JSIMD_ARM_NEON) |
| 2110 | + neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); |
| 2111 | +} |
| 2112 | + |
| 2113 | +GLOBAL(void) |
| 2114 | +jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, |
| 2115 | + JSAMPIMAGE input_buf, JDIMENSION input_row, |
| 2116 | + JSAMPARRAY output_buf, int num_rows) |
| 2117 | +{ |
| 2118 | + if (simd_support & JSIMD_ARM_NEON) |
| 2119 | + jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row, |
| 2120 | + output_buf, num_rows); |
| 2121 | +} |
| 2122 | + |
| 2123 | +GLOBAL(int) |
| 2124 | +jsimd_can_h2v2_downsample (void) |
| 2125 | +{ |
| 2126 | + init_simd(); |
| 2127 | + |
| 2128 | + return 0; |
| 2129 | +} |
| 2130 | + |
| 2131 | +GLOBAL(int) |
| 2132 | +jsimd_can_h2v1_downsample (void) |
| 2133 | +{ |
| 2134 | + init_simd(); |
| 2135 | + |
| 2136 | + return 0; |
| 2137 | +} |
| 2138 | + |
| 2139 | +GLOBAL(void) |
| 2140 | +jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, |
| 2141 | + JSAMPARRAY input_data, JSAMPARRAY output_data) |
| 2142 | +{ |
| 2143 | +} |
| 2144 | + |
| 2145 | +GLOBAL(void) |
| 2146 | +jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, |
| 2147 | + JSAMPARRAY input_data, JSAMPARRAY output_data) |
| 2148 | +{ |
| 2149 | +} |
| 2150 | + |
| 2151 | +GLOBAL(int) |
| 2152 | +jsimd_can_h2v2_upsample (void) |
| 2153 | +{ |
| 2154 | + init_simd(); |
| 2155 | + |
| 2156 | + return 0; |
| 2157 | +} |
| 2158 | + |
| 2159 | +GLOBAL(int) |
| 2160 | +jsimd_can_h2v1_upsample (void) |
| 2161 | +{ |
| 2162 | + init_simd(); |
| 2163 | + |
| 2164 | + return 0; |
| 2165 | +} |
| 2166 | + |
| 2167 | +GLOBAL(void) |
| 2168 | +jsimd_h2v2_upsample (j_decompress_ptr cinfo, |
| 2169 | + jpeg_component_info * compptr, |
| 2170 | + JSAMPARRAY input_data, |
| 2171 | + JSAMPARRAY * output_data_ptr) |
| 2172 | +{ |
| 2173 | +} |
| 2174 | + |
| 2175 | +GLOBAL(void) |
| 2176 | +jsimd_h2v1_upsample (j_decompress_ptr cinfo, |
| 2177 | + jpeg_component_info * compptr, |
| 2178 | + JSAMPARRAY input_data, |
| 2179 | + JSAMPARRAY * output_data_ptr) |
| 2180 | +{ |
| 2181 | +} |
| 2182 | + |
| 2183 | +GLOBAL(int) |
| 2184 | +jsimd_can_h2v2_fancy_upsample (void) |
| 2185 | +{ |
| 2186 | + init_simd(); |
| 2187 | + |
| 2188 | + return 0; /* always 0: jsimd_h2v2_fancy_upsample() in this file has an empty body */ |
| 2189 | +} |
| 2190 | + |
| 2191 | +GLOBAL(int) |
| 2192 | +jsimd_can_h2v1_fancy_upsample (void) |
| 2193 | +{ |
| 2194 | + init_simd(); |
| 2195 | + |
| 2196 | + return 0; /* always 0: jsimd_h2v1_fancy_upsample() in this file has an empty body */ |
| 2197 | +} |
| 2198 | + |
| 2199 | +GLOBAL(void) |
| 2200 | +jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, |
| 2201 | + jpeg_component_info * compptr, |
| 2202 | + JSAMPARRAY input_data, |
| 2203 | + JSAMPARRAY * output_data_ptr) |
| 2204 | +{ /* empty stub: all arguments are ignored; jsimd_can_h2v2_fancy_upsample() returns 0 */ |
| 2205 | +} |
| 2206 | + |
| 2207 | +GLOBAL(void) |
| 2208 | +jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, |
| 2209 | + jpeg_component_info * compptr, |
| 2210 | + JSAMPARRAY input_data, |
| 2211 | + JSAMPARRAY * output_data_ptr) |
| 2212 | +{ /* empty stub: all arguments are ignored; jsimd_can_h2v1_fancy_upsample() returns 0 */ |
| 2213 | +} |
| 2214 | + |
| 2215 | +GLOBAL(int) |
| 2216 | +jsimd_can_h2v2_merged_upsample (void) |
| 2217 | +{ |
| 2218 | + init_simd(); |
| 2219 | + |
| 2220 | + return 0; /* always 0: jsimd_h2v2_merged_upsample() in this file has an empty body */ |
| 2221 | +} |
| 2222 | + |
| 2223 | +GLOBAL(int) |
| 2224 | +jsimd_can_h2v1_merged_upsample (void) |
| 2225 | +{ |
| 2226 | + init_simd(); |
| 2227 | + |
| 2228 | + return 0; /* always 0: jsimd_h2v1_merged_upsample() in this file has an empty body */ |
| 2229 | +} |
| 2230 | + |
| 2231 | +GLOBAL(void) |
| 2232 | +jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, |
| 2233 | + JSAMPIMAGE input_buf, |
| 2234 | + JDIMENSION in_row_group_ctr, |
| 2235 | + JSAMPARRAY output_buf) |
| 2236 | +{ /* empty stub: all arguments are ignored; jsimd_can_h2v2_merged_upsample() returns 0 */ |
| 2237 | +} |
| 2238 | + |
| 2239 | +GLOBAL(void) |
| 2240 | +jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, |
| 2241 | + JSAMPIMAGE input_buf, |
| 2242 | + JDIMENSION in_row_group_ctr, |
| 2243 | + JSAMPARRAY output_buf) |
| 2244 | +{ /* empty stub: all arguments are ignored; jsimd_can_h2v1_merged_upsample() returns 0 */ |
| 2245 | +} |
| 2246 | + |
| 2247 | +GLOBAL(int) |
| 2248 | +jsimd_can_convsamp (void) |
| 2249 | +{ |
| 2250 | + init_simd(); |
| 2251 | + |
| 2252 | + return 0; /* always 0: jsimd_convsamp() in this file has an empty body */ |
| 2253 | +} |
| 2254 | + |
| 2255 | +GLOBAL(int) |
| 2256 | +jsimd_can_convsamp_float (void) |
| 2257 | +{ |
| 2258 | + init_simd(); |
| 2259 | + |
| 2260 | + return 0; /* always 0: jsimd_convsamp_float() in this file has an empty body */ |
| 2261 | +} |
| 2262 | + |
| 2263 | +GLOBAL(void) |
| 2264 | +jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, |
| 2265 | + DCTELEM * workspace) |
| 2266 | +{ /* empty stub: workspace is left untouched; jsimd_can_convsamp() returns 0 */ |
| 2267 | +} |
| 2268 | + |
| 2269 | +GLOBAL(void) |
| 2270 | +jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, |
| 2271 | + FAST_FLOAT * workspace) |
| 2272 | +{ /* empty stub: workspace is left untouched; jsimd_can_convsamp_float() returns 0 */ |
| 2273 | +} |
| 2274 | + |
| 2275 | +GLOBAL(int) |
| 2276 | +jsimd_can_fdct_islow (void) |
| 2277 | +{ |
| 2278 | + init_simd(); |
| 2279 | + |
| 2280 | + return 0; /* always 0: jsimd_fdct_islow() in this file has an empty body */ |
| 2281 | +} |
| 2282 | + |
| 2283 | +GLOBAL(int) |
| 2284 | +jsimd_can_fdct_ifast (void) |
| 2285 | +{ |
| 2286 | + init_simd(); |
| 2287 | + |
| 2288 | + return 0; /* always 0: jsimd_fdct_ifast() in this file has an empty body */ |
| 2289 | +} |
| 2290 | + |
| 2291 | +GLOBAL(int) |
| 2292 | +jsimd_can_fdct_float (void) |
| 2293 | +{ |
| 2294 | + init_simd(); |
| 2295 | + |
| 2296 | + return 0; /* always 0: jsimd_fdct_float() in this file has an empty body */ |
| 2297 | +} |
| 2298 | + |
| 2299 | +GLOBAL(void) |
| 2300 | +jsimd_fdct_islow (DCTELEM * data) |
| 2301 | +{ /* empty stub: data is left untouched; jsimd_can_fdct_islow() returns 0 */ |
| 2302 | +} |
| 2303 | + |
| 2304 | +GLOBAL(void) |
| 2305 | +jsimd_fdct_ifast (DCTELEM * data) |
| 2306 | +{ /* empty stub: data is left untouched; jsimd_can_fdct_ifast() returns 0 */ |
| 2307 | +} |
| 2308 | + |
| 2309 | +GLOBAL(void) |
| 2310 | +jsimd_fdct_float (FAST_FLOAT * data) |
| 2311 | +{ /* empty stub: data is left untouched; jsimd_can_fdct_float() returns 0 */ |
| 2312 | +} |
| 2313 | + |
| 2314 | +GLOBAL(int) |
| 2315 | +jsimd_can_quantize (void) |
| 2316 | +{ |
| 2317 | + init_simd(); |
| 2318 | + |
| 2319 | + return 0; /* always 0: jsimd_quantize() in this file has an empty body */ |
| 2320 | +} |
| 2321 | + |
| 2322 | +GLOBAL(int) |
| 2323 | +jsimd_can_quantize_float (void) |
| 2324 | +{ |
| 2325 | + init_simd(); |
| 2326 | + |
| 2327 | + return 0; /* always 0: jsimd_quantize_float() in this file has an empty body */ |
| 2328 | +} |
| 2329 | + |
| 2330 | +GLOBAL(void) |
| 2331 | +jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, |
| 2332 | + DCTELEM * workspace) |
| 2333 | +{ /* empty stub: coef_block is left untouched; jsimd_can_quantize() returns 0 */ |
| 2334 | +} |
| 2335 | + |
| 2336 | +GLOBAL(void) |
| 2337 | +jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, |
| 2338 | + FAST_FLOAT * workspace) |
| 2339 | +{ /* empty stub: coef_block is left untouched; jsimd_can_quantize_float() returns 0 */ |
| 2340 | +} |
| 2341 | + |
| 2342 | +GLOBAL(int) |
| 2343 | +jsimd_can_idct_2x2 (void) |
| 2344 | +{ /* returns 1 only when every size check passes and simd_support has JSIMD_ARM_NEON, else 0 */ |
| 2345 | + init_simd(); |
| 2346 | + |
| 2347 | + /* The code is optimised for these values only; any mismatch below yields 0 */ |
| 2348 | + if (DCTSIZE != 8) |
| 2349 | + return 0; |
| 2350 | + if (sizeof(JCOEF) != 2) |
| 2351 | + return 0; |
| 2352 | + if (BITS_IN_JSAMPLE != 8) |
| 2353 | + return 0; |
| 2354 | + if (sizeof(JDIMENSION) != 4) |
| 2355 | + return 0; |
| 2356 | + if (sizeof(ISLOW_MULT_TYPE) != 2) |
| 2357 | + return 0; |
| 2358 | + |
| 2359 | + if (simd_support & JSIMD_ARM_NEON) /* same bit jsimd_idct_2x2() tests before calling jsimd_idct_2x2_neon() */ |
| 2360 | + return 1; |
| 2361 | + |
| 2362 | + return 0; |
| 2363 | +} |
| 2364 | + |
| 2365 | +GLOBAL(int) |
| 2366 | +jsimd_can_idct_4x4 (void) |
| 2367 | +{ /* returns 1 only when every size check passes and simd_support has JSIMD_ARM_NEON, else 0 */ |
| 2368 | + init_simd(); |
| 2369 | + |
| 2370 | + /* The code is optimised for these values only; any mismatch below yields 0 */ |
| 2371 | + if (DCTSIZE != 8) |
| 2372 | + return 0; |
| 2373 | + if (sizeof(JCOEF) != 2) |
| 2374 | + return 0; |
| 2375 | + if (BITS_IN_JSAMPLE != 8) |
| 2376 | + return 0; |
| 2377 | + if (sizeof(JDIMENSION) != 4) |
| 2378 | + return 0; |
| 2379 | + if (sizeof(ISLOW_MULT_TYPE) != 2) |
| 2380 | + return 0; |
| 2381 | + |
| 2382 | + if (simd_support & JSIMD_ARM_NEON) /* same bit jsimd_idct_4x4() tests before calling jsimd_idct_4x4_neon() */ |
| 2383 | + return 1; |
| 2384 | + |
| 2385 | + return 0; |
| 2386 | +} |
| 2387 | + |
| 2388 | +GLOBAL(void) |
| 2389 | +jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 2390 | + JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 2391 | + JDIMENSION output_col) |
| 2392 | +{ /* forwards to jsimd_idct_2x2_neon() with compptr->dct_table; cinfo is unused and init_simd() is not called here */ |
| 2393 | + if (simd_support & JSIMD_ARM_NEON) /* when the bit is clear the function silently does nothing */ |
| 2394 | + jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, |
| 2395 | + output_col); |
| 2396 | +} |
| 2397 | + |
| 2398 | +GLOBAL(void) |
| 2399 | +jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 2400 | + JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 2401 | + JDIMENSION output_col) |
| 2402 | +{ /* forwards to jsimd_idct_4x4_neon() with compptr->dct_table; cinfo is unused and init_simd() is not called here */ |
| 2403 | + if (simd_support & JSIMD_ARM_NEON) /* when the bit is clear the function silently does nothing */ |
| 2404 | + jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, |
| 2405 | + output_col); |
| 2406 | +} |
| 2407 | + |
| 2408 | +GLOBAL(int) |
| 2409 | +jsimd_can_idct_islow (void) |
| 2410 | +{ /* returns 1 only when every size check passes and simd_support has JSIMD_ARM_NEON, else 0 */ |
| 2411 | + init_simd(); |
| 2412 | + |
| 2413 | + /* The code is optimised for these values only; any mismatch below yields 0 */ |
| 2414 | + if (DCTSIZE != 8) |
| 2415 | + return 0; |
| 2416 | + if (sizeof(JCOEF) != 2) |
| 2417 | + return 0; |
| 2418 | + if (BITS_IN_JSAMPLE != 8) |
| 2419 | + return 0; |
| 2420 | + if (sizeof(JDIMENSION) != 4) |
| 2421 | + return 0; |
| 2422 | + if (sizeof(ISLOW_MULT_TYPE) != 2) |
| 2423 | + return 0; |
| 2424 | + |
| 2425 | + if (simd_support & JSIMD_ARM_NEON) /* same bit jsimd_idct_islow() tests before calling jsimd_idct_islow_neon() */ |
| 2426 | + return 1; |
| 2427 | + |
| 2428 | + return 0; |
| 2429 | +} |
| 2430 | + |
| 2431 | +GLOBAL(int) |
| 2432 | +jsimd_can_idct_ifast (void) |
| 2433 | +{ /* returns 1 only when every check passes and simd_support has JSIMD_ARM_NEON, else 0 */ |
| 2434 | + init_simd(); |
| 2435 | + |
| 2436 | + /* The code is optimised for these values only; any mismatch below yields 0 */ |
| 2437 | + if (DCTSIZE != 8) |
| 2438 | + return 0; |
| 2439 | + if (sizeof(JCOEF) != 2) |
| 2440 | + return 0; |
| 2441 | + if (BITS_IN_JSAMPLE != 8) |
| 2442 | + return 0; |
| 2443 | + if (sizeof(JDIMENSION) != 4) |
| 2444 | + return 0; |
| 2445 | + if (sizeof(IFAST_MULT_TYPE) != 2) /* ifast variant checks IFAST_MULT_TYPE rather than ISLOW_MULT_TYPE */ |
| 2446 | + return 0; |
| 2447 | + if (IFAST_SCALE_BITS != 2) /* extra check that the islow/2x2/4x4 queries do not have */ |
| 2448 | + return 0; |
| 2449 | + |
| 2450 | + if (simd_support & JSIMD_ARM_NEON) /* same bit jsimd_idct_ifast() tests before calling jsimd_idct_ifast_neon() */ |
| 2451 | + return 1; |
| 2452 | + |
| 2453 | + return 0; |
| 2454 | +} |
| 2455 | + |
| 2456 | +GLOBAL(int) |
| 2457 | +jsimd_can_idct_float (void) |
| 2458 | +{ |
| 2459 | + init_simd(); |
| 2460 | + |
| 2461 | + return 0; /* always 0: jsimd_idct_float() in this file has an empty body */ |
| 2462 | +} |
| 2463 | + |
| 2464 | +GLOBAL(void) |
| 2465 | +jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 2466 | + JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 2467 | + JDIMENSION output_col) |
| 2468 | +{ /* forwards to jsimd_idct_islow_neon() with compptr->dct_table; cinfo is unused and init_simd() is not called here */ |
| 2469 | + if (simd_support & JSIMD_ARM_NEON) /* when the bit is clear the function silently does nothing */ |
| 2470 | + jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, |
| 2471 | + output_col); |
| 2472 | +} |
| 2473 | + |
| 2474 | +GLOBAL(void) |
| 2475 | +jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 2476 | + JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 2477 | + JDIMENSION output_col) |
| 2478 | +{ /* forwards to jsimd_idct_ifast_neon() with compptr->dct_table; cinfo is unused and init_simd() is not called here */ |
| 2479 | + if (simd_support & JSIMD_ARM_NEON) /* when the bit is clear the function silently does nothing */ |
| 2480 | + jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, |
| 2481 | + output_col); |
| 2482 | +} |
| 2483 | + |
| 2484 | +GLOBAL(void) |
| 2485 | +jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
| 2486 | + JCOEFPTR coef_block, JSAMPARRAY output_buf, |
| 2487 | + JDIMENSION output_col) |
| 2488 | +{ /* empty stub: output_buf is left untouched; jsimd_can_idct_float() returns 0 */ |
| 2489 | +} |
| 2490 | Index: simd/jsimd_arm64_neon.S |
| 2491 | new file mode 100644 |
| 2492 | =================================================================== |
| 2493 | --- /dev/null |
| 2494 | +++ simd/jsimd_arm64_neon.S |
| 2495 | @@ -0,0 +1,1861 @@ |
| 2496 | +/* |
| 2497 | + * ARMv8 NEON optimizations for libjpeg-turbo |
| 2498 | + * |
| 2499 | + * Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies). |
| 2500 | + * All rights reserved. |
| 2501 | + * Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> |
| 2502 | + * Copyright (C) 2013-2014, Linaro Limited |
| 2503 | + * Author: Ragesh Radhakrishnan <ragesh.r@linaro.org> |
| 2504 | + * |
| 2505 | + * This software is provided 'as-is', without any express or implied |
| 2506 | + * warranty. In no event will the authors be held liable for any damages |
| 2507 | + * arising from the use of this software. |
| 2508 | + * |
| 2509 | + * Permission is granted to anyone to use this software for any purpose, |
| 2510 | + * including commercial applications, and to alter it and redistribute it |
| 2511 | + * freely, subject to the following restrictions: |
| 2512 | + * |
| 2513 | + * 1. The origin of this software must not be misrepresented; you must not |
| 2514 | + * claim that you wrote the original software. If you use this software |
| 2515 | + * in a product, an acknowledgment in the product documentation would be |
| 2516 | + * appreciated but is not required. |
| 2517 | + * 2. Altered source versions must be plainly marked as such, and must not be |
| 2518 | + * misrepresented as being the original software. |
| 2519 | + * 3. This notice may not be removed or altered from any source distribution. |
| 2520 | + */ |
| 2521 | + |
| 2522 | +#if defined(__linux__) && defined(__ELF__) |
| 2523 | +.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */ |
| 2524 | +#endif |
| 2525 | + |
| 2526 | +.text |
| 2527 | +.arch armv8-a+fp+simd |
| 2528 | + |
| 2529 | + |
| 2530 | +#define RESPECT_STRICT_ALIGNMENT 1 |
| 2531 | + |
| 2532 | + |
| 2533 | +/*****************************************************************************/ |
| 2534 | + |
| 2535 | +/* Supplementary macro for setting function attributes: emits the global label for fname (with a leading underscore under __APPLE__) and, under __ELF__, also marks it .hidden and %function */ |
| 2536 | +.macro asm_function fname |
| 2537 | +#ifdef __APPLE__ |
| 2538 | + .globl _\fname |
| 2539 | +_\fname: |
| 2540 | +#else |
| 2541 | + .global \fname |
| 2542 | +#ifdef __ELF__ |
| 2543 | + .hidden \fname |
| 2544 | + .type \fname, %function |
| 2545 | +#endif |
| 2546 | +\fname: |
| 2547 | +#endif |
| 2548 | +.endm |
| 2549 | + |
| 2550 | +/* Transpose elements of single 128 bit registers. Lane 0 of xi is overwritten as scratch, and lane 0 of x1 is replaced by lane 1 of x0 before the trn1/trn2 pair. */ |
| 2551 | +.macro transpose_single x0,x1,xi,xilen,literal |
| 2552 | + ins \xi\xilen[0], \x0\xilen[0] |
| 2553 | + ins \x1\xilen[0], \x0\xilen[1] |
| 2554 | + trn1 \x0\literal, \x0\literal, \x1\literal |
| 2555 | + trn2 \x1\literal, \xi\literal, \x1\literal |
| 2556 | +.endm |
| 2557 | + |
| 2558 | +/* Transpose elements of 2 different registers: x0 receives trn1(x0, x1) and x1 receives trn2(old x0, x1); xi is clobbered with the saved copy of x0 */ |
| 2559 | +.macro transpose x0,x1,xi,xilen,literal |
| 2560 | + mov \xi\xilen, \x0\xilen |
| 2561 | + trn1 \x0\literal, \x0\literal, \x1\literal |
| 2562 | + trn2 \x1\literal, \xi\literal, \x1\literal |
| 2563 | +.endm |
| 2564 | + |
| 2565 | +/* Transpose a block of 4x4 coefficients in four 64-bit registers: this stage applies trn1/trn2 to the pairs (x0, x2) and (x1, x3); xi is clobbered as scratch */ |
| 2566 | +.macro transpose_4x4_32 x0,x0len x1,x1len x2,x2len x3,x3len,xi,xilen |
| 2567 | + mov \xi\xilen, \x0\xilen |
| 2568 | + trn1 \x0\x0len, \x0\x0len, \x2\x2len |
| 2569 | + trn2 \x2\x2len, \xi\x0len, \x2\x2len |
| 2570 | + mov \xi\xilen, \x1\xilen |
| 2571 | + trn1 \x1\x1len, \x1\x1len, \x3\x3len |
| 2572 | + trn2 \x3\x3len, \xi\x1len, \x3\x3len |
| 2573 | +.endm |
| 2574 | +/* Applies trn1/trn2 to the pairs (x0, x1) and (x2, x3); xi is clobbered as scratch. NOTE(review): some operands take another register's len suffix (x1 and x3 use x2len), so this only makes sense when all four len arguments are the same, as in transpose_4x4. */ |
| 2575 | +.macro transpose_4x4_16 x0,x0len x1,x1len, x2,x2len, x3,x3len,xi,xilen |
| 2576 | + mov \xi\xilen, \x0\xilen |
| 2577 | + trn1 \x0\x0len, \x0\x0len, \x1\x1len |
| 2578 | + trn2 \x1\x2len, \xi\x0len, \x1\x2len |
| 2579 | + mov \xi\xilen, \x2\xilen |
| 2580 | + trn1 \x2\x2len, \x2\x2len, \x3\x3len |
| 2581 | + trn2 \x3\x2len, \xi\x1len, \x3\x3len |
| 2582 | +.endm |
| 2583 | +/* 4x4 transpose built from the .4h stage (transpose_4x4_16) followed by the .2s stage (transpose_4x4_32); x5 is the scratch register, copied as .16b */ |
| 2584 | +.macro transpose_4x4 x0, x1, x2, x3,x5 |
| 2585 | + transpose_4x4_16 \x0,.4h, \x1,.4h, \x2,.4h,\x3,.4h,\x5,.16b |
| 2586 | + transpose_4x4_32 \x0,.2s, \x1,.2s, \x2,.2s,\x3,.2s,\x5,.16b |
| 2587 | +.endm |
| 2588 | + |
| 2589 | + |
| 2590 | +#define CENTERJSAMPLE 128 |
| 2591 | + |
| 2592 | +/*****************************************************************************/ |
| 2593 | + |
| 2594 | +/* |
| 2595 | + * Perform dequantization and inverse DCT on one block of coefficients. |
| 2596 | + * |
| 2597 | + * GLOBAL(void) |
| 2598 | + * jsimd_idct_islow_neon (void * dct_table, JCOEFPTR coef_block, |
| 2599 | + * JSAMPARRAY output_buf, JDIMENSION output_col) |
| 2600 | + */ |
| 2601 | +/* Fixed-point constants: each FIX_a_b value equals a.b scaled by 2^13 and rounded (e.g. 0.298631336 * 8192 ~= 2446) */ |
| 2602 | +#define FIX_0_298631336 (2446) |
| 2603 | +#define FIX_0_390180644 (3196) |
| 2604 | +#define FIX_0_541196100 (4433) |
| 2605 | +#define FIX_0_765366865 (6270) |
| 2606 | +#define FIX_0_899976223 (7373) |
| 2607 | +#define FIX_1_175875602 (9633) |
| 2608 | +#define FIX_1_501321110 (12299) |
| 2609 | +#define FIX_1_847759065 (15137) |
| 2610 | +#define FIX_1_961570560 (16069) |
| 2611 | +#define FIX_2_053119869 (16819) |
| 2612 | +#define FIX_2_562915447 (20995) |
| 2613 | +#define FIX_3_072711026 (25172) |
| 2614 | +/* Combined constants: sums and differences of the values above */ |
| 2615 | +#define FIX_1_175875602_MINUS_1_961570560 (FIX_1_175875602 - FIX_1_961570560) |
| 2616 | +#define FIX_1_175875602_MINUS_0_390180644 (FIX_1_175875602 - FIX_0_390180644) |
| 2617 | +#define FIX_0_541196100_MINUS_1_847759065 (FIX_0_541196100 - FIX_1_847759065) |
| 2618 | +#define FIX_3_072711026_MINUS_2_562915447 (FIX_3_072711026 - FIX_2_562915447) |
| 2619 | +#define FIX_0_298631336_MINUS_0_899976223 (FIX_0_298631336 - FIX_0_899976223) |
| 2620 | +#define FIX_1_501321110_MINUS_0_899976223 (FIX_1_501321110 - FIX_0_899976223) |
| 2621 | +#define FIX_2_053119869_MINUS_2_562915447 (FIX_2_053119869 - FIX_2_562915447) |
| 2622 | +#define FIX_0_541196100_PLUS_0_765366865 (FIX_0_541196100 + FIX_0_765366865) |
| 2623 | + |
| 2624 | +/* |
| 2625 | + * Reference SIMD-friendly 1-D ISLOW iDCT C implementation; uses some ideas from the comments in 'simd/jiss2int-64.asm'. |
| 2626 | + * The macro assigns tmp0-tmp3 and tmp10-tmp13 without declaring them, and relies on DCTELEM, INT32 and MULTIPLY() being provided where it is expanded. |
| 2627 | + */ |
| 2628 | +#define REF_1D_IDCT(xrow0, xrow1, xrow2, xrow3, xrow4, xrow5, xrow6, xrow7) \ |
| 2629 | +{ \ |
| 2630 | + DCTELEM row0, row1, row2, row3, row4, row5, row6, row7; \ |
| 2631 | + INT32 q1, q2, q3, q4, q5, q6, q7; \ |
| 2632 | + INT32 tmp11_plus_tmp2, tmp11_minus_tmp2; \ |
| 2633 | + \ |
| 2634 | + /* 1-D iDCT input data */ \ |
| 2635 | + row0 = xrow0; \ |
| 2636 | + row1 = xrow1; \ |
| 2637 | + row2 = xrow2; \ |
| 2638 | + row3 = xrow3; \ |
| 2639 | + row4 = xrow4; \ |
| 2640 | + row5 = xrow5; \ |
| 2641 | + row6 = xrow6; \ |
| 2642 | + row7 = xrow7; \ |
| 2643 | + \ |
| 2644 | + q5 = row7 + row3; \ |
| 2645 | + q4 = row5 + row1; \ |
| 2646 | + q6 = MULTIPLY(q5, FIX_1_175875602_MINUS_1_961570560) + \ |
| 2647 | + MULTIPLY(q4, FIX_1_175875602); \ |
| 2648 | + q7 = MULTIPLY(q5, FIX_1_175875602) + \ |
| 2649 | + MULTIPLY(q4, FIX_1_175875602_MINUS_0_390180644); \ |
| 2650 | + q2 = MULTIPLY(row2, FIX_0_541196100) + \ |
| 2651 | + MULTIPLY(row6, FIX_0_541196100_MINUS_1_847759065); \ |
| 2652 | + q4 = q6; \ |
| 2653 | + q3 = ((INT32) row0 - (INT32) row4) << 13; \ |
| 2654 | + q6 += MULTIPLY(row5, -FIX_2_562915447) + \ |
| 2655 | + MULTIPLY(row3, FIX_3_072711026_MINUS_2_562915447); \ |
| 2656 | + /* now we can use q1 (reloadable constants have been used up) */ \ |
| 2657 | + q1 = q3 + q2; \ |
| 2658 | + q4 += MULTIPLY(row7, FIX_0_298631336_MINUS_0_899976223) + \ |
| 2659 | + MULTIPLY(row1, -FIX_0_899976223); \ |
| 2660 | + q5 = q7; \ |
| 2661 | + q1 = q1 + q6; \ |
| 2662 | + q7 += MULTIPLY(row7, -FIX_0_899976223) + \ |
| 2663 | + MULTIPLY(row1, FIX_1_501321110_MINUS_0_899976223); \ |
| 2664 | + \ |
| 2665 | + /* (tmp11 + tmp2) has been calculated (out_row1 before descale) */ \ |
| 2666 | + tmp11_plus_tmp2 = q1; \ |
| 2667 | + row1 = 0; \ |
| 2668 | + \ |
| 2669 | + q1 = q1 - q6; \ |
| 2670 | + q5 += MULTIPLY(row5, FIX_2_053119869_MINUS_2_562915447) + \ |
| 2671 | + MULTIPLY(row3, -FIX_2_562915447); \ |
| 2672 | + q1 = q1 - q6; \ |
| 2673 | + q6 = MULTIPLY(row2, FIX_0_541196100_PLUS_0_765366865) + \ |
| 2674 | + MULTIPLY(row6, FIX_0_541196100); \ |
| 2675 | + q3 = q3 - q2; \ |
| 2676 | + \ |
| 2677 | + /* (tmp11 - tmp2) has been calculated (out_row6 before descale) */ \ |
| 2678 | + tmp11_minus_tmp2 = q1; \ |
| 2679 | + \ |
| 2680 | + q1 = ((INT32) row0 + (INT32) row4) << 13; \ |
| 2681 | + q2 = q1 + q6; \ |
| 2682 | + q1 = q1 - q6; \ |
| 2683 | + \ |
| 2684 | + /* pick up the results */ \ |
| 2685 | + tmp0 = q4; \ |
| 2686 | + tmp1 = q5; \ |
| 2687 | + tmp2 = (tmp11_plus_tmp2 - tmp11_minus_tmp2) / 2; \ |
| 2688 | + tmp3 = q7; \ |
| 2689 | + tmp10 = q2; \ |
| 2690 | + tmp11 = (tmp11_plus_tmp2 + tmp11_minus_tmp2) / 2; \ |
| 2691 | + tmp12 = q3; \ |
| 2692 | + tmp13 = q1; \ |
| 2693 | +} |
| 2694 | +/* Vector-lane aliases for the constants, four per register (v0, v1, v2), in the same order as the .short entries of jsimd_idct_islow_neon_consts */ |
| 2695 | +#define XFIX_0_899976223 v0.4h[0] |
| 2696 | +#define XFIX_0_541196100 v0.4h[1] |
| 2697 | +#define XFIX_2_562915447 v0.4h[2] |
| 2698 | +#define XFIX_0_298631336_MINUS_0_899976223 v0.4h[3] |
| 2699 | +#define XFIX_1_501321110_MINUS_0_899976223 v1.4h[0] |
| 2700 | +#define XFIX_2_053119869_MINUS_2_562915447 v1.4h[1] |
| 2701 | +#define XFIX_0_541196100_PLUS_0_765366865 v1.4h[2] |
| 2702 | +#define XFIX_1_175875602 v1.4h[3] |
| 2703 | +#define XFIX_1_175875602_MINUS_0_390180644 v2.4h[0] |
| 2704 | +#define XFIX_0_541196100_MINUS_1_847759065 v2.4h[1] |
| 2705 | +#define XFIX_3_072711026_MINUS_2_562915447 v2.4h[2] |
| 2706 | +#define XFIX_1_175875602_MINUS_1_961570560 v2.4h[3] |
| 2707 | + |
| 2708 | +.balign 16 |
| 2709 | +jsimd_idct_islow_neon_consts: |
| 2710 | + .short FIX_0_899976223 /* v0.4h[0] */ |
| 2711 | + .short FIX_0_541196100 /* v0.4h[1] */ |
| 2712 | + .short FIX_2_562915447 /* v0.4h[2] */ |
| 2713 | + .short FIX_0_298631336_MINUS_0_899976223 /* v0.4h[3] */ |
| 2714 | + .short FIX_1_501321110_MINUS_0_899976223 /* v1.4h[0] */ |
| 2715 | + .short FIX_2_053119869_MINUS_2_562915447 /* v1.4h[1] */ |
| 2716 | + .short FIX_0_541196100_PLUS_0_765366865 /* v1.4h[2] */ |
| 2717 | + .short FIX_1_175875602 /* v1.4h[3] */ |
| 2718 | + /* reloadable constants: jsimd_idct_islow_neon re-reads this group into v2 with ld1 {v2.4h}, [x15] after advancing x15 by 16 */ |
| 2719 | + .short FIX_1_175875602_MINUS_0_390180644 /* v2.4h[0] */ |
| 2720 | + .short FIX_0_541196100_MINUS_1_847759065 /* v2.4h[1] */ |
| 2721 | + .short FIX_3_072711026_MINUS_2_562915447 /* v2.4h[2] */ |
| 2722 | + .short FIX_1_175875602_MINUS_1_961570560 /* v2.4h[3] */ |
| 2723 | + |
| 2724 | +asm_function jsimd_idct_islow_neon |
| 2725 | + |
| 2726 | + DCT_TABLE .req x0 |
| 2727 | + COEF_BLOCK .req x1 |
| 2728 | + OUTPUT_BUF .req x2 |
| 2729 | + OUTPUT_COL .req x3 |
| 2730 | + TMP1 .req x0 |
| 2731 | + TMP2 .req x1 |
| 2732 | + TMP3 .req x2 |
| 2733 | + TMP4 .req x15 |
| 2734 | + |
| 2735 | + ROW0L .req v16 |
| 2736 | + ROW0R .req v17 |
| 2737 | + ROW1L .req v18 |
| 2738 | + ROW1R .req v19 |
| 2739 | + ROW2L .req v20 |
| 2740 | + ROW2R .req v21 |
| 2741 | + ROW3L .req v22 |
| 2742 | + ROW3R .req v23 |
| 2743 | + ROW4L .req v24 |
| 2744 | + ROW4R .req v25 |
| 2745 | + ROW5L .req v26 |
| 2746 | + ROW5R .req v27 |
| 2747 | + ROW6L .req v28 |
| 2748 | + ROW6R .req v29 |
| 2749 | + ROW7L .req v30 |
| 2750 | + ROW7R .req v31 |
| 2751 | + /* Save all NEON registers and x15 (32 NEON registers * 8 bytes + 16) */ |
| 2752 | + sub sp, sp, 272 |
| 2753 | + str x15, [sp], 16 |
| 2754 | + adr x15, jsimd_idct_islow_neon_consts |
| 2755 | + st1 {v0.8b - v3.8b}, [sp], 32 |
| 2756 | + st1 {v4.8b - v7.8b}, [sp], 32 |
| 2757 | + st1 {v8.8b - v11.8b}, [sp], 32 |
| 2758 | + st1 {v12.8b - v15.8b}, [sp], 32 |
| 2759 | + st1 {v16.8b - v19.8b}, [sp], 32 |
| 2760 | + st1 {v20.8b - v23.8b}, [sp], 32 |
| 2761 | + st1 {v24.8b - v27.8b}, [sp], 32 |
| 2762 | + st1 {v28.8b - v31.8b}, [sp], 32 |
| 2763 | + ld1 {v16.4h, v17.4h, v18.4h, v19.4h}, [COEF_BLOCK], 32 |
| 2764 | + ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [DCT_TABLE], 32 |
| 2765 | + ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [COEF_BLOCK], 32 |
| 2766 | + mul v16.4h, v16.4h, v0.4h |
| 2767 | + mul v17.4h, v17.4h, v1.4h |
| 2768 | + ins v16.2d[1], v17.2d[0] /* 128 bit q8 */ |
| 2769 | + ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [DCT_TABLE], 32 |
| 2770 | + mul v18.4h, v18.4h, v2.4h |
| 2771 | + mul v19.4h, v19.4h, v3.4h |
| 2772 | + ins v18.2d[1], v19.2d[0] /* 128 bit q9 */ |
| 2773 | + ld1 {v24.4h, v25.4h, v26.4h, v27.4h}, [COEF_BLOCK], 32 |
| 2774 | + mul v20.4h, v20.4h, v4.4h |
| 2775 | + mul v21.4h, v21.4h, v5.4h |
| 2776 | + ins v20.2d[1], v21.2d[0] /* 128 bit q10 */ |
| 2777 | + ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [DCT_TABLE], 32 |
| 2778 | + mul v22.4h, v22.4h, v6.4h |
| 2779 | + mul v23.4h, v23.4h, v7.4h |
| 2780 | + ins v22.2d[1], v23.2d[0] /* 128 bit q11 */ |
| 2781 | + ld1 {v28.4h, v29.4h, v30.4h, v31.4h}, [COEF_BLOCK] |
| 2782 | + mul v24.4h, v24.4h, v0.4h |
| 2783 | + mul v25.4h, v25.4h, v1.4h |
| 2784 | + ins v24.2d[1], v25.2d[0] /* 128 bit q12 */ |
| 2785 | + ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [DCT_TABLE], 32 |
| 2786 | + mul v28.4h, v28.4h, v4.4h |
| 2787 | + mul v29.4h, v29.4h, v5.4h |
| 2788 | + ins v28.2d[1], v29.2d[0] /* 128 bit q14 */ |
| 2789 | + mul v26.4h, v26.4h, v2.4h |
| 2790 | + mul v27.4h, v27.4h, v3.4h |
| 2791 | + ins v26.2d[1], v27.2d[0] /* 128 bit q13 */ |
| 2792 | + ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [x15] /* load constants */ |
| 2793 | + add x15, x15, #16 |
| 2794 | + mul v30.4h, v30.4h, v6.4h |
| 2795 | + mul v31.4h, v31.4h, v7.4h |
| 2796 | + ins v30.2d[1], v31.2d[0] /* 128 bit q15 */ |
| 2797 | + /* Go to the bottom of the stack */ |
| 2798 | + sub sp, sp, 352 |
| 2799 | + stp x4, x5, [sp], 16 |
| 2800 | + st1 {v8.4h - v11.4h}, [sp], 32 /* save NEON registers */ |
| 2801 | + st1 {v12.4h - v15.4h}, [sp], 32 |
| 2802 | + /* 1-D IDCT, pass 1, left 4x8 half */ |
| 2803 | + add v4.4h, ROW7L.4h, ROW3L.4h |
| 2804 | + add v5.4h, ROW5L.4h, ROW1L.4h |
| 2805 | + smull v12.4s, v4.4h, XFIX_1_175875602_MINUS_1_961570560 |
| 2806 | + smlal v12.4s, v5.4h, XFIX_1_175875602 |
| 2807 | + smull v14.4s, v4.4h, XFIX_1_175875602 |
| 2808 | + /* Check for the zero coefficients in the right 4x8 half */ |
| 2809 | + smlal v14.4s, v5.4h, XFIX_1_175875602_MINUS_0_390180644 |
| 2810 | + ssubl v6.4s, ROW0L.4h, ROW4L.4h |
| 2811 | + ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 1 * 8))] |
| 2812 | + smull v4.4s, ROW2L.4h, XFIX_0_541196100 |
| 2813 | + smlal v4.4s, ROW6L.4h, XFIX_0_541196100_MINUS_1_847759065 |
| 2814 | + orr x0, x4, x5 |
| 2815 | + mov v8.16b, v12.16b |
| 2816 | + smlsl v12.4s, ROW5L.4h, XFIX_2_562915447 |
| 2817 | + ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 2 * 8))] |
| 2818 | + smlal v12.4s, ROW3L.4h, XFIX_3_072711026_MINUS_2_562915447 |
| 2819 | + shl v6.4s, v6.4s, #13 |
| 2820 | + orr x0, x0, x4 |
| 2821 | + smlsl v8.4s, ROW1L.4h, XFIX_0_899976223 |
| 2822 | + orr x0, x0 , x5 |
| 2823 | + add v2.4s, v6.4s, v4.4s |
| 2824 | + ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 3 * 8))] |
| 2825 | + mov v10.16b, v14.16b |
| 2826 | + add v2.4s, v2.4s, v12.4s |
| 2827 | + orr x0, x0, x4 |
| 2828 | + smlsl v14.4s, ROW7L.4h, XFIX_0_899976223 |
| 2829 | + orr x0, x0, x5 |
| 2830 | + smlal v14.4s, ROW1L.4h, XFIX_1_501321110_MINUS_0_899976223 |
| 2831 | + rshrn ROW1L.4h, v2.4s, #11 |
| 2832 | + ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 4 * 8))] |
| 2833 | + sub v2.4s, v2.4s, v12.4s |
| 2834 | + smlal v10.4s, ROW5L.4h, XFIX_2_053119869_MINUS_2_562915447 |
| 2835 | + orr x0, x0, x4 |
| 2836 | + smlsl v10.4s, ROW3L.4h, XFIX_2_562915447 |
| 2837 | + orr x0, x0, x5 |
| 2838 | + sub v2.4s, v2.4s, v12.4s |
| 2839 | + smull v12.4s, ROW2L.4h, XFIX_0_541196100_PLUS_0_765366865 |
| 2840 | + ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 5 * 8))] |
| 2841 | + smlal v12.4s, ROW6L.4h, XFIX_0_541196100 |
| 2842 | + sub v6.4s, v6.4s, v4.4s |
| 2843 | + orr x0, x0, x4 |
| 2844 | + rshrn ROW6L.4h, v2.4s, #11 |
| 2845 | + orr x0, x0, x5 |
| 2846 | + add v2.4s, v6.4s, v10.4s |
| 2847 | + ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 6 * 8))] |
| 2848 | + sub v6.4s, v6.4s, v10.4s |
| 2849 | + saddl v10.4s, ROW0L.4h, ROW4L.4h |
| 2850 | + orr x0, x0, x4 |
| 2851 | + rshrn ROW2L.4h, v2.4s, #11 |
| 2852 | + orr x0, x0, x5 |
| 2853 | + rshrn ROW5L.4h, v6.4s, #11 |
| 2854 | + ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 7 * 8))] |
| 2855 | + shl v10.4s, v10.4s, #13 |
| 2856 | + smlal v8.4s, ROW7L.4h, XFIX_0_298631336_MINUS_0_899976223 |
| 2857 | + orr x0, x0, x4 |
| 2858 | + add v4.4s, v10.4s, v12.4s |
| 2859 | + orr x0, x0, x5 |
| 2860 | + cmp x0, #0 /* orrs instruction removed */ |
| 2861 | + sub v2.4s, v10.4s, v12.4s |
| 2862 | + add v12.4s, v4.4s, v14.4s |
| 2863 | + ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 0 * 8))] |
| 2864 | + sub v4.4s, v4.4s, v14.4s |
| 2865 | + add v10.4s, v2.4s, v8.4s |
| 2866 | + orr x0, x4, x5 |
| 2867 | + sub v6.4s, v2.4s, v8.4s |
| 2868 | + /* pop {x4, x5} */ |
| 2869 | + sub sp, sp, 80 |
| 2870 | + ldp x4, x5, [sp], 16 |
| 2871 | + rshrn ROW7L.4h, v4.4s, #11 |
| 2872 | + rshrn ROW3L.4h, v10.4s, #11 |
| 2873 | + rshrn ROW0L.4h, v12.4s, #11 |
| 2874 | + rshrn ROW4L.4h, v6.4s, #11 |
| 2875 | + |
| 2876 | + beq 3f /* Go to do some special handling for the sparse right 4x8 half */ |
| 2877 | + |
| 2878 | + /* 1-D IDCT, pass 1, right 4x8 half */ |
| 2879 | + ld1 {v2.4h}, [x15] /* reload constants */ |
| 2880 | + add v10.4h, ROW7R.4h, ROW3R.4h |
| 2881 | + add v8.4h, ROW5R.4h, ROW1R.4h |
| 2882 | + /* Transpose ROW6L <-> ROW7L (v3 available free register) */ |
| 2883 | + transpose ROW6L, ROW7L, v3, .16b, .4h |
| 2884 | + smull v12.4s, v10.4h, XFIX_1_175875602_MINUS_1_961570560 |
| 2885 | + smlal v12.4s, v8.4h, XFIX_1_175875602 |
| 2886 | + /* Transpose ROW2L <-> ROW3L (v3 available free register) */ |
| 2887 | + transpose ROW2L, ROW3L, v3, .16b, .4h |
| 2888 | + smull v14.4s, v10.4h, XFIX_1_175875602 |
| 2889 | + smlal v14.4s, v8.4h, XFIX_1_175875602_MINUS_0_390180644 |
| 2890 | + /* Transpose ROW0L <-> ROW1L (v3 available free register) */ |
| 2891 | + transpose ROW0L, ROW1L, v3, .16b, .4h |
| 2892 | + ssubl v6.4s, ROW0R.4h, ROW4R.4h |
| 2893 | + smull v4.4s, ROW2R.4h, XFIX_0_541196100 |
| 2894 | + smlal v4.4s, ROW6R.4h, XFIX_0_541196100_MINUS_1_847759065 |
| 2895 | + /* Transpose ROW4L <-> ROW5L (v3 available free register) */ |
| 2896 | + transpose ROW4L, ROW5L, v3, .16b, .4h |
| 2897 | + mov v8.16b, v12.16b |
| 2898 | + smlsl v12.4s, ROW5R.4h, XFIX_2_562915447 |
| 2899 | + smlal v12.4s, ROW3R.4h, XFIX_3_072711026_MINUS_2_562915447 |
| 2900 | + /* Transpose ROW1L <-> ROW3L (v3 available free register) */ |
| 2901 | + transpose ROW1L, ROW3L, v3, .16b, .2s |
| 2902 | + shl v6.4s, v6.4s, #13 |
| 2903 | + smlsl v8.4s, ROW1R.4h, XFIX_0_899976223 |
| 2904 | + /* Transpose ROW4L <-> ROW6L (v3 available free register) */ |
| 2905 | + transpose ROW4L, ROW6L, v3, .16b, .2s |
| 2906 | + add v2.4s, v6.4s, v4.4s |
| 2907 | + mov v10.16b, v14.16b |
| 2908 | + add v2.4s, v2.4s, v12.4s |
| 2909 | + /* Transpose ROW0L <-> ROW2L (v3 available free register) */ |
| 2910 | + transpose ROW0L, ROW2L, v3, .16b, .2s |
| 2911 | + smlsl v14.4s, ROW7R.4h, XFIX_0_899976223 |
| 2912 | + smlal v14.4s, ROW1R.4h, XFIX_1_501321110_MINUS_0_899976223 |
| 2913 | + rshrn ROW1R.4h, v2.4s, #11 |
| 2914 | + /* Transpose ROW5L <-> ROW7L (v3 available free register) */ |
| 2915 | + transpose ROW5L, ROW7L, v3, .16b, .2s |
| 2916 | + sub v2.4s, v2.4s, v12.4s |
| 2917 | + smlal v10.4s, ROW5R.4h, XFIX_2_053119869_MINUS_2_562915447 |
| 2918 | + smlsl v10.4s, ROW3R.4h, XFIX_2_562915447 |
| 2919 | + sub v2.4s, v2.4s, v12.4s |
| 2920 | + smull v12.4s, ROW2R.4h, XFIX_0_541196100_PLUS_0_765366865 |
| 2921 | + smlal v12.4s, ROW6R.4h, XFIX_0_541196100 |
| 2922 | + sub v6.4s, v6.4s, v4.4s |
| 2923 | + rshrn ROW6R.4h, v2.4s, #11 |
| 2924 | + add v2.4s, v6.4s, v10.4s |
| 2925 | + sub v6.4s, v6.4s, v10.4s |
| 2926 | + saddl v10.4s, ROW0R.4h, ROW4R.4h |
| 2927 | + rshrn ROW2R.4h, v2.4s, #11 |
| 2928 | + rshrn ROW5R.4h, v6.4s, #11 |
| 2929 | + shl v10.4s, v10.4s, #13 |
| 2930 | + smlal v8.4s, ROW7R.4h, XFIX_0_298631336_MINUS_0_899976223 |
| 2931 | + add v4.4s, v10.4s, v12.4s |
| 2932 | + sub v2.4s, v10.4s, v12.4s |
| 2933 | + add v12.4s, v4.4s, v14.4s |
| 2934 | + sub v4.4s, v4.4s, v14.4s |
| 2935 | + add v10.4s, v2.4s, v8.4s |
| 2936 | + sub v6.4s, v2.4s, v8.4s |
| 2937 | + rshrn ROW7R.4h, v4.4s, #11 |
| 2938 | + rshrn ROW3R.4h, v10.4s, #11 |
| 2939 | + rshrn ROW0R.4h, v12.4s, #11 |
| 2940 | + rshrn ROW4R.4h, v6.4s, #11 |
| 2941 | + /* Transpose right 4x8 half */ |
| 2942 | + transpose ROW6R, ROW7R, v3, .16b, .4h |
| 2943 | + transpose ROW2R, ROW3R, v3, .16b, .4h |
| 2944 | + transpose ROW0R, ROW1R, v3, .16b, .4h |
| 2945 | + transpose ROW4R, ROW5R, v3, .16b, .4h |
| 2946 | + transpose ROW1R, ROW3R, v3, .16b, .2s |
| 2947 | + transpose ROW4R, ROW6R, v3, .16b, .2s |
| 2948 | + transpose ROW0R, ROW2R, v3, .16b, .2s |
| 2949 | + transpose ROW5R, ROW7R, v3, .16b, .2s |
| 2950 | + |
| 2951 | +1: /* 1-D IDCT, pass 2 (normal variant), left 4x8 half */ |
| 2952 | + ld1 {v2.4h}, [x15] /* reload constants */ |
| 2953 | + smull v12.4S, ROW1R.4h, XFIX_1_175875602 /* ROW5L.4h <-> ROW1R.4h */ |
| 2954 | + smlal v12.4s, ROW1L.4h, XFIX_1_175875602 |
| 2955 | + smlal v12.4s, ROW3R.4h, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L.4h <-> ROW3R.4h */ |
| 2956 | + smlal v12.4s, ROW3L.4h, XFIX_1_175875602_MINUS_1_961570560 |
| 2957 | + smull v14.4s, ROW3R.4h, XFIX_1_175875602 /* ROW7L.4h <-> ROW3R.4h */ |
| 2958 | + smlal v14.4s, ROW3L.4h, XFIX_1_175875602 |
| 2959 | + smlal v14.4s, ROW1R.4h, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L.4h <-> ROW1R.4h */ |
| 2960 | + smlal v14.4s, ROW1L.4h, XFIX_1_175875602_MINUS_0_390180644 |
| 2961 | + ssubl v6.4s, ROW0L.4h, ROW0R.4h /* ROW4L.4h <-> ROW0R.4h */ |
| 2962 | + smull v4.4s, ROW2L.4h, XFIX_0_541196100 |
| 2963 | + smlal v4.4s, ROW2R.4h, XFIX_0_541196100_MINUS_1_847759065 /* ROW6L.4h <-> ROW2R.4h */ |
| 2964 | + mov v8.16b, v12.16b |
| 2965 | + smlsl v12.4s, ROW1R.4h, XFIX_2_562915447 /* ROW5L.4h <-> ROW1R.4h */ |
| 2966 | + smlal v12.4s, ROW3L.4h, XFIX_3_072711026_MINUS_2_562915447 |
| 2967 | + shl v6.4s, v6.4s, #13 |
| 2968 | + smlsl v8.4s, ROW1L.4h, XFIX_0_899976223 |
| 2969 | + add v2.4s, v6.4s, v4.4s |
| 2970 | + mov v10.16b, v14.16b |
| 2971 | + add v2.4s, v2.4s, v12.4s |
| 2972 | + smlsl v14.4s, ROW3R.4h, XFIX_0_899976223 /* ROW7L.4h <-> ROW3R.4h */ |
| 2973 | + smlal v14.4s, ROW1L.4h, XFIX_1_501321110_MINUS_0_899976223 |
| 2974 | + shrn ROW1L.4h, v2.4s, #16 |
| 2975 | + sub v2.4s, v2.4s, v12.4s |
| 2976 | + smlal v10.4s, ROW1R.4h, XFIX_2_053119869_MINUS_2_562915447 /* ROW5L.4h <-> ROW1R.4h */ |
| 2977 | + smlsl v10.4s, ROW3L.4h, XFIX_2_562915447 |
| 2978 | + sub v2.4s, v2.4s, v12.4s |
| 2979 | + smull v12.4s, ROW2L.4h, XFIX_0_541196100_PLUS_0_765366865 |
| 2980 | + smlal v12.4s, ROW2R.4h, XFIX_0_541196100 /* ROW6L.4h <-> ROW2R.4h */ |
| 2981 | + sub v6.4s, v6.4s, v4.4s |
| 2982 | + shrn ROW2R.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */ |
| 2983 | + add v2.4s, v6.4s, v10.4s |
| 2984 | + sub v6.4s, v6.4s, v10.4s |
| 2985 | + saddl v10.4s, ROW0L.4h, ROW0R.4h /* ROW4L.4h <-> ROW0R.4h */ |
| 2986 | + shrn ROW2L.4h, v2.4s, #16 |
| 2987 | + shrn ROW1R.4h, v6.4s, #16 /* ROW5L.4h <-> ROW1R.4h */ |
| 2988 | + shl v10.4s, v10.4s, #13 |
| 2989 | + smlal v8.4s, ROW3R.4h, XFIX_0_298631336_MINUS_0_899976223 /* ROW7L.4h <-> ROW3R.4h */ |
| 2990 | + add v4.4s, v10.4s, v12.4s |
| 2991 | + sub v2.4s, v10.4s, v12.4s |
| 2992 | + add v12.4s, v4.4s, v14.4s |
| 2993 | + sub v4.4s, v4.4s, v14.4s |
| 2994 | + add v10.4s, v2.4s, v8.4s |
| 2995 | + sub v6.4s, v2.4s, v8.4s |
| 2996 | + shrn ROW3R.4h, v4.4s, #16 /* ROW7L.4h <-> ROW3R.4h */ |
| 2997 | + shrn ROW3L.4h, v10.4s, #16 |
| 2998 | + shrn ROW0L.4h, v12.4s, #16 |
| 2999 | + shrn ROW0R.4h, v6.4s, #16 /* ROW4L.4h <-> ROW0R.4h */ |
| 3000 | + /* 1-D IDCT, pass 2, right 4x8 half */ |
| 3001 | + ld1 {v2.4h}, [x15] /* reload constants */ |
| 3002 | + smull v12.4s, ROW5R.4h, XFIX_1_175875602 |
| 3003 | + smlal v12.4s, ROW5L.4h, XFIX_1_175875602 /* ROW5L.4h <-> ROW1R.4h */ |
| 3004 | + smlal v12.4s, ROW7R.4h, XFIX_1_175875602_MINUS_1_961570560 |
| 3005 | + smlal v12.4s, ROW7L.4h, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L.4h <-> ROW3R.4h */ |
| 3006 | + smull v14.4s, ROW7R.4h, XFIX_1_175875602 |
| 3007 | + smlal v14.4s, ROW7L.4h, XFIX_1_175875602 /* ROW7L.4h <-> ROW3R.4h */ |
| 3008 | + smlal v14.4s, ROW5R.4h, XFIX_1_175875602_MINUS_0_390180644 |
| 3009 | + smlal v14.4s, ROW5L.4h, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L.4h <-> ROW1R.4h */ |
| 3010 | + ssubl v6.4s, ROW4L.4h, ROW4R.4h /* ROW4L.4h <-> ROW0R.4h */ |
| 3011 | + smull v4.4s, ROW6L.4h, XFIX_0_541196100 /* ROW6L.4h <-> ROW2R.4h */ |
| 3012 | + smlal v4.4s, ROW6R.4h, XFIX_0_541196100_MINUS_1_847759065 |
| 3013 | + mov v8.16b, v12.16b |
| 3014 | + smlsl v12.4s, ROW5R.4h, XFIX_2_562915447 |
| 3015 | + smlal v12.4s, ROW7L.4h, XFIX_3_072711026_MINUS_2_562915447 /* ROW7L.4h <-> ROW3R.4h */ |
| 3016 | + shl v6.4s, v6.4s, #13 |
| 3017 | + smlsl v8.4s, ROW5L.4h, XFIX_0_899976223 /* ROW5L.4h <-> ROW1R.4h */ |
| 3018 | + add v2.4s, v6.4s, v4.4s |
| 3019 | + mov v10.16b, v14.16b |
| 3020 | + add v2.4s, v2.4s, v12.4s |
| 3021 | + smlsl v14.4s, ROW7R.4h, XFIX_0_899976223 |
| 3022 | + smlal v14.4s, ROW5L.4h, XFIX_1_501321110_MINUS_0_899976223 /* ROW5L.4h <-> ROW1R.4h */ |
| 3023 | + shrn ROW5L.4h, v2.4s, #16 /* ROW5L.4h <-> ROW1R.4h */ |
| 3024 | + sub v2.4s, v2.4s, v12.4s |
| 3025 | + smlal v10.4s, ROW5R.4h, XFIX_2_053119869_MINUS_2_562915447 |
| 3026 | + smlsl v10.4s, ROW7L.4h, XFIX_2_562915447 /* ROW7L.4h <-> ROW3R.4h */ |
| 3027 | + sub v2.4s, v2.4s, v12.4s |
| 3028 | + smull v12.4s, ROW6L.4h, XFIX_0_541196100_PLUS_0_765366865 /* ROW6L.4h <-> ROW2R.4h */ |
| 3029 | + smlal v12.4s, ROW6R.4h, XFIX_0_541196100 |
| 3030 | + sub v6.4s, v6.4s, v4.4s |
| 3031 | + shrn ROW6R.4h, v2.4s, #16 |
| 3032 | + add v2.4s, v6.4s, v10.4s |
| 3033 | + sub v6.4s, v6.4s, v10.4s |
| 3034 | + saddl v10.4s, ROW4L.4h, ROW4R.4h /* ROW4L.4h <-> ROW0R.4h */ |
| 3035 | + shrn ROW6L.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */ |
| 3036 | + shrn ROW5R.4h, v6.4s, #16 |
| 3037 | + shl v10.4s, v10.4s, #13 |
| 3038 | + smlal v8.4s, ROW7R.4h, XFIX_0_298631336_MINUS_0_899976223 |
| 3039 | + add v4.4s, v10.4s, v12.4s |
| 3040 | + sub v2.4s, v10.4s, v12.4s |
| 3041 | + add v12.4s, v4.4s, v14.4s |
| 3042 | + sub v4.4s, v4.4s, v14.4s |
| 3043 | + add v10.4s, v2.4s, v8.4s |
| 3044 | + sub v6.4s, v2.4s, v8.4s |
| 3045 | + shrn ROW7R.4h, v4.4s, #16 |
| 3046 | + shrn ROW7L.4h, v10.4s, #16 /* ROW7L.4h <-> ROW3R.4h */ |
| 3047 | + shrn ROW4L.4h, v12.4s, #16 /* ROW4L.4h <-> ROW0R.4h */ |
| 3048 | + shrn ROW4R.4h, v6.4s, #16 |
| 3049 | + |
| 3050 | +2: /* Descale to 8-bit and range limit */ |
| 3051 | + ins v16.2d[1], v17.2d[0] |
| 3052 | + ins v18.2d[1], v19.2d[0] |
| 3053 | + ins v20.2d[1], v21.2d[0] |
| 3054 | + ins v22.2d[1], v23.2d[0] |
| 3055 | + sqrshrn v16.8b, v16.8h, #2 |
| 3056 | + sqrshrn2 v16.16b, v18.8h, #2 |
| 3057 | + sqrshrn v18.8b, v20.8h, #2 |
| 3058 | + sqrshrn2 v18.16b, v22.8h, #2 |
| 3059 | + |
| 3060 | + /* vpop {v8.4h - d15.4h} */ /* restore NEON registers */ |
| 3061 | + ld1 {v8.4h - v11.4h}, [sp], 32 |
| 3062 | + ld1 {v12.4h - v15.4h}, [sp], 32 |
| 3063 | + ins v24.2d[1], v25.2d[0] |
| 3064 | + |
| 3065 | + sqrshrn v20.8b, v24.8h, #2 |
| 3066 | + /* Transpose the final 8-bit samples and do signed->unsigned conversion */ |
| 3067 | + /* trn1 v16.8h, v16.8h, v18.8h */ |
| 3068 | + transpose v16, v18, v3, .16b, .8h |
| 3069 | + ins v26.2d[1], v27.2d[0] |
| 3070 | + ins v28.2d[1], v29.2d[0] |
| 3071 | + ins v30.2d[1], v31.2d[0] |
| 3072 | + sqrshrn2 v20.16b, v26.8h, #2 |
| 3073 | + sqrshrn v22.8b, v28.8h, #2 |
| 3074 | + movi v0.16b, #(CENTERJSAMPLE) |
| 3075 | + sqrshrn2 v22.16b, v30.8h, #2 |
| 3076 | + transpose_single v16, v17, v3, .2d, .8b |
| 3077 | + transpose_single v18, v19, v3, .2d, .8b |
| 3078 | + add v16.8b, v16.8b, v0.8b |
| 3079 | + add v17.8b, v17.8b, v0.8b |
| 3080 | + add v18.8b, v18.8b, v0.8b |
| 3081 | + add v19.8b, v19.8b, v0.8b |
| 3082 | + transpose v20, v22, v3, .16b, .8h |
| 3083 | + /* Store results to the output buffer */ |
| 3084 | + ldp TMP1, TMP2, [OUTPUT_BUF], 16 |
| 3085 | + add TMP1, TMP1, OUTPUT_COL |
| 3086 | + add TMP2, TMP2, OUTPUT_COL |
| 3087 | + st1 {v16.8b}, [TMP1] |
| 3088 | + transpose_single v20, v21, v3, .2d, .8b |
| 3089 | + st1 {v17.8b}, [TMP2] |
| 3090 | + ldp TMP1, TMP2, [OUTPUT_BUF], 16 |
| 3091 | + add TMP1, TMP1, OUTPUT_COL |
| 3092 | + add TMP2, TMP2, OUTPUT_COL |
| 3093 | + st1 {v18.8b}, [TMP1] |
| 3094 | + add v20.8b, v20.8b, v0.8b |
| 3095 | + add v21.8b, v21.8b, v0.8b |
| 3096 | + st1 {v19.8b}, [TMP2] |
| 3097 | + ldp TMP1, TMP2, [OUTPUT_BUF], 16 |
| 3098 | + ldp TMP3, TMP4, [OUTPUT_BUF] |
| 3099 | + add TMP1, TMP1, OUTPUT_COL |
| 3100 | + add TMP2, TMP2, OUTPUT_COL |
| 3101 | + add TMP3, TMP3, OUTPUT_COL |
| 3102 | + add TMP4, TMP4, OUTPUT_COL |
| 3103 | + transpose_single v22, v23, v3, .2d, .8b |
| 3104 | + st1 {v20.8b}, [TMP1] |
| 3105 | + add v22.8b, v22.8b, v0.8b |
| 3106 | + add v23.8b, v23.8b, v0.8b |
| 3107 | + st1 {v21.8b}, [TMP2] |
| 3108 | + st1 {v22.8b}, [TMP3] |
| 3109 | + st1 {v23.8b}, [TMP4] |
| 3110 | + ldr x15, [sp], 16 |
| 3111 | + ld1 {v0.8b - v3.8b}, [sp], 32 |
| 3112 | + ld1 {v4.8b - v7.8b}, [sp], 32 |
| 3113 | + ld1 {v8.8b - v11.8b}, [sp], 32 |
| 3114 | + ld1 {v12.8b - v15.8b}, [sp], 32 |
| 3115 | + ld1 {v16.8b - v19.8b}, [sp], 32 |
| 3116 | + ld1 {v20.8b - v23.8b}, [sp], 32 |
| 3117 | + ld1 {v24.8b - v27.8b}, [sp], 32 |
| 3118 | + ld1 {v28.8b - v31.8b}, [sp], 32 |
| 3119 | + blr x30 |
| 3120 | + |
| 3121 | +3: /* Left 4x8 half is done, right 4x8 half contains mostly zeros */ |
| 3122 | + |
| 3123 | + /* Transpose left 4x8 half */ |
| 3124 | + transpose ROW6L, ROW7L, v3, .16b, .4h |
| 3125 | + transpose ROW2L, ROW3L, v3, .16b, .4h |
| 3126 | + transpose ROW0L, ROW1L, v3, .16b, .4h |
| 3127 | + transpose ROW4L, ROW5L, v3, .16b, .4h |
| 3128 | + shl ROW0R.4h, ROW0R.4h, #2 /* PASS1_BITS */ |
| 3129 | + transpose ROW1L, ROW3L, v3, .16b, .2s |
| 3130 | + transpose ROW4L, ROW6L, v3, .16b, .2s |
| 3131 | + transpose ROW0L, ROW2L, v3, .16b, .2s |
| 3132 | + transpose ROW5L, ROW7L, v3, .16b, .2s |
| 3133 | + cmp x0, #0 |
| 3134 | + beq 4f /* Right 4x8 half has all zeros, go to 'sparse' second pass */ |
| 3135 | + |
| 3136 | + /* Only row 0 is non-zero for the right 4x8 half */ |
| 3137 | + dup ROW1R.4h, ROW0R.4h[1] |
| 3138 | + dup ROW2R.4h, ROW0R.4h[2] |
| 3139 | + dup ROW3R.4h, ROW0R.4h[3] |
| 3140 | + dup ROW4R.4h, ROW0R.4h[0] |
| 3141 | + dup ROW5R.4h, ROW0R.4h[1] |
| 3142 | + dup ROW6R.4h, ROW0R.4h[2] |
| 3143 | + dup ROW7R.4h, ROW0R.4h[3] |
| 3144 | + dup ROW0R.4h, ROW0R.4h[0] |
| 3145 | + b 1b /* Go to 'normal' second pass */ |
| 3146 | + |
| 3147 | +4: /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), left 4x8 half */ |
| 3148 | + ld1 {v2.4h}, [x15] /* reload constants */ |
| 3149 | + smull v12.4s, ROW1L.4h, XFIX_1_175875602 |
| 3150 | + smlal v12.4s, ROW3L.4h, XFIX_1_175875602_MINUS_1_961570560 |
| 3151 | + smull v14.4s, ROW3L.4h, XFIX_1_175875602 |
| 3152 | + smlal v14.4s, ROW1L.4h, XFIX_1_175875602_MINUS_0_390180644 |
| 3153 | + smull v4.4s, ROW2L.4h, XFIX_0_541196100 |
| 3154 | + sshll v6.4s, ROW0L.4h, #13 |
| 3155 | + mov v8.16b, v12.16b |
| 3156 | + smlal v12.4s, ROW3L.4h, XFIX_3_072711026_MINUS_2_562915447 |
| 3157 | + smlsl v8.4s, ROW1L.4h, XFIX_0_899976223 |
| 3158 | + add v2.4s, v6.4s, v4.4s |
| 3159 | + mov v10.16b, v14.16b |
| 3160 | + smlal v14.4s, ROW1L.4h, XFIX_1_501321110_MINUS_0_899976223 |
| 3161 | + add v2.4s, v2.4s, v12.4s |
| 3162 | + add v12.4s, v12.4s, v12.4s |
| 3163 | + smlsl v10.4s, ROW3L.4h, XFIX_2_562915447 |
| 3164 | + shrn ROW1L.4h, v2.4s, #16 |
| 3165 | + sub v2.4s, v2.4s, v12.4s |
| 3166 | + smull v12.4s, ROW2L.4h, XFIX_0_541196100_PLUS_0_765366865 |
| 3167 | + sub v6.4s, v6.4s, v4.4s |
| 3168 | + shrn ROW2R.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */ |
| 3169 | + add v2.4s, v6.4s, v10.4s |
| 3170 | + sub v6.4s, v6.4s, v10.4s |
| 3171 | + sshll v10.4s, ROW0L.4h, #13 |
| 3172 | + shrn ROW2L.4h, v2.4s, #16 |
| 3173 | + shrn ROW1R.4h, v6.4s, #16 /* ROW5L.4h <-> ROW1R.4h */ |
| 3174 | + add v4.4s, v10.4s, v12.4s |
| 3175 | + sub v2.4s, v10.4s, v12.4s |
| 3176 | + add v12.4s, v4.4s, v14.4s |
| 3177 | + sub v4.4s, v4.4s, v14.4s |
| 3178 | + add v10.4s, v2.4s, v8.4s |
| 3179 | + sub v6.4s, v2.4s, v8.4s |
| 3180 | + shrn ROW3R.4h, v4.4s, #16 /* ROW7L.4h <-> ROW3R.4h */ |
| 3181 | + shrn ROW3L.4h, v10.4s, #16 |
| 3182 | + shrn ROW0L.4h, v12.4s, #16 |
| 3183 | + shrn ROW0R.4h, v6.4s, #16 /* ROW4L.4h <-> ROW0R.4h */ |
| 3184 | + /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), right 4x8 half */ |
| 3185 | + ld1 {v2.4h}, [x15] /* reload constants */ |
| 3186 | + smull v12.4s, ROW5L.4h, XFIX_1_175875602 |
| 3187 | + smlal v12.4s, ROW7L.4h, XFIX_1_175875602_MINUS_1_961570560 |
| 3188 | + smull v14.4s, ROW7L.4h, XFIX_1_175875602 |
| 3189 | + smlal v14.4s, ROW5L.4h, XFIX_1_175875602_MINUS_0_390180644 |
| 3190 | + smull v4.4s, ROW6L.4h, XFIX_0_541196100 |
| 3191 | + sshll v6.4s, ROW4L.4h, #13 |
| 3192 | + mov v8.16b, v12.16b |
| 3193 | + smlal v12.4s, ROW7L.4h, XFIX_3_072711026_MINUS_2_562915447 |
| 3194 | + smlsl v8.4s, ROW5L.4h, XFIX_0_899976223 |
| 3195 | + add v2.4s, v6.4s, v4.4s |
| 3196 | + mov v10.16b, v14.16b |
| 3197 | + smlal v14.4s, ROW5L.4h, XFIX_1_501321110_MINUS_0_899976223 |
| 3198 | + add v2.4s, v2.4s, v12.4s |
| 3199 | + add v12.4s, v12.4s, v12.4s |
| 3200 | + smlsl v10.4s, ROW7L.4h, XFIX_2_562915447 |
| 3201 | + shrn ROW5L.4h, v2.4s, #16 /* ROW5L.4h <-> ROW1R.4h */ |
| 3202 | + sub v2.4s, v2.4s, v12.4s |
| 3203 | + smull v12.4s, ROW6L.4h, XFIX_0_541196100_PLUS_0_765366865 |
| 3204 | + sub v6.4s, v6.4s, v4.4s |
| 3205 | + shrn ROW6R.4h, v2.4s, #16 |
| 3206 | + add v2.4s, v6.4s, v10.4s |
| 3207 | + sub v6.4s, v6.4s, v10.4s |
| 3208 | + sshll v10.4s, ROW4L.4h, #13 |
| 3209 | + shrn ROW6L.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */ |
| 3210 | + shrn ROW5R.4h, v6.4s, #16 |
| 3211 | + add v4.4s, v10.4s, v12.4s |
| 3212 | + sub v2.4s, v10.4s, v12.4s |
| 3213 | + add v12.4s, v4.4s, v14.4s |
| 3214 | + sub v4.4s, v4.4s, v14.4s |
| 3215 | + add v10.4s, v2.4s, v8.4s |
| 3216 | + sub v6.4s, v2.4s, v8.4s |
| 3217 | + shrn ROW7R.4h, v4.4s, #16 |
| 3218 | + shrn ROW7L.4h, v10.4s, #16 /* ROW7L.4h <-> ROW3R.4h */ |
| 3219 | + shrn ROW4L.4h, v12.4s, #16 /* ROW4L.4h <-> ROW0R.4h */ |
| 3220 | + shrn ROW4R.4h, v6.4s, #16 |
| 3221 | + b 2b /* Go to epilogue */ |
| 3222 | + |
| 3223 | + .unreq DCT_TABLE |
| 3224 | + .unreq COEF_BLOCK |
| 3225 | + .unreq OUTPUT_BUF |
| 3226 | + .unreq OUTPUT_COL |
| 3227 | + .unreq TMP1 |
| 3228 | + .unreq TMP2 |
| 3229 | + .unreq TMP3 |
| 3230 | + .unreq TMP4 |
| 3231 | + |
| 3232 | + .unreq ROW0L |
| 3233 | + .unreq ROW0R |
| 3234 | + .unreq ROW1L |
| 3235 | + .unreq ROW1R |
| 3236 | + .unreq ROW2L |
| 3237 | + .unreq ROW2R |
| 3238 | + .unreq ROW3L |
| 3239 | + .unreq ROW3R |
| 3240 | + .unreq ROW4L |
| 3241 | + .unreq ROW4R |
| 3242 | + .unreq ROW5L |
| 3243 | + .unreq ROW5R |
| 3244 | + .unreq ROW6L |
| 3245 | + .unreq ROW6R |
| 3246 | + .unreq ROW7L |
| 3247 | + .unreq ROW7R |
| 3248 | + |
| 3249 | + |
| 3250 | +/*****************************************************************************/ |
| 3251 | + |
| 3252 | +/* |
| 3253 | + * jsimd_idct_ifast_neon |
| 3254 | + * |
| 3255 | + * This function contains a fast, not so accurate integer implementation of |
| 3256 | + * the inverse DCT (Discrete Cosine Transform). It uses the same calculations |
| 3257 | + * and produces exactly the same output as IJG's original 'jpeg_idct_ifast' |
| 3258 | + * function from jidctfst.c |
| 3259 | + * |
| 3260 | + * Normally 1-D AAN DCT needs 5 multiplications and 29 additions. |
| 3261 | + * But in the ARM NEON case some extra additions are required because the |
| 3262 | + * SQDMULH instruction cannot handle constants larger than 1. So expressions |
| 3263 | + * like "x * 1.082392200" have to be converted to "x * 0.082392200 + x", |
| 3264 | + * which introduces an extra addition. Overall, there are 6 extra additions |
| 3265 | + * per 1-D IDCT pass, totalling 5 SQDMULH and 35 ADD/SUB instructions. |
| 3266 | + */ |
| 3267 | + |
| 3268 | +#define XFIX_1_082392200 v0.4h[0] |
| 3269 | +#define XFIX_1_414213562 v0.4h[1] |
| 3270 | +#define XFIX_1_847759065 v0.4h[2] |
| 3271 | +#define XFIX_2_613125930 v0.4h[3] |
| 3272 | + |
| 3273 | +.balign 16 |
| 3274 | +jsimd_idct_ifast_neon_consts: |
| 3275 | + .short (277 * 128 - 256 * 128) /* XFIX_1_082392200: (277/256 - 1) scaled by 2^15, for SQDMULH */ |
| 3276 | + .short (362 * 128 - 256 * 128) /* XFIX_1_414213562: (362/256 - 1) scaled by 2^15 */ |
| 3277 | + .short (473 * 128 - 256 * 128) /* XFIX_1_847759065: (473/256 - 1) scaled by 2^15 */ |
| 3278 | + .short (669 * 128 - 512 * 128) /* XFIX_2_613125930: (669/256 - 2) scaled by 2^15; the code adds x twice */ |
| 3279 | + |
| 3280 | +asm_function jsimd_idct_ifast_neon |
| 3281 | + /* In: x0 = DCT_TABLE, x1 = COEF_BLOCK, x2 = OUTPUT_BUF (8 row pointers), x3 = OUTPUT_COL (offset added to each row pointer). Stores 8 rows of 8 bytes. */ |
| 3282 | + DCT_TABLE .req x0 |
| 3283 | + COEF_BLOCK .req x1 |
| 3284 | + OUTPUT_BUF .req x2 |
| 3285 | + OUTPUT_COL .req x3 |
| 3286 | + TMP1 .req x0 |
| 3287 | + TMP2 .req x1 |
| 3288 | + TMP3 .req x2 |
| 3289 | + TMP4 .req x22 |
| 3290 | + TMP5 .req x23 |
| 3291 | + |
| 3292 | + /* Load and dequantize coefficients into NEON registers |
| 3293 | + * with the following allocation (row N lives in the 128-bit register in parentheses; the dNN labels look like 32-bit ARM names - NOTE(review): confirm): |
| 3294 | + * 0 1 2 3 | 4 5 6 7 |
| 3295 | + * ---------+-------- |
| 3296 | + * 0 | d16 | d17 ( v8.8h ) |
| 3297 | + * 1 | d18 | d19 ( v9.8h ) |
| 3298 | + * 2 | d20 | d21 ( v10.8h ) |
| 3299 | + * 3 | d22 | d23 ( v11.8h ) |
| 3300 | + * 4 | d24 | d25 ( v12.8h ) |
| 3301 | + * 5 | d26 | d27 ( v13.8h ) |
| 3302 | + * 6 | d28 | d29 ( v14.8h ) |
| 3303 | + * 7 | d30 | d31 ( v15.8h ) |
| 3304 | + */ |
| 3305 | + /* Save the registers used by the fast IDCT; sp stays lowered until the epilogue so the save area is never below sp */ |
| 3306 | + stp x22, x23, [sp, #-176]! |
| 3307 | + add x22, sp, #16 /* x22 walks the vector save area (x22 itself is already saved) */ |
| 3308 | + adr x23, jsimd_idct_ifast_neon_consts |
| 3309 | + st1 {v0.8b - v3.8b}, [x22], 32 |
| 3310 | + st1 {v4.8b - v7.8b}, [x22], 32 |
| 3311 | + st1 {v8.8b - v11.8b}, [x22], 32 |
| 3312 | + st1 {v12.8b - v15.8b}, [x22], 32 |
| 3313 | + st1 {v16.8b - v19.8b}, [x22], 32 |
| 3314 | + ld1 {v8.8h, v9.8h}, [COEF_BLOCK], 32 |
| 3315 | + ld1 {v0.8h, v1.8h}, [DCT_TABLE], 32 |
| 3316 | + ld1 {v10.8h, v11.8h}, [COEF_BLOCK], 32 |
| 3317 | + mul v8.8h, v8.8h, v0.8h |
| 3318 | + ld1 {v2.8h, v3.8h}, [DCT_TABLE], 32 |
| 3319 | + mul v9.8h, v9.8h, v1.8h |
| 3320 | + ld1 {v12.8h, v13.8h}, [COEF_BLOCK], 32 |
| 3321 | + mul v10.8h, v10.8h, v2.8h |
| 3322 | + ld1 {v0.8h, v1.8h}, [DCT_TABLE], 32 |
| 3323 | + mul v11.8h, v11.8h, v3.8h |
| 3324 | + ld1 {v14.8h, v15.8h}, [COEF_BLOCK], 32 |
| 3325 | + mul v12.8h, v12.8h, v0.8h |
| 3326 | + ld1 {v2.8h, v3.8h}, [DCT_TABLE], 32 |
| 3327 | + mul v14.8h, v14.8h, v2.8h |
| 3328 | + mul v13.8h, v13.8h, v1.8h |
| 3329 | + ld1 {v0.4h}, [x23] /* load constants */ |
| 3330 | + mul v15.8h, v15.8h, v3.8h |
| 3331 | + |
| 3332 | + /* 1-D IDCT, pass 1 */ |
| 3333 | + sub v2.8h, v10.8h, v14.8h |
| 3334 | + add v14.8h, v10.8h, v14.8h |
| 3335 | + sub v1.8h, v11.8h, v13.8h |
| 3336 | + add v13.8h, v11.8h, v13.8h |
| 3337 | + sub v5.8h, v9.8h, v15.8h |
| 3338 | + add v15.8h, v9.8h, v15.8h |
| 3339 | + sqdmulh v4.8h, v2.8h, XFIX_1_414213562 |
| 3340 | + sqdmulh v6.8h, v1.8h, XFIX_2_613125930 |
| 3341 | + add v3.8h, v1.8h, v1.8h |
| 3342 | + sub v1.8h, v5.8h, v1.8h |
| 3343 | + add v10.8h, v2.8h, v4.8h |
| 3344 | + sqdmulh v4.8h, v1.8h, XFIX_1_847759065 |
| 3345 | + sub v2.8h, v15.8h, v13.8h |
| 3346 | + add v3.8h, v3.8h, v6.8h |
| 3347 | + sqdmulh v6.8h, v2.8h, XFIX_1_414213562 |
| 3348 | + add v1.8h, v1.8h, v4.8h |
| 3349 | + sqdmulh v4.8h, v5.8h, XFIX_1_082392200 |
| 3350 | + sub v10.8h, v10.8h, v14.8h |
| 3351 | + add v2.8h, v2.8h, v6.8h |
| 3352 | + sub v6.8h, v8.8h, v12.8h |
| 3353 | + add v12.8h, v8.8h, v12.8h |
| 3354 | + add v9.8h, v5.8h, v4.8h |
| 3355 | + add v5.8h, v6.8h, v10.8h |
| 3356 | + sub v10.8h, v6.8h, v10.8h |
| 3357 | + add v6.8h, v15.8h, v13.8h |
| 3358 | + add v8.8h, v12.8h, v14.8h |
| 3359 | + sub v3.8h, v6.8h, v3.8h |
| 3360 | + sub v12.8h, v12.8h, v14.8h |
| 3361 | + sub v3.8h, v3.8h, v1.8h |
| 3362 | + sub v1.8h, v9.8h, v1.8h |
| 3363 | + add v2.8h, v3.8h, v2.8h |
| 3364 | + sub v15.8h, v8.8h, v6.8h |
| 3365 | + add v1.8h, v1.8h, v2.8h |
| 3366 | + add v8.8h, v8.8h, v6.8h |
| 3367 | + add v14.8h, v5.8h, v3.8h |
| 3368 | + sub v9.8h, v5.8h, v3.8h |
| 3369 | + sub v13.8h, v10.8h, v2.8h |
| 3370 | + add v10.8h, v10.8h, v2.8h |
| 3371 | + /* Transpose q8-q9 */ |
| 3372 | + mov v18.16b, v8.16b |
| 3373 | + trn1 v8.8h, v8.8h, v9.8h |
| 3374 | + trn2 v9.8h, v18.8h, v9.8h |
| 3375 | + sub v11.8h, v12.8h, v1.8h |
| 3376 | + /* Transpose q14-q15 */ |
| 3377 | + mov v18.16b, v14.16b |
| 3378 | + trn1 v14.8h, v14.8h, v15.8h |
| 3379 | + trn2 v15.8h, v18.8h, v15.8h |
| 3380 | + add v12.8h, v12.8h, v1.8h |
| 3381 | + /* Transpose q10-q11 */ |
| 3382 | + mov v18.16b, v10.16b |
| 3383 | + trn1 v10.8h, v10.8h, v11.8h |
| 3384 | + trn2 v11.8h, v18.8h, v11.8h |
| 3385 | + /* Transpose q12-q13 */ |
| 3386 | + mov v18.16b, v12.16b |
| 3387 | + trn1 v12.8h, v12.8h, v13.8h |
| 3388 | + trn2 v13.8h, v18.8h, v13.8h |
| 3389 | + /* Transpose q9-q11 */ |
| 3390 | + mov v18.16b, v9.16b |
| 3391 | + trn1 v9.4s, v9.4s, v11.4s |
| 3392 | + trn2 v11.4s, v18.4s, v11.4s |
| 3393 | + /* Transpose q12-q14 */ |
| 3394 | + mov v18.16b, v12.16b |
| 3395 | + trn1 v12.4s, v12.4s, v14.4s |
| 3396 | + trn2 v14.4s, v18.4s, v14.4s |
| 3397 | + /* Transpose q8-q10 */ |
| 3398 | + mov v18.16b, v8.16b |
| 3399 | + trn1 v8.4s, v8.4s, v10.4s |
| 3400 | + trn2 v10.4s, v18.4s, v10.4s |
| 3401 | + /* Transpose q13-q15 */ |
| 3402 | + mov v18.16b, v13.16b |
| 3403 | + trn1 v13.4s, v13.4s, v15.4s |
| 3404 | + trn2 v15.4s, v18.4s, v15.4s |
| 3405 | + /* vswp v14.4h, v10-MSB.4h */ |
| 3406 | + umov x22, v14.d[0] |
| 3407 | + ins v14.2d[0], v10.2d[1] |
| 3408 | + ins v10.2d[1], x22 |
| 3409 | + /* vswp v13.4h, v9MSB.4h */ |
| 3410 | + |
| 3411 | + umov x22, v13.d[0] |
| 3412 | + ins v13.2d[0], v9.2d[1] |
| 3413 | + ins v9.2d[1], x22 |
| 3414 | + /* 1-D IDCT, pass 2 */ |
| 3415 | + sub v2.8h, v10.8h, v14.8h |
| 3416 | + /* vswp v15.4h, v11MSB.4h */ |
| 3417 | + umov x22, v15.d[0] |
| 3418 | + ins v15.2d[0], v11.2d[1] |
| 3419 | + ins v11.2d[1], x22 |
| 3420 | + add v14.8h, v10.8h, v14.8h |
| 3421 | + /* vswp v12.4h, v8-MSB.4h */ |
| 3422 | + umov x22, v12.d[0] |
| 3423 | + ins v12.2d[0], v8.2d[1] |
| 3424 | + ins v8.2d[1], x22 |
| 3425 | + sub v1.8h, v11.8h, v13.8h |
| 3426 | + add v13.8h, v11.8h, v13.8h |
| 3427 | + sub v5.8h, v9.8h, v15.8h |
| 3428 | + add v15.8h, v9.8h, v15.8h |
| 3429 | + sqdmulh v4.8h, v2.8h, XFIX_1_414213562 |
| 3430 | + sqdmulh v6.8h, v1.8h, XFIX_2_613125930 |
| 3431 | + add v3.8h, v1.8h, v1.8h |
| 3432 | + sub v1.8h, v5.8h, v1.8h |
| 3433 | + add v10.8h, v2.8h, v4.8h |
| 3434 | + sqdmulh v4.8h, v1.8h, XFIX_1_847759065 |
| 3435 | + sub v2.8h, v15.8h, v13.8h |
| 3436 | + add v3.8h, v3.8h, v6.8h |
| 3437 | + sqdmulh v6.8h, v2.8h, XFIX_1_414213562 |
| 3438 | + add v1.8h, v1.8h, v4.8h |
| 3439 | + sqdmulh v4.8h, v5.8h, XFIX_1_082392200 |
| 3440 | + sub v10.8h, v10.8h, v14.8h |
| 3441 | + add v2.8h, v2.8h, v6.8h |
| 3442 | + sub v6.8h, v8.8h, v12.8h |
| 3443 | + add v12.8h, v8.8h, v12.8h |
| 3444 | + add v9.8h, v5.8h, v4.8h |
| 3445 | + add v5.8h, v6.8h, v10.8h |
| 3446 | + sub v10.8h, v6.8h, v10.8h |
| 3447 | + add v6.8h, v15.8h, v13.8h |
| 3448 | + add v8.8h, v12.8h, v14.8h |
| 3449 | + sub v3.8h, v6.8h, v3.8h |
| 3450 | + sub v12.8h, v12.8h, v14.8h |
| 3451 | + sub v3.8h, v3.8h, v1.8h |
| 3452 | + sub v1.8h, v9.8h, v1.8h |
| 3453 | + add v2.8h, v3.8h, v2.8h |
| 3454 | + sub v15.8h, v8.8h, v6.8h |
| 3455 | + add v1.8h, v1.8h, v2.8h |
| 3456 | + add v8.8h, v8.8h, v6.8h |
| 3457 | + add v14.8h, v5.8h, v3.8h |
| 3458 | + sub v9.8h, v5.8h, v3.8h |
| 3459 | + sub v13.8h, v10.8h, v2.8h |
| 3460 | + add v10.8h, v10.8h, v2.8h |
| 3461 | + sub v11.8h, v12.8h, v1.8h |
| 3462 | + add v12.8h, v12.8h, v1.8h |
| 3463 | + /* Descale to 8-bit and range limit */ |
| 3464 | + movi v0.16b, #0x80 /* bias added after the saturating narrow to turn signed bytes into unsigned samples */ |
| 3465 | + sqshrn v8.8b, v8.8h, #5 |
| 3466 | + sqshrn2 v8.16b, v9.8h, #5 |
| 3467 | + sqshrn v9.8b, v10.8h, #5 |
| 3468 | + sqshrn2 v9.16b, v11.8h, #5 |
| 3469 | + sqshrn v10.8b, v12.8h, #5 |
| 3470 | + sqshrn2 v10.16b, v13.8h, #5 |
| 3471 | + sqshrn v11.8b, v14.8h, #5 |
| 3472 | + sqshrn2 v11.16b, v15.8h, #5 |
| 3473 | + add v8.16b, v8.16b, v0.16b |
| 3474 | + add v9.16b, v9.16b, v0.16b |
| 3475 | + add v10.16b, v10.16b, v0.16b |
| 3476 | + add v11.16b, v11.16b, v0.16b |
| 3477 | + /* Transpose the final 8-bit samples */ |
| 3478 | + /* Transpose q8-q9 */ |
| 3479 | + mov v18.16b, v8.16b |
| 3480 | + trn1 v8.8h, v8.8h, v9.8h |
| 3481 | + trn2 v9.8h, v18.8h, v9.8h |
| 3482 | + /* Transpose q10-q11 */ |
| 3483 | + mov v18.16b, v10.16b |
| 3484 | + trn1 v10.8h, v10.8h, v11.8h |
| 3485 | + trn2 v11.8h, v18.8h, v11.8h |
| 3486 | + /* Transpose q8-q10 */ |
| 3487 | + mov v18.16b, v8.16b |
| 3488 | + trn1 v8.4s, v8.4s, v10.4s |
| 3489 | + trn2 v10.4s, v18.4s, v10.4s |
| 3490 | + /* Transpose q9-q11 */ |
| 3491 | + mov v18.16b, v9.16b |
| 3492 | + trn1 v9.4s, v9.4s, v11.4s |
| 3493 | + trn2 v11.4s, v18.4s, v11.4s |
| 3494 | + /* make copy */ |
| 3495 | + ins v17.2d[0], v8.2d[1] |
| 3496 | + /* Transpose d16-d17-msb */ |
| 3497 | + mov v18.16b, v8.16b |
| 3498 | + trn1 v8.8b, v8.8b, v17.8b |
| 3499 | + trn2 v17.8b, v18.8b, v17.8b |
| 3500 | + /* make copy */ |
| 3501 | + ins v19.2d[0], v9.2d[1] |
| 3502 | + mov v18.16b, v9.16b |
| 3503 | + trn1 v9.8b, v9.8b, v19.8b |
| 3504 | + trn2 v19.8b, v18.8b, v19.8b |
| 3505 | + /* Store results to the output buffer */ |
| 3506 | + ldp TMP1, TMP2, [OUTPUT_BUF], 16 |
| 3507 | + add TMP1, TMP1, OUTPUT_COL |
| 3508 | + add TMP2, TMP2, OUTPUT_COL |
| 3509 | + st1 {v8.8b}, [TMP1] |
| 3510 | + st1 {v17.8b}, [TMP2] |
| 3511 | + ldp TMP1, TMP2, [OUTPUT_BUF], 16 |
| 3512 | + add TMP1, TMP1, OUTPUT_COL |
| 3513 | + add TMP2, TMP2, OUTPUT_COL |
| 3514 | + st1 {v9.8b}, [TMP1] |
| 3515 | + /* make copy */ |
| 3516 | + ins v7.2d[0], v10.2d[1] |
| 3517 | + mov v18.16b, v10.16b |
| 3518 | + trn1 v10.8b, v10.8b, v7.8b |
| 3519 | + trn2 v7.8b, v18.8b, v7.8b |
| 3520 | + st1 {v19.8b}, [TMP2] |
| 3521 | + ldp TMP1, TMP2, [OUTPUT_BUF], 16 |
| 3522 | + ldp TMP4, TMP5, [OUTPUT_BUF], 16 |
| 3523 | + add TMP1, TMP1, OUTPUT_COL |
| 3524 | + add TMP2, TMP2, OUTPUT_COL |
| 3525 | + add TMP4, TMP4, OUTPUT_COL |
| 3526 | + add TMP5, TMP5, OUTPUT_COL |
| 3527 | + st1 {v10.8b}, [TMP1] |
| 3528 | + /* make copy */ |
| 3529 | + ins v16.2d[0], v11.2d[1] |
| 3530 | + mov v18.16b, v11.16b |
| 3531 | + trn1 v11.8b, v11.8b, v16.8b |
| 3532 | + trn2 v16.8b, v18.8b, v16.8b |
| 3533 | + st1 {v7.8b}, [TMP2] |
| 3534 | + st1 {v11.8b}, [TMP4] |
| 3535 | + st1 {v16.8b}, [TMP5] |
| 3536 | + add x22, sp, #16 /* restore: x22 walks the vector save area, sp is released by the final ldp */ |
| 3537 | + ld1 {v0.8b - v3.8b}, [x22], 32 |
| 3538 | + ld1 {v4.8b - v7.8b}, [x22], 32 |
| 3539 | + ld1 {v8.8b - v11.8b}, [x22], 32 |
| 3540 | + ld1 {v12.8b - v15.8b}, [x22], 32 |
| 3541 | + ld1 {v16.8b - v19.8b}, [x22], 32 |
| 3542 | + ldp x22, x23, [sp], 176 |
| 3543 | + ret /* return via x30 without overwriting it (blr would rewrite the link register) */ |
| 3544 | + |
| 3545 | + .unreq DCT_TABLE |
| 3546 | + .unreq COEF_BLOCK |
| 3547 | + .unreq OUTPUT_BUF |
| 3548 | + .unreq OUTPUT_COL |
| 3549 | + .unreq TMP1 |
| 3550 | + .unreq TMP2 |
| 3551 | + .unreq TMP3 |
| 3552 | + .unreq TMP4 |
| 3553 | + .unreq TMP5 |
| 3554 | + |
| 3555 | +/*****************************************************************************/ |
| 3556 | + |
| 3557 | +/* |
| 3558 | + * jsimd_idct_4x4_neon |
| 3559 | + * |
| 3560 | + * This function contains inverse-DCT code for getting reduced-size |
| 3561 | + * 4x4 pixels output from an 8x8 DCT block. It uses the same calculations |
| 3562 | + * and produces exactly the same output as IJG's original 'jpeg_idct_4x4' |
| 3563 | + * function from jpeg-6b (jidctred.c). |
| 3564 | + * |
| 3565 | + * NOTE: jpeg-8 has an improved implementation of 4x4 inverse-DCT, which |
| 3566 | + * requires much less arithmetic operations and hence should be faster. |
| 3567 | + * The primary purpose of this particular NEON optimized function is |
| 3568 | + * bit exact compatibility with jpeg-6b. |
| 3569 | + * |
| 3570 | + * TODO: a bit better instructions scheduling can be achieved by expanding |
| 3571 | + * idct_helper/transpose_4x4 macros and reordering instructions, |
| 3572 | + * but readability will suffer somewhat. |
| 3573 | + */ |
| 3574 | + |
| 3575 | +#define CONST_BITS 13 /* fractional bits of the FIX_* values below: FIX(x) = round(x * 2^13) */ |
| 3576 | + |
| 3577 | +#define FIX_0_211164243 (1730) /* FIX(0.211164243) */ |
| 3578 | +#define FIX_0_509795579 (4176) /* FIX(0.509795579) */ |
| 3579 | +#define FIX_0_601344887 (4926) /* FIX(0.601344887) */ |
| 3580 | +#define FIX_0_720959822 (5906) /* FIX(0.720959822) */ |
| 3581 | +#define FIX_0_765366865 (6270) /* FIX(0.765366865) */ |
| 3582 | +#define FIX_0_850430095 (6967) /* FIX(0.850430095) */ |
| 3583 | +#define FIX_0_899976223 (7373) /* FIX(0.899976223) */ |
| 3584 | +#define FIX_1_061594337 (8697) /* FIX(1.061594337) */ |
| 3585 | +#define FIX_1_272758580 (10426) /* FIX(1.272758580) */ |
| 3586 | +#define FIX_1_451774981 (11893) /* FIX(1.451774981) */ |
| 3587 | +#define FIX_1_847759065 (15137) /* FIX(1.847759065) */ |
| 3588 | +#define FIX_2_172734803 (17799) /* FIX(2.172734803) */ |
| 3589 | +#define FIX_2_562915447 (20995) /* FIX(2.562915447) */ |
| 3590 | +#define FIX_3_624509785 (29692) /* FIX(3.624509785) */ |
| 3591 | + |
| 3592 | +.balign 16 |
| 3593 | +jsimd_idct_4x4_neon_consts: |
| 3594 | + .short FIX_1_847759065 /* v0.4h[0] */ |
| 3595 | + .short -FIX_0_765366865 /* v0.4h[1] */ |
| 3596 | + .short -FIX_0_211164243 /* v0.4h[2] */ |
| 3597 | + .short FIX_1_451774981 /* v0.4h[3] */ |
| 3598 | + .short -FIX_2_172734803 /* v1.4h[0] */ |
| 3599 | + .short FIX_1_061594337 /* v1.4h[1] */ |
| 3600 | + .short -FIX_0_509795579 /* v1.4h[2] */ |
| 3601 | + .short -FIX_0_601344887 /* v1.4h[3] */ |
| 3602 | + .short FIX_0_899976223 /* v2.4h[0] */ |
| 3603 | + .short FIX_2_562915447 /* v2.4h[1] */ |
| 3604 | + .short 1 << (CONST_BITS+1) /* v2.4h[2] */ |
| 3605 | + .short 0 /* v2.4h[3] */ |
| 3606 | +/* idct_helper: one 1-D pass over four columns; inputs x4..x16 are .4h vectors, results are descaled by 'shift' into y26..y29. Uses v20, v24, v26, v28, v30 as scratch and expects the constant table in v0-v2. */ |
| 3607 | +.macro idct_helper x4, x6, x8, x10, x12, x14, x16, shift, y26, y27, y28, y29 |
| 3608 | + smull v28.4s, \x4, v2.4h[2] /* v28 = x4 * (1 << (CONST_BITS+1)) */ |
| 3609 | + smlal v28.4s, \x8, v0.4h[0] /* + x8 * FIX_1_847759065 */ |
| 3610 | + smlal v28.4s, \x14, v0.4h[1] /* + x14 * -FIX_0_765366865 */ |
| 3611 | + |
| 3612 | + smull v26.4s, \x16, v1.4h[2] /* v26 = x16 * -FIX_0_509795579 */ |
| 3613 | + smlal v26.4s, \x12, v1.4h[3] /* + x12 * -FIX_0_601344887 */ |
| 3614 | + smlal v26.4s, \x10, v2.4h[0] /* + x10 * FIX_0_899976223 */ |
| 3615 | + smlal v26.4s, \x6, v2.4h[1] /* + x6 * FIX_2_562915447 */ |
| 3616 | + |
| 3617 | + smull v30.4s, \x4, v2.4h[2] /* v30 = x4 * (1 << (CONST_BITS+1)) */ |
| 3618 | + smlsl v30.4s, \x8, v0.4h[0] /* - x8 * FIX_1_847759065 */ |
| 3619 | + smlsl v30.4s, \x14, v0.4h[1] /* - x14 * -FIX_0_765366865 */ |
| 3620 | + |
| 3621 | + smull v24.4s, \x16, v0.4h[2] /* v24 = x16 * -FIX_0_211164243 */ |
| 3622 | + smlal v24.4s, \x12, v0.4h[3] /* + x12 * FIX_1_451774981 */ |
| 3623 | + smlal v24.4s, \x10, v1.4h[0] /* + x10 * -FIX_2_172734803 */ |
| 3624 | + smlal v24.4s, \x6, v1.4h[1] /* + x6 * FIX_1_061594337 */ |
| 3625 | + |
| 3626 | + add v20.4s, v28.4s, v26.4s /* y26 source: v28 + v26 */ |
| 3627 | + sub v28.4s, v28.4s, v26.4s /* y29 source: v28 - v26 */ |
| 3628 | + |
| 3629 | +.if \shift > 16 |
| 3630 | + srshr v20.4s, v20.4s, #\shift /* shifts above 16 take this path: rounding shift first, then a plain narrow */ |
| 3631 | + srshr v28.4s, v28.4s, #\shift |
| 3632 | + xtn \y26, v20.4s |
| 3633 | + xtn \y29, v28.4s |
| 3634 | +.else |
| 3635 | + rshrn \y26, v20.4s, #\shift /* rounding shift and narrow in one instruction */ |
| 3636 | + rshrn \y29, v28.4s, #\shift |
| 3637 | +.endif |
| 3638 | + |
| 3639 | + add v20.4s, v30.4s, v24.4s /* y27 source: v30 + v24 */ |
| 3640 | + sub v30.4s, v30.4s, v24.4s /* y28 source: v30 - v24 */ |
| 3641 | + |
| 3642 | +.if \shift > 16 |
| 3643 | + srshr v20.4s, v20.4s, #\shift |
| 3644 | + srshr v30.4s, v30.4s, #\shift |
| 3645 | + xtn \y27, v20.4s |
| 3646 | + xtn \y28, v30.4s |
| 3647 | +.else |
| 3648 | + rshrn \y27, v20.4s, #\shift |
| 3649 | + rshrn \y28, v30.4s, #\shift |
| 3650 | +.endif |
| 3651 | + |
| 3652 | +.endm |
| 3653 | + |
| 3654 | +asm_function jsimd_idct_4x4_neon |
| 3655 | + /* In: x0 = DCT_TABLE, x1 = COEF_BLOCK, x2 = OUTPUT_BUF (4 row pointers), x3 = OUTPUT_COL (offset added to each row pointer). Stores 4 rows of 4 bytes. */ |
| 3656 | + DCT_TABLE .req x0 |
| 3657 | + COEF_BLOCK .req x1 |
| 3658 | + OUTPUT_BUF .req x2 |
| 3659 | + OUTPUT_COL .req x3 |
| 3660 | + TMP1 .req x0 |
| 3661 | + TMP2 .req x1 |
| 3662 | + TMP3 .req x2 |
| 3663 | + TMP4 .req x15 |
| 3664 | + |
| 3665 | + /* Save all used NEON registers; sp stays lowered until the epilogue so the save area is never below sp */ |
| 3666 | + sub sp, sp, 272 |
| 3667 | + str x15, [sp] |
| 3668 | + add x15, sp, #16 /* x15 walks the vector save area (x15 itself is already saved) */ |
| 3669 | + st1 {v0.8b - v3.8b}, [x15], 32 |
| 3670 | + st1 {v4.8b - v7.8b}, [x15], 32 |
| 3671 | + st1 {v8.8b - v11.8b}, [x15], 32 |
| 3672 | + st1 {v12.8b - v15.8b}, [x15], 32 |
| 3673 | + st1 {v16.8b - v19.8b}, [x15], 32 |
| 3674 | + st1 {v20.8b - v23.8b}, [x15], 32 |
| 3675 | + st1 {v24.8b - v27.8b}, [x15], 32 |
| 3676 | + st1 {v28.8b - v31.8b}, [x15], 32 |
| 3677 | + /* Load constants (v3.4h is just used for padding) */ |
| 3678 | + adr TMP4, jsimd_idct_4x4_neon_consts |
| 3679 | + ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [TMP4] |
| 3680 | + /* Load all COEF_BLOCK into NEON registers with the following allocation: |
| 3681 | + * 0 1 2 3 | 4 5 6 7 |
| 3682 | + * ---------+-------- |
| 3683 | + * 0 | v4.4h | v5.4h |
| 3684 | + * 1 | v6.4h | v7.4h |
| 3685 | + * 2 | v8.4h | v9.4h |
| 3686 | + * 3 | v10.4h | v11.4h |
| 3687 | + * 4 | - | - |
| 3688 | + * 5 | v12.4h | v13.4h |
| 3689 | + * 6 | v14.4h | v15.4h |
| 3690 | + * 7 | v16.4h | v17.4h |
| 3691 | + */ |
| 3692 | + ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [COEF_BLOCK], 32 |
| 3693 | + ld1 {v8.4h, v9.4h, v10.4h, v11.4h}, [COEF_BLOCK], 32 |
| 3694 | + add COEF_BLOCK, COEF_BLOCK, #16 |
| 3695 | + ld1 {v12.4h, v13.4h, v14.4h, v15.4h}, [COEF_BLOCK], 32 |
| 3696 | + ld1 {v16.4h, v17.4h}, [COEF_BLOCK], 16 |
| 3697 | + /* dequantize */ |
| 3698 | + ld1 {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32 |
| 3699 | + mul v4.4h, v4.4h, v18.4h |
| 3700 | + mul v5.4h, v5.4h, v19.4h |
| 3701 | + ins v4.2d[1], v5.2d[0] /* 128 bit q4 */ |
| 3702 | + ld1 {v22.4h, v23.4h, v24.4h, v25.4h}, [DCT_TABLE], 32 |
| 3703 | + mul v6.4h, v6.4h, v20.4h |
| 3704 | + mul v7.4h, v7.4h, v21.4h |
| 3705 | + ins v6.2d[1], v7.2d[0] /* 128 bit q6 */ |
| 3706 | + mul v8.4h, v8.4h, v22.4h |
| 3707 | + mul v9.4h, v9.4h, v23.4h |
| 3708 | + ins v8.2d[1], v9.2d[0] /* 128 bit q8 */ |
| 3709 | + add DCT_TABLE, DCT_TABLE, #16 |
| 3710 | + ld1 {v26.4h, v27.4h, v28.4h, v29.4h}, [DCT_TABLE], 32 |
| 3711 | + mul v10.4h, v10.4h, v24.4h |
| 3712 | + mul v11.4h, v11.4h, v25.4h |
| 3713 | + ins v10.2d[1], v11.2d[0] /* 128 bit q10 */ |
| 3714 | + mul v12.4h, v12.4h, v26.4h |
| 3715 | + mul v13.4h, v13.4h, v27.4h |
| 3716 | + ins v12.2d[1], v13.2d[0] /* 128 bit q12 */ |
| 3717 | + ld1 {v30.4h, v31.4h}, [DCT_TABLE], 16 |
| 3718 | + mul v14.4h, v14.4h, v28.4h |
| 3719 | + mul v15.4h, v15.4h, v29.4h |
| 3720 | + ins v14.2d[1], v15.2d[0] /* 128 bit q14 */ |
| 3721 | + mul v16.4h, v16.4h, v30.4h |
| 3722 | + mul v17.4h, v17.4h, v31.4h |
| 3723 | + ins v16.2d[1], v17.2d[0] /* 128 bit q16 */ |
| 3724 | + |
| 3725 | + /* Pass 1 */ |
| 3726 | + idct_helper v4.4h, v6.4h, v8.4h, v10.4h, v12.4h, v14.4h, v16.4h, 12, v4.4h, v6.4h, v8.4h, v10.4h |
| 3727 | + transpose_4x4 v4, v6, v8, v10, v3 |
| 3728 | + ins v10.2d[1], v11.2d[0] |
| 3729 | + idct_helper v5.4h, v7.4h, v9.4h, v11.4h, v13.4h, v15.4h, v17.4h, 12, v5.4h, v7.4h, v9.4h, v11.4h |
| 3730 | + transpose_4x4 v5, v7, v9, v11, v3 |
| 3731 | + ins v10.2d[1], v11.2d[0] |
| 3732 | + /* Pass 2 */ |
| 3733 | + idct_helper v4.4h, v6.4h, v8.4h, v10.4h, v7.4h, v9.4h, v11.4h, 19, v26.4h, v27.4h, v28.4h, v29.4h |
| 3734 | + transpose_4x4 v26, v27, v28, v29, v3 |
| 3735 | + |
| 3736 | + /* Range limit */ |
| 3737 | + movi v30.8h, #0x80 |
| 3738 | + ins v26.2d[1], v27.2d[0] |
| 3739 | + ins v28.2d[1], v29.2d[0] |
| 3740 | + add v26.8h, v26.8h, v30.8h |
| 3741 | + add v28.8h, v28.8h, v30.8h |
| 3742 | + sqxtun v26.8b, v26.8h |
| 3743 | + sqxtun v27.8b, v28.8h |
| 3744 | + |
| 3745 | + /* Store results to the output buffer */ |
| 3746 | + ldp TMP1, TMP2, [OUTPUT_BUF], 16 |
| 3747 | + ldp TMP3, TMP4, [OUTPUT_BUF] |
| 3748 | + add TMP1, TMP1, OUTPUT_COL |
| 3749 | + add TMP2, TMP2, OUTPUT_COL |
| 3750 | + add TMP3, TMP3, OUTPUT_COL |
| 3751 | + add TMP4, TMP4, OUTPUT_COL |
| 3752 | + |
| 3753 | +#if defined(__ARMEL__) && !RESPECT_STRICT_ALIGNMENT |
| 3754 | + /* We can use much less instructions on little endian systems if the |
| 3755 | + * OS kernel is not configured to trap unaligned memory accesses. |
| 3756 | + * NOTE(review): AArch64 compilers normally define __AARCH64EL__, not __ARMEL__, so this branch may never be selected - confirm. */ |
| 3757 | + st1 {v26.s}[0], [TMP1], 4 |
| 3758 | + st1 {v27.s}[0], [TMP3], 4 |
| 3759 | + st1 {v26.s}[1], [TMP2], 4 |
| 3760 | + st1 {v27.s}[1], [TMP4], 4 |
| 3761 | +#else |
| 3762 | + st1 {v26.b}[0], [TMP1], 1 |
| 3763 | + st1 {v27.b}[0], [TMP3], 1 |
| 3764 | + st1 {v26.b}[1], [TMP1], 1 |
| 3765 | + st1 {v27.b}[1], [TMP3], 1 |
| 3766 | + st1 {v26.b}[2], [TMP1], 1 |
| 3767 | + st1 {v27.b}[2], [TMP3], 1 |
| 3768 | + st1 {v26.b}[3], [TMP1], 1 |
| 3769 | + st1 {v27.b}[3], [TMP3], 1 |
| 3770 | + |
| 3771 | + st1 {v26.b}[4], [TMP2], 1 |
| 3772 | + st1 {v27.b}[4], [TMP4], 1 |
| 3773 | + st1 {v26.b}[5], [TMP2], 1 |
| 3774 | + st1 {v27.b}[5], [TMP4], 1 |
| 3775 | + st1 {v26.b}[6], [TMP2], 1 |
| 3776 | + st1 {v27.b}[6], [TMP4], 1 |
| 3777 | + st1 {v26.b}[7], [TMP2], 1 |
| 3778 | + st1 {v27.b}[7], [TMP4], 1 |
| 3779 | +#endif |
| 3780 | + |
| 3781 | + add x15, sp, #16 /* restore: x15 walks the vector save area, sp is released after x15 is reloaded */ |
| 3782 | + ld1 {v0.8b - v3.8b}, [x15], 32 |
| 3783 | + ld1 {v4.8b - v7.8b}, [x15], 32 |
| 3784 | + ld1 {v8.8b - v11.8b}, [x15], 32 |
| 3785 | + ld1 {v12.8b - v15.8b}, [x15], 32 |
| 3786 | + ld1 {v16.8b - v19.8b}, [x15], 32 |
| 3787 | + ld1 {v20.8b - v23.8b}, [x15], 32 |
| 3788 | + ld1 {v24.8b - v27.8b}, [x15], 32 |
| 3789 | + ld1 {v28.8b - v31.8b}, [x15], 32 |
| 3790 | + ldr x15, [sp] |
| 3791 | + add sp, sp, #272 |
| 3792 | + ret /* return via x30 without overwriting it (blr would rewrite the link register) */ |
| 3793 | + |
| 3794 | + .unreq DCT_TABLE |
| 3795 | + .unreq COEF_BLOCK |
| 3796 | + .unreq OUTPUT_BUF |
| 3797 | + .unreq OUTPUT_COL |
| 3798 | + .unreq TMP1 |
| 3799 | + .unreq TMP2 |
| 3800 | + .unreq TMP3 |
| 3801 | + .unreq TMP4 |
| 3802 | + |
| 3803 | +.purgem idct_helper |
| 3804 | + |
| 3805 | + |
| 3806 | +/*****************************************************************************/ |
| 3807 | + |
| 3808 | +/* |
| 3809 | + * jsimd_idct_2x2_neon |
| 3810 | + * |
| 3811 | + * This function contains inverse-DCT code for getting reduced-size |
| 3812 | + * 2x2 pixels output from an 8x8 DCT block. It uses the same calculations |
| 3813 | + * and produces exactly the same output as IJG's original 'jpeg_idct_2x2' |
| 3814 | + * function from jpeg-6b (jidctred.c). |
| 3815 | + * |
| 3816 | + * NOTE: jpeg-8 has an improved implementation of 2x2 inverse-DCT, which |
| 3817 | + * requires far fewer arithmetic operations and hence should be faster. |
| 3818 | + * The primary purpose of this particular NEON optimized function is |
| 3819 | + * bit exact compatibility with jpeg-6b. |
| 3820 | + */ |
| 3821 | + |
| 3822 | +.balign 8 /* four 16-bit multipliers, fetched as one vector by ld1 {v14.4h} in jsimd_idct_2x2_neon */ |
| 3823 | +jsimd_idct_2x2_neon_consts: |
| 3824 | + .short -FIX_0_720959822 /* v14.4h[0]: applied to coefficient row 7 in pass 1 */ |
| 3825 | + .short FIX_0_850430095 /* v14.4h[1]: applied to coefficient row 5 in pass 1 */ |
| 3826 | + .short -FIX_1_272758580 /* v14.4h[2]: applied to coefficient row 3 in pass 1 */ |
| 3827 | + .short FIX_3_624509785 /* v14.4h[3]: applied to coefficient row 1 in pass 1 */ |
| 3828 | + |
| 3829 | +.macro idct_helper x4, x6, x10, x12, x16, shift, y26, y27 |
| 3830 | + sshll v15.4s, \x4, #15 /* v15 = first operand widened to 32 bits and shifted left by 15 */ |
| 3831 | + smull v26.4s, \x6, v14.4h[3] /* v26 = second operand * v14[3] ... */ |
| 3832 | + smlal v26.4s, \x10, v14.4h[2] /* ... + third operand * v14[2] */ |
| 3833 | + smlal v26.4s, \x12, v14.4h[1] /* ... + fourth operand * v14[1] */ |
| 3834 | + smlal v26.4s, \x16, v14.4h[0] /* ... + fifth operand * v14[0] */ |
| 3835 | + |
| 3836 | + add v20.4s, v15.4s, v26.4s /* sum: becomes the first output */ |
| 3837 | + sub v15.4s, v15.4s, v26.4s /* difference: becomes the second output */ |
| 3838 | + /* Round, shift right and narrow to 16 bits. rshrn to a .4h destination only encodes shifts of 1..16, so larger shifts round at 32 bits with srshr and then truncate with xtn. */ |
| 3839 | +.if \shift > 16 |
| 3840 | + srshr v20.4s, v20.4s, #\shift |
| 3841 | + srshr v15.4s, v15.4s, #\shift |
| 3842 | + xtn \y26, v20.4s |
| 3843 | + xtn \y27, v15.4s |
| 3844 | +.else |
| 3845 | + rshrn \y26, v20.4s, #\shift |
| 3846 | + rshrn \y27, v15.4s, #\shift |
| 3847 | +.endif |
| 3848 | + /* Expects the four multipliers in v14.4h; overwrites v15, v20 and v26 as scratch. */ |
| 3849 | +.endm |
| 3850 | + |
| 3851 | +asm_function jsimd_idct_2x2_neon |
| 3852 | + /* Reduced-size inverse DCT: reads the coefficient block at COEF_BLOCK, dequantizes it with DCT_TABLE and writes a 2x2 block of bytes through the first two row pointers found at OUTPUT_BUF, each offset by OUTPUT_COL. */ |
| 3853 | + DCT_TABLE .req x0 |
| 3854 | + COEF_BLOCK .req x1 |
| 3855 | + OUTPUT_BUF .req x2 |
| 3856 | + OUTPUT_COL .req x3 /* NOTE(review): used as a full 64-bit value; if the C prototype passes a 32-bit type the upper half is unspecified under AAPCS64 - confirm */ |
| 3857 | + TMP1 .req x0 /* shares x0 with DCT_TABLE; first written after the last table load */ |
| 3858 | + TMP2 .req x15 |
| 3859 | + |
| 3860 | + /* Save x15 and the low 64 bits of the NEON registers listed below (there is no vpush on AArch64). The post-indexed stores walk sp back up to its entry value (16 + 4*32 + 16 + 32 + 16 = 208), so the saved data lies below sp while the body runs. NOTE(review): AAPCS64 defines no red zone - confirm this is acceptable on the target. */ |
| 3861 | + sub sp, sp, 208 |
| 3862 | + str x15, [sp], 16 |
| 3863 | + |
| 3864 | + /* Load constants (address taken here, vector loaded after the register save) */ |
| 3865 | + adr TMP2, jsimd_idct_2x2_neon_consts |
| 3866 | + st1 {v4.8b - v7.8b}, [sp], 32 |
| 3867 | + st1 {v8.8b - v11.8b}, [sp], 32 |
| 3868 | + st1 {v12.8b - v15.8b}, [sp], 32 |
| 3869 | + st1 {v16.8b - v19.8b}, [sp], 32 |
| 3870 | + st1 {v21.8b - v22.8b}, [sp], 16 /* NOTE(review): v20 is written below but is not in the saved set - presumably fine because it is caller-saved; confirm */ |
| 3871 | + st1 {v24.8b - v27.8b}, [sp], 32 |
| 3872 | + st1 {v30.8b - v31.8b}, [sp], 16 |
| 3873 | + ld1 {v14.4h}, [TMP2] /* v14.4h = the four multipliers used by idct_helper and pass 1 */ |
| 3874 | + |
| 3875 | + /* Load all COEF_BLOCK into NEON registers with the following allocation: |
| 3876 | + * 0 1 2 3 | 4 5 6 7 |
| 3877 | + * ---------+-------- |
| 3878 | + * 0 | v4.4h | v5.4h |
| 3879 | + * 1 | v6.4h | v7.4h |
| 3880 | + * 2 | - | - |
| 3881 | + * 3 | v10.4h | v11.4h |
| 3882 | + * 4 | - | - |
| 3883 | + * 5 | v12.4h | v13.4h |
| 3884 | + * 6 | - | - |
| 3885 | + * 7 | v16.4h | v17.4h |
| 3886 | + */ |
| 3887 | + ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [COEF_BLOCK], 32 /* rows 0 and 1 */ |
| 3888 | + add COEF_BLOCK, COEF_BLOCK, #16 /* skip row 2 (8 halfwords) */ |
| 3889 | + ld1 {v10.4h, v11.4h}, [COEF_BLOCK], 16 /* row 3 */ |
| 3890 | + add COEF_BLOCK, COEF_BLOCK, #16 /* skip row 4 */ |
| 3891 | + ld1 {v12.4h, v13.4h}, [COEF_BLOCK], 16 /* row 5 */ |
| 3892 | + add COEF_BLOCK, COEF_BLOCK, #16 /* skip row 6 */ |
| 3893 | + ld1 {v16.4h, v17.4h}, [COEF_BLOCK], 16 /* row 7 */ |
| 3894 | + /* Dequantize: multiply each loaded row by the matching DCT_TABLE row, skipping the same rows */ |
| 3895 | + ld1 {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32 |
| 3896 | + mul v4.4h, v4.4h, v18.4h |
| 3897 | + mul v5.4h, v5.4h, v19.4h |
| 3898 | + ins v4.2d[1], v5.2d[0] /* v4 = full row 0 as one 128-bit vector */ |
| 3899 | + mul v6.4h, v6.4h, v20.4h |
| 3900 | + mul v7.4h, v7.4h, v21.4h |
| 3901 | + ins v6.2d[1], v7.2d[0] /* v6 = full row 1 */ |
| 3902 | + add DCT_TABLE, DCT_TABLE, #16 |
| 3903 | + ld1 {v24.4h, v25.4h}, [DCT_TABLE], 16 |
| 3904 | + mul v10.4h, v10.4h, v24.4h |
| 3905 | + mul v11.4h, v11.4h, v25.4h |
| 3906 | + ins v10.2d[1], v11.2d[0] /* v10 = full row 3 */ |
| 3907 | + add DCT_TABLE, DCT_TABLE, #16 |
| 3908 | + ld1 {v26.4h, v27.4h}, [DCT_TABLE], 16 |
| 3909 | + mul v12.4h, v12.4h, v26.4h |
| 3910 | + mul v13.4h, v13.4h, v27.4h |
| 3911 | + ins v12.2d[1], v13.2d[0] /* v12 = full row 5 */ |
| 3912 | + add DCT_TABLE, DCT_TABLE, #16 |
| 3913 | + ld1 {v30.4h, v31.4h}, [DCT_TABLE], 16 |
| 3914 | + mul v16.4h, v16.4h, v30.4h |
| 3915 | + mul v17.4h, v17.4h, v31.4h |
| 3916 | + ins v16.2d[1], v17.2d[0] /* v16 = full row 7 */ |
| 3917 | + |
| 3918 | + /* Pass 1: the #if 0 branch is a disabled idct_helper-based variant; the #else branch is what gets assembled and handles columns 0-3 (via v26) and columns 4-7 (via v24) side by side */ |
| 3919 | +#if 0 |
| 3920 | + idct_helper v4.4h, v6.4h, v10.4h, v12.4h, v16.4h, 13, v4.4h, v6.4h |
| 3921 | + transpose_4x4 v4.4h, v6.4h, v8.4h, v10.4h |
| 3922 | + idct_helper v5.4h, v7.4h, v11.4h, v13.4h, v17.4h, 13, v5.4h, v7.4h |
| 3923 | + transpose_4x4 v5.4h, v7.4h, v9.4h, v11.4h |
| 3924 | +#else |
| 3925 | + smull v26.4s, v6.4h, v14.4h[3] |
| 3926 | + smlal v26.4s, v10.4h, v14.4h[2] |
| 3927 | + smlal v26.4s, v12.4h, v14.4h[1] |
| 3928 | + smlal v26.4s, v16.4h, v14.4h[0] |
| 3929 | + smull v24.4s, v7.4h, v14.4h[3] |
| 3930 | + smlal v24.4s, v11.4h, v14.4h[2] |
| 3931 | + smlal v24.4s, v13.4h, v14.4h[1] |
| 3932 | + smlal v24.4s, v17.4h, v14.4h[0] |
| 3933 | + sshll v15.4s, v4.4h, #15 /* row 0, columns 0-3, scaled by 2^15 */ |
| 3934 | + sshll v30.4s, v5.4h, #15 /* row 0, columns 4-7, scaled by 2^15 */ |
| 3935 | + add v20.4s, v15.4s, v26.4s |
| 3936 | + sub v15.4s, v15.4s, v26.4s |
| 3937 | + rshrn v4.4h, v20.4s, #13 /* sum, rounded >> 13 */ |
| 3938 | + rshrn v6.4h, v15.4s, #13 /* difference, rounded >> 13 */ |
| 3939 | + add v20.4s, v30.4s, v24.4s |
| 3940 | + sub v15.4s, v30.4s, v24.4s |
| 3941 | + rshrn v5.4h, v20.4s, #13 |
| 3942 | + rshrn v7.4h, v15.4s, #13 |
| 3943 | + ins v4.2d[1], v5.2d[0] |
| 3944 | + ins v6.2d[1], v7.2d[0] |
| 3945 | + transpose v4, v6, v3, .16b, .8h /* transpose is a macro defined outside this section; v3 is presumably its scratch register - confirm */ |
| 3946 | + transpose v6, v10, v3, .16b, .4s |
| 3947 | + ins v11.2d[0], v10.2d[1] /* expose the upper halves as v11 / v7 for the .4h operands of pass 2 */ |
| 3948 | + ins v7.2d[0], v6.2d[1] |
| 3949 | +#endif |
| 3950 | + |
| 3951 | + /* Pass 2: results land in v26.4h (sum) and v27.4h (difference) */ |
| 3952 | + idct_helper v4.4h, v6.4h, v10.4h, v7.4h, v11.4h, 20, v26.4h, v27.4h |
| 3953 | + |
| 3954 | + /* Range limit: add 128 and saturate to unsigned bytes */ |
| 3955 | + movi v30.8h, #0x80 /* 128 in every 16-bit lane */ |
| 3956 | + ins v26.2d[1], v27.2d[0] /* v26 = {sum lanes, difference lanes} */ |
| 3957 | + add v26.8h, v26.8h, v30.8h |
| 3958 | + sqxtun v30.8b, v26.8h /* all eight values saturated to bytes */ |
| 3959 | + ins v26.2d[0], v30.2d[0] /* v26.b[0..7] = those bytes; upper half of v26 still holds the 16-bit difference lanes */ |
| 3960 | + sqxtun v27.8b, v26.8h /* v27.b[4..7] = saturated difference lanes */ |
| 3961 | + |
| 3962 | + /* Store results to the output buffer */ |
| 3963 | + ldp TMP1, TMP2, [OUTPUT_BUF] /* first two row pointers */ |
| 3964 | + add TMP1, TMP1, OUTPUT_COL |
| 3965 | + add TMP2, TMP2, OUTPUT_COL |
| 3966 | + |
| 3967 | + st1 {v26.b}[0], [TMP1], 1 /* first row: two bytes */ |
| 3968 | + st1 {v27.b}[4], [TMP1], 1 |
| 3969 | + st1 {v26.b}[1], [TMP2], 1 /* second row: two bytes */ |
| 3970 | + st1 {v27.b}[5], [TMP2], 1 |
| 3971 | + /* Step back down to the save area written on entry and reload in the same order; sp ends at its entry value */ |
| 3972 | + sub sp, sp, #208 |
| 3973 | + ldr x15, [sp], 16 |
| 3974 | + ld1 {v4.8b - v7.8b}, [sp], 32 |
| 3975 | + ld1 {v8.8b - v11.8b}, [sp], 32 |
| 3976 | + ld1 {v12.8b - v15.8b}, [sp], 32 |
| 3977 | + ld1 {v16.8b - v19.8b}, [sp], 32 |
| 3978 | + ld1 {v21.8b - v22.8b}, [sp], 16 |
| 3979 | + ld1 {v24.8b - v27.8b}, [sp], 32 |
| 3980 | + ld1 {v30.8b - v31.8b}, [sp], 16 |
| 3981 | + ret /* return through x30; unlike a call-type branch this leaves x30 untouched and is recognised as a function return */ |
| 3982 | + |
| 3983 | + .unreq DCT_TABLE |
| 3984 | + .unreq COEF_BLOCK |
| 3985 | + .unreq OUTPUT_BUF |
| 3986 | + .unreq OUTPUT_COL |
| 3987 | + .unreq TMP1 |
| 3988 | + .unreq TMP2 |
| 3989 | + |
| 3990 | +.purgem idct_helper |
| 3991 | + |
| 3992 | + |
| 3993 | +/*****************************************************************************/ |
| 3994 | + |
| 3995 | +/* |
| 3996 | + * jsimd_ycc_extrgb_convert_neon |
| 3997 | + * jsimd_ycc_extbgr_convert_neon |
| 3998 | + * jsimd_ycc_extrgbx_convert_neon |
| 3999 | + * jsimd_ycc_extbgrx_convert_neon |
| 4000 | + * jsimd_ycc_extxbgr_convert_neon |
| 4001 | + * jsimd_ycc_extxrgb_convert_neon |
| 4002 | + * |
| 4003 | + * Colorspace conversion YCbCr -> RGB (the same generator also emits jsimd_ycc_rgb565_convert_neon) |
| 4004 | + */ |
| 4005 | + |
| 4006 | + |
| 4007 | +.macro do_load size /* load "size" samples from each of the U, V and Y pointers into v4, v5 and v0, advancing the pointers */ |
| 4008 | + .if \size == 8 |
| 4009 | + ld1 {v4.8b}, [U], 8 /* v4 = 8 U samples */ |
| 4010 | + ld1 {v5.8b}, [V], 8 /* v5 = 8 V samples */ |
| 4011 | + ld1 {v0.8b}, [Y], 8 /* v0 = 8 Y samples */ |
| 4012 | + prfm PLDL1KEEP, [U, #64] /* prefetch 64 bytes past each updated pointer */ |
| 4013 | + prfm PLDL1KEEP, [V, #64] |
| 4014 | + prfm PLDL1KEEP, [Y, #64] |
| 4015 | + .elseif \size == 4 |
| 4016 | + ld1 {v4.b}[0], [U], 1 /* 4-sample case fills byte lanes 0-3, one byte at a time */ |
| 4017 | + ld1 {v4.b}[1], [U], 1 |
| 4018 | + ld1 {v4.b}[2], [U], 1 |
| 4019 | + ld1 {v4.b}[3], [U], 1 |
| 4020 | + ld1 {v5.b}[0], [V], 1 |
| 4021 | + ld1 {v5.b}[1], [V], 1 |
| 4022 | + ld1 {v5.b}[2], [V], 1 |
| 4023 | + ld1 {v5.b}[3], [V], 1 |
| 4024 | + ld1 {v0.b}[0], [Y], 1 |
| 4025 | + ld1 {v0.b}[1], [Y], 1 |
| 4026 | + ld1 {v0.b}[2], [Y], 1 |
| 4027 | + ld1 {v0.b}[3], [Y], 1 |
| 4028 | + .elseif \size == 2 |
| 4029 | + ld1 {v4.b}[4], [U], 1 /* 2-sample case fills lanes 4-5, so it can follow a 4-sample load into the same registers */ |
| 4030 | + ld1 {v4.b}[5], [U], 1 |
| 4031 | + ld1 {v5.b}[4], [V], 1 |
| 4032 | + ld1 {v5.b}[5], [V], 1 |
| 4033 | + ld1 {v0.b}[4], [Y], 1 |
| 4034 | + ld1 {v0.b}[5], [Y], 1 |
| 4035 | + .elseif \size == 1 |
| 4036 | + ld1 {v4.b}[6], [U], 1 /* 1-sample case fills lane 6 */ |
| 4037 | + ld1 {v5.b}[6], [V], 1 |
| 4038 | + ld1 {v0.b}[6], [Y], 1 |
| 4039 | + .else |
| 4040 | + .error unsupported macroblock size |
| 4041 | + .endif |
| 4042 | +.endm |
| 4043 | + |
| 4044 | +.macro do_store bpp, size /* write "size" converted pixels at RGB and advance it; bpp (24, 32 or 16) selects the layout, lane choice mirrors do_load */ |
| 4045 | + .if \bpp == 24 |
| 4046 | + .if \size == 8 |
| 4047 | + st3 {v10.8b, v11.8b, v12.8b}, [RGB], 24 /* 3 bytes per pixel, interleaved from v10, v11, v12 */ |
| 4048 | + .elseif \size == 4 |
| 4049 | + st3 {v10.b, v11.b, v12.b}[0], [RGB], 3 /* lanes 0-3 */ |
| 4050 | + st3 {v10.b, v11.b, v12.b}[1], [RGB], 3 |
| 4051 | + st3 {v10.b, v11.b, v12.b}[2], [RGB], 3 |
| 4052 | + st3 {v10.b, v11.b, v12.b}[3], [RGB], 3 |
| 4053 | + .elseif \size == 2 |
| 4054 | + st3 {v10.b, v11.b, v12.b}[4], [RGB], 3 /* lanes 4-5 */ |
| 4055 | + st3 {v10.b, v11.b, v12.b}[5], [RGB], 3 |
| 4056 | + .elseif \size == 1 |
| 4057 | + st3 {v10.b, v11.b, v12.b}[6], [RGB], 3 /* lane 6 */ |
| 4058 | + .else |
| 4059 | + .error unsupported macroblock size |
| 4060 | + .endif |
| 4061 | + .elseif \bpp == 32 |
| 4062 | + .if \size == 8 |
| 4063 | + st4 {v10.8b, v11.8b, v12.8b, v13.8b}, [RGB], 32 /* 4 bytes per pixel, interleaved from v10..v13 */ |
| 4064 | + .elseif \size == 4 |
| 4065 | + st4 {v10.b, v11.b, v12.b, v13.b}[0], [RGB], 4 /* lanes 0-3 */ |
| 4066 | + st4 {v10.b, v11.b, v12.b, v13.b}[1], [RGB], 4 |
| 4067 | + st4 {v10.b, v11.b, v12.b, v13.b}[2], [RGB], 4 |
| 4068 | + st4 {v10.b, v11.b, v12.b, v13.b}[3], [RGB], 4 |
| 4069 | + .elseif \size == 2 |
| 4070 | + st4 {v10.b, v11.b, v12.b, v13.b}[4], [RGB], 4 /* lanes 4-5 */ |
| 4071 | + st4 {v10.b, v11.b, v12.b, v13.b}[5], [RGB], 4 |
| 4072 | + .elseif \size == 1 |
| 4073 | + st4 {v10.b, v11.b, v12.b, v13.b}[6], [RGB], 4 /* lane 6 */ |
| 4074 | + .else |
| 4075 | + .error unsupported macroblock size |
| 4076 | + .endif |
| 4077 | + .elseif \bpp==16 |
| 4078 | + .if \size == 8 |
| 4079 | + st1 {v25.8h}, [RGB],16 /* 2 bytes per pixel, already packed in v25 */ |
| 4080 | + .elseif \size == 4 |
| 4081 | + st1 {v25.4h}, [RGB],8 /* halfword lanes 0-3 */ |
| 4082 | + .elseif \size == 2 |
| 4083 | + st1 {v25.h}[4], [RGB],2 /* halfword lanes 4-5 */ |
| 4084 | + st1 {v25.h}[5], [RGB],2 |
| 4085 | + .elseif \size == 1 |
| 4086 | + st1 {v25.h}[6], [RGB],2 /* halfword lane 6 */ |
| 4087 | + .else |
| 4088 | + .error unsupported macroblock size |
| 4089 | + .endif |
| 4090 | + .else |
| 4091 | + .error unsupported bpp |
| 4092 | + .endif |
| 4093 | +.endm |
| 4094 | + |
| 4095 | +.macro generate_jsimd_ycc_rgb_convert_neon colorid, bpp, r_offs, rsize, g_offs, gsize, b_offs, bsize, defsize |
| 4096 | + /* Emits one constant table and one conversion function named after colorid. bpp (24, 32 or 16) selects the store layout; r_offs, g_offs and b_offs are the last digit of the destination register (v10..v13) for each channel; defsize is the arrangement suffix for that register. rsize, gsize and bsize are accepted but never referenced in this body. */ |
| 4097 | +/* |
| 4098 | + * 2-stage pipelined YCbCr->RGB conversion |
| 4099 | + */ |
| 4100 | + |
| 4101 | +.macro do_yuv_to_rgb_stage1 |
| 4102 | + uaddw v6.8h, v2.8h, v4.8b /* v6 = u - 128 (v2 holds -128 in every lane) */ |
| 4103 | + uaddw v8.8h, v2.8h, v5.8b /* v8 = v - 128 */ |
| 4104 | + smull v20.4s, v6.4h, v1.4h[1] /* green, low half: (u - 128) * -11277 */ |
| 4105 | + smlal v20.4s, v8.4h, v1.4h[2] /* green, low half: + (v - 128) * -23401 */ |
| 4106 | + smull2 v22.4s, v6.8h, v1.4h[1] /* green, high half: (u - 128) * -11277 */ |
| 4107 | + smlal2 v22.4s, v8.8h, v1.4h[2] /* green, high half: + (v - 128) * -23401 */ |
| 4108 | + smull v24.4s, v8.4h, v1.4h[0] /* red, low half: (v - 128) * 22971 */ |
| 4109 | + smull2 v26.4s, v8.8h, v1.4h[0] /* red, high half: (v - 128) * 22971 */ |
| 4110 | + smull v28.4s, v6.4h, v1.4h[3] /* blue, low half: (u - 128) * 29033 */ |
| 4111 | + smull2 v30.4s, v6.8h, v1.4h[3] /* blue, high half: (u - 128) * 29033 */ |
| 4112 | +.endm |
| 4113 | + |
| 4114 | +.macro do_yuv_to_rgb_stage2 |
| 4115 | + rshrn v20.4h, v20.4s, #15 /* green term: rounding shift right by 15, narrowed to 16 bits */ |
| 4116 | + rshrn2 v20.8h, v22.4s, #15 |
| 4117 | + rshrn v24.4h, v24.4s, #14 /* red term: rounding shift right by 14 */ |
| 4118 | + rshrn2 v24.8h, v26.4s, #14 |
| 4119 | + rshrn v28.4h, v28.4s, #14 /* blue term: rounding shift right by 14 */ |
| 4120 | + rshrn2 v28.8h, v30.4s, #14 |
| 4121 | + uaddw v20.8h, v20.8h, v0.8b /* add the Y samples to each channel term */ |
| 4122 | + uaddw v24.8h, v24.8h, v0.8b |
| 4123 | + uaddw v28.8h, v28.8h, v0.8b |
| 4124 | +.if \bpp != 16 |
| 4125 | + sqxtun v1\g_offs\defsize, v20.8h /* saturate to bytes in the register selected by the channel offset */ |
| 4126 | + sqxtun v1\r_offs\defsize, v24.8h |
| 4127 | + sqxtun v1\b_offs\defsize, v28.8h |
| 4128 | +.else |
| 4129 | + sqshlu v21.8h, v20.8h, #8 /* clamp negatives to 0, saturate, value moved up into bits 15..8 */ |
| 4130 | + sqshlu v25.8h, v24.8h, #8 |
| 4131 | + sqshlu v29.8h, v28.8h, #8 |
| 4132 | + sri v25.8h, v21.8h, #5 /* keep the top 5 red bits, insert green below them */ |
| 4133 | + sri v25.8h, v29.8h, #11 /* insert the top 5 blue bits into bits 4..0: v25 = R5 G6 B5 */ |
| 4134 | +.endif |
| 4135 | + |
| 4136 | +.endm |
| 4137 | + |
| 4138 | +.macro do_yuv_to_rgb_stage2_store_load_stage1 |
| 4139 | + rshrn v20.4h, v20.4s, #15 /* stage 2 for the current 8 pixels, interleaved with the loads and stage 1 for the next 8 */ |
| 4140 | + rshrn v24.4h, v24.4s, #14 |
| 4141 | + rshrn v28.4h, v28.4s, #14 |
| 4142 | + ld1 {v4.8b}, [U], 8 /* next U samples */ |
| 4143 | + rshrn2 v20.8h, v22.4s, #15 |
| 4144 | + rshrn2 v24.8h, v26.4s, #14 |
| 4145 | + rshrn2 v28.8h, v30.4s, #14 |
| 4146 | + ld1 {v5.8b}, [V], 8 /* next V samples */ |
| 4147 | + uaddw v20.8h, v20.8h, v0.8b /* still the current Y samples; v0 is reloaded only after these adds */ |
| 4148 | + uaddw v24.8h, v24.8h, v0.8b |
| 4149 | + uaddw v28.8h, v28.8h, v0.8b |
| 4150 | +.if \bpp != 16 /**************** rgb24/rgb32 *********************************/ |
| 4151 | + sqxtun v1\g_offs\defsize, v20.8h |
| 4152 | + ld1 {v0.8b}, [Y], 8 /* next Y samples */ |
| 4153 | + sqxtun v1\r_offs\defsize, v24.8h |
| 4154 | + prfm PLDL1KEEP, [U, #64] |
| 4155 | + prfm PLDL1KEEP, [V, #64] |
| 4156 | + prfm PLDL1KEEP, [Y, #64] |
| 4157 | + sqxtun v1\b_offs\defsize, v28.8h |
| 4158 | + uaddw v6.8h, v2.8h, v4.8b /* v6 = u - 128 */ |
| 4159 | + uaddw v8.8h, v2.8h, v5.8b /* v8 = v - 128 */ |
| 4160 | + smull v20.4s, v6.4h, v1.4h[1] /* multiply by -11277 */ |
| 4161 | + smlal v20.4s, v8.4h, v1.4h[2] /* multiply by -23401 */ |
| 4162 | + smull2 v22.4s, v6.8h, v1.4h[1] /* multiply by -11277 */ |
| 4163 | + smlal2 v22.4s, v8.8h, v1.4h[2] /* multiply by -23401 */ |
| 4164 | + smull v24.4s, v8.4h, v1.4h[0] /* multiply by 22971 */ |
| 4165 | + smull2 v26.4s, v8.8h, v1.4h[0] /* multiply by 22971 */ |
| 4166 | +.else /**************************** rgb565 ***********************************/ |
| 4167 | + sqshlu v21.8h, v20.8h, #8 |
| 4168 | + sqshlu v25.8h, v24.8h, #8 |
| 4169 | + sqshlu v29.8h, v28.8h, #8 |
| 4170 | + uaddw v6.8h, v2.8h, v4.8b /* v6 = u - 128 */ |
| 4171 | + uaddw v8.8h, v2.8h, v5.8b /* v8 = v - 128 */ |
| 4172 | + ld1 {v0.8b}, [Y], 8 /* next Y samples */ |
| 4173 | + smull v20.4s, v6.4h, v1.4h[1] /* multiply by -11277 */ |
| 4174 | + smlal v20.4s, v8.4h, v1.4h[2] /* multiply by -23401 */ |
| 4175 | + smull2 v22.4s, v6.8h, v1.4h[1] /* multiply by -11277 */ |
| 4176 | + smlal2 v22.4s, v8.8h, v1.4h[2] /* multiply by -23401 */ |
| 4177 | + sri v25.8h, v21.8h, #5 /* pack green under red (uses v21, not the v20 already reused above) */ |
| 4178 | + smull v24.4s, v8.4h, v1.4h[0] /* multiply by 22971 */ |
| 4179 | + smull2 v26.4s, v8.8h, v1.4h[0] /* multiply by 22971 */ |
| 4180 | + prfm PLDL1KEEP, [U, #64] |
| 4181 | + prfm PLDL1KEEP, [V, #64] |
| 4182 | + prfm PLDL1KEEP, [Y, #64] |
| 4183 | + sri v25.8h, v29.8h, #11 /* pack blue: v25 = R5 G6 B5 */ |
| 4184 | +.endif |
| 4185 | + do_store \bpp, 8 |
| 4186 | + smull v28.4s, v6.4h, v1.4h[3] /* multiply by 29033 */ |
| 4187 | + smull2 v30.4s, v6.8h, v1.4h[3] /* multiply by 29033 */ |
| 4188 | +.endm |
| 4189 | + |
| 4190 | +.macro do_yuv_to_rgb |
| 4191 | + do_yuv_to_rgb_stage1 |
| 4192 | + do_yuv_to_rgb_stage2 |
| 4193 | +.endm |
| 4194 | + |
| 4195 | +/* Apple gas crashes on adrl, work around that by using adr. |
| 4196 | + * But this requires a copy of these constants for each function. |
| 4197 | + */ |
| 4198 | + |
| 4199 | +.balign 16 |
| 4200 | +jsimd_ycc_\colorid\()_neon_consts: |
| 4201 | + .short 0, 0, 0, 0 /* loaded into v0.4h: padding only */ |
| 4202 | + .short 22971, -11277, -23401, 29033 /* loaded into v1.4h: the four multipliers */ |
| 4203 | + .short -128, -128, -128, -128 /* this row and the next are loaded into v2.8h: the -128 chroma bias */ |
| 4204 | + .short -128, -128, -128, -128 |
| 4205 | + |
| 4206 | +asm_function jsimd_ycc_\colorid\()_convert_neon |
| 4207 | + OUTPUT_WIDTH .req x0 |
| 4208 | + INPUT_BUF .req x1 |
| 4209 | + INPUT_ROW .req x2 |
| 4210 | + OUTPUT_BUF .req x3 |
| 4211 | + NUM_ROWS .req x4 |
| 4212 | + /* NOTE(review): OUTPUT_WIDTH, INPUT_ROW and NUM_ROWS are consumed as full 64-bit registers; if the C prototype declares them as 32-bit types the upper halves are unspecified under AAPCS64 - confirm against the caller */ |
| 4213 | + INPUT_BUF0 .req x5 |
| 4214 | + INPUT_BUF1 .req x6 |
| 4215 | + INPUT_BUF2 .req INPUT_BUF |
| 4216 | + |
| 4217 | + RGB .req x7 |
| 4218 | + Y .req x8 |
| 4219 | + U .req x9 |
| 4220 | + V .req x10 |
| 4221 | + N .req x15 |
| 4222 | + |
| 4223 | + sub sp, sp, 336 /* 336 = 16 (x15) + 8 * 32 (NEON) + 4 * 16 (x4-x10, x30) */ |
| 4224 | + str x15, [sp], 16 |
| 4225 | + /* x15 = address of the constant table; it is loaded after the register save into v0.4h (padding), v1.4h (multipliers) and v2.8h (-128 bias) */ |
| 4226 | + adr x15, jsimd_ycc_\colorid\()_neon_consts |
| 4227 | + /* Save the low 64 bits of v0-v31. The post-indexed stores bring sp back to its entry value, so the save area lies below sp while the body runs. NOTE(review): AAPCS64 defines no red zone - confirm this is acceptable on the target. */ |
| 4228 | + st1 {v0.8b - v3.8b}, [sp], 32 |
| 4229 | + st1 {v4.8b - v7.8b}, [sp], 32 |
| 4230 | + st1 {v8.8b - v11.8b}, [sp], 32 |
| 4231 | + st1 {v12.8b - v15.8b}, [sp], 32 |
| 4232 | + st1 {v16.8b - v19.8b}, [sp], 32 |
| 4233 | + st1 {v20.8b - v23.8b}, [sp], 32 |
| 4234 | + st1 {v24.8b - v27.8b}, [sp], 32 |
| 4235 | + st1 {v28.8b - v31.8b}, [sp], 32 |
| 4236 | + ld1 {v0.4h, v1.4h}, [x15], 16 |
| 4237 | + ld1 {v2.8h}, [x15] |
| 4238 | + |
| 4239 | + /* Save general-purpose registers and handle input arguments */ |
| 4240 | + /* push {x4, x5, x6, x7, x8, x9, x10, x30} */ |
| 4241 | + stp x4, x5, [sp], 16 |
| 4242 | + stp x6, x7, [sp], 16 |
| 4243 | + stp x8, x9, [sp], 16 |
| 4244 | + stp x10, x30, [sp], 16 |
| 4245 | + ldr INPUT_BUF0, [INPUT_BUF] /* the three plane pointers are read from INPUT_BUF[0..2] */ |
| 4246 | + ldr INPUT_BUF1, [INPUT_BUF, 8] |
| 4247 | + ldr INPUT_BUF2, [INPUT_BUF, 16] /* overwrites x1, so the INPUT_BUF name is retired next */ |
| 4248 | + .unreq INPUT_BUF |
| 4249 | + |
| 4250 | + /* Preset v10 and v13 to 0xFF in every byte: whichever of them is not overwritten by a colour channel supplies the filler byte of the 32-bit layouts */ |
| 4251 | + movi v10.16b, #255 |
| 4252 | + movi v13.16b, #255 |
| 4253 | + |
| 4254 | + /* Outer loop over scanlines (skipped entirely when NUM_ROWS < 1) */ |
| 4255 | + cmp NUM_ROWS, #1 |
| 4256 | + blt 9f |
| 4257 | +0: |
| 4258 | + lsl x16, INPUT_ROW, #3 /* byte offset of this row in the 8-byte pointer arrays */ |
| 4259 | + ldr Y, [INPUT_BUF0, x16] |
| 4260 | + ldr U, [INPUT_BUF1, x16] |
| 4261 | + mov N, OUTPUT_WIDTH |
| 4262 | + ldr V, [INPUT_BUF2, x16] |
| 4263 | + add INPUT_ROW, INPUT_ROW, #1 |
| 4264 | + ldr RGB, [OUTPUT_BUF], #8 /* next output row pointer; OUTPUT_BUF advances */ |
| 4265 | + |
| 4266 | + /* Inner loop over pixels: 8 at a time, stage 2 of one group overlapped with stage 1 of the next */ |
| 4267 | + subs N, N, #8 |
| 4268 | + blt 3f |
| 4269 | + do_load 8 |
| 4270 | + do_yuv_to_rgb_stage1 |
| 4271 | + subs N, N, #8 |
| 4272 | + blt 2f |
| 4273 | +1: |
| 4274 | + do_yuv_to_rgb_stage2_store_load_stage1 |
| 4275 | + subs N, N, #8 |
| 4276 | + bge 1b |
| 4277 | +2: |
| 4278 | + do_yuv_to_rgb_stage2 |
| 4279 | + do_store \bpp, 8 |
| 4280 | + tst N, #7 /* N only ever drops by 8, so its low 3 bits still hold the leftover pixel count */ |
| 4281 | + beq 8f |
| 4282 | +3: |
| 4283 | + tst N, #4 /* leftovers: load 4, 2 and 1 pixels into lanes 0-3, 4-5 and 6 as needed */ |
| 4284 | + beq 3f |
| 4285 | + do_load 4 |
| 4286 | +3: |
| 4287 | + tst N, #2 |
| 4288 | + beq 4f |
| 4289 | + do_load 2 |
| 4290 | +4: |
| 4291 | + tst N, #1 |
| 4292 | + beq 5f |
| 4293 | + do_load 1 |
| 4294 | +5: |
| 4295 | + do_yuv_to_rgb |
| 4296 | + tst N, #4 /* store the same lane groups that were loaded */ |
| 4297 | + beq 6f |
| 4298 | + do_store \bpp, 4 |
| 4299 | +6: |
| 4300 | + tst N, #2 |
| 4301 | + beq 7f |
| 4302 | + do_store \bpp, 2 |
| 4303 | +7: |
| 4304 | + tst N, #1 |
| 4305 | + beq 8f |
| 4306 | + do_store \bpp, 1 |
| 4307 | +8: |
| 4308 | + subs NUM_ROWS, NUM_ROWS, #1 |
| 4309 | + bgt 0b |
| 4310 | +9: |
| 4311 | + /* Restore all registers and return: step back down to the save area and reload in the order it was written */ |
| 4312 | + sub sp, sp, #336 |
| 4313 | + ldr x15, [sp], 16 |
| 4314 | + ld1 {v0.8b - v3.8b}, [sp], 32 |
| 4315 | + ld1 {v4.8b - v7.8b}, [sp], 32 |
| 4316 | + ld1 {v8.8b - v11.8b}, [sp], 32 |
| 4317 | + ld1 {v12.8b - v15.8b}, [sp], 32 |
| 4318 | + ld1 {v16.8b - v19.8b}, [sp], 32 |
| 4319 | + ld1 {v20.8b - v23.8b}, [sp], 32 |
| 4320 | + ld1 {v24.8b - v27.8b}, [sp], 32 |
| 4321 | + ld1 {v28.8b - v31.8b}, [sp], 32 |
| 4322 | + /* pop {x4, x5, x6, x7, x8, x9, x10, x30} */ |
| 4323 | + ldp x4, x5, [sp], 16 |
| 4324 | + ldp x6, x7, [sp], 16 |
| 4325 | + ldp x8, x9, [sp], 16 |
| 4326 | + ldp x10, x30, [sp], 16 |
| 4327 | + ret /* return through the reloaded x30 using the dedicated return instruction rather than a plain indirect jump */ |
| 4328 | + .unreq OUTPUT_WIDTH |
| 4329 | + .unreq INPUT_ROW |
| 4330 | + .unreq OUTPUT_BUF |
| 4331 | + .unreq NUM_ROWS |
| 4332 | + .unreq INPUT_BUF0 |
| 4333 | + .unreq INPUT_BUF1 |
| 4334 | + .unreq INPUT_BUF2 |
| 4335 | + .unreq RGB |
| 4336 | + .unreq Y |
| 4337 | + .unreq U |
| 4338 | + .unreq V |
| 4339 | + .unreq N |
| 4340 | + |
| 4341 | +.purgem do_yuv_to_rgb |
| 4342 | +.purgem do_yuv_to_rgb_stage1 |
| 4343 | +.purgem do_yuv_to_rgb_stage2 |
| 4344 | +.purgem do_yuv_to_rgb_stage2_store_load_stage1 |
| 4345 | +.endm |
| 4346 | + |
| 4347 | +/*--------------------------------- id ----- bpp R rsize G gsize B bsize defsize */ |
| 4348 | +generate_jsimd_ycc_rgb_convert_neon extrgb, 24, 0, .4h, 1, .4h, 2, .4h, .8b /* R->v10, G->v11, B->v12 */ |
| 4349 | +generate_jsimd_ycc_rgb_convert_neon extbgr, 24, 2, .4h, 1, .4h, 0, .4h, .8b /* B->v10, G->v11, R->v12 */ |
| 4350 | +generate_jsimd_ycc_rgb_convert_neon extrgbx, 32, 0, .4h, 1, .4h, 2, .4h, .8b /* R->v10, G->v11, B->v12, v13 stays 0xFF */ |
| 4351 | +generate_jsimd_ycc_rgb_convert_neon extbgrx, 32, 2, .4h, 1, .4h, 0, .4h, .8b /* B->v10, G->v11, R->v12, v13 stays 0xFF */ |
| 4352 | +generate_jsimd_ycc_rgb_convert_neon extxbgr, 32, 3, .4h, 2, .4h, 1, .4h, .8b /* v10 stays 0xFF, B->v11, G->v12, R->v13 */ |
| 4353 | +generate_jsimd_ycc_rgb_convert_neon extxrgb, 32, 1, .4h, 2, .4h, 3, .4h, .8b /* v10 stays 0xFF, R->v11, G->v12, B->v13 */ |
| 4354 | +generate_jsimd_ycc_rgb_convert_neon rgb565, 16, 0, .4h, 0, .4h, 0, .4h, .8b /* channel offsets are not used on the bpp == 16 path; pixels are packed into v25 */ |
| 4355 | +.purgem do_load /* the load/store helpers are only needed by the generator above */ |
| 4356 | +.purgem do_store |