Blame - jcdctmgr.c - platform/external/libjpeg-turbo

blob: cef4b5e7520242808dca64420aa62436df6103bb [file] [log] [blame]

Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	1	/*
				2	* jcdctmgr.c
				3	*
DRC	a73e870	2012-12-31 02:52:30 +0000	[diff] [blame]	4	* This file was part of the Independent JPEG Group's software:
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	5	* Copyright (C) 1994-1996, Thomas G. Lane.
DRC	a6ef282	2013-09-28 03:23:49 +0000	[diff] [blame]	6	* libjpeg-turbo Modifications:
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	7	* Copyright (C) 1999-2006, MIYASAKA Masaru.
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	8	* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
DRC	b5a55e6	2015-08-29 18:05:43 -0500	[diff] [blame]	9	* Copyright (C) 2011, 2014-2015 D. R. Commander
DRC	7e3acc0	2015-10-10 10:25:46 -0500	[diff] [blame]	10	* For conditions of distribution and use, see the accompanying README.ijg
				11	* file.
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	12	*
				13	* This file contains the forward-DCT management logic.
				14	* This code selects a particular DCT implementation to be used,
				15	* and it performs related housekeeping chores including coefficient
				16	* quantization.
				17	*/
				18
				19	#define JPEG_INTERNALS
				20	#include "jinclude.h"
				21	#include "jpeglib.h"
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	22	#include "jdct.h" /* Private declarations for DCT subsystem */
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	23	#include "jsimddct.h"
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	24
				25
				26	/* Private subobject for this module */
				27
DRC	bc56b75	2014-05-16 10:43:44 +0000	[diff] [blame]	28	typedef void (forward_DCT_method_ptr) (DCTELEM data);
				29	typedef void (float_DCT_method_ptr) (FAST_FLOAT data);
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	30
DRC	bc56b75	2014-05-16 10:43:44 +0000	[diff] [blame]	31	typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data,
				32	JDIMENSION start_col,
				33	DCTELEM * workspace);
				34	typedef void (*float_convsamp_method_ptr) (JSAMPARRAY sample_data,
				35	JDIMENSION start_col,
				36	FAST_FLOAT *workspace);
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	37
DRC	bc56b75	2014-05-16 10:43:44 +0000	[diff] [blame]	38	typedef void (quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM divisors,
				39	DCTELEM * workspace);
				40	typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block,
				41	FAST_FLOAT * divisors,
				42	FAST_FLOAT * workspace);
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	43
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame]	44	METHODDEF(void) quantize (JCOEFPTR, DCTELEM , DCTELEM );
				45
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	46	typedef struct {
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	47	struct jpeg_forward_dct pub; /* public fields */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	48
				49	/* Pointer to the DCT routine actually in use */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	50	forward_DCT_method_ptr dct;
				51	convsamp_method_ptr convsamp;
				52	quantize_method_ptr quantize;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	53
				54	/* The actual post-DCT divisors --- not identical to the quant table
				55	* entries, because of scaling (especially for an unnormalized DCT).
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	56	* Each table is given in normal array order.
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	57	*/
				58	DCTELEM * divisors[NUM_QUANT_TBLS];
				59
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	60	/* work area for FDCT subroutine */
				61	DCTELEM * workspace;
				62
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	63	#ifdef DCT_FLOAT_SUPPORTED
				64	/* Same as above for the floating-point case. */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	65	float_DCT_method_ptr float_dct;
				66	float_convsamp_method_ptr float_convsamp;
				67	float_quantize_method_ptr float_quantize;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	68	FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	69	FAST_FLOAT * float_workspace;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	70	#endif
				71	} my_fdct_controller;
				72
				73	typedef my_fdct_controller * my_fdct_ptr;
				74
				75
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	76	#if BITS_IN_JSAMPLE == 8
				77
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	78	/*
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	79	* Find the highest bit in an integer through binary search.
				80	*/
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	81
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	82	LOCAL(int)
DRC	fc5dc4f	2009-10-01 22:26:14 +0000	[diff] [blame]	83	flss (UINT16 val)
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	84	{
				85	int bit;
				86
				87	bit = 16;
				88
				89	if (!val)
				90	return 0;
				91
				92	if (!(val & 0xff00)) {
				93	bit -= 8;
				94	val <<= 8;
				95	}
				96	if (!(val & 0xf000)) {
				97	bit -= 4;
				98	val <<= 4;
				99	}
				100	if (!(val & 0xc000)) {
				101	bit -= 2;
				102	val <<= 2;
				103	}
				104	if (!(val & 0x8000)) {
				105	bit -= 1;
				106	val <<= 1;
				107	}
				108
				109	return bit;
				110	}
				111
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	112
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	113	/*
				114	* Compute values to do a division using reciprocal.
				115	*
				116	* This implementation is based on an algorithm described in
				117	* "How to optimize for the Pentium family of microprocessors"
				118	* (http://www.agner.org/assem/).
				119	* More information about the basic algorithm can be found in
				120	* the paper "Integer Division Using Reciprocals" by Robert Alverson.
				121	*
				122	* The basic idea is to replace x/d by x * d^-1. In order to store
				123	* d^-1 with enough precision we shift it left a few places. It turns
				124	* out that this algoright gives just enough precision, and also fits
				125	* into DCTELEM:
				126	*
				127	* b = (the number of significant bits in divisor) - 1
				128	* r = (word size) + b
				129	* f = 2^r / divisor
				130	*
				131	* f will not be an integer for most cases, so we need to compensate
				132	* for the rounding error introduced:
				133	*
				134	* no fractional part:
				135	*
				136	* result = input >> r
				137	*
				138	* fractional part of f < 0.5:
				139	*
				140	* round f down to nearest integer
				141	* result = ((input + 1) * f) >> r
				142	*
				143	* fractional part of f > 0.5:
				144	*
				145	* round f up to nearest integer
				146	* result = (input * f) >> r
				147	*
				148	* This is the original algorithm that gives truncated results. But we
				149	* want properly rounded results, so we replace "input" with
				150	* "input + divisor/2".
				151	*
				152	* In order to allow SIMD implementations we also tweak the values to
				153	* allow the same calculation to be made at all times:
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	154	*
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	155	* dctbl[0] = f rounded to nearest integer
				156	* dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
				157	* dctbl[2] = 1 << ((word size) * 2 - r)
				158	* dctbl[3] = r - (word size)
				159	*
				160	* dctbl[2] is for stupid instruction sets where the shift operation
				161	* isn't member wise (e.g. MMX).
				162	*
				163	* The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
				164	* is that most SIMD implementations have a "multiply and store top
				165	* half" operation.
				166	*
				167	* Lastly, we store each of the values in their own table instead
				168	* of in a consecutive manner, yet again in order to allow SIMD
				169	* routines.
				170	*/
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	171
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame]	172	LOCAL(int)
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	173	compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
				174	{
				175	UDCTELEM2 fq, fr;
				176	UDCTELEM c;
				177	int b, r;
				178
DRC	b5a55e6	2015-08-29 18:05:43 -0500	[diff] [blame]	179	if (divisor == 1) {
				180	/* divisor == 1 means unquantized, so these reciprocal/correction/shift
				181	* values will cause the C quantization algorithm to act like the
				182	* identity function. Since only the C quantization algorithm is used in
				183	* these cases, the scale value is irrelevant.
				184	*/
				185	dtbl[DCTSIZE2 * 0] = (DCTELEM) 1; /* reciprocal */
				186	dtbl[DCTSIZE2 * 1] = (DCTELEM) 0; /* correction */
				187	dtbl[DCTSIZE2 * 2] = (DCTELEM) 1; /* scale */
				188	dtbl[DCTSIZE2 * 3] = (DCTELEM) (-sizeof(DCTELEM) * 8); /* shift */
				189	return 0;
				190	}
				191
DRC	fc5dc4f	2009-10-01 22:26:14 +0000	[diff] [blame]	192	b = flss(divisor) - 1;
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	193	r = sizeof(DCTELEM) * 8 + b;
				194
				195	fq = ((UDCTELEM2)1 << r) / divisor;
				196	fr = ((UDCTELEM2)1 << r) % divisor;
				197
				198	c = divisor / 2; /* for rounding */
				199
				200	if (fr == 0) { /* divisor is power of two */
				201	/* fq will be one bit too large to fit in DCTELEM, so adjust */
				202	fq >>= 1;
				203	r--;
DRC	d65d99a	2012-01-31 03:39:23 +0000	[diff] [blame]	204	} else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	205	c++;
				206	} else { /* fractional part is > 0.5 */
				207	fq++;
				208	}
				209
				210	dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */
				211	dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */
				212	dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)82 - r)); /* scale */
				213	dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)8; / shift */
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame]	214
				215	if(r <= 16) return 0;
				216	else return 1;
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	217	}
				218
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	219	#endif
				220
				221
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	222	/*
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	223	* Initialize for a processing pass.
				224	* Verify that all referenced Q-tables are present, and set up
				225	* the divisor table for each one.
				226	* In the current implementation, DCT of all components is done during
				227	* the first pass, even if only some components will be output in the
				228	* first scan. Hence all components should be examined here.
				229	*/
				230
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	231	METHODDEF(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	232	start_pass_fdctmgr (j_compress_ptr cinfo)
				233	{
				234	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
				235	int ci, qtblno, i;
				236	jpeg_component_info *compptr;
				237	JQUANT_TBL * qtbl;
				238	DCTELEM * dtbl;
				239
				240	for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
				241	ci++, compptr++) {
				242	qtblno = compptr->quant_tbl_no;
				243	/* Make sure specified quantization table is present */
				244	if (qtblno < 0 \|\| qtblno >= NUM_QUANT_TBLS \|\|
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	245	cinfo->quant_tbl_ptrs[qtblno] == NULL)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	246	ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
				247	qtbl = cinfo->quant_tbl_ptrs[qtblno];
				248	/* Compute divisors for this quant table */
				249	/* We may do this more than once for same table, but it's not a big deal */
				250	switch (cinfo->dct_method) {
				251	#ifdef DCT_ISLOW_SUPPORTED
				252	case JDCT_ISLOW:
				253	/* For LL&M IDCT method, divisors are equal to raw quantization
				254	* coefficients multiplied by 8 (to counteract scaling).
				255	*/
				256	if (fdct->divisors[qtblno] == NULL) {
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	257	fdct->divisors[qtblno] = (DCTELEM *)
				258	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC	5de454b	2014-05-18 19:04:03 +0000	[diff] [blame]	259	(DCTSIZE2 * 4) * sizeof(DCTELEM));
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	260	}
				261	dtbl = fdct->divisors[qtblno];
				262	for (i = 0; i < DCTSIZE2; i++) {
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	263	#if BITS_IN_JSAMPLE == 8
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	264	if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
				265	&& fdct->quantize == jsimd_quantize)
				266	fdct->quantize = quantize;
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	267	#else
				268	dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
				269	#endif
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	270	}
				271	break;
				272	#endif
				273	#ifdef DCT_IFAST_SUPPORTED
				274	case JDCT_IFAST:
				275	{
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	276	/* For AA&N IDCT method, divisors are equal to quantization
				277	* coefficients scaled by scalefactor[row]*scalefactor[col], where
				278	* scalefactor[0] = 1
				279	* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
				280	* We apply a further scale factor of 8.
				281	*/
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	282	#define CONST_BITS 14
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	283	static const INT16 aanscales[DCTSIZE2] = {
				284	/* precomputed values scaled up by 14 bits */
				285	16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
				286	22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
				287	21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
				288	19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
				289	16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
				290	12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
				291	8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
				292	4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
				293	};
				294	SHIFT_TEMPS
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	295
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	296	if (fdct->divisors[qtblno] == NULL) {
				297	fdct->divisors[qtblno] = (DCTELEM *)
				298	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC	5de454b	2014-05-18 19:04:03 +0000	[diff] [blame]	299	(DCTSIZE2 * 4) * sizeof(DCTELEM));
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	300	}
				301	dtbl = fdct->divisors[qtblno];
				302	for (i = 0; i < DCTSIZE2; i++) {
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	303	#if BITS_IN_JSAMPLE == 8
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	304	if(!compute_reciprocal(
DRC	1e32fe3	2015-10-14 17:32:39 -0500	[diff] [blame^]	305	DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i],
				306	(JLONG) aanscales[i]),
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	307	CONST_BITS-3), &dtbl[i])
				308	&& fdct->quantize == jsimd_quantize)
				309	fdct->quantize = quantize;
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	310	#else
				311	dtbl[i] = (DCTELEM)
DRC	1e32fe3	2015-10-14 17:32:39 -0500	[diff] [blame^]	312	DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i],
				313	(JLONG) aanscales[i]),
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	314	CONST_BITS-3);
				315	#endif
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	316	}
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	317	}
				318	break;
				319	#endif
				320	#ifdef DCT_FLOAT_SUPPORTED
				321	case JDCT_FLOAT:
				322	{
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	323	/* For float AA&N IDCT method, divisors are equal to quantization
				324	* coefficients scaled by scalefactor[row]*scalefactor[col], where
				325	* scalefactor[0] = 1
				326	* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
				327	* We apply a further scale factor of 8.
				328	* What's actually stored is 1/divisor so that the inner loop can
				329	* use a multiplication rather than a division.
				330	*/
				331	FAST_FLOAT * fdtbl;
				332	int row, col;
				333	static const double aanscalefactor[DCTSIZE] = {
				334	1.0, 1.387039845, 1.306562965, 1.175875602,
				335	1.0, 0.785694958, 0.541196100, 0.275899379
				336	};
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	337
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	338	if (fdct->float_divisors[qtblno] == NULL) {
				339	fdct->float_divisors[qtblno] = (FAST_FLOAT *)
				340	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC	5de454b	2014-05-18 19:04:03 +0000	[diff] [blame]	341	DCTSIZE2 * sizeof(FAST_FLOAT));
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	342	}
				343	fdtbl = fdct->float_divisors[qtblno];
				344	i = 0;
				345	for (row = 0; row < DCTSIZE; row++) {
				346	for (col = 0; col < DCTSIZE; col++) {
				347	fdtbl[i] = (FAST_FLOAT)
				348	(1.0 / (((double) qtbl->quantval[i] *
				349	aanscalefactor[row] * aanscalefactor[col] * 8.0)));
				350	i++;
				351	}
				352	}
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	353	}
				354	break;
				355	#endif
				356	default:
				357	ERREXIT(cinfo, JERR_NOT_COMPILED);
				358	break;
				359	}
				360	}
				361	}
				362
				363
				364	/*
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	365	* Load data into workspace, applying unsigned->signed conversion.
				366	*/
				367
				368	METHODDEF(void)
				369	convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
				370	{
				371	register DCTELEM *workspaceptr;
				372	register JSAMPROW elemptr;
				373	register int elemr;
				374
				375	workspaceptr = workspace;
				376	for (elemr = 0; elemr < DCTSIZE; elemr++) {
				377	elemptr = sample_data[elemr] + start_col;
				378
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	379	#if DCTSIZE == 8 /* unroll the inner loop */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	380	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				381	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				382	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				383	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				384	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				385	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				386	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				387	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				388	#else
				389	{
				390	register int elemc;
				391	for (elemc = DCTSIZE; elemc > 0; elemc--)
				392	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				393	}
				394	#endif
				395	}
				396	}
				397
				398
				399	/*
				400	* Quantize/descale the coefficients, and store into coef_blocks[].
				401	*/
				402
				403	METHODDEF(void)
				404	quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
				405	{
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	406	int i;
				407	DCTELEM temp;
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	408	JCOEFPTR output_ptr = coef_block;
				409
				410	#if BITS_IN_JSAMPLE == 8
				411
DRC	b5a55e6	2015-08-29 18:05:43 -0500	[diff] [blame]	412	UDCTELEM recip, corr;
				413	int shift;
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	414	UDCTELEM2 product;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	415
				416	for (i = 0; i < DCTSIZE2; i++) {
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	417	temp = workspace[i];
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	418	recip = divisors[i + DCTSIZE2 * 0];
				419	corr = divisors[i + DCTSIZE2 * 1];
				420	shift = divisors[i + DCTSIZE2 * 3];
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	421
				422	if (temp < 0) {
				423	temp = -temp;
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	424	product = (UDCTELEM2)(temp + corr) * recip;
				425	product >>= shift + sizeof(DCTELEM)*8;
				426	temp = product;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	427	temp = -temp;
				428	} else {
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	429	product = (UDCTELEM2)(temp + corr) * recip;
				430	product >>= shift + sizeof(DCTELEM)*8;
				431	temp = product;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	432	}
				433	output_ptr[i] = (JCOEF) temp;
				434	}
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	435
				436	#else
				437
				438	register DCTELEM qval;
				439
				440	for (i = 0; i < DCTSIZE2; i++) {
				441	qval = divisors[i];
				442	temp = workspace[i];
				443	/* Divide the coefficient value by qval, ensuring proper rounding.
				444	* Since C does not specify the direction of rounding for negative
				445	* quotients, we have to force the dividend positive for portability.
				446	*
				447	* In most files, at least half of the output values will be zero
				448	* (at default quantization settings, more like three-quarters...)
				449	* so we should ensure that this case is fast. On many machines,
				450	* a comparison is enough cheaper than a divide to make a special test
				451	* a win. Since both inputs will be nonnegative, we need only test
				452	* for a < b to discover whether a/b is 0.
				453	* If your machine's division is fast enough, define FAST_DIVIDE.
				454	*/
				455	#ifdef FAST_DIVIDE
				456	#define DIVIDE_BY(a,b) a /= b
				457	#else
				458	#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
				459	#endif
				460	if (temp < 0) {
				461	temp = -temp;
				462	temp += qval>>1; /* for rounding */
				463	DIVIDE_BY(temp, qval);
DRC	eca0637	2014-11-06 09:32:38 +0000	[diff] [blame]	464	temp = -temp;
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	465	} else {
				466	temp += qval>>1; /* for rounding */
				467	DIVIDE_BY(temp, qval);
				468	}
				469	output_ptr[i] = (JCOEF) temp;
				470	}
				471
				472	#endif
				473
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	474	}
				475
				476
				477	/*
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	478	* Perform forward DCT on one or more blocks of a component.
				479	*
				480	* The input samples are taken from the sample_data[] array starting at
				481	* position start_row/start_col, and moving to the right for any additional
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame]	482	* blocks. The quantized coefficients are returned in coef_blocks[].
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	483	*/
				484
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	485	METHODDEF(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	486	forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	487	JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
				488	JDIMENSION start_row, JDIMENSION start_col,
				489	JDIMENSION num_blocks)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	490	/* This version is used for integer DCT implementations. */
				491	{
				492	/* This routine is heavily used, so it's worth coding it tightly. */
				493	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	494	DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	495	DCTELEM * workspace;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	496	JDIMENSION bi;
				497
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	498	/* Make sure the compiler doesn't look up these every pass */
				499	forward_DCT_method_ptr do_dct = fdct->dct;
				500	convsamp_method_ptr do_convsamp = fdct->convsamp;
				501	quantize_method_ptr do_quantize = fdct->quantize;
Pierre Ossman	dc5db14	2009-03-13 12:17:26 +0000	[diff] [blame]	502	workspace = fdct->workspace;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	503
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	504	sample_data += start_row; /* fold in the vertical offset once */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	505
				506	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
				507	/* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	508	(*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	509
				510	/* Perform the DCT */
				511	(*do_dct) (workspace);
				512
				513	/* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	514	(*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	515	}
				516	}
				517
				518
				519	#ifdef DCT_FLOAT_SUPPORTED
				520
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	521
				522	METHODDEF(void)
				523	convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
				524	{
				525	register FAST_FLOAT *workspaceptr;
				526	register JSAMPROW elemptr;
				527	register int elemr;
				528
				529	workspaceptr = workspace;
				530	for (elemr = 0; elemr < DCTSIZE; elemr++) {
				531	elemptr = sample_data[elemr] + start_col;
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	532	#if DCTSIZE == 8 /* unroll the inner loop */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	533	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				534	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				535	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				536	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				537	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				538	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				539	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				540	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				541	#else
				542	{
				543	register int elemc;
				544	for (elemc = DCTSIZE; elemc > 0; elemc--)
				545	*workspaceptr++ = (FAST_FLOAT)
				546	(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
				547	}
				548	#endif
				549	}
				550	}
				551
				552
				553	METHODDEF(void)
				554	quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
				555	{
				556	register FAST_FLOAT temp;
				557	register int i;
				558	register JCOEFPTR output_ptr = coef_block;
				559
				560	for (i = 0; i < DCTSIZE2; i++) {
				561	/* Apply the quantization and scaling factor */
				562	temp = workspace[i] * divisors[i];
				563
				564	/* Round to nearest integer.
				565	* Since C does not specify the direction of rounding for negative
				566	* quotients, we have to force the dividend positive for portability.
				567	* The maximum coefficient size is +-16K (for 12-bit data), so this
				568	* code should work for either 16-bit or 32-bit ints.
				569	*/
				570	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
				571	}
				572	}
				573
				574
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	575	METHODDEF(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	576	forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	577	JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
				578	JDIMENSION start_row, JDIMENSION start_col,
				579	JDIMENSION num_blocks)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	580	/* This version is used for floating-point DCT implementations. */
				581	{
				582	/* This routine is heavily used, so it's worth coding it tightly. */
				583	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	584	FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	585	FAST_FLOAT * workspace;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	586	JDIMENSION bi;
				587
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	588
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	589	/* Make sure the compiler doesn't look up these every pass */
				590	float_DCT_method_ptr do_dct = fdct->float_dct;
				591	float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
				592	float_quantize_method_ptr do_quantize = fdct->float_quantize;
Pierre Ossman	dc5db14	2009-03-13 12:17:26 +0000	[diff] [blame]	593	workspace = fdct->float_workspace;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	594
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	595	sample_data += start_row; /* fold in the vertical offset once */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	596
				597	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
				598	/* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	599	(*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	600
				601	/* Perform the DCT */
				602	(*do_dct) (workspace);
				603
				604	/* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	605	(*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	606	}
				607	}
				608
				609	#endif /* DCT_FLOAT_SUPPORTED */
				610
				611
				612	/*
				613	* Initialize FDCT manager.
				614	*/
				615
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	616	GLOBAL(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	617	jinit_forward_dct (j_compress_ptr cinfo)
				618	{
				619	my_fdct_ptr fdct;
				620	int i;
				621
				622	fdct = (my_fdct_ptr)
				623	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC	5de454b	2014-05-18 19:04:03 +0000	[diff] [blame]	624	sizeof(my_fdct_controller));
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	625	cinfo->fdct = (struct jpeg_forward_dct *) fdct;
				626	fdct->pub.start_pass = start_pass_fdctmgr;
				627
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	628	/* First determine the DCT... */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	629	switch (cinfo->dct_method) {
				630	#ifdef DCT_ISLOW_SUPPORTED
				631	case JDCT_ISLOW:
				632	fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	633	if (jsimd_can_fdct_islow())
				634	fdct->dct = jsimd_fdct_islow;
				635	else
				636	fdct->dct = jpeg_fdct_islow;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	637	break;
				638	#endif
				639	#ifdef DCT_IFAST_SUPPORTED
				640	case JDCT_IFAST:
				641	fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	642	if (jsimd_can_fdct_ifast())
				643	fdct->dct = jsimd_fdct_ifast;
				644	else
				645	fdct->dct = jpeg_fdct_ifast;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	646	break;
				647	#endif
				648	#ifdef DCT_FLOAT_SUPPORTED
				649	case JDCT_FLOAT:
				650	fdct->pub.forward_DCT = forward_DCT_float;
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	651	if (jsimd_can_fdct_float())
				652	fdct->float_dct = jsimd_fdct_float;
				653	else
				654	fdct->float_dct = jpeg_fdct_float;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	655	break;
				656	#endif
				657	default:
				658	ERREXIT(cinfo, JERR_NOT_COMPILED);
				659	break;
				660	}
				661
				662	/* ...then the supporting stages. */
				663	switch (cinfo->dct_method) {
				664	#ifdef DCT_ISLOW_SUPPORTED
				665	case JDCT_ISLOW:
				666	#endif
				667	#ifdef DCT_IFAST_SUPPORTED
				668	case JDCT_IFAST:
				669	#endif
				670	#if defined(DCT_ISLOW_SUPPORTED) \|\| defined(DCT_IFAST_SUPPORTED)
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	671	if (jsimd_can_convsamp())
				672	fdct->convsamp = jsimd_convsamp;
				673	else
				674	fdct->convsamp = convsamp;
				675	if (jsimd_can_quantize())
				676	fdct->quantize = jsimd_quantize;
				677	else
				678	fdct->quantize = quantize;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	679	break;
				680	#endif
				681	#ifdef DCT_FLOAT_SUPPORTED
				682	case JDCT_FLOAT:
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	683	if (jsimd_can_convsamp_float())
				684	fdct->float_convsamp = jsimd_convsamp_float;
				685	else
				686	fdct->float_convsamp = convsamp_float;
				687	if (jsimd_can_quantize_float())
				688	fdct->float_quantize = jsimd_quantize_float;
				689	else
				690	fdct->float_quantize = quantize_float;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	691	break;
				692	#endif
				693	default:
				694	ERREXIT(cinfo, JERR_NOT_COMPILED);
				695	break;
				696	}
				697
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	698	/* Allocate workspace memory */
				699	#ifdef DCT_FLOAT_SUPPORTED
				700	if (cinfo->dct_method == JDCT_FLOAT)
				701	fdct->float_workspace = (FAST_FLOAT *)
				702	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC	5de454b	2014-05-18 19:04:03 +0000	[diff] [blame]	703	sizeof(FAST_FLOAT) * DCTSIZE2);
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	704	else
				705	#endif
				706	fdct->workspace = (DCTELEM *)
				707	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC	5de454b	2014-05-18 19:04:03 +0000	[diff] [blame]	708	sizeof(DCTELEM) * DCTSIZE2);
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	709
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	710	/* Mark divisor tables unallocated */
				711	for (i = 0; i < NUM_QUANT_TBLS; i++) {
				712	fdct->divisors[i] = NULL;
				713	#ifdef DCT_FLOAT_SUPPORTED
				714	fdct->float_divisors[i] = NULL;
				715	#endif
				716	}
				717	}