Blame - jcdctmgr.c - platform/external/libjpeg-turbo

blob: 7dae17a6e1494457638908fddb475689c9346f48 [file] [log] [blame]

hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	1	/*
				2	* jcdctmgr.c
				3	*
noel@chromium.org	3395bcc	2014-04-14 06:56:00 +0000	[diff] [blame]	4	* This file was part of the Independent JPEG Group's software:
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	5	* Copyright (C) 1994-1996, Thomas G. Lane.
noel@chromium.org	3395bcc	2014-04-14 06:56:00 +0000	[diff] [blame]	6	* libjpeg-turbo Modifications:
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	7	* Copyright (C) 1999-2006, MIYASAKA Masaru.
				8	* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	9	* Copyright (C) 2011, 2014-2015, D. R. Commander.
				10	* For conditions of distribution and use, see the accompanying README.ijg
				11	* file.
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	12	*
				13	* This file contains the forward-DCT management logic.
				14	* This code selects a particular DCT implementation to be used,
				15	* and it performs related housekeeping chores including coefficient
				16	* quantization.
				17	*/
				18
				19	#define JPEG_INTERNALS
				20	#include "jinclude.h"
				21	#include "jpeglib.h"
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	22	#include "jdct.h" /* Private declarations for DCT subsystem */
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	23	#include "jsimddct.h"
				24
				25
				26	/* Private subobject for this module */
				27
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	28	typedef void (forward_DCT_method_ptr) (DCTELEM data);
				29	typedef void (float_DCT_method_ptr) (FAST_FLOAT data);
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	30
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	31	typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data,
				32	JDIMENSION start_col,
				33	DCTELEM *workspace);
				34	typedef void (*float_convsamp_method_ptr) (JSAMPARRAY sample_data,
				35	JDIMENSION start_col,
				36	FAST_FLOAT *workspace);
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	37
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	38	typedef void (quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM divisors,
				39	DCTELEM *workspace);
				40	typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block,
				41	FAST_FLOAT *divisors,
				42	FAST_FLOAT *workspace);
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	43
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	44	METHODDEF(void) quantize(JCOEFPTR, DCTELEM , DCTELEM );
hbono@chromium.org	9862697	2011-08-03 03:13:08 +0000	[diff] [blame]	45
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	46	typedef struct {
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	47	struct jpeg_forward_dct pub; /* public fields */
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	48
				49	/* Pointer to the DCT routine actually in use */
				50	forward_DCT_method_ptr dct;
				51	convsamp_method_ptr convsamp;
				52	quantize_method_ptr quantize;
				53
				54	/* The actual post-DCT divisors --- not identical to the quant table
				55	* entries, because of scaling (especially for an unnormalized DCT).
				56	* Each table is given in normal array order.
				57	*/
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	58	DCTELEM *divisors[NUM_QUANT_TBLS];
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	59
				60	/* work area for FDCT subroutine */
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	61	DCTELEM *workspace;
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	62
				63	#ifdef DCT_FLOAT_SUPPORTED
				64	/* Same as above for the floating-point case. */
				65	float_DCT_method_ptr float_dct;
				66	float_convsamp_method_ptr float_convsamp;
				67	float_quantize_method_ptr float_quantize;
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	68	FAST_FLOAT *float_divisors[NUM_QUANT_TBLS];
				69	FAST_FLOAT *float_workspace;
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	70	#endif
				71	} my_fdct_controller;
				72
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	73	typedef my_fdct_controller *my_fdct_ptr;
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	74
				75
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	76	#if BITS_IN_JSAMPLE == 8
				77
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	78	/*
				79	* Find the highest bit in an integer through binary search.
				80	*/
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	81
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	82	LOCAL(int)
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	83	flss(UINT16 val)
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	84	{
				85	int bit;
				86
				87	bit = 16;
				88
				89	if (!val)
				90	return 0;
				91
				92	if (!(val & 0xff00)) {
				93	bit -= 8;
				94	val <<= 8;
				95	}
				96	if (!(val & 0xf000)) {
				97	bit -= 4;
				98	val <<= 4;
				99	}
				100	if (!(val & 0xc000)) {
				101	bit -= 2;
				102	val <<= 2;
				103	}
				104	if (!(val & 0x8000)) {
				105	bit -= 1;
				106	val <<= 1;
				107	}
				108
				109	return bit;
				110	}
				111
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	112
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	113	/*
				114	* Compute values to do a division using reciprocal.
				115	*
				116	* This implementation is based on an algorithm described in
				117	* "How to optimize for the Pentium family of microprocessors"
				118	* (http://www.agner.org/assem/).
				119	* More information about the basic algorithm can be found in
				120	* the paper "Integer Division Using Reciprocals" by Robert Alverson.
				121	*
				122	* The basic idea is to replace x/d by x * d^-1. In order to store
				123	* d^-1 with enough precision we shift it left a few places. It turns
				124	* out that this algorithm gives just enough precision, and also fits
				125	* into DCTELEM:
				126	*
				127	* b = (the number of significant bits in divisor) - 1
				128	* r = (word size) + b
				129	* f = 2^r / divisor
				130	*
				131	* f will not be an integer for most cases, so we need to compensate
				132	* for the rounding error introduced:
				133	*
				134	* no fractional part:
				135	*
				136	* result = input >> r
				137	*
				138	* fractional part of f < 0.5:
				139	*
				140	* round f down to nearest integer
				141	* result = ((input + 1) * f) >> r
				142	*
				143	* fractional part of f > 0.5:
				144	*
				145	* round f up to nearest integer
				146	* result = (input * f) >> r
				147	*
				148	* This is the original algorithm that gives truncated results. But we
				149	* want properly rounded results, so we replace "input" with
				150	* "input + divisor/2".
				151	*
				152	* In order to allow SIMD implementations we also tweak the values to
				153	* allow the same calculation to be made at all times:
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	154	*
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	155	* dctbl[0] = f rounded to nearest integer
				156	* dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
				157	* dctbl[2] = 1 << ((word size) * 2 - r)
				158	* dctbl[3] = r - (word size)
				159	*
				160	* dctbl[2] is for stupid instruction sets where the shift operation
				161	* isn't member wise (e.g. MMX).
				162	*
				163	* The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
				164	* is that most SIMD implementations have a "multiply and store top
				165	* half" operation.
				166	*
				167	* Lastly, we store each of the values in their own table instead
				168	* of in a consecutive manner, yet again in order to allow SIMD
				169	* routines.
				170	*/
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	171
hbono@chromium.org	9862697	2011-08-03 03:13:08 +0000	[diff] [blame]	172	LOCAL(int)
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	173	compute_reciprocal(UINT16 divisor, DCTELEM *dtbl)
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	174	{
				175	UDCTELEM2 fq, fr;
				176	UDCTELEM c;
				177	int b, r;
				178
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	179	if (divisor == 1) {
				180	/* divisor == 1 means unquantized, so these reciprocal/correction/shift
				181	* values will cause the C quantization algorithm to act like the
				182	* identity function. Since only the C quantization algorithm is used in
				183	* these cases, the scale value is irrelevant.
				184	*/
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	185	dtbl[DCTSIZE2 * 0] = (DCTELEM)1; /* reciprocal */
				186	dtbl[DCTSIZE2 * 1] = (DCTELEM)0; /* correction */
				187	dtbl[DCTSIZE2 * 2] = (DCTELEM)1; /* scale */
				188	dtbl[DCTSIZE2 * 3] = -(DCTELEM)(sizeof(DCTELEM) * 8); /* shift */
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	189	return 0;
				190	}
				191
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	192	b = flss(divisor) - 1;
				193	r = sizeof(DCTELEM) * 8 + b;
				194
				195	fq = ((UDCTELEM2)1 << r) / divisor;
				196	fr = ((UDCTELEM2)1 << r) % divisor;
				197
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	198	c = divisor / 2; /* for rounding */
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	199
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	200	if (fr == 0) { /* divisor is power of two */
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	201	/* fq will be one bit too large to fit in DCTELEM, so adjust */
				202	fq >>= 1;
				203	r--;
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	204	} else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	205	c++;
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	206	} else { /* fractional part is > 0.5 */
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	207	fq++;
				208	}
				209
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	210	dtbl[DCTSIZE2 * 0] = (DCTELEM)fq; /* reciprocal */
				211	dtbl[DCTSIZE2 * 1] = (DCTELEM)c; /* correction + roundfactor */
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	212	#ifdef WITH_SIMD
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	213	dtbl[DCTSIZE2 * 2] = (DCTELEM)(1 << (sizeof(DCTELEM) * 8 * 2 - r)); /* scale */
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	214	#else
				215	dtbl[DCTSIZE2 * 2] = 1;
				216	#endif
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	217	dtbl[DCTSIZE2 * 3] = (DCTELEM)r - sizeof(DCTELEM) * 8; /* shift */
hbono@chromium.org	9862697	2011-08-03 03:13:08 +0000	[diff] [blame]	218
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	219	if (r <= 16) return 0;
hbono@chromium.org	9862697	2011-08-03 03:13:08 +0000	[diff] [blame]	220	else return 1;
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	221	}
				222
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	223	#endif
				224
				225
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	226	/*
				227	* Initialize for a processing pass.
				228	* Verify that all referenced Q-tables are present, and set up
				229	* the divisor table for each one.
				230	* In the current implementation, DCT of all components is done during
				231	* the first pass, even if only some components will be output in the
				232	* first scan. Hence all components should be examined here.
				233	*/
				234
				235	METHODDEF(void)
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	236	start_pass_fdctmgr(j_compress_ptr cinfo)
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	237	{
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	238	my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	239	int ci, qtblno, i;
				240	jpeg_component_info *compptr;
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	241	JQUANT_TBL *qtbl;
				242	DCTELEM *dtbl;
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	243
				244	for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
				245	ci++, compptr++) {
				246	qtblno = compptr->quant_tbl_no;
				247	/* Make sure specified quantization table is present */
				248	if (qtblno < 0 \|\| qtblno >= NUM_QUANT_TBLS \|\|
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	249	cinfo->quant_tbl_ptrs[qtblno] == NULL)
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	250	ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
				251	qtbl = cinfo->quant_tbl_ptrs[qtblno];
				252	/* Compute divisors for this quant table */
				253	/* We may do this more than once for same table, but it's not a big deal */
				254	switch (cinfo->dct_method) {
				255	#ifdef DCT_ISLOW_SUPPORTED
				256	case JDCT_ISLOW:
				257	/* For LL&M IDCT method, divisors are equal to raw quantization
				258	* coefficients multiplied by 8 (to counteract scaling).
				259	*/
				260	if (fdct->divisors[qtblno] == NULL) {
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	261	fdct->divisors[qtblno] = (DCTELEM *)
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	262	(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	263	(DCTSIZE2 * 4) * sizeof(DCTELEM));
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	264	}
				265	dtbl = fdct->divisors[qtblno];
				266	for (i = 0; i < DCTSIZE2; i++) {
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	267	#if BITS_IN_JSAMPLE == 8
				268	if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
				269	fdct->quantize == jsimd_quantize)
				270	fdct->quantize = quantize;
				271	#else
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	272	dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	273	#endif
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	274	}
				275	break;
				276	#endif
				277	#ifdef DCT_IFAST_SUPPORTED
				278	case JDCT_IFAST:
				279	{
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	280	/* For AA&N IDCT method, divisors are equal to quantization
				281	* coefficients scaled by scalefactor[row]*scalefactor[col], where
				282	* scalefactor[0] = 1
				283	* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
				284	* We apply a further scale factor of 8.
				285	*/
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	286	#define CONST_BITS 14
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	287	static const INT16 aanscales[DCTSIZE2] = {
				288	/* precomputed values scaled up by 14 bits */
				289	16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
				290	22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
				291	21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
				292	19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
				293	16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
				294	12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
				295	8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
				296	4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
				297	};
				298	SHIFT_TEMPS
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	299
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	300	if (fdct->divisors[qtblno] == NULL) {
				301	fdct->divisors[qtblno] = (DCTELEM *)
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	302	(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	303	(DCTSIZE2 * 4) * sizeof(DCTELEM));
				304	}
				305	dtbl = fdct->divisors[qtblno];
				306	for (i = 0; i < DCTSIZE2; i++) {
				307	#if BITS_IN_JSAMPLE == 8
				308	if (!compute_reciprocal(
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	309	DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
				310	(JLONG)aanscales[i]),
				311	CONST_BITS - 3), &dtbl[i]) &&
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	312	fdct->quantize == jsimd_quantize)
				313	fdct->quantize = quantize;
				314	#else
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	315	dtbl[i] = (DCTELEM)
				316	DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
				317	(JLONG)aanscales[i]),
				318	CONST_BITS - 3);
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	319	#endif
				320	}
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	321	}
				322	break;
				323	#endif
				324	#ifdef DCT_FLOAT_SUPPORTED
				325	case JDCT_FLOAT:
				326	{
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	327	/* For float AA&N IDCT method, divisors are equal to quantization
				328	* coefficients scaled by scalefactor[row]*scalefactor[col], where
				329	* scalefactor[0] = 1
				330	* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
				331	* We apply a further scale factor of 8.
				332	* What's actually stored is 1/divisor so that the inner loop can
				333	* use a multiplication rather than a division.
				334	*/
				335	FAST_FLOAT *fdtbl;
				336	int row, col;
				337	static const double aanscalefactor[DCTSIZE] = {
				338	1.0, 1.387039845, 1.306562965, 1.175875602,
				339	1.0, 0.785694958, 0.541196100, 0.275899379
				340	};
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	341
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	342	if (fdct->float_divisors[qtblno] == NULL) {
				343	fdct->float_divisors[qtblno] = (FAST_FLOAT *)
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	344	(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	345	DCTSIZE2 * sizeof(FAST_FLOAT));
				346	}
				347	fdtbl = fdct->float_divisors[qtblno];
				348	i = 0;
				349	for (row = 0; row < DCTSIZE; row++) {
				350	for (col = 0; col < DCTSIZE; col++) {
				351	fdtbl[i] = (FAST_FLOAT)
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	352	(1.0 / (((double)qtbl->quantval[i] *
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	353	aanscalefactor[row] * aanscalefactor[col] * 8.0)));
				354	i++;
				355	}
				356	}
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	357	}
				358	break;
				359	#endif
				360	default:
				361	ERREXIT(cinfo, JERR_NOT_COMPILED);
				362	break;
				363	}
				364	}
				365	}
				366
				367
				368	/*
				369	* Load data into workspace, applying unsigned->signed conversion.
				370	*/
				371
				372	METHODDEF(void)
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	373	convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	374	{
				375	register DCTELEM *workspaceptr;
				376	register JSAMPROW elemptr;
				377	register int elemr;
				378
				379	workspaceptr = workspace;
				380	for (elemr = 0; elemr < DCTSIZE; elemr++) {
				381	elemptr = sample_data[elemr] + start_col;
				382
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	383	#if DCTSIZE == 8 /* unroll the inner loop */
Jonathan Wright	bbb8282	2020-11-25 13:36:43 +0000	[diff] [blame]	384	workspaceptr++ = (elemptr++) - CENTERJSAMPLE;
				385	workspaceptr++ = (elemptr++) - CENTERJSAMPLE;
				386	workspaceptr++ = (elemptr++) - CENTERJSAMPLE;
				387	workspaceptr++ = (elemptr++) - CENTERJSAMPLE;
				388	workspaceptr++ = (elemptr++) - CENTERJSAMPLE;
				389	workspaceptr++ = (elemptr++) - CENTERJSAMPLE;
				390	workspaceptr++ = (elemptr++) - CENTERJSAMPLE;
				391	workspaceptr++ = (elemptr++) - CENTERJSAMPLE;
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	392	#else
				393	{
				394	register int elemc;
				395	for (elemc = DCTSIZE; elemc > 0; elemc--)
Jonathan Wright	bbb8282	2020-11-25 13:36:43 +0000	[diff] [blame]	396	workspaceptr++ = (elemptr++) - CENTERJSAMPLE;
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	397	}
				398	#endif
				399	}
				400	}
				401
				402
				403	/*
				404	* Quantize/descale the coefficients, and store into coef_blocks[].
				405	*/
				406
				407	METHODDEF(void)
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	408	quantize(JCOEFPTR coef_block, DCTELEM divisors, DCTELEM workspace)
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	409	{
				410	int i;
				411	DCTELEM temp;
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	412	JCOEFPTR output_ptr = coef_block;
				413
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	414	#if BITS_IN_JSAMPLE == 8
				415
				416	UDCTELEM recip, corr;
				417	int shift;
				418	UDCTELEM2 product;
				419
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	420	for (i = 0; i < DCTSIZE2; i++) {
				421	temp = workspace[i];
				422	recip = divisors[i + DCTSIZE2 * 0];
				423	corr = divisors[i + DCTSIZE2 * 1];
				424	shift = divisors[i + DCTSIZE2 * 3];
				425
				426	if (temp < 0) {
				427	temp = -temp;
				428	product = (UDCTELEM2)(temp + corr) * recip;
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	429	product >>= shift + sizeof(DCTELEM) * 8;
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	430	temp = (DCTELEM)product;
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	431	temp = -temp;
				432	} else {
				433	product = (UDCTELEM2)(temp + corr) * recip;
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	434	product >>= shift + sizeof(DCTELEM) * 8;
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	435	temp = (DCTELEM)product;
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	436	}
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	437	output_ptr[i] = (JCOEF)temp;
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	438	}
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	439
				440	#else
				441
				442	register DCTELEM qval;
				443
				444	for (i = 0; i < DCTSIZE2; i++) {
				445	qval = divisors[i];
				446	temp = workspace[i];
				447	/* Divide the coefficient value by qval, ensuring proper rounding.
				448	* Since C does not specify the direction of rounding for negative
				449	* quotients, we have to force the dividend positive for portability.
				450	*
				451	* In most files, at least half of the output values will be zero
				452	* (at default quantization settings, more like three-quarters...)
				453	* so we should ensure that this case is fast. On many machines,
				454	* a comparison is enough cheaper than a divide to make a special test
				455	* a win. Since both inputs will be nonnegative, we need only test
				456	* for a < b to discover whether a/b is 0.
				457	* If your machine's division is fast enough, define FAST_DIVIDE.
				458	*/
				459	#ifdef FAST_DIVIDE
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	460	#define DIVIDE_BY(a, b) a /= b
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	461	#else
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	462	#define DIVIDE_BY(a, b) if (a >= b) a /= b; else a = 0
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	463	#endif
				464	if (temp < 0) {
				465	temp = -temp;
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	466	temp += qval >> 1; /* for rounding */
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	467	DIVIDE_BY(temp, qval);
				468	temp = -temp;
				469	} else {
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	470	temp += qval >> 1; /* for rounding */
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	471	DIVIDE_BY(temp, qval);
				472	}
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	473	output_ptr[i] = (JCOEF)temp;
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	474	}
				475
				476	#endif
				477
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	478	}
				479
				480
				481	/*
				482	* Perform forward DCT on one or more blocks of a component.
				483	*
				484	* The input samples are taken from the sample_data[] array starting at
				485	* position start_row/start_col, and moving to the right for any additional
				486	* blocks. The quantized coefficients are returned in coef_blocks[].
				487	*/
				488
				489	METHODDEF(void)
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	490	forward_DCT(j_compress_ptr cinfo, jpeg_component_info *compptr,
				491	JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
				492	JDIMENSION start_row, JDIMENSION start_col, JDIMENSION num_blocks)
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	493	/* This version is used for integer DCT implementations. */
				494	{
				495	/* This routine is heavily used, so it's worth coding it tightly. */
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	496	my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	497	DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
				498	DCTELEM *workspace;
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	499	JDIMENSION bi;
				500
				501	/* Make sure the compiler doesn't look up these every pass */
				502	forward_DCT_method_ptr do_dct = fdct->dct;
				503	convsamp_method_ptr do_convsamp = fdct->convsamp;
				504	quantize_method_ptr do_quantize = fdct->quantize;
				505	workspace = fdct->workspace;
				506
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	507	sample_data += start_row; /* fold in the vertical offset once */
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	508
				509	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
				510	/* Load data into workspace, applying unsigned->signed conversion */
				511	(*do_convsamp) (sample_data, start_col, workspace);
				512
				513	/* Perform the DCT */
				514	(*do_dct) (workspace);
				515
				516	/* Quantize/descale the coefficients, and store into coef_blocks[] */
				517	(*do_quantize) (coef_blocks[bi], divisors, workspace);
				518	}
				519	}
				520
				521
				522	#ifdef DCT_FLOAT_SUPPORTED
				523
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	524	METHODDEF(void)
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	525	convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
				526	FAST_FLOAT *workspace)
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	527	{
				528	register FAST_FLOAT *workspaceptr;
				529	register JSAMPROW elemptr;
				530	register int elemr;
				531
				532	workspaceptr = workspace;
				533	for (elemr = 0; elemr < DCTSIZE; elemr++) {
				534	elemptr = sample_data[elemr] + start_col;
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	535	#if DCTSIZE == 8 /* unroll the inner loop */
Jonathan Wright	bbb8282	2020-11-25 13:36:43 +0000	[diff] [blame]	536	workspaceptr++ = (FAST_FLOAT)((elemptr++) - CENTERJSAMPLE);
				537	workspaceptr++ = (FAST_FLOAT)((elemptr++) - CENTERJSAMPLE);
				538	workspaceptr++ = (FAST_FLOAT)((elemptr++) - CENTERJSAMPLE);
				539	workspaceptr++ = (FAST_FLOAT)((elemptr++) - CENTERJSAMPLE);
				540	workspaceptr++ = (FAST_FLOAT)((elemptr++) - CENTERJSAMPLE);
				541	workspaceptr++ = (FAST_FLOAT)((elemptr++) - CENTERJSAMPLE);
				542	workspaceptr++ = (FAST_FLOAT)((elemptr++) - CENTERJSAMPLE);
				543	workspaceptr++ = (FAST_FLOAT)((elemptr++) - CENTERJSAMPLE);
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	544	#else
				545	{
				546	register int elemc;
				547	for (elemc = DCTSIZE; elemc > 0; elemc--)
Jonathan Wright	bbb8282	2020-11-25 13:36:43 +0000	[diff] [blame]	548	workspaceptr++ = (FAST_FLOAT)((elemptr++) - CENTERJSAMPLE);
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	549	}
				550	#endif
				551	}
				552	}
				553
				554
				555	METHODDEF(void)
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	556	quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
				557	FAST_FLOAT *workspace)
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	558	{
				559	register FAST_FLOAT temp;
				560	register int i;
				561	register JCOEFPTR output_ptr = coef_block;
				562
				563	for (i = 0; i < DCTSIZE2; i++) {
				564	/* Apply the quantization and scaling factor */
				565	temp = workspace[i] * divisors[i];
				566
				567	/* Round to nearest integer.
				568	* Since C does not specify the direction of rounding for negative
				569	* quotients, we have to force the dividend positive for portability.
				570	* The maximum coefficient size is +-16K (for 12-bit data), so this
				571	* code should work for either 16-bit or 32-bit ints.
				572	*/
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	573	output_ptr[i] = (JCOEF)((int)(temp + (FAST_FLOAT)16384.5) - 16384);
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	574	}
				575	}
				576
				577
				578	METHODDEF(void)
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	579	forward_DCT_float(j_compress_ptr cinfo, jpeg_component_info *compptr,
				580	JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
				581	JDIMENSION start_row, JDIMENSION start_col,
				582	JDIMENSION num_blocks)
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	583	/* This version is used for floating-point DCT implementations. */
				584	{
				585	/* This routine is heavily used, so it's worth coding it tightly. */
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	586	my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	587	FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no];
				588	FAST_FLOAT *workspace;
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	589	JDIMENSION bi;
				590
				591
				592	/* Make sure the compiler doesn't look up these every pass */
				593	float_DCT_method_ptr do_dct = fdct->float_dct;
				594	float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
				595	float_quantize_method_ptr do_quantize = fdct->float_quantize;
				596	workspace = fdct->float_workspace;
				597
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	598	sample_data += start_row; /* fold in the vertical offset once */
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	599
				600	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
				601	/* Load data into workspace, applying unsigned->signed conversion */
				602	(*do_convsamp) (sample_data, start_col, workspace);
				603
				604	/* Perform the DCT */
				605	(*do_dct) (workspace);
				606
				607	/* Quantize/descale the coefficients, and store into coef_blocks[] */
				608	(*do_quantize) (coef_blocks[bi], divisors, workspace);
				609	}
				610	}
				611
				612	#endif /* DCT_FLOAT_SUPPORTED */
				613
				614
				615	/*
				616	* Initialize FDCT manager.
				617	*/
				618
				619	GLOBAL(void)
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	620	jinit_forward_dct(j_compress_ptr cinfo)
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	621	{
				622	my_fdct_ptr fdct;
				623	int i;
				624
				625	fdct = (my_fdct_ptr)
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	626	(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	627	sizeof(my_fdct_controller));
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	628	cinfo->fdct = (struct jpeg_forward_dct *)fdct;
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	629	fdct->pub.start_pass = start_pass_fdctmgr;
				630
				631	/* First determine the DCT... */
				632	switch (cinfo->dct_method) {
				633	#ifdef DCT_ISLOW_SUPPORTED
				634	case JDCT_ISLOW:
				635	fdct->pub.forward_DCT = forward_DCT;
				636	if (jsimd_can_fdct_islow())
				637	fdct->dct = jsimd_fdct_islow;
				638	else
				639	fdct->dct = jpeg_fdct_islow;
				640	break;
				641	#endif
				642	#ifdef DCT_IFAST_SUPPORTED
				643	case JDCT_IFAST:
				644	fdct->pub.forward_DCT = forward_DCT;
				645	if (jsimd_can_fdct_ifast())
				646	fdct->dct = jsimd_fdct_ifast;
				647	else
				648	fdct->dct = jpeg_fdct_ifast;
				649	break;
				650	#endif
				651	#ifdef DCT_FLOAT_SUPPORTED
				652	case JDCT_FLOAT:
				653	fdct->pub.forward_DCT = forward_DCT_float;
				654	if (jsimd_can_fdct_float())
				655	fdct->float_dct = jsimd_fdct_float;
				656	else
				657	fdct->float_dct = jpeg_fdct_float;
				658	break;
				659	#endif
				660	default:
				661	ERREXIT(cinfo, JERR_NOT_COMPILED);
				662	break;
				663	}
				664
				665	/* ...then the supporting stages. */
				666	switch (cinfo->dct_method) {
				667	#ifdef DCT_ISLOW_SUPPORTED
				668	case JDCT_ISLOW:
				669	#endif
				670	#ifdef DCT_IFAST_SUPPORTED
				671	case JDCT_IFAST:
				672	#endif
				673	#if defined(DCT_ISLOW_SUPPORTED) \|\| defined(DCT_IFAST_SUPPORTED)
				674	if (jsimd_can_convsamp())
				675	fdct->convsamp = jsimd_convsamp;
				676	else
				677	fdct->convsamp = convsamp;
				678	if (jsimd_can_quantize())
				679	fdct->quantize = jsimd_quantize;
				680	else
				681	fdct->quantize = quantize;
				682	break;
				683	#endif
				684	#ifdef DCT_FLOAT_SUPPORTED
				685	case JDCT_FLOAT:
				686	if (jsimd_can_convsamp_float())
				687	fdct->float_convsamp = jsimd_convsamp_float;
				688	else
				689	fdct->float_convsamp = convsamp_float;
				690	if (jsimd_can_quantize_float())
				691	fdct->float_quantize = jsimd_quantize_float;
				692	else
				693	fdct->float_quantize = quantize_float;
				694	break;
				695	#endif
				696	default:
				697	ERREXIT(cinfo, JERR_NOT_COMPILED);
				698	break;
				699	}
				700
				701	/* Allocate workspace memory */
				702	#ifdef DCT_FLOAT_SUPPORTED
				703	if (cinfo->dct_method == JDCT_FLOAT)
				704	fdct->float_workspace = (FAST_FLOAT *)
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	705	(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	706	sizeof(FAST_FLOAT) * DCTSIZE2);
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	707	else
				708	#endif
				709	fdct->workspace = (DCTELEM *)
Chris Blume	cca8c4d	2019-03-01 01:09:50 -0800	[diff] [blame]	710	(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
Tom Hudson	0d47d2d	2016-05-04 13:22:56 -0400	[diff] [blame]	711	sizeof(DCTELEM) * DCTSIZE2);
hbono@chromium.org	f0c4f33	2010-11-01 05:14:55 +0000	[diff] [blame]	712
				713	/* Mark divisor tables unallocated */
				714	for (i = 0; i < NUM_QUANT_TBLS; i++) {
				715	fdct->divisors[i] = NULL;
				716	#ifdef DCT_FLOAT_SUPPORTED
				717	fdct->float_divisors[i] = NULL;
				718	#endif
				719	}
				720	}