Blame - jcdctmgr.c - platform/external/libjpeg-turbo

blob: 4cac666483985063e7c28439616758a911183f5f [file] [log] [blame]

Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	1	/*
				2	* jcdctmgr.c
				3	*
DRC	a73e870	2012-12-31 02:52:30 +0000	[diff] [blame]	4	* This file was part of the Independent JPEG Group's software:
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	5	* Copyright (C) 1994-1996, Thomas G. Lane.
DRC	a6ef282	2013-09-28 03:23:49 +0000	[diff] [blame]	6	* libjpeg-turbo Modifications:
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	7	* Copyright (C) 1999-2006, MIYASAKA Masaru.
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	8	* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
DRC	b5a55e6	2015-08-29 18:05:43 -0500	[diff] [blame^]	9	* Copyright (C) 2011, 2014-2015 D. R. Commander
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	10	* For conditions of distribution and use, see the accompanying README file.
				11	*
				12	* This file contains the forward-DCT management logic.
				13	* This code selects a particular DCT implementation to be used,
				14	* and it performs related housekeeping chores including coefficient
				15	* quantization.
				16	*/
				17
				18	#define JPEG_INTERNALS
				19	#include "jinclude.h"
				20	#include "jpeglib.h"
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	21	#include "jdct.h" /* Private declarations for DCT subsystem */
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	22	#include "jsimddct.h"
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	23
				24
				25	/* Private subobject for this module */
				26
DRC	bc56b75	2014-05-16 10:43:44 +0000	[diff] [blame]	27	typedef void (forward_DCT_method_ptr) (DCTELEM data);
				28	typedef void (float_DCT_method_ptr) (FAST_FLOAT data);
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	29
DRC	bc56b75	2014-05-16 10:43:44 +0000	[diff] [blame]	30	typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data,
				31	JDIMENSION start_col,
				32	DCTELEM * workspace);
				33	typedef void (*float_convsamp_method_ptr) (JSAMPARRAY sample_data,
				34	JDIMENSION start_col,
				35	FAST_FLOAT *workspace);
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	36
DRC	bc56b75	2014-05-16 10:43:44 +0000	[diff] [blame]	37	typedef void (quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM divisors,
				38	DCTELEM * workspace);
				39	typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block,
				40	FAST_FLOAT * divisors,
				41	FAST_FLOAT * workspace);
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	42
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame]	43	METHODDEF(void) quantize (JCOEFPTR, DCTELEM , DCTELEM );
				44
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	45	typedef struct {
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	46	struct jpeg_forward_dct pub; /* public fields */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	47
				48	/* Pointer to the DCT routine actually in use */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	49	forward_DCT_method_ptr dct;
				50	convsamp_method_ptr convsamp;
				51	quantize_method_ptr quantize;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	52
				53	/* The actual post-DCT divisors --- not identical to the quant table
				54	* entries, because of scaling (especially for an unnormalized DCT).
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	55	* Each table is given in normal array order.
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	56	*/
				57	DCTELEM * divisors[NUM_QUANT_TBLS];
				58
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	59	/* work area for FDCT subroutine */
				60	DCTELEM * workspace;
				61
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	62	#ifdef DCT_FLOAT_SUPPORTED
				63	/* Same as above for the floating-point case. */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	64	float_DCT_method_ptr float_dct;
				65	float_convsamp_method_ptr float_convsamp;
				66	float_quantize_method_ptr float_quantize;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	67	FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	68	FAST_FLOAT * float_workspace;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	69	#endif
				70	} my_fdct_controller;
				71
				72	typedef my_fdct_controller * my_fdct_ptr;
				73
				74
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	75	#if BITS_IN_JSAMPLE == 8
				76
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	77	/*
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	78	* Find the highest bit in an integer through binary search.
				79	*/
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	80
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	81	LOCAL(int)
DRC	fc5dc4f	2009-10-01 22:26:14 +0000	[diff] [blame]	82	flss (UINT16 val)
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	83	{
				84	int bit;
				85
				86	bit = 16;
				87
				88	if (!val)
				89	return 0;
				90
				91	if (!(val & 0xff00)) {
				92	bit -= 8;
				93	val <<= 8;
				94	}
				95	if (!(val & 0xf000)) {
				96	bit -= 4;
				97	val <<= 4;
				98	}
				99	if (!(val & 0xc000)) {
				100	bit -= 2;
				101	val <<= 2;
				102	}
				103	if (!(val & 0x8000)) {
				104	bit -= 1;
				105	val <<= 1;
				106	}
				107
				108	return bit;
				109	}
				110
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	111
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	112	/*
				113	* Compute values to do a division using reciprocal.
				114	*
				115	* This implementation is based on an algorithm described in
				116	* "How to optimize for the Pentium family of microprocessors"
				117	* (http://www.agner.org/assem/).
				118	* More information about the basic algorithm can be found in
				119	* the paper "Integer Division Using Reciprocals" by Robert Alverson.
				120	*
				121	* The basic idea is to replace x/d by x * d^-1. In order to store
				122	* d^-1 with enough precision we shift it left a few places. It turns
				123	* out that this algoright gives just enough precision, and also fits
				124	* into DCTELEM:
				125	*
				126	* b = (the number of significant bits in divisor) - 1
				127	* r = (word size) + b
				128	* f = 2^r / divisor
				129	*
				130	* f will not be an integer for most cases, so we need to compensate
				131	* for the rounding error introduced:
				132	*
				133	* no fractional part:
				134	*
				135	* result = input >> r
				136	*
				137	* fractional part of f < 0.5:
				138	*
				139	* round f down to nearest integer
				140	* result = ((input + 1) * f) >> r
				141	*
				142	* fractional part of f > 0.5:
				143	*
				144	* round f up to nearest integer
				145	* result = (input * f) >> r
				146	*
				147	* This is the original algorithm that gives truncated results. But we
				148	* want properly rounded results, so we replace "input" with
				149	* "input + divisor/2".
				150	*
				151	* In order to allow SIMD implementations we also tweak the values to
				152	* allow the same calculation to be made at all times:
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	153	*
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	154	* dctbl[0] = f rounded to nearest integer
				155	* dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
				156	* dctbl[2] = 1 << ((word size) * 2 - r)
				157	* dctbl[3] = r - (word size)
				158	*
				159	* dctbl[2] is for stupid instruction sets where the shift operation
				160	* isn't member wise (e.g. MMX).
				161	*
				162	* The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
				163	* is that most SIMD implementations have a "multiply and store top
				164	* half" operation.
				165	*
				166	* Lastly, we store each of the values in their own table instead
				167	* of in a consecutive manner, yet again in order to allow SIMD
				168	* routines.
				169	*/
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	170
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame]	171	LOCAL(int)
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	172	compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
				173	{
				174	UDCTELEM2 fq, fr;
				175	UDCTELEM c;
				176	int b, r;
				177
DRC	b5a55e6	2015-08-29 18:05:43 -0500	[diff] [blame^]	178	if (divisor == 1) {
				179	/* divisor == 1 means unquantized, so these reciprocal/correction/shift
				180	* values will cause the C quantization algorithm to act like the
				181	* identity function. Since only the C quantization algorithm is used in
				182	* these cases, the scale value is irrelevant.
				183	*/
				184	dtbl[DCTSIZE2 * 0] = (DCTELEM) 1; /* reciprocal */
				185	dtbl[DCTSIZE2 * 1] = (DCTELEM) 0; /* correction */
				186	dtbl[DCTSIZE2 * 2] = (DCTELEM) 1; /* scale */
				187	dtbl[DCTSIZE2 * 3] = (DCTELEM) (-sizeof(DCTELEM) * 8); /* shift */
				188	return 0;
				189	}
				190
DRC	fc5dc4f	2009-10-01 22:26:14 +0000	[diff] [blame]	191	b = flss(divisor) - 1;
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	192	r = sizeof(DCTELEM) * 8 + b;
				193
				194	fq = ((UDCTELEM2)1 << r) / divisor;
				195	fr = ((UDCTELEM2)1 << r) % divisor;
				196
				197	c = divisor / 2; /* for rounding */
				198
				199	if (fr == 0) { /* divisor is power of two */
				200	/* fq will be one bit too large to fit in DCTELEM, so adjust */
				201	fq >>= 1;
				202	r--;
DRC	d65d99a	2012-01-31 03:39:23 +0000	[diff] [blame]	203	} else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	204	c++;
				205	} else { /* fractional part is > 0.5 */
				206	fq++;
				207	}
				208
				209	dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */
				210	dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */
				211	dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)82 - r)); /* scale */
				212	dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)8; / shift */
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame]	213
				214	if(r <= 16) return 0;
				215	else return 1;
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	216	}
				217
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	218	#endif
				219
				220
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	221	/*
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	222	* Initialize for a processing pass.
				223	* Verify that all referenced Q-tables are present, and set up
				224	* the divisor table for each one.
				225	* In the current implementation, DCT of all components is done during
				226	* the first pass, even if only some components will be output in the
				227	* first scan. Hence all components should be examined here.
				228	*/
				229
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	230	METHODDEF(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	231	start_pass_fdctmgr (j_compress_ptr cinfo)
				232	{
				233	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
				234	int ci, qtblno, i;
				235	jpeg_component_info *compptr;
				236	JQUANT_TBL * qtbl;
				237	DCTELEM * dtbl;
				238
				239	for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
				240	ci++, compptr++) {
				241	qtblno = compptr->quant_tbl_no;
				242	/* Make sure specified quantization table is present */
				243	if (qtblno < 0 \|\| qtblno >= NUM_QUANT_TBLS \|\|
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	244	cinfo->quant_tbl_ptrs[qtblno] == NULL)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	245	ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
				246	qtbl = cinfo->quant_tbl_ptrs[qtblno];
				247	/* Compute divisors for this quant table */
				248	/* We may do this more than once for same table, but it's not a big deal */
				249	switch (cinfo->dct_method) {
				250	#ifdef DCT_ISLOW_SUPPORTED
				251	case JDCT_ISLOW:
				252	/* For LL&M IDCT method, divisors are equal to raw quantization
				253	* coefficients multiplied by 8 (to counteract scaling).
				254	*/
				255	if (fdct->divisors[qtblno] == NULL) {
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	256	fdct->divisors[qtblno] = (DCTELEM *)
				257	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC	5de454b	2014-05-18 19:04:03 +0000	[diff] [blame]	258	(DCTSIZE2 * 4) * sizeof(DCTELEM));
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	259	}
				260	dtbl = fdct->divisors[qtblno];
				261	for (i = 0; i < DCTSIZE2; i++) {
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	262	#if BITS_IN_JSAMPLE == 8
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	263	if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
				264	&& fdct->quantize == jsimd_quantize)
				265	fdct->quantize = quantize;
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	266	#else
				267	dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
				268	#endif
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	269	}
				270	break;
				271	#endif
				272	#ifdef DCT_IFAST_SUPPORTED
				273	case JDCT_IFAST:
				274	{
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	275	/* For AA&N IDCT method, divisors are equal to quantization
				276	* coefficients scaled by scalefactor[row]*scalefactor[col], where
				277	* scalefactor[0] = 1
				278	* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
				279	* We apply a further scale factor of 8.
				280	*/
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	281	#define CONST_BITS 14
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	282	static const INT16 aanscales[DCTSIZE2] = {
				283	/* precomputed values scaled up by 14 bits */
				284	16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
				285	22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
				286	21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
				287	19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
				288	16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
				289	12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
				290	8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
				291	4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
				292	};
				293	SHIFT_TEMPS
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	294
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	295	if (fdct->divisors[qtblno] == NULL) {
				296	fdct->divisors[qtblno] = (DCTELEM *)
				297	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC	5de454b	2014-05-18 19:04:03 +0000	[diff] [blame]	298	(DCTSIZE2 * 4) * sizeof(DCTELEM));
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	299	}
				300	dtbl = fdct->divisors[qtblno];
				301	for (i = 0; i < DCTSIZE2; i++) {
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	302	#if BITS_IN_JSAMPLE == 8
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	303	if(!compute_reciprocal(
				304	DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
				305	(INT32) aanscales[i]),
				306	CONST_BITS-3), &dtbl[i])
				307	&& fdct->quantize == jsimd_quantize)
				308	fdct->quantize = quantize;
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	309	#else
				310	dtbl[i] = (DCTELEM)
				311	DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
				312	(INT32) aanscales[i]),
				313	CONST_BITS-3);
				314	#endif
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	315	}
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	316	}
				317	break;
				318	#endif
				319	#ifdef DCT_FLOAT_SUPPORTED
				320	case JDCT_FLOAT:
				321	{
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	322	/* For float AA&N IDCT method, divisors are equal to quantization
				323	* coefficients scaled by scalefactor[row]*scalefactor[col], where
				324	* scalefactor[0] = 1
				325	* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
				326	* We apply a further scale factor of 8.
				327	* What's actually stored is 1/divisor so that the inner loop can
				328	* use a multiplication rather than a division.
				329	*/
				330	FAST_FLOAT * fdtbl;
				331	int row, col;
				332	static const double aanscalefactor[DCTSIZE] = {
				333	1.0, 1.387039845, 1.306562965, 1.175875602,
				334	1.0, 0.785694958, 0.541196100, 0.275899379
				335	};
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	336
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	337	if (fdct->float_divisors[qtblno] == NULL) {
				338	fdct->float_divisors[qtblno] = (FAST_FLOAT *)
				339	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC	5de454b	2014-05-18 19:04:03 +0000	[diff] [blame]	340	DCTSIZE2 * sizeof(FAST_FLOAT));
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	341	}
				342	fdtbl = fdct->float_divisors[qtblno];
				343	i = 0;
				344	for (row = 0; row < DCTSIZE; row++) {
				345	for (col = 0; col < DCTSIZE; col++) {
				346	fdtbl[i] = (FAST_FLOAT)
				347	(1.0 / (((double) qtbl->quantval[i] *
				348	aanscalefactor[row] * aanscalefactor[col] * 8.0)));
				349	i++;
				350	}
				351	}
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	352	}
				353	break;
				354	#endif
				355	default:
				356	ERREXIT(cinfo, JERR_NOT_COMPILED);
				357	break;
				358	}
				359	}
				360	}
				361
				362
				363	/*
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	364	* Load data into workspace, applying unsigned->signed conversion.
				365	*/
				366
				367	METHODDEF(void)
				368	convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
				369	{
				370	register DCTELEM *workspaceptr;
				371	register JSAMPROW elemptr;
				372	register int elemr;
				373
				374	workspaceptr = workspace;
				375	for (elemr = 0; elemr < DCTSIZE; elemr++) {
				376	elemptr = sample_data[elemr] + start_col;
				377
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	378	#if DCTSIZE == 8 /* unroll the inner loop */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	379	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				380	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				381	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				382	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				383	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				384	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				385	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				386	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				387	#else
				388	{
				389	register int elemc;
				390	for (elemc = DCTSIZE; elemc > 0; elemc--)
				391	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				392	}
				393	#endif
				394	}
				395	}
				396
				397
				398	/*
				399	* Quantize/descale the coefficients, and store into coef_blocks[].
				400	*/
				401
				402	METHODDEF(void)
				403	quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
				404	{
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	405	int i;
				406	DCTELEM temp;
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	407	JCOEFPTR output_ptr = coef_block;
				408
				409	#if BITS_IN_JSAMPLE == 8
				410
DRC	b5a55e6	2015-08-29 18:05:43 -0500	[diff] [blame^]	411	UDCTELEM recip, corr;
				412	int shift;
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	413	UDCTELEM2 product;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	414
				415	for (i = 0; i < DCTSIZE2; i++) {
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	416	temp = workspace[i];
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	417	recip = divisors[i + DCTSIZE2 * 0];
				418	corr = divisors[i + DCTSIZE2 * 1];
				419	shift = divisors[i + DCTSIZE2 * 3];
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	420
				421	if (temp < 0) {
				422	temp = -temp;
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	423	product = (UDCTELEM2)(temp + corr) * recip;
				424	product >>= shift + sizeof(DCTELEM)*8;
				425	temp = product;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	426	temp = -temp;
				427	} else {
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	428	product = (UDCTELEM2)(temp + corr) * recip;
				429	product >>= shift + sizeof(DCTELEM)*8;
				430	temp = product;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	431	}
				432	output_ptr[i] = (JCOEF) temp;
				433	}
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	434
				435	#else
				436
				437	register DCTELEM qval;
				438
				439	for (i = 0; i < DCTSIZE2; i++) {
				440	qval = divisors[i];
				441	temp = workspace[i];
				442	/* Divide the coefficient value by qval, ensuring proper rounding.
				443	* Since C does not specify the direction of rounding for negative
				444	* quotients, we have to force the dividend positive for portability.
				445	*
				446	* In most files, at least half of the output values will be zero
				447	* (at default quantization settings, more like three-quarters...)
				448	* so we should ensure that this case is fast. On many machines,
				449	* a comparison is enough cheaper than a divide to make a special test
				450	* a win. Since both inputs will be nonnegative, we need only test
				451	* for a < b to discover whether a/b is 0.
				452	* If your machine's division is fast enough, define FAST_DIVIDE.
				453	*/
				454	#ifdef FAST_DIVIDE
				455	#define DIVIDE_BY(a,b) a /= b
				456	#else
				457	#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
				458	#endif
				459	if (temp < 0) {
				460	temp = -temp;
				461	temp += qval>>1; /* for rounding */
				462	DIVIDE_BY(temp, qval);
DRC	eca0637	2014-11-06 09:32:38 +0000	[diff] [blame]	463	temp = -temp;
DRC	aee4f72	2014-08-09 23:06:07 +0000	[diff] [blame]	464	} else {
				465	temp += qval>>1; /* for rounding */
				466	DIVIDE_BY(temp, qval);
				467	}
				468	output_ptr[i] = (JCOEF) temp;
				469	}
				470
				471	#endif
				472
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	473	}
				474
				475
				476	/*
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	477	* Perform forward DCT on one or more blocks of a component.
				478	*
				479	* The input samples are taken from the sample_data[] array starting at
				480	* position start_row/start_col, and moving to the right for any additional
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame]	481	* blocks. The quantized coefficients are returned in coef_blocks[].
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	482	*/
				483
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	484	METHODDEF(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	485	forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	486	JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
				487	JDIMENSION start_row, JDIMENSION start_col,
				488	JDIMENSION num_blocks)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	489	/* This version is used for integer DCT implementations. */
				490	{
				491	/* This routine is heavily used, so it's worth coding it tightly. */
				492	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	493	DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	494	DCTELEM * workspace;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	495	JDIMENSION bi;
				496
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	497	/* Make sure the compiler doesn't look up these every pass */
				498	forward_DCT_method_ptr do_dct = fdct->dct;
				499	convsamp_method_ptr do_convsamp = fdct->convsamp;
				500	quantize_method_ptr do_quantize = fdct->quantize;
Pierre Ossman	dc5db14	2009-03-13 12:17:26 +0000	[diff] [blame]	501	workspace = fdct->workspace;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	502
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	503	sample_data += start_row; /* fold in the vertical offset once */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	504
				505	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
				506	/* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	507	(*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	508
				509	/* Perform the DCT */
				510	(*do_dct) (workspace);
				511
				512	/* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	513	(*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	514	}
				515	}
				516
				517
				518	#ifdef DCT_FLOAT_SUPPORTED
				519
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	520
				521	METHODDEF(void)
				522	convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
				523	{
				524	register FAST_FLOAT *workspaceptr;
				525	register JSAMPROW elemptr;
				526	register int elemr;
				527
				528	workspaceptr = workspace;
				529	for (elemr = 0; elemr < DCTSIZE; elemr++) {
				530	elemptr = sample_data[elemr] + start_col;
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	531	#if DCTSIZE == 8 /* unroll the inner loop */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	532	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				533	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				534	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				535	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				536	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				537	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				538	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				539	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				540	#else
				541	{
				542	register int elemc;
				543	for (elemc = DCTSIZE; elemc > 0; elemc--)
				544	*workspaceptr++ = (FAST_FLOAT)
				545	(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
				546	}
				547	#endif
				548	}
				549	}
				550
				551
				552	METHODDEF(void)
				553	quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
				554	{
				555	register FAST_FLOAT temp;
				556	register int i;
				557	register JCOEFPTR output_ptr = coef_block;
				558
				559	for (i = 0; i < DCTSIZE2; i++) {
				560	/* Apply the quantization and scaling factor */
				561	temp = workspace[i] * divisors[i];
				562
				563	/* Round to nearest integer.
				564	* Since C does not specify the direction of rounding for negative
				565	* quotients, we have to force the dividend positive for portability.
				566	* The maximum coefficient size is +-16K (for 12-bit data), so this
				567	* code should work for either 16-bit or 32-bit ints.
				568	*/
				569	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
				570	}
				571	}
				572
				573
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	574	METHODDEF(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	575	forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	576	JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
				577	JDIMENSION start_row, JDIMENSION start_col,
				578	JDIMENSION num_blocks)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	579	/* This version is used for floating-point DCT implementations. */
				580	{
				581	/* This routine is heavily used, so it's worth coding it tightly. */
				582	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	583	FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	584	FAST_FLOAT * workspace;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	585	JDIMENSION bi;
				586
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	587
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	588	/* Make sure the compiler doesn't look up these every pass */
				589	float_DCT_method_ptr do_dct = fdct->float_dct;
				590	float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
				591	float_quantize_method_ptr do_quantize = fdct->float_quantize;
Pierre Ossman	dc5db14	2009-03-13 12:17:26 +0000	[diff] [blame]	592	workspace = fdct->float_workspace;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	593
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	594	sample_data += start_row; /* fold in the vertical offset once */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	595
				596	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
				597	/* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	598	(*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	599
				600	/* Perform the DCT */
				601	(*do_dct) (workspace);
				602
				603	/* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	604	(*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	605	}
				606	}
				607
				608	#endif /* DCT_FLOAT_SUPPORTED */
				609
				610
				611	/*
				612	* Initialize FDCT manager.
				613	*/
				614
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	615	GLOBAL(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	616	jinit_forward_dct (j_compress_ptr cinfo)
				617	{
				618	my_fdct_ptr fdct;
				619	int i;
				620
				621	fdct = (my_fdct_ptr)
				622	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC	5de454b	2014-05-18 19:04:03 +0000	[diff] [blame]	623	sizeof(my_fdct_controller));
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	624	cinfo->fdct = (struct jpeg_forward_dct *) fdct;
				625	fdct->pub.start_pass = start_pass_fdctmgr;
				626
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	627	/* First determine the DCT... */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	628	switch (cinfo->dct_method) {
				629	#ifdef DCT_ISLOW_SUPPORTED
				630	case JDCT_ISLOW:
				631	fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	632	if (jsimd_can_fdct_islow())
				633	fdct->dct = jsimd_fdct_islow;
				634	else
				635	fdct->dct = jpeg_fdct_islow;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	636	break;
				637	#endif
				638	#ifdef DCT_IFAST_SUPPORTED
				639	case JDCT_IFAST:
				640	fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	641	if (jsimd_can_fdct_ifast())
				642	fdct->dct = jsimd_fdct_ifast;
				643	else
				644	fdct->dct = jpeg_fdct_ifast;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	645	break;
				646	#endif
				647	#ifdef DCT_FLOAT_SUPPORTED
				648	case JDCT_FLOAT:
				649	fdct->pub.forward_DCT = forward_DCT_float;
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	650	if (jsimd_can_fdct_float())
				651	fdct->float_dct = jsimd_fdct_float;
				652	else
				653	fdct->float_dct = jpeg_fdct_float;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	654	break;
				655	#endif
				656	default:
				657	ERREXIT(cinfo, JERR_NOT_COMPILED);
				658	break;
				659	}
				660
				661	/* ...then the supporting stages. */
				662	switch (cinfo->dct_method) {
				663	#ifdef DCT_ISLOW_SUPPORTED
				664	case JDCT_ISLOW:
				665	#endif
				666	#ifdef DCT_IFAST_SUPPORTED
				667	case JDCT_IFAST:
				668	#endif
				669	#if defined(DCT_ISLOW_SUPPORTED) \|\| defined(DCT_IFAST_SUPPORTED)
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	670	if (jsimd_can_convsamp())
				671	fdct->convsamp = jsimd_convsamp;
				672	else
				673	fdct->convsamp = convsamp;
				674	if (jsimd_can_quantize())
				675	fdct->quantize = jsimd_quantize;
				676	else
				677	fdct->quantize = quantize;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	678	break;
				679	#endif
				680	#ifdef DCT_FLOAT_SUPPORTED
				681	case JDCT_FLOAT:
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	682	if (jsimd_can_convsamp_float())
				683	fdct->float_convsamp = jsimd_convsamp_float;
				684	else
				685	fdct->float_convsamp = convsamp_float;
				686	if (jsimd_can_quantize_float())
				687	fdct->float_quantize = jsimd_quantize_float;
				688	else
				689	fdct->float_quantize = quantize_float;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	690	break;
				691	#endif
				692	default:
				693	ERREXIT(cinfo, JERR_NOT_COMPILED);
				694	break;
				695	}
				696
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	697	/* Allocate workspace memory */
				698	#ifdef DCT_FLOAT_SUPPORTED
				699	if (cinfo->dct_method == JDCT_FLOAT)
				700	fdct->float_workspace = (FAST_FLOAT *)
				701	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC	5de454b	2014-05-18 19:04:03 +0000	[diff] [blame]	702	sizeof(FAST_FLOAT) * DCTSIZE2);
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	703	else
				704	#endif
				705	fdct->workspace = (DCTELEM *)
				706	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC	5de454b	2014-05-18 19:04:03 +0000	[diff] [blame]	707	sizeof(DCTELEM) * DCTSIZE2);
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	708
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	709	/* Mark divisor tables unallocated */
				710	for (i = 0; i < NUM_QUANT_TBLS; i++) {
				711	fdct->divisors[i] = NULL;
				712	#ifdef DCT_FLOAT_SUPPORTED
				713	fdct->float_divisors[i] = NULL;
				714	#endif
				715	}
				716	}