Blame - jcdctmgr.c - platform/external/libjpeg-turbo

blob: 2d256590ebff380ea368eb6c1bdf4736539e5d28 [file] [log] [blame]

Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	1	/*
				2	* jcdctmgr.c
				3	*
DRC	a73e870	2012-12-31 02:52:30 +0000	[diff] [blame]	4	* This file was part of the Independent JPEG Group's software:
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	5	* Copyright (C) 1994-1996, Thomas G. Lane.
DRC	a6ef282	2013-09-28 03:23:49 +0000	[diff] [blame]	6	* libjpeg-turbo Modifications:
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	7	* Copyright (C) 1999-2006, MIYASAKA Masaru.
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	8	* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame]	9	* Copyright (C) 2011 D. R. Commander
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	10	* For conditions of distribution and use, see the accompanying README file.
				11	*
				12	* This file contains the forward-DCT management logic.
				13	* This code selects a particular DCT implementation to be used,
				14	* and it performs related housekeeping chores including coefficient
				15	* quantization.
				16	*/
				17
				18	#define JPEG_INTERNALS
				19	#include "jinclude.h"
				20	#include "jpeglib.h"
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	21	#include "jdct.h" /* Private declarations for DCT subsystem */
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	22	#include "jsimddct.h"
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	23
				24
				25	/* Private subobject for this module */
				26
DRC	bc56b75	2014-05-16 10:43:44 +0000	[diff] [blame]	27	typedef void (forward_DCT_method_ptr) (DCTELEM data);
				28	typedef void (float_DCT_method_ptr) (FAST_FLOAT data);
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	29
DRC	bc56b75	2014-05-16 10:43:44 +0000	[diff] [blame]	30	typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data,
				31	JDIMENSION start_col,
				32	DCTELEM * workspace);
				33	typedef void (*float_convsamp_method_ptr) (JSAMPARRAY sample_data,
				34	JDIMENSION start_col,
				35	FAST_FLOAT *workspace);
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	36
DRC	bc56b75	2014-05-16 10:43:44 +0000	[diff] [blame]	37	typedef void (quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM divisors,
				38	DCTELEM * workspace);
				39	typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block,
				40	FAST_FLOAT * divisors,
				41	FAST_FLOAT * workspace);
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	42
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame]	43	METHODDEF(void) quantize (JCOEFPTR, DCTELEM , DCTELEM );
				44
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	45	typedef struct {
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	46	struct jpeg_forward_dct pub; /* public fields */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	47
				48	/* Pointer to the DCT routine actually in use */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	49	forward_DCT_method_ptr dct;
				50	convsamp_method_ptr convsamp;
				51	quantize_method_ptr quantize;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	52
				53	/* The actual post-DCT divisors --- not identical to the quant table
				54	* entries, because of scaling (especially for an unnormalized DCT).
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	55	* Each table is given in normal array order.
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	56	*/
				57	DCTELEM * divisors[NUM_QUANT_TBLS];
				58
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	59	/* work area for FDCT subroutine */
				60	DCTELEM * workspace;
				61
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	62	#ifdef DCT_FLOAT_SUPPORTED
				63	/* Same as above for the floating-point case. */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	64	float_DCT_method_ptr float_dct;
				65	float_convsamp_method_ptr float_convsamp;
				66	float_quantize_method_ptr float_quantize;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	67	FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	68	FAST_FLOAT * float_workspace;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	69	#endif
				70	} my_fdct_controller;
				71
				72	typedef my_fdct_controller * my_fdct_ptr;
				73
				74
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	75	/*
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	76	* Find the highest bit in an integer through binary search.
				77	*/
				78	LOCAL(int)
DRC	fc5dc4f	2009-10-01 22:26:14 +0000	[diff] [blame]	79	flss (UINT16 val)
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	80	{
				81	int bit;
				82
				83	bit = 16;
				84
				85	if (!val)
				86	return 0;
				87
				88	if (!(val & 0xff00)) {
				89	bit -= 8;
				90	val <<= 8;
				91	}
				92	if (!(val & 0xf000)) {
				93	bit -= 4;
				94	val <<= 4;
				95	}
				96	if (!(val & 0xc000)) {
				97	bit -= 2;
				98	val <<= 2;
				99	}
				100	if (!(val & 0x8000)) {
				101	bit -= 1;
				102	val <<= 1;
				103	}
				104
				105	return bit;
				106	}
				107
				108	/*
				109	* Compute values to do a division using reciprocal.
				110	*
				111	* This implementation is based on an algorithm described in
				112	* "How to optimize for the Pentium family of microprocessors"
				113	* (http://www.agner.org/assem/).
				114	* More information about the basic algorithm can be found in
				115	* the paper "Integer Division Using Reciprocals" by Robert Alverson.
				116	*
				117	* The basic idea is to replace x/d by x * d^-1. In order to store
				118	* d^-1 with enough precision we shift it left a few places. It turns
				119	* out that this algorithm gives just enough precision, and also fits
				120	* into DCTELEM:
				121	*
				122	* b = (the number of significant bits in divisor) - 1
				123	* r = (word size) + b
				124	* f = 2^r / divisor
				125	*
				126	* f will not be an integer for most cases, so we need to compensate
				127	* for the rounding error introduced:
				128	*
				129	* no fractional part:
				130	*
				131	* result = input >> r
				132	*
				133	* fractional part of f < 0.5:
				134	*
				135	* round f down to nearest integer
				136	* result = ((input + 1) * f) >> r
				137	*
				138	* fractional part of f > 0.5:
				139	*
				140	* round f up to nearest integer
				141	* result = (input * f) >> r
				142	*
				143	* This is the original algorithm that gives truncated results. But we
				144	* want properly rounded results, so we replace "input" with
				145	* "input + divisor/2".
				146	*
				147	* In order to allow SIMD implementations we also tweak the values to
				148	* allow the same calculation to be made at all times:
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	149	*
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	150	* dctbl[0] = f rounded to nearest integer
				151	* dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
				152	* dctbl[2] = 1 << ((word size) * 2 - r)
				153	* dctbl[3] = r - (word size)
				154	*
				155	* dctbl[2] is for stupid instruction sets where the shift operation
				156	* isn't member wise (e.g. MMX).
				157	*
				158	* The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
				159	* is that most SIMD implementations have a "multiply and store top
				160	* half" operation.
				161	*
				162	* Lastly, we store each of the values in their own table instead
				163	* of in a consecutive manner, yet again in order to allow SIMD
				164	* routines.
				165	*/
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame]	166	LOCAL(int)
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	167	compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
				168	{
				169	UDCTELEM2 fq, fr;
				170	UDCTELEM c;
				171	int b, r;
				172
DRC	fc5dc4f	2009-10-01 22:26:14 +0000	[diff] [blame]	173	b = flss(divisor) - 1;
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	174	r = sizeof(DCTELEM) * 8 + b;
				175
				176	fq = ((UDCTELEM2)1 << r) / divisor;
				177	fr = ((UDCTELEM2)1 << r) % divisor;
				178
				179	c = divisor / 2; /* for rounding */
				180
				181	if (fr == 0) { /* divisor is power of two */
				182	/* fq will be one bit too large to fit in DCTELEM, so adjust */
				183	fq >>= 1;
				184	r--;
DRC	d65d99a	2012-01-31 03:39:23 +0000	[diff] [blame]	185	} else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	186	c++;
				187	} else { /* fractional part is > 0.5 */
				188	fq++;
				189	}
				190
				191	dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */
				192	dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */
				193	dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)82 - r)); /* scale */
				194	dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)8; / shift */
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame]	195
				196	if(r <= 16) return 0;
				197	else return 1;
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	198	}
				199
				200	/*
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	201	* Initialize for a processing pass.
				202	* Verify that all referenced Q-tables are present, and set up
				203	* the divisor table for each one.
				204	* In the current implementation, DCT of all components is done during
				205	* the first pass, even if only some components will be output in the
				206	* first scan. Hence all components should be examined here.
				207	*/
				208
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	209	METHODDEF(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	210	start_pass_fdctmgr (j_compress_ptr cinfo)
				211	{
				212	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
				213	int ci, qtblno, i;
				214	jpeg_component_info *compptr;
				215	JQUANT_TBL * qtbl;
				216	DCTELEM * dtbl;
				217
				218	for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
				219	ci++, compptr++) {
				220	qtblno = compptr->quant_tbl_no;
				221	/* Make sure specified quantization table is present */
				222	if (qtblno < 0 \|\| qtblno >= NUM_QUANT_TBLS \|\|
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	223	cinfo->quant_tbl_ptrs[qtblno] == NULL)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	224	ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
				225	qtbl = cinfo->quant_tbl_ptrs[qtblno];
				226	/* Compute divisors for this quant table */
				227	/* We may do this more than once for same table, but it's not a big deal */
				228	switch (cinfo->dct_method) {
				229	#ifdef DCT_ISLOW_SUPPORTED
				230	case JDCT_ISLOW:
				231	/* For LL&M IDCT method, divisors are equal to raw quantization
				232	* coefficients multiplied by 8 (to counteract scaling).
				233	*/
				234	if (fdct->divisors[qtblno] == NULL) {
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	235	fdct->divisors[qtblno] = (DCTELEM *)
				236	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC	5de454b	2014-05-18 19:04:03 +0000	[diff] [blame]	237	(DCTSIZE2 * 4) * sizeof(DCTELEM));
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	238	}
				239	dtbl = fdct->divisors[qtblno];
				240	for (i = 0; i < DCTSIZE2; i++) {
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	241	if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
				242	&& fdct->quantize == jsimd_quantize)
				243	fdct->quantize = quantize;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	244	}
				245	break;
				246	#endif
				247	#ifdef DCT_IFAST_SUPPORTED
				248	case JDCT_IFAST:
				249	{
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	250	/* For AA&N IDCT method, divisors are equal to quantization
				251	* coefficients scaled by scalefactor[row]*scalefactor[col], where
				252	* scalefactor[0] = 1
				253	* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
				254	* We apply a further scale factor of 8.
				255	*/
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	256	#define CONST_BITS 14
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	257	static const INT16 aanscales[DCTSIZE2] = {
				258	/* precomputed values scaled up by 14 bits */
				259	16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
				260	22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
				261	21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
				262	19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
				263	16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
				264	12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
				265	8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
				266	4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
				267	};
				268	SHIFT_TEMPS
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	269
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	270	if (fdct->divisors[qtblno] == NULL) {
				271	fdct->divisors[qtblno] = (DCTELEM *)
				272	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC	5de454b	2014-05-18 19:04:03 +0000	[diff] [blame]	273	(DCTSIZE2 * 4) * sizeof(DCTELEM));
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	274	}
				275	dtbl = fdct->divisors[qtblno];
				276	for (i = 0; i < DCTSIZE2; i++) {
				277	if(!compute_reciprocal(
				278	DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
				279	(INT32) aanscales[i]),
				280	CONST_BITS-3), &dtbl[i])
				281	&& fdct->quantize == jsimd_quantize)
				282	fdct->quantize = quantize;
				283	}
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	284	}
				285	break;
				286	#endif
				287	#ifdef DCT_FLOAT_SUPPORTED
				288	case JDCT_FLOAT:
				289	{
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	290	/* For float AA&N IDCT method, divisors are equal to quantization
				291	* coefficients scaled by scalefactor[row]*scalefactor[col], where
				292	* scalefactor[0] = 1
				293	* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
				294	* We apply a further scale factor of 8.
				295	* What's actually stored is 1/divisor so that the inner loop can
				296	* use a multiplication rather than a division.
				297	*/
				298	FAST_FLOAT * fdtbl;
				299	int row, col;
				300	static const double aanscalefactor[DCTSIZE] = {
				301	1.0, 1.387039845, 1.306562965, 1.175875602,
				302	1.0, 0.785694958, 0.541196100, 0.275899379
				303	};
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	304
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	305	if (fdct->float_divisors[qtblno] == NULL) {
				306	fdct->float_divisors[qtblno] = (FAST_FLOAT *)
				307	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC	5de454b	2014-05-18 19:04:03 +0000	[diff] [blame]	308	DCTSIZE2 * sizeof(FAST_FLOAT));
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	309	}
				310	fdtbl = fdct->float_divisors[qtblno];
				311	i = 0;
				312	for (row = 0; row < DCTSIZE; row++) {
				313	for (col = 0; col < DCTSIZE; col++) {
				314	fdtbl[i] = (FAST_FLOAT)
				315	(1.0 / (((double) qtbl->quantval[i] *
				316	aanscalefactor[row] * aanscalefactor[col] * 8.0)));
				317	i++;
				318	}
				319	}
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	320	}
				321	break;
				322	#endif
				323	default:
				324	ERREXIT(cinfo, JERR_NOT_COMPILED);
				325	break;
				326	}
				327	}
				328	}
				329
				330
				331	/*
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	332	* Load data into workspace, applying unsigned->signed conversion.
				333	*/
				334
				335	METHODDEF(void)
				336	convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
				337	{
				338	register DCTELEM *workspaceptr;
				339	register JSAMPROW elemptr;
				340	register int elemr;
				341
				342	workspaceptr = workspace;
				343	for (elemr = 0; elemr < DCTSIZE; elemr++) {
				344	elemptr = sample_data[elemr] + start_col;
				345
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	346	#if DCTSIZE == 8 /* unroll the inner loop */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	347	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				348	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				349	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				350	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				351	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				352	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				353	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				354	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				355	#else
				356	{
				357	register int elemc;
				358	for (elemc = DCTSIZE; elemc > 0; elemc--)
				359	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				360	}
				361	#endif
				362	}
				363	}
				364
				365
				366	/*
				367	* Quantize/descale the coefficients, and store into coef_blocks[].
				368	*/
				369
				370	METHODDEF(void)
				371	quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
				372	{
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	373	int i;
				374	DCTELEM temp;
				375	UDCTELEM recip, corr, shift;
				376	UDCTELEM2 product;
				377	JCOEFPTR output_ptr = coef_block;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	378
				379	for (i = 0; i < DCTSIZE2; i++) {
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	380	temp = workspace[i];
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	381	recip = divisors[i + DCTSIZE2 * 0];
				382	corr = divisors[i + DCTSIZE2 * 1];
				383	shift = divisors[i + DCTSIZE2 * 3];
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	384
				385	if (temp < 0) {
				386	temp = -temp;
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	387	product = (UDCTELEM2)(temp + corr) * recip;
				388	product >>= shift + sizeof(DCTELEM)*8;
				389	temp = product;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	390	temp = -temp;
				391	} else {
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	392	product = (UDCTELEM2)(temp + corr) * recip;
				393	product >>= shift + sizeof(DCTELEM)*8;
				394	temp = product;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	395	}
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	396
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	397	output_ptr[i] = (JCOEF) temp;
				398	}
				399	}
				400
				401
				402	/*
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	403	* Perform forward DCT on one or more blocks of a component.
				404	*
				405	* The input samples are taken from the sample_data[] array starting at
				406	* position start_row/start_col, and moving to the right for any additional
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame]	407	* blocks. The quantized coefficients are returned in coef_blocks[].
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	408	*/
				409
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	410	METHODDEF(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	411	forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	412	JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
				413	JDIMENSION start_row, JDIMENSION start_col,
				414	JDIMENSION num_blocks)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	415	/* This version is used for integer DCT implementations. */
				416	{
				417	/* This routine is heavily used, so it's worth coding it tightly. */
				418	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	419	DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	420	DCTELEM * workspace;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	421	JDIMENSION bi;
				422
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	423	/* Make sure the compiler doesn't look up these every pass */
				424	forward_DCT_method_ptr do_dct = fdct->dct;
				425	convsamp_method_ptr do_convsamp = fdct->convsamp;
				426	quantize_method_ptr do_quantize = fdct->quantize;
Pierre Ossman	dc5db14	2009-03-13 12:17:26 +0000	[diff] [blame]	427	workspace = fdct->workspace;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	428
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	429	sample_data += start_row; /* fold in the vertical offset once */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	430
				431	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
				432	/* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	433	(*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	434
				435	/* Perform the DCT */
				436	(*do_dct) (workspace);
				437
				438	/* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	439	(*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	440	}
				441	}
				442
				443
				444	#ifdef DCT_FLOAT_SUPPORTED
				445
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	446
				447	METHODDEF(void)
				448	convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
				449	{
				450	register FAST_FLOAT *workspaceptr;
				451	register JSAMPROW elemptr;
				452	register int elemr;
				453
				454	workspaceptr = workspace;
				455	for (elemr = 0; elemr < DCTSIZE; elemr++) {
				456	elemptr = sample_data[elemr] + start_col;
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	457	#if DCTSIZE == 8 /* unroll the inner loop */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	458	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				459	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				460	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				461	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				462	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				463	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				464	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				465	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				466	#else
				467	{
				468	register int elemc;
				469	for (elemc = DCTSIZE; elemc > 0; elemc--)
				470	*workspaceptr++ = (FAST_FLOAT)
				471	(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
				472	}
				473	#endif
				474	}
				475	}
				476
				477
				478	METHODDEF(void)
				479	quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
				480	{
				481	register FAST_FLOAT temp;
				482	register int i;
				483	register JCOEFPTR output_ptr = coef_block;
				484
				485	for (i = 0; i < DCTSIZE2; i++) {
				486	/* Apply the quantization and scaling factor */
				487	temp = workspace[i] * divisors[i];
				488
				489	/* Round to nearest integer.
				490	* Since C does not specify the direction of rounding for negative
				491	* quotients, we have to force the dividend positive for portability.
				492	* The maximum coefficient size is +-16K (for 12-bit data), so this
				493	* code should work for either 16-bit or 32-bit ints.
				494	*/
				495	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
				496	}
				497	}
				498
				499
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	500	METHODDEF(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	501	forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	502	JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
				503	JDIMENSION start_row, JDIMENSION start_col,
				504	JDIMENSION num_blocks)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	505	/* This version is used for floating-point DCT implementations. */
				506	{
				507	/* This routine is heavily used, so it's worth coding it tightly. */
				508	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	509	FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	510	FAST_FLOAT * workspace;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	511	JDIMENSION bi;
				512
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	513
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	514	/* Make sure the compiler doesn't look up these every pass */
				515	float_DCT_method_ptr do_dct = fdct->float_dct;
				516	float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
				517	float_quantize_method_ptr do_quantize = fdct->float_quantize;
Pierre Ossman	dc5db14	2009-03-13 12:17:26 +0000	[diff] [blame]	518	workspace = fdct->float_workspace;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	519
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	520	sample_data += start_row; /* fold in the vertical offset once */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	521
				522	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
				523	/* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	524	(*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	525
				526	/* Perform the DCT */
				527	(*do_dct) (workspace);
				528
				529	/* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	530	(*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	531	}
				532	}
				533
				534	#endif /* DCT_FLOAT_SUPPORTED */
				535
				536
				537	/*
				538	* Initialize FDCT manager.
				539	*/
				540
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	541	GLOBAL(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	542	jinit_forward_dct (j_compress_ptr cinfo)
				543	{
				544	my_fdct_ptr fdct;
				545	int i;
				546
				547	fdct = (my_fdct_ptr)
				548	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC	5de454b	2014-05-18 19:04:03 +0000	[diff] [blame]	549	sizeof(my_fdct_controller));
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	550	cinfo->fdct = (struct jpeg_forward_dct *) fdct;
				551	fdct->pub.start_pass = start_pass_fdctmgr;
				552
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	553	/* First determine the DCT... */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	554	switch (cinfo->dct_method) {
				555	#ifdef DCT_ISLOW_SUPPORTED
				556	case JDCT_ISLOW:
				557	fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	558	if (jsimd_can_fdct_islow())
				559	fdct->dct = jsimd_fdct_islow;
				560	else
				561	fdct->dct = jpeg_fdct_islow;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	562	break;
				563	#endif
				564	#ifdef DCT_IFAST_SUPPORTED
				565	case JDCT_IFAST:
				566	fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	567	if (jsimd_can_fdct_ifast())
				568	fdct->dct = jsimd_fdct_ifast;
				569	else
				570	fdct->dct = jpeg_fdct_ifast;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	571	break;
				572	#endif
				573	#ifdef DCT_FLOAT_SUPPORTED
				574	case JDCT_FLOAT:
				575	fdct->pub.forward_DCT = forward_DCT_float;
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	576	if (jsimd_can_fdct_float())
				577	fdct->float_dct = jsimd_fdct_float;
				578	else
				579	fdct->float_dct = jpeg_fdct_float;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	580	break;
				581	#endif
				582	default:
				583	ERREXIT(cinfo, JERR_NOT_COMPILED);
				584	break;
				585	}
				586
				587	/* ...then the supporting stages. */
				588	switch (cinfo->dct_method) {
				589	#ifdef DCT_ISLOW_SUPPORTED
				590	case JDCT_ISLOW:
				591	#endif
				592	#ifdef DCT_IFAST_SUPPORTED
				593	case JDCT_IFAST:
				594	#endif
				595	#if defined(DCT_ISLOW_SUPPORTED) \|\| defined(DCT_IFAST_SUPPORTED)
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	596	if (jsimd_can_convsamp())
				597	fdct->convsamp = jsimd_convsamp;
				598	else
				599	fdct->convsamp = convsamp;
				600	if (jsimd_can_quantize())
				601	fdct->quantize = jsimd_quantize;
				602	else
				603	fdct->quantize = quantize;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	604	break;
				605	#endif
				606	#ifdef DCT_FLOAT_SUPPORTED
				607	case JDCT_FLOAT:
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	608	if (jsimd_can_convsamp_float())
				609	fdct->float_convsamp = jsimd_convsamp_float;
				610	else
				611	fdct->float_convsamp = convsamp_float;
				612	if (jsimd_can_quantize_float())
				613	fdct->float_quantize = jsimd_quantize_float;
				614	else
				615	fdct->float_quantize = quantize_float;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	616	break;
				617	#endif
				618	default:
				619	ERREXIT(cinfo, JERR_NOT_COMPILED);
				620	break;
				621	}
				622
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	623	/* Allocate workspace memory */
				624	#ifdef DCT_FLOAT_SUPPORTED
				625	if (cinfo->dct_method == JDCT_FLOAT)
				626	fdct->float_workspace = (FAST_FLOAT *)
				627	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC	5de454b	2014-05-18 19:04:03 +0000	[diff] [blame]	628	sizeof(FAST_FLOAT) * DCTSIZE2);
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	629	else
				630	#endif
				631	fdct->workspace = (DCTELEM *)
				632	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC	5de454b	2014-05-18 19:04:03 +0000	[diff] [blame]	633	sizeof(DCTELEM) * DCTSIZE2);
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	634
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	635	/* Mark divisor tables unallocated */
				636	for (i = 0; i < NUM_QUANT_TBLS; i++) {
				637	fdct->divisors[i] = NULL;
				638	#ifdef DCT_FLOAT_SUPPORTED
				639	fdct->float_divisors[i] = NULL;
				640	#endif
				641	}
				642	}