Blame - jcdctmgr.c - platform/external/libjpeg-turbo

blob: 711f9dab62906a520f8eb3ce1334f0264baa7901 [file] [log] [blame]

Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	1	/*
				2	* jcdctmgr.c
				3	*
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	4	* Copyright (C) 1994-1996, Thomas G. Lane.
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	5	* Copyright (C) 1999-2006, MIYASAKA Masaru.
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	6	* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame^]	7	* Copyright (C) 2011 D. R. Commander
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	8	* This file is part of the Independent JPEG Group's software.
				9	* For conditions of distribution and use, see the accompanying README file.
				10	*
				11	* This file contains the forward-DCT management logic.
				12	* This code selects a particular DCT implementation to be used,
				13	* and it performs related housekeeping chores including coefficient
				14	* quantization.
				15	*/
				16
				17	#define JPEG_INTERNALS
				18	#include "jinclude.h"
				19	#include "jpeglib.h"
				20	#include "jdct.h" /* Private declarations for DCT subsystem */
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	21	#include "jsimddct.h"
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	22
				23
				24	/* Private subobject for this module */
				25
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	26	typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
				27	typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
				28
				29	typedef JMETHOD(void, convsamp_method_ptr,
				30	(JSAMPARRAY sample_data, JDIMENSION start_col,
				31	DCTELEM * workspace));
				32	typedef JMETHOD(void, float_convsamp_method_ptr,
				33	(JSAMPARRAY sample_data, JDIMENSION start_col,
				34	FAST_FLOAT *workspace));
				35
				36	typedef JMETHOD(void, quantize_method_ptr,
				37	(JCOEFPTR coef_block, DCTELEM * divisors,
				38	DCTELEM * workspace));
				39	typedef JMETHOD(void, float_quantize_method_ptr,
				40	(JCOEFPTR coef_block, FAST_FLOAT * divisors,
				41	FAST_FLOAT * workspace));
				42
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame^]	43	METHODDEF(void) quantize (JCOEFPTR, DCTELEM , DCTELEM );
				44
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	45	typedef struct {
				46	struct jpeg_forward_dct pub; /* public fields */
				47
				48	/* Pointer to the DCT routine actually in use */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	49	forward_DCT_method_ptr dct;
				50	convsamp_method_ptr convsamp;
				51	quantize_method_ptr quantize;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	52
				53	/* The actual post-DCT divisors --- not identical to the quant table
				54	* entries, because of scaling (especially for an unnormalized DCT).
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	55	* Each table is given in normal array order.
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	56	*/
				57	DCTELEM * divisors[NUM_QUANT_TBLS];
				58
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	59	/* work area for FDCT subroutine */
				60	DCTELEM * workspace;
				61
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	62	#ifdef DCT_FLOAT_SUPPORTED
				63	/* Same as above for the floating-point case. */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	64	float_DCT_method_ptr float_dct;
				65	float_convsamp_method_ptr float_convsamp;
				66	float_quantize_method_ptr float_quantize;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	67	FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	68	FAST_FLOAT * float_workspace;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	69	#endif
				70	} my_fdct_controller;
				71
				72	typedef my_fdct_controller * my_fdct_ptr;
				73
				74
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	75	/*
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	76	* Find the highest bit in an integer through binary search.
				77	*/
				78	LOCAL(int)
DRC	fc5dc4f	2009-10-01 22:26:14 +0000	[diff] [blame]	79	flss (UINT16 val)
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	80	{
				81	int bit;
				82
				83	bit = 16;
				84
				85	if (!val)
				86	return 0;
				87
				88	if (!(val & 0xff00)) {
				89	bit -= 8;
				90	val <<= 8;
				91	}
				92	if (!(val & 0xf000)) {
				93	bit -= 4;
				94	val <<= 4;
				95	}
				96	if (!(val & 0xc000)) {
				97	bit -= 2;
				98	val <<= 2;
				99	}
				100	if (!(val & 0x8000)) {
				101	bit -= 1;
				102	val <<= 1;
				103	}
				104
				105	return bit;
				106	}
				107
				108	/*
				109	* Compute values to do a division using reciprocal.
				110	*
				111	* This implementation is based on an algorithm described in
				112	* "How to optimize for the Pentium family of microprocessors"
				113	* (http://www.agner.org/assem/).
				114	* More information about the basic algorithm can be found in
				115	* the paper "Integer Division Using Reciprocals" by Robert Alverson.
				116	*
				117	* The basic idea is to replace x/d by x * d^-1. In order to store
				118	* d^-1 with enough precision we shift it left a few places. It turns
				119	* out that this algorithm gives just enough precision, and also fits
				120	* into DCTELEM:
				121	*
				122	* b = (the number of significant bits in divisor) - 1
				123	* r = (word size) + b
				124	* f = 2^r / divisor
				125	*
				126	* f will not be an integer for most cases, so we need to compensate
				127	* for the rounding error introduced:
				128	*
				129	* no fractional part:
				130	*
				131	* result = input >> r
				132	*
				133	* fractional part of f < 0.5:
				134	*
				135	* round f down to nearest integer
				136	* result = ((input + 1) * f) >> r
				137	*
				138	* fractional part of f > 0.5:
				139	*
				140	* round f up to nearest integer
				141	* result = (input * f) >> r
				142	*
				143	* This is the original algorithm that gives truncated results. But we
				144	* want properly rounded results, so we replace "input" with
				145	* "input + divisor/2".
				146	*
				147	* In order to allow SIMD implementations we also tweak the values to
				148	* allow the same calculation to be made at all times:
				149	*
				150	* dctbl[0] = f rounded to nearest integer
				151	* dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
				152	* dctbl[2] = 1 << ((word size) * 2 - r)
				153	* dctbl[3] = r - (word size)
				154	*
				155	* dctbl[2] is for stupid instruction sets where the shift operation
				156	* isn't member wise (e.g. MMX).
				157	*
				158	* The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
				159	* is that most SIMD implementations have a "multiply and store top
				160	* half" operation.
				161	*
				162	* Lastly, we store each of the values in their own table instead
				163	* of in a consecutive manner, yet again in order to allow SIMD
				164	* routines.
				165	*/
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame^]	166	LOCAL(int)
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	167	compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
				168	{
				169	UDCTELEM2 fq, fr;
				170	UDCTELEM c;
				171	int b, r;
				172
DRC	fc5dc4f	2009-10-01 22:26:14 +0000	[diff] [blame]	173	b = flss(divisor) - 1;
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	174	r = sizeof(DCTELEM) * 8 + b;
				175
				176	fq = ((UDCTELEM2)1 << r) / divisor;
				177	fr = ((UDCTELEM2)1 << r) % divisor;
				178
				179	c = divisor / 2; /* for rounding */
				180
				181	if (fr == 0) { /* divisor is power of two */
				182	/* fq will be one bit too large to fit in DCTELEM, so adjust */
				183	fq >>= 1;
				184	r--;
				185	} else if (fr <= (divisor / 2)) { /* fractional part is < 0.5 */
				186	c++;
				187	} else { /* fractional part is > 0.5 */
				188	fq++;
				189	}
				190
				191	dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */
				192	dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */
				193	dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)82 - r)); /* scale */
				194	dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)8; / shift */
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame^]	195
				196	if(r <= 16) return 0;
				197	else return 1;
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	198	}
				199
				200	/*
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	201	* Initialize for a processing pass.
				202	* Verify that all referenced Q-tables are present, and set up
				203	* the divisor table for each one.
				204	* In the current implementation, DCT of all components is done during
				205	* the first pass, even if only some components will be output in the
				206	* first scan. Hence all components should be examined here.
				207	*/
				208
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	209	METHODDEF(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	210	start_pass_fdctmgr (j_compress_ptr cinfo)
				211	{
				212	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
				213	int ci, qtblno, i;
				214	jpeg_component_info *compptr;
				215	JQUANT_TBL * qtbl;
				216	DCTELEM * dtbl;
				217
				218	for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
				219	ci++, compptr++) {
				220	qtblno = compptr->quant_tbl_no;
				221	/* Make sure specified quantization table is present */
				222	if (qtblno < 0 \|\| qtblno >= NUM_QUANT_TBLS \|\|
				223	cinfo->quant_tbl_ptrs[qtblno] == NULL)
				224	ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
				225	qtbl = cinfo->quant_tbl_ptrs[qtblno];
				226	/* Compute divisors for this quant table */
				227	/* We may do this more than once for same table, but it's not a big deal */
				228	switch (cinfo->dct_method) {
				229	#ifdef DCT_ISLOW_SUPPORTED
				230	case JDCT_ISLOW:
				231	/* For LL&M IDCT method, divisors are equal to raw quantization
				232	* coefficients multiplied by 8 (to counteract scaling).
				233	*/
				234	if (fdct->divisors[qtblno] == NULL) {
				235	fdct->divisors[qtblno] = (DCTELEM *)
				236	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	237	(DCTSIZE2 * 4) * SIZEOF(DCTELEM));
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	238	}
				239	dtbl = fdct->divisors[qtblno];
				240	for (i = 0; i < DCTSIZE2; i++) {
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame^]	241	if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
				242	&& fdct->quantize == jsimd_quantize)
				243	fdct->quantize = quantize;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	244	}
				245	break;
				246	#endif
				247	#ifdef DCT_IFAST_SUPPORTED
				248	case JDCT_IFAST:
				249	{
				250	/* For AA&N IDCT method, divisors are equal to quantization
				251	* coefficients scaled by scalefactor[row]*scalefactor[col], where
				252	* scalefactor[0] = 1
				253	* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
				254	* We apply a further scale factor of 8.
				255	*/
				256	#define CONST_BITS 14
				257	static const INT16 aanscales[DCTSIZE2] = {
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	258	/* precomputed values scaled up by 14 bits */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	259	16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
				260	22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
				261	21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
				262	19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
				263	16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
				264	12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
				265	8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
				266	4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
				267	};
				268	SHIFT_TEMPS
				269
				270	if (fdct->divisors[qtblno] == NULL) {
				271	fdct->divisors[qtblno] = (DCTELEM *)
				272	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	273	(DCTSIZE2 * 4) * SIZEOF(DCTELEM));
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	274	}
				275	dtbl = fdct->divisors[qtblno];
				276	for (i = 0; i < DCTSIZE2; i++) {
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame^]	277	if(!compute_reciprocal(
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	278	DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame]	279	(INT32) aanscales[i]),
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame^]	280	CONST_BITS-3), &dtbl[i])
				281	&& fdct->quantize == jsimd_quantize)
				282	fdct->quantize = quantize;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	283	}
				284	}
				285	break;
				286	#endif
				287	#ifdef DCT_FLOAT_SUPPORTED
				288	case JDCT_FLOAT:
				289	{
				290	/* For float AA&N IDCT method, divisors are equal to quantization
				291	* coefficients scaled by scalefactor[row]*scalefactor[col], where
				292	* scalefactor[0] = 1
				293	* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
				294	* We apply a further scale factor of 8.
				295	* What's actually stored is 1/divisor so that the inner loop can
				296	* use a multiplication rather than a division.
				297	*/
				298	FAST_FLOAT * fdtbl;
				299	int row, col;
				300	static const double aanscalefactor[DCTSIZE] = {
				301	1.0, 1.387039845, 1.306562965, 1.175875602,
				302	1.0, 0.785694958, 0.541196100, 0.275899379
				303	};
				304
				305	if (fdct->float_divisors[qtblno] == NULL) {
				306	fdct->float_divisors[qtblno] = (FAST_FLOAT *)
				307	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
				308	DCTSIZE2 * SIZEOF(FAST_FLOAT));
				309	}
				310	fdtbl = fdct->float_divisors[qtblno];
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame]	311	i = 0;
				312	for (row = 0; row < DCTSIZE; row++) {
				313	for (col = 0; col < DCTSIZE; col++) {
				314	fdtbl[i] = (FAST_FLOAT)
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	315	(1.0 / (((double) qtbl->quantval[i] *
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame]	316	aanscalefactor[row] * aanscalefactor[col] * 8.0)));
				317	i++;
				318	}
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	319	}
				320	}
				321	break;
				322	#endif
				323	default:
				324	ERREXIT(cinfo, JERR_NOT_COMPILED);
				325	break;
				326	}
				327	}
				328	}
				329
				330
				331	/*
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	332	* Load data into workspace, applying unsigned->signed conversion.
				333	*/
				334
				335	METHODDEF(void)
				336	convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
				337	{
				338	register DCTELEM *workspaceptr;
				339	register JSAMPROW elemptr;
				340	register int elemr;
				341
				342	workspaceptr = workspace;
				343	for (elemr = 0; elemr < DCTSIZE; elemr++) {
				344	elemptr = sample_data[elemr] + start_col;
				345
				346	#if DCTSIZE == 8 /* unroll the inner loop */
				347	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				348	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				349	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				350	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				351	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				352	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				353	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				354	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				355	#else
				356	{
				357	register int elemc;
				358	for (elemc = DCTSIZE; elemc > 0; elemc--)
				359	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				360	}
				361	#endif
				362	}
				363	}
				364
				365
				366	/*
				367	* Quantize/descale the coefficients, and store into coef_blocks[].
				368	*/
				369
				370	METHODDEF(void)
				371	quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
				372	{
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	373	int i;
				374	DCTELEM temp;
				375	UDCTELEM recip, corr, shift;
				376	UDCTELEM2 product;
				377	JCOEFPTR output_ptr = coef_block;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	378
				379	for (i = 0; i < DCTSIZE2; i++) {
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	380	temp = workspace[i];
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	381	recip = divisors[i + DCTSIZE2 * 0];
				382	corr = divisors[i + DCTSIZE2 * 1];
				383	shift = divisors[i + DCTSIZE2 * 3];
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	384
				385	if (temp < 0) {
				386	temp = -temp;
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	387	product = (UDCTELEM2)(temp + corr) * recip;
				388	product >>= shift + sizeof(DCTELEM)*8;
				389	temp = product;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	390	temp = -temp;
				391	} else {
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	392	product = (UDCTELEM2)(temp + corr) * recip;
				393	product >>= shift + sizeof(DCTELEM)*8;
				394	temp = product;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	395	}
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	396
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	397	output_ptr[i] = (JCOEF) temp;
				398	}
				399	}
				400
				401
				402	/*
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	403	* Perform forward DCT on one or more blocks of a component.
				404	*
				405	* The input samples are taken from the sample_data[] array starting at
				406	* position start_row/start_col, and moving to the right for any additional
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame]	407	* blocks. The quantized coefficients are returned in coef_blocks[].
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	408	*/
				409
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	410	METHODDEF(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	411	forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
				412	JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
				413	JDIMENSION start_row, JDIMENSION start_col,
				414	JDIMENSION num_blocks)
				415	/* This version is used for integer DCT implementations. */
				416	{
				417	/* This routine is heavily used, so it's worth coding it tightly. */
				418	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	419	DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	420	DCTELEM * workspace;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	421	JDIMENSION bi;
				422
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	423	/* Make sure the compiler doesn't look up these every pass */
				424	forward_DCT_method_ptr do_dct = fdct->dct;
				425	convsamp_method_ptr do_convsamp = fdct->convsamp;
				426	quantize_method_ptr do_quantize = fdct->quantize;
Pierre Ossman	dc5db14	2009-03-13 12:17:26 +0000	[diff] [blame]	427	workspace = fdct->workspace;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	428
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	429	sample_data += start_row; /* fold in the vertical offset once */
				430
				431	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
				432	/* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	433	(*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	434
				435	/* Perform the DCT */
				436	(*do_dct) (workspace);
				437
				438	/* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	439	(*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	440	}
				441	}
				442
				443
				444	#ifdef DCT_FLOAT_SUPPORTED
				445
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	446
				447	METHODDEF(void)
				448	convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
				449	{
				450	register FAST_FLOAT *workspaceptr;
				451	register JSAMPROW elemptr;
				452	register int elemr;
				453
				454	workspaceptr = workspace;
				455	for (elemr = 0; elemr < DCTSIZE; elemr++) {
				456	elemptr = sample_data[elemr] + start_col;
				457	#if DCTSIZE == 8 /* unroll the inner loop */
				458	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				459	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				460	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				461	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				462	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				463	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				464	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				465	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				466	#else
				467	{
				468	register int elemc;
				469	for (elemc = DCTSIZE; elemc > 0; elemc--)
				470	*workspaceptr++ = (FAST_FLOAT)
				471	(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
				472	}
				473	#endif
				474	}
				475	}
				476
				477
				478	METHODDEF(void)
				479	quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
				480	{
				481	register FAST_FLOAT temp;
				482	register int i;
				483	register JCOEFPTR output_ptr = coef_block;
				484
				485	for (i = 0; i < DCTSIZE2; i++) {
				486	/* Apply the quantization and scaling factor */
				487	temp = workspace[i] * divisors[i];
				488
				489	/* Round to nearest integer.
				490	* Since C does not specify the direction of rounding for negative
				491	* quotients, we have to force the dividend positive for portability.
				492	* The maximum coefficient size is +-16K (for 12-bit data), so this
				493	* code should work for either 16-bit or 32-bit ints.
				494	*/
				495	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
				496	}
				497	}
				498
				499
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	500	METHODDEF(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	501	forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
				502	JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
				503	JDIMENSION start_row, JDIMENSION start_col,
				504	JDIMENSION num_blocks)
				505	/* This version is used for floating-point DCT implementations. */
				506	{
				507	/* This routine is heavily used, so it's worth coding it tightly. */
				508	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	509	FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	510	FAST_FLOAT * workspace;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	511	JDIMENSION bi;
				512
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	513
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	514	/* Make sure the compiler doesn't look up these every pass */
				515	float_DCT_method_ptr do_dct = fdct->float_dct;
				516	float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
				517	float_quantize_method_ptr do_quantize = fdct->float_quantize;
Pierre Ossman	dc5db14	2009-03-13 12:17:26 +0000	[diff] [blame]	518	workspace = fdct->float_workspace;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	519
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	520	sample_data += start_row; /* fold in the vertical offset once */
				521
				522	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
				523	/* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	524	(*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	525
				526	/* Perform the DCT */
				527	(*do_dct) (workspace);
				528
				529	/* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	530	(*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	531	}
				532	}
				533
				534	#endif /* DCT_FLOAT_SUPPORTED */
				535
				536
				537	/*
				538	* Initialize FDCT manager.
				539	*/
				540
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	541	GLOBAL(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	542	jinit_forward_dct (j_compress_ptr cinfo)
				543	{
				544	my_fdct_ptr fdct;
				545	int i;
				546
				547	fdct = (my_fdct_ptr)
				548	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
				549	SIZEOF(my_fdct_controller));
				550	cinfo->fdct = (struct jpeg_forward_dct *) fdct;
				551	fdct->pub.start_pass = start_pass_fdctmgr;
				552
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	553	/* First determine the DCT... */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	554	switch (cinfo->dct_method) {
				555	#ifdef DCT_ISLOW_SUPPORTED
				556	case JDCT_ISLOW:
				557	fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	558	if (jsimd_can_fdct_islow())
				559	fdct->dct = jsimd_fdct_islow;
				560	else
				561	fdct->dct = jpeg_fdct_islow;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	562	break;
				563	#endif
				564	#ifdef DCT_IFAST_SUPPORTED
				565	case JDCT_IFAST:
				566	fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	567	if (jsimd_can_fdct_ifast())
				568	fdct->dct = jsimd_fdct_ifast;
				569	else
				570	fdct->dct = jpeg_fdct_ifast;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	571	break;
				572	#endif
				573	#ifdef DCT_FLOAT_SUPPORTED
				574	case JDCT_FLOAT:
				575	fdct->pub.forward_DCT = forward_DCT_float;
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	576	if (jsimd_can_fdct_float())
				577	fdct->float_dct = jsimd_fdct_float;
				578	else
				579	fdct->float_dct = jpeg_fdct_float;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	580	break;
				581	#endif
				582	default:
				583	ERREXIT(cinfo, JERR_NOT_COMPILED);
				584	break;
				585	}
				586
				587	/* ...then the supporting stages. */
				588	switch (cinfo->dct_method) {
				589	#ifdef DCT_ISLOW_SUPPORTED
				590	case JDCT_ISLOW:
				591	#endif
				592	#ifdef DCT_IFAST_SUPPORTED
				593	case JDCT_IFAST:
				594	#endif
				595	#if defined(DCT_ISLOW_SUPPORTED) \|\| defined(DCT_IFAST_SUPPORTED)
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	596	if (jsimd_can_convsamp())
				597	fdct->convsamp = jsimd_convsamp;
				598	else
				599	fdct->convsamp = convsamp;
				600	if (jsimd_can_quantize())
				601	fdct->quantize = jsimd_quantize;
				602	else
				603	fdct->quantize = quantize;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	604	break;
				605	#endif
				606	#ifdef DCT_FLOAT_SUPPORTED
				607	case JDCT_FLOAT:
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	608	if (jsimd_can_convsamp_float())
				609	fdct->float_convsamp = jsimd_convsamp_float;
				610	else
				611	fdct->float_convsamp = convsamp_float;
				612	if (jsimd_can_quantize_float())
				613	fdct->float_quantize = jsimd_quantize_float;
				614	else
				615	fdct->float_quantize = quantize_float;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	616	break;
				617	#endif
				618	default:
				619	ERREXIT(cinfo, JERR_NOT_COMPILED);
				620	break;
				621	}
				622
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	623	/* Allocate workspace memory */
				624	#ifdef DCT_FLOAT_SUPPORTED
				625	if (cinfo->dct_method == JDCT_FLOAT)
				626	fdct->float_workspace = (FAST_FLOAT *)
				627	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
				628	SIZEOF(FAST_FLOAT) * DCTSIZE2);
				629	else
				630	#endif
				631	fdct->workspace = (DCTELEM *)
				632	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
				633	SIZEOF(DCTELEM) * DCTSIZE2);
				634
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	635	/* Mark divisor tables unallocated */
				636	for (i = 0; i < NUM_QUANT_TBLS; i++) {
				637	fdct->divisors[i] = NULL;
				638	#ifdef DCT_FLOAT_SUPPORTED
				639	fdct->float_divisors[i] = NULL;
				640	#endif
				641	}
				642	}