Blame - jcdctmgr.c - platform/external/libjpeg-turbo

blob: 3234a01aa14ca91421f133b2f6ed62881e85aca9 [file] [log] [blame]

Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	1	/*
				2	* jcdctmgr.c
				3	*
DRC	a73e870	2012-12-31 02:52:30 +0000	[diff] [blame]	4	* This file was part of the Independent JPEG Group's software:
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	5	* Copyright (C) 1994-1996, Thomas G. Lane.
DRC	a6ef282	2013-09-28 03:23:49 +0000	[diff] [blame]	6	* libjpeg-turbo Modifications:
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	7	* Copyright (C) 1999-2006, MIYASAKA Masaru.
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	8	* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame]	9	* Copyright (C) 2011 D. R. Commander
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	10	* For conditions of distribution and use, see the accompanying README file.
				11	*
				12	* This file contains the forward-DCT management logic.
				13	* This code selects a particular DCT implementation to be used,
				14	* and it performs related housekeeping chores including coefficient
				15	* quantization.
				16	*/
				17
				18	#define JPEG_INTERNALS
				19	#include "jinclude.h"
				20	#include "jpeglib.h"
				21	#include "jdct.h" /* Private declarations for DCT subsystem */
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	22	#include "jsimddct.h"
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	23
				24
				25	/* Private subobject for this module */
				26
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	27	typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
				28	typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
				29
				30	typedef JMETHOD(void, convsamp_method_ptr,
				31	(JSAMPARRAY sample_data, JDIMENSION start_col,
				32	DCTELEM * workspace));
				33	typedef JMETHOD(void, float_convsamp_method_ptr,
				34	(JSAMPARRAY sample_data, JDIMENSION start_col,
				35	FAST_FLOAT *workspace));
				36
				37	typedef JMETHOD(void, quantize_method_ptr,
				38	(JCOEFPTR coef_block, DCTELEM * divisors,
				39	DCTELEM * workspace));
				40	typedef JMETHOD(void, float_quantize_method_ptr,
				41	(JCOEFPTR coef_block, FAST_FLOAT * divisors,
				42	FAST_FLOAT * workspace));
				43
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame]	44	METHODDEF(void) quantize (JCOEFPTR, DCTELEM , DCTELEM );
				45
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	46	typedef struct {
				47	struct jpeg_forward_dct pub; /* public fields */
				48
				49	/* Pointer to the DCT routine actually in use */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	50	forward_DCT_method_ptr dct;
				51	convsamp_method_ptr convsamp;
				52	quantize_method_ptr quantize;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	53
				54	/* The actual post-DCT divisors --- not identical to the quant table
				55	* entries, because of scaling (especially for an unnormalized DCT).
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	56	* Each table is given in normal array order.
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	57	*/
				58	DCTELEM * divisors[NUM_QUANT_TBLS];
				59
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	60	/* work area for FDCT subroutine */
				61	DCTELEM * workspace;
				62
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	63	#ifdef DCT_FLOAT_SUPPORTED
				64	/* Same as above for the floating-point case. */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	65	float_DCT_method_ptr float_dct;
				66	float_convsamp_method_ptr float_convsamp;
				67	float_quantize_method_ptr float_quantize;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	68	FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	69	FAST_FLOAT * float_workspace;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	70	#endif
				71	} my_fdct_controller;
				72
				73	typedef my_fdct_controller * my_fdct_ptr;
				74
				75
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	76	/*
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	77	* Find the highest bit in an integer through binary search.
				78	*/
				79	LOCAL(int)
DRC	fc5dc4f	2009-10-01 22:26:14 +0000	[diff] [blame]	80	flss (UINT16 val)
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	81	{
				82	int bit;
				83
				84	bit = 16;
				85
				86	if (!val)
				87	return 0;
				88
				89	if (!(val & 0xff00)) {
				90	bit -= 8;
				91	val <<= 8;
				92	}
				93	if (!(val & 0xf000)) {
				94	bit -= 4;
				95	val <<= 4;
				96	}
				97	if (!(val & 0xc000)) {
				98	bit -= 2;
				99	val <<= 2;
				100	}
				101	if (!(val & 0x8000)) {
				102	bit -= 1;
				103	val <<= 1;
				104	}
				105
				106	return bit;
				107	}
				108
				109	/*
				110	* Compute values to do a division using reciprocal.
				111	*
				112	* This implementation is based on an algorithm described in
				113	* "How to optimize for the Pentium family of microprocessors"
				114	* (http://www.agner.org/assem/).
				115	* More information about the basic algorithm can be found in
				116	* the paper "Integer Division Using Reciprocals" by Robert Alverson.
				117	*
				118	* The basic idea is to replace x/d by x * d^-1. In order to store
				119	* d^-1 with enough precision we shift it left a few places. It turns
				120	* out that this algoright gives just enough precision, and also fits
				121	* into DCTELEM:
				122	*
				123	* b = (the number of significant bits in divisor) - 1
				124	* r = (word size) + b
				125	* f = 2^r / divisor
				126	*
				127	* f will not be an integer for most cases, so we need to compensate
				128	* for the rounding error introduced:
				129	*
				130	* no fractional part:
				131	*
				132	* result = input >> r
				133	*
				134	* fractional part of f < 0.5:
				135	*
				136	* round f down to nearest integer
				137	* result = ((input + 1) * f) >> r
				138	*
				139	* fractional part of f > 0.5:
				140	*
				141	* round f up to nearest integer
				142	* result = (input * f) >> r
				143	*
				144	* This is the original algorithm that gives truncated results. But we
				145	* want properly rounded results, so we replace "input" with
				146	* "input + divisor/2".
				147	*
				148	* In order to allow SIMD implementations we also tweak the values to
				149	* allow the same calculation to be made at all times:
				150	*
				151	* dctbl[0] = f rounded to nearest integer
				152	* dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
				153	* dctbl[2] = 1 << ((word size) * 2 - r)
				154	* dctbl[3] = r - (word size)
				155	*
				156	* dctbl[2] is for stupid instruction sets where the shift operation
				157	* isn't member wise (e.g. MMX).
				158	*
				159	* The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
				160	* is that most SIMD implementations have a "multiply and store top
				161	* half" operation.
				162	*
				163	* Lastly, we store each of the values in their own table instead
				164	* of in a consecutive manner, yet again in order to allow SIMD
				165	* routines.
				166	*/
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame]	167	LOCAL(int)
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	168	compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
				169	{
				170	UDCTELEM2 fq, fr;
				171	UDCTELEM c;
				172	int b, r;
				173
DRC	fc5dc4f	2009-10-01 22:26:14 +0000	[diff] [blame]	174	b = flss(divisor) - 1;
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	175	r = sizeof(DCTELEM) * 8 + b;
				176
				177	fq = ((UDCTELEM2)1 << r) / divisor;
				178	fr = ((UDCTELEM2)1 << r) % divisor;
				179
				180	c = divisor / 2; /* for rounding */
				181
				182	if (fr == 0) { /* divisor is power of two */
				183	/* fq will be one bit too large to fit in DCTELEM, so adjust */
				184	fq >>= 1;
				185	r--;
DRC	d65d99a	2012-01-31 03:39:23 +0000	[diff] [blame]	186	} else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	187	c++;
				188	} else { /* fractional part is > 0.5 */
				189	fq++;
				190	}
				191
				192	dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */
				193	dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */
				194	dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)82 - r)); /* scale */
				195	dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)8; / shift */
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame]	196
				197	if(r <= 16) return 0;
				198	else return 1;
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	199	}
				200
				201	/*
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	202	* Initialize for a processing pass.
				203	* Verify that all referenced Q-tables are present, and set up
				204	* the divisor table for each one.
				205	* In the current implementation, DCT of all components is done during
				206	* the first pass, even if only some components will be output in the
				207	* first scan. Hence all components should be examined here.
				208	*/
				209
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	210	METHODDEF(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	211	start_pass_fdctmgr (j_compress_ptr cinfo)
				212	{
				213	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
				214	int ci, qtblno, i;
				215	jpeg_component_info *compptr;
				216	JQUANT_TBL * qtbl;
				217	DCTELEM * dtbl;
				218
				219	for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
				220	ci++, compptr++) {
				221	qtblno = compptr->quant_tbl_no;
				222	/* Make sure specified quantization table is present */
				223	if (qtblno < 0 \|\| qtblno >= NUM_QUANT_TBLS \|\|
				224	cinfo->quant_tbl_ptrs[qtblno] == NULL)
				225	ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
				226	qtbl = cinfo->quant_tbl_ptrs[qtblno];
				227	/* Compute divisors for this quant table */
				228	/* We may do this more than once for same table, but it's not a big deal */
				229	switch (cinfo->dct_method) {
				230	#ifdef DCT_ISLOW_SUPPORTED
				231	case JDCT_ISLOW:
				232	/* For LL&M IDCT method, divisors are equal to raw quantization
				233	* coefficients multiplied by 8 (to counteract scaling).
				234	*/
				235	if (fdct->divisors[qtblno] == NULL) {
				236	fdct->divisors[qtblno] = (DCTELEM *)
				237	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	238	(DCTSIZE2 * 4) * SIZEOF(DCTELEM));
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	239	}
				240	dtbl = fdct->divisors[qtblno];
				241	for (i = 0; i < DCTSIZE2; i++) {
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame]	242	if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
				243	&& fdct->quantize == jsimd_quantize)
				244	fdct->quantize = quantize;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	245	}
				246	break;
				247	#endif
				248	#ifdef DCT_IFAST_SUPPORTED
				249	case JDCT_IFAST:
				250	{
				251	/* For AA&N IDCT method, divisors are equal to quantization
				252	* coefficients scaled by scalefactor[row]*scalefactor[col], where
				253	* scalefactor[0] = 1
				254	* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
				255	* We apply a further scale factor of 8.
				256	*/
				257	#define CONST_BITS 14
				258	static const INT16 aanscales[DCTSIZE2] = {
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	259	/* precomputed values scaled up by 14 bits */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	260	16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
				261	22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
				262	21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
				263	19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
				264	16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
				265	12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
				266	8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
				267	4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
				268	};
				269	SHIFT_TEMPS
				270
				271	if (fdct->divisors[qtblno] == NULL) {
				272	fdct->divisors[qtblno] = (DCTELEM *)
				273	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	274	(DCTSIZE2 * 4) * SIZEOF(DCTELEM));
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	275	}
				276	dtbl = fdct->divisors[qtblno];
				277	for (i = 0; i < DCTSIZE2; i++) {
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame]	278	if(!compute_reciprocal(
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	279	DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame]	280	(INT32) aanscales[i]),
DRC	a49c4e5	2011-02-18 20:50:08 +0000	[diff] [blame]	281	CONST_BITS-3), &dtbl[i])
				282	&& fdct->quantize == jsimd_quantize)
				283	fdct->quantize = quantize;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	284	}
				285	}
				286	break;
				287	#endif
				288	#ifdef DCT_FLOAT_SUPPORTED
				289	case JDCT_FLOAT:
				290	{
				291	/* For float AA&N IDCT method, divisors are equal to quantization
				292	* coefficients scaled by scalefactor[row]*scalefactor[col], where
				293	* scalefactor[0] = 1
				294	* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
				295	* We apply a further scale factor of 8.
				296	* What's actually stored is 1/divisor so that the inner loop can
				297	* use a multiplication rather than a division.
				298	*/
				299	FAST_FLOAT * fdtbl;
				300	int row, col;
				301	static const double aanscalefactor[DCTSIZE] = {
				302	1.0, 1.387039845, 1.306562965, 1.175875602,
				303	1.0, 0.785694958, 0.541196100, 0.275899379
				304	};
				305
				306	if (fdct->float_divisors[qtblno] == NULL) {
				307	fdct->float_divisors[qtblno] = (FAST_FLOAT *)
				308	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
				309	DCTSIZE2 * SIZEOF(FAST_FLOAT));
				310	}
				311	fdtbl = fdct->float_divisors[qtblno];
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame]	312	i = 0;
				313	for (row = 0; row < DCTSIZE; row++) {
				314	for (col = 0; col < DCTSIZE; col++) {
				315	fdtbl[i] = (FAST_FLOAT)
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	316	(1.0 / (((double) qtbl->quantval[i] *
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame]	317	aanscalefactor[row] * aanscalefactor[col] * 8.0)));
				318	i++;
				319	}
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	320	}
				321	}
				322	break;
				323	#endif
				324	default:
				325	ERREXIT(cinfo, JERR_NOT_COMPILED);
				326	break;
				327	}
				328	}
				329	}
				330
				331
				332	/*
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	333	* Load data into workspace, applying unsigned->signed conversion.
				334	*/
				335
				336	METHODDEF(void)
				337	convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
				338	{
				339	register DCTELEM *workspaceptr;
				340	register JSAMPROW elemptr;
				341	register int elemr;
				342
				343	workspaceptr = workspace;
				344	for (elemr = 0; elemr < DCTSIZE; elemr++) {
				345	elemptr = sample_data[elemr] + start_col;
				346
				347	#if DCTSIZE == 8 /* unroll the inner loop */
				348	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				349	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				350	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				351	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				352	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				353	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				354	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				355	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				356	#else
				357	{
				358	register int elemc;
				359	for (elemc = DCTSIZE; elemc > 0; elemc--)
				360	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				361	}
				362	#endif
				363	}
				364	}
				365
				366
				367	/*
				368	* Quantize/descale the coefficients, and store into coef_blocks[].
				369	*/
				370
				371	METHODDEF(void)
				372	quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
				373	{
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	374	int i;
				375	DCTELEM temp;
				376	UDCTELEM recip, corr, shift;
				377	UDCTELEM2 product;
				378	JCOEFPTR output_ptr = coef_block;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	379
				380	for (i = 0; i < DCTSIZE2; i++) {
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	381	temp = workspace[i];
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	382	recip = divisors[i + DCTSIZE2 * 0];
				383	corr = divisors[i + DCTSIZE2 * 1];
				384	shift = divisors[i + DCTSIZE2 * 3];
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	385
				386	if (temp < 0) {
				387	temp = -temp;
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	388	product = (UDCTELEM2)(temp + corr) * recip;
				389	product >>= shift + sizeof(DCTELEM)*8;
				390	temp = product;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	391	temp = -temp;
				392	} else {
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	393	product = (UDCTELEM2)(temp + corr) * recip;
				394	product >>= shift + sizeof(DCTELEM)*8;
				395	temp = product;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	396	}
Pierre Ossman	dedc42e	2009-03-09 13:23:04 +0000	[diff] [blame]	397
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	398	output_ptr[i] = (JCOEF) temp;
				399	}
				400	}
				401
				402
				403	/*
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	404	* Perform forward DCT on one or more blocks of a component.
				405	*
				406	* The input samples are taken from the sample_data[] array starting at
				407	* position start_row/start_col, and moving to the right for any additional
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame]	408	* blocks. The quantized coefficients are returned in coef_blocks[].
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	409	*/
				410
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	411	METHODDEF(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	412	forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
				413	JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
				414	JDIMENSION start_row, JDIMENSION start_col,
				415	JDIMENSION num_blocks)
				416	/* This version is used for integer DCT implementations. */
				417	{
				418	/* This routine is heavily used, so it's worth coding it tightly. */
				419	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	420	DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	421	DCTELEM * workspace;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	422	JDIMENSION bi;
				423
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	424	/* Make sure the compiler doesn't look up these every pass */
				425	forward_DCT_method_ptr do_dct = fdct->dct;
				426	convsamp_method_ptr do_convsamp = fdct->convsamp;
				427	quantize_method_ptr do_quantize = fdct->quantize;
Pierre Ossman	dc5db14	2009-03-13 12:17:26 +0000	[diff] [blame]	428	workspace = fdct->workspace;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	429
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	430	sample_data += start_row; /* fold in the vertical offset once */
				431
				432	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
				433	/* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	434	(*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	435
				436	/* Perform the DCT */
				437	(*do_dct) (workspace);
				438
				439	/* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	440	(*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	441	}
				442	}
				443
				444
				445	#ifdef DCT_FLOAT_SUPPORTED
				446
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	447
				448	METHODDEF(void)
				449	convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
				450	{
				451	register FAST_FLOAT *workspaceptr;
				452	register JSAMPROW elemptr;
				453	register int elemr;
				454
				455	workspaceptr = workspace;
				456	for (elemr = 0; elemr < DCTSIZE; elemr++) {
				457	elemptr = sample_data[elemr] + start_col;
				458	#if DCTSIZE == 8 /* unroll the inner loop */
				459	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				460	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				461	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				462	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				463	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				464	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				465	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				466	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				467	#else
				468	{
				469	register int elemc;
				470	for (elemc = DCTSIZE; elemc > 0; elemc--)
				471	*workspaceptr++ = (FAST_FLOAT)
				472	(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
				473	}
				474	#endif
				475	}
				476	}
				477
				478
				479	METHODDEF(void)
				480	quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
				481	{
				482	register FAST_FLOAT temp;
				483	register int i;
				484	register JCOEFPTR output_ptr = coef_block;
				485
				486	for (i = 0; i < DCTSIZE2; i++) {
				487	/* Apply the quantization and scaling factor */
				488	temp = workspace[i] * divisors[i];
				489
				490	/* Round to nearest integer.
				491	* Since C does not specify the direction of rounding for negative
				492	* quotients, we have to force the dividend positive for portability.
				493	* The maximum coefficient size is +-16K (for 12-bit data), so this
				494	* code should work for either 16-bit or 32-bit ints.
				495	*/
				496	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
				497	}
				498	}
				499
				500
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	501	METHODDEF(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	502	forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
				503	JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
				504	JDIMENSION start_row, JDIMENSION start_col,
				505	JDIMENSION num_blocks)
				506	/* This version is used for floating-point DCT implementations. */
				507	{
				508	/* This routine is heavily used, so it's worth coding it tightly. */
				509	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	510	FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	511	FAST_FLOAT * workspace;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	512	JDIMENSION bi;
				513
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	514
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	515	/* Make sure the compiler doesn't look up these every pass */
				516	float_DCT_method_ptr do_dct = fdct->float_dct;
				517	float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
				518	float_quantize_method_ptr do_quantize = fdct->float_quantize;
Pierre Ossman	dc5db14	2009-03-13 12:17:26 +0000	[diff] [blame]	519	workspace = fdct->float_workspace;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	520
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	521	sample_data += start_row; /* fold in the vertical offset once */
				522
				523	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
				524	/* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	525	(*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	526
				527	/* Perform the DCT */
				528	(*do_dct) (workspace);
				529
				530	/* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	531	(*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	532	}
				533	}
				534
				535	#endif /* DCT_FLOAT_SUPPORTED */
				536
				537
				538	/*
				539	* Initialize FDCT manager.
				540	*/
				541
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	542	GLOBAL(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	543	jinit_forward_dct (j_compress_ptr cinfo)
				544	{
				545	my_fdct_ptr fdct;
				546	int i;
				547
				548	fdct = (my_fdct_ptr)
				549	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
				550	SIZEOF(my_fdct_controller));
				551	cinfo->fdct = (struct jpeg_forward_dct *) fdct;
				552	fdct->pub.start_pass = start_pass_fdctmgr;
				553
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	554	/* First determine the DCT... */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	555	switch (cinfo->dct_method) {
				556	#ifdef DCT_ISLOW_SUPPORTED
				557	case JDCT_ISLOW:
				558	fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	559	if (jsimd_can_fdct_islow())
				560	fdct->dct = jsimd_fdct_islow;
				561	else
				562	fdct->dct = jpeg_fdct_islow;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	563	break;
				564	#endif
				565	#ifdef DCT_IFAST_SUPPORTED
				566	case JDCT_IFAST:
				567	fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	568	if (jsimd_can_fdct_ifast())
				569	fdct->dct = jsimd_fdct_ifast;
				570	else
				571	fdct->dct = jpeg_fdct_ifast;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	572	break;
				573	#endif
				574	#ifdef DCT_FLOAT_SUPPORTED
				575	case JDCT_FLOAT:
				576	fdct->pub.forward_DCT = forward_DCT_float;
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	577	if (jsimd_can_fdct_float())
				578	fdct->float_dct = jsimd_fdct_float;
				579	else
				580	fdct->float_dct = jpeg_fdct_float;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	581	break;
				582	#endif
				583	default:
				584	ERREXIT(cinfo, JERR_NOT_COMPILED);
				585	break;
				586	}
				587
				588	/* ...then the supporting stages. */
				589	switch (cinfo->dct_method) {
				590	#ifdef DCT_ISLOW_SUPPORTED
				591	case JDCT_ISLOW:
				592	#endif
				593	#ifdef DCT_IFAST_SUPPORTED
				594	case JDCT_IFAST:
				595	#endif
				596	#if defined(DCT_ISLOW_SUPPORTED) \|\| defined(DCT_IFAST_SUPPORTED)
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	597	if (jsimd_can_convsamp())
				598	fdct->convsamp = jsimd_convsamp;
				599	else
				600	fdct->convsamp = convsamp;
				601	if (jsimd_can_quantize())
				602	fdct->quantize = jsimd_quantize;
				603	else
				604	fdct->quantize = quantize;
Pierre Ossman	49dcbfb	2009-03-09 10:37:20 +0000	[diff] [blame]	605	break;
				606	#endif
				607	#ifdef DCT_FLOAT_SUPPORTED
				608	case JDCT_FLOAT:
Pierre Ossman	59a3938	2009-03-09 13:15:56 +0000	[diff] [blame]	609	if (jsimd_can_convsamp_float())
				610	fdct->float_convsamp = jsimd_convsamp_float;
				611	else
				612	fdct->float_convsamp = convsamp_float;
				613	if (jsimd_can_quantize_float())
				614	fdct->float_quantize = jsimd_quantize_float;
				615	else
				616	fdct->float_quantize = quantize_float;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	617	break;
				618	#endif
				619	default:
				620	ERREXIT(cinfo, JERR_NOT_COMPILED);
				621	break;
				622	}
				623
Pierre Ossman	35c4719	2009-03-09 13:29:37 +0000	[diff] [blame]	624	/* Allocate workspace memory */
				625	#ifdef DCT_FLOAT_SUPPORTED
				626	if (cinfo->dct_method == JDCT_FLOAT)
				627	fdct->float_workspace = (FAST_FLOAT *)
				628	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
				629	SIZEOF(FAST_FLOAT) * DCTSIZE2);
				630	else
				631	#endif
				632	fdct->workspace = (DCTELEM *)
				633	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
				634	SIZEOF(DCTELEM) * DCTSIZE2);
				635
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	636	/* Mark divisor tables unallocated */
				637	for (i = 0; i < NUM_QUANT_TBLS; i++) {
				638	fdct->divisors[i] = NULL;
				639	#ifdef DCT_FLOAT_SUPPORTED
				640	fdct->float_divisors[i] = NULL;
				641	#endif
				642	}
				643	}