Blame - jcdctmgr.c - platform/external/libjpeg-turbo

blob: 588b844155e35fd1ef84f7f4b570c98af6e0cb6a [file] [log] [blame]

Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	1	/*
				2	* jcdctmgr.c
				3	*
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame^]	4	* Copyright (C) 1994-1995, Thomas G. Lane.
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	5	* This file is part of the Independent JPEG Group's software.
				6	* For conditions of distribution and use, see the accompanying README file.
				7	*
				8	* This file contains the forward-DCT management logic.
				9	* This code selects a particular DCT implementation to be used,
				10	* and it performs related housekeeping chores including coefficient
				11	* quantization.
				12	*/
				13
				14	#define JPEG_INTERNALS
				15	#include "jinclude.h"
				16	#include "jpeglib.h"
				17	#include "jdct.h" /* Private declarations for DCT subsystem */
				18
				19
				20	/* Private subobject for this module */
				21
				22	typedef struct {
				23	struct jpeg_forward_dct pub; /* public fields */
				24
				25	/* Pointer to the DCT routine actually in use */
				26	forward_DCT_method_ptr do_dct;
				27
				28	/* The actual post-DCT divisors --- not identical to the quant table
				29	* entries, because of scaling (especially for an unnormalized DCT).
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame^]	30	* Each table is given in normal array order; note that this must
				31	* be converted from the zigzag order of the quantization tables.
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	32	*/
				33	DCTELEM * divisors[NUM_QUANT_TBLS];
				34
				35	#ifdef DCT_FLOAT_SUPPORTED
				36	/* Same as above for the floating-point case. */
				37	float_DCT_method_ptr do_float_dct;
				38	FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
				39	#endif
				40	} my_fdct_controller;
				41
				42	typedef my_fdct_controller * my_fdct_ptr;
				43
				44
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	45	/*
				46	* Initialize for a processing pass.
				47	* Verify that all referenced Q-tables are present, and set up
				48	* the divisor table for each one.
				49	* In the current implementation, DCT of all components is done during
				50	* the first pass, even if only some components will be output in the
				51	* first scan. Hence all components should be examined here.
				52	*/
				53
				54	METHODDEF void
				55	start_pass_fdctmgr (j_compress_ptr cinfo)
				56	{
				57	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
				58	int ci, qtblno, i;
				59	jpeg_component_info *compptr;
				60	JQUANT_TBL * qtbl;
				61	DCTELEM * dtbl;
				62
				63	for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
				64	ci++, compptr++) {
				65	qtblno = compptr->quant_tbl_no;
				66	/* Make sure specified quantization table is present */
				67	if (qtblno < 0 \|\| qtblno >= NUM_QUANT_TBLS \|\|
				68	cinfo->quant_tbl_ptrs[qtblno] == NULL)
				69	ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
				70	qtbl = cinfo->quant_tbl_ptrs[qtblno];
				71	/* Compute divisors for this quant table */
				72	/* We may do this more than once for same table, but it's not a big deal */
				73	switch (cinfo->dct_method) {
				74	#ifdef DCT_ISLOW_SUPPORTED
				75	case JDCT_ISLOW:
				76	/* For LL&M IDCT method, divisors are equal to raw quantization
				77	* coefficients multiplied by 8 (to counteract scaling).
				78	*/
				79	if (fdct->divisors[qtblno] == NULL) {
				80	fdct->divisors[qtblno] = (DCTELEM *)
				81	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
				82	DCTSIZE2 * SIZEOF(DCTELEM));
				83	}
				84	dtbl = fdct->divisors[qtblno];
				85	for (i = 0; i < DCTSIZE2; i++) {
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame^]	86	dtbl[i] = ((DCTELEM) qtbl->quantval[jpeg_zigzag_order[i]]) << 3;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	87	}
				88	break;
				89	#endif
				90	#ifdef DCT_IFAST_SUPPORTED
				91	case JDCT_IFAST:
				92	{
				93	/* For AA&N IDCT method, divisors are equal to quantization
				94	* coefficients scaled by scalefactor[row]*scalefactor[col], where
				95	* scalefactor[0] = 1
				96	* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
				97	* We apply a further scale factor of 8.
				98	*/
				99	#define CONST_BITS 14
				100	static const INT16 aanscales[DCTSIZE2] = {
				101	/* precomputed values scaled up by 14 bits: in natural order */
				102	16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
				103	22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
				104	21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
				105	19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
				106	16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
				107	12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
				108	8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
				109	4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
				110	};
				111	SHIFT_TEMPS
				112
				113	if (fdct->divisors[qtblno] == NULL) {
				114	fdct->divisors[qtblno] = (DCTELEM *)
				115	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
				116	DCTSIZE2 * SIZEOF(DCTELEM));
				117	}
				118	dtbl = fdct->divisors[qtblno];
				119	for (i = 0; i < DCTSIZE2; i++) {
				120	dtbl[i] = (DCTELEM)
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame^]	121	DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[jpeg_zigzag_order[i]],
				122	(INT32) aanscales[i]),
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	123	CONST_BITS-3);
				124	}
				125	}
				126	break;
				127	#endif
				128	#ifdef DCT_FLOAT_SUPPORTED
				129	case JDCT_FLOAT:
				130	{
				131	/* For float AA&N IDCT method, divisors are equal to quantization
				132	* coefficients scaled by scalefactor[row]*scalefactor[col], where
				133	* scalefactor[0] = 1
				134	* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
				135	* We apply a further scale factor of 8.
				136	* What's actually stored is 1/divisor so that the inner loop can
				137	* use a multiplication rather than a division.
				138	*/
				139	FAST_FLOAT * fdtbl;
				140	int row, col;
				141	static const double aanscalefactor[DCTSIZE] = {
				142	1.0, 1.387039845, 1.306562965, 1.175875602,
				143	1.0, 0.785694958, 0.541196100, 0.275899379
				144	};
				145
				146	if (fdct->float_divisors[qtblno] == NULL) {
				147	fdct->float_divisors[qtblno] = (FAST_FLOAT *)
				148	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
				149	DCTSIZE2 * SIZEOF(FAST_FLOAT));
				150	}
				151	fdtbl = fdct->float_divisors[qtblno];
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame^]	152	i = 0;
				153	for (row = 0; row < DCTSIZE; row++) {
				154	for (col = 0; col < DCTSIZE; col++) {
				155	fdtbl[i] = (FAST_FLOAT)
				156	(1.0 / (((double) qtbl->quantval[jpeg_zigzag_order[i]] *
				157	aanscalefactor[row] * aanscalefactor[col] * 8.0)));
				158	i++;
				159	}
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	160	}
				161	}
				162	break;
				163	#endif
				164	default:
				165	ERREXIT(cinfo, JERR_NOT_COMPILED);
				166	break;
				167	}
				168	}
				169	}
				170
				171
				172	/*
				173	* Perform forward DCT on one or more blocks of a component.
				174	*
				175	* The input samples are taken from the sample_data[] array starting at
				176	* position start_row/start_col, and moving to the right for any additional
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame^]	177	* blocks. The quantized coefficients are returned in coef_blocks[].
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	178	*/
				179
				180	METHODDEF void
				181	forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
				182	JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
				183	JDIMENSION start_row, JDIMENSION start_col,
				184	JDIMENSION num_blocks)
				185	/* This version is used for integer DCT implementations. */
				186	{
				187	/* This routine is heavily used, so it's worth coding it tightly. */
				188	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
				189	forward_DCT_method_ptr do_dct = fdct->do_dct;
				190	DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
				191	DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */
				192	JDIMENSION bi;
				193
				194	sample_data += start_row; /* fold in the vertical offset once */
				195
				196	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
				197	/* Load data into workspace, applying unsigned->signed conversion */
				198	{ register DCTELEM *workspaceptr;
				199	register JSAMPROW elemptr;
				200	register int elemr;
				201
				202	workspaceptr = workspace;
				203	for (elemr = 0; elemr < DCTSIZE; elemr++) {
				204	elemptr = sample_data[elemr] + start_col;
				205	#if DCTSIZE == 8 /* unroll the inner loop */
				206	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				207	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				208	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				209	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				210	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				211	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				212	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				213	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				214	#else
				215	{ register int elemc;
				216	for (elemc = DCTSIZE; elemc > 0; elemc--) {
				217	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
				218	}
				219	}
				220	#endif
				221	}
				222	}
				223
				224	/* Perform the DCT */
				225	(*do_dct) (workspace);
				226
				227	/* Quantize/descale the coefficients, and store into coef_blocks[] */
				228	{ register DCTELEM temp, qval;
				229	register int i;
				230	register JCOEFPTR output_ptr = coef_blocks[bi];
				231
				232	for (i = 0; i < DCTSIZE2; i++) {
				233	qval = divisors[i];
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame^]	234	temp = workspace[i];
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	235	/* Divide the coefficient value by qval, ensuring proper rounding.
				236	* Since C does not specify the direction of rounding for negative
				237	* quotients, we have to force the dividend positive for portability.
				238	*
				239	* In most files, at least half of the output values will be zero
				240	* (at default quantization settings, more like three-quarters...)
				241	* so we should ensure that this case is fast. On many machines,
				242	* a comparison is enough cheaper than a divide to make a special test
				243	* a win. Since both inputs will be nonnegative, we need only test
				244	* for a < b to discover whether a/b is 0.
				245	* If your machine's division is fast enough, define FAST_DIVIDE.
				246	*/
				247	#ifdef FAST_DIVIDE
				248	#define DIVIDE_BY(a,b) a /= b
				249	#else
				250	#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
				251	#endif
				252	if (temp < 0) {
				253	temp = -temp;
				254	temp += qval>>1; /* for rounding */
				255	DIVIDE_BY(temp, qval);
				256	temp = -temp;
				257	} else {
				258	temp += qval>>1; /* for rounding */
				259	DIVIDE_BY(temp, qval);
				260	}
				261	output_ptr[i] = (JCOEF) temp;
				262	}
				263	}
				264	}
				265	}
				266
				267
				268	#ifdef DCT_FLOAT_SUPPORTED
				269
				270	METHODDEF void
				271	forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
				272	JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
				273	JDIMENSION start_row, JDIMENSION start_col,
				274	JDIMENSION num_blocks)
				275	/* This version is used for floating-point DCT implementations. */
				276	{
				277	/* This routine is heavily used, so it's worth coding it tightly. */
				278	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
				279	float_DCT_method_ptr do_dct = fdct->do_float_dct;
				280	FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
				281	FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
				282	JDIMENSION bi;
				283
				284	sample_data += start_row; /* fold in the vertical offset once */
				285
				286	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
				287	/* Load data into workspace, applying unsigned->signed conversion */
				288	{ register FAST_FLOAT *workspaceptr;
				289	register JSAMPROW elemptr;
				290	register int elemr;
				291
				292	workspaceptr = workspace;
				293	for (elemr = 0; elemr < DCTSIZE; elemr++) {
				294	elemptr = sample_data[elemr] + start_col;
				295	#if DCTSIZE == 8 /* unroll the inner loop */
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame^]	296	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				297	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				298	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				299	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				300	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				301	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				302	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
				303	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	304	#else
				305	{ register int elemc;
				306	for (elemc = DCTSIZE; elemc > 0; elemc--) {
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame^]	307	*workspaceptr++ = (FAST_FLOAT)
				308	(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	309	}
				310	}
				311	#endif
				312	}
				313	}
				314
				315	/* Perform the DCT */
				316	(*do_dct) (workspace);
				317
				318	/* Quantize/descale the coefficients, and store into coef_blocks[] */
				319	{ register FAST_FLOAT temp;
				320	register int i;
				321	register JCOEFPTR output_ptr = coef_blocks[bi];
				322
				323	for (i = 0; i < DCTSIZE2; i++) {
				324	/* Apply the quantization and scaling factor */
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame^]	325	temp = workspace[i] * divisors[i];
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	326	/* Round to nearest integer.
				327	* Since C does not specify the direction of rounding for negative
				328	* quotients, we have to force the dividend positive for portability.
				329	* The maximum coefficient size is +-16K (for 12-bit data), so this
				330	* code should work for either 16-bit or 32-bit ints.
				331	*/
				332	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
				333	}
				334	}
				335	}
				336	}
				337
				338	#endif /* DCT_FLOAT_SUPPORTED */
				339
				340
				341	/*
				342	* Initialize FDCT manager.
				343	*/
				344
				345	GLOBAL void
				346	jinit_forward_dct (j_compress_ptr cinfo)
				347	{
				348	my_fdct_ptr fdct;
				349	int i;
				350
				351	fdct = (my_fdct_ptr)
				352	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
				353	SIZEOF(my_fdct_controller));
				354	cinfo->fdct = (struct jpeg_forward_dct *) fdct;
				355	fdct->pub.start_pass = start_pass_fdctmgr;
				356
				357	switch (cinfo->dct_method) {
				358	#ifdef DCT_ISLOW_SUPPORTED
				359	case JDCT_ISLOW:
				360	fdct->pub.forward_DCT = forward_DCT;
				361	fdct->do_dct = jpeg_fdct_islow;
				362	break;
				363	#endif
				364	#ifdef DCT_IFAST_SUPPORTED
				365	case JDCT_IFAST:
				366	fdct->pub.forward_DCT = forward_DCT;
				367	fdct->do_dct = jpeg_fdct_ifast;
				368	break;
				369	#endif
				370	#ifdef DCT_FLOAT_SUPPORTED
				371	case JDCT_FLOAT:
				372	fdct->pub.forward_DCT = forward_DCT_float;
				373	fdct->do_float_dct = jpeg_fdct_float;
				374	break;
				375	#endif
				376	default:
				377	ERREXIT(cinfo, JERR_NOT_COMPILED);
				378	break;
				379	}
				380
				381	/* Mark divisor tables unallocated */
				382	for (i = 0; i < NUM_QUANT_TBLS; i++) {
				383	fdct->divisors[i] = NULL;
				384	#ifdef DCT_FLOAT_SUPPORTED
				385	fdct->float_divisors[i] = NULL;
				386	#endif
				387	}
				388	}