Blame - jdk/src/share/native/sun/java2d/loops/ScaledBlit.c - platform/libcore

blob: 56b73aabceac1a8b1496ea45a36ac5ddb16b5a5f [file] [log] [blame]

J. Duke	319a3b9	2007-12-01 00:00:00 +0000	[diff] [blame^]	1	/*
				2	* Copyright 2001-2005 Sun Microsystems, Inc. All Rights Reserved.
				3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				4	*
				5	* This code is free software; you can redistribute it and/or modify it
				6	* under the terms of the GNU General Public License version 2 only, as
				7	* published by the Free Software Foundation. Sun designates this
				8	* particular file as subject to the "Classpath" exception as provided
				9	* by Sun in the LICENSE file that accompanied this code.
				10	*
				11	* This code is distributed in the hope that it will be useful, but WITHOUT
				12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				14	* version 2 for more details (a copy is included in the LICENSE file that
				15	* accompanied this code).
				16	*
				17	* You should have received a copy of the GNU General Public License version
				18	* 2 along with this work; if not, write to the Free Software Foundation,
				19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
				20	*
				21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
				22	* CA 95054 USA or visit www.sun.com if you need additional information or
				23	* have any questions.
				24	*/
				25
				26	#include <math.h>
				27
				28	#include "jni_util.h"
				29	#include "GraphicsPrimitiveMgr.h"
				30	#include "Region.h"
				31
				32	#include "sun_java2d_loops_ScaledBlit.h"
				33
				34	/*
				35	* The scaling loops used inside the helper functions are based on the
				36	* following pseudocode for stepping through the source image:
				37	*
				38	* shift - number of bits of sub-pixel precision in scaled values
				39	* srcxorig, srcyorig - scaled location of first pixel
				40	* srcxinc, srcyinc - scaled x and y increments
				41	* dstwidth, dstheight - number of pixels to process across and down
				42	*
				43	* 1. srcy = srcyorig;
				44	* 2. for (dstheight) {
				45	* 3. srcx = srcxorig;
				46	* 4. for (dstwidth) {
				47	* 5. fetch and process pixel for (srcx >> shift, srcy >> shift)
				48	* 6. srcx += srcxinc;
				49	* 7. }
				50	* 8. srcy += srcyinc;
				51	* 9. }
				52	*
				53	* Note that each execution of line 6 or 8 accumulates error of
				54	* +/- 1 into the scaled coordinate variables. These lines are
				55	* each executed once per pixel across or once per pixel down
				56	* the region being iterated over, thus the error can accumulate
				57	* up to a magnitude of dstwidth in the horizontal direction and
				58	* dstheight in the vertical direction.
				59	*
				60	* If the error ever reaches a magnitude of (1 << shift) then we
				61	* will be off by at least 1 source pixel in our mapping.
				62	*
				63	* Note that we increment the source coordinates by the srcxinc
				64	* and srcyinc variables in each step. Thus, if our error ever
				65	* accumulates to a magnitude equal to srcxinc or srcyinc then
				66	* we will be ahead or behind of "where we should be" by at least
				67	* one iteration. Since each iteration is a destination pixel,
				68	* this means that our actual location will be off by at least
				69	* one destination pixel.
				70	*
				71	* This means that all of the values:
				72	*
				73	* - (1 << shift)
				74	* - srcxinc
				75	* - srcyinc
				76	*
				77	* all represent a maximum bound on how much error we can accumulate
				78	* before we are off by a source or a destination pixel. Thus,
				79	* we should make sure that we never process more than that many
				80	* pixels if we want to maintain single pixel accuracy. Even
				81	* better would be to process many fewer pixels than those bounds
				82	* to ensure that our accumulated error is much smaller than a
				83	* pixel.
				84	*/
				85
				86	/*
				87	* Find and return the largest tile size that is a power of 2 and
				88	* which is small enough to yield some reassuring degree of subpixel
				89	* accuracy. The degree of subpixel accuracy that will be preserved
				90	* by the tile size it chooses will vary and the details on how
				91	* it makes this decision are detailed in the comments below.
				92	*/
				93	static jint
				94	findpow2tilesize(jint shift, jint sxinc, jint syinc)
				95	{
				96	/*
				97	* The initial value of shift is our first estimate for
				98	* the power of 2 for our tilesize since it ensures
				99	* less than 1 source pixel of error.
				100	*
				101	* Reducing it until (1 << shift) is not larger than the
				102	* smallest of our increments ensures we will have no more
				103	* than 1 destination pixel of error as well.
				104	*/
				105	if (sxinc > syinc) {
				106	sxinc = syinc;
				107	}
				108	if (sxinc == 0) {
				109	/* Degenerate case will cause infinite loop in next loop... */
				110	return 1;
				111	}
				112	while ((1 << shift) > sxinc) {
				113	shift--;
				114	}
				115	/*
				116	* shift is now the largest it can be for less than 1 pixel
				117	* of error in either source or destination spaces.
				118	*
				119	* Now we will try for at least 8 bits of subpixel accuracy
				120	* with a tile size of at least 256x256 and reduce our subpixel
				121	* accuracy on a sliding scale down to a tilesize of 1x1 when
				122	* we have no bits of sub-pixel accuracy.
				123	*/
				124	if (shift >= 16) {
				125	/* Subtracting 8 asks for 8 bits of subpixel accuracy. */
				126	shift -= 8;
				127	} else {
				128	/* Ask for half of the remaining bits to be subpixel accuracy. */
				129	/* Rounding is in favor of subpixel accuracy over tile size. */
				130	/* Worst case, shift == 0 and tilesize == (1 << 0) == 1 */
				131	shift /= 2;
				132	}
				133	return (1 << shift);
				134	}
				135
				136	/*
				137	* For a given integer destination pixel coordinate "id", calculate the
				138	* integer destination coordinate of the start of the "ts" sized tile
				139	* in which it resides.
				140	* Tiles all start at even multiples of the tile size from the integer
				141	* destination origin "io".
				142	*
				143	* id == integer destination coordinate
				144	* io == integer destination operation origin
				145	* ts == tilesize (must be power of 2)
				146	*/
				147	#define TILESTART(id, io, ts) ((io) + (((id)-(io)) & (~((ts)-1))))
				148
				149	/*
				150	* For a given integer destination pixel coordinate "id", calculate the
				151	* sub-pixel accurate source coordinate from which its sample comes.
				152	* The returned source coordinate is expressed in a shifted fractional
				153	* arithmetic number system.
				154	*
				155	* id == integer destination coordinate
				156	* fo == floating point destination operation origin,
				157	* sf == source coordinate scale factor per destination pixel
				158	* (multiplied by fractional arithmetic "shift")
				159	*
				160	* The caller is required to cast this value to the appropriate
				161	* integer type for the needed precision. The rendering code which
				162	* deals only with valid coordinates within the bounds of the source
				163	* rectangle uses jint. The setup code, which occasionally deals
				164	* with coordinates that run out of bounds, uses jlong.
				165	*
				166	* Note that the rounding in this calculation is at a fraction of a
				167	* source pixel of (1.0 / (1<<shift)) since the scale factor includes
				168	* the fractional shift. As a result, the type of rounding used is
				169	* not very significant (floor, floor(x+.5), or ceil(x-.5)), but the
				170	* ceil(x-.5) version is used for consistency with the way that pixel
				171	* coordinates are rounded to assign the ".5" value to the lower
				172	* integer.
				173	*/
				174	#define SRCLOC(id, fo, sf) (ceil((((id) + 0.5) - (fo)) * (sf) - 0.5))
				175
				176	/*
				177	* Reverse map a srctarget coordinate into device space and refine the
				178	* answer. More specifically, what we are looking for is the smallest
				179	* destination coordinate that maps to a source coordinate that is
				180	* greater than or equal to the given target source coordinate.
				181	*
				182	* Note that since the inner loops use math that maps a destination
				183	* coordinate into source space and that, even though the equation
				184	* we use below is the theoretical inverse of the dst->src mapping,
				185	* we cannot rely on floating point math to guarantee that applying
				186	* both of these equations in sequence will give us an exact mapping
				187	* of src->dst->src. Thus, we must search back and forth to see if
				188	* we really map back to the given source coordinate and that we are
				189	* the smallest destination coordinate that does so.
				190	*
				191	* Note that, in practice, the answer from the initial guess tends to
				192	* be the right answer most of the time and the loop ends up finding
				193	* one iteration to be ">= srctarget" and the next to be "< srctarget"
				194	* and thus finds the answer in 2 iterations. A small number of
				195	* times, the initial guess is 1 too low and so we do one iteration
				196	* at "< srctarget" and the next at ">= srctarget" and again find the
				197	* answer in 2 iterations. All cases encountered during testing ended
				198	* up falling into one of those 2 categories and so the loop was always
				199	* executed exactly twice.
				200	*
				201	* Note also that the calculation of srcloc below may attempt to calculate
				202	* the src location of the destination pixel which is "1 beyond" the
				203	* end of the source image. Since our shift calculation code in the
				204	* main function only guaranteed that "srcw << shift" did not overflow
				205	* a 32-bit signed integer, we cannot guarantee that "(srcw+1) << shift"
				206	* or, more generally, "(srcw << shift)+srcinc" does not overflow.
				207	* As a result, we perform our calculations here with jlong values
				208	* so that we aren't affected by this overflow. Since srcw (shifted)
				209	* and srcinc are both 32-bit values, their sum cannot possibly overflow
				210	* a jlong. In fact, we can step up to a couple of billion steps of
				211	* size "srcinc" past the end of the image before we have to worry
				212	* about overflow - in practice, though, the search never steps more
				213	* than 1 past the end of the image so this buffer is more than enough.
				214	*/
				215	static jint
				216	refine(jint intorigin, jdouble dblorigin, jint tilesize,
				217	jdouble scale, jint srctarget, jint srcinc)
				218	{
				219	/* Make a first estimate of dest coordinate from srctarget */
				220	jint dstloc = (jint) ceil(dblorigin + srctarget / scale - 0.5);
				221	/* Loop until we get at least one value < and one >= the target */
				222	jboolean wasneg = JNI_FALSE;
				223	jboolean waspos = JNI_FALSE;
				224	jlong lsrcinc = srcinc;
				225	jlong lsrctarget = srctarget;
				226
				227	while (JNI_TRUE) {
				228	/*
				229	* Find src coordinate from dest coordinate using the same
				230	* math we will use below when iterating over tiles.
				231	*/
				232	jint tilestart = TILESTART(dstloc, intorigin, tilesize);
				233	jlong lsrcloc = (jlong) SRCLOC(tilestart, dblorigin, scale);
				234	if (dstloc > tilestart) {
				235	lsrcloc += lsrcinc * ((jlong) dstloc - tilestart);
				236	}
				237	if (lsrcloc >= lsrctarget) {
				238	/*
				239	* If we were previously less than target, then the current
				240	* dstloc is the smallest dst which maps >= the target.
				241	*/
				242	if (wasneg) break;
				243	dstloc--;
				244	waspos = JNI_TRUE;
				245	} else {
				246	/*
				247	* If we were previously greater than target, then this must
				248	* be the first dstloc which maps to < the target. Since we
				249	* want the smallest which maps >= the target, increment it
				250	* first before returning.
				251	*/
				252	dstloc++;
				253	if (waspos) break;
				254	wasneg = JNI_TRUE;
				255	}
				256	}
				257	return dstloc;
				258	}
				259
				260	/*
				261	* Class: sun_java2d_loops_ScaledBlit
				262	* Method: Scale
				263	* Signature: (Lsun/java2d/SurfaceData;Lsun/java2d/SurfaceData;Ljava/awt/Composite;Lsun/java2d/pipe/Region;IIIIDDDD)V
				264	*/
				265	JNIEXPORT void JNICALL
				266	Java_sun_java2d_loops_ScaledBlit_Scale
				267	(JNIEnv *env, jobject self,
				268	jobject srcData, jobject dstData,
				269	jobject comp, jobject clip,
				270	jint sx1, jint sy1, jint sx2, jint sy2,
				271	jdouble ddx1, jdouble ddy1, jdouble ddx2, jdouble ddy2)
				272	{
				273	SurfaceDataOps *srcOps;
				274	SurfaceDataOps *dstOps;
				275	SurfaceDataRasInfo srcInfo;
				276	SurfaceDataRasInfo dstInfo;
				277	NativePrimitive *pPrim;
				278	CompositeInfo compInfo;
				279	jint sxinc, syinc, shift;
				280	jint tilesize;
				281	jint idx1, idy1;
				282	jdouble scalex, scaley;
				283	RegionData clipInfo;
				284	jint dstFlags;
				285	jboolean xunderflow, yunderflow;
				286
				287	pPrim = GetNativePrim(env, self);
				288	if (pPrim == NULL) {
				289	return;
				290	}
				291	if (pPrim->pCompType->getCompInfo != NULL) {
				292	(*pPrim->pCompType->getCompInfo)(env, &compInfo, comp);
				293	}
				294	if (Region_GetInfo(env, clip, &clipInfo)) {
				295	return;
				296	}
				297
				298	srcOps = SurfaceData_GetOps(env, srcData);
				299	dstOps = SurfaceData_GetOps(env, dstData);
				300	if (srcOps == 0 \|\| dstOps == 0) {
				301	return;
				302	}
				303
				304	/*
				305	* Determine the precision to use for the fixed point math
				306	* for the coordinate scaling.
				307	* - OR together srcw and srch to get the MSB between the two
				308	* - Next shift it up until it goes negative
				309	* - Count the shifts and that will be the most accurate
				310	* precision available for the fixed point math
				311	* - a source coordinate of 1.0 will be (1 << shift)
				312	* - srcw & srch will be (srcw << shift) and (srch << shift)
				313	* and will not overflow
				314	* Note that if srcw or srch are so large that they are
				315	* negative numbers before shifting, then:
				316	* - shift will be 0
				317	* - tilesize will end up being 1x1 tiles
				318	* - we will brute force calculate the source location
				319	* of every destination pixel using the TILESTART and
				320	* SRCLOC macros in this function and then call the
				321	* scale helper function to copy one pixel at a time.
				322	* - TILESTART involves mostly jdouble calculations so
				323	* it should not have integer overflow problems.
				324	*/
				325	sxinc = (sx2 - sx1) \| (sy2 - sy1);
				326	shift = 0;
				327	if (sxinc > 0) {
				328	while ((sxinc <<= 1) > 0) {
				329	shift++;
				330	}
				331	}
				332	/*
				333	* Now determine the scaled integer increments used to traverse
				334	* the source image for each destination pixel. Our shift value
				335	* has been calculated above so that any location within the
				336	* destination image can be represented as a scaled integer
				337	* without incurring integer overflow.
				338	*
				339	* But we also need to worry about overflow of the sxinc and syinc
				340	* parameters. We already know that "srcw<<shift" and "srch<<shift"
				341	* cannot overflow a jint, and the only time that sxinc and syinc
				342	* can be larger than those two values is if ddy2-ddy1 or ddx2-ddx1
				343	* are smaller than 1. Since this situation implies that the
				344	* output area is no more than one pixel wide or tall, then we are
				345	* stepping by distances that are at least the size of the image
				346	* and only one destination pixel will ever be rendered - thus the
				347	* amount by which we step is largely irrelevant since after
				348	* drawing the first "in bounds" pixel, we will step completely
				349	* out of the source image and render nothing more. As a result,
				350	* we assign the appropriate "size of image" stepping parameter
				351	* for any scale to smaller than one device pixel.
				352	*/
				353	yunderflow = (ddy2 - ddy1) < 1.0;
				354	scaley = (((jdouble) (sy2 - sy1)) / (ddy2 - ddy1)) * (1 << shift);
				355	syinc = (yunderflow ? ((sy2 - sy1) << shift) : (jint) scaley);
				356	xunderflow = (ddx2 - ddx1) < 1.0;
				357	scalex = (((jdouble) (sx2 - sx1)) / (ddx2 - ddx1)) * (1 << shift);
				358	sxinc = (xunderflow ? ((sx2 - sx1) << shift) : (jint) scalex);
				359	tilesize = findpow2tilesize(shift, sxinc, syinc);
				360
				361
				362	srcInfo.bounds.x1 = sx1;
				363	srcInfo.bounds.y1 = sy1;
				364	srcInfo.bounds.x2 = sx2;
				365	srcInfo.bounds.y2 = sy2;
				366	if (srcOps->Lock(env, srcOps, &srcInfo, pPrim->srcflags) != SD_SUCCESS) {
				367	return;
				368	}
				369	if (srcInfo.bounds.x2 <= srcInfo.bounds.x1 \|\|
				370	srcInfo.bounds.y2 <= srcInfo.bounds.y1)
				371	{
				372	SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
				373	return;
				374	}
				375
				376	/*
				377	* Only refine lower bounds if lower source coordinate was clipped
				378	* because the math will work out to be exactly idx1, idy1 if not.
				379	* Always refine upper bounds since we want to make sure not to
				380	* overstep the source bounds based on the tiled iteration math.
				381	*
				382	* For underflow cases, simply check if the SRCLOC for the single
				383	* destination pixel maps inside the source bounds. If it does,
				384	* we render that pixel row or column (and only that pixel row
				385	* or column). If it does not, we render nothing.
				386	*/
				387	idx1 = (jint) ceil(ddx1 - 0.5);
				388	idy1 = (jint) ceil(ddy1 - 0.5);
				389	if (xunderflow) {
				390	jdouble x = sx1 + (SRCLOC(idx1, ddx1, scalex) / (1 << shift));
				391	dstInfo.bounds.x1 = dstInfo.bounds.x2 = idx1;
				392	if (x >= srcInfo.bounds.x1 && x < srcInfo.bounds.x2) {
				393	dstInfo.bounds.x2++;
				394	}
				395	} else {
				396	dstInfo.bounds.x1 = ((srcInfo.bounds.x1 <= sx1)
				397	? idx1
				398	: refine(idx1, ddx1, tilesize, scalex,
				399	(srcInfo.bounds.x1-sx1) << shift, sxinc));
				400	dstInfo.bounds.x2 = refine(idx1, ddx1, tilesize, scalex,
				401	(srcInfo.bounds.x2-sx1) << shift, sxinc);
				402	}
				403	if (yunderflow) {
				404	jdouble y = sy1 + (SRCLOC(idy1, ddy1, scaley) / (1 << shift));
				405	dstInfo.bounds.y1 = dstInfo.bounds.y2 = idy1;
				406	if (y >= srcInfo.bounds.y1 && y < srcInfo.bounds.y2) {
				407	dstInfo.bounds.y2++;
				408	}
				409	} else {
				410	dstInfo.bounds.y1 = ((srcInfo.bounds.y1 <= sy1)
				411	? idy1
				412	: refine(idy1, ddy1, tilesize, scaley,
				413	(srcInfo.bounds.y1-sy1) << shift, syinc));
				414	dstInfo.bounds.y2 = refine(idy1, ddy1, tilesize, scaley,
				415	(srcInfo.bounds.y2-sy1) << shift, syinc);
				416	}
				417
				418	SurfaceData_IntersectBounds(&dstInfo.bounds, &clipInfo.bounds);
				419	dstFlags = pPrim->dstflags;
				420	if (!Region_IsRectangular(&clipInfo)) {
				421	dstFlags \|= SD_LOCK_PARTIAL_WRITE;
				422	}
				423	if (dstOps->Lock(env, dstOps, &dstInfo, dstFlags) != SD_SUCCESS) {
				424	SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
				425	return;
				426	}
				427
				428	if (dstInfo.bounds.x2 > dstInfo.bounds.x1 &&
				429	dstInfo.bounds.y2 > dstInfo.bounds.y1)
				430	{
				431	srcOps->GetRasInfo(env, srcOps, &srcInfo);
				432	dstOps->GetRasInfo(env, dstOps, &dstInfo);
				433	if (srcInfo.rasBase && dstInfo.rasBase) {
				434	SurfaceDataBounds span;
				435	void *pSrc = PtrCoord(srcInfo.rasBase,
				436	sx1, srcInfo.pixelStride,
				437	sy1, srcInfo.scanStride);
				438
				439	Region_IntersectBounds(&clipInfo, &dstInfo.bounds);
				440	Region_StartIteration(env, &clipInfo);
				441	if (tilesize >= (ddx2 - ddx1) &&
				442	tilesize >= (ddy2 - ddy1))
				443	{
				444	/* Do everything in one tile */
				445	jint sxloc = (jint) SRCLOC(idx1, ddx1, scalex);
				446	jint syloc = (jint) SRCLOC(idy1, ddy1, scaley);
				447	while (Region_NextIteration(&clipInfo, &span)) {
				448	jint tsxloc = sxloc;
				449	jint tsyloc = syloc;
				450	void *pDst;
				451
				452	if (span.y1 > idy1) {
				453	tsyloc += syinc * (span.y1 - idy1);
				454	}
				455	if (span.x1 > idx1) {
				456	tsxloc += sxinc * (span.x1 - idx1);
				457	}
				458
				459	pDst = PtrCoord(dstInfo.rasBase,
				460	span.x1, dstInfo.pixelStride,
				461	span.y1, dstInfo.scanStride);
				462	(*pPrim->funcs.scaledblit)(pSrc, pDst,
				463	span.x2-span.x1, span.y2-span.y1,
				464	tsxloc, tsyloc,
				465	sxinc, syinc, shift,
				466	&srcInfo, &dstInfo,
				467	pPrim, &compInfo);
				468	}
				469	} else {
				470	/* Break each clip span into tiles for better accuracy. */
				471	while (Region_NextIteration(&clipInfo, &span)) {
				472	jint tilex, tiley;
				473	jint sxloc, syloc;
				474	jint x1, y1, x2, y2;
				475	void *pDst;
				476
				477	for (tiley = TILESTART(span.y1, idy1, tilesize);
				478	tiley < span.y2;
				479	tiley += tilesize)
				480	{
				481	/* Clip span to Y range of current tile */
				482	y1 = tiley;
				483	y2 = tiley + tilesize;
				484	if (y1 < span.y1) y1 = span.y1;
				485	if (y2 > span.y2) y2 = span.y2;
				486
				487	/* Find scaled source coordinate of first pixel */
				488	syloc = (jint) SRCLOC(tiley, ddy1, scaley);
				489	if (y1 > tiley) {
				490	syloc += syinc * (y1 - tiley);
				491	}
				492
				493	for (tilex = TILESTART(span.x1, idx1, tilesize);
				494	tilex < span.x2;
				495	tilex += tilesize)
				496	{
				497	/* Clip span to X range of current tile */
				498	x1 = tilex;
				499	x2 = tilex + tilesize;
				500	if (x1 < span.x1) x1 = span.x1;
				501	if (x2 > span.x2) x2 = span.x2;
				502
				503	/* Find scaled source coordinate of first pixel */
				504	sxloc = (jint) SRCLOC(tilex, ddx1, scalex);
				505	if (x1 > tilex) {
				506	sxloc += sxinc * (x1 - tilex);
				507	}
				508
				509	pDst = PtrCoord(dstInfo.rasBase,
				510	x1, dstInfo.pixelStride,
				511	y1, dstInfo.scanStride);
				512	(*pPrim->funcs.scaledblit)(pSrc, pDst, x2-x1, y2-y1,
				513	sxloc, syloc,
				514	sxinc, syinc, shift,
				515	&srcInfo, &dstInfo,
				516	pPrim, &compInfo);
				517	}
				518	}
				519	}
				520	}
				521	Region_EndIteration(env, &clipInfo);
				522	}
				523	SurfaceData_InvokeRelease(env, dstOps, &dstInfo);
				524	SurfaceData_InvokeRelease(env, srcOps, &srcInfo);
				525	}
				526	SurfaceData_InvokeUnlock(env, dstOps, &dstInfo);
				527	SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
				528	}