blob: 56b73aabceac1a8b1496ea45a36ac5ddb16b5a5f [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 2001-2005 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26#include <math.h>
27
28#include "jni_util.h"
29#include "GraphicsPrimitiveMgr.h"
30#include "Region.h"
31
32#include "sun_java2d_loops_ScaledBlit.h"
33
34/*
35 * The scaling loops used inside the helper functions are based on the
36 * following pseudocode for stepping through the source image:
37 *
38 * shift - number of bits of sub-pixel precision in scaled values
39 * srcxorig, srcyorig - scaled location of first pixel
40 * srcxinc, srcyinc - scaled x and y increments
41 * dstwidth, dstheight - number of pixels to process across and down
42 *
43 * 1. srcy = srcyorig;
44 * 2. for (dstheight) {
45 * 3. srcx = srcxorig;
46 * 4. for (dstwidth) {
47 * 5. fetch and process pixel for (srcx >> shift, srcy >> shift)
48 * 6. srcx += srcxinc;
49 * 7. }
50 * 8. srcy += srcyinc;
51 * 9. }
52 *
53 * Note that each execution of line 6 or 8 accumulates error of
54 * +/- 1 into the scaled coordinate variables. These lines are
55 * each executed once per pixel across or once per pixel down
56 * the region being iterated over, thus the error can accumulate
57 * up to a magnitude of dstwidth in the horizontal direction and
58 * dstheight in the vertical direction.
59 *
60 * If the error ever reaches a magnitude of (1 << shift) then we
61 * will be off by at least 1 source pixel in our mapping.
62 *
63 * Note that we increment the source coordinates by the srcxinc
64 * and srcyinc variables in each step. Thus, if our error ever
65 * accumulates to a magnitude equal to srcxinc or srcyinc then
66 * we will be ahead or behind of "where we should be" by at least
67 * one iteration. Since each iteration is a destination pixel,
68 * this means that our actual location will be off by at least
69 * one destination pixel.
70 *
71 * This means that all of the values:
72 *
73 * - (1 << shift)
74 * - srcxinc
75 * - srcyinc
76 *
77 * all represent a maximum bound on how much error we can accumulate
78 * before we are off by a source or a destination pixel. Thus,
79 * we should make sure that we never process more than that many
80 * pixels if we want to maintain single pixel accuracy. Even
81 * better would be to process many fewer pixels than those bounds
82 * to ensure that our accumulated error is much smaller than a
83 * pixel.
84 */
85
86/*
87 * Find and return the largest tile size that is a power of 2 and
88 * which is small enough to yield some reassuring degree of subpixel
89 * accuracy. The degree of subpixel accuracy that will be preserved
90 * by the tile size it chooses will vary and the details on how
91 * it makes this decision are detailed in the comments below.
92 */
93static jint
94findpow2tilesize(jint shift, jint sxinc, jint syinc)
95{
96 /*
97 * The initial value of shift is our first estimate for
98 * the power of 2 for our tilesize since it ensures
99 * less than 1 source pixel of error.
100 *
101 * Reducing it until (1 << shift) is not larger than the
102 * smallest of our increments ensures we will have no more
103 * than 1 destination pixel of error as well.
104 */
105 if (sxinc > syinc) {
106 sxinc = syinc;
107 }
108 if (sxinc == 0) {
109 /* Degenerate case will cause infinite loop in next loop... */
110 return 1;
111 }
112 while ((1 << shift) > sxinc) {
113 shift--;
114 }
115 /*
116 * shift is now the largest it can be for less than 1 pixel
117 * of error in either source or destination spaces.
118 *
119 * Now we will try for at least 8 bits of subpixel accuracy
120 * with a tile size of at least 256x256 and reduce our subpixel
121 * accuracy on a sliding scale down to a tilesize of 1x1 when
122 * we have no bits of sub-pixel accuracy.
123 */
124 if (shift >= 16) {
125 /* Subtracting 8 asks for 8 bits of subpixel accuracy. */
126 shift -= 8;
127 } else {
128 /* Ask for half of the remaining bits to be subpixel accuracy. */
129 /* Rounding is in favor of subpixel accuracy over tile size. */
130 /* Worst case, shift == 0 and tilesize == (1 << 0) == 1 */
131 shift /= 2;
132 }
133 return (1 << shift);
134}
135
136/*
137 * For a given integer destination pixel coordinate "id", calculate the
138 * integer destination coordinate of the start of the "ts" sized tile
139 * in which it resides.
140 * Tiles all start at even multiples of the tile size from the integer
141 * destination origin "io".
142 *
143 * id == integer destination coordinate
144 * io == integer destination operation origin
145 * ts == tilesize (must be power of 2)
146 */
147#define TILESTART(id, io, ts) ((io) + (((id)-(io)) & (~((ts)-1))))
148
149/*
150 * For a given integer destination pixel coordinate "id", calculate the
151 * sub-pixel accurate source coordinate from which its sample comes.
152 * The returned source coordinate is expressed in a shifted fractional
153 * arithmetic number system.
154 *
155 * id == integer destination coordinate
156 * fo == floating point destination operation origin,
157 * sf == source coordinate scale factor per destination pixel
158 * (multiplied by fractional arithmetic "shift")
159 *
160 * The caller is required to cast this value to the appropriate
161 * integer type for the needed precision. The rendering code which
162 * deals only with valid coordinates within the bounds of the source
163 * rectangle uses jint. The setup code, which occasionally deals
164 * with coordinates that run out of bounds, uses jlong.
165 *
166 * Note that the rounding in this calculation is at a fraction of a
167 * source pixel of (1.0 / (1<<shift)) since the scale factor includes
168 * the fractional shift. As a result, the type of rounding used is
169 * not very significant (floor, floor(x+.5), or ceil(x-.5)), but the
170 * ceil(x-.5) version is used for consistency with the way that pixel
171 * coordinates are rounded to assign the ".5" value to the lower
172 * integer.
173 */
174#define SRCLOC(id, fo, sf) (ceil((((id) + 0.5) - (fo)) * (sf) - 0.5))
175
176/*
177 * Reverse map a srctarget coordinate into device space and refine the
178 * answer. More specifically, what we are looking for is the smallest
179 * destination coordinate that maps to a source coordinate that is
180 * greater than or equal to the given target source coordinate.
181 *
182 * Note that since the inner loops use math that maps a destination
183 * coordinate into source space and that, even though the equation
184 * we use below is the theoretical inverse of the dst->src mapping,
185 * we cannot rely on floating point math to guarantee that applying
186 * both of these equations in sequence will give us an exact mapping
187 * of src->dst->src. Thus, we must search back and forth to see if
188 * we really map back to the given source coordinate and that we are
189 * the smallest destination coordinate that does so.
190 *
191 * Note that, in practice, the answer from the initial guess tends to
192 * be the right answer most of the time and the loop ends up finding
193 * one iteration to be ">= srctarget" and the next to be "< srctarget"
194 * and thus finds the answer in 2 iterations. A small number of
195 * times, the initial guess is 1 too low and so we do one iteration
196 * at "< srctarget" and the next at ">= srctarget" and again find the
197 * answer in 2 iterations. All cases encountered during testing ended
198 * up falling into one of those 2 categories and so the loop was always
199 * executed exactly twice.
200 *
201 * Note also that the calculation of srcloc below may attempt to calculate
202 * the src location of the destination pixel which is "1 beyond" the
203 * end of the source image. Since our shift calculation code in the
204 * main function only guaranteed that "srcw << shift" did not overflow
205 * a 32-bit signed integer, we cannot guarantee that "(srcw+1) << shift"
206 * or, more generally, "(srcw << shift)+srcinc" does not overflow.
207 * As a result, we perform our calculations here with jlong values
208 * so that we aren't affected by this overflow. Since srcw (shifted)
209 * and srcinc are both 32-bit values, their sum cannot possibly overflow
210 * a jlong. In fact, we can step up to a couple of billion steps of
211 * size "srcinc" past the end of the image before we have to worry
212 * about overflow - in practice, though, the search never steps more
213 * than 1 past the end of the image so this buffer is more than enough.
214 */
215static jint
216refine(jint intorigin, jdouble dblorigin, jint tilesize,
217 jdouble scale, jint srctarget, jint srcinc)
218{
219 /* Make a first estimate of dest coordinate from srctarget */
220 jint dstloc = (jint) ceil(dblorigin + srctarget / scale - 0.5);
221 /* Loop until we get at least one value < and one >= the target */
222 jboolean wasneg = JNI_FALSE;
223 jboolean waspos = JNI_FALSE;
224 jlong lsrcinc = srcinc;
225 jlong lsrctarget = srctarget;
226
227 while (JNI_TRUE) {
228 /*
229 * Find src coordinate from dest coordinate using the same
230 * math we will use below when iterating over tiles.
231 */
232 jint tilestart = TILESTART(dstloc, intorigin, tilesize);
233 jlong lsrcloc = (jlong) SRCLOC(tilestart, dblorigin, scale);
234 if (dstloc > tilestart) {
235 lsrcloc += lsrcinc * ((jlong) dstloc - tilestart);
236 }
237 if (lsrcloc >= lsrctarget) {
238 /*
239 * If we were previously less than target, then the current
240 * dstloc is the smallest dst which maps >= the target.
241 */
242 if (wasneg) break;
243 dstloc--;
244 waspos = JNI_TRUE;
245 } else {
246 /*
247 * If we were previously greater than target, then this must
248 * be the first dstloc which maps to < the target. Since we
249 * want the smallest which maps >= the target, increment it
250 * first before returning.
251 */
252 dstloc++;
253 if (waspos) break;
254 wasneg = JNI_TRUE;
255 }
256 }
257 return dstloc;
258}
259
260/*
261 * Class: sun_java2d_loops_ScaledBlit
262 * Method: Scale
263 * Signature: (Lsun/java2d/SurfaceData;Lsun/java2d/SurfaceData;Ljava/awt/Composite;Lsun/java2d/pipe/Region;IIIIDDDD)V
264 */
265JNIEXPORT void JNICALL
266Java_sun_java2d_loops_ScaledBlit_Scale
267 (JNIEnv *env, jobject self,
268 jobject srcData, jobject dstData,
269 jobject comp, jobject clip,
270 jint sx1, jint sy1, jint sx2, jint sy2,
271 jdouble ddx1, jdouble ddy1, jdouble ddx2, jdouble ddy2)
272{
273 SurfaceDataOps *srcOps;
274 SurfaceDataOps *dstOps;
275 SurfaceDataRasInfo srcInfo;
276 SurfaceDataRasInfo dstInfo;
277 NativePrimitive *pPrim;
278 CompositeInfo compInfo;
279 jint sxinc, syinc, shift;
280 jint tilesize;
281 jint idx1, idy1;
282 jdouble scalex, scaley;
283 RegionData clipInfo;
284 jint dstFlags;
285 jboolean xunderflow, yunderflow;
286
287 pPrim = GetNativePrim(env, self);
288 if (pPrim == NULL) {
289 return;
290 }
291 if (pPrim->pCompType->getCompInfo != NULL) {
292 (*pPrim->pCompType->getCompInfo)(env, &compInfo, comp);
293 }
294 if (Region_GetInfo(env, clip, &clipInfo)) {
295 return;
296 }
297
298 srcOps = SurfaceData_GetOps(env, srcData);
299 dstOps = SurfaceData_GetOps(env, dstData);
300 if (srcOps == 0 || dstOps == 0) {
301 return;
302 }
303
304 /*
305 * Determine the precision to use for the fixed point math
306 * for the coordinate scaling.
307 * - OR together srcw and srch to get the MSB between the two
308 * - Next shift it up until it goes negative
309 * - Count the shifts and that will be the most accurate
310 * precision available for the fixed point math
311 * - a source coordinate of 1.0 will be (1 << shift)
312 * - srcw & srch will be (srcw << shift) and (srch << shift)
313 * and will not overflow
314 * Note that if srcw or srch are so large that they are
315 * negative numbers before shifting, then:
316 * - shift will be 0
317 * - tilesize will end up being 1x1 tiles
318 * - we will brute force calculate the source location
319 * of every destination pixel using the TILESTART and
320 * SRCLOC macros in this function and then call the
321 * scale helper function to copy one pixel at a time.
322 * - TILESTART involves mostly jdouble calculations so
323 * it should not have integer overflow problems.
324 */
325 sxinc = (sx2 - sx1) | (sy2 - sy1);
326 shift = 0;
327 if (sxinc > 0) {
328 while ((sxinc <<= 1) > 0) {
329 shift++;
330 }
331 }
332 /*
333 * Now determine the scaled integer increments used to traverse
334 * the source image for each destination pixel. Our shift value
335 * has been calculated above so that any location within the
336 * destination image can be represented as a scaled integer
337 * without incurring integer overflow.
338 *
339 * But we also need to worry about overflow of the sxinc and syinc
340 * parameters. We already know that "srcw<<shift" and "srch<<shift"
341 * cannot overflow a jint, and the only time that sxinc and syinc
342 * can be larger than those two values is if ddy2-ddy1 or ddx2-ddx1
343 * are smaller than 1. Since this situation implies that the
344 * output area is no more than one pixel wide or tall, then we are
345 * stepping by distances that are at least the size of the image
346 * and only one destination pixel will ever be rendered - thus the
347 * amount by which we step is largely irrelevant since after
348 * drawing the first "in bounds" pixel, we will step completely
349 * out of the source image and render nothing more. As a result,
350 * we assign the appropriate "size of image" stepping parameter
351 * for any scale to smaller than one device pixel.
352 */
353 yunderflow = (ddy2 - ddy1) < 1.0;
354 scaley = (((jdouble) (sy2 - sy1)) / (ddy2 - ddy1)) * (1 << shift);
355 syinc = (yunderflow ? ((sy2 - sy1) << shift) : (jint) scaley);
356 xunderflow = (ddx2 - ddx1) < 1.0;
357 scalex = (((jdouble) (sx2 - sx1)) / (ddx2 - ddx1)) * (1 << shift);
358 sxinc = (xunderflow ? ((sx2 - sx1) << shift) : (jint) scalex);
359 tilesize = findpow2tilesize(shift, sxinc, syinc);
360
361
362 srcInfo.bounds.x1 = sx1;
363 srcInfo.bounds.y1 = sy1;
364 srcInfo.bounds.x2 = sx2;
365 srcInfo.bounds.y2 = sy2;
366 if (srcOps->Lock(env, srcOps, &srcInfo, pPrim->srcflags) != SD_SUCCESS) {
367 return;
368 }
369 if (srcInfo.bounds.x2 <= srcInfo.bounds.x1 ||
370 srcInfo.bounds.y2 <= srcInfo.bounds.y1)
371 {
372 SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
373 return;
374 }
375
376 /*
377 * Only refine lower bounds if lower source coordinate was clipped
378 * because the math will work out to be exactly idx1, idy1 if not.
379 * Always refine upper bounds since we want to make sure not to
380 * overstep the source bounds based on the tiled iteration math.
381 *
382 * For underflow cases, simply check if the SRCLOC for the single
383 * destination pixel maps inside the source bounds. If it does,
384 * we render that pixel row or column (and only that pixel row
385 * or column). If it does not, we render nothing.
386 */
387 idx1 = (jint) ceil(ddx1 - 0.5);
388 idy1 = (jint) ceil(ddy1 - 0.5);
389 if (xunderflow) {
390 jdouble x = sx1 + (SRCLOC(idx1, ddx1, scalex) / (1 << shift));
391 dstInfo.bounds.x1 = dstInfo.bounds.x2 = idx1;
392 if (x >= srcInfo.bounds.x1 && x < srcInfo.bounds.x2) {
393 dstInfo.bounds.x2++;
394 }
395 } else {
396 dstInfo.bounds.x1 = ((srcInfo.bounds.x1 <= sx1)
397 ? idx1
398 : refine(idx1, ddx1, tilesize, scalex,
399 (srcInfo.bounds.x1-sx1) << shift, sxinc));
400 dstInfo.bounds.x2 = refine(idx1, ddx1, tilesize, scalex,
401 (srcInfo.bounds.x2-sx1) << shift, sxinc);
402 }
403 if (yunderflow) {
404 jdouble y = sy1 + (SRCLOC(idy1, ddy1, scaley) / (1 << shift));
405 dstInfo.bounds.y1 = dstInfo.bounds.y2 = idy1;
406 if (y >= srcInfo.bounds.y1 && y < srcInfo.bounds.y2) {
407 dstInfo.bounds.y2++;
408 }
409 } else {
410 dstInfo.bounds.y1 = ((srcInfo.bounds.y1 <= sy1)
411 ? idy1
412 : refine(idy1, ddy1, tilesize, scaley,
413 (srcInfo.bounds.y1-sy1) << shift, syinc));
414 dstInfo.bounds.y2 = refine(idy1, ddy1, tilesize, scaley,
415 (srcInfo.bounds.y2-sy1) << shift, syinc);
416 }
417
418 SurfaceData_IntersectBounds(&dstInfo.bounds, &clipInfo.bounds);
419 dstFlags = pPrim->dstflags;
420 if (!Region_IsRectangular(&clipInfo)) {
421 dstFlags |= SD_LOCK_PARTIAL_WRITE;
422 }
423 if (dstOps->Lock(env, dstOps, &dstInfo, dstFlags) != SD_SUCCESS) {
424 SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
425 return;
426 }
427
428 if (dstInfo.bounds.x2 > dstInfo.bounds.x1 &&
429 dstInfo.bounds.y2 > dstInfo.bounds.y1)
430 {
431 srcOps->GetRasInfo(env, srcOps, &srcInfo);
432 dstOps->GetRasInfo(env, dstOps, &dstInfo);
433 if (srcInfo.rasBase && dstInfo.rasBase) {
434 SurfaceDataBounds span;
435 void *pSrc = PtrCoord(srcInfo.rasBase,
436 sx1, srcInfo.pixelStride,
437 sy1, srcInfo.scanStride);
438
439 Region_IntersectBounds(&clipInfo, &dstInfo.bounds);
440 Region_StartIteration(env, &clipInfo);
441 if (tilesize >= (ddx2 - ddx1) &&
442 tilesize >= (ddy2 - ddy1))
443 {
444 /* Do everything in one tile */
445 jint sxloc = (jint) SRCLOC(idx1, ddx1, scalex);
446 jint syloc = (jint) SRCLOC(idy1, ddy1, scaley);
447 while (Region_NextIteration(&clipInfo, &span)) {
448 jint tsxloc = sxloc;
449 jint tsyloc = syloc;
450 void *pDst;
451
452 if (span.y1 > idy1) {
453 tsyloc += syinc * (span.y1 - idy1);
454 }
455 if (span.x1 > idx1) {
456 tsxloc += sxinc * (span.x1 - idx1);
457 }
458
459 pDst = PtrCoord(dstInfo.rasBase,
460 span.x1, dstInfo.pixelStride,
461 span.y1, dstInfo.scanStride);
462 (*pPrim->funcs.scaledblit)(pSrc, pDst,
463 span.x2-span.x1, span.y2-span.y1,
464 tsxloc, tsyloc,
465 sxinc, syinc, shift,
466 &srcInfo, &dstInfo,
467 pPrim, &compInfo);
468 }
469 } else {
470 /* Break each clip span into tiles for better accuracy. */
471 while (Region_NextIteration(&clipInfo, &span)) {
472 jint tilex, tiley;
473 jint sxloc, syloc;
474 jint x1, y1, x2, y2;
475 void *pDst;
476
477 for (tiley = TILESTART(span.y1, idy1, tilesize);
478 tiley < span.y2;
479 tiley += tilesize)
480 {
481 /* Clip span to Y range of current tile */
482 y1 = tiley;
483 y2 = tiley + tilesize;
484 if (y1 < span.y1) y1 = span.y1;
485 if (y2 > span.y2) y2 = span.y2;
486
487 /* Find scaled source coordinate of first pixel */
488 syloc = (jint) SRCLOC(tiley, ddy1, scaley);
489 if (y1 > tiley) {
490 syloc += syinc * (y1 - tiley);
491 }
492
493 for (tilex = TILESTART(span.x1, idx1, tilesize);
494 tilex < span.x2;
495 tilex += tilesize)
496 {
497 /* Clip span to X range of current tile */
498 x1 = tilex;
499 x2 = tilex + tilesize;
500 if (x1 < span.x1) x1 = span.x1;
501 if (x2 > span.x2) x2 = span.x2;
502
503 /* Find scaled source coordinate of first pixel */
504 sxloc = (jint) SRCLOC(tilex, ddx1, scalex);
505 if (x1 > tilex) {
506 sxloc += sxinc * (x1 - tilex);
507 }
508
509 pDst = PtrCoord(dstInfo.rasBase,
510 x1, dstInfo.pixelStride,
511 y1, dstInfo.scanStride);
512 (*pPrim->funcs.scaledblit)(pSrc, pDst, x2-x1, y2-y1,
513 sxloc, syloc,
514 sxinc, syinc, shift,
515 &srcInfo, &dstInfo,
516 pPrim, &compInfo);
517 }
518 }
519 }
520 }
521 Region_EndIteration(env, &clipInfo);
522 }
523 SurfaceData_InvokeRelease(env, dstOps, &dstInfo);
524 SurfaceData_InvokeRelease(env, srcOps, &srcInfo);
525 }
526 SurfaceData_InvokeUnlock(env, dstOps, &dstInfo);
527 SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
528}