blob: b3bdb65a5b5efe062364b4768434818a5f4fc5d1 [file] [log] [blame]
Ruben Brunk370e2432014-10-14 18:33:23 -07001# Copyright 2013 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15import matplotlib
16matplotlib.use('Agg')
17
18import its.error
19import pylab
20import sys
21import Image
22import numpy
23import math
24import unittest
25import cStringIO
26import scipy.stats
27import copy
28
29DEFAULT_YUV_TO_RGB_CCM = numpy.matrix([
30 [1.000, 0.000, 1.402],
31 [1.000, -0.344, -0.714],
32 [1.000, 1.772, 0.000]])
33
34DEFAULT_YUV_OFFSETS = numpy.array([0, 128, 128])
35
36DEFAULT_GAMMA_LUT = numpy.array(
37 [math.floor(65535 * math.pow(i/65535.0, 1/2.2) + 0.5)
38 for i in xrange(65536)])
39
40DEFAULT_INVGAMMA_LUT = numpy.array(
41 [math.floor(65535 * math.pow(i/65535.0, 2.2) + 0.5)
42 for i in xrange(65536)])
43
44MAX_LUT_SIZE = 65536
45
46def convert_capture_to_rgb_image(cap,
47 ccm_yuv_to_rgb=DEFAULT_YUV_TO_RGB_CCM,
48 yuv_off=DEFAULT_YUV_OFFSETS,
49 props=None):
50 """Convert a captured image object to a RGB image.
51
52 Args:
53 cap: A capture object as returned by its.device.do_capture.
54 ccm_yuv_to_rgb: (Optional) the 3x3 CCM to convert from YUV to RGB.
55 yuv_off: (Optional) offsets to subtract from each of Y,U,V values.
56 props: (Optional) camera properties object (of static values);
57 required for processing raw images.
58
59 Returns:
60 RGB float-3 image array, with pixel values in [0.0, 1.0].
61 """
62 w = cap["width"]
63 h = cap["height"]
64 if cap["format"] == "raw10":
65 assert(props is not None)
66 cap = unpack_raw10_capture(cap, props)
67 if cap["format"] == "yuv":
68 y = cap["data"][0:w*h]
69 u = cap["data"][w*h:w*h*5/4]
70 v = cap["data"][w*h*5/4:w*h*6/4]
71 return convert_yuv420_to_rgb_image(y, u, v, w, h)
72 elif cap["format"] == "jpeg":
73 return decompress_jpeg_to_rgb_image(cap["data"])
74 elif cap["format"] == "raw":
75 assert(props is not None)
76 r,gr,gb,b = convert_capture_to_planes(cap, props)
77 return convert_raw_to_rgb_image(r,gr,gb,b, props, cap["metadata"])
78 else:
79 raise its.error.Error('Invalid format %s' % (cap["format"]))
80
81def unpack_raw10_capture(cap, props):
82 """Unpack a raw-10 capture to a raw-16 capture.
83
84 Args:
85 cap: A raw-10 capture object.
Chien-Yu Chen682faa22014-10-22 17:34:44 -070086 props: Camera properties object.
Ruben Brunk370e2432014-10-14 18:33:23 -070087
88 Returns:
89 New capture object with raw-16 data.
90 """
91 # Data is packed as 4x10b pixels in 5 bytes, with the first 4 bytes holding
92 # the MSPs of the pixels, and the 5th byte holding 4x2b LSBs.
93 w,h = cap["width"], cap["height"]
94 if w % 4 != 0:
95 raise its.error.Error('Invalid raw-10 buffer width')
96 cap = copy.deepcopy(cap)
97 cap["data"] = unpack_raw10_image(cap["data"].reshape(h,w*5/4))
98 cap["format"] = "raw"
99 return cap
100
101def unpack_raw10_image(img):
102 """Unpack a raw-10 image to a raw-16 image.
103
104 Output image will have the 10 LSBs filled in each 16b word, and the 6 MSBs
105 will be set to zero.
106
107 Args:
108 img: A raw-10 image, as a uint8 numpy array.
109
110 Returns:
111 Image as a uint16 numpy array, with all row padding stripped.
112 """
113 if img.shape[1] % 5 != 0:
114 raise its.error.Error('Invalid raw-10 buffer width')
115 w = img.shape[1]*4/5
116 h = img.shape[0]
117 # Cut out the 4x8b MSBs and shift to bits [10:2] in 16b words.
118 msbs = numpy.delete(img, numpy.s_[4::5], 1)
119 msbs = msbs.astype(numpy.uint16)
120 msbs = numpy.left_shift(msbs, 2)
121 msbs = msbs.reshape(h,w)
122 # Cut out the 4x2b LSBs and put each in bits [2:0] of their own 8b words.
123 lsbs = img[::, 4::5].reshape(h,w/4)
124 lsbs = numpy.right_shift(
125 numpy.packbits(numpy.unpackbits(lsbs).reshape(h,w/4,4,2),3), 6)
126 lsbs = lsbs.reshape(h,w)
127 # Fuse the MSBs and LSBs back together
128 img16 = numpy.bitwise_or(msbs, lsbs).reshape(h,w)
129 return img16
130
131def convert_capture_to_planes(cap, props=None):
132 """Convert a captured image object to separate image planes.
133
134 Decompose an image into multiple images, corresponding to different planes.
135
136 For YUV420 captures ("yuv"):
137 Returns Y,U,V planes, where the Y plane is full-res and the U,V planes
138 are each 1/2 x 1/2 of the full res.
139
140 For Bayer captures ("raw" or "raw10"):
141 Returns planes in the order R,Gr,Gb,B, regardless of the Bayer pattern
142 layout. Each plane is 1/2 x 1/2 of the full res.
143
144 For JPEG captures ("jpeg"):
145 Returns R,G,B full-res planes.
146
147 Args:
148 cap: A capture object as returned by its.device.do_capture.
149 props: (Optional) camera properties object (of static values);
150 required for processing raw images.
151
152 Returns:
153 A tuple of float numpy arrays (one per plane), consisting of pixel
154 values in the range [0.0, 1.0].
155 """
156 w = cap["width"]
157 h = cap["height"]
158 if cap["format"] == "raw10":
159 assert(props is not None)
160 cap = unpack_raw10_capture(cap, props)
161 if cap["format"] == "yuv":
162 y = cap["data"][0:w*h]
163 u = cap["data"][w*h:w*h*5/4]
164 v = cap["data"][w*h*5/4:w*h*6/4]
165 return ((y.astype(numpy.float32) / 255.0).reshape(h, w, 1),
166 (u.astype(numpy.float32) / 255.0).reshape(h/2, w/2, 1),
167 (v.astype(numpy.float32) / 255.0).reshape(h/2, w/2, 1))
168 elif cap["format"] == "jpeg":
169 rgb = decompress_jpeg_to_rgb_image(cap["data"]).reshape(w*h*3)
170 return (rgb[::3].reshape(h,w,1),
171 rgb[1::3].reshape(h,w,1),
172 rgb[2::3].reshape(h,w,1))
173 elif cap["format"] == "raw":
174 assert(props is not None)
175 white_level = float(props['android.sensor.info.whiteLevel'])
176 img = numpy.ndarray(shape=(h*w,), dtype='<u2',
177 buffer=cap["data"][0:w*h*2])
178 img = img.astype(numpy.float32).reshape(h,w) / white_level
179 imgs = [img[::2].reshape(w*h/2)[::2].reshape(h/2,w/2,1),
180 img[::2].reshape(w*h/2)[1::2].reshape(h/2,w/2,1),
181 img[1::2].reshape(w*h/2)[::2].reshape(h/2,w/2,1),
182 img[1::2].reshape(w*h/2)[1::2].reshape(h/2,w/2,1)]
183 idxs = get_canonical_cfa_order(props)
184 return [imgs[i] for i in idxs]
185 else:
186 raise its.error.Error('Invalid format %s' % (cap["format"]))
187
188def get_canonical_cfa_order(props):
189 """Returns a mapping from the Bayer 2x2 top-left grid in the CFA to
190 the standard order R,Gr,Gb,B.
191
192 Args:
193 props: Camera properties object.
194
195 Returns:
196 List of 4 integers, corresponding to the positions in the 2x2 top-
197 left Bayer grid of R,Gr,Gb,B, where the 2x2 grid is labeled as
198 0,1,2,3 in row major order.
199 """
200 # Note that raw streams aren't croppable, so the cropRegion doesn't need
201 # to be considered when determining the top-left pixel color.
202 cfa_pat = props['android.sensor.info.colorFilterArrangement']
203 if cfa_pat == 0:
204 # RGGB
205 return [0,1,2,3]
206 elif cfa_pat == 1:
207 # GRBG
208 return [1,0,3,2]
209 elif cfa_pat == 2:
210 # GBRG
211 return [2,3,0,1]
212 elif cfa_pat == 3:
213 # BGGR
214 return [3,2,1,0]
215 else:
216 raise its.error.Error("Not supported")
217
218def get_gains_in_canonical_order(props, gains):
219 """Reorders the gains tuple to the canonical R,Gr,Gb,B order.
220
221 Args:
222 props: Camera properties object.
223 gains: List of 4 values, in R,G_even,G_odd,B order.
224
225 Returns:
226 List of gains values, in R,Gr,Gb,B order.
227 """
228 cfa_pat = props['android.sensor.info.colorFilterArrangement']
229 if cfa_pat in [0,1]:
230 # RGGB or GRBG, so G_even is Gr
231 return gains
232 elif cfa_pat in [2,3]:
233 # GBRG or BGGR, so G_even is Gb
234 return [gains[0], gains[2], gains[1], gains[3]]
235 else:
236 raise its.error.Error("Not supported")
237
238def convert_raw_to_rgb_image(r_plane, gr_plane, gb_plane, b_plane,
239 props, cap_res):
240 """Convert a Bayer raw-16 image to an RGB image.
241
242 Includes some extremely rudimentary demosaicking and color processing
243 operations; the output of this function shouldn't be used for any image
244 quality analysis.
245
246 Args:
247 r_plane,gr_plane,gb_plane,b_plane: Numpy arrays for each color plane
248 in the Bayer image, with pixels in the [0.0, 1.0] range.
249 props: Camera properties object.
250 cap_res: Capture result (metadata) object.
251
252 Returns:
253 RGB float-3 image array, with pixel values in [0.0, 1.0]
254 """
255 # Values required for the RAW to RGB conversion.
256 assert(props is not None)
257 white_level = float(props['android.sensor.info.whiteLevel'])
258 black_levels = props['android.sensor.blackLevelPattern']
259 gains = cap_res['android.colorCorrection.gains']
260 ccm = cap_res['android.colorCorrection.transform']
261
262 # Reorder black levels and gains to R,Gr,Gb,B, to match the order
263 # of the planes.
264 idxs = get_canonical_cfa_order(props)
265 black_levels = [black_levels[i] for i in idxs]
266 gains = get_gains_in_canonical_order(props, gains)
267
268 # Convert CCM from rational to float, as numpy arrays.
269 ccm = numpy.array(its.objects.rational_to_float(ccm)).reshape(3,3)
270
271 # Need to scale the image back to the full [0,1] range after subtracting
272 # the black level from each pixel.
273 scale = white_level / (white_level - max(black_levels))
274
275 # Three-channel black levels, normalized to [0,1] by white_level.
276 black_levels = numpy.array([b/white_level for b in [
277 black_levels[i] for i in [0,1,3]]])
278
279 # Three-channel gains.
280 gains = numpy.array([gains[i] for i in [0,1,3]])
281
282 h,w = r_plane.shape[:2]
283 img = numpy.dstack([r_plane,(gr_plane+gb_plane)/2.0,b_plane])
284 img = (((img.reshape(h,w,3) - black_levels) * scale) * gains).clip(0.0,1.0)
285 img = numpy.dot(img.reshape(w*h,3), ccm.T).reshape(h,w,3).clip(0.0,1.0)
286 return img
287
288def convert_yuv420_to_rgb_image(y_plane, u_plane, v_plane,
289 w, h,
290 ccm_yuv_to_rgb=DEFAULT_YUV_TO_RGB_CCM,
291 yuv_off=DEFAULT_YUV_OFFSETS):
292 """Convert a YUV420 8-bit planar image to an RGB image.
293
294 Args:
295 y_plane: The packed 8-bit Y plane.
296 u_plane: The packed 8-bit U plane.
297 v_plane: The packed 8-bit V plane.
298 w: The width of the image.
299 h: The height of the image.
300 ccm_yuv_to_rgb: (Optional) the 3x3 CCM to convert from YUV to RGB.
301 yuv_off: (Optional) offsets to subtract from each of Y,U,V values.
302
303 Returns:
304 RGB float-3 image array, with pixel values in [0.0, 1.0].
305 """
306 y = numpy.subtract(y_plane, yuv_off[0])
307 u = numpy.subtract(u_plane, yuv_off[1]).view(numpy.int8)
308 v = numpy.subtract(v_plane, yuv_off[2]).view(numpy.int8)
309 u = u.reshape(h/2, w/2).repeat(2, axis=1).repeat(2, axis=0)
310 v = v.reshape(h/2, w/2).repeat(2, axis=1).repeat(2, axis=0)
311 yuv = numpy.dstack([y, u.reshape(w*h), v.reshape(w*h)])
312 flt = numpy.empty([h, w, 3], dtype=numpy.float32)
313 flt.reshape(w*h*3)[:] = yuv.reshape(h*w*3)[:]
314 flt = numpy.dot(flt.reshape(w*h,3), ccm_yuv_to_rgb.T).clip(0, 255)
315 rgb = numpy.empty([h, w, 3], dtype=numpy.uint8)
316 rgb.reshape(w*h*3)[:] = flt.reshape(w*h*3)[:]
317 return rgb.astype(numpy.float32) / 255.0
318
319def load_yuv420_to_rgb_image(yuv_fname,
320 w, h,
321 ccm_yuv_to_rgb=DEFAULT_YUV_TO_RGB_CCM,
322 yuv_off=DEFAULT_YUV_OFFSETS):
323 """Load a YUV420 image file, and return as an RGB image.
324
325 Args:
326 yuv_fname: The path of the YUV420 file.
327 w: The width of the image.
328 h: The height of the image.
329 ccm_yuv_to_rgb: (Optional) the 3x3 CCM to convert from YUV to RGB.
330 yuv_off: (Optional) offsets to subtract from each of Y,U,V values.
331
332 Returns:
333 RGB float-3 image array, with pixel values in [0.0, 1.0].
334 """
335 with open(yuv_fname, "rb") as f:
336 y = numpy.fromfile(f, numpy.uint8, w*h, "")
337 v = numpy.fromfile(f, numpy.uint8, w*h/4, "")
338 u = numpy.fromfile(f, numpy.uint8, w*h/4, "")
339 return convert_yuv420_to_rgb_image(y,u,v,w,h,ccm_yuv_to_rgb,yuv_off)
340
341def load_yuv420_to_yuv_planes(yuv_fname, w, h):
342 """Load a YUV420 image file, and return separate Y, U, and V plane images.
343
344 Args:
345 yuv_fname: The path of the YUV420 file.
346 w: The width of the image.
347 h: The height of the image.
348
349 Returns:
350 Separate Y, U, and V images as float-1 Numpy arrays, pixels in [0,1].
351 Note that pixel (0,0,0) is not black, since U,V pixels are centered at
352 0.5, and also that the Y and U,V plane images returned are different
353 sizes (due to chroma subsampling in the YUV420 format).
354 """
355 with open(yuv_fname, "rb") as f:
356 y = numpy.fromfile(f, numpy.uint8, w*h, "")
357 v = numpy.fromfile(f, numpy.uint8, w*h/4, "")
358 u = numpy.fromfile(f, numpy.uint8, w*h/4, "")
359 return ((y.astype(numpy.float32) / 255.0).reshape(h, w, 1),
360 (u.astype(numpy.float32) / 255.0).reshape(h/2, w/2, 1),
361 (v.astype(numpy.float32) / 255.0).reshape(h/2, w/2, 1))
362
363def decompress_jpeg_to_rgb_image(jpeg_buffer):
364 """Decompress a JPEG-compressed image, returning as an RGB image.
365
366 Args:
367 jpeg_buffer: The JPEG stream.
368
369 Returns:
370 A numpy array for the RGB image, with pixels in [0,1].
371 """
372 img = Image.open(cStringIO.StringIO(jpeg_buffer))
373 w = img.size[0]
374 h = img.size[1]
375 return numpy.array(img).reshape(h,w,3) / 255.0
376
377def apply_lut_to_image(img, lut):
378 """Applies a LUT to every pixel in a float image array.
379
380 Internally converts to a 16b integer image, since the LUT can work with up
381 to 16b->16b mappings (i.e. values in the range [0,65535]). The lut can also
382 have fewer than 65536 entries, however it must be sized as a power of 2
383 (and for smaller luts, the scale must match the bitdepth).
384
385 For a 16b lut of 65536 entries, the operation performed is:
386
387 lut[r * 65535] / 65535 -> r'
388 lut[g * 65535] / 65535 -> g'
389 lut[b * 65535] / 65535 -> b'
390
391 For a 10b lut of 1024 entries, the operation becomes:
392
393 lut[r * 1023] / 1023 -> r'
394 lut[g * 1023] / 1023 -> g'
395 lut[b * 1023] / 1023 -> b'
396
397 Args:
398 img: Numpy float image array, with pixel values in [0,1].
399 lut: Numpy table encoding a LUT, mapping 16b integer values.
400
401 Returns:
402 Float image array after applying LUT to each pixel.
403 """
404 n = len(lut)
405 if n <= 0 or n > MAX_LUT_SIZE or (n & (n - 1)) != 0:
406 raise its.error.Error('Invalid arg LUT size: %d' % (n))
407 m = float(n-1)
408 return (lut[(img * m).astype(numpy.uint16)] / m).astype(numpy.float32)
409
410def apply_matrix_to_image(img, mat):
411 """Multiplies a 3x3 matrix with each float-3 image pixel.
412
413 Each pixel is considered a column vector, and is left-multiplied by
414 the given matrix:
415
416 [ ] r r'
417 [ mat ] * g -> g'
418 [ ] b b'
419
420 Args:
421 img: Numpy float image array, with pixel values in [0,1].
422 mat: Numpy 3x3 matrix.
423
424 Returns:
425 The numpy float-3 image array resulting from the matrix mult.
426 """
427 h = img.shape[0]
428 w = img.shape[1]
429 img2 = numpy.empty([h, w, 3], dtype=numpy.float32)
430 img2.reshape(w*h*3)[:] = (numpy.dot(img.reshape(h*w, 3), mat.T)
431 ).reshape(w*h*3)[:]
432 return img2
433
434def get_image_patch(img, xnorm, ynorm, wnorm, hnorm):
435 """Get a patch (tile) of an image.
436
437 Args:
438 img: Numpy float image array, with pixel values in [0,1].
439 xnorm,ynorm,wnorm,hnorm: Normalized (in [0,1]) coords for the tile.
440
441 Returns:
442 Float image array of the patch.
443 """
444 hfull = img.shape[0]
445 wfull = img.shape[1]
446 xtile = math.ceil(xnorm * wfull)
447 ytile = math.ceil(ynorm * hfull)
448 wtile = math.floor(wnorm * wfull)
449 htile = math.floor(hnorm * hfull)
450 return img[ytile:ytile+htile,xtile:xtile+wtile,:].copy()
451
452def compute_image_means(img):
453 """Calculate the mean of each color channel in the image.
454
455 Args:
456 img: Numpy float image array, with pixel values in [0,1].
457
458 Returns:
459 A list of mean values, one per color channel in the image.
460 """
461 means = []
462 chans = img.shape[2]
463 for i in xrange(chans):
464 means.append(numpy.mean(img[:,:,i], dtype=numpy.float64))
465 return means
466
467def compute_image_variances(img):
468 """Calculate the variance of each color channel in the image.
469
470 Args:
471 img: Numpy float image array, with pixel values in [0,1].
472
473 Returns:
474 A list of mean values, one per color channel in the image.
475 """
476 variances = []
477 chans = img.shape[2]
478 for i in xrange(chans):
479 variances.append(numpy.var(img[:,:,i], dtype=numpy.float64))
480 return variances
481
482def write_image(img, fname, apply_gamma=False):
483 """Save a float-3 numpy array image to a file.
484
485 Supported formats: PNG, JPEG, and others; see PIL docs for more.
486
487 Image can be 3-channel, which is interpreted as RGB, or can be 1-channel,
488 which is greyscale.
489
490 Can optionally specify that the image should be gamma-encoded prior to
491 writing it out; this should be done if the image contains linear pixel
492 values, to make the image look "normal".
493
494 Args:
495 img: Numpy image array data.
496 fname: Path of file to save to; the extension specifies the format.
497 apply_gamma: (Optional) apply gamma to the image prior to writing it.
498 """
499 if apply_gamma:
500 img = apply_lut_to_image(img, DEFAULT_GAMMA_LUT)
501 (h, w, chans) = img.shape
502 if chans == 3:
503 Image.fromarray((img * 255.0).astype(numpy.uint8), "RGB").save(fname)
504 elif chans == 1:
505 img3 = (img * 255.0).astype(numpy.uint8).repeat(3).reshape(h,w,3)
506 Image.fromarray(img3, "RGB").save(fname)
507 else:
508 raise its.error.Error('Unsupported image type')
509
510def downscale_image(img, f):
511 """Shrink an image by a given integer factor.
512
513 This function computes output pixel values by averaging over rectangular
514 regions of the input image; it doesn't skip or sample pixels, and all input
515 image pixels are evenly weighted.
516
517 If the downscaling factor doesn't cleanly divide the width and/or height,
518 then the remaining pixels on the right or bottom edge are discarded prior
519 to the downscaling.
520
521 Args:
522 img: The input image as an ndarray.
523 f: The downscaling factor, which should be an integer.
524
525 Returns:
526 The new (downscaled) image, as an ndarray.
527 """
528 h,w,chans = img.shape
529 f = int(f)
530 assert(f >= 1)
531 h = (h/f)*f
532 w = (w/f)*f
533 img = img[0:h:,0:w:,::]
534 chs = []
535 for i in xrange(chans):
536 ch = img.reshape(h*w*chans)[i::chans].reshape(h,w)
537 ch = ch.reshape(h,w/f,f).mean(2).reshape(h,w/f)
538 ch = ch.T.reshape(w/f,h/f,f).mean(2).T.reshape(h/f,w/f)
539 chs.append(ch.reshape(h*w/(f*f)))
540 img = numpy.vstack(chs).T.reshape(h/f,w/f,chans)
541 return img
542
Timothy Knight9f387012014-11-16 17:45:25 -0800543def __get_color_checker_patch(img, xc,yc, patch_size):
Ruben Brunk370e2432014-10-14 18:33:23 -0700544 r = patch_size/2
Timothy Knight9f387012014-11-16 17:45:25 -0800545 tile = img[yc-r:yc+r:, xc-r:xc+r:, ::]
546 return tile
547
548def __measure_color_checker_patch(img, xc,yc, patch_size):
549 tile = __get_color_checker_patch(img, xc,yc, patch_size)
Ruben Brunk370e2432014-10-14 18:33:23 -0700550 means = tile.mean(1).mean(0)
551 return means
552
553def get_color_checker_chart_patches(img, debug_fname_prefix=None):
554 """Return the center coords of each patch in a color checker chart.
555
556 Assumptions:
557 * Chart is vertical or horizontal w.r.t. camera, but not diagonal.
558 * Chart is (roughly) planar-parallel to the camera.
559 * Chart is centered in frame (roughly).
560 * Around/behind chart is white/grey background.
561 * The only black pixels in the image are from the chart.
562 * Chart is 100% visible and contained within image.
563 * No other objects within image.
564 * Image is well-exposed.
565 * Standard color checker chart with standard-sized black borders.
566
567 The values returned are in the coordinate system of the chart; that is,
Timothy Knightbb1456a2014-11-16 11:14:15 -0800568 patch (0,0) is the brown patch that is in the chart's top-left corner when
569 it is in the normal upright/horizontal orientation. (The chart may be any
570 of the four main orientations in the image.)
Ruben Brunk370e2432014-10-14 18:33:23 -0700571
572 Args:
573 img: Input image, as a numpy array with pixels in [0,1].
574 debug_fname_prefix: If not None, the (string) name of a file prefix to
Chien-Yu Chen682faa22014-10-22 17:34:44 -0700575 use to save a number of debug images for visualizing the output of
Ruben Brunk370e2432014-10-14 18:33:23 -0700576 this function; can be used to see if the patches are being found
Timothy Knight4c8e0772014-11-21 19:28:46 -0800577 successfully.
Ruben Brunk370e2432014-10-14 18:33:23 -0700578
579 Returns:
580 6x4 list of lists of integer (x,y) coords of the center of each patch,
581 ordered in the "chart order" (6x4 row major).
582 """
583
584 # Shrink the original image.
585 DOWNSCALE_FACTOR = 4
586 img_small = downscale_image(img, DOWNSCALE_FACTOR)
587
588 # Make a threshold image, which is 1.0 where the image is black,
589 # and 0.0 elsewhere.
590 BLACK_PIXEL_THRESH = 0.2
591 mask_img = scipy.stats.threshold(
592 img_small.max(2), BLACK_PIXEL_THRESH, 1.1, 0.0)
593 mask_img = 1.0 - scipy.stats.threshold(mask_img, -0.1, 0.1, 1.0)
594
595 if debug_fname_prefix is not None:
596 h,w = mask_img.shape
597 write_image(img, debug_fname_prefix+"_0.jpg")
598 write_image(mask_img.repeat(3).reshape(h,w,3),
599 debug_fname_prefix+"_1.jpg")
600
601 # Mask image flattened to a single row or column (by averaging).
602 # Also apply a threshold to these arrays.
603 FLAT_PIXEL_THRESH = 0.05
604 flat_row = mask_img.mean(0)
605 flat_col = mask_img.mean(1)
606 flat_row = [0 if v < FLAT_PIXEL_THRESH else 1 for v in flat_row]
607 flat_col = [0 if v < FLAT_PIXEL_THRESH else 1 for v in flat_col]
608
609 # Start and end of the non-zero region of the flattened row/column.
610 flat_row_nonzero = [i for i in range(len(flat_row)) if flat_row[i]>0]
611 flat_col_nonzero = [i for i in range(len(flat_col)) if flat_col[i]>0]
612 flat_row_min, flat_row_max = min(flat_row_nonzero), max(flat_row_nonzero)
613 flat_col_min, flat_col_max = min(flat_col_nonzero), max(flat_col_nonzero)
614
615 # Orientation of chart, and number of grid cells horz. and vertically.
616 orient = "h" if flat_row_max-flat_row_min>flat_col_max-flat_col_min else "v"
617 xgrids = 6 if orient=="h" else 4
618 ygrids = 6 if orient=="v" else 4
619
620 # Get better bounds on the patches region, lopping off some of the excess
621 # black border.
622 HRZ_BORDER_PAD_FRAC = 0.0138
623 VERT_BORDER_PAD_FRAC = 0.0395
624 xpad = HRZ_BORDER_PAD_FRAC if orient=="h" else VERT_BORDER_PAD_FRAC
625 ypad = HRZ_BORDER_PAD_FRAC if orient=="v" else VERT_BORDER_PAD_FRAC
626 xchart = flat_row_min + (flat_row_max - flat_row_min) * xpad
627 ychart = flat_col_min + (flat_col_max - flat_col_min) * ypad
628 wchart = (flat_row_max - flat_row_min) * (1 - 2*xpad)
629 hchart = (flat_col_max - flat_col_min) * (1 - 2*ypad)
630
631 # Get the colors of the 4 corner patches, in clockwise order, by measuring
632 # the average value of a small patch at each of the 4 patch centers.
633 colors = []
634 centers = []
635 for (x,y) in [(0,0), (xgrids-1,0), (xgrids-1,ygrids-1), (0,ygrids-1)]:
636 xc = xchart + (x + 0.5)*wchart/xgrids
637 yc = ychart + (y + 0.5)*hchart/ygrids
638 xc = int(xc * DOWNSCALE_FACTOR + 0.5)
639 yc = int(yc * DOWNSCALE_FACTOR + 0.5)
640 centers.append((xc,yc))
641 chan_means = __measure_color_checker_patch(img, xc,yc, 32)
642 colors.append(sum(chan_means) / len(chan_means))
643
644 # The brightest corner is the white patch, the darkest is the black patch.
645 # The black patch should be counter-clockwise from the white patch.
646 white_patch_index = None
647 for i in range(4):
648 if colors[i] == max(colors) and \
649 colors[(i-1+4)%4] == min(colors):
650 white_patch_index = i%4
651 assert(white_patch_index is not None)
652
653 # Return the coords of the origin (top-left when the chart is in the normal
654 # upright orientation) patch's center, and the vector displacement to the
655 # center of the second patch on the first row of the chart (when in the
Chien-Yu Chen682faa22014-10-22 17:34:44 -0700656 # normal upright orientation).
Ruben Brunk370e2432014-10-14 18:33:23 -0700657 origin_index = (white_patch_index+1)%4
658 prev_index = (origin_index-1+4)%4
659 next_index = (origin_index+1)%4
660 origin_center = centers[origin_index]
661 prev_center = centers[prev_index]
662 next_center = centers[next_index]
663 vec_across = tuple([(next_center[i]-origin_center[i])/5.0 for i in [0,1]])
664 vec_down = tuple([(prev_center[i]-origin_center[i])/3.0 for i in [0,1]])
665
666 # Compute the center of each patch.
667 patches = [[],[],[],[]]
668 for yi in range(4):
669 for xi in range(6):
670 x0,y0 = origin_center
671 dxh,dyh = vec_across
672 dxv,dyv = vec_down
673 xc = int(x0 + dxh*xi + dxv*yi)
674 yc = int(y0 + dyh*xi + dyv*yi)
675 patches[yi].append((xc,yc))
676
677 # Sanity check: test that the R,G,B,black,white patches are correct.
Timothy Knight9f387012014-11-16 17:45:25 -0800678 sanity_failed = False
Ruben Brunk370e2432014-10-14 18:33:23 -0700679 patch_info = [(2,2,[0]), # Red
680 (2,1,[1]), # Green
681 (2,0,[2]), # Blue
682 (3,0,[0,1,2]), # White
683 (3,5,[])] # Black
684 for i in range(len(patch_info)):
685 yi,xi,high_chans = patch_info[i]
686 low_chans = [i for i in [0,1,2] if i not in high_chans]
687 xc,yc = patches[yi][xi]
688 means = __measure_color_checker_patch(img, xc,yc, 64)
689 if (min([means[i] for i in high_chans]+[1]) < \
690 max([means[i] for i in low_chans]+[0])):
Timothy Knight9f387012014-11-16 17:45:25 -0800691 sanity_failed = True
Ruben Brunk370e2432014-10-14 18:33:23 -0700692
693 if debug_fname_prefix is not None:
Timothy Knight9f387012014-11-16 17:45:25 -0800694 gridimg = numpy.zeros([4*(32+2), 6*(32+2), 3])
695 for yi in range(4):
696 for xi in range(6):
697 xc,yc = patches[yi][xi]
698 tile = __get_color_checker_patch(img, xc,yc, 32)
699 gridimg[yi*(32+2)+1:yi*(32+2)+1+32,
700 xi*(32+2)+1:xi*(32+2)+1+32, :] = tile
701 write_image(gridimg, debug_fname_prefix+"_2.png")
702
703 assert(not sanity_failed)
Ruben Brunk370e2432014-10-14 18:33:23 -0700704
705 return patches
706
707class __UnitTest(unittest.TestCase):
708 """Run a suite of unit tests on this module.
709 """
710
711 # TODO: Add more unit tests.
712
713 def test_apply_matrix_to_image(self):
714 """Unit test for apply_matrix_to_image.
715
716 Test by using a canned set of values on a 1x1 pixel image.
717
718 [ 1 2 3 ] [ 0.1 ] [ 1.4 ]
719 [ 4 5 6 ] * [ 0.2 ] = [ 3.2 ]
720 [ 7 8 9 ] [ 0.3 ] [ 5.0 ]
721 mat x y
722 """
723 mat = numpy.array([[1,2,3],[4,5,6],[7,8,9]])
724 x = numpy.array([0.1,0.2,0.3]).reshape(1,1,3)
725 y = apply_matrix_to_image(x, mat).reshape(3).tolist()
726 y_ref = [1.4,3.2,5.0]
727 passed = all([math.fabs(y[i] - y_ref[i]) < 0.001 for i in xrange(3)])
728 self.assertTrue(passed)
729
730 def test_apply_lut_to_image(self):
731 """ Unit test for apply_lut_to_image.
732
733 Test by using a canned set of values on a 1x1 pixel image. The LUT will
734 simply double the value of the index:
735
736 lut[x] = 2*x
737 """
738 lut = numpy.array([2*i for i in xrange(65536)])
739 x = numpy.array([0.1,0.2,0.3]).reshape(1,1,3)
740 y = apply_lut_to_image(x, lut).reshape(3).tolist()
741 y_ref = [0.2,0.4,0.6]
742 passed = all([math.fabs(y[i] - y_ref[i]) < 0.001 for i in xrange(3)])
743 self.assertTrue(passed)
744
745if __name__ == '__main__':
746 unittest.main()
747