Shri Borde | 7cd8149 | 2011-11-02 13:20:24 -0700 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright (c) 2011 The LibYuv project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
| 11 | #include <assert.h> |
| 12 | |
| 13 | #include "libyuv/cpu_id.h" |
| 14 | #include "video_common.h" |
| 15 | #include "row.h" |
| 16 | |
// Size in bytes of one temporary ARGB row. BayerRGBToI420 rejects images
// whose width * 4 exceeds this value and sizes its two-row scratch buffer
// from it.
#define kMaxStride (2048 * 4)
| 18 | |
| 19 | namespace libyuv { |
| 20 | |
| 21 | // Note: to do this with Neon vld4.8 would load ARGB values into 4 registers |
| 22 | // and vst would select which 2 components to write. The low level would need |
| 23 | // to be ARGBToBG, ARGBToGB, ARGBToRG, ARGBToGR |
| 24 | |
| 25 | #if defined(WIN32) && !defined(COVERAGE_ENABLED) |
| 26 | #define HAS_ARGBTOBAYERROW_SSSE3 |
| 27 | __declspec(naked) |
| 28 | static void ARGBToBayerRow_SSSE3(const uint8* src_argb, |
| 29 | uint8* dst_bayer, uint32 selector, int pix) { |
| 30 | __asm { |
| 31 | mov eax, [esp + 4] // src_argb |
| 32 | mov edx, [esp + 8] // dst_bayer |
| 33 | movd xmm7, [esp + 12] // selector |
| 34 | mov ecx, [esp + 16] // pix |
| 35 | pshufd xmm7, xmm7, 0 |
| 36 | |
| 37 | wloop: |
| 38 | movdqa xmm0, [eax] |
| 39 | lea eax, [eax + 16] |
| 40 | pshufb xmm0, xmm7 |
| 41 | movd [edx], xmm0 |
| 42 | lea edx, [edx + 4] |
| 43 | sub ecx, 4 |
| 44 | ja wloop |
| 45 | ret |
| 46 | } |
| 47 | } |
| 48 | |
| 49 | #elif (defined(__x86_64__) || defined(__i386__)) && \ |
| 50 | !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR) |
| 51 | |
| 52 | #define HAS_ARGBTOBAYERROW_SSSE3 |
| 53 | static void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer, |
| 54 | uint32 selector, int pix) { |
| 55 | asm volatile( |
| 56 | "movd %3,%%xmm7\n" |
| 57 | "pshufd $0x0,%%xmm7,%%xmm7\n" |
| 58 | "1:" |
| 59 | "movdqa (%0),%%xmm0\n" |
| 60 | "lea 0x10(%0),%0\n" |
| 61 | "pshufb %%xmm7,%%xmm0\n" |
| 62 | "movd %%xmm0,(%1)\n" |
| 63 | "lea 0x4(%1),%1\n" |
| 64 | "sub $0x4,%2\n" |
| 65 | "ja 1b\n" |
| 66 | : "+r"(src_argb), // %0 |
| 67 | "+r"(dst_bayer), // %1 |
| 68 | "+r"(pix) // %2 |
| 69 | : "r"(selector) // %3 |
| 70 | : "memory" |
| 71 | ); |
| 72 | } |
| 73 | #endif |
| 74 | |
| 75 | static void ARGBToBayerRow_C(const uint8* src_argb, |
| 76 | uint8* dst_bayer, uint32 selector, int pix) { |
| 77 | int index0 = selector & 0xff; |
| 78 | int index1 = (selector >> 8) & 0xff; |
| 79 | // Copy a row of Bayer. |
| 80 | for (int x = 0; x < (pix - 1); x += 2) { |
| 81 | dst_bayer[0] = src_argb[index0]; |
| 82 | dst_bayer[1] = src_argb[index1]; |
| 83 | src_argb += 8; |
| 84 | dst_bayer += 2; |
| 85 | } |
| 86 | if (pix & 1) { |
| 87 | dst_bayer[0] = src_argb[index0]; |
| 88 | } |
| 89 | } |
| 90 | |
| 91 | // generate a selector mask useful for pshufb |
| 92 | static uint32 GenerateSelector(int select0, int select1) { |
| 93 | return static_cast<uint32>(select0) | |
| 94 | static_cast<uint32>((select1 + 4) << 8) | |
| 95 | static_cast<uint32>((select0 + 8) << 16) | |
| 96 | static_cast<uint32>((select1 + 12) << 24); |
| 97 | } |
| 98 | |
| 99 | // Converts 32 bit ARGB to any Bayer RGB format. |
| 100 | int ARGBToBayerRGB(const uint8* src_rgb, int src_stride_rgb, |
| 101 | uint8* dst_bayer, int dst_stride_bayer, |
| 102 | uint32 dst_fourcc_bayer, |
| 103 | int width, int height) { |
| 104 | if (height < 0) { |
| 105 | height = -height; |
| 106 | src_rgb = src_rgb + (height - 1) * src_stride_rgb; |
| 107 | src_stride_rgb = -src_stride_rgb; |
| 108 | } |
| 109 | void (*ARGBToBayerRow)(const uint8* src_argb, |
| 110 | uint8* dst_bayer, uint32 selector, int pix); |
| 111 | #if defined(HAS_ARGBTOBAYERROW_SSSE3) |
| 112 | if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) && |
| 113 | (width % 4 == 0) && |
| 114 | IS_ALIGNED(src_rgb, 16) && (src_stride_rgb % 16 == 0) && |
| 115 | IS_ALIGNED(dst_bayer, 4) && (dst_stride_bayer % 4 == 0)) { |
| 116 | ARGBToBayerRow = ARGBToBayerRow_SSSE3; |
| 117 | } else |
| 118 | #endif |
| 119 | { |
| 120 | ARGBToBayerRow = ARGBToBayerRow_C; |
| 121 | } |
| 122 | |
| 123 | int blue_index = 0; |
| 124 | int green_index = 1; |
| 125 | int red_index = 2; |
| 126 | |
| 127 | // Now build a lookup table containing the indices for the four pixels in each |
| 128 | // 2x2 Bayer grid. |
| 129 | uint32 index_map[2]; |
| 130 | switch (dst_fourcc_bayer) { |
| 131 | default: |
| 132 | assert(false); |
| 133 | case FOURCC_RGGB: |
| 134 | index_map[0] = GenerateSelector(red_index, green_index); |
| 135 | index_map[1] = GenerateSelector(green_index, blue_index); |
| 136 | break; |
| 137 | case FOURCC_BGGR: |
| 138 | index_map[0] = GenerateSelector(blue_index, green_index); |
| 139 | index_map[1] = GenerateSelector(green_index, red_index); |
| 140 | break; |
| 141 | case FOURCC_GRBG: |
| 142 | index_map[0] = GenerateSelector(green_index, red_index); |
| 143 | index_map[1] = GenerateSelector(blue_index, green_index); |
| 144 | break; |
| 145 | case FOURCC_GBRG: |
| 146 | index_map[0] = GenerateSelector(green_index, blue_index); |
| 147 | index_map[1] = GenerateSelector(red_index, green_index); |
| 148 | break; |
| 149 | } |
| 150 | |
| 151 | // Now convert. |
| 152 | for (int y = 0; y < height; ++y) { |
| 153 | ARGBToBayerRow(src_rgb, dst_bayer, index_map[y & 1], width); |
| 154 | src_rgb += src_stride_rgb; |
| 155 | dst_bayer += dst_stride_bayer; |
| 156 | } |
| 157 | return 0; |
| 158 | } |
| 159 | |
// Average of two samples, rounded down.
#define AVG(a, b) (((a) + (b)) >> 1)
| 161 | |
| 162 | static void BayerRowBG(const uint8* src_bayer0, int src_stride_bayer, |
| 163 | uint8* dst_rgb, int pix) { |
| 164 | const uint8* src_bayer1 = src_bayer0 + src_stride_bayer; |
| 165 | uint8 g = src_bayer0[1]; |
| 166 | uint8 r = src_bayer1[1]; |
| 167 | for (int x = 0; x < (pix - 2); x += 2) { |
| 168 | dst_rgb[0] = src_bayer0[0]; |
| 169 | dst_rgb[1] = AVG(g, src_bayer0[1]); |
| 170 | dst_rgb[2] = AVG(r, src_bayer1[1]); |
| 171 | dst_rgb[3] = 255U; |
| 172 | dst_rgb[4] = AVG(src_bayer0[0], src_bayer0[2]); |
| 173 | dst_rgb[5] = src_bayer0[1]; |
| 174 | dst_rgb[6] = src_bayer1[1]; |
| 175 | dst_rgb[7] = 255U; |
| 176 | g = src_bayer0[1]; |
| 177 | r = src_bayer1[1]; |
| 178 | src_bayer0 += 2; |
| 179 | src_bayer1 += 2; |
| 180 | dst_rgb += 8; |
| 181 | } |
| 182 | dst_rgb[0] = src_bayer0[0]; |
| 183 | dst_rgb[1] = AVG(g, src_bayer0[1]); |
| 184 | dst_rgb[2] = AVG(r, src_bayer1[1]); |
| 185 | dst_rgb[3] = 255U; |
| 186 | dst_rgb[4] = src_bayer0[0]; |
| 187 | dst_rgb[5] = src_bayer0[1]; |
| 188 | dst_rgb[6] = src_bayer1[1]; |
| 189 | dst_rgb[7] = 255U; |
| 190 | } |
| 191 | |
| 192 | static void BayerRowRG(const uint8* src_bayer0, int src_stride_bayer, |
| 193 | uint8* dst_rgb, int pix) { |
| 194 | const uint8* src_bayer1 = src_bayer0 + src_stride_bayer; |
| 195 | uint8 g = src_bayer0[1]; |
| 196 | uint8 b = src_bayer1[1]; |
| 197 | for (int x = 0; x < (pix - 2); x += 2) { |
| 198 | dst_rgb[0] = AVG(b, src_bayer1[1]); |
| 199 | dst_rgb[1] = AVG(g, src_bayer0[1]); |
| 200 | dst_rgb[2] = src_bayer0[0]; |
| 201 | dst_rgb[3] = 255U; |
| 202 | dst_rgb[4] = src_bayer1[1]; |
| 203 | dst_rgb[5] = src_bayer0[1]; |
| 204 | dst_rgb[6] = AVG(src_bayer0[0], src_bayer0[2]); |
| 205 | dst_rgb[7] = 255U; |
| 206 | g = src_bayer0[1]; |
| 207 | b = src_bayer1[1]; |
| 208 | src_bayer0 += 2; |
| 209 | src_bayer1 += 2; |
| 210 | dst_rgb += 8; |
| 211 | } |
| 212 | dst_rgb[0] = AVG(b, src_bayer1[1]); |
| 213 | dst_rgb[1] = AVG(g, src_bayer0[1]); |
| 214 | dst_rgb[2] = src_bayer0[0]; |
| 215 | dst_rgb[3] = 255U; |
| 216 | dst_rgb[4] = src_bayer1[1]; |
| 217 | dst_rgb[5] = src_bayer0[1]; |
| 218 | dst_rgb[6] = src_bayer0[0]; |
| 219 | dst_rgb[7] = 255U; |
| 220 | } |
| 221 | |
| 222 | static void BayerRowGB(const uint8* src_bayer0, int src_stride_bayer, |
| 223 | uint8* dst_rgb, int pix) { |
| 224 | const uint8* src_bayer1 = src_bayer0 + src_stride_bayer; |
| 225 | uint8 b = src_bayer0[1]; |
| 226 | for (int x = 0; x < (pix - 2); x += 2) { |
| 227 | dst_rgb[0] = AVG(b, src_bayer0[1]); |
| 228 | dst_rgb[1] = src_bayer0[0]; |
| 229 | dst_rgb[2] = src_bayer1[0]; |
| 230 | dst_rgb[3] = 255U; |
| 231 | dst_rgb[4] = src_bayer0[1]; |
| 232 | dst_rgb[5] = AVG(src_bayer0[0], src_bayer0[2]); |
| 233 | dst_rgb[6] = AVG(src_bayer1[0], src_bayer1[2]); |
| 234 | dst_rgb[7] = 255U; |
| 235 | b = src_bayer0[1]; |
| 236 | src_bayer0 += 2; |
| 237 | src_bayer1 += 2; |
| 238 | dst_rgb += 8; |
| 239 | } |
| 240 | dst_rgb[0] = AVG(b, src_bayer0[1]); |
| 241 | dst_rgb[1] = src_bayer0[0]; |
| 242 | dst_rgb[2] = src_bayer1[0]; |
| 243 | dst_rgb[3] = 255U; |
| 244 | dst_rgb[4] = src_bayer0[1]; |
| 245 | dst_rgb[5] = src_bayer0[0]; |
| 246 | dst_rgb[6] = src_bayer1[0]; |
| 247 | dst_rgb[7] = 255U; |
| 248 | } |
| 249 | |
| 250 | static void BayerRowGR(const uint8* src_bayer0, int src_stride_bayer, |
| 251 | uint8* dst_rgb, int pix) { |
| 252 | const uint8* src_bayer1 = src_bayer0 + src_stride_bayer; |
| 253 | uint8 r = src_bayer0[1]; |
| 254 | for (int x = 0; x < (pix - 2); x += 2) { |
| 255 | dst_rgb[0] = src_bayer1[0]; |
| 256 | dst_rgb[1] = src_bayer0[0]; |
| 257 | dst_rgb[2] = AVG(r, src_bayer0[1]); |
| 258 | dst_rgb[3] = 255U; |
| 259 | dst_rgb[4] = AVG(src_bayer1[0], src_bayer1[2]); |
| 260 | dst_rgb[5] = AVG(src_bayer0[0], src_bayer0[2]); |
| 261 | dst_rgb[6] = src_bayer0[1]; |
| 262 | dst_rgb[7] = 255U; |
| 263 | r = src_bayer0[1]; |
| 264 | src_bayer0 += 2; |
| 265 | src_bayer1 += 2; |
| 266 | dst_rgb += 8; |
| 267 | } |
| 268 | dst_rgb[0] = src_bayer1[0]; |
| 269 | dst_rgb[1] = src_bayer0[0]; |
| 270 | dst_rgb[2] = AVG(r, src_bayer0[1]); |
| 271 | dst_rgb[3] = 255U; |
| 272 | dst_rgb[4] = src_bayer1[0]; |
| 273 | dst_rgb[5] = src_bayer0[0]; |
| 274 | dst_rgb[6] = src_bayer0[1]; |
| 275 | dst_rgb[7] = 255U; |
| 276 | } |
| 277 | |
| 278 | // Converts any Bayer RGB format to ARGB. |
| 279 | int BayerRGBToARGB(const uint8* src_bayer, int src_stride_bayer, |
| 280 | uint32 src_fourcc_bayer, |
| 281 | uint8* dst_rgb, int dst_stride_rgb, |
| 282 | int width, int height) { |
| 283 | if (height < 0) { |
| 284 | height = -height; |
| 285 | dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb; |
| 286 | dst_stride_rgb = -dst_stride_rgb; |
| 287 | } |
| 288 | void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer, |
| 289 | uint8* dst_rgb, int pix); |
| 290 | void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer, |
| 291 | uint8* dst_rgb, int pix); |
| 292 | |
| 293 | switch (src_fourcc_bayer) { |
| 294 | default: |
| 295 | assert(false); |
| 296 | case FOURCC_RGGB: |
| 297 | BayerRow0 = BayerRowRG; |
| 298 | BayerRow1 = BayerRowGB; |
| 299 | break; |
| 300 | case FOURCC_BGGR: |
| 301 | BayerRow0 = BayerRowBG; |
| 302 | BayerRow1 = BayerRowGR; |
| 303 | break; |
| 304 | case FOURCC_GRBG: |
| 305 | BayerRow0 = BayerRowGR; |
| 306 | BayerRow1 = BayerRowBG; |
| 307 | break; |
| 308 | case FOURCC_GBRG: |
| 309 | BayerRow0 = BayerRowGB; |
| 310 | BayerRow1 = BayerRowRG; |
| 311 | break; |
| 312 | } |
| 313 | |
| 314 | for (int y = 0; y < (height - 1); y += 2) { |
| 315 | BayerRow0(src_bayer, src_stride_bayer, dst_rgb, width); |
| 316 | BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer, |
| 317 | dst_rgb + dst_stride_rgb, width); |
| 318 | src_bayer += src_stride_bayer * 2; |
| 319 | dst_rgb += dst_stride_rgb * 2; |
| 320 | } |
| 321 | if (height & 1) { |
| 322 | BayerRow0(src_bayer, -src_stride_bayer, dst_rgb, width); |
| 323 | } |
| 324 | return 0; |
| 325 | } |
| 326 | |
| 327 | // Converts any Bayer RGB format to ARGB. |
| 328 | int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer, |
| 329 | uint32 src_fourcc_bayer, |
| 330 | uint8* dst_y, int dst_stride_y, |
| 331 | uint8* dst_u, int dst_stride_u, |
| 332 | uint8* dst_v, int dst_stride_v, |
| 333 | int width, int height) { |
| 334 | if (width * 4 > kMaxStride) { |
| 335 | return -1; |
| 336 | } |
| 337 | // Negative height means invert the image. |
| 338 | if (height < 0) { |
| 339 | height = -height; |
| 340 | int halfheight = (height + 1) >> 1; |
| 341 | dst_y = dst_y + (height - 1) * dst_stride_y; |
| 342 | dst_u = dst_u + (halfheight - 1) * dst_stride_u; |
| 343 | dst_v = dst_v + (halfheight - 1) * dst_stride_v; |
| 344 | dst_stride_y = -dst_stride_y; |
| 345 | dst_stride_u = -dst_stride_u; |
| 346 | dst_stride_v = -dst_stride_v; |
| 347 | } |
| 348 | void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer, |
| 349 | uint8* dst_rgb, int pix); |
| 350 | void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer, |
| 351 | uint8* dst_rgb, int pix); |
| 352 | void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix); |
| 353 | void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, |
| 354 | uint8* dst_u, uint8* dst_v, int width); |
| 355 | SIMD_ALIGNED(uint8 row[kMaxStride * 2]); |
| 356 | |
| 357 | #if defined(HAS_ARGBTOYROW_SSSE3) |
| 358 | if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) && |
| 359 | (width % 16 == 0) && |
| 360 | IS_ALIGNED(row, 16) && (kMaxStride % 16 == 0) && |
| 361 | IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) { |
| 362 | ARGBToYRow = ARGBToYRow_SSSE3; |
| 363 | } else |
| 364 | #endif |
| 365 | { |
| 366 | ARGBToYRow = ARGBToYRow_C; |
| 367 | } |
| 368 | #if defined(HAS_ARGBTOUVROW_SSSE3) |
| 369 | if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) && |
| 370 | (width % 16 == 0) && |
| 371 | IS_ALIGNED(row, 16) && (kMaxStride % 16 == 0) && |
| 372 | IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) && |
| 373 | IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) { |
| 374 | ARGBToUVRow = ARGBToUVRow_SSSE3; |
| 375 | } else |
| 376 | #endif |
| 377 | { |
| 378 | ARGBToUVRow = ARGBToUVRow_C; |
| 379 | } |
| 380 | |
| 381 | switch (src_fourcc_bayer) { |
| 382 | default: |
| 383 | assert(false); |
| 384 | case FOURCC_RGGB: |
| 385 | BayerRow0 = BayerRowRG; |
| 386 | BayerRow1 = BayerRowGB; |
| 387 | break; |
| 388 | case FOURCC_BGGR: |
| 389 | BayerRow0 = BayerRowBG; |
| 390 | BayerRow1 = BayerRowGR; |
| 391 | break; |
| 392 | case FOURCC_GRBG: |
| 393 | BayerRow0 = BayerRowGR; |
| 394 | BayerRow1 = BayerRowBG; |
| 395 | break; |
| 396 | case FOURCC_GBRG: |
| 397 | BayerRow0 = BayerRowGB; |
| 398 | BayerRow1 = BayerRowRG; |
| 399 | break; |
| 400 | } |
| 401 | |
| 402 | for (int y = 0; y < (height - 1); y += 2) { |
| 403 | BayerRow0(src_bayer, src_stride_bayer, row, width); |
| 404 | BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer, |
| 405 | row + kMaxStride, width); |
| 406 | ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width); |
| 407 | ARGBToYRow(row, dst_y, width); |
| 408 | ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width); |
| 409 | src_bayer += src_stride_bayer * 2; |
| 410 | dst_y += dst_stride_y * 2; |
| 411 | dst_u += dst_stride_u; |
| 412 | dst_v += dst_stride_v; |
| 413 | } |
| 414 | // TODO(fbarchard): Make sure this filters properly |
| 415 | if (height & 1) { |
| 416 | BayerRow0(src_bayer, src_stride_bayer, row, width); |
| 417 | ARGBToUVRow(row, 0, dst_u, dst_v, width); |
| 418 | ARGBToYRow(row, dst_y, width); |
| 419 | } |
| 420 | return 0; |
| 421 | } |
| 422 | |
| 423 | } // namespace libyuv |