Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2018 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #include "../skcms.h" |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | d6b41a2 | 2018-05-16 13:12:08 +0000 | [diff] [blame] | 9 | #include "Curve.h" |
Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 10 | #include "LinearAlgebra.h" |
Mike Klein | ef52250 | 2018-04-16 11:10:08 -0400 | [diff] [blame] | 11 | #include "Macros.h" |
Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 12 | #include "PortableMath.h" |
| 13 | #include "TransferFunction.h" |
| 14 | #include "Transform.h" |
| 15 | #include <assert.h> |
| 16 | #include <limits.h> |
| 17 | #include <stdint.h> |
| 18 | #include <string.h> |
| 19 | |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | 5e6853d | 2018-06-05 17:25:43 +0000 | [diff] [blame] | 20 | // Without this wasm would try to use the N=4 128-bit vector code path, |
| 21 | // which while ideal, causes tons of compiler problems. This would be |
| 22 | // a good thing to revisit as emcc matures (currently 1.38.5). |
// Emscripten builds get SKCMS_PORTABLE defined (unless it already is), which
// selects the N=1 scalar instantiation further down in this file.
#if defined(__EMSCRIPTEN_major__) && !defined(SKCMS_PORTABLE)
    #define SKCMS_PORTABLE
#endif

// Runtime switch consulted by profile_next_op(); off by default.
extern bool g_skcms_dump_profile;
bool g_skcms_dump_profile = false;
| 31 | |
| 32 | #if !defined(NDEBUG) && defined(__clang__) |
Mike Klein | ef52250 | 2018-04-16 11:10:08 -0400 | [diff] [blame] | 33 | // Basic profiling tools to time each Op. Not at all thread safe. |
| 34 | |
| 35 | #include <stdio.h> |
| 36 | #include <stdlib.h> |
| 37 | |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | 381c953 | 2018-04-23 16:15:32 +0000 | [diff] [blame] | 38 | #if defined(__arm__) || defined(__aarch64__) |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | eb17254 | 2018-04-23 15:41:39 +0000 | [diff] [blame] | 39 | #include <time.h> |
| 40 | static const char* now_units = "ticks"; |
| 41 | static uint64_t now() { return (uint64_t)clock(); } |
| 42 | #else |
| 43 | static const char* now_units = "cycles"; |
| 44 | static uint64_t now() { return __builtin_readcyclecounter(); } |
| 45 | #endif |
| 46 | |
Mike Klein | ef52250 | 2018-04-16 11:10:08 -0400 | [diff] [blame] | 47 | #define M(op) +1 |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | eb17254 | 2018-04-23 15:41:39 +0000 | [diff] [blame] | 48 | static uint64_t counts[FOREACH_Op(M)]; |
Mike Klein | ef52250 | 2018-04-16 11:10:08 -0400 | [diff] [blame] | 49 | #undef M |
| 50 | |
| 51 | static void profile_dump_stats() { |
| 52 | #define M(op) #op, |
| 53 | static const char* names[] = { FOREACH_Op(M) }; |
| 54 | #undef M |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | eb17254 | 2018-04-23 15:41:39 +0000 | [diff] [blame] | 55 | for (int i = 0; i < ARRAY_COUNT(counts); i++) { |
| 56 | if (counts[i]) { |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | 381c953 | 2018-04-23 16:15:32 +0000 | [diff] [blame] | 57 | fprintf(stderr, "%16s: %12llu %s\n", |
| 58 | names[i], (unsigned long long)counts[i], now_units); |
Mike Klein | ef52250 | 2018-04-16 11:10:08 -0400 | [diff] [blame] | 59 | } |
| 60 | } |
| 61 | } |
| 62 | |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | fe7e7cb | 2018-04-24 16:06:32 +0000 | [diff] [blame] | 63 | static inline Op profile_next_op(Op op) { |
| 64 | if (__builtin_expect(g_skcms_dump_profile, false)) { |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | b97f28b | 2018-04-24 14:32:32 +0000 | [diff] [blame] | 65 | static uint64_t start = 0; |
| 66 | static uint64_t* current = NULL; |
Mike Klein | ef52250 | 2018-04-16 11:10:08 -0400 | [diff] [blame] | 67 | |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | b97f28b | 2018-04-24 14:32:32 +0000 | [diff] [blame] | 68 | if (!current) { |
| 69 | atexit(profile_dump_stats); |
| 70 | } else { |
| 71 | *current += now() - start; |
| 72 | } |
| 73 | |
| 74 | current = &counts[op]; |
| 75 | start = now(); |
Mike Klein | ef52250 | 2018-04-16 11:10:08 -0400 | [diff] [blame] | 76 | } |
Mike Klein | ef52250 | 2018-04-16 11:10:08 -0400 | [diff] [blame] | 77 | return op; |
| 78 | } |
| 79 | #else |
| 80 | static inline Op profile_next_op(Op op) { |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | b97f28b | 2018-04-24 14:32:32 +0000 | [diff] [blame] | 81 | (void)g_skcms_dump_profile; |
Mike Klein | ef52250 | 2018-04-16 11:10:08 -0400 | [diff] [blame] | 82 | return op; |
| 83 | } |
| 84 | #endif |
| 85 | |
// SIMD vector types, named <element type>x<lane count>.
// Clang sizes them by lane count (ext_vector_type); GCC sizes them by total
// byte width (vector_size), so the numbers below differ between the branches
// while describing the same layouts.
#if defined(__clang__)
    typedef float    Fx4    __attribute__((ext_vector_type(4)));
    typedef int32_t  I32x4  __attribute__((ext_vector_type(4)));
    typedef uint64_t U64x4  __attribute__((ext_vector_type(4)));
    typedef uint32_t U32x4  __attribute__((ext_vector_type(4)));
    typedef uint16_t U16x4  __attribute__((ext_vector_type(4)));
    typedef uint8_t  U8x4   __attribute__((ext_vector_type(4)));

    typedef float    Fx8    __attribute__((ext_vector_type(8)));
    typedef int32_t  I32x8  __attribute__((ext_vector_type(8)));
    typedef uint64_t U64x8  __attribute__((ext_vector_type(8)));
    typedef uint32_t U32x8  __attribute__((ext_vector_type(8)));
    typedef uint16_t U16x8  __attribute__((ext_vector_type(8)));
    typedef uint8_t  U8x8   __attribute__((ext_vector_type(8)));

    typedef float    Fx16   __attribute__((ext_vector_type(16)));
    typedef int32_t  I32x16 __attribute__((ext_vector_type(16)));
    typedef uint64_t U64x16 __attribute__((ext_vector_type(16)));
    typedef uint32_t U32x16 __attribute__((ext_vector_type(16)));
    typedef uint16_t U16x16 __attribute__((ext_vector_type(16)));
    typedef uint8_t  U8x16  __attribute__((ext_vector_type(16)));
#elif defined(__GNUC__)
    typedef float    Fx4    __attribute__((vector_size(16)));
    typedef int32_t  I32x4  __attribute__((vector_size(16)));
    typedef uint64_t U64x4  __attribute__((vector_size(32)));
    typedef uint32_t U32x4  __attribute__((vector_size(16)));
    typedef uint16_t U16x4  __attribute__((vector_size( 8)));
    typedef uint8_t  U8x4   __attribute__((vector_size( 4)));

    typedef float    Fx8    __attribute__((vector_size(32)));
    typedef int32_t  I32x8  __attribute__((vector_size(32)));
    typedef uint64_t U64x8  __attribute__((vector_size(64)));
    typedef uint32_t U32x8  __attribute__((vector_size(32)));
    typedef uint16_t U16x8  __attribute__((vector_size(16)));
    typedef uint8_t  U8x8   __attribute__((vector_size( 8)));

    typedef float    Fx16   __attribute__((vector_size( 64)));
    typedef int32_t  I32x16 __attribute__((vector_size( 64)));
    typedef uint64_t U64x16 __attribute__((vector_size(128)));
    typedef uint32_t U32x16 __attribute__((vector_size( 64)));
    typedef uint16_t U16x16 __attribute__((vector_size( 32)));
    typedef uint8_t  U8x16  __attribute__((vector_size( 16)));
#endif
| 129 | |
// First, instantiate our default exec_ops() implementation using the default compilation target.
| 131 | |
| 132 | #if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__)) |
| 133 | #define N 1 |
| 134 | |
| 135 | #define F float |
| 136 | #define U64 uint64_t |
| 137 | #define U32 uint32_t |
| 138 | #define I32 int32_t |
| 139 | #define U16 uint16_t |
| 140 | #define U8 uint8_t |
| 141 | |
| 142 | #define F0 0.0f |
| 143 | #define F1 1.0f |
| 144 | |
| 145 | #elif defined(__AVX512F__) |
| 146 | #define N 16 |
| 147 | |
| 148 | #define F Fx16 |
| 149 | #define U64 U64x16 |
| 150 | #define U32 U32x16 |
| 151 | #define I32 I32x16 |
| 152 | #define U16 U16x16 |
| 153 | #define U8 U8x16 |
| 154 | |
| 155 | #define F0 (F){0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0} |
| 156 | #define F1 (F){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1} |
| 157 | #elif defined(__AVX__) |
| 158 | #define N 8 |
| 159 | |
| 160 | #define F Fx8 |
| 161 | #define U64 U64x8 |
| 162 | #define U32 U32x8 |
| 163 | #define I32 I32x8 |
| 164 | #define U16 U16x8 |
| 165 | #define U8 U8x8 |
| 166 | |
| 167 | #define F0 (F){0,0,0,0, 0,0,0,0} |
| 168 | #define F1 (F){1,1,1,1, 1,1,1,1} |
| 169 | #else |
| 170 | #define N 4 |
| 171 | |
| 172 | #define F Fx4 |
| 173 | #define U64 U64x4 |
| 174 | #define U32 U32x4 |
| 175 | #define I32 I32x4 |
| 176 | #define U16 U16x4 |
| 177 | #define U8 U8x4 |
| 178 | |
| 179 | #define F0 (F){0,0,0,0} |
| 180 | #define F1 (F){1,1,1,1} |
| 181 | #endif |
| 182 | |
| 183 | #define NS(id) id |
| 184 | #define ATTR |
| 185 | #include "Transform_inl.h" |
| 186 | #undef N |
| 187 | #undef F |
| 188 | #undef U64 |
| 189 | #undef U32 |
| 190 | #undef I32 |
| 191 | #undef U16 |
| 192 | #undef U8 |
| 193 | #undef F0 |
| 194 | #undef F1 |
| 195 | #undef NS |
| 196 | #undef ATTR |
| 197 | |
| 198 | // Now, instantiate any other versions of run_program() we may want for runtime detection. |
| 199 | #if !defined(SKCMS_PORTABLE) && (defined(__clang__) || defined(__GNUC__)) \ |
| 200 | && defined(__x86_64__) && !defined(__AVX2__) |
| 201 | #define N 8 |
| 202 | #define F Fx8 |
| 203 | #define U64 U64x8 |
| 204 | #define U32 U32x8 |
| 205 | #define I32 I32x8 |
| 206 | #define U16 U16x8 |
| 207 | #define U8 U8x8 |
| 208 | #define F0 (F){0,0,0,0, 0,0,0,0} |
| 209 | #define F1 (F){1,1,1,1, 1,1,1,1} |
| 210 | |
| 211 | #define NS(id) id ## _hsw |
| 212 | #define ATTR __attribute__((target("avx2,f16c"))) |
| 213 | |
| 214 | // We check these guards to see if we have support for these features. |
| 215 | // They're likely _not_ defined here in our baseline build config. |
| 216 | #ifndef __AVX__ |
| 217 | #define __AVX__ 1 |
| 218 | #define UNDEF_AVX |
| 219 | #endif |
| 220 | #ifndef __F16C__ |
| 221 | #define __F16C__ 1 |
| 222 | #define UNDEF_F16C |
| 223 | #endif |
| 224 | #ifndef __AVX2__ |
| 225 | #define __AVX2__ 1 |
| 226 | #define UNDEF_AVX2 |
| 227 | #endif |
| 228 | |
| 229 | #include "Transform_inl.h" |
| 230 | |
| 231 | #undef N |
| 232 | #undef F |
| 233 | #undef U64 |
| 234 | #undef U32 |
| 235 | #undef I32 |
| 236 | #undef U16 |
| 237 | #undef U8 |
| 238 | #undef F0 |
| 239 | #undef F1 |
| 240 | #undef NS |
| 241 | #undef ATTR |
| 242 | |
| 243 | #ifdef UNDEF_AVX |
| 244 | #undef __AVX__ |
| 245 | #undef UNDEF_AVX |
| 246 | #endif |
| 247 | #ifdef UNDEF_F16C |
| 248 | #undef __F16C__ |
| 249 | #undef UNDEF_F16C |
| 250 | #endif |
| 251 | #ifdef UNDEF_AVX2 |
| 252 | #undef __AVX2__ |
| 253 | #undef UNDEF_AVX2 |
| 254 | #endif |
| 255 | |
// Tells skcms_Transform() that a run_program_hsw variant exists and that
// hsw_ok() must be consulted before using it.
#define TEST_FOR_HSW

// Set to true by check_hsw_ok() only when every probe below succeeds.
static bool hsw_ok_ = false;

// Probe CPUID and XGETBV for the features required by the _hsw instantiation
// and record the verdict in hsw_ok_.  Any missing feature returns early and
// leaves hsw_ok_ untouched.
// See http://www.sandpile.org/x86/cpuid.htm
static void check_hsw_ok(void) {
    uint32_t eax, ebx, ecx, edx;

    // cpuid(1): basic feature flags.
    __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
                                 : "0"(1), "2"(0));

    // Prerequisites, plus XSAVE/OSXSAVE which are checked before xgetbv is executed.
    const uint32_t edx_required = (1u << 25)   // SSE
                                | (1u << 26);  // SSE2
    const uint32_t ecx_required = (1u <<  0)   // SSE3
                                | (1u <<  9)   // SSSE3
                                | (1u << 19)   // SSE4.1
                                | (1u << 20)   // SSE4.2
                                | (1u << 26)   // XSAVE
                                | (1u << 27);  // OSXSAVE
    if ((edx & edx_required) != edx_required) { return; }
    if ((ecx & ecx_required) != ecx_required) { return; }

    // xgetbv(0): is both XMM and YMM state being saved?
    uint32_t xcr0_lo, xcr0_hi;
    __asm__ __volatile__("xgetbv" : "=a"(xcr0_lo), "=d"(xcr0_hi) : "c"(0));
    const uint32_t xmm_and_ymm = 3u << 1;
    if ((xcr0_lo & xmm_and_ymm) != xmm_and_ymm) { return; }

    const uint32_t ecx_vector = (1u << 28)   // AVX
                              | (1u << 29)   // F16C
                              | (1u << 12);  // FMA (TODO: not currently used)
    if ((ecx & ecx_vector) != ecx_vector) { return; }

    // cpuid(7): the final feature bit, AVX2.
    __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
                                 : "0"(7), "2"(0));
    const uint32_t avx2 = 1u << 5;
    if ((ebx & avx2) != avx2) { return; }

    hsw_ok_ = true;
}
| 294 | |
| 295 | #if defined(_MSC_VER) |
| 296 | #include <Windows.h> |
| 297 | INIT_ONCE check_hsw_ok_once = INIT_ONCE_STATIC_INIT; |
| 298 | |
| 299 | static BOOL check_hsw_ok_InitOnce_wrapper(INIT_ONCE* once, void* param, void** ctx) { |
| 300 | (void)once; |
| 301 | (void)param; |
| 302 | (void)ctx; |
| 303 | check_hsw_ok(); |
| 304 | return TRUE; |
| 305 | } |
| 306 | |
| 307 | static bool hsw_ok() { |
| 308 | InitOnceExecuteOnce(&check_hsw_ok_once, check_hsw_ok_InitOnce_wrapper, NULL, NULL); |
| 309 | return hsw_ok_; |
| 310 | } |
| 311 | #else |
| 312 | #include <pthread.h> |
| 313 | static pthread_once_t check_hsw_ok_once = PTHREAD_ONCE_INIT; |
| 314 | |
| 315 | static bool hsw_ok() { |
| 316 | pthread_once(&check_hsw_ok_once, check_hsw_ok); |
| 317 | return hsw_ok_; |
| 318 | } |
| 319 | #endif |
| 320 | |
| 321 | #endif |
| 322 | |
| 323 | static bool is_identity_tf(const skcms_TransferFunction* tf) { |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | f0db236 | 2018-04-25 17:02:32 +0000 | [diff] [blame] | 324 | return tf->g == 1 && tf->a == 1 |
| 325 | && tf->b == 0 && tf->c == 0 && tf->d == 0 && tf->e == 0 && tf->f == 0; |
Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 326 | } |
| 327 | |
| 328 | typedef struct { |
| 329 | Op op; |
| 330 | const void* arg; |
| 331 | } OpAndArg; |
| 332 | |
| 333 | static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) { |
| 334 | static const struct { Op parametric, table_8, table_16; } ops[] = { |
| 335 | { Op_tf_r, Op_table_8_r, Op_table_16_r }, |
| 336 | { Op_tf_g, Op_table_8_g, Op_table_16_g }, |
| 337 | { Op_tf_b, Op_table_8_b, Op_table_16_b }, |
| 338 | { Op_tf_a, Op_table_8_a, Op_table_16_a }, |
| 339 | }; |
| 340 | |
| 341 | if (curve->table_entries == 0) { |
| 342 | return is_identity_tf(&curve->parametric) |
| 343 | ? (OpAndArg){ Op_noop, NULL } |
| 344 | : (OpAndArg){ ops[channel].parametric, &curve->parametric }; |
| 345 | } else if (curve->table_8) { |
| 346 | return (OpAndArg){ ops[channel].table_8, curve }; |
| 347 | } else if (curve->table_16) { |
| 348 | return (OpAndArg){ ops[channel].table_16, curve }; |
| 349 | } |
| 350 | |
| 351 | assert(false); |
| 352 | return (OpAndArg){Op_noop,NULL}; |
| 353 | } |
| 354 | |
| 355 | static size_t bytes_per_pixel(skcms_PixelFormat fmt) { |
| 356 | switch (fmt >> 1) { // ignore rgb/bgr |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | 09f92b9 | 2018-06-21 18:47:13 +0000 | [diff] [blame^] | 357 | case skcms_PixelFormat_A_8 >> 1: return 1; |
| 358 | case skcms_PixelFormat_G_8 >> 1: return 1; |
| 359 | case skcms_PixelFormat_ABGR_4444 >> 1: return 2; |
Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 360 | case skcms_PixelFormat_RGB_565 >> 1: return 2; |
| 361 | case skcms_PixelFormat_RGB_888 >> 1: return 3; |
| 362 | case skcms_PixelFormat_RGBA_8888 >> 1: return 4; |
| 363 | case skcms_PixelFormat_RGBA_1010102 >> 1: return 4; |
| 364 | case skcms_PixelFormat_RGB_161616 >> 1: return 6; |
| 365 | case skcms_PixelFormat_RGBA_16161616 >> 1: return 8; |
| 366 | case skcms_PixelFormat_RGB_hhh >> 1: return 6; |
| 367 | case skcms_PixelFormat_RGBA_hhhh >> 1: return 8; |
| 368 | case skcms_PixelFormat_RGB_fff >> 1: return 12; |
| 369 | case skcms_PixelFormat_RGBA_ffff >> 1: return 16; |
| 370 | } |
| 371 | assert(false); |
| 372 | return 0; |
| 373 | } |
| 374 | |
Mike Klein | bef8353 | 2018-04-18 12:32:35 -0400 | [diff] [blame] | 375 | static bool prep_for_destination(const skcms_ICCProfile* profile, |
| 376 | skcms_Matrix3x3* fromXYZD50, |
| 377 | skcms_TransferFunction* invR, |
| 378 | skcms_TransferFunction* invG, |
| 379 | skcms_TransferFunction* invB) { |
| 380 | // We only support destinations with parametric transfer functions |
| 381 | // and with gamuts that can be transformed from XYZD50. |
| 382 | return profile->has_trc |
| 383 | && profile->has_toXYZD50 |
| 384 | && profile->trc[0].table_entries == 0 |
| 385 | && profile->trc[1].table_entries == 0 |
| 386 | && profile->trc[2].table_entries == 0 |
| 387 | && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR) |
| 388 | && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG) |
| 389 | && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB) |
| 390 | && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50); |
| 391 | } |
| 392 | |
Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 393 | bool skcms_Transform(const void* src, |
| 394 | skcms_PixelFormat srcFmt, |
| 395 | skcms_AlphaFormat srcAlpha, |
| 396 | const skcms_ICCProfile* srcProfile, |
| 397 | void* dst, |
| 398 | skcms_PixelFormat dstFmt, |
| 399 | skcms_AlphaFormat dstAlpha, |
| 400 | const skcms_ICCProfile* dstProfile, |
| 401 | size_t nz) { |
| 402 | const size_t dst_bpp = bytes_per_pixel(dstFmt), |
| 403 | src_bpp = bytes_per_pixel(srcFmt); |
| 404 | // Let's just refuse if the request is absurdly big. |
| 405 | if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) { |
| 406 | return false; |
| 407 | } |
| 408 | int n = (int)nz; |
| 409 | |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | 94ed719 | 2018-06-21 16:29:13 +0000 | [diff] [blame] | 410 | // Null profiles default to sRGB. Passing null for both is handy when doing format conversion. |
| 411 | if (!srcProfile) { |
| 412 | srcProfile = skcms_sRGB_profile(); |
| 413 | } |
| 414 | if (!dstProfile) { |
| 415 | dstProfile = skcms_sRGB_profile(); |
Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 416 | } |
| 417 | |
| 418 | // We can't transform in place unless the PixelFormats are the same size. |
| 419 | if (dst == src && (dstFmt >> 1) != (srcFmt >> 1)) { |
| 420 | return false; |
| 421 | } |
| 422 | // TODO: this check lazilly disallows U16 <-> F16, but that would actually be fine. |
| 423 | // TODO: more careful alias rejection (like, dst == src + 1)? |
| 424 | |
| 425 | Op program [32]; |
| 426 | const void* arguments[32]; |
| 427 | |
| 428 | Op* ops = program; |
| 429 | const void** args = arguments; |
| 430 | |
| 431 | skcms_TransferFunction inv_dst_tf_r, inv_dst_tf_g, inv_dst_tf_b; |
| 432 | skcms_Matrix3x3 from_xyz; |
| 433 | |
| 434 | switch (srcFmt >> 1) { |
| 435 | default: return false; |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | 09f92b9 | 2018-06-21 18:47:13 +0000 | [diff] [blame^] | 436 | case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_load_a8; break; |
| 437 | case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_load_g8; break; |
| 438 | case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_load_4444; break; |
Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 439 | case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_load_565; break; |
| 440 | case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_load_888; break; |
| 441 | case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_load_8888; break; |
| 442 | case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_load_1010102; break; |
| 443 | case skcms_PixelFormat_RGB_161616 >> 1: *ops++ = Op_load_161616; break; |
| 444 | case skcms_PixelFormat_RGBA_16161616 >> 1: *ops++ = Op_load_16161616; break; |
| 445 | case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_load_hhh; break; |
| 446 | case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_load_hhhh; break; |
| 447 | case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_load_fff; break; |
| 448 | case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_load_ffff; break; |
| 449 | } |
| 450 | if (srcFmt & 1) { |
| 451 | *ops++ = Op_swap_rb; |
| 452 | } |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | 09f92b9 | 2018-06-21 18:47:13 +0000 | [diff] [blame^] | 453 | skcms_ICCProfile gray_dst_profile; |
| 454 | if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) { |
| 455 | // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform |
| 456 | // luminance (Y) by the destination transfer function. |
| 457 | gray_dst_profile = *dstProfile; |
| 458 | skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50); |
| 459 | dstProfile = &gray_dst_profile; |
| 460 | } |
Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 461 | |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | 94ed719 | 2018-06-21 16:29:13 +0000 | [diff] [blame] | 462 | if (srcProfile->data_color_space == skcms_Signature_CMYK) { |
Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 463 | // Photoshop creates CMYK images as inverse CMYK. |
| 464 | // These happen to be the only ones we've _ever_ seen. |
| 465 | *ops++ = Op_invert; |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | c8799aa | 2018-05-15 19:22:08 +0000 | [diff] [blame] | 466 | // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K. |
| 467 | srcAlpha = skcms_AlphaFormat_Unpremul; |
Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 468 | } |
| 469 | |
| 470 | if (srcAlpha == skcms_AlphaFormat_Opaque) { |
| 471 | *ops++ = Op_force_opaque; |
| 472 | } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) { |
| 473 | *ops++ = Op_unpremul; |
| 474 | } |
| 475 | |
| 476 | // TODO: We can skip this work if both srcAlpha and dstAlpha are PremulLinear, and the profiles |
| 477 | // are the same. Also, if dstAlpha is PremulLinear, and SrcAlpha is Opaque. |
| 478 | if (dstProfile != srcProfile || |
| 479 | srcAlpha == skcms_AlphaFormat_PremulLinear || |
| 480 | dstAlpha == skcms_AlphaFormat_PremulLinear) { |
| 481 | |
Mike Klein | bef8353 | 2018-04-18 12:32:35 -0400 | [diff] [blame] | 482 | if (!prep_for_destination(dstProfile, |
| 483 | &from_xyz, &inv_dst_tf_r, &inv_dst_tf_b, &inv_dst_tf_g)) { |
| 484 | return false; |
| 485 | } |
| 486 | |
Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 487 | if (srcProfile->has_A2B) { |
| 488 | if (srcProfile->A2B.input_channels) { |
| 489 | for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) { |
| 490 | OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i); |
| 491 | if (oa.op != Op_noop) { |
| 492 | *ops++ = oa.op; |
| 493 | *args++ = oa.arg; |
| 494 | } |
| 495 | } |
| 496 | switch (srcProfile->A2B.input_channels) { |
| 497 | case 3: *ops++ = srcProfile->A2B.grid_8 ? Op_clut_3D_8 : Op_clut_3D_16; break; |
| 498 | case 4: *ops++ = srcProfile->A2B.grid_8 ? Op_clut_4D_8 : Op_clut_4D_16; break; |
| 499 | default: return false; |
| 500 | } |
| 501 | *args++ = &srcProfile->A2B; |
| 502 | } |
| 503 | |
| 504 | if (srcProfile->A2B.matrix_channels == 3) { |
| 505 | for (int i = 0; i < 3; i++) { |
| 506 | OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i); |
| 507 | if (oa.op != Op_noop) { |
| 508 | *ops++ = oa.op; |
| 509 | *args++ = oa.arg; |
| 510 | } |
| 511 | } |
| 512 | |
| 513 | static const skcms_Matrix3x4 I = {{ |
| 514 | {1,0,0,0}, |
| 515 | {0,1,0,0}, |
| 516 | {0,0,1,0}, |
| 517 | }}; |
| 518 | if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) { |
| 519 | *ops++ = Op_matrix_3x4; |
| 520 | *args++ = &srcProfile->A2B.matrix; |
| 521 | } |
| 522 | } |
| 523 | |
| 524 | if (srcProfile->A2B.output_channels == 3) { |
| 525 | for (int i = 0; i < 3; i++) { |
| 526 | OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i); |
| 527 | if (oa.op != Op_noop) { |
| 528 | *ops++ = oa.op; |
| 529 | *args++ = oa.arg; |
| 530 | } |
| 531 | } |
| 532 | } |
| 533 | |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | a724ea5 | 2018-05-07 16:57:26 +0000 | [diff] [blame] | 534 | if (srcProfile->pcs == skcms_Signature_Lab) { |
Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 535 | *ops++ = Op_lab_to_xyz; |
| 536 | } |
| 537 | |
| 538 | } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) { |
| 539 | for (int i = 0; i < 3; i++) { |
| 540 | OpAndArg oa = select_curve_op(&srcProfile->trc[i], i); |
| 541 | if (oa.op != Op_noop) { |
| 542 | *ops++ = oa.op; |
| 543 | *args++ = oa.arg; |
| 544 | } |
| 545 | } |
| 546 | } else { |
| 547 | return false; |
| 548 | } |
| 549 | |
| 550 | // At this point our source colors are linear, either RGB (XYZ-type profiles) |
| 551 | // or XYZ (A2B-type profiles). Unpremul is a linear operation (multiply by a |
| 552 | // constant 1/a), so either way we can do it now if needed. |
| 553 | if (srcAlpha == skcms_AlphaFormat_PremulLinear) { |
| 554 | *ops++ = Op_unpremul; |
| 555 | } |
| 556 | |
Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 557 | // A2B sources should already be in XYZD50 at this point. |
| 558 | // Others still need to be transformed using their toXYZD50 matrix. |
| 559 | // N.B. There are profiles that contain both A2B tags and toXYZD50 matrices. |
| 560 | // If we use the A2B tags, we need to ignore the XYZD50 matrix entirely. |
| 561 | assert (srcProfile->has_A2B || srcProfile->has_toXYZD50); |
| 562 | static const skcms_Matrix3x3 I = {{ |
| 563 | { 1.0f, 0.0f, 0.0f }, |
| 564 | { 0.0f, 1.0f, 0.0f }, |
| 565 | { 0.0f, 0.0f, 1.0f }, |
| 566 | }}; |
| 567 | const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50; |
| 568 | |
| 569 | // There's a chance the source and destination gamuts are identical, |
| 570 | // in which case we can skip the gamut transform. |
| 571 | if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) { |
Mike Klein | 7b67b4a | 2018-04-12 09:21:02 -0400 | [diff] [blame] | 572 | // Concat the entire gamut transform into from_xyz, |
| 573 | // now slightly misnamed but it's a handy spot to stash the result. |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | b5d1f24 | 2018-05-09 18:44:08 +0000 | [diff] [blame] | 574 | from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz); |
Mike Klein | 7b67b4a | 2018-04-12 09:21:02 -0400 | [diff] [blame] | 575 | *ops++ = Op_matrix_3x3; |
| 576 | *args++ = &from_xyz; |
Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 577 | } |
| 578 | |
Mike Klein | bef8353 | 2018-04-18 12:32:35 -0400 | [diff] [blame] | 579 | if (dstAlpha == skcms_AlphaFormat_PremulLinear) { |
| 580 | *ops++ = Op_premul; |
Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 581 | } |
Mike Klein | bef8353 | 2018-04-18 12:32:35 -0400 | [diff] [blame] | 582 | |
| 583 | // Encode back to dst RGB using its parametric transfer functions. |
| 584 | if (!is_identity_tf(&inv_dst_tf_r)) { *ops++ = Op_tf_r; *args++ = &inv_dst_tf_r; } |
| 585 | if (!is_identity_tf(&inv_dst_tf_g)) { *ops++ = Op_tf_g; *args++ = &inv_dst_tf_g; } |
| 586 | if (!is_identity_tf(&inv_dst_tf_b)) { *ops++ = Op_tf_b; *args++ = &inv_dst_tf_b; } |
Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 587 | } |
| 588 | |
| 589 | if (dstAlpha == skcms_AlphaFormat_Opaque) { |
| 590 | *ops++ = Op_force_opaque; |
| 591 | } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) { |
| 592 | *ops++ = Op_premul; |
| 593 | } |
| 594 | if (dstFmt & 1) { |
| 595 | *ops++ = Op_swap_rb; |
| 596 | } |
| 597 | if (dstFmt < skcms_PixelFormat_RGB_hhh) { |
| 598 | *ops++ = Op_clamp; |
| 599 | } |
| 600 | switch (dstFmt >> 1) { |
| 601 | default: return false; |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | 09f92b9 | 2018-06-21 18:47:13 +0000 | [diff] [blame^] | 602 | case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_store_a8; break; |
| 603 | case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_store_g8; break; |
| 604 | case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_store_4444; break; |
Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 605 | case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_store_565; break; |
| 606 | case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_store_888; break; |
| 607 | case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_store_8888; break; |
| 608 | case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_store_1010102; break; |
| 609 | case skcms_PixelFormat_RGB_161616 >> 1: *ops++ = Op_store_161616; break; |
| 610 | case skcms_PixelFormat_RGBA_16161616 >> 1: *ops++ = Op_store_16161616; break; |
| 611 | case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_store_hhh; break; |
| 612 | case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_store_hhhh; break; |
| 613 | case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_store_fff; break; |
| 614 | case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_store_ffff; break; |
| 615 | } |
| 616 | |
| 617 | void (*run)(const Op*, const void**, const char*, char*, int, size_t,size_t) = run_program; |
| 618 | #if defined(TEST_FOR_HSW) |
| 619 | if (hsw_ok()) { |
| 620 | run = run_program_hsw; |
| 621 | } |
| 622 | #endif |
| 623 | run(program, arguments, src, dst, n, src_bpp,dst_bpp); |
| 624 | return true; |
| 625 | } |
Mike Klein | bef8353 | 2018-04-18 12:32:35 -0400 | [diff] [blame] | 626 | |
| 627 | static void assert_usable_as_destination(const skcms_ICCProfile* profile) { |
| 628 | #if defined(NDEBUG) |
| 629 | (void)profile; |
| 630 | #else |
| 631 | skcms_Matrix3x3 fromXYZD50; |
| 632 | skcms_TransferFunction invR, invG, invB; |
| 633 | assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB)); |
| 634 | #endif |
| 635 | } |
| 636 | |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | 2af0904 | 2018-05-02 18:59:47 +0000 | [diff] [blame] | 637 | bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) { |
| 638 | skcms_Matrix3x3 fromXYZD50; |
| 639 | if (!profile->has_trc || !profile->has_toXYZD50 |
| 640 | || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) { |
| 641 | return false; |
| 642 | } |
| 643 | |
| 644 | skcms_TransferFunction tf[3]; |
| 645 | for (int i = 0; i < 3; i++) { |
| 646 | skcms_TransferFunction inv; |
| 647 | if (profile->trc[i].table_entries == 0 |
| 648 | && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) { |
| 649 | tf[i] = profile->trc[i].parametric; |
| 650 | continue; |
| 651 | } |
| 652 | |
| 653 | float max_error; |
| 654 | // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible. |
| 655 | if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) { |
| 656 | return false; |
| 657 | } |
| 658 | } |
| 659 | |
| 660 | for (int i = 0; i < 3; ++i) { |
| 661 | profile->trc[i].table_entries = 0; |
| 662 | profile->trc[i].parametric = tf[i]; |
| 663 | } |
| 664 | |
| 665 | assert_usable_as_destination(profile); |
| 666 | return true; |
| 667 | } |
| 668 | |
| 669 | bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) { |
| 670 | // Operate on a copy of profile, so we can choose the best TF for the original curves |
| 671 | skcms_ICCProfile result = *profile; |
| 672 | if (!skcms_MakeUsableAsDestination(&result)) { |
| 673 | return false; |
| 674 | } |
| 675 | |
| 676 | int best_tf = 0; |
| 677 | float min_max_error = INFINITY_; |
| 678 | for (int i = 0; i < 3; i++) { |
| 679 | skcms_TransferFunction inv; |
| 680 | skcms_TransferFunction_invert(&result.trc[i].parametric, &inv); |
| 681 | |
| 682 | float err = 0; |
| 683 | for (int j = 0; j < 3; ++j) { |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | 8c29c14 | 2018-05-17 18:42:40 +0000 | [diff] [blame] | 684 | err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv)); |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | 2af0904 | 2018-05-02 18:59:47 +0000 | [diff] [blame] | 685 | } |
| 686 | if (min_max_error > err) { |
| 687 | min_max_error = err; |
| 688 | best_tf = i; |
| 689 | } |
| 690 | } |
| 691 | |
| 692 | for (int i = 0; i < 3; i++) { |
| 693 | result.trc[i].parametric = result.trc[best_tf].parametric; |
| 694 | } |
| 695 | |
| 696 | *profile = result; |
| 697 | assert_usable_as_destination(profile); |
| 698 | return true; |
| 699 | } |