Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2018 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | 5155d38 | 2018-07-02 19:08:45 +0000 | [diff] [blame^] | 8 | #include "skcms.h" |
| 9 | #include "skcms_internal.h" |
| 10 | #include <assert.h> |
| 11 | #include <float.h> |
| 12 | #include <limits.h> |
| 13 | #include <stdlib.h> |
| 14 | #include <string.h> |
Mike Klein | ded7a55 | 2018-04-10 10:05:31 -0400 | [diff] [blame] | 15 | |
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com | 5155d38 | 2018-07-02 19:08:45 +0000 | [diff] [blame^] | 16 | static float minus_1_ulp(float x) { |
| 17 | int32_t bits; |
| 18 | memcpy(&bits, &x, sizeof(bits)); |
| 19 | bits = bits - 1; |
| 20 | memcpy(&x, &bits, sizeof(bits)); |
| 21 | return x; |
| 22 | } |
| 23 | |
| 24 | float skcms_eval_curve(const skcms_Curve* curve, float x) { |
| 25 | if (curve->table_entries == 0) { |
| 26 | return skcms_TransferFunction_eval(&curve->parametric, x); |
| 27 | } |
| 28 | |
| 29 | float ix = fmaxf_(0, fminf_(x, 1)) * (curve->table_entries - 1); |
| 30 | int lo = (int) ix, |
| 31 | hi = (int)minus_1_ulp(ix + 1.0f); |
| 32 | float t = ix - (float)lo; |
| 33 | |
| 34 | float l, h; |
| 35 | if (curve->table_8) { |
| 36 | l = curve->table_8[lo] * (1/255.0f); |
| 37 | h = curve->table_8[hi] * (1/255.0f); |
| 38 | } else { |
| 39 | uint16_t be_l, be_h; |
| 40 | memcpy(&be_l, curve->table_16 + 2*lo, 2); |
| 41 | memcpy(&be_h, curve->table_16 + 2*hi, 2); |
| 42 | uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff; |
| 43 | uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff; |
| 44 | l = le_l * (1/65535.0f); |
| 45 | h = le_h * (1/65535.0f); |
| 46 | } |
| 47 | return l + (h-l)*t; |
| 48 | } |
| 49 | |
| 50 | float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) { |
| 51 | uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256; |
| 52 | const float dx = 1.0f / (N - 1); |
| 53 | float err = 0; |
| 54 | for (uint32_t i = 0; i < N; i++) { |
| 55 | float x = i * dx, |
| 56 | y = skcms_eval_curve(curve, x); |
| 57 | err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y))); |
| 58 | } |
| 59 | return err; |
| 60 | } |
| 61 | |
| 62 | bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) { |
| 63 | return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f); |
| 64 | } |
| 65 | |
| 66 | // Additional ICC signature values that are only used internally |
| 67 | enum { |
| 68 | // File signature |
| 69 | skcms_Signature_acsp = 0x61637370, |
| 70 | |
| 71 | // Tag signatures |
| 72 | skcms_Signature_rTRC = 0x72545243, |
| 73 | skcms_Signature_gTRC = 0x67545243, |
| 74 | skcms_Signature_bTRC = 0x62545243, |
| 75 | skcms_Signature_kTRC = 0x6B545243, |
| 76 | |
| 77 | skcms_Signature_rXYZ = 0x7258595A, |
| 78 | skcms_Signature_gXYZ = 0x6758595A, |
| 79 | skcms_Signature_bXYZ = 0x6258595A, |
| 80 | |
| 81 | skcms_Signature_A2B0 = 0x41324230, |
| 82 | skcms_Signature_A2B1 = 0x41324231, |
| 83 | skcms_Signature_mAB = 0x6D414220, |
| 84 | |
| 85 | skcms_Signature_CHAD = 0x63686164, |
| 86 | |
| 87 | // Type signatures |
| 88 | skcms_Signature_curv = 0x63757276, |
| 89 | skcms_Signature_mft1 = 0x6D667431, |
| 90 | skcms_Signature_mft2 = 0x6D667432, |
| 91 | skcms_Signature_para = 0x70617261, |
| 92 | skcms_Signature_sf32 = 0x73663332, |
| 93 | // XYZ is also a PCS signature, so it's defined in skcms.h |
| 94 | // skcms_Signature_XYZ = 0x58595A20, |
| 95 | }; |
| 96 | |
| 97 | static uint16_t read_big_u16(const uint8_t* ptr) { |
| 98 | uint16_t be; |
| 99 | memcpy(&be, ptr, sizeof(be)); |
| 100 | #if defined(_MSC_VER) |
| 101 | return _byteswap_ushort(be); |
| 102 | #else |
| 103 | return __builtin_bswap16(be); |
| 104 | #endif |
| 105 | } |
| 106 | |
| 107 | static uint32_t read_big_u32(const uint8_t* ptr) { |
| 108 | uint32_t be; |
| 109 | memcpy(&be, ptr, sizeof(be)); |
| 110 | #if defined(_MSC_VER) |
| 111 | return _byteswap_ulong(be); |
| 112 | #else |
| 113 | return __builtin_bswap32(be); |
| 114 | #endif |
| 115 | } |
| 116 | |
| 117 | static int32_t read_big_i32(const uint8_t* ptr) { |
| 118 | return (int32_t)read_big_u32(ptr); |
| 119 | } |
| 120 | |
| 121 | static float read_big_fixed(const uint8_t* ptr) { |
| 122 | return read_big_i32(ptr) * (1.0f / 65536.0f); |
| 123 | } |
| 124 | |
| 125 | // Maps to an in-memory profile so that fields line up to the locations specified |
| 126 | // in ICC.1:2010, section 7.2 |
| 127 | typedef struct { |
| 128 | uint8_t size [ 4]; |
| 129 | uint8_t cmm_type [ 4]; |
| 130 | uint8_t version [ 4]; |
| 131 | uint8_t profile_class [ 4]; |
| 132 | uint8_t data_color_space [ 4]; |
| 133 | uint8_t pcs [ 4]; |
| 134 | uint8_t creation_date_time [12]; |
| 135 | uint8_t signature [ 4]; |
| 136 | uint8_t platform [ 4]; |
| 137 | uint8_t flags [ 4]; |
| 138 | uint8_t device_manufacturer [ 4]; |
| 139 | uint8_t device_model [ 4]; |
| 140 | uint8_t device_attributes [ 8]; |
| 141 | uint8_t rendering_intent [ 4]; |
| 142 | uint8_t illuminant_X [ 4]; |
| 143 | uint8_t illuminant_Y [ 4]; |
| 144 | uint8_t illuminant_Z [ 4]; |
| 145 | uint8_t creator [ 4]; |
| 146 | uint8_t profile_id [16]; |
| 147 | uint8_t reserved [28]; |
| 148 | uint8_t tag_count [ 4]; // Technically not part of header, but required |
| 149 | } header_Layout; |
| 150 | |
| 151 | typedef struct { |
| 152 | uint8_t signature [4]; |
| 153 | uint8_t offset [4]; |
| 154 | uint8_t size [4]; |
| 155 | } tag_Layout; |
| 156 | |
| 157 | static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) { |
| 158 | return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout)); |
| 159 | } |
| 160 | |
| 161 | // s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid |
| 162 | // use of the type is for the CHAD tag that stores exactly nine values. |
| 163 | typedef struct { |
| 164 | uint8_t type [ 4]; |
| 165 | uint8_t reserved [ 4]; |
| 166 | uint8_t values [36]; |
| 167 | } sf32_Layout; |
| 168 | |
| 169 | bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) { |
| 170 | skcms_ICCTag tag; |
| 171 | if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) { |
| 172 | return false; |
| 173 | } |
| 174 | |
| 175 | if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) { |
| 176 | return false; |
| 177 | } |
| 178 | |
| 179 | const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf; |
| 180 | const uint8_t* values = sf32Tag->values; |
| 181 | for (int r = 0; r < 3; ++r) |
| 182 | for (int c = 0; c < 3; ++c, values += 4) { |
| 183 | m->vals[r][c] = read_big_fixed(values); |
| 184 | } |
| 185 | return true; |
| 186 | } |
| 187 | |
| 188 | // XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of |
| 189 | // the type are for tags/data that store exactly one triple. |
| 190 | typedef struct { |
| 191 | uint8_t type [4]; |
| 192 | uint8_t reserved [4]; |
| 193 | uint8_t X [4]; |
| 194 | uint8_t Y [4]; |
| 195 | uint8_t Z [4]; |
| 196 | } XYZ_Layout; |
| 197 | |
| 198 | static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) { |
| 199 | if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) { |
| 200 | return false; |
| 201 | } |
| 202 | |
| 203 | const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf; |
| 204 | |
| 205 | *x = read_big_fixed(xyzTag->X); |
| 206 | *y = read_big_fixed(xyzTag->Y); |
| 207 | *z = read_big_fixed(xyzTag->Z); |
| 208 | return true; |
| 209 | } |
| 210 | |
| 211 | static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ, |
| 212 | const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) { |
| 213 | return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) && |
| 214 | read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) && |
| 215 | read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]); |
| 216 | } |
| 217 | |
| 218 | typedef struct { |
| 219 | uint8_t type [4]; |
| 220 | uint8_t reserved_a [4]; |
| 221 | uint8_t function_type [2]; |
| 222 | uint8_t reserved_b [2]; |
| 223 | uint8_t parameters [ ]; // 1, 3, 4, 5, or 7 s15.16 parameters, depending on function_type |
| 224 | } para_Layout; |
| 225 | |
| 226 | static bool read_curve_para(const uint8_t* buf, uint32_t size, |
| 227 | skcms_Curve* curve, uint32_t* curve_size) { |
| 228 | if (size < SAFE_SIZEOF(para_Layout)) { |
| 229 | return false; |
| 230 | } |
| 231 | |
| 232 | const para_Layout* paraTag = (const para_Layout*)buf; |
| 233 | |
| 234 | enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 }; |
| 235 | uint16_t function_type = read_big_u16(paraTag->function_type); |
| 236 | if (function_type > kGABCDEF) { |
| 237 | return false; |
| 238 | } |
| 239 | |
| 240 | static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 }; |
| 241 | if (size < SAFE_SIZEOF(para_Layout) + curve_bytes[function_type]) { |
| 242 | return false; |
| 243 | } |
| 244 | |
| 245 | if (curve_size) { |
| 246 | *curve_size = SAFE_SIZEOF(para_Layout) + curve_bytes[function_type]; |
| 247 | } |
| 248 | |
| 249 | curve->table_entries = 0; |
| 250 | curve->parametric.a = 1.0f; |
| 251 | curve->parametric.b = 0.0f; |
| 252 | curve->parametric.c = 0.0f; |
| 253 | curve->parametric.d = 0.0f; |
| 254 | curve->parametric.e = 0.0f; |
| 255 | curve->parametric.f = 0.0f; |
| 256 | curve->parametric.g = read_big_fixed(paraTag->parameters); |
| 257 | |
| 258 | switch (function_type) { |
| 259 | case kGAB: |
| 260 | curve->parametric.a = read_big_fixed(paraTag->parameters + 4); |
| 261 | curve->parametric.b = read_big_fixed(paraTag->parameters + 8); |
| 262 | if (curve->parametric.a == 0) { |
| 263 | return false; |
| 264 | } |
| 265 | curve->parametric.d = -curve->parametric.b / curve->parametric.a; |
| 266 | break; |
| 267 | case kGABC: |
| 268 | curve->parametric.a = read_big_fixed(paraTag->parameters + 4); |
| 269 | curve->parametric.b = read_big_fixed(paraTag->parameters + 8); |
| 270 | curve->parametric.e = read_big_fixed(paraTag->parameters + 12); |
| 271 | if (curve->parametric.a == 0) { |
| 272 | return false; |
| 273 | } |
| 274 | curve->parametric.d = -curve->parametric.b / curve->parametric.a; |
| 275 | curve->parametric.f = curve->parametric.e; |
| 276 | break; |
| 277 | case kGABCD: |
| 278 | curve->parametric.a = read_big_fixed(paraTag->parameters + 4); |
| 279 | curve->parametric.b = read_big_fixed(paraTag->parameters + 8); |
| 280 | curve->parametric.c = read_big_fixed(paraTag->parameters + 12); |
| 281 | curve->parametric.d = read_big_fixed(paraTag->parameters + 16); |
| 282 | break; |
| 283 | case kGABCDEF: |
| 284 | curve->parametric.a = read_big_fixed(paraTag->parameters + 4); |
| 285 | curve->parametric.b = read_big_fixed(paraTag->parameters + 8); |
| 286 | curve->parametric.c = read_big_fixed(paraTag->parameters + 12); |
| 287 | curve->parametric.d = read_big_fixed(paraTag->parameters + 16); |
| 288 | curve->parametric.e = read_big_fixed(paraTag->parameters + 20); |
| 289 | curve->parametric.f = read_big_fixed(paraTag->parameters + 24); |
| 290 | break; |
| 291 | } |
| 292 | return skcms_TransferFunction_isValid(&curve->parametric); |
| 293 | } |
| 294 | |
| 295 | typedef struct { |
| 296 | uint8_t type [4]; |
| 297 | uint8_t reserved [4]; |
| 298 | uint8_t value_count [4]; |
| 299 | uint8_t parameters [ ]; // value_count parameters (8.8 if 1, uint16 (n*65535) if > 1) |
| 300 | } curv_Layout; |
| 301 | |
| 302 | static bool read_curve_curv(const uint8_t* buf, uint32_t size, |
| 303 | skcms_Curve* curve, uint32_t* curve_size) { |
| 304 | if (size < SAFE_SIZEOF(curv_Layout)) { |
| 305 | return false; |
| 306 | } |
| 307 | |
| 308 | const curv_Layout* curvTag = (const curv_Layout*)buf; |
| 309 | |
| 310 | uint32_t value_count = read_big_u32(curvTag->value_count); |
| 311 | if (size < SAFE_SIZEOF(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) { |
| 312 | return false; |
| 313 | } |
| 314 | |
| 315 | if (curve_size) { |
| 316 | *curve_size = SAFE_SIZEOF(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t); |
| 317 | } |
| 318 | |
| 319 | if (value_count < 2) { |
| 320 | curve->table_entries = 0; |
| 321 | curve->parametric.a = 1.0f; |
| 322 | curve->parametric.b = 0.0f; |
| 323 | curve->parametric.c = 0.0f; |
| 324 | curve->parametric.d = 0.0f; |
| 325 | curve->parametric.e = 0.0f; |
| 326 | curve->parametric.f = 0.0f; |
| 327 | if (value_count == 0) { |
| 328 | // Empty tables are a shorthand for an identity curve |
| 329 | curve->parametric.g = 1.0f; |
| 330 | } else { |
| 331 | // Single entry tables are a shorthand for simple gamma |
| 332 | curve->parametric.g = read_big_u16(curvTag->parameters) * (1.0f / 256.0f); |
| 333 | } |
| 334 | } else { |
| 335 | curve->table_8 = NULL; |
| 336 | curve->table_16 = curvTag->parameters; |
| 337 | curve->table_entries = value_count; |
| 338 | } |
| 339 | |
| 340 | return true; |
| 341 | } |
| 342 | |
| 343 | // Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read. |
| 344 | // If curve_size is not NULL, writes the number of bytes used by the curve in (*curve_size). |
| 345 | static bool read_curve(const uint8_t* buf, uint32_t size, |
| 346 | skcms_Curve* curve, uint32_t* curve_size) { |
| 347 | if (!buf || size < 4 || !curve) { |
| 348 | return false; |
| 349 | } |
| 350 | |
| 351 | uint32_t type = read_big_u32(buf); |
| 352 | if (type == skcms_Signature_para) { |
| 353 | return read_curve_para(buf, size, curve, curve_size); |
| 354 | } else if (type == skcms_Signature_curv) { |
| 355 | return read_curve_curv(buf, size, curve, curve_size); |
| 356 | } |
| 357 | |
| 358 | return false; |
| 359 | } |
| 360 | |
| 361 | // mft1 and mft2 share a large chunk of data |
| 362 | typedef struct { |
| 363 | uint8_t type [ 4]; |
| 364 | uint8_t reserved_a [ 4]; |
| 365 | uint8_t input_channels [ 1]; |
| 366 | uint8_t output_channels [ 1]; |
| 367 | uint8_t grid_points [ 1]; |
| 368 | uint8_t reserved_b [ 1]; |
| 369 | uint8_t matrix [36]; |
| 370 | } mft_CommonLayout; |
| 371 | |
| 372 | typedef struct { |
| 373 | mft_CommonLayout common [ 1]; |
| 374 | |
| 375 | uint8_t tables [ ]; |
| 376 | } mft1_Layout; |
| 377 | |
| 378 | typedef struct { |
| 379 | mft_CommonLayout common [ 1]; |
| 380 | |
| 381 | uint8_t input_table_entries [ 2]; |
| 382 | uint8_t output_table_entries [ 2]; |
| 383 | uint8_t tables [ ]; |
| 384 | } mft2_Layout; |
| 385 | |
| 386 | static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) { |
| 387 | // MFT matrices are applied before the first set of curves, but must be identity unless the |
| 388 | // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the |
| 389 | // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another |
| 390 | // field/flag. |
| 391 | a2b->matrix_channels = 0; |
| 392 | |
| 393 | a2b->input_channels = mftTag->input_channels[0]; |
| 394 | a2b->output_channels = mftTag->output_channels[0]; |
| 395 | |
| 396 | // We require exactly three (ie XYZ/Lab/RGB) output channels |
| 397 | if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) { |
| 398 | return false; |
| 399 | } |
| 400 | // We require at least one, and no more than four (ie CMYK) input channels |
| 401 | if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) { |
| 402 | return false; |
| 403 | } |
| 404 | |
| 405 | for (uint32_t i = 0; i < a2b->input_channels; ++i) { |
| 406 | a2b->grid_points[i] = mftTag->grid_points[0]; |
| 407 | } |
| 408 | // The grid only makes sense with at least two points along each axis |
| 409 | if (a2b->grid_points[0] < 2) { |
| 410 | return false; |
| 411 | } |
| 412 | |
| 413 | return true; |
| 414 | } |
| 415 | |
| 416 | static bool init_a2b_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width, |
| 417 | uint32_t input_table_entries, uint32_t output_table_entries, |
| 418 | skcms_A2B* a2b) { |
| 419 | // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow |
| 420 | uint32_t byte_len_per_input_table = input_table_entries * byte_width; |
| 421 | uint32_t byte_len_per_output_table = output_table_entries * byte_width; |
| 422 | |
| 423 | // [input|output]_channels are <= 4, so still no overflow |
| 424 | uint32_t byte_len_all_input_tables = a2b->input_channels * byte_len_per_input_table; |
| 425 | uint32_t byte_len_all_output_tables = a2b->output_channels * byte_len_per_output_table; |
| 426 | |
| 427 | uint64_t grid_size = a2b->output_channels * byte_width; |
| 428 | for (uint32_t axis = 0; axis < a2b->input_channels; ++axis) { |
| 429 | grid_size *= a2b->grid_points[axis]; |
| 430 | } |
| 431 | |
| 432 | if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) { |
| 433 | return false; |
| 434 | } |
| 435 | |
| 436 | for (uint32_t i = 0; i < a2b->input_channels; ++i) { |
| 437 | a2b->input_curves[i].table_entries = input_table_entries; |
| 438 | if (byte_width == 1) { |
| 439 | a2b->input_curves[i].table_8 = table_base + i * byte_len_per_input_table; |
| 440 | a2b->input_curves[i].table_16 = NULL; |
| 441 | } else { |
| 442 | a2b->input_curves[i].table_8 = NULL; |
| 443 | a2b->input_curves[i].table_16 = table_base + i * byte_len_per_input_table; |
| 444 | } |
| 445 | } |
| 446 | |
| 447 | if (byte_width == 1) { |
| 448 | a2b->grid_8 = table_base + byte_len_all_input_tables; |
| 449 | a2b->grid_16 = NULL; |
| 450 | } else { |
| 451 | a2b->grid_8 = NULL; |
| 452 | a2b->grid_16 = table_base + byte_len_all_input_tables; |
| 453 | } |
| 454 | |
| 455 | const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size; |
| 456 | for (uint32_t i = 0; i < a2b->output_channels; ++i) { |
| 457 | a2b->output_curves[i].table_entries = output_table_entries; |
| 458 | if (byte_width == 1) { |
| 459 | a2b->output_curves[i].table_8 = output_table_base + i * byte_len_per_output_table; |
| 460 | a2b->output_curves[i].table_16 = NULL; |
| 461 | } else { |
| 462 | a2b->output_curves[i].table_8 = NULL; |
| 463 | a2b->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table; |
| 464 | } |
| 465 | } |
| 466 | |
| 467 | return true; |
| 468 | } |
| 469 | |
| 470 | static bool read_tag_mft1(const skcms_ICCTag* tag, skcms_A2B* a2b) { |
| 471 | if (tag->size < SAFE_SIZEOF(mft1_Layout)) { |
| 472 | return false; |
| 473 | } |
| 474 | |
| 475 | const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf; |
| 476 | if (!read_mft_common(mftTag->common, a2b)) { |
| 477 | return false; |
| 478 | } |
| 479 | |
| 480 | uint32_t input_table_entries = 256; |
| 481 | uint32_t output_table_entries = 256; |
| 482 | |
| 483 | return init_a2b_tables(mftTag->tables, tag->size - SAFE_SIZEOF(mft1_Layout), 1, |
| 484 | input_table_entries, output_table_entries, a2b); |
| 485 | } |
| 486 | |
| 487 | static bool read_tag_mft2(const skcms_ICCTag* tag, skcms_A2B* a2b) { |
| 488 | if (tag->size < SAFE_SIZEOF(mft2_Layout)) { |
| 489 | return false; |
| 490 | } |
| 491 | |
| 492 | const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf; |
| 493 | if (!read_mft_common(mftTag->common, a2b)) { |
| 494 | return false; |
| 495 | } |
| 496 | |
| 497 | uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries); |
| 498 | uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries); |
| 499 | |
| 500 | // ICC spec mandates that 2 <= table_entries <= 4096 |
| 501 | if (input_table_entries < 2 || input_table_entries > 4096 || |
| 502 | output_table_entries < 2 || output_table_entries > 4096) { |
| 503 | return false; |
| 504 | } |
| 505 | |
| 506 | return init_a2b_tables(mftTag->tables, tag->size - SAFE_SIZEOF(mft2_Layout), 2, |
| 507 | input_table_entries, output_table_entries, a2b); |
| 508 | } |
| 509 | |
| 510 | static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset, |
| 511 | uint32_t num_curves, skcms_Curve* curves) { |
| 512 | for (uint32_t i = 0; i < num_curves; ++i) { |
| 513 | if (curve_offset > size) { |
| 514 | return false; |
| 515 | } |
| 516 | |
| 517 | uint32_t curve_bytes; |
| 518 | if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) { |
| 519 | return false; |
| 520 | } |
| 521 | |
| 522 | if (curve_bytes > UINT32_MAX - 3) { |
| 523 | return false; |
| 524 | } |
| 525 | curve_bytes = (curve_bytes + 3) & ~3U; |
| 526 | |
| 527 | uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes; |
| 528 | curve_offset = (uint32_t)new_offset_64; |
| 529 | if (new_offset_64 != curve_offset) { |
| 530 | return false; |
| 531 | } |
| 532 | } |
| 533 | |
| 534 | return true; |
| 535 | } |
| 536 | |
| 537 | typedef struct { |
| 538 | uint8_t type [ 4]; |
| 539 | uint8_t reserved_a [ 4]; |
| 540 | uint8_t input_channels [ 1]; |
| 541 | uint8_t output_channels [ 1]; |
| 542 | uint8_t reserved_b [ 2]; |
| 543 | uint8_t b_curve_offset [ 4]; |
| 544 | uint8_t matrix_offset [ 4]; |
| 545 | uint8_t m_curve_offset [ 4]; |
| 546 | uint8_t clut_offset [ 4]; |
| 547 | uint8_t a_curve_offset [ 4]; |
| 548 | } mAB_Layout; |
| 549 | |
| 550 | typedef struct { |
| 551 | uint8_t grid_points [16]; |
| 552 | uint8_t grid_byte_width [ 1]; |
| 553 | uint8_t reserved [ 3]; |
| 554 | uint8_t data [ ]; |
| 555 | } mABCLUT_Layout; |
| 556 | |
| 557 | static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) { |
| 558 | if (tag->size < SAFE_SIZEOF(mAB_Layout)) { |
| 559 | return false; |
| 560 | } |
| 561 | |
| 562 | const mAB_Layout* mABTag = (const mAB_Layout*)tag->buf; |
| 563 | |
| 564 | a2b->input_channels = mABTag->input_channels[0]; |
| 565 | a2b->output_channels = mABTag->output_channels[0]; |
| 566 | |
| 567 | // We require exactly three (ie XYZ/Lab/RGB) output channels |
| 568 | if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) { |
| 569 | return false; |
| 570 | } |
| 571 | // We require no more than four (ie CMYK) input channels |
| 572 | if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) { |
| 573 | return false; |
| 574 | } |
| 575 | |
| 576 | uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset); |
| 577 | uint32_t matrix_offset = read_big_u32(mABTag->matrix_offset); |
| 578 | uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset); |
| 579 | uint32_t clut_offset = read_big_u32(mABTag->clut_offset); |
| 580 | uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset); |
| 581 | |
| 582 | // "B" curves must be present |
| 583 | if (0 == b_curve_offset) { |
| 584 | return false; |
| 585 | } |
| 586 | |
| 587 | if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels, |
| 588 | a2b->output_curves)) { |
| 589 | return false; |
| 590 | } |
| 591 | |
| 592 | // "M" curves and Matrix must be used together |
| 593 | if (0 != m_curve_offset) { |
| 594 | if (0 == matrix_offset) { |
| 595 | return false; |
| 596 | } |
| 597 | a2b->matrix_channels = a2b->output_channels; |
| 598 | if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels, |
| 599 | a2b->matrix_curves)) { |
| 600 | return false; |
| 601 | } |
| 602 | |
| 603 | // Read matrix, which is stored as a row-major 3x3, followed by the fourth column |
| 604 | if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) { |
| 605 | return false; |
| 606 | } |
| 607 | float encoding_factor = pcs_is_xyz ? 65535 / 32768.0f : 1.0f; |
| 608 | const uint8_t* mtx_buf = tag->buf + matrix_offset; |
| 609 | a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0); |
| 610 | a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4); |
| 611 | a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8); |
| 612 | a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12); |
| 613 | a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16); |
| 614 | a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20); |
| 615 | a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24); |
| 616 | a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28); |
| 617 | a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32); |
| 618 | a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36); |
| 619 | a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40); |
| 620 | a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44); |
| 621 | } else { |
| 622 | if (0 != matrix_offset) { |
| 623 | return false; |
| 624 | } |
| 625 | a2b->matrix_channels = 0; |
| 626 | } |
| 627 | |
| 628 | // "A" curves and CLUT must be used together |
| 629 | if (0 != a_curve_offset) { |
| 630 | if (0 == clut_offset) { |
| 631 | return false; |
| 632 | } |
| 633 | if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels, |
| 634 | a2b->input_curves)) { |
| 635 | return false; |
| 636 | } |
| 637 | |
| 638 | if (tag->size < clut_offset + SAFE_SIZEOF(mABCLUT_Layout)) { |
| 639 | return false; |
| 640 | } |
| 641 | const mABCLUT_Layout* clut = (const mABCLUT_Layout*)(tag->buf + clut_offset); |
| 642 | |
| 643 | if (clut->grid_byte_width[0] == 1) { |
| 644 | a2b->grid_8 = clut->data; |
| 645 | a2b->grid_16 = NULL; |
| 646 | } else if (clut->grid_byte_width[0] == 2) { |
| 647 | a2b->grid_8 = NULL; |
| 648 | a2b->grid_16 = clut->data; |
| 649 | } else { |
| 650 | return false; |
| 651 | } |
| 652 | |
| 653 | uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0]; |
| 654 | for (uint32_t i = 0; i < a2b->input_channels; ++i) { |
| 655 | a2b->grid_points[i] = clut->grid_points[i]; |
| 656 | // The grid only makes sense with at least two points along each axis |
| 657 | if (a2b->grid_points[i] < 2) { |
| 658 | return false; |
| 659 | } |
| 660 | grid_size *= a2b->grid_points[i]; |
| 661 | } |
| 662 | if (tag->size < clut_offset + SAFE_SIZEOF(mABCLUT_Layout) + grid_size) { |
| 663 | return false; |
| 664 | } |
| 665 | } else { |
| 666 | if (0 != clut_offset) { |
| 667 | return false; |
| 668 | } |
| 669 | |
| 670 | // If there is no CLUT, the number of input and output channels must match |
| 671 | if (a2b->input_channels != a2b->output_channels) { |
| 672 | return false; |
| 673 | } |
| 674 | |
| 675 | // Zero out the number of input channels to signal that we're skipping this stage |
| 676 | a2b->input_channels = 0; |
| 677 | } |
| 678 | |
| 679 | return true; |
| 680 | } |
| 681 | |
| 682 | static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) { |
| 683 | bool ok = false; |
| 684 | if (tag->type == skcms_Signature_mft1) { |
| 685 | ok = read_tag_mft1(tag, a2b); |
| 686 | } else if (tag->type == skcms_Signature_mft2) { |
| 687 | ok = read_tag_mft2(tag, a2b); |
| 688 | } else if (tag->type == skcms_Signature_mAB) { |
| 689 | ok = read_tag_mab(tag, a2b, pcs_is_xyz); |
| 690 | } |
| 691 | if (!ok) { |
| 692 | return false; |
| 693 | } |
| 694 | |
| 695 | // Detect and canonicalize identity tables. |
| 696 | skcms_Curve* curves[] = { |
| 697 | a2b->input_channels > 0 ? a2b->input_curves + 0 : NULL, |
| 698 | a2b->input_channels > 1 ? a2b->input_curves + 1 : NULL, |
| 699 | a2b->input_channels > 2 ? a2b->input_curves + 2 : NULL, |
| 700 | a2b->input_channels > 3 ? a2b->input_curves + 3 : NULL, |
| 701 | a2b->matrix_channels > 0 ? a2b->matrix_curves + 0 : NULL, |
| 702 | a2b->matrix_channels > 1 ? a2b->matrix_curves + 1 : NULL, |
| 703 | a2b->matrix_channels > 2 ? a2b->matrix_curves + 2 : NULL, |
| 704 | a2b->output_channels > 0 ? a2b->output_curves + 0 : NULL, |
| 705 | a2b->output_channels > 1 ? a2b->output_curves + 1 : NULL, |
| 706 | a2b->output_channels > 2 ? a2b->output_curves + 2 : NULL, |
| 707 | }; |
| 708 | |
| 709 | for (int i = 0; i < ARRAY_COUNT(curves); i++) { |
| 710 | skcms_Curve* curve = curves[i]; |
| 711 | |
| 712 | if (curve && curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) { |
| 713 | int N = (int)curve->table_entries; |
| 714 | |
| 715 | float c,d,f; |
| 716 | if (N == skcms_fit_linear(curve, N, 1.0f/(2*N), &c,&d,&f) |
| 717 | && c == 1.0f |
| 718 | && f == 0.0f) { |
| 719 | curve->table_entries = 0; |
| 720 | curve->table_8 = NULL; |
| 721 | curve->table_16 = NULL; |
| 722 | curve->parametric = (skcms_TransferFunction){1,1,0,0,0,0,0}; |
| 723 | } |
| 724 | } |
| 725 | } |
| 726 | |
| 727 | return true; |
| 728 | } |
| 729 | |
| 730 | void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) { |
| 731 | if (!profile || !profile->buffer || !tag) { return; } |
| 732 | if (idx > profile->tag_count) { return; } |
| 733 | const tag_Layout* tags = get_tag_table(profile); |
| 734 | tag->signature = read_big_u32(tags[idx].signature); |
| 735 | tag->size = read_big_u32(tags[idx].size); |
| 736 | tag->buf = read_big_u32(tags[idx].offset) + profile->buffer; |
| 737 | tag->type = read_big_u32(tag->buf); |
| 738 | } |
| 739 | |
| 740 | bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) { |
| 741 | if (!profile || !profile->buffer || !tag) { return false; } |
| 742 | const tag_Layout* tags = get_tag_table(profile); |
| 743 | for (uint32_t i = 0; i < profile->tag_count; ++i) { |
| 744 | if (read_big_u32(tags[i].signature) == sig) { |
| 745 | tag->signature = sig; |
| 746 | tag->size = read_big_u32(tags[i].size); |
| 747 | tag->buf = read_big_u32(tags[i].offset) + profile->buffer; |
| 748 | tag->type = read_big_u32(tag->buf); |
| 749 | return true; |
| 750 | } |
| 751 | } |
| 752 | return false; |
| 753 | } |
| 754 | |
| 755 | static bool usable_as_src(const skcms_ICCProfile* profile) { |
| 756 | return profile->has_A2B |
| 757 | || (profile->has_trc && profile->has_toXYZD50); |
| 758 | } |
| 759 | |
| 760 | bool skcms_Parse(const void* buf, size_t len, skcms_ICCProfile* profile) { |
| 761 | assert(SAFE_SIZEOF(header_Layout) == 132); |
| 762 | |
| 763 | if (!profile) { |
| 764 | return false; |
| 765 | } |
| 766 | memset(profile, 0, SAFE_SIZEOF(*profile)); |
| 767 | |
| 768 | if (len < SAFE_SIZEOF(header_Layout)) { |
| 769 | return false; |
| 770 | } |
| 771 | |
| 772 | // Byte-swap all header fields |
| 773 | const header_Layout* header = buf; |
| 774 | profile->buffer = buf; |
| 775 | profile->size = read_big_u32(header->size); |
| 776 | uint32_t version = read_big_u32(header->version); |
| 777 | profile->data_color_space = read_big_u32(header->data_color_space); |
| 778 | profile->pcs = read_big_u32(header->pcs); |
| 779 | uint32_t signature = read_big_u32(header->signature); |
| 780 | float illuminant_X = read_big_fixed(header->illuminant_X); |
| 781 | float illuminant_Y = read_big_fixed(header->illuminant_Y); |
| 782 | float illuminant_Z = read_big_fixed(header->illuminant_Z); |
| 783 | profile->tag_count = read_big_u32(header->tag_count); |
| 784 | |
| 785 | // Validate signature, size (smaller than buffer, large enough to hold tag table), |
| 786 | // and major version |
| 787 | uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout); |
| 788 | if (signature != skcms_Signature_acsp || |
| 789 | profile->size > len || |
| 790 | profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size || |
| 791 | (version >> 24) > 4) { |
| 792 | return false; |
| 793 | } |
| 794 | |
| 795 | // Validate that illuminant is D50 white |
| 796 | if (fabsf_(illuminant_X - 0.9642f) > 0.0100f || |
| 797 | fabsf_(illuminant_Y - 1.0000f) > 0.0100f || |
| 798 | fabsf_(illuminant_Z - 0.8249f) > 0.0100f) { |
| 799 | return false; |
| 800 | } |
| 801 | |
| 802 | // Validate that all tag entries have sane offset + size |
| 803 | const tag_Layout* tags = get_tag_table(profile); |
| 804 | for (uint32_t i = 0; i < profile->tag_count; ++i) { |
| 805 | uint32_t tag_offset = read_big_u32(tags[i].offset); |
| 806 | uint32_t tag_size = read_big_u32(tags[i].size); |
| 807 | uint64_t tag_end = (uint64_t)tag_offset + (uint64_t)tag_size; |
| 808 | if (tag_size < 4 || tag_end > profile->size) { |
| 809 | return false; |
| 810 | } |
| 811 | } |
| 812 | |
| 813 | if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) { |
| 814 | return false; |
| 815 | } |
| 816 | |
| 817 | bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ; |
| 818 | |
| 819 | // Pre-parse commonly used tags. |
| 820 | skcms_ICCTag kTRC; |
| 821 | if (profile->data_color_space == skcms_Signature_Gray && |
| 822 | skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) { |
| 823 | if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], NULL)) { |
| 824 | // Malformed tag |
| 825 | return false; |
| 826 | } |
| 827 | profile->trc[1] = profile->trc[0]; |
| 828 | profile->trc[2] = profile->trc[0]; |
| 829 | profile->has_trc = true; |
| 830 | |
| 831 | if (pcs_is_xyz) { |
| 832 | profile->toXYZD50.vals[0][0] = illuminant_X; |
| 833 | profile->toXYZD50.vals[1][1] = illuminant_Y; |
| 834 | profile->toXYZD50.vals[2][2] = illuminant_Z; |
| 835 | profile->has_toXYZD50 = true; |
| 836 | } |
| 837 | } else { |
| 838 | skcms_ICCTag rTRC, gTRC, bTRC; |
| 839 | if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) && |
| 840 | skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) && |
| 841 | skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) { |
| 842 | if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], NULL) || |
| 843 | !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], NULL) || |
| 844 | !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], NULL)) { |
| 845 | // Malformed TRC tags |
| 846 | return false; |
| 847 | } |
| 848 | profile->has_trc = true; |
| 849 | } |
| 850 | |
| 851 | skcms_ICCTag rXYZ, gXYZ, bXYZ; |
| 852 | if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) && |
| 853 | skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) && |
| 854 | skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) { |
| 855 | if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) { |
| 856 | // Malformed XYZ tags |
| 857 | return false; |
| 858 | } |
| 859 | profile->has_toXYZD50 = true; |
| 860 | } |
| 861 | } |
| 862 | |
| 863 | skcms_ICCTag a2b_tag; |
| 864 | |
| 865 | // For now, we're preferring A2B0, like Skia does and the ICC spec tells us to. |
| 866 | // TODO: prefer A2B1 (relative colormetric) over A2B0 (perceptual)? |
| 867 | // This breaks with the ICC spec, but we think it's a good idea, given that TRC curves |
| 868 | // and all our known users are thinking exclusively in terms of relative colormetric. |
| 869 | const uint32_t sigs[] = { skcms_Signature_A2B0, skcms_Signature_A2B1 }; |
| 870 | for (int i = 0; i < ARRAY_COUNT(sigs); i++) { |
| 871 | if (skcms_GetTagBySignature(profile, sigs[i], &a2b_tag)) { |
| 872 | if (!read_a2b(&a2b_tag, &profile->A2B, pcs_is_xyz)) { |
| 873 | // Malformed A2B tag |
| 874 | return false; |
| 875 | } |
| 876 | profile->has_A2B = true; |
| 877 | break; |
| 878 | } |
| 879 | } |
| 880 | |
| 881 | return usable_as_src(profile); |
| 882 | } |
| 883 | |
| 884 | |
| 885 | const skcms_ICCProfile* skcms_sRGB_profile() { |
| 886 | static const skcms_ICCProfile sRGB_profile = { |
| 887 | // These fields are moot when not a skcms_Parse()'d profile. |
| 888 | .buffer = NULL, |
| 889 | .size = 0, |
| 890 | .tag_count = 0, |
| 891 | |
| 892 | // We choose to represent sRGB with its canonical transfer function, |
| 893 | // and with its canonical XYZD50 gamut matrix. |
| 894 | .data_color_space = skcms_Signature_RGB, |
| 895 | .pcs = skcms_Signature_XYZ, |
| 896 | .has_trc = true, |
| 897 | .has_toXYZD50 = true, |
| 898 | .has_A2B = false, |
| 899 | |
| 900 | .trc = { |
| 901 | {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0 }}}, |
| 902 | {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0 }}}, |
| 903 | {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0 }}}, |
| 904 | }, |
| 905 | |
| 906 | .toXYZD50 = {{ |
| 907 | { 0.436065674f, 0.385147095f, 0.143066406f }, |
| 908 | { 0.222488403f, 0.716873169f, 0.060607910f }, |
| 909 | { 0.013916016f, 0.097076416f, 0.714096069f }, |
| 910 | }}, |
| 911 | }; |
| 912 | return &sRGB_profile; |
| 913 | } |
| 914 | |
| 915 | const skcms_ICCProfile* skcms_XYZD50_profile() { |
| 916 | static const skcms_ICCProfile XYZD50_profile = { |
| 917 | .buffer = NULL, |
| 918 | .size = 0, |
| 919 | .tag_count = 0, |
| 920 | |
| 921 | .data_color_space = skcms_Signature_RGB, |
| 922 | .pcs = skcms_Signature_XYZ, |
| 923 | .has_trc = true, |
| 924 | .has_toXYZD50 = true, |
| 925 | .has_A2B = false, |
| 926 | |
| 927 | .trc = { |
| 928 | {{0, {1,1,0,0,0,0,0}}}, |
| 929 | {{0, {1,1,0,0,0,0,0}}}, |
| 930 | {{0, {1,1,0,0,0,0,0}}}, |
| 931 | }, |
| 932 | |
| 933 | .toXYZD50 = {{ |
| 934 | {1,0,0}, |
| 935 | {0,1,0}, |
| 936 | {0,0,1}, |
| 937 | }}, |
| 938 | }; |
| 939 | |
| 940 | return &XYZD50_profile; |
| 941 | } |
| 942 | |
| 943 | const skcms_TransferFunction* skcms_sRGB_TransferFunction() { |
| 944 | return &skcms_sRGB_profile()->trc[0].parametric; |
| 945 | } |
| 946 | |
| 947 | const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() { |
| 948 | static const skcms_TransferFunction sRGB_inv = |
| 949 | { (float)(1/2.4), 1.137119f, 0, 12.92f, 0.0031308f, -0.055f, 0 }; |
| 950 | return &sRGB_inv; |
| 951 | } |
| 952 | |
| 953 | const skcms_TransferFunction* skcms_Identity_TransferFunction() { |
| 954 | static const skcms_TransferFunction identity = {1,1,0,0,0,0,0}; |
| 955 | return &identity; |
| 956 | } |
| 957 | |
| 958 | const uint8_t skcms_252_random_bytes[] = { |
| 959 | 8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215, |
| 960 | 119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30, |
| 961 | 154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191, |
| 962 | 194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57, |
| 963 | 108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211, |
| 964 | 70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164, |
| 965 | 137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225, |
| 966 | 9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214, |
| 967 | 129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232, |
| 968 | 140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54, |
| 969 | 219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63, |
| 970 | 123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193, |
| 971 | 189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133, |
| 972 | 174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4, |
| 973 | 2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88, |
| 974 | 112, 36, 224, 136, 202, 76, 94, 98, 175, 213 |
| 975 | }; |
| 976 | |
| 977 | bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) { |
| 978 | // For now this is the essentially the same strategy we use in test_only.c |
| 979 | // for our skcms_Transform() smoke tests: |
| 980 | // 1) transform A to XYZD50 |
| 981 | // 2) transform B to XYZD50 |
| 982 | // 3) return true if they're similar enough |
| 983 | // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte. |
| 984 | |
| 985 | // Here are 252 of a random shuffle of all possible bytes. |
| 986 | // 252 is evenly divisible by 3 and 4. Only 192, 10, 241, and 43 are missing. |
| 987 | |
| 988 | if (A->data_color_space != B->data_color_space) { |
| 989 | return false; |
| 990 | } |
| 991 | |
| 992 | // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK. |
| 993 | skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888; |
| 994 | size_t npixels = 84; |
| 995 | if (A->data_color_space == skcms_Signature_CMYK) { |
| 996 | fmt = skcms_PixelFormat_RGBA_8888; |
| 997 | npixels = 63; |
| 998 | } |
| 999 | |
| 1000 | uint8_t dstA[252], |
| 1001 | dstB[252]; |
| 1002 | if (!skcms_Transform( |
| 1003 | skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, A, |
| 1004 | dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(), |
| 1005 | npixels)) { |
| 1006 | return false; |
| 1007 | } |
| 1008 | if (!skcms_Transform( |
| 1009 | skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, B, |
| 1010 | dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(), |
| 1011 | npixels)) { |
| 1012 | return false; |
| 1013 | } |
| 1014 | |
| 1015 | for (size_t i = 0; i < 252; i++) { |
| 1016 | if (abs((int)dstA[i] - (int)dstB[i]) > 1) { |
| 1017 | return false; |
| 1018 | } |
| 1019 | } |
| 1020 | return true; |
| 1021 | } |
| 1022 | |
| 1023 | bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile, |
| 1024 | const skcms_TransferFunction* inv_tf) { |
| 1025 | if (!profile || !profile->has_trc) { |
| 1026 | return false; |
| 1027 | } |
| 1028 | |
| 1029 | return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) && |
| 1030 | skcms_AreApproximateInverses(&profile->trc[1], inv_tf) && |
| 1031 | skcms_AreApproximateInverses(&profile->trc[2], inv_tf); |
| 1032 | } |
| 1033 | |
| 1034 | static bool is_zero_to_one(float x) { |
| 1035 | return 0 <= x && x <= 1; |
| 1036 | } |
| 1037 | |
| 1038 | bool skcms_PrimariesToXYZD50(float rx, float ry, |
| 1039 | float gx, float gy, |
| 1040 | float bx, float by, |
| 1041 | float wx, float wy, |
| 1042 | skcms_Matrix3x3* toXYZD50) { |
| 1043 | if (!is_zero_to_one(rx) || !is_zero_to_one(ry) || |
| 1044 | !is_zero_to_one(gx) || !is_zero_to_one(gy) || |
| 1045 | !is_zero_to_one(bx) || !is_zero_to_one(by) || |
| 1046 | !is_zero_to_one(wx) || !is_zero_to_one(wy) || |
| 1047 | !toXYZD50) { |
| 1048 | return false; |
| 1049 | } |
| 1050 | |
| 1051 | // First, we need to convert xy values (primaries) to XYZ. |
| 1052 | skcms_Matrix3x3 primaries = {{ |
| 1053 | { rx, gx, bx }, |
| 1054 | { ry, gy, by }, |
| 1055 | { 1 - rx - ry, 1 - gx - gy, 1 - bx - by }, |
| 1056 | }}; |
| 1057 | skcms_Matrix3x3 primaries_inv; |
| 1058 | if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) { |
| 1059 | return false; |
| 1060 | } |
| 1061 | |
| 1062 | // Assumes that Y is 1.0f. |
| 1063 | skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } }; |
| 1064 | skcms_Vector3 XYZ = skcms_MV_mul(&primaries_inv, &wXYZ); |
| 1065 | |
| 1066 | skcms_Matrix3x3 toXYZ = {{ |
| 1067 | { XYZ.vals[0], 0, 0 }, |
| 1068 | { 0, XYZ.vals[1], 0 }, |
| 1069 | { 0, 0, XYZ.vals[2] }, |
| 1070 | }}; |
| 1071 | toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ); |
| 1072 | |
| 1073 | // Now convert toXYZ matrix to toXYZD50. |
| 1074 | skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } }; |
| 1075 | |
| 1076 | // Calculate the chromatic adaptation matrix. We will use the Bradford method, thus |
| 1077 | // the matrices below. The Bradford method is used by Adobe and is widely considered |
| 1078 | // to be the best. |
| 1079 | skcms_Matrix3x3 xyz_to_lms = {{ |
| 1080 | { 0.8951f, 0.2664f, -0.1614f }, |
| 1081 | { -0.7502f, 1.7135f, 0.0367f }, |
| 1082 | { 0.0389f, -0.0685f, 1.0296f }, |
| 1083 | }}; |
| 1084 | skcms_Matrix3x3 lms_to_xyz = {{ |
| 1085 | { 0.9869929f, -0.1470543f, 0.1599627f }, |
| 1086 | { 0.4323053f, 0.5183603f, 0.0492912f }, |
| 1087 | { -0.0085287f, 0.0400428f, 0.9684867f }, |
| 1088 | }}; |
| 1089 | |
| 1090 | skcms_Vector3 srcCone = skcms_MV_mul(&xyz_to_lms, &wXYZ); |
| 1091 | skcms_Vector3 dstCone = skcms_MV_mul(&xyz_to_lms, &wXYZD50); |
| 1092 | |
| 1093 | skcms_Matrix3x3 DXtoD50 = {{ |
| 1094 | { dstCone.vals[0] / srcCone.vals[0], 0, 0 }, |
| 1095 | { 0, dstCone.vals[1] / srcCone.vals[1], 0 }, |
| 1096 | { 0, 0, dstCone.vals[2] / srcCone.vals[2] }, |
| 1097 | }}; |
| 1098 | DXtoD50 = skcms_Matrix3x3_concat(&DXtoD50, &xyz_to_lms); |
| 1099 | DXtoD50 = skcms_Matrix3x3_concat(&lms_to_xyz, &DXtoD50); |
| 1100 | |
| 1101 | *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ); |
| 1102 | return true; |
| 1103 | } |
| 1104 | |
| 1105 | |
| 1106 | bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) { |
| 1107 | double a00 = src->vals[0][0], |
| 1108 | a01 = src->vals[1][0], |
| 1109 | a02 = src->vals[2][0], |
| 1110 | a10 = src->vals[0][1], |
| 1111 | a11 = src->vals[1][1], |
| 1112 | a12 = src->vals[2][1], |
| 1113 | a20 = src->vals[0][2], |
| 1114 | a21 = src->vals[1][2], |
| 1115 | a22 = src->vals[2][2]; |
| 1116 | |
| 1117 | double b0 = a00*a11 - a01*a10, |
| 1118 | b1 = a00*a12 - a02*a10, |
| 1119 | b2 = a01*a12 - a02*a11, |
| 1120 | b3 = a20, |
| 1121 | b4 = a21, |
| 1122 | b5 = a22; |
| 1123 | |
| 1124 | double determinant = b0*b5 |
| 1125 | - b1*b4 |
| 1126 | + b2*b3; |
| 1127 | |
| 1128 | if (determinant == 0) { |
| 1129 | return false; |
| 1130 | } |
| 1131 | |
| 1132 | double invdet = 1.0 / determinant; |
| 1133 | if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) { |
| 1134 | return false; |
| 1135 | } |
| 1136 | |
| 1137 | b0 *= invdet; |
| 1138 | b1 *= invdet; |
| 1139 | b2 *= invdet; |
| 1140 | b3 *= invdet; |
| 1141 | b4 *= invdet; |
| 1142 | b5 *= invdet; |
| 1143 | |
| 1144 | dst->vals[0][0] = (float)( a11*b5 - a12*b4 ); |
| 1145 | dst->vals[1][0] = (float)( a02*b4 - a01*b5 ); |
| 1146 | dst->vals[2][0] = (float)( + b2 ); |
| 1147 | dst->vals[0][1] = (float)( a12*b3 - a10*b5 ); |
| 1148 | dst->vals[1][1] = (float)( a00*b5 - a02*b3 ); |
| 1149 | dst->vals[2][1] = (float)( - b1 ); |
| 1150 | dst->vals[0][2] = (float)( a10*b4 - a11*b3 ); |
| 1151 | dst->vals[1][2] = (float)( a01*b3 - a00*b4 ); |
| 1152 | dst->vals[2][2] = (float)( + b0 ); |
| 1153 | |
| 1154 | for (int r = 0; r < 3; ++r) |
| 1155 | for (int c = 0; c < 3; ++c) { |
| 1156 | if (!isfinitef_(dst->vals[r][c])) { |
| 1157 | return false; |
| 1158 | } |
| 1159 | } |
| 1160 | return true; |
| 1161 | } |
| 1162 | |
| 1163 | skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) { |
| 1164 | skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } }; |
| 1165 | for (int r = 0; r < 3; r++) |
| 1166 | for (int c = 0; c < 3; c++) { |
| 1167 | m.vals[r][c] = A->vals[r][0] * B->vals[0][c] |
| 1168 | + A->vals[r][1] * B->vals[1][c] |
| 1169 | + A->vals[r][2] * B->vals[2][c]; |
| 1170 | } |
| 1171 | return m; |
| 1172 | } |
| 1173 | |
| 1174 | skcms_Vector3 skcms_MV_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) { |
| 1175 | skcms_Vector3 dst = {{0,0,0}}; |
| 1176 | for (int row = 0; row < 3; ++row) { |
| 1177 | dst.vals[row] = m->vals[row][0] * v->vals[0] |
| 1178 | + m->vals[row][1] * v->vals[1] |
| 1179 | + m->vals[row][2] * v->vals[2]; |
| 1180 | } |
| 1181 | return dst; |
| 1182 | } |
| 1183 | |
| 1184 | #if defined(__clang__) || defined(__GNUC__) |
| 1185 | #define small_memcpy __builtin_memcpy |
| 1186 | #else |
| 1187 | #define small_memcpy memcpy |
| 1188 | #endif |
| 1189 | |
| 1190 | float log2f_(float x) { |
| 1191 | // The first approximation of log2(x) is its exponent 'e', minus 127. |
| 1192 | int32_t bits; |
| 1193 | small_memcpy(&bits, &x, sizeof(bits)); |
| 1194 | |
| 1195 | float e = (float)bits * (1.0f / (1<<23)); |
| 1196 | |
| 1197 | // If we use the mantissa too we can refine the error signficantly. |
| 1198 | int32_t m_bits = (bits & 0x007fffff) | 0x3f000000; |
| 1199 | float m; |
| 1200 | small_memcpy(&m, &m_bits, sizeof(m)); |
| 1201 | |
| 1202 | return (e - 124.225514990f |
| 1203 | - 1.498030302f*m |
| 1204 | - 1.725879990f/(0.3520887068f + m)); |
| 1205 | } |
| 1206 | |
| 1207 | float exp2f_(float x) { |
| 1208 | float fract = x - floorf_(x); |
| 1209 | |
| 1210 | float fbits = (1.0f * (1<<23)) * (x + 121.274057500f |
| 1211 | - 1.490129070f*fract |
| 1212 | + 27.728023300f/(4.84252568f - fract)); |
| 1213 | if (fbits > INT_MAX) { |
| 1214 | return INFINITY_; |
| 1215 | } else if (fbits < INT_MIN) { |
| 1216 | return -INFINITY_; |
| 1217 | } |
| 1218 | int32_t bits = (int32_t)fbits; |
| 1219 | small_memcpy(&x, &bits, sizeof(x)); |
| 1220 | return x; |
| 1221 | } |
| 1222 | |
| 1223 | float powf_(float x, float y) { |
| 1224 | return (x == 0) || (x == 1) ? x |
| 1225 | : exp2f_(log2f_(x) * y); |
| 1226 | } |
| 1227 | |
| 1228 | float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) { |
| 1229 | float sign = x < 0 ? -1.0f : 1.0f; |
| 1230 | x *= sign; |
| 1231 | |
| 1232 | return sign * (x < tf->d ? tf->c * x + tf->f |
| 1233 | : powf_(tf->a * x + tf->b, tf->g) + tf->e); |
| 1234 | } |
| 1235 | |
| 1236 | bool skcms_TransferFunction_isValid(const skcms_TransferFunction* tf) { |
| 1237 | // Reject obviously malformed inputs |
| 1238 | if (!isfinitef_(tf->a + tf->b + tf->c + tf->d + tf->e + tf->f + tf->g)) { |
| 1239 | return false; |
| 1240 | } |
| 1241 | |
| 1242 | // All of these parameters should be non-negative |
| 1243 | if (tf->a < 0 || tf->c < 0 || tf->d < 0 || tf->g < 0) { |
| 1244 | return false; |
| 1245 | } |
| 1246 | |
| 1247 | return true; |
| 1248 | } |
| 1249 | |
| 1250 | // TODO: Adjust logic here? This still assumes that purely linear inputs will have D > 1, which |
| 1251 | // we never generate. It also emits inverted linear using the same formulation. Standardize on |
| 1252 | // G == 1 here, too? |
| 1253 | bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) { |
| 1254 | // Original equation is: y = (ax + b)^g + e for x >= d |
| 1255 | // y = cx + f otherwise |
| 1256 | // |
| 1257 | // so 1st inverse is: (y - e)^(1/g) = ax + b |
| 1258 | // x = ((y - e)^(1/g) - b) / a |
| 1259 | // |
| 1260 | // which can be re-written as: x = (1/a)(y - e)^(1/g) - b/a |
| 1261 | // x = ((1/a)^g)^(1/g) * (y - e)^(1/g) - b/a |
| 1262 | // x = ([(1/a)^g]y + [-((1/a)^g)e]) ^ [1/g] + [-b/a] |
| 1263 | // |
| 1264 | // and 2nd inverse is: x = (y - f) / c |
| 1265 | // which can be re-written as: x = [1/c]y + [-f/c] |
| 1266 | // |
| 1267 | // and now both can be expressed in terms of the same parametric form as the |
| 1268 | // original - parameters are enclosed in square brackets. |
| 1269 | skcms_TransferFunction tf_inv = { 0, 0, 0, 0, 0, 0, 0 }; |
| 1270 | |
| 1271 | // This rejects obviously malformed inputs, as well as decreasing functions |
| 1272 | if (!skcms_TransferFunction_isValid(src)) { |
| 1273 | return false; |
| 1274 | } |
| 1275 | |
| 1276 | // There are additional constraints to be invertible |
| 1277 | bool has_nonlinear = (src->d <= 1); |
| 1278 | bool has_linear = (src->d > 0); |
| 1279 | |
| 1280 | // Is the linear section not invertible? |
| 1281 | if (has_linear && src->c == 0) { |
| 1282 | return false; |
| 1283 | } |
| 1284 | |
| 1285 | // Is the nonlinear section not invertible? |
| 1286 | if (has_nonlinear && (src->a == 0 || src->g == 0)) { |
| 1287 | return false; |
| 1288 | } |
| 1289 | |
| 1290 | // If both segments are present, they need to line up |
| 1291 | if (has_linear && has_nonlinear) { |
| 1292 | float l_at_d = src->c * src->d + src->f; |
| 1293 | float n_at_d = powf_(src->a * src->d + src->b, src->g) + src->e; |
| 1294 | if (fabsf_(l_at_d - n_at_d) > (1 / 512.0f)) { |
| 1295 | return false; |
| 1296 | } |
| 1297 | } |
| 1298 | |
| 1299 | // Invert linear segment |
| 1300 | if (has_linear) { |
| 1301 | tf_inv.c = 1.0f / src->c; |
| 1302 | tf_inv.f = -src->f / src->c; |
| 1303 | } |
| 1304 | |
| 1305 | // Invert nonlinear segment |
| 1306 | if (has_nonlinear) { |
| 1307 | tf_inv.g = 1.0f / src->g; |
| 1308 | tf_inv.a = powf_(1.0f / src->a, src->g); |
| 1309 | tf_inv.b = -tf_inv.a * src->e; |
| 1310 | tf_inv.e = -src->b / src->a; |
| 1311 | } |
| 1312 | |
| 1313 | if (!has_linear) { |
| 1314 | tf_inv.d = 0; |
| 1315 | } else if (!has_nonlinear) { |
| 1316 | // Any value larger than 1 works |
| 1317 | tf_inv.d = 2.0f; |
| 1318 | } else { |
| 1319 | tf_inv.d = src->c * src->d + src->f; |
| 1320 | } |
| 1321 | |
| 1322 | *dst = tf_inv; |
| 1323 | return true; |
| 1324 | } |
| 1325 | |
| 1326 | // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // |
| 1327 | |
| 1328 | // From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}: |
| 1329 | // |
| 1330 | // tf(x) = cx + f x < d |
| 1331 | // tf(x) = (ax + b)^g + e x ≥ d |
| 1332 | // |
| 1333 | // When fitting, we add the additional constraint that both pieces meet at d: |
| 1334 | // |
| 1335 | // cd + f = (ad + b)^g + e |
| 1336 | // |
| 1337 | // Solving for e and folding it through gives an alternate formulation of the non-linear piece: |
| 1338 | // |
| 1339 | // tf(x) = cx + f x < d |
| 1340 | // tf(x) = (ax + b)^g - (ad + b)^g + cd + f x ≥ d |
| 1341 | // |
| 1342 | // Our overall strategy is then: |
| 1343 | // For a couple tolerances, |
| 1344 | // - skcms_fit_linear(): fit c,d,f iteratively to as many points as our tolerance allows |
| 1345 | // - invert c,d,f |
| 1346 | // - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f |
| 1347 | // (and by constraint, inverted e) to the inverse of the table. |
| 1348 | // Return the parameters with least maximum error. |
| 1349 | // |
| 1350 | // To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals |
| 1351 | // of round-trip f_inv(x), the inverse of the non-linear piece of f(x). |
| 1352 | // |
| 1353 | // let y = Table(x) |
| 1354 | // r(x) = x - f_inv(y) |
| 1355 | // |
| 1356 | // ∂r/∂g = ln(ay + b)*(ay + b)^g |
| 1357 | // - ln(ad + b)*(ad + b)^g |
| 1358 | // ∂r/∂a = yg(ay + b)^(g-1) |
| 1359 | // - dg(ad + b)^(g-1) |
| 1360 | // ∂r/∂b = g(ay + b)^(g-1) |
| 1361 | // - g(ad + b)^(g-1) |
| 1362 | |
| 1363 | // Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P, |
| 1364 | // and fill out the gradient of the residual into dfdP. |
| 1365 | static float rg_nonlinear(float x, |
| 1366 | const skcms_Curve* curve, |
| 1367 | const skcms_TransferFunction* tf, |
| 1368 | const float P[3], |
| 1369 | float dfdP[3]) { |
| 1370 | const float y = skcms_eval_curve(curve, x); |
| 1371 | |
| 1372 | const float g = P[0], a = P[1], b = P[2], |
| 1373 | c = tf->c, d = tf->d, f = tf->f; |
| 1374 | |
| 1375 | const float Y = fmaxf_(a*y + b, 0.0f), |
| 1376 | D = a*d + b; |
| 1377 | assert (D >= 0); |
| 1378 | |
| 1379 | // The gradient. |
| 1380 | dfdP[0] = 0.69314718f*log2f_(Y)*powf_(Y, g) |
| 1381 | - 0.69314718f*log2f_(D)*powf_(D, g); |
| 1382 | dfdP[1] = y*g*powf_(Y, g-1) |
| 1383 | - d*g*powf_(D, g-1); |
| 1384 | dfdP[2] = g*powf_(Y, g-1) |
| 1385 | - g*powf_(D, g-1); |
| 1386 | |
| 1387 | // The residual. |
| 1388 | const float f_inv = powf_(Y, g) |
| 1389 | - powf_(D, g) |
| 1390 | + c*d + f; |
| 1391 | return x - f_inv; |
| 1392 | } |
| 1393 | |
| 1394 | int skcms_fit_linear(const skcms_Curve* curve, int N, float tol, float* c, float* d, float* f) { |
| 1395 | assert(N > 1); |
| 1396 | // We iteratively fit the first points to the TF's linear piece. |
| 1397 | // We want the cx + f line to pass through the first and last points we fit exactly. |
| 1398 | // |
| 1399 | // As we walk along the points we find the minimum and maximum slope of the line before the |
| 1400 | // error would exceed our tolerance. We stop when the range [slope_min, slope_max] becomes |
| 1401 | // emtpy, when we definitely can't add any more points. |
| 1402 | // |
| 1403 | // Some points' error intervals may intersect the running interval but not lie fully |
| 1404 | // within it. So we keep track of the last point we saw that is a valid end point candidate, |
| 1405 | // and once the search is done, back up to build the line through *that* point. |
| 1406 | const float dx = 1.0f / (N - 1); |
| 1407 | |
| 1408 | int lin_points = 1; |
| 1409 | *f = skcms_eval_curve(curve, 0); |
| 1410 | |
| 1411 | float slope_min = -INFINITY_; |
| 1412 | float slope_max = +INFINITY_; |
| 1413 | for (int i = 1; i < N; ++i) { |
| 1414 | float x = i * dx; |
| 1415 | float y = skcms_eval_curve(curve, x); |
| 1416 | |
| 1417 | float slope_max_i = (y + tol - *f) / x, |
| 1418 | slope_min_i = (y - tol - *f) / x; |
| 1419 | if (slope_max_i < slope_min || slope_max < slope_min_i) { |
| 1420 | // Slope intervals would no longer overlap. |
| 1421 | break; |
| 1422 | } |
| 1423 | slope_max = fminf_(slope_max, slope_max_i); |
| 1424 | slope_min = fmaxf_(slope_min, slope_min_i); |
| 1425 | |
| 1426 | float cur_slope = (y - *f) / x; |
| 1427 | if (slope_min <= cur_slope && cur_slope <= slope_max) { |
| 1428 | lin_points = i + 1; |
| 1429 | *c = cur_slope; |
| 1430 | } |
| 1431 | } |
| 1432 | |
| 1433 | // Set D to the last point that met our tolerance. |
| 1434 | *d = (lin_points - 1) * dx; |
| 1435 | return lin_points; |
| 1436 | } |
| 1437 | |
| 1438 | static bool gauss_newton_step(const skcms_Curve* curve, |
| 1439 | const skcms_TransferFunction* tf, |
| 1440 | float P[3], |
| 1441 | float x0, float dx, int N) { |
| 1442 | // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing. |
| 1443 | // |
| 1444 | // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P), |
| 1445 | // where r(P) is the residual vector |
| 1446 | // and Jf is the Jacobian matrix of f(), ∂r/∂P. |
| 1447 | // |
| 1448 | // Let's review the shape of each of these expressions: |
| 1449 | // r(P) is [N x 1], a column vector with one entry per value of x tested |
| 1450 | // Jf is [N x 3], a matrix with an entry for each (x,P) pair |
| 1451 | // Jf^T is [3 x N], the transpose of Jf |
| 1452 | // |
| 1453 | // Jf^T Jf is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix, |
| 1454 | // and so is its inverse (Jf^T Jf)^-1 |
| 1455 | // Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P |
| 1456 | // |
| 1457 | // Our implementation strategy to get to the final ∆P is |
| 1458 | // 1) evaluate Jf^T Jf, call that lhs |
| 1459 | // 2) evaluate Jf^T r(P), call that rhs |
| 1460 | // 3) invert lhs |
| 1461 | // 4) multiply inverse lhs by rhs |
| 1462 | // |
| 1463 | // This is a friendly implementation strategy because we don't have to have any |
| 1464 | // buffers that scale with N, and equally nice don't have to perform any matrix |
| 1465 | // operations that are variable size. |
| 1466 | // |
| 1467 | // Other implementation strategies could trade this off, e.g. evaluating the |
| 1468 | // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by |
| 1469 | // the residuals. That would probably require implementing singular value |
| 1470 | // decomposition, and would create a [3 x N] matrix to be multiplied by the |
| 1471 | // [N x 1] residual vector, but on the upside I think that'd eliminate the |
| 1472 | // possibility of this gauss_newton_step() function ever failing. |
| 1473 | |
| 1474 | // 0) start off with lhs and rhs safely zeroed. |
| 1475 | skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }}; |
| 1476 | skcms_Vector3 rhs = { {0,0,0} }; |
| 1477 | |
| 1478 | // 1,2) evaluate lhs and evaluate rhs |
| 1479 | // We want to evaluate Jf only once, but both lhs and rhs involve Jf^T, |
| 1480 | // so we'll have to update lhs and rhs at the same time. |
| 1481 | for (int i = 0; i < N; i++) { |
| 1482 | float x = x0 + i*dx; |
| 1483 | |
| 1484 | float dfdP[3] = {0,0,0}; |
| 1485 | float resid = rg_nonlinear(x,curve,tf,P, dfdP); |
| 1486 | |
| 1487 | for (int r = 0; r < 3; r++) { |
| 1488 | for (int c = 0; c < 3; c++) { |
| 1489 | lhs.vals[r][c] += dfdP[r] * dfdP[c]; |
| 1490 | } |
| 1491 | rhs.vals[r] += dfdP[r] * resid; |
| 1492 | } |
| 1493 | } |
| 1494 | |
| 1495 | // If any of the 3 P parameters are unused, this matrix will be singular. |
| 1496 | // Detect those cases and fix them up to indentity instead, so we can invert. |
| 1497 | for (int k = 0; k < 3; k++) { |
| 1498 | if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 && |
| 1499 | lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) { |
| 1500 | lhs.vals[k][k] = 1; |
| 1501 | } |
| 1502 | } |
| 1503 | |
| 1504 | // 3) invert lhs |
| 1505 | skcms_Matrix3x3 lhs_inv; |
| 1506 | if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) { |
| 1507 | return false; |
| 1508 | } |
| 1509 | |
| 1510 | // 4) multiply inverse lhs by rhs |
| 1511 | skcms_Vector3 dP = skcms_MV_mul(&lhs_inv, &rhs); |
| 1512 | P[0] += dP.vals[0]; |
| 1513 | P[1] += dP.vals[1]; |
| 1514 | P[2] += dP.vals[2]; |
| 1515 | return isfinitef_(P[0]) && isfinitef_(P[1]) && isfinitef_(P[2]); |
| 1516 | } |
| 1517 | |
| 1518 | |
| 1519 | // Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't. |
| 1520 | static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) { |
| 1521 | float P[3] = { tf->g, tf->a, tf->b }; |
| 1522 | |
| 1523 | // No matter where we start, dx should always represent N even steps from 0 to 1. |
| 1524 | const float dx = 1.0f / (N-1); |
| 1525 | |
| 1526 | for (int j = 0; j < 3/*TODO: tune*/; j++) { |
| 1527 | // These extra constraints a >= 0 and ad+b >= 0 are not modeled in the optimization. |
| 1528 | // We don't really know how to fix up a if it goes negative. |
| 1529 | if (P[1] < 0) { |
| 1530 | return false; |
| 1531 | } |
| 1532 | // If ad+b goes negative, we feel just barely not uneasy enough to tweak b so ad+b is zero. |
| 1533 | if (P[1] * tf->d + P[2] < 0) { |
| 1534 | P[2] = -P[1] * tf->d; |
| 1535 | } |
| 1536 | assert (P[1] >= 0 && |
| 1537 | P[1] * tf->d + P[2] >= 0); |
| 1538 | |
| 1539 | if (!gauss_newton_step(curve, tf, |
| 1540 | P, |
| 1541 | L*dx, dx, N-L)) { |
| 1542 | return false; |
| 1543 | } |
| 1544 | } |
| 1545 | |
| 1546 | // We need to apply our fixups one last time |
| 1547 | if (P[1] < 0) { |
| 1548 | return false; |
| 1549 | } |
| 1550 | if (P[1] * tf->d + P[2] < 0) { |
| 1551 | P[2] = -P[1] * tf->d; |
| 1552 | } |
| 1553 | |
| 1554 | tf->g = P[0]; |
| 1555 | tf->a = P[1]; |
| 1556 | tf->b = P[2]; |
| 1557 | tf->e = tf->c*tf->d + tf->f |
| 1558 | - powf_(tf->a*tf->d + tf->b, tf->g); |
| 1559 | return true; |
| 1560 | } |
| 1561 | |
| 1562 | bool skcms_ApproximateCurve(const skcms_Curve* curve, |
| 1563 | skcms_TransferFunction* approx, |
| 1564 | float* max_error) { |
| 1565 | if (!curve || !approx || !max_error) { |
| 1566 | return false; |
| 1567 | } |
| 1568 | |
| 1569 | if (curve->table_entries == 0) { |
| 1570 | // No point approximating an skcms_TransferFunction with an skcms_TransferFunction! |
| 1571 | return false; |
| 1572 | } |
| 1573 | |
| 1574 | if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) { |
| 1575 | // We need at least two points, and must put some reasonable cap on the maximum number. |
| 1576 | return false; |
| 1577 | } |
| 1578 | |
| 1579 | int N = (int)curve->table_entries; |
| 1580 | const float dx = 1.0f / (N - 1); |
| 1581 | |
| 1582 | *max_error = INFINITY_; |
| 1583 | const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f }; |
| 1584 | for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) { |
| 1585 | skcms_TransferFunction tf, |
| 1586 | tf_inv; |
| 1587 | int L = skcms_fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d, &tf.f); |
| 1588 | |
| 1589 | if (L == N) { |
| 1590 | // If the entire data set was linear, move the coefficients to the nonlinear portion |
| 1591 | // with G == 1. This lets use a canonical representation with d == 0. |
| 1592 | tf.g = 1; |
| 1593 | tf.a = tf.c; |
| 1594 | tf.b = tf.f; |
| 1595 | tf.c = tf.d = tf.e = tf.f = 0; |
| 1596 | } else if (L == N - 1) { |
| 1597 | // Degenerate case with only two points in the nonlinear segment. Solve directly. |
| 1598 | tf.g = 1; |
| 1599 | tf.a = (skcms_eval_curve(curve, (N-1)*dx) - |
| 1600 | skcms_eval_curve(curve, (N-2)*dx)) |
| 1601 | / dx; |
| 1602 | tf.b = skcms_eval_curve(curve, (N-2)*dx) |
| 1603 | - tf.a * (N-2)*dx; |
| 1604 | tf.e = 0; |
| 1605 | } else { |
| 1606 | // Start by guessing a gamma-only curve through the midpoint. |
| 1607 | int mid = (L + N) / 2; |
| 1608 | float mid_x = mid / (N - 1.0f); |
| 1609 | float mid_y = skcms_eval_curve(curve, mid_x); |
| 1610 | tf.g = log2f_(mid_y) / log2f_(mid_x);; |
| 1611 | tf.a = 1; |
| 1612 | tf.b = 0; |
| 1613 | tf.e = tf.c*tf.d + tf.f |
| 1614 | - powf_(tf.a*tf.d + tf.b, tf.g); |
| 1615 | |
| 1616 | |
| 1617 | if (!skcms_TransferFunction_invert(&tf, &tf_inv) || |
| 1618 | !fit_nonlinear(curve, L,N, &tf_inv)) { |
| 1619 | continue; |
| 1620 | } |
| 1621 | |
| 1622 | // We fit tf_inv, so calculate tf to keep in sync. |
| 1623 | if (!skcms_TransferFunction_invert(&tf_inv, &tf)) { |
| 1624 | continue; |
| 1625 | } |
| 1626 | } |
| 1627 | |
| 1628 | // We find our error by roundtripping the table through tf_inv. |
| 1629 | // |
| 1630 | // (The most likely use case for this approximation is to be inverted and |
| 1631 | // used as the transfer function for a destination color space.) |
| 1632 | // |
| 1633 | // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is |
| 1634 | // invertible, so re-verify that here (and use the new inverse for testing). |
| 1635 | if (!skcms_TransferFunction_invert(&tf, &tf_inv)) { |
| 1636 | continue; |
| 1637 | } |
| 1638 | |
| 1639 | float err = skcms_MaxRoundtripError(curve, &tf_inv); |
| 1640 | if (*max_error > err) { |
| 1641 | *max_error = err; |
| 1642 | *approx = tf; |
| 1643 | } |
| 1644 | } |
| 1645 | return isfinitef_(*max_error); |
| 1646 | } |
| 1647 | |
| 1648 | // Without this wasm would try to use the N=4 128-bit vector code path, |
| 1649 | // which while ideal, causes tons of compiler problems. This would be |
| 1650 | // a good thing to revisit as emcc matures (currently 1.38.5). |
| 1651 | #if 1 && defined(__EMSCRIPTEN_major__) |
| 1652 | #if !defined(SKCMS_PORTABLE) |
| 1653 | #define SKCMS_PORTABLE |
| 1654 | #endif |
| 1655 | #endif |
| 1656 | |
| 1657 | extern bool g_skcms_dump_profile; |
| 1658 | bool g_skcms_dump_profile = false; |
| 1659 | |
| 1660 | #if !defined(NDEBUG) && defined(__clang__) |
| 1661 | // Basic profiling tools to time each Op. Not at all thread safe. |
| 1662 | |
| 1663 | #include <stdio.h> |
| 1664 | #include <stdlib.h> |
| 1665 | |
| 1666 | #if defined(__arm__) || defined(__aarch64__) |
| 1667 | #include <time.h> |
| 1668 | static const char* now_units = "ticks"; |
| 1669 | static uint64_t now() { return (uint64_t)clock(); } |
| 1670 | #else |
| 1671 | static const char* now_units = "cycles"; |
| 1672 | static uint64_t now() { return __builtin_readcyclecounter(); } |
| 1673 | #endif |
| 1674 | |
| 1675 | #define M(op) +1 |
| 1676 | static uint64_t counts[FOREACH_Op(M)]; |
| 1677 | #undef M |
| 1678 | |
| 1679 | static void profile_dump_stats() { |
| 1680 | #define M(op) #op, |
| 1681 | static const char* names[] = { FOREACH_Op(M) }; |
| 1682 | #undef M |
| 1683 | for (int i = 0; i < ARRAY_COUNT(counts); i++) { |
| 1684 | if (counts[i]) { |
| 1685 | fprintf(stderr, "%16s: %12llu %s\n", |
| 1686 | names[i], (unsigned long long)counts[i], now_units); |
| 1687 | } |
| 1688 | } |
| 1689 | } |
| 1690 | |
| 1691 | static inline Op profile_next_op(Op op) { |
| 1692 | if (__builtin_expect(g_skcms_dump_profile, false)) { |
| 1693 | static uint64_t start = 0; |
| 1694 | static uint64_t* current = NULL; |
| 1695 | |
| 1696 | if (!current) { |
| 1697 | atexit(profile_dump_stats); |
| 1698 | } else { |
| 1699 | *current += now() - start; |
| 1700 | } |
| 1701 | |
| 1702 | current = &counts[op]; |
| 1703 | start = now(); |
| 1704 | } |
| 1705 | return op; |
| 1706 | } |
| 1707 | #else |
| 1708 | static inline Op profile_next_op(Op op) { |
| 1709 | (void)g_skcms_dump_profile; |
| 1710 | return op; |
| 1711 | } |
| 1712 | #endif |
| 1713 | |
| 1714 | #if defined(__clang__) |
| 1715 | typedef float __attribute__((ext_vector_type(4))) Fx4; |
| 1716 | typedef int32_t __attribute__((ext_vector_type(4))) I32x4; |
| 1717 | typedef uint64_t __attribute__((ext_vector_type(4))) U64x4; |
| 1718 | typedef uint32_t __attribute__((ext_vector_type(4))) U32x4; |
| 1719 | typedef uint16_t __attribute__((ext_vector_type(4))) U16x4; |
| 1720 | typedef uint8_t __attribute__((ext_vector_type(4))) U8x4; |
| 1721 | |
| 1722 | typedef float __attribute__((ext_vector_type(8))) Fx8; |
| 1723 | typedef int32_t __attribute__((ext_vector_type(8))) I32x8; |
| 1724 | typedef uint64_t __attribute__((ext_vector_type(8))) U64x8; |
| 1725 | typedef uint32_t __attribute__((ext_vector_type(8))) U32x8; |
| 1726 | typedef uint16_t __attribute__((ext_vector_type(8))) U16x8; |
| 1727 | typedef uint8_t __attribute__((ext_vector_type(8))) U8x8; |
| 1728 | |
| 1729 | typedef float __attribute__((ext_vector_type(16))) Fx16; |
| 1730 | typedef int32_t __attribute__((ext_vector_type(16))) I32x16; |
| 1731 | typedef uint64_t __attribute__((ext_vector_type(16))) U64x16; |
| 1732 | typedef uint32_t __attribute__((ext_vector_type(16))) U32x16; |
| 1733 | typedef uint16_t __attribute__((ext_vector_type(16))) U16x16; |
| 1734 | typedef uint8_t __attribute__((ext_vector_type(16))) U8x16; |
| 1735 | #elif defined(__GNUC__) |
| 1736 | typedef float __attribute__((vector_size(16))) Fx4; |
| 1737 | typedef int32_t __attribute__((vector_size(16))) I32x4; |
| 1738 | typedef uint64_t __attribute__((vector_size(32))) U64x4; |
| 1739 | typedef uint32_t __attribute__((vector_size(16))) U32x4; |
| 1740 | typedef uint16_t __attribute__((vector_size( 8))) U16x4; |
| 1741 | typedef uint8_t __attribute__((vector_size( 4))) U8x4; |
| 1742 | |
| 1743 | typedef float __attribute__((vector_size(32))) Fx8; |
| 1744 | typedef int32_t __attribute__((vector_size(32))) I32x8; |
| 1745 | typedef uint64_t __attribute__((vector_size(64))) U64x8; |
| 1746 | typedef uint32_t __attribute__((vector_size(32))) U32x8; |
| 1747 | typedef uint16_t __attribute__((vector_size(16))) U16x8; |
| 1748 | typedef uint8_t __attribute__((vector_size( 8))) U8x8; |
| 1749 | |
| 1750 | typedef float __attribute__((vector_size( 64))) Fx16; |
| 1751 | typedef int32_t __attribute__((vector_size( 64))) I32x16; |
| 1752 | typedef uint64_t __attribute__((vector_size(128))) U64x16; |
| 1753 | typedef uint32_t __attribute__((vector_size( 64))) U32x16; |
| 1754 | typedef uint16_t __attribute__((vector_size( 32))) U16x16; |
| 1755 | typedef uint8_t __attribute__((vector_size( 16))) U8x16; |
| 1756 | #endif |
| 1757 | |
| 1758 | // First, instantiate our default exec_ops() implementation using the default compiliation target. |
| 1759 | |
| 1760 | #if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__)) |
| 1761 | #define N 1 |
| 1762 | |
| 1763 | #define F float |
| 1764 | #define U64 uint64_t |
| 1765 | #define U32 uint32_t |
| 1766 | #define I32 int32_t |
| 1767 | #define U16 uint16_t |
| 1768 | #define U8 uint8_t |
| 1769 | |
| 1770 | #define F0 0.0f |
| 1771 | #define F1 1.0f |
| 1772 | |
| 1773 | #elif defined(__AVX512F__) |
| 1774 | #define N 16 |
| 1775 | |
| 1776 | #define F Fx16 |
| 1777 | #define U64 U64x16 |
| 1778 | #define U32 U32x16 |
| 1779 | #define I32 I32x16 |
| 1780 | #define U16 U16x16 |
| 1781 | #define U8 U8x16 |
| 1782 | |
| 1783 | #define F0 (F){0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0} |
| 1784 | #define F1 (F){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1} |
| 1785 | #elif defined(__AVX__) |
| 1786 | #define N 8 |
| 1787 | |
| 1788 | #define F Fx8 |
| 1789 | #define U64 U64x8 |
| 1790 | #define U32 U32x8 |
| 1791 | #define I32 I32x8 |
| 1792 | #define U16 U16x8 |
| 1793 | #define U8 U8x8 |
| 1794 | |
| 1795 | #define F0 (F){0,0,0,0, 0,0,0,0} |
| 1796 | #define F1 (F){1,1,1,1, 1,1,1,1} |
| 1797 | #else |
| 1798 | #define N 4 |
| 1799 | |
| 1800 | #define F Fx4 |
| 1801 | #define U64 U64x4 |
| 1802 | #define U32 U32x4 |
| 1803 | #define I32 I32x4 |
| 1804 | #define U16 U16x4 |
| 1805 | #define U8 U8x4 |
| 1806 | |
| 1807 | #define F0 (F){0,0,0,0} |
| 1808 | #define F1 (F){1,1,1,1} |
| 1809 | #endif |
| 1810 | |
| 1811 | #define NS(id) id |
| 1812 | #define ATTR |
| 1813 | #include "src/Transform_inl.h" |
| 1814 | #undef N |
| 1815 | #undef F |
| 1816 | #undef U64 |
| 1817 | #undef U32 |
| 1818 | #undef I32 |
| 1819 | #undef U16 |
| 1820 | #undef U8 |
| 1821 | #undef F0 |
| 1822 | #undef F1 |
| 1823 | #undef NS |
| 1824 | #undef ATTR |
| 1825 | |
| 1826 | // Now, instantiate any other versions of run_program() we may want for runtime detection. |
| 1827 | #if !defined(SKCMS_PORTABLE) && (defined(__clang__) || defined(__GNUC__)) \ |
| 1828 | && defined(__x86_64__) && !defined(__AVX2__) |
| 1829 | #define N 8 |
| 1830 | #define F Fx8 |
| 1831 | #define U64 U64x8 |
| 1832 | #define U32 U32x8 |
| 1833 | #define I32 I32x8 |
| 1834 | #define U16 U16x8 |
| 1835 | #define U8 U8x8 |
| 1836 | #define F0 (F){0,0,0,0, 0,0,0,0} |
| 1837 | #define F1 (F){1,1,1,1, 1,1,1,1} |
| 1838 | |
| 1839 | #define NS(id) id ## _hsw |
| 1840 | #define ATTR __attribute__((target("avx2,f16c"))) |
| 1841 | |
| 1842 | // We check these guards to see if we have support for these features. |
| 1843 | // They're likely _not_ defined here in our baseline build config. |
| 1844 | #ifndef __AVX__ |
| 1845 | #define __AVX__ 1 |
| 1846 | #define UNDEF_AVX |
| 1847 | #endif |
| 1848 | #ifndef __F16C__ |
| 1849 | #define __F16C__ 1 |
| 1850 | #define UNDEF_F16C |
| 1851 | #endif |
| 1852 | #ifndef __AVX2__ |
| 1853 | #define __AVX2__ 1 |
| 1854 | #define UNDEF_AVX2 |
| 1855 | #endif |
| 1856 | |
| 1857 | #include "src/Transform_inl.h" |
| 1858 | |
| 1859 | #undef N |
| 1860 | #undef F |
| 1861 | #undef U64 |
| 1862 | #undef U32 |
| 1863 | #undef I32 |
| 1864 | #undef U16 |
| 1865 | #undef U8 |
| 1866 | #undef F0 |
| 1867 | #undef F1 |
| 1868 | #undef NS |
| 1869 | #undef ATTR |
| 1870 | |
| 1871 | #ifdef UNDEF_AVX |
| 1872 | #undef __AVX__ |
| 1873 | #undef UNDEF_AVX |
| 1874 | #endif |
| 1875 | #ifdef UNDEF_F16C |
| 1876 | #undef __F16C__ |
| 1877 | #undef UNDEF_F16C |
| 1878 | #endif |
| 1879 | #ifdef UNDEF_AVX2 |
| 1880 | #undef __AVX2__ |
| 1881 | #undef UNDEF_AVX2 |
| 1882 | #endif |
| 1883 | |
| 1884 | #define TEST_FOR_HSW |
| 1885 | |
| 1886 | static bool hsw_ok_ = false; |
| 1887 | static void check_hsw_ok() { |
| 1888 | // See http://www.sandpile.org/x86/cpuid.htm |
| 1889 | |
| 1890 | // First, a basic cpuid(1). |
| 1891 | uint32_t eax, ebx, ecx, edx; |
| 1892 | __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) |
| 1893 | : "0"(1), "2"(0)); |
| 1894 | |
| 1895 | // Sanity check for prerequisites. |
| 1896 | if ((edx & (1<<25)) != (1<<25)) { return; } // SSE |
| 1897 | if ((edx & (1<<26)) != (1<<26)) { return; } // SSE2 |
| 1898 | if ((ecx & (1<< 0)) != (1<< 0)) { return; } // SSE3 |
| 1899 | if ((ecx & (1<< 9)) != (1<< 9)) { return; } // SSSE3 |
| 1900 | if ((ecx & (1<<19)) != (1<<19)) { return; } // SSE4.1 |
| 1901 | if ((ecx & (1<<20)) != (1<<20)) { return; } // SSE4.2 |
| 1902 | |
| 1903 | if ((ecx & (3<<26)) != (3<<26)) { return; } // XSAVE + OSXSAVE |
| 1904 | |
| 1905 | { |
| 1906 | uint32_t eax_xgetbv, edx_xgetbv; |
| 1907 | __asm__ __volatile__("xgetbv" : "=a"(eax_xgetbv), "=d"(edx_xgetbv) : "c"(0)); |
| 1908 | if ((eax_xgetbv & (3<<1)) != (3<<1)) { return; } // XMM+YMM state saved? |
| 1909 | } |
| 1910 | |
| 1911 | if ((ecx & (1<<28)) != (1<<28)) { return; } // AVX |
| 1912 | if ((ecx & (1<<29)) != (1<<29)) { return; } // F16C |
| 1913 | if ((ecx & (1<<12)) != (1<<12)) { return; } // FMA (TODO: not currently used) |
| 1914 | |
| 1915 | // Call cpuid(7) to check for our final AVX2 feature bit! |
| 1916 | __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) |
| 1917 | : "0"(7), "2"(0)); |
| 1918 | if ((ebx & (1<< 5)) != (1<< 5)) { return; } // AVX2 |
| 1919 | |
| 1920 | hsw_ok_ = true; |
| 1921 | } |
| 1922 | |
| 1923 | #if defined(_MSC_VER) |
| 1924 | #include <Windows.h> |
| 1925 | INIT_ONCE check_hsw_ok_once = INIT_ONCE_STATIC_INIT; |
| 1926 | |
| 1927 | static BOOL check_hsw_ok_InitOnce_wrapper(INIT_ONCE* once, void* param, void** ctx) { |
| 1928 | (void)once; |
| 1929 | (void)param; |
| 1930 | (void)ctx; |
| 1931 | check_hsw_ok(); |
| 1932 | return TRUE; |
| 1933 | } |
| 1934 | |
| 1935 | static bool hsw_ok() { |
| 1936 | InitOnceExecuteOnce(&check_hsw_ok_once, check_hsw_ok_InitOnce_wrapper, NULL, NULL); |
| 1937 | return hsw_ok_; |
| 1938 | } |
| 1939 | #else |
| 1940 | #include <pthread.h> |
| 1941 | static pthread_once_t check_hsw_ok_once = PTHREAD_ONCE_INIT; |
| 1942 | |
| 1943 | static bool hsw_ok() { |
| 1944 | pthread_once(&check_hsw_ok_once, check_hsw_ok); |
| 1945 | return hsw_ok_; |
| 1946 | } |
| 1947 | #endif |
| 1948 | |
| 1949 | #endif |
| 1950 | |
| 1951 | static bool is_identity_tf(const skcms_TransferFunction* tf) { |
| 1952 | return tf->g == 1 && tf->a == 1 |
| 1953 | && tf->b == 0 && tf->c == 0 && tf->d == 0 && tf->e == 0 && tf->f == 0; |
| 1954 | } |
| 1955 | |
| 1956 | typedef struct { |
| 1957 | Op op; |
| 1958 | const void* arg; |
| 1959 | } OpAndArg; |
| 1960 | |
| 1961 | static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) { |
| 1962 | static const struct { Op parametric, table_8, table_16; } ops[] = { |
| 1963 | { Op_tf_r, Op_table_8_r, Op_table_16_r }, |
| 1964 | { Op_tf_g, Op_table_8_g, Op_table_16_g }, |
| 1965 | { Op_tf_b, Op_table_8_b, Op_table_16_b }, |
| 1966 | { Op_tf_a, Op_table_8_a, Op_table_16_a }, |
| 1967 | }; |
| 1968 | |
| 1969 | if (curve->table_entries == 0) { |
| 1970 | return is_identity_tf(&curve->parametric) |
| 1971 | ? (OpAndArg){ Op_noop, NULL } |
| 1972 | : (OpAndArg){ ops[channel].parametric, &curve->parametric }; |
| 1973 | } else if (curve->table_8) { |
| 1974 | return (OpAndArg){ ops[channel].table_8, curve }; |
| 1975 | } else if (curve->table_16) { |
| 1976 | return (OpAndArg){ ops[channel].table_16, curve }; |
| 1977 | } |
| 1978 | |
| 1979 | assert(false); |
| 1980 | return (OpAndArg){Op_noop,NULL}; |
| 1981 | } |
| 1982 | |
| 1983 | static size_t bytes_per_pixel(skcms_PixelFormat fmt) { |
| 1984 | switch (fmt >> 1) { // ignore rgb/bgr |
| 1985 | case skcms_PixelFormat_A_8 >> 1: return 1; |
| 1986 | case skcms_PixelFormat_G_8 >> 1: return 1; |
| 1987 | case skcms_PixelFormat_ABGR_4444 >> 1: return 2; |
| 1988 | case skcms_PixelFormat_RGB_565 >> 1: return 2; |
| 1989 | case skcms_PixelFormat_RGB_888 >> 1: return 3; |
| 1990 | case skcms_PixelFormat_RGBA_8888 >> 1: return 4; |
| 1991 | case skcms_PixelFormat_RGBA_1010102 >> 1: return 4; |
| 1992 | case skcms_PixelFormat_RGB_161616 >> 1: return 6; |
| 1993 | case skcms_PixelFormat_RGBA_16161616 >> 1: return 8; |
| 1994 | case skcms_PixelFormat_RGB_hhh >> 1: return 6; |
| 1995 | case skcms_PixelFormat_RGBA_hhhh >> 1: return 8; |
| 1996 | case skcms_PixelFormat_RGB_fff >> 1: return 12; |
| 1997 | case skcms_PixelFormat_RGBA_ffff >> 1: return 16; |
| 1998 | } |
| 1999 | assert(false); |
| 2000 | return 0; |
| 2001 | } |
| 2002 | |
| 2003 | static bool prep_for_destination(const skcms_ICCProfile* profile, |
| 2004 | skcms_Matrix3x3* fromXYZD50, |
| 2005 | skcms_TransferFunction* invR, |
| 2006 | skcms_TransferFunction* invG, |
| 2007 | skcms_TransferFunction* invB) { |
| 2008 | // We only support destinations with parametric transfer functions |
| 2009 | // and with gamuts that can be transformed from XYZD50. |
| 2010 | return profile->has_trc |
| 2011 | && profile->has_toXYZD50 |
| 2012 | && profile->trc[0].table_entries == 0 |
| 2013 | && profile->trc[1].table_entries == 0 |
| 2014 | && profile->trc[2].table_entries == 0 |
| 2015 | && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR) |
| 2016 | && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG) |
| 2017 | && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB) |
| 2018 | && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50); |
| 2019 | } |
| 2020 | |
| 2021 | bool skcms_Transform(const void* src, |
| 2022 | skcms_PixelFormat srcFmt, |
| 2023 | skcms_AlphaFormat srcAlpha, |
| 2024 | const skcms_ICCProfile* srcProfile, |
| 2025 | void* dst, |
| 2026 | skcms_PixelFormat dstFmt, |
| 2027 | skcms_AlphaFormat dstAlpha, |
| 2028 | const skcms_ICCProfile* dstProfile, |
| 2029 | size_t nz) { |
| 2030 | const size_t dst_bpp = bytes_per_pixel(dstFmt), |
| 2031 | src_bpp = bytes_per_pixel(srcFmt); |
| 2032 | // Let's just refuse if the request is absurdly big. |
| 2033 | if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) { |
| 2034 | return false; |
| 2035 | } |
| 2036 | int n = (int)nz; |
| 2037 | |
| 2038 | // Null profiles default to sRGB. Passing null for both is handy when doing format conversion. |
| 2039 | if (!srcProfile) { |
| 2040 | srcProfile = skcms_sRGB_profile(); |
| 2041 | } |
| 2042 | if (!dstProfile) { |
| 2043 | dstProfile = skcms_sRGB_profile(); |
| 2044 | } |
| 2045 | |
| 2046 | // We can't transform in place unless the PixelFormats are the same size. |
| 2047 | if (dst == src && (dstFmt >> 1) != (srcFmt >> 1)) { |
| 2048 | return false; |
| 2049 | } |
| 2050 | // TODO: this check lazilly disallows U16 <-> F16, but that would actually be fine. |
| 2051 | // TODO: more careful alias rejection (like, dst == src + 1)? |
| 2052 | |
| 2053 | Op program [32]; |
| 2054 | const void* arguments[32]; |
| 2055 | |
| 2056 | Op* ops = program; |
| 2057 | const void** args = arguments; |
| 2058 | |
| 2059 | skcms_TransferFunction inv_dst_tf_r, inv_dst_tf_g, inv_dst_tf_b; |
| 2060 | skcms_Matrix3x3 from_xyz; |
| 2061 | |
| 2062 | switch (srcFmt >> 1) { |
| 2063 | default: return false; |
| 2064 | case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_load_a8; break; |
| 2065 | case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_load_g8; break; |
| 2066 | case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_load_4444; break; |
| 2067 | case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_load_565; break; |
| 2068 | case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_load_888; break; |
| 2069 | case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_load_8888; break; |
| 2070 | case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_load_1010102; break; |
| 2071 | case skcms_PixelFormat_RGB_161616 >> 1: *ops++ = Op_load_161616; break; |
| 2072 | case skcms_PixelFormat_RGBA_16161616 >> 1: *ops++ = Op_load_16161616; break; |
| 2073 | case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_load_hhh; break; |
| 2074 | case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_load_hhhh; break; |
| 2075 | case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_load_fff; break; |
| 2076 | case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_load_ffff; break; |
| 2077 | } |
| 2078 | if (srcFmt & 1) { |
| 2079 | *ops++ = Op_swap_rb; |
| 2080 | } |
| 2081 | skcms_ICCProfile gray_dst_profile; |
| 2082 | if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) { |
| 2083 | // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform |
| 2084 | // luminance (Y) by the destination transfer function. |
| 2085 | gray_dst_profile = *dstProfile; |
| 2086 | skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50); |
| 2087 | dstProfile = &gray_dst_profile; |
| 2088 | } |
| 2089 | |
| 2090 | if (srcProfile->data_color_space == skcms_Signature_CMYK) { |
| 2091 | // Photoshop creates CMYK images as inverse CMYK. |
| 2092 | // These happen to be the only ones we've _ever_ seen. |
| 2093 | *ops++ = Op_invert; |
| 2094 | // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K. |
| 2095 | srcAlpha = skcms_AlphaFormat_Unpremul; |
| 2096 | } |
| 2097 | |
| 2098 | if (srcAlpha == skcms_AlphaFormat_Opaque) { |
| 2099 | *ops++ = Op_force_opaque; |
| 2100 | } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) { |
| 2101 | *ops++ = Op_unpremul; |
| 2102 | } |
| 2103 | |
| 2104 | // TODO: We can skip this work if both srcAlpha and dstAlpha are PremulLinear, and the profiles |
| 2105 | // are the same. Also, if dstAlpha is PremulLinear, and SrcAlpha is Opaque. |
| 2106 | if (dstProfile != srcProfile || |
| 2107 | srcAlpha == skcms_AlphaFormat_PremulLinear || |
| 2108 | dstAlpha == skcms_AlphaFormat_PremulLinear) { |
| 2109 | |
| 2110 | if (!prep_for_destination(dstProfile, |
| 2111 | &from_xyz, &inv_dst_tf_r, &inv_dst_tf_b, &inv_dst_tf_g)) { |
| 2112 | return false; |
| 2113 | } |
| 2114 | |
| 2115 | if (srcProfile->has_A2B) { |
| 2116 | if (srcProfile->A2B.input_channels) { |
| 2117 | for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) { |
| 2118 | OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i); |
| 2119 | if (oa.op != Op_noop) { |
| 2120 | *ops++ = oa.op; |
| 2121 | *args++ = oa.arg; |
| 2122 | } |
| 2123 | } |
| 2124 | switch (srcProfile->A2B.input_channels) { |
| 2125 | case 3: *ops++ = srcProfile->A2B.grid_8 ? Op_clut_3D_8 : Op_clut_3D_16; break; |
| 2126 | case 4: *ops++ = srcProfile->A2B.grid_8 ? Op_clut_4D_8 : Op_clut_4D_16; break; |
| 2127 | default: return false; |
| 2128 | } |
| 2129 | *args++ = &srcProfile->A2B; |
| 2130 | } |
| 2131 | |
| 2132 | if (srcProfile->A2B.matrix_channels == 3) { |
| 2133 | for (int i = 0; i < 3; i++) { |
| 2134 | OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i); |
| 2135 | if (oa.op != Op_noop) { |
| 2136 | *ops++ = oa.op; |
| 2137 | *args++ = oa.arg; |
| 2138 | } |
| 2139 | } |
| 2140 | |
| 2141 | static const skcms_Matrix3x4 I = {{ |
| 2142 | {1,0,0,0}, |
| 2143 | {0,1,0,0}, |
| 2144 | {0,0,1,0}, |
| 2145 | }}; |
| 2146 | if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) { |
| 2147 | *ops++ = Op_matrix_3x4; |
| 2148 | *args++ = &srcProfile->A2B.matrix; |
| 2149 | } |
| 2150 | } |
| 2151 | |
| 2152 | if (srcProfile->A2B.output_channels == 3) { |
| 2153 | for (int i = 0; i < 3; i++) { |
| 2154 | OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i); |
| 2155 | if (oa.op != Op_noop) { |
| 2156 | *ops++ = oa.op; |
| 2157 | *args++ = oa.arg; |
| 2158 | } |
| 2159 | } |
| 2160 | } |
| 2161 | |
| 2162 | if (srcProfile->pcs == skcms_Signature_Lab) { |
| 2163 | *ops++ = Op_lab_to_xyz; |
| 2164 | } |
| 2165 | |
| 2166 | } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) { |
| 2167 | for (int i = 0; i < 3; i++) { |
| 2168 | OpAndArg oa = select_curve_op(&srcProfile->trc[i], i); |
| 2169 | if (oa.op != Op_noop) { |
| 2170 | *ops++ = oa.op; |
| 2171 | *args++ = oa.arg; |
| 2172 | } |
| 2173 | } |
| 2174 | } else { |
| 2175 | return false; |
| 2176 | } |
| 2177 | |
| 2178 | // At this point our source colors are linear, either RGB (XYZ-type profiles) |
| 2179 | // or XYZ (A2B-type profiles). Unpremul is a linear operation (multiply by a |
| 2180 | // constant 1/a), so either way we can do it now if needed. |
| 2181 | if (srcAlpha == skcms_AlphaFormat_PremulLinear) { |
| 2182 | *ops++ = Op_unpremul; |
| 2183 | } |
| 2184 | |
| 2185 | // A2B sources should already be in XYZD50 at this point. |
| 2186 | // Others still need to be transformed using their toXYZD50 matrix. |
| 2187 | // N.B. There are profiles that contain both A2B tags and toXYZD50 matrices. |
| 2188 | // If we use the A2B tags, we need to ignore the XYZD50 matrix entirely. |
| 2189 | assert (srcProfile->has_A2B || srcProfile->has_toXYZD50); |
| 2190 | static const skcms_Matrix3x3 I = {{ |
| 2191 | { 1.0f, 0.0f, 0.0f }, |
| 2192 | { 0.0f, 1.0f, 0.0f }, |
| 2193 | { 0.0f, 0.0f, 1.0f }, |
| 2194 | }}; |
| 2195 | const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50; |
| 2196 | |
| 2197 | // There's a chance the source and destination gamuts are identical, |
| 2198 | // in which case we can skip the gamut transform. |
| 2199 | if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) { |
| 2200 | // Concat the entire gamut transform into from_xyz, |
| 2201 | // now slightly misnamed but it's a handy spot to stash the result. |
| 2202 | from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz); |
| 2203 | *ops++ = Op_matrix_3x3; |
| 2204 | *args++ = &from_xyz; |
| 2205 | } |
| 2206 | |
| 2207 | if (dstAlpha == skcms_AlphaFormat_PremulLinear) { |
| 2208 | *ops++ = Op_premul; |
| 2209 | } |
| 2210 | |
| 2211 | // Encode back to dst RGB using its parametric transfer functions. |
| 2212 | if (!is_identity_tf(&inv_dst_tf_r)) { *ops++ = Op_tf_r; *args++ = &inv_dst_tf_r; } |
| 2213 | if (!is_identity_tf(&inv_dst_tf_g)) { *ops++ = Op_tf_g; *args++ = &inv_dst_tf_g; } |
| 2214 | if (!is_identity_tf(&inv_dst_tf_b)) { *ops++ = Op_tf_b; *args++ = &inv_dst_tf_b; } |
| 2215 | } |
| 2216 | |
| 2217 | if (dstAlpha == skcms_AlphaFormat_Opaque) { |
| 2218 | *ops++ = Op_force_opaque; |
| 2219 | } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) { |
| 2220 | *ops++ = Op_premul; |
| 2221 | } |
| 2222 | if (dstFmt & 1) { |
| 2223 | *ops++ = Op_swap_rb; |
| 2224 | } |
| 2225 | if (dstFmt < skcms_PixelFormat_RGB_hhh) { |
| 2226 | *ops++ = Op_clamp; |
| 2227 | } |
| 2228 | switch (dstFmt >> 1) { |
| 2229 | default: return false; |
| 2230 | case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_store_a8; break; |
| 2231 | case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_store_g8; break; |
| 2232 | case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_store_4444; break; |
| 2233 | case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_store_565; break; |
| 2234 | case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_store_888; break; |
| 2235 | case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_store_8888; break; |
| 2236 | case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_store_1010102; break; |
| 2237 | case skcms_PixelFormat_RGB_161616 >> 1: *ops++ = Op_store_161616; break; |
| 2238 | case skcms_PixelFormat_RGBA_16161616 >> 1: *ops++ = Op_store_16161616; break; |
| 2239 | case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_store_hhh; break; |
| 2240 | case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_store_hhhh; break; |
| 2241 | case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_store_fff; break; |
| 2242 | case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_store_ffff; break; |
| 2243 | } |
| 2244 | |
| 2245 | void (*run)(const Op*, const void**, const char*, char*, int, size_t,size_t) = run_program; |
| 2246 | #if defined(TEST_FOR_HSW) |
| 2247 | if (hsw_ok()) { |
| 2248 | run = run_program_hsw; |
| 2249 | } |
| 2250 | #endif |
| 2251 | run(program, arguments, src, dst, n, src_bpp,dst_bpp); |
| 2252 | return true; |
| 2253 | } |
| 2254 | |
| 2255 | static void assert_usable_as_destination(const skcms_ICCProfile* profile) { |
| 2256 | #if defined(NDEBUG) |
| 2257 | (void)profile; |
| 2258 | #else |
| 2259 | skcms_Matrix3x3 fromXYZD50; |
| 2260 | skcms_TransferFunction invR, invG, invB; |
| 2261 | assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB)); |
| 2262 | #endif |
| 2263 | } |
| 2264 | |
| 2265 | bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) { |
| 2266 | skcms_Matrix3x3 fromXYZD50; |
| 2267 | if (!profile->has_trc || !profile->has_toXYZD50 |
| 2268 | || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) { |
| 2269 | return false; |
| 2270 | } |
| 2271 | |
| 2272 | skcms_TransferFunction tf[3]; |
| 2273 | for (int i = 0; i < 3; i++) { |
| 2274 | skcms_TransferFunction inv; |
| 2275 | if (profile->trc[i].table_entries == 0 |
| 2276 | && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) { |
| 2277 | tf[i] = profile->trc[i].parametric; |
| 2278 | continue; |
| 2279 | } |
| 2280 | |
| 2281 | float max_error; |
| 2282 | // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible. |
| 2283 | if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) { |
| 2284 | return false; |
| 2285 | } |
| 2286 | } |
| 2287 | |
| 2288 | for (int i = 0; i < 3; ++i) { |
| 2289 | profile->trc[i].table_entries = 0; |
| 2290 | profile->trc[i].parametric = tf[i]; |
| 2291 | } |
| 2292 | |
| 2293 | assert_usable_as_destination(profile); |
| 2294 | return true; |
| 2295 | } |
| 2296 | |
| 2297 | bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) { |
| 2298 | // Operate on a copy of profile, so we can choose the best TF for the original curves |
| 2299 | skcms_ICCProfile result = *profile; |
| 2300 | if (!skcms_MakeUsableAsDestination(&result)) { |
| 2301 | return false; |
| 2302 | } |
| 2303 | |
| 2304 | int best_tf = 0; |
| 2305 | float min_max_error = INFINITY_; |
| 2306 | for (int i = 0; i < 3; i++) { |
| 2307 | skcms_TransferFunction inv; |
| 2308 | skcms_TransferFunction_invert(&result.trc[i].parametric, &inv); |
| 2309 | |
| 2310 | float err = 0; |
| 2311 | for (int j = 0; j < 3; ++j) { |
| 2312 | err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv)); |
| 2313 | } |
| 2314 | if (min_max_error > err) { |
| 2315 | min_max_error = err; |
| 2316 | best_tf = i; |
| 2317 | } |
| 2318 | } |
| 2319 | |
| 2320 | for (int i = 0; i < 3; i++) { |
| 2321 | result.trc[i].parametric = result.trc[best_tf].parametric; |
| 2322 | } |
| 2323 | |
| 2324 | *profile = result; |
| 2325 | assert_usable_as_destination(profile); |
| 2326 | return true; |
| 2327 | } |