blob: 5b66128ab0f7c3b216af6c7a49e9da36882ea026 [file] [log] [blame]
Mike Kleinded7a552018-04-10 10:05:31 -04001/*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com5155d382018-07-02 19:08:45 +00008#include "skcms.h"
9#include "skcms_internal.h"
10#include <assert.h>
11#include <float.h>
12#include <limits.h>
13#include <stdlib.h>
14#include <string.h>
Mike Kleinded7a552018-04-10 10:05:31 -040015
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com5155d382018-07-02 19:08:45 +000016static float minus_1_ulp(float x) {
17 int32_t bits;
18 memcpy(&bits, &x, sizeof(bits));
19 bits = bits - 1;
20 memcpy(&x, &bits, sizeof(bits));
21 return x;
22}
23
24float skcms_eval_curve(const skcms_Curve* curve, float x) {
25 if (curve->table_entries == 0) {
26 return skcms_TransferFunction_eval(&curve->parametric, x);
27 }
28
29 float ix = fmaxf_(0, fminf_(x, 1)) * (curve->table_entries - 1);
30 int lo = (int) ix,
31 hi = (int)minus_1_ulp(ix + 1.0f);
32 float t = ix - (float)lo;
33
34 float l, h;
35 if (curve->table_8) {
36 l = curve->table_8[lo] * (1/255.0f);
37 h = curve->table_8[hi] * (1/255.0f);
38 } else {
39 uint16_t be_l, be_h;
40 memcpy(&be_l, curve->table_16 + 2*lo, 2);
41 memcpy(&be_h, curve->table_16 + 2*hi, 2);
42 uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
43 uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
44 l = le_l * (1/65535.0f);
45 h = le_h * (1/65535.0f);
46 }
47 return l + (h-l)*t;
48}
49
50float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
51 uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
52 const float dx = 1.0f / (N - 1);
53 float err = 0;
54 for (uint32_t i = 0; i < N; i++) {
55 float x = i * dx,
56 y = skcms_eval_curve(curve, x);
57 err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
58 }
59 return err;
60}
61
62bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
63 return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f);
64}
65
66// Additional ICC signature values that are only used internally
67enum {
68 // File signature
69 skcms_Signature_acsp = 0x61637370,
70
71 // Tag signatures
72 skcms_Signature_rTRC = 0x72545243,
73 skcms_Signature_gTRC = 0x67545243,
74 skcms_Signature_bTRC = 0x62545243,
75 skcms_Signature_kTRC = 0x6B545243,
76
77 skcms_Signature_rXYZ = 0x7258595A,
78 skcms_Signature_gXYZ = 0x6758595A,
79 skcms_Signature_bXYZ = 0x6258595A,
80
81 skcms_Signature_A2B0 = 0x41324230,
82 skcms_Signature_A2B1 = 0x41324231,
83 skcms_Signature_mAB = 0x6D414220,
84
85 skcms_Signature_CHAD = 0x63686164,
86
87 // Type signatures
88 skcms_Signature_curv = 0x63757276,
89 skcms_Signature_mft1 = 0x6D667431,
90 skcms_Signature_mft2 = 0x6D667432,
91 skcms_Signature_para = 0x70617261,
92 skcms_Signature_sf32 = 0x73663332,
93 // XYZ is also a PCS signature, so it's defined in skcms.h
94 // skcms_Signature_XYZ = 0x58595A20,
95};
96
97static uint16_t read_big_u16(const uint8_t* ptr) {
98 uint16_t be;
99 memcpy(&be, ptr, sizeof(be));
100#if defined(_MSC_VER)
101 return _byteswap_ushort(be);
102#else
103 return __builtin_bswap16(be);
104#endif
105}
106
107static uint32_t read_big_u32(const uint8_t* ptr) {
108 uint32_t be;
109 memcpy(&be, ptr, sizeof(be));
110#if defined(_MSC_VER)
111 return _byteswap_ulong(be);
112#else
113 return __builtin_bswap32(be);
114#endif
115}
116
117static int32_t read_big_i32(const uint8_t* ptr) {
118 return (int32_t)read_big_u32(ptr);
119}
120
121static float read_big_fixed(const uint8_t* ptr) {
122 return read_big_i32(ptr) * (1.0f / 65536.0f);
123}
124
125// Maps to an in-memory profile so that fields line up to the locations specified
126// in ICC.1:2010, section 7.2
127typedef struct {
128 uint8_t size [ 4];
129 uint8_t cmm_type [ 4];
130 uint8_t version [ 4];
131 uint8_t profile_class [ 4];
132 uint8_t data_color_space [ 4];
133 uint8_t pcs [ 4];
134 uint8_t creation_date_time [12];
135 uint8_t signature [ 4];
136 uint8_t platform [ 4];
137 uint8_t flags [ 4];
138 uint8_t device_manufacturer [ 4];
139 uint8_t device_model [ 4];
140 uint8_t device_attributes [ 8];
141 uint8_t rendering_intent [ 4];
142 uint8_t illuminant_X [ 4];
143 uint8_t illuminant_Y [ 4];
144 uint8_t illuminant_Z [ 4];
145 uint8_t creator [ 4];
146 uint8_t profile_id [16];
147 uint8_t reserved [28];
148 uint8_t tag_count [ 4]; // Technically not part of header, but required
149} header_Layout;
150
151typedef struct {
152 uint8_t signature [4];
153 uint8_t offset [4];
154 uint8_t size [4];
155} tag_Layout;
156
157static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
158 return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
159}
160
161// s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
162// use of the type is for the CHAD tag that stores exactly nine values.
163typedef struct {
164 uint8_t type [ 4];
165 uint8_t reserved [ 4];
166 uint8_t values [36];
167} sf32_Layout;
168
169bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
170 skcms_ICCTag tag;
171 if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
172 return false;
173 }
174
175 if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
176 return false;
177 }
178
179 const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
180 const uint8_t* values = sf32Tag->values;
181 for (int r = 0; r < 3; ++r)
182 for (int c = 0; c < 3; ++c, values += 4) {
183 m->vals[r][c] = read_big_fixed(values);
184 }
185 return true;
186}
187
188// XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
189// the type are for tags/data that store exactly one triple.
190typedef struct {
191 uint8_t type [4];
192 uint8_t reserved [4];
193 uint8_t X [4];
194 uint8_t Y [4];
195 uint8_t Z [4];
196} XYZ_Layout;
197
198static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
199 if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
200 return false;
201 }
202
203 const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
204
205 *x = read_big_fixed(xyzTag->X);
206 *y = read_big_fixed(xyzTag->Y);
207 *z = read_big_fixed(xyzTag->Z);
208 return true;
209}
210
211static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
212 const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
213 return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
214 read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
215 read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
216}
217
218typedef struct {
219 uint8_t type [4];
220 uint8_t reserved_a [4];
221 uint8_t function_type [2];
222 uint8_t reserved_b [2];
223 uint8_t parameters [ ]; // 1, 3, 4, 5, or 7 s15.16 parameters, depending on function_type
224} para_Layout;
225
226static bool read_curve_para(const uint8_t* buf, uint32_t size,
227 skcms_Curve* curve, uint32_t* curve_size) {
228 if (size < SAFE_SIZEOF(para_Layout)) {
229 return false;
230 }
231
232 const para_Layout* paraTag = (const para_Layout*)buf;
233
234 enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
235 uint16_t function_type = read_big_u16(paraTag->function_type);
236 if (function_type > kGABCDEF) {
237 return false;
238 }
239
240 static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
241 if (size < SAFE_SIZEOF(para_Layout) + curve_bytes[function_type]) {
242 return false;
243 }
244
245 if (curve_size) {
246 *curve_size = SAFE_SIZEOF(para_Layout) + curve_bytes[function_type];
247 }
248
249 curve->table_entries = 0;
250 curve->parametric.a = 1.0f;
251 curve->parametric.b = 0.0f;
252 curve->parametric.c = 0.0f;
253 curve->parametric.d = 0.0f;
254 curve->parametric.e = 0.0f;
255 curve->parametric.f = 0.0f;
256 curve->parametric.g = read_big_fixed(paraTag->parameters);
257
258 switch (function_type) {
259 case kGAB:
260 curve->parametric.a = read_big_fixed(paraTag->parameters + 4);
261 curve->parametric.b = read_big_fixed(paraTag->parameters + 8);
262 if (curve->parametric.a == 0) {
263 return false;
264 }
265 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
266 break;
267 case kGABC:
268 curve->parametric.a = read_big_fixed(paraTag->parameters + 4);
269 curve->parametric.b = read_big_fixed(paraTag->parameters + 8);
270 curve->parametric.e = read_big_fixed(paraTag->parameters + 12);
271 if (curve->parametric.a == 0) {
272 return false;
273 }
274 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
275 curve->parametric.f = curve->parametric.e;
276 break;
277 case kGABCD:
278 curve->parametric.a = read_big_fixed(paraTag->parameters + 4);
279 curve->parametric.b = read_big_fixed(paraTag->parameters + 8);
280 curve->parametric.c = read_big_fixed(paraTag->parameters + 12);
281 curve->parametric.d = read_big_fixed(paraTag->parameters + 16);
282 break;
283 case kGABCDEF:
284 curve->parametric.a = read_big_fixed(paraTag->parameters + 4);
285 curve->parametric.b = read_big_fixed(paraTag->parameters + 8);
286 curve->parametric.c = read_big_fixed(paraTag->parameters + 12);
287 curve->parametric.d = read_big_fixed(paraTag->parameters + 16);
288 curve->parametric.e = read_big_fixed(paraTag->parameters + 20);
289 curve->parametric.f = read_big_fixed(paraTag->parameters + 24);
290 break;
291 }
292 return skcms_TransferFunction_isValid(&curve->parametric);
293}
294
295typedef struct {
296 uint8_t type [4];
297 uint8_t reserved [4];
298 uint8_t value_count [4];
299 uint8_t parameters [ ]; // value_count parameters (8.8 if 1, uint16 (n*65535) if > 1)
300} curv_Layout;
301
302static bool read_curve_curv(const uint8_t* buf, uint32_t size,
303 skcms_Curve* curve, uint32_t* curve_size) {
304 if (size < SAFE_SIZEOF(curv_Layout)) {
305 return false;
306 }
307
308 const curv_Layout* curvTag = (const curv_Layout*)buf;
309
310 uint32_t value_count = read_big_u32(curvTag->value_count);
311 if (size < SAFE_SIZEOF(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
312 return false;
313 }
314
315 if (curve_size) {
316 *curve_size = SAFE_SIZEOF(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
317 }
318
319 if (value_count < 2) {
320 curve->table_entries = 0;
321 curve->parametric.a = 1.0f;
322 curve->parametric.b = 0.0f;
323 curve->parametric.c = 0.0f;
324 curve->parametric.d = 0.0f;
325 curve->parametric.e = 0.0f;
326 curve->parametric.f = 0.0f;
327 if (value_count == 0) {
328 // Empty tables are a shorthand for an identity curve
329 curve->parametric.g = 1.0f;
330 } else {
331 // Single entry tables are a shorthand for simple gamma
332 curve->parametric.g = read_big_u16(curvTag->parameters) * (1.0f / 256.0f);
333 }
334 } else {
335 curve->table_8 = NULL;
336 curve->table_16 = curvTag->parameters;
337 curve->table_entries = value_count;
338 }
339
340 return true;
341}
342
343// Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
344// If curve_size is not NULL, writes the number of bytes used by the curve in (*curve_size).
345static bool read_curve(const uint8_t* buf, uint32_t size,
346 skcms_Curve* curve, uint32_t* curve_size) {
347 if (!buf || size < 4 || !curve) {
348 return false;
349 }
350
351 uint32_t type = read_big_u32(buf);
352 if (type == skcms_Signature_para) {
353 return read_curve_para(buf, size, curve, curve_size);
354 } else if (type == skcms_Signature_curv) {
355 return read_curve_curv(buf, size, curve, curve_size);
356 }
357
358 return false;
359}
360
361// mft1 and mft2 share a large chunk of data
362typedef struct {
363 uint8_t type [ 4];
364 uint8_t reserved_a [ 4];
365 uint8_t input_channels [ 1];
366 uint8_t output_channels [ 1];
367 uint8_t grid_points [ 1];
368 uint8_t reserved_b [ 1];
369 uint8_t matrix [36];
370} mft_CommonLayout;
371
372typedef struct {
373 mft_CommonLayout common [ 1];
374
375 uint8_t tables [ ];
376} mft1_Layout;
377
378typedef struct {
379 mft_CommonLayout common [ 1];
380
381 uint8_t input_table_entries [ 2];
382 uint8_t output_table_entries [ 2];
383 uint8_t tables [ ];
384} mft2_Layout;
385
386static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
387 // MFT matrices are applied before the first set of curves, but must be identity unless the
388 // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
389 // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
390 // field/flag.
391 a2b->matrix_channels = 0;
392
393 a2b->input_channels = mftTag->input_channels[0];
394 a2b->output_channels = mftTag->output_channels[0];
395
396 // We require exactly three (ie XYZ/Lab/RGB) output channels
397 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
398 return false;
399 }
400 // We require at least one, and no more than four (ie CMYK) input channels
401 if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
402 return false;
403 }
404
405 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
406 a2b->grid_points[i] = mftTag->grid_points[0];
407 }
408 // The grid only makes sense with at least two points along each axis
409 if (a2b->grid_points[0] < 2) {
410 return false;
411 }
412
413 return true;
414}
415
416static bool init_a2b_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
417 uint32_t input_table_entries, uint32_t output_table_entries,
418 skcms_A2B* a2b) {
419 // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
420 uint32_t byte_len_per_input_table = input_table_entries * byte_width;
421 uint32_t byte_len_per_output_table = output_table_entries * byte_width;
422
423 // [input|output]_channels are <= 4, so still no overflow
424 uint32_t byte_len_all_input_tables = a2b->input_channels * byte_len_per_input_table;
425 uint32_t byte_len_all_output_tables = a2b->output_channels * byte_len_per_output_table;
426
427 uint64_t grid_size = a2b->output_channels * byte_width;
428 for (uint32_t axis = 0; axis < a2b->input_channels; ++axis) {
429 grid_size *= a2b->grid_points[axis];
430 }
431
432 if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
433 return false;
434 }
435
436 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
437 a2b->input_curves[i].table_entries = input_table_entries;
438 if (byte_width == 1) {
439 a2b->input_curves[i].table_8 = table_base + i * byte_len_per_input_table;
440 a2b->input_curves[i].table_16 = NULL;
441 } else {
442 a2b->input_curves[i].table_8 = NULL;
443 a2b->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
444 }
445 }
446
447 if (byte_width == 1) {
448 a2b->grid_8 = table_base + byte_len_all_input_tables;
449 a2b->grid_16 = NULL;
450 } else {
451 a2b->grid_8 = NULL;
452 a2b->grid_16 = table_base + byte_len_all_input_tables;
453 }
454
455 const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
456 for (uint32_t i = 0; i < a2b->output_channels; ++i) {
457 a2b->output_curves[i].table_entries = output_table_entries;
458 if (byte_width == 1) {
459 a2b->output_curves[i].table_8 = output_table_base + i * byte_len_per_output_table;
460 a2b->output_curves[i].table_16 = NULL;
461 } else {
462 a2b->output_curves[i].table_8 = NULL;
463 a2b->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
464 }
465 }
466
467 return true;
468}
469
470static bool read_tag_mft1(const skcms_ICCTag* tag, skcms_A2B* a2b) {
471 if (tag->size < SAFE_SIZEOF(mft1_Layout)) {
472 return false;
473 }
474
475 const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
476 if (!read_mft_common(mftTag->common, a2b)) {
477 return false;
478 }
479
480 uint32_t input_table_entries = 256;
481 uint32_t output_table_entries = 256;
482
483 return init_a2b_tables(mftTag->tables, tag->size - SAFE_SIZEOF(mft1_Layout), 1,
484 input_table_entries, output_table_entries, a2b);
485}
486
487static bool read_tag_mft2(const skcms_ICCTag* tag, skcms_A2B* a2b) {
488 if (tag->size < SAFE_SIZEOF(mft2_Layout)) {
489 return false;
490 }
491
492 const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
493 if (!read_mft_common(mftTag->common, a2b)) {
494 return false;
495 }
496
497 uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
498 uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
499
500 // ICC spec mandates that 2 <= table_entries <= 4096
501 if (input_table_entries < 2 || input_table_entries > 4096 ||
502 output_table_entries < 2 || output_table_entries > 4096) {
503 return false;
504 }
505
506 return init_a2b_tables(mftTag->tables, tag->size - SAFE_SIZEOF(mft2_Layout), 2,
507 input_table_entries, output_table_entries, a2b);
508}
509
510static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
511 uint32_t num_curves, skcms_Curve* curves) {
512 for (uint32_t i = 0; i < num_curves; ++i) {
513 if (curve_offset > size) {
514 return false;
515 }
516
517 uint32_t curve_bytes;
518 if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
519 return false;
520 }
521
522 if (curve_bytes > UINT32_MAX - 3) {
523 return false;
524 }
525 curve_bytes = (curve_bytes + 3) & ~3U;
526
527 uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
528 curve_offset = (uint32_t)new_offset_64;
529 if (new_offset_64 != curve_offset) {
530 return false;
531 }
532 }
533
534 return true;
535}
536
537typedef struct {
538 uint8_t type [ 4];
539 uint8_t reserved_a [ 4];
540 uint8_t input_channels [ 1];
541 uint8_t output_channels [ 1];
542 uint8_t reserved_b [ 2];
543 uint8_t b_curve_offset [ 4];
544 uint8_t matrix_offset [ 4];
545 uint8_t m_curve_offset [ 4];
546 uint8_t clut_offset [ 4];
547 uint8_t a_curve_offset [ 4];
548} mAB_Layout;
549
550typedef struct {
551 uint8_t grid_points [16];
552 uint8_t grid_byte_width [ 1];
553 uint8_t reserved [ 3];
554 uint8_t data [ ];
555} mABCLUT_Layout;
556
557static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
558 if (tag->size < SAFE_SIZEOF(mAB_Layout)) {
559 return false;
560 }
561
562 const mAB_Layout* mABTag = (const mAB_Layout*)tag->buf;
563
564 a2b->input_channels = mABTag->input_channels[0];
565 a2b->output_channels = mABTag->output_channels[0];
566
567 // We require exactly three (ie XYZ/Lab/RGB) output channels
568 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
569 return false;
570 }
571 // We require no more than four (ie CMYK) input channels
572 if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
573 return false;
574 }
575
576 uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
577 uint32_t matrix_offset = read_big_u32(mABTag->matrix_offset);
578 uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
579 uint32_t clut_offset = read_big_u32(mABTag->clut_offset);
580 uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
581
582 // "B" curves must be present
583 if (0 == b_curve_offset) {
584 return false;
585 }
586
587 if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
588 a2b->output_curves)) {
589 return false;
590 }
591
592 // "M" curves and Matrix must be used together
593 if (0 != m_curve_offset) {
594 if (0 == matrix_offset) {
595 return false;
596 }
597 a2b->matrix_channels = a2b->output_channels;
598 if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
599 a2b->matrix_curves)) {
600 return false;
601 }
602
603 // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
604 if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
605 return false;
606 }
607 float encoding_factor = pcs_is_xyz ? 65535 / 32768.0f : 1.0f;
608 const uint8_t* mtx_buf = tag->buf + matrix_offset;
609 a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
610 a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
611 a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
612 a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
613 a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
614 a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
615 a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
616 a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
617 a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
618 a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
619 a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
620 a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
621 } else {
622 if (0 != matrix_offset) {
623 return false;
624 }
625 a2b->matrix_channels = 0;
626 }
627
628 // "A" curves and CLUT must be used together
629 if (0 != a_curve_offset) {
630 if (0 == clut_offset) {
631 return false;
632 }
633 if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
634 a2b->input_curves)) {
635 return false;
636 }
637
638 if (tag->size < clut_offset + SAFE_SIZEOF(mABCLUT_Layout)) {
639 return false;
640 }
641 const mABCLUT_Layout* clut = (const mABCLUT_Layout*)(tag->buf + clut_offset);
642
643 if (clut->grid_byte_width[0] == 1) {
644 a2b->grid_8 = clut->data;
645 a2b->grid_16 = NULL;
646 } else if (clut->grid_byte_width[0] == 2) {
647 a2b->grid_8 = NULL;
648 a2b->grid_16 = clut->data;
649 } else {
650 return false;
651 }
652
653 uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0];
654 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
655 a2b->grid_points[i] = clut->grid_points[i];
656 // The grid only makes sense with at least two points along each axis
657 if (a2b->grid_points[i] < 2) {
658 return false;
659 }
660 grid_size *= a2b->grid_points[i];
661 }
662 if (tag->size < clut_offset + SAFE_SIZEOF(mABCLUT_Layout) + grid_size) {
663 return false;
664 }
665 } else {
666 if (0 != clut_offset) {
667 return false;
668 }
669
670 // If there is no CLUT, the number of input and output channels must match
671 if (a2b->input_channels != a2b->output_channels) {
672 return false;
673 }
674
675 // Zero out the number of input channels to signal that we're skipping this stage
676 a2b->input_channels = 0;
677 }
678
679 return true;
680}
681
682static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
683 bool ok = false;
684 if (tag->type == skcms_Signature_mft1) {
685 ok = read_tag_mft1(tag, a2b);
686 } else if (tag->type == skcms_Signature_mft2) {
687 ok = read_tag_mft2(tag, a2b);
688 } else if (tag->type == skcms_Signature_mAB) {
689 ok = read_tag_mab(tag, a2b, pcs_is_xyz);
690 }
691 if (!ok) {
692 return false;
693 }
694
695 // Detect and canonicalize identity tables.
696 skcms_Curve* curves[] = {
697 a2b->input_channels > 0 ? a2b->input_curves + 0 : NULL,
698 a2b->input_channels > 1 ? a2b->input_curves + 1 : NULL,
699 a2b->input_channels > 2 ? a2b->input_curves + 2 : NULL,
700 a2b->input_channels > 3 ? a2b->input_curves + 3 : NULL,
701 a2b->matrix_channels > 0 ? a2b->matrix_curves + 0 : NULL,
702 a2b->matrix_channels > 1 ? a2b->matrix_curves + 1 : NULL,
703 a2b->matrix_channels > 2 ? a2b->matrix_curves + 2 : NULL,
704 a2b->output_channels > 0 ? a2b->output_curves + 0 : NULL,
705 a2b->output_channels > 1 ? a2b->output_curves + 1 : NULL,
706 a2b->output_channels > 2 ? a2b->output_curves + 2 : NULL,
707 };
708
709 for (int i = 0; i < ARRAY_COUNT(curves); i++) {
710 skcms_Curve* curve = curves[i];
711
712 if (curve && curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
713 int N = (int)curve->table_entries;
714
715 float c,d,f;
716 if (N == skcms_fit_linear(curve, N, 1.0f/(2*N), &c,&d,&f)
717 && c == 1.0f
718 && f == 0.0f) {
719 curve->table_entries = 0;
720 curve->table_8 = NULL;
721 curve->table_16 = NULL;
722 curve->parametric = (skcms_TransferFunction){1,1,0,0,0,0,0};
723 }
724 }
725 }
726
727 return true;
728}
729
730void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
731 if (!profile || !profile->buffer || !tag) { return; }
732 if (idx > profile->tag_count) { return; }
733 const tag_Layout* tags = get_tag_table(profile);
734 tag->signature = read_big_u32(tags[idx].signature);
735 tag->size = read_big_u32(tags[idx].size);
736 tag->buf = read_big_u32(tags[idx].offset) + profile->buffer;
737 tag->type = read_big_u32(tag->buf);
738}
739
740bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
741 if (!profile || !profile->buffer || !tag) { return false; }
742 const tag_Layout* tags = get_tag_table(profile);
743 for (uint32_t i = 0; i < profile->tag_count; ++i) {
744 if (read_big_u32(tags[i].signature) == sig) {
745 tag->signature = sig;
746 tag->size = read_big_u32(tags[i].size);
747 tag->buf = read_big_u32(tags[i].offset) + profile->buffer;
748 tag->type = read_big_u32(tag->buf);
749 return true;
750 }
751 }
752 return false;
753}
754
755static bool usable_as_src(const skcms_ICCProfile* profile) {
756 return profile->has_A2B
757 || (profile->has_trc && profile->has_toXYZD50);
758}
759
760bool skcms_Parse(const void* buf, size_t len, skcms_ICCProfile* profile) {
761 assert(SAFE_SIZEOF(header_Layout) == 132);
762
763 if (!profile) {
764 return false;
765 }
766 memset(profile, 0, SAFE_SIZEOF(*profile));
767
768 if (len < SAFE_SIZEOF(header_Layout)) {
769 return false;
770 }
771
772 // Byte-swap all header fields
773 const header_Layout* header = buf;
774 profile->buffer = buf;
775 profile->size = read_big_u32(header->size);
776 uint32_t version = read_big_u32(header->version);
777 profile->data_color_space = read_big_u32(header->data_color_space);
778 profile->pcs = read_big_u32(header->pcs);
779 uint32_t signature = read_big_u32(header->signature);
780 float illuminant_X = read_big_fixed(header->illuminant_X);
781 float illuminant_Y = read_big_fixed(header->illuminant_Y);
782 float illuminant_Z = read_big_fixed(header->illuminant_Z);
783 profile->tag_count = read_big_u32(header->tag_count);
784
785 // Validate signature, size (smaller than buffer, large enough to hold tag table),
786 // and major version
787 uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
788 if (signature != skcms_Signature_acsp ||
789 profile->size > len ||
790 profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
791 (version >> 24) > 4) {
792 return false;
793 }
794
795 // Validate that illuminant is D50 white
796 if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
797 fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
798 fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
799 return false;
800 }
801
802 // Validate that all tag entries have sane offset + size
803 const tag_Layout* tags = get_tag_table(profile);
804 for (uint32_t i = 0; i < profile->tag_count; ++i) {
805 uint32_t tag_offset = read_big_u32(tags[i].offset);
806 uint32_t tag_size = read_big_u32(tags[i].size);
807 uint64_t tag_end = (uint64_t)tag_offset + (uint64_t)tag_size;
808 if (tag_size < 4 || tag_end > profile->size) {
809 return false;
810 }
811 }
812
813 if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
814 return false;
815 }
816
817 bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
818
819 // Pre-parse commonly used tags.
820 skcms_ICCTag kTRC;
821 if (profile->data_color_space == skcms_Signature_Gray &&
822 skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
823 if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], NULL)) {
824 // Malformed tag
825 return false;
826 }
827 profile->trc[1] = profile->trc[0];
828 profile->trc[2] = profile->trc[0];
829 profile->has_trc = true;
830
831 if (pcs_is_xyz) {
832 profile->toXYZD50.vals[0][0] = illuminant_X;
833 profile->toXYZD50.vals[1][1] = illuminant_Y;
834 profile->toXYZD50.vals[2][2] = illuminant_Z;
835 profile->has_toXYZD50 = true;
836 }
837 } else {
838 skcms_ICCTag rTRC, gTRC, bTRC;
839 if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
840 skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
841 skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
842 if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], NULL) ||
843 !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], NULL) ||
844 !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], NULL)) {
845 // Malformed TRC tags
846 return false;
847 }
848 profile->has_trc = true;
849 }
850
851 skcms_ICCTag rXYZ, gXYZ, bXYZ;
852 if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
853 skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
854 skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
855 if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
856 // Malformed XYZ tags
857 return false;
858 }
859 profile->has_toXYZD50 = true;
860 }
861 }
862
863 skcms_ICCTag a2b_tag;
864
865 // For now, we're preferring A2B0, like Skia does and the ICC spec tells us to.
866 // TODO: prefer A2B1 (relative colormetric) over A2B0 (perceptual)?
867 // This breaks with the ICC spec, but we think it's a good idea, given that TRC curves
868 // and all our known users are thinking exclusively in terms of relative colormetric.
869 const uint32_t sigs[] = { skcms_Signature_A2B0, skcms_Signature_A2B1 };
870 for (int i = 0; i < ARRAY_COUNT(sigs); i++) {
871 if (skcms_GetTagBySignature(profile, sigs[i], &a2b_tag)) {
872 if (!read_a2b(&a2b_tag, &profile->A2B, pcs_is_xyz)) {
873 // Malformed A2B tag
874 return false;
875 }
876 profile->has_A2B = true;
877 break;
878 }
879 }
880
881 return usable_as_src(profile);
882}
883
884
885const skcms_ICCProfile* skcms_sRGB_profile() {
886 static const skcms_ICCProfile sRGB_profile = {
887 // These fields are moot when not a skcms_Parse()'d profile.
888 .buffer = NULL,
889 .size = 0,
890 .tag_count = 0,
891
892 // We choose to represent sRGB with its canonical transfer function,
893 // and with its canonical XYZD50 gamut matrix.
894 .data_color_space = skcms_Signature_RGB,
895 .pcs = skcms_Signature_XYZ,
896 .has_trc = true,
897 .has_toXYZD50 = true,
898 .has_A2B = false,
899
900 .trc = {
901 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0 }}},
902 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0 }}},
903 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0 }}},
904 },
905
906 .toXYZD50 = {{
907 { 0.436065674f, 0.385147095f, 0.143066406f },
908 { 0.222488403f, 0.716873169f, 0.060607910f },
909 { 0.013916016f, 0.097076416f, 0.714096069f },
910 }},
911 };
912 return &sRGB_profile;
913}
914
915const skcms_ICCProfile* skcms_XYZD50_profile() {
916 static const skcms_ICCProfile XYZD50_profile = {
917 .buffer = NULL,
918 .size = 0,
919 .tag_count = 0,
920
921 .data_color_space = skcms_Signature_RGB,
922 .pcs = skcms_Signature_XYZ,
923 .has_trc = true,
924 .has_toXYZD50 = true,
925 .has_A2B = false,
926
927 .trc = {
928 {{0, {1,1,0,0,0,0,0}}},
929 {{0, {1,1,0,0,0,0,0}}},
930 {{0, {1,1,0,0,0,0,0}}},
931 },
932
933 .toXYZD50 = {{
934 {1,0,0},
935 {0,1,0},
936 {0,0,1},
937 }},
938 };
939
940 return &XYZD50_profile;
941}
942
943const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
944 return &skcms_sRGB_profile()->trc[0].parametric;
945}
946
947const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
948 static const skcms_TransferFunction sRGB_inv =
949 { (float)(1/2.4), 1.137119f, 0, 12.92f, 0.0031308f, -0.055f, 0 };
950 return &sRGB_inv;
951}
952
953const skcms_TransferFunction* skcms_Identity_TransferFunction() {
954 static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
955 return &identity;
956}
957
958const uint8_t skcms_252_random_bytes[] = {
959 8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
960 119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
961 154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
962 194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
963 108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
964 70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
965 137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
966 9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
967 129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
968 140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
969 219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
970 123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
971 189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
972 174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
973 2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
974 112, 36, 224, 136, 202, 76, 94, 98, 175, 213
975};
976
977bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
978 // For now this is the essentially the same strategy we use in test_only.c
979 // for our skcms_Transform() smoke tests:
980 // 1) transform A to XYZD50
981 // 2) transform B to XYZD50
982 // 3) return true if they're similar enough
983 // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
984
985 // Here are 252 of a random shuffle of all possible bytes.
986 // 252 is evenly divisible by 3 and 4. Only 192, 10, 241, and 43 are missing.
987
988 if (A->data_color_space != B->data_color_space) {
989 return false;
990 }
991
992 // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
993 skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
994 size_t npixels = 84;
995 if (A->data_color_space == skcms_Signature_CMYK) {
996 fmt = skcms_PixelFormat_RGBA_8888;
997 npixels = 63;
998 }
999
1000 uint8_t dstA[252],
1001 dstB[252];
1002 if (!skcms_Transform(
1003 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, A,
1004 dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1005 npixels)) {
1006 return false;
1007 }
1008 if (!skcms_Transform(
1009 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, B,
1010 dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1011 npixels)) {
1012 return false;
1013 }
1014
1015 for (size_t i = 0; i < 252; i++) {
1016 if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1017 return false;
1018 }
1019 }
1020 return true;
1021}
1022
1023bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1024 const skcms_TransferFunction* inv_tf) {
1025 if (!profile || !profile->has_trc) {
1026 return false;
1027 }
1028
1029 return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1030 skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1031 skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1032}
1033
1034static bool is_zero_to_one(float x) {
1035 return 0 <= x && x <= 1;
1036}
1037
1038bool skcms_PrimariesToXYZD50(float rx, float ry,
1039 float gx, float gy,
1040 float bx, float by,
1041 float wx, float wy,
1042 skcms_Matrix3x3* toXYZD50) {
1043 if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1044 !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1045 !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1046 !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1047 !toXYZD50) {
1048 return false;
1049 }
1050
1051 // First, we need to convert xy values (primaries) to XYZ.
1052 skcms_Matrix3x3 primaries = {{
1053 { rx, gx, bx },
1054 { ry, gy, by },
1055 { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1056 }};
1057 skcms_Matrix3x3 primaries_inv;
1058 if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1059 return false;
1060 }
1061
1062 // Assumes that Y is 1.0f.
1063 skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1064 skcms_Vector3 XYZ = skcms_MV_mul(&primaries_inv, &wXYZ);
1065
1066 skcms_Matrix3x3 toXYZ = {{
1067 { XYZ.vals[0], 0, 0 },
1068 { 0, XYZ.vals[1], 0 },
1069 { 0, 0, XYZ.vals[2] },
1070 }};
1071 toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1072
1073 // Now convert toXYZ matrix to toXYZD50.
1074 skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1075
1076 // Calculate the chromatic adaptation matrix. We will use the Bradford method, thus
1077 // the matrices below. The Bradford method is used by Adobe and is widely considered
1078 // to be the best.
1079 skcms_Matrix3x3 xyz_to_lms = {{
1080 { 0.8951f, 0.2664f, -0.1614f },
1081 { -0.7502f, 1.7135f, 0.0367f },
1082 { 0.0389f, -0.0685f, 1.0296f },
1083 }};
1084 skcms_Matrix3x3 lms_to_xyz = {{
1085 { 0.9869929f, -0.1470543f, 0.1599627f },
1086 { 0.4323053f, 0.5183603f, 0.0492912f },
1087 { -0.0085287f, 0.0400428f, 0.9684867f },
1088 }};
1089
1090 skcms_Vector3 srcCone = skcms_MV_mul(&xyz_to_lms, &wXYZ);
1091 skcms_Vector3 dstCone = skcms_MV_mul(&xyz_to_lms, &wXYZD50);
1092
1093 skcms_Matrix3x3 DXtoD50 = {{
1094 { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1095 { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1096 { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1097 }};
1098 DXtoD50 = skcms_Matrix3x3_concat(&DXtoD50, &xyz_to_lms);
1099 DXtoD50 = skcms_Matrix3x3_concat(&lms_to_xyz, &DXtoD50);
1100
1101 *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1102 return true;
1103}
1104
1105
1106bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1107 double a00 = src->vals[0][0],
1108 a01 = src->vals[1][0],
1109 a02 = src->vals[2][0],
1110 a10 = src->vals[0][1],
1111 a11 = src->vals[1][1],
1112 a12 = src->vals[2][1],
1113 a20 = src->vals[0][2],
1114 a21 = src->vals[1][2],
1115 a22 = src->vals[2][2];
1116
1117 double b0 = a00*a11 - a01*a10,
1118 b1 = a00*a12 - a02*a10,
1119 b2 = a01*a12 - a02*a11,
1120 b3 = a20,
1121 b4 = a21,
1122 b5 = a22;
1123
1124 double determinant = b0*b5
1125 - b1*b4
1126 + b2*b3;
1127
1128 if (determinant == 0) {
1129 return false;
1130 }
1131
1132 double invdet = 1.0 / determinant;
1133 if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1134 return false;
1135 }
1136
1137 b0 *= invdet;
1138 b1 *= invdet;
1139 b2 *= invdet;
1140 b3 *= invdet;
1141 b4 *= invdet;
1142 b5 *= invdet;
1143
1144 dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1145 dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1146 dst->vals[2][0] = (float)( + b2 );
1147 dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1148 dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1149 dst->vals[2][1] = (float)( - b1 );
1150 dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1151 dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1152 dst->vals[2][2] = (float)( + b0 );
1153
1154 for (int r = 0; r < 3; ++r)
1155 for (int c = 0; c < 3; ++c) {
1156 if (!isfinitef_(dst->vals[r][c])) {
1157 return false;
1158 }
1159 }
1160 return true;
1161}
1162
1163skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1164 skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1165 for (int r = 0; r < 3; r++)
1166 for (int c = 0; c < 3; c++) {
1167 m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1168 + A->vals[r][1] * B->vals[1][c]
1169 + A->vals[r][2] * B->vals[2][c];
1170 }
1171 return m;
1172}
1173
1174skcms_Vector3 skcms_MV_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1175 skcms_Vector3 dst = {{0,0,0}};
1176 for (int row = 0; row < 3; ++row) {
1177 dst.vals[row] = m->vals[row][0] * v->vals[0]
1178 + m->vals[row][1] * v->vals[1]
1179 + m->vals[row][2] * v->vals[2];
1180 }
1181 return dst;
1182}
1183
1184#if defined(__clang__) || defined(__GNUC__)
1185 #define small_memcpy __builtin_memcpy
1186#else
1187 #define small_memcpy memcpy
1188#endif
1189
1190float log2f_(float x) {
1191 // The first approximation of log2(x) is its exponent 'e', minus 127.
1192 int32_t bits;
1193 small_memcpy(&bits, &x, sizeof(bits));
1194
1195 float e = (float)bits * (1.0f / (1<<23));
1196
1197 // If we use the mantissa too we can refine the error signficantly.
1198 int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
1199 float m;
1200 small_memcpy(&m, &m_bits, sizeof(m));
1201
1202 return (e - 124.225514990f
1203 - 1.498030302f*m
1204 - 1.725879990f/(0.3520887068f + m));
1205}
1206
1207float exp2f_(float x) {
1208 float fract = x - floorf_(x);
1209
1210 float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
1211 - 1.490129070f*fract
1212 + 27.728023300f/(4.84252568f - fract));
1213 if (fbits > INT_MAX) {
1214 return INFINITY_;
1215 } else if (fbits < INT_MIN) {
1216 return -INFINITY_;
1217 }
1218 int32_t bits = (int32_t)fbits;
1219 small_memcpy(&x, &bits, sizeof(x));
1220 return x;
1221}
1222
1223float powf_(float x, float y) {
1224 return (x == 0) || (x == 1) ? x
1225 : exp2f_(log2f_(x) * y);
1226}
1227
1228float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
1229 float sign = x < 0 ? -1.0f : 1.0f;
1230 x *= sign;
1231
1232 return sign * (x < tf->d ? tf->c * x + tf->f
1233 : powf_(tf->a * x + tf->b, tf->g) + tf->e);
1234}
1235
1236bool skcms_TransferFunction_isValid(const skcms_TransferFunction* tf) {
1237 // Reject obviously malformed inputs
1238 if (!isfinitef_(tf->a + tf->b + tf->c + tf->d + tf->e + tf->f + tf->g)) {
1239 return false;
1240 }
1241
1242 // All of these parameters should be non-negative
1243 if (tf->a < 0 || tf->c < 0 || tf->d < 0 || tf->g < 0) {
1244 return false;
1245 }
1246
1247 return true;
1248}
1249
1250// TODO: Adjust logic here? This still assumes that purely linear inputs will have D > 1, which
1251// we never generate. It also emits inverted linear using the same formulation. Standardize on
1252// G == 1 here, too?
1253bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1254 // Original equation is: y = (ax + b)^g + e for x >= d
1255 // y = cx + f otherwise
1256 //
1257 // so 1st inverse is: (y - e)^(1/g) = ax + b
1258 // x = ((y - e)^(1/g) - b) / a
1259 //
1260 // which can be re-written as: x = (1/a)(y - e)^(1/g) - b/a
1261 // x = ((1/a)^g)^(1/g) * (y - e)^(1/g) - b/a
1262 // x = ([(1/a)^g]y + [-((1/a)^g)e]) ^ [1/g] + [-b/a]
1263 //
1264 // and 2nd inverse is: x = (y - f) / c
1265 // which can be re-written as: x = [1/c]y + [-f/c]
1266 //
1267 // and now both can be expressed in terms of the same parametric form as the
1268 // original - parameters are enclosed in square brackets.
1269 skcms_TransferFunction tf_inv = { 0, 0, 0, 0, 0, 0, 0 };
1270
1271 // This rejects obviously malformed inputs, as well as decreasing functions
1272 if (!skcms_TransferFunction_isValid(src)) {
1273 return false;
1274 }
1275
1276 // There are additional constraints to be invertible
1277 bool has_nonlinear = (src->d <= 1);
1278 bool has_linear = (src->d > 0);
1279
1280 // Is the linear section not invertible?
1281 if (has_linear && src->c == 0) {
1282 return false;
1283 }
1284
1285 // Is the nonlinear section not invertible?
1286 if (has_nonlinear && (src->a == 0 || src->g == 0)) {
1287 return false;
1288 }
1289
1290 // If both segments are present, they need to line up
1291 if (has_linear && has_nonlinear) {
1292 float l_at_d = src->c * src->d + src->f;
1293 float n_at_d = powf_(src->a * src->d + src->b, src->g) + src->e;
1294 if (fabsf_(l_at_d - n_at_d) > (1 / 512.0f)) {
1295 return false;
1296 }
1297 }
1298
1299 // Invert linear segment
1300 if (has_linear) {
1301 tf_inv.c = 1.0f / src->c;
1302 tf_inv.f = -src->f / src->c;
1303 }
1304
1305 // Invert nonlinear segment
1306 if (has_nonlinear) {
1307 tf_inv.g = 1.0f / src->g;
1308 tf_inv.a = powf_(1.0f / src->a, src->g);
1309 tf_inv.b = -tf_inv.a * src->e;
1310 tf_inv.e = -src->b / src->a;
1311 }
1312
1313 if (!has_linear) {
1314 tf_inv.d = 0;
1315 } else if (!has_nonlinear) {
1316 // Any value larger than 1 works
1317 tf_inv.d = 2.0f;
1318 } else {
1319 tf_inv.d = src->c * src->d + src->f;
1320 }
1321
1322 *dst = tf_inv;
1323 return true;
1324}
1325
1326// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
1327
1328// From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
1329//
1330// tf(x) = cx + f x < d
1331// tf(x) = (ax + b)^g + e x ≥ d
1332//
1333// When fitting, we add the additional constraint that both pieces meet at d:
1334//
1335// cd + f = (ad + b)^g + e
1336//
1337// Solving for e and folding it through gives an alternate formulation of the non-linear piece:
1338//
1339// tf(x) = cx + f x < d
1340// tf(x) = (ax + b)^g - (ad + b)^g + cd + f x ≥ d
1341//
1342// Our overall strategy is then:
1343// For a couple tolerances,
1344// - skcms_fit_linear(): fit c,d,f iteratively to as many points as our tolerance allows
1345// - invert c,d,f
1346// - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
1347// (and by constraint, inverted e) to the inverse of the table.
1348// Return the parameters with least maximum error.
1349//
1350// To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
1351// of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
1352//
1353// let y = Table(x)
1354// r(x) = x - f_inv(y)
1355//
1356// ∂r/∂g = ln(ay + b)*(ay + b)^g
1357// - ln(ad + b)*(ad + b)^g
1358// ∂r/∂a = yg(ay + b)^(g-1)
1359// - dg(ad + b)^(g-1)
1360// ∂r/∂b = g(ay + b)^(g-1)
1361// - g(ad + b)^(g-1)
1362
1363// Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
1364// and fill out the gradient of the residual into dfdP.
1365static float rg_nonlinear(float x,
1366 const skcms_Curve* curve,
1367 const skcms_TransferFunction* tf,
1368 const float P[3],
1369 float dfdP[3]) {
1370 const float y = skcms_eval_curve(curve, x);
1371
1372 const float g = P[0], a = P[1], b = P[2],
1373 c = tf->c, d = tf->d, f = tf->f;
1374
1375 const float Y = fmaxf_(a*y + b, 0.0f),
1376 D = a*d + b;
1377 assert (D >= 0);
1378
1379 // The gradient.
1380 dfdP[0] = 0.69314718f*log2f_(Y)*powf_(Y, g)
1381 - 0.69314718f*log2f_(D)*powf_(D, g);
1382 dfdP[1] = y*g*powf_(Y, g-1)
1383 - d*g*powf_(D, g-1);
1384 dfdP[2] = g*powf_(Y, g-1)
1385 - g*powf_(D, g-1);
1386
1387 // The residual.
1388 const float f_inv = powf_(Y, g)
1389 - powf_(D, g)
1390 + c*d + f;
1391 return x - f_inv;
1392}
1393
1394int skcms_fit_linear(const skcms_Curve* curve, int N, float tol, float* c, float* d, float* f) {
1395 assert(N > 1);
1396 // We iteratively fit the first points to the TF's linear piece.
1397 // We want the cx + f line to pass through the first and last points we fit exactly.
1398 //
1399 // As we walk along the points we find the minimum and maximum slope of the line before the
1400 // error would exceed our tolerance. We stop when the range [slope_min, slope_max] becomes
1401 // emtpy, when we definitely can't add any more points.
1402 //
1403 // Some points' error intervals may intersect the running interval but not lie fully
1404 // within it. So we keep track of the last point we saw that is a valid end point candidate,
1405 // and once the search is done, back up to build the line through *that* point.
1406 const float dx = 1.0f / (N - 1);
1407
1408 int lin_points = 1;
1409 *f = skcms_eval_curve(curve, 0);
1410
1411 float slope_min = -INFINITY_;
1412 float slope_max = +INFINITY_;
1413 for (int i = 1; i < N; ++i) {
1414 float x = i * dx;
1415 float y = skcms_eval_curve(curve, x);
1416
1417 float slope_max_i = (y + tol - *f) / x,
1418 slope_min_i = (y - tol - *f) / x;
1419 if (slope_max_i < slope_min || slope_max < slope_min_i) {
1420 // Slope intervals would no longer overlap.
1421 break;
1422 }
1423 slope_max = fminf_(slope_max, slope_max_i);
1424 slope_min = fmaxf_(slope_min, slope_min_i);
1425
1426 float cur_slope = (y - *f) / x;
1427 if (slope_min <= cur_slope && cur_slope <= slope_max) {
1428 lin_points = i + 1;
1429 *c = cur_slope;
1430 }
1431 }
1432
1433 // Set D to the last point that met our tolerance.
1434 *d = (lin_points - 1) * dx;
1435 return lin_points;
1436}
1437
1438static bool gauss_newton_step(const skcms_Curve* curve,
1439 const skcms_TransferFunction* tf,
1440 float P[3],
1441 float x0, float dx, int N) {
1442 // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
1443 //
1444 // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
1445 // where r(P) is the residual vector
1446 // and Jf is the Jacobian matrix of f(), ∂r/∂P.
1447 //
1448 // Let's review the shape of each of these expressions:
1449 // r(P) is [N x 1], a column vector with one entry per value of x tested
1450 // Jf is [N x 3], a matrix with an entry for each (x,P) pair
1451 // Jf^T is [3 x N], the transpose of Jf
1452 //
1453 // Jf^T Jf is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
1454 // and so is its inverse (Jf^T Jf)^-1
1455 // Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
1456 //
1457 // Our implementation strategy to get to the final ∆P is
1458 // 1) evaluate Jf^T Jf, call that lhs
1459 // 2) evaluate Jf^T r(P), call that rhs
1460 // 3) invert lhs
1461 // 4) multiply inverse lhs by rhs
1462 //
1463 // This is a friendly implementation strategy because we don't have to have any
1464 // buffers that scale with N, and equally nice don't have to perform any matrix
1465 // operations that are variable size.
1466 //
1467 // Other implementation strategies could trade this off, e.g. evaluating the
1468 // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
1469 // the residuals. That would probably require implementing singular value
1470 // decomposition, and would create a [3 x N] matrix to be multiplied by the
1471 // [N x 1] residual vector, but on the upside I think that'd eliminate the
1472 // possibility of this gauss_newton_step() function ever failing.
1473
1474 // 0) start off with lhs and rhs safely zeroed.
1475 skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
1476 skcms_Vector3 rhs = { {0,0,0} };
1477
1478 // 1,2) evaluate lhs and evaluate rhs
1479 // We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
1480 // so we'll have to update lhs and rhs at the same time.
1481 for (int i = 0; i < N; i++) {
1482 float x = x0 + i*dx;
1483
1484 float dfdP[3] = {0,0,0};
1485 float resid = rg_nonlinear(x,curve,tf,P, dfdP);
1486
1487 for (int r = 0; r < 3; r++) {
1488 for (int c = 0; c < 3; c++) {
1489 lhs.vals[r][c] += dfdP[r] * dfdP[c];
1490 }
1491 rhs.vals[r] += dfdP[r] * resid;
1492 }
1493 }
1494
1495 // If any of the 3 P parameters are unused, this matrix will be singular.
1496 // Detect those cases and fix them up to indentity instead, so we can invert.
1497 for (int k = 0; k < 3; k++) {
1498 if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
1499 lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
1500 lhs.vals[k][k] = 1;
1501 }
1502 }
1503
1504 // 3) invert lhs
1505 skcms_Matrix3x3 lhs_inv;
1506 if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
1507 return false;
1508 }
1509
1510 // 4) multiply inverse lhs by rhs
1511 skcms_Vector3 dP = skcms_MV_mul(&lhs_inv, &rhs);
1512 P[0] += dP.vals[0];
1513 P[1] += dP.vals[1];
1514 P[2] += dP.vals[2];
1515 return isfinitef_(P[0]) && isfinitef_(P[1]) && isfinitef_(P[2]);
1516}
1517
1518
1519// Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
1520static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
1521 float P[3] = { tf->g, tf->a, tf->b };
1522
1523 // No matter where we start, dx should always represent N even steps from 0 to 1.
1524 const float dx = 1.0f / (N-1);
1525
1526 for (int j = 0; j < 3/*TODO: tune*/; j++) {
1527 // These extra constraints a >= 0 and ad+b >= 0 are not modeled in the optimization.
1528 // We don't really know how to fix up a if it goes negative.
1529 if (P[1] < 0) {
1530 return false;
1531 }
1532 // If ad+b goes negative, we feel just barely not uneasy enough to tweak b so ad+b is zero.
1533 if (P[1] * tf->d + P[2] < 0) {
1534 P[2] = -P[1] * tf->d;
1535 }
1536 assert (P[1] >= 0 &&
1537 P[1] * tf->d + P[2] >= 0);
1538
1539 if (!gauss_newton_step(curve, tf,
1540 P,
1541 L*dx, dx, N-L)) {
1542 return false;
1543 }
1544 }
1545
1546 // We need to apply our fixups one last time
1547 if (P[1] < 0) {
1548 return false;
1549 }
1550 if (P[1] * tf->d + P[2] < 0) {
1551 P[2] = -P[1] * tf->d;
1552 }
1553
1554 tf->g = P[0];
1555 tf->a = P[1];
1556 tf->b = P[2];
1557 tf->e = tf->c*tf->d + tf->f
1558 - powf_(tf->a*tf->d + tf->b, tf->g);
1559 return true;
1560}
1561
1562bool skcms_ApproximateCurve(const skcms_Curve* curve,
1563 skcms_TransferFunction* approx,
1564 float* max_error) {
1565 if (!curve || !approx || !max_error) {
1566 return false;
1567 }
1568
1569 if (curve->table_entries == 0) {
1570 // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
1571 return false;
1572 }
1573
1574 if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
1575 // We need at least two points, and must put some reasonable cap on the maximum number.
1576 return false;
1577 }
1578
1579 int N = (int)curve->table_entries;
1580 const float dx = 1.0f / (N - 1);
1581
1582 *max_error = INFINITY_;
1583 const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
1584 for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
1585 skcms_TransferFunction tf,
1586 tf_inv;
1587 int L = skcms_fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d, &tf.f);
1588
1589 if (L == N) {
1590 // If the entire data set was linear, move the coefficients to the nonlinear portion
1591 // with G == 1. This lets use a canonical representation with d == 0.
1592 tf.g = 1;
1593 tf.a = tf.c;
1594 tf.b = tf.f;
1595 tf.c = tf.d = tf.e = tf.f = 0;
1596 } else if (L == N - 1) {
1597 // Degenerate case with only two points in the nonlinear segment. Solve directly.
1598 tf.g = 1;
1599 tf.a = (skcms_eval_curve(curve, (N-1)*dx) -
1600 skcms_eval_curve(curve, (N-2)*dx))
1601 / dx;
1602 tf.b = skcms_eval_curve(curve, (N-2)*dx)
1603 - tf.a * (N-2)*dx;
1604 tf.e = 0;
1605 } else {
1606 // Start by guessing a gamma-only curve through the midpoint.
1607 int mid = (L + N) / 2;
1608 float mid_x = mid / (N - 1.0f);
1609 float mid_y = skcms_eval_curve(curve, mid_x);
1610 tf.g = log2f_(mid_y) / log2f_(mid_x);;
1611 tf.a = 1;
1612 tf.b = 0;
1613 tf.e = tf.c*tf.d + tf.f
1614 - powf_(tf.a*tf.d + tf.b, tf.g);
1615
1616
1617 if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
1618 !fit_nonlinear(curve, L,N, &tf_inv)) {
1619 continue;
1620 }
1621
1622 // We fit tf_inv, so calculate tf to keep in sync.
1623 if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
1624 continue;
1625 }
1626 }
1627
1628 // We find our error by roundtripping the table through tf_inv.
1629 //
1630 // (The most likely use case for this approximation is to be inverted and
1631 // used as the transfer function for a destination color space.)
1632 //
1633 // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
1634 // invertible, so re-verify that here (and use the new inverse for testing).
1635 if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
1636 continue;
1637 }
1638
1639 float err = skcms_MaxRoundtripError(curve, &tf_inv);
1640 if (*max_error > err) {
1641 *max_error = err;
1642 *approx = tf;
1643 }
1644 }
1645 return isfinitef_(*max_error);
1646}
1647
1648// Without this wasm would try to use the N=4 128-bit vector code path,
1649// which while ideal, causes tons of compiler problems. This would be
1650// a good thing to revisit as emcc matures (currently 1.38.5).
1651#if 1 && defined(__EMSCRIPTEN_major__)
1652 #if !defined(SKCMS_PORTABLE)
1653 #define SKCMS_PORTABLE
1654 #endif
1655#endif
1656
1657extern bool g_skcms_dump_profile;
1658bool g_skcms_dump_profile = false;
1659
1660#if !defined(NDEBUG) && defined(__clang__)
1661 // Basic profiling tools to time each Op. Not at all thread safe.
1662
1663 #include <stdio.h>
1664 #include <stdlib.h>
1665
1666 #if defined(__arm__) || defined(__aarch64__)
1667 #include <time.h>
1668 static const char* now_units = "ticks";
1669 static uint64_t now() { return (uint64_t)clock(); }
1670 #else
1671 static const char* now_units = "cycles";
1672 static uint64_t now() { return __builtin_readcyclecounter(); }
1673 #endif
1674
1675 #define M(op) +1
1676 static uint64_t counts[FOREACH_Op(M)];
1677 #undef M
1678
1679 static void profile_dump_stats() {
1680 #define M(op) #op,
1681 static const char* names[] = { FOREACH_Op(M) };
1682 #undef M
1683 for (int i = 0; i < ARRAY_COUNT(counts); i++) {
1684 if (counts[i]) {
1685 fprintf(stderr, "%16s: %12llu %s\n",
1686 names[i], (unsigned long long)counts[i], now_units);
1687 }
1688 }
1689 }
1690
1691 static inline Op profile_next_op(Op op) {
1692 if (__builtin_expect(g_skcms_dump_profile, false)) {
1693 static uint64_t start = 0;
1694 static uint64_t* current = NULL;
1695
1696 if (!current) {
1697 atexit(profile_dump_stats);
1698 } else {
1699 *current += now() - start;
1700 }
1701
1702 current = &counts[op];
1703 start = now();
1704 }
1705 return op;
1706 }
1707#else
1708 static inline Op profile_next_op(Op op) {
1709 (void)g_skcms_dump_profile;
1710 return op;
1711 }
1712#endif
1713
1714#if defined(__clang__)
1715 typedef float __attribute__((ext_vector_type(4))) Fx4;
1716 typedef int32_t __attribute__((ext_vector_type(4))) I32x4;
1717 typedef uint64_t __attribute__((ext_vector_type(4))) U64x4;
1718 typedef uint32_t __attribute__((ext_vector_type(4))) U32x4;
1719 typedef uint16_t __attribute__((ext_vector_type(4))) U16x4;
1720 typedef uint8_t __attribute__((ext_vector_type(4))) U8x4;
1721
1722 typedef float __attribute__((ext_vector_type(8))) Fx8;
1723 typedef int32_t __attribute__((ext_vector_type(8))) I32x8;
1724 typedef uint64_t __attribute__((ext_vector_type(8))) U64x8;
1725 typedef uint32_t __attribute__((ext_vector_type(8))) U32x8;
1726 typedef uint16_t __attribute__((ext_vector_type(8))) U16x8;
1727 typedef uint8_t __attribute__((ext_vector_type(8))) U8x8;
1728
1729 typedef float __attribute__((ext_vector_type(16))) Fx16;
1730 typedef int32_t __attribute__((ext_vector_type(16))) I32x16;
1731 typedef uint64_t __attribute__((ext_vector_type(16))) U64x16;
1732 typedef uint32_t __attribute__((ext_vector_type(16))) U32x16;
1733 typedef uint16_t __attribute__((ext_vector_type(16))) U16x16;
1734 typedef uint8_t __attribute__((ext_vector_type(16))) U8x16;
1735#elif defined(__GNUC__)
1736 typedef float __attribute__((vector_size(16))) Fx4;
1737 typedef int32_t __attribute__((vector_size(16))) I32x4;
1738 typedef uint64_t __attribute__((vector_size(32))) U64x4;
1739 typedef uint32_t __attribute__((vector_size(16))) U32x4;
1740 typedef uint16_t __attribute__((vector_size( 8))) U16x4;
1741 typedef uint8_t __attribute__((vector_size( 4))) U8x4;
1742
1743 typedef float __attribute__((vector_size(32))) Fx8;
1744 typedef int32_t __attribute__((vector_size(32))) I32x8;
1745 typedef uint64_t __attribute__((vector_size(64))) U64x8;
1746 typedef uint32_t __attribute__((vector_size(32))) U32x8;
1747 typedef uint16_t __attribute__((vector_size(16))) U16x8;
1748 typedef uint8_t __attribute__((vector_size( 8))) U8x8;
1749
1750 typedef float __attribute__((vector_size( 64))) Fx16;
1751 typedef int32_t __attribute__((vector_size( 64))) I32x16;
1752 typedef uint64_t __attribute__((vector_size(128))) U64x16;
1753 typedef uint32_t __attribute__((vector_size( 64))) U32x16;
1754 typedef uint16_t __attribute__((vector_size( 32))) U16x16;
1755 typedef uint8_t __attribute__((vector_size( 16))) U8x16;
1756#endif
1757
1758// First, instantiate our default exec_ops() implementation using the default compiliation target.
1759
1760#if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__))
1761 #define N 1
1762
1763 #define F float
1764 #define U64 uint64_t
1765 #define U32 uint32_t
1766 #define I32 int32_t
1767 #define U16 uint16_t
1768 #define U8 uint8_t
1769
1770 #define F0 0.0f
1771 #define F1 1.0f
1772
1773#elif defined(__AVX512F__)
1774 #define N 16
1775
1776 #define F Fx16
1777 #define U64 U64x16
1778 #define U32 U32x16
1779 #define I32 I32x16
1780 #define U16 U16x16
1781 #define U8 U8x16
1782
1783 #define F0 (F){0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0}
1784 #define F1 (F){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1}
1785#elif defined(__AVX__)
1786 #define N 8
1787
1788 #define F Fx8
1789 #define U64 U64x8
1790 #define U32 U32x8
1791 #define I32 I32x8
1792 #define U16 U16x8
1793 #define U8 U8x8
1794
1795 #define F0 (F){0,0,0,0, 0,0,0,0}
1796 #define F1 (F){1,1,1,1, 1,1,1,1}
1797#else
1798 #define N 4
1799
1800 #define F Fx4
1801 #define U64 U64x4
1802 #define U32 U32x4
1803 #define I32 I32x4
1804 #define U16 U16x4
1805 #define U8 U8x4
1806
1807 #define F0 (F){0,0,0,0}
1808 #define F1 (F){1,1,1,1}
1809#endif
1810
1811#define NS(id) id
1812#define ATTR
1813 #include "src/Transform_inl.h"
1814#undef N
1815#undef F
1816#undef U64
1817#undef U32
1818#undef I32
1819#undef U16
1820#undef U8
1821#undef F0
1822#undef F1
1823#undef NS
1824#undef ATTR
1825
1826// Now, instantiate any other versions of run_program() we may want for runtime detection.
1827#if !defined(SKCMS_PORTABLE) && (defined(__clang__) || defined(__GNUC__)) \
1828 && defined(__x86_64__) && !defined(__AVX2__)
1829 #define N 8
1830 #define F Fx8
1831 #define U64 U64x8
1832 #define U32 U32x8
1833 #define I32 I32x8
1834 #define U16 U16x8
1835 #define U8 U8x8
1836 #define F0 (F){0,0,0,0, 0,0,0,0}
1837 #define F1 (F){1,1,1,1, 1,1,1,1}
1838
1839 #define NS(id) id ## _hsw
1840 #define ATTR __attribute__((target("avx2,f16c")))
1841
1842 // We check these guards to see if we have support for these features.
1843 // They're likely _not_ defined here in our baseline build config.
1844 #ifndef __AVX__
1845 #define __AVX__ 1
1846 #define UNDEF_AVX
1847 #endif
1848 #ifndef __F16C__
1849 #define __F16C__ 1
1850 #define UNDEF_F16C
1851 #endif
1852 #ifndef __AVX2__
1853 #define __AVX2__ 1
1854 #define UNDEF_AVX2
1855 #endif
1856
1857 #include "src/Transform_inl.h"
1858
1859 #undef N
1860 #undef F
1861 #undef U64
1862 #undef U32
1863 #undef I32
1864 #undef U16
1865 #undef U8
1866 #undef F0
1867 #undef F1
1868 #undef NS
1869 #undef ATTR
1870
1871 #ifdef UNDEF_AVX
1872 #undef __AVX__
1873 #undef UNDEF_AVX
1874 #endif
1875 #ifdef UNDEF_F16C
1876 #undef __F16C__
1877 #undef UNDEF_F16C
1878 #endif
1879 #ifdef UNDEF_AVX2
1880 #undef __AVX2__
1881 #undef UNDEF_AVX2
1882 #endif
1883
1884 #define TEST_FOR_HSW
1885
1886 static bool hsw_ok_ = false;
1887 static void check_hsw_ok() {
1888 // See http://www.sandpile.org/x86/cpuid.htm
1889
1890 // First, a basic cpuid(1).
1891 uint32_t eax, ebx, ecx, edx;
1892 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
1893 : "0"(1), "2"(0));
1894
1895 // Sanity check for prerequisites.
1896 if ((edx & (1<<25)) != (1<<25)) { return; } // SSE
1897 if ((edx & (1<<26)) != (1<<26)) { return; } // SSE2
1898 if ((ecx & (1<< 0)) != (1<< 0)) { return; } // SSE3
1899 if ((ecx & (1<< 9)) != (1<< 9)) { return; } // SSSE3
1900 if ((ecx & (1<<19)) != (1<<19)) { return; } // SSE4.1
1901 if ((ecx & (1<<20)) != (1<<20)) { return; } // SSE4.2
1902
1903 if ((ecx & (3<<26)) != (3<<26)) { return; } // XSAVE + OSXSAVE
1904
1905 {
1906 uint32_t eax_xgetbv, edx_xgetbv;
1907 __asm__ __volatile__("xgetbv" : "=a"(eax_xgetbv), "=d"(edx_xgetbv) : "c"(0));
1908 if ((eax_xgetbv & (3<<1)) != (3<<1)) { return; } // XMM+YMM state saved?
1909 }
1910
1911 if ((ecx & (1<<28)) != (1<<28)) { return; } // AVX
1912 if ((ecx & (1<<29)) != (1<<29)) { return; } // F16C
1913 if ((ecx & (1<<12)) != (1<<12)) { return; } // FMA (TODO: not currently used)
1914
1915 // Call cpuid(7) to check for our final AVX2 feature bit!
1916 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
1917 : "0"(7), "2"(0));
1918 if ((ebx & (1<< 5)) != (1<< 5)) { return; } // AVX2
1919
1920 hsw_ok_ = true;
1921 }
1922
1923 #if defined(_MSC_VER)
1924 #include <Windows.h>
1925 INIT_ONCE check_hsw_ok_once = INIT_ONCE_STATIC_INIT;
1926
1927 static BOOL check_hsw_ok_InitOnce_wrapper(INIT_ONCE* once, void* param, void** ctx) {
1928 (void)once;
1929 (void)param;
1930 (void)ctx;
1931 check_hsw_ok();
1932 return TRUE;
1933 }
1934
1935 static bool hsw_ok() {
1936 InitOnceExecuteOnce(&check_hsw_ok_once, check_hsw_ok_InitOnce_wrapper, NULL, NULL);
1937 return hsw_ok_;
1938 }
1939 #else
1940 #include <pthread.h>
1941 static pthread_once_t check_hsw_ok_once = PTHREAD_ONCE_INIT;
1942
1943 static bool hsw_ok() {
1944 pthread_once(&check_hsw_ok_once, check_hsw_ok);
1945 return hsw_ok_;
1946 }
1947 #endif
1948
1949#endif
1950
1951static bool is_identity_tf(const skcms_TransferFunction* tf) {
1952 return tf->g == 1 && tf->a == 1
1953 && tf->b == 0 && tf->c == 0 && tf->d == 0 && tf->e == 0 && tf->f == 0;
1954}
1955
1956typedef struct {
1957 Op op;
1958 const void* arg;
1959} OpAndArg;
1960
1961static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
1962 static const struct { Op parametric, table_8, table_16; } ops[] = {
1963 { Op_tf_r, Op_table_8_r, Op_table_16_r },
1964 { Op_tf_g, Op_table_8_g, Op_table_16_g },
1965 { Op_tf_b, Op_table_8_b, Op_table_16_b },
1966 { Op_tf_a, Op_table_8_a, Op_table_16_a },
1967 };
1968
1969 if (curve->table_entries == 0) {
1970 return is_identity_tf(&curve->parametric)
1971 ? (OpAndArg){ Op_noop, NULL }
1972 : (OpAndArg){ ops[channel].parametric, &curve->parametric };
1973 } else if (curve->table_8) {
1974 return (OpAndArg){ ops[channel].table_8, curve };
1975 } else if (curve->table_16) {
1976 return (OpAndArg){ ops[channel].table_16, curve };
1977 }
1978
1979 assert(false);
1980 return (OpAndArg){Op_noop,NULL};
1981}
1982
1983static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
1984 switch (fmt >> 1) { // ignore rgb/bgr
1985 case skcms_PixelFormat_A_8 >> 1: return 1;
1986 case skcms_PixelFormat_G_8 >> 1: return 1;
1987 case skcms_PixelFormat_ABGR_4444 >> 1: return 2;
1988 case skcms_PixelFormat_RGB_565 >> 1: return 2;
1989 case skcms_PixelFormat_RGB_888 >> 1: return 3;
1990 case skcms_PixelFormat_RGBA_8888 >> 1: return 4;
1991 case skcms_PixelFormat_RGBA_1010102 >> 1: return 4;
1992 case skcms_PixelFormat_RGB_161616 >> 1: return 6;
1993 case skcms_PixelFormat_RGBA_16161616 >> 1: return 8;
1994 case skcms_PixelFormat_RGB_hhh >> 1: return 6;
1995 case skcms_PixelFormat_RGBA_hhhh >> 1: return 8;
1996 case skcms_PixelFormat_RGB_fff >> 1: return 12;
1997 case skcms_PixelFormat_RGBA_ffff >> 1: return 16;
1998 }
1999 assert(false);
2000 return 0;
2001}
2002
2003static bool prep_for_destination(const skcms_ICCProfile* profile,
2004 skcms_Matrix3x3* fromXYZD50,
2005 skcms_TransferFunction* invR,
2006 skcms_TransferFunction* invG,
2007 skcms_TransferFunction* invB) {
2008 // We only support destinations with parametric transfer functions
2009 // and with gamuts that can be transformed from XYZD50.
2010 return profile->has_trc
2011 && profile->has_toXYZD50
2012 && profile->trc[0].table_entries == 0
2013 && profile->trc[1].table_entries == 0
2014 && profile->trc[2].table_entries == 0
2015 && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2016 && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2017 && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2018 && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2019}
2020
2021bool skcms_Transform(const void* src,
2022 skcms_PixelFormat srcFmt,
2023 skcms_AlphaFormat srcAlpha,
2024 const skcms_ICCProfile* srcProfile,
2025 void* dst,
2026 skcms_PixelFormat dstFmt,
2027 skcms_AlphaFormat dstAlpha,
2028 const skcms_ICCProfile* dstProfile,
2029 size_t nz) {
2030 const size_t dst_bpp = bytes_per_pixel(dstFmt),
2031 src_bpp = bytes_per_pixel(srcFmt);
2032 // Let's just refuse if the request is absurdly big.
2033 if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2034 return false;
2035 }
2036 int n = (int)nz;
2037
2038 // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2039 if (!srcProfile) {
2040 srcProfile = skcms_sRGB_profile();
2041 }
2042 if (!dstProfile) {
2043 dstProfile = skcms_sRGB_profile();
2044 }
2045
2046 // We can't transform in place unless the PixelFormats are the same size.
2047 if (dst == src && (dstFmt >> 1) != (srcFmt >> 1)) {
2048 return false;
2049 }
2050 // TODO: this check lazilly disallows U16 <-> F16, but that would actually be fine.
2051 // TODO: more careful alias rejection (like, dst == src + 1)?
2052
2053 Op program [32];
2054 const void* arguments[32];
2055
2056 Op* ops = program;
2057 const void** args = arguments;
2058
2059 skcms_TransferFunction inv_dst_tf_r, inv_dst_tf_g, inv_dst_tf_b;
2060 skcms_Matrix3x3 from_xyz;
2061
2062 switch (srcFmt >> 1) {
2063 default: return false;
2064 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_load_a8; break;
2065 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_load_g8; break;
2066 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_load_4444; break;
2067 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_load_565; break;
2068 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_load_888; break;
2069 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_load_8888; break;
2070 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_load_1010102; break;
2071 case skcms_PixelFormat_RGB_161616 >> 1: *ops++ = Op_load_161616; break;
2072 case skcms_PixelFormat_RGBA_16161616 >> 1: *ops++ = Op_load_16161616; break;
2073 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_load_hhh; break;
2074 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_load_hhhh; break;
2075 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_load_fff; break;
2076 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_load_ffff; break;
2077 }
2078 if (srcFmt & 1) {
2079 *ops++ = Op_swap_rb;
2080 }
2081 skcms_ICCProfile gray_dst_profile;
2082 if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
2083 // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2084 // luminance (Y) by the destination transfer function.
2085 gray_dst_profile = *dstProfile;
2086 skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2087 dstProfile = &gray_dst_profile;
2088 }
2089
2090 if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2091 // Photoshop creates CMYK images as inverse CMYK.
2092 // These happen to be the only ones we've _ever_ seen.
2093 *ops++ = Op_invert;
2094 // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2095 srcAlpha = skcms_AlphaFormat_Unpremul;
2096 }
2097
2098 if (srcAlpha == skcms_AlphaFormat_Opaque) {
2099 *ops++ = Op_force_opaque;
2100 } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2101 *ops++ = Op_unpremul;
2102 }
2103
2104 // TODO: We can skip this work if both srcAlpha and dstAlpha are PremulLinear, and the profiles
2105 // are the same. Also, if dstAlpha is PremulLinear, and SrcAlpha is Opaque.
2106 if (dstProfile != srcProfile ||
2107 srcAlpha == skcms_AlphaFormat_PremulLinear ||
2108 dstAlpha == skcms_AlphaFormat_PremulLinear) {
2109
2110 if (!prep_for_destination(dstProfile,
2111 &from_xyz, &inv_dst_tf_r, &inv_dst_tf_b, &inv_dst_tf_g)) {
2112 return false;
2113 }
2114
2115 if (srcProfile->has_A2B) {
2116 if (srcProfile->A2B.input_channels) {
2117 for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) {
2118 OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i);
2119 if (oa.op != Op_noop) {
2120 *ops++ = oa.op;
2121 *args++ = oa.arg;
2122 }
2123 }
2124 switch (srcProfile->A2B.input_channels) {
2125 case 3: *ops++ = srcProfile->A2B.grid_8 ? Op_clut_3D_8 : Op_clut_3D_16; break;
2126 case 4: *ops++ = srcProfile->A2B.grid_8 ? Op_clut_4D_8 : Op_clut_4D_16; break;
2127 default: return false;
2128 }
2129 *args++ = &srcProfile->A2B;
2130 }
2131
2132 if (srcProfile->A2B.matrix_channels == 3) {
2133 for (int i = 0; i < 3; i++) {
2134 OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i);
2135 if (oa.op != Op_noop) {
2136 *ops++ = oa.op;
2137 *args++ = oa.arg;
2138 }
2139 }
2140
2141 static const skcms_Matrix3x4 I = {{
2142 {1,0,0,0},
2143 {0,1,0,0},
2144 {0,0,1,0},
2145 }};
2146 if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2147 *ops++ = Op_matrix_3x4;
2148 *args++ = &srcProfile->A2B.matrix;
2149 }
2150 }
2151
2152 if (srcProfile->A2B.output_channels == 3) {
2153 for (int i = 0; i < 3; i++) {
2154 OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i);
2155 if (oa.op != Op_noop) {
2156 *ops++ = oa.op;
2157 *args++ = oa.arg;
2158 }
2159 }
2160 }
2161
2162 if (srcProfile->pcs == skcms_Signature_Lab) {
2163 *ops++ = Op_lab_to_xyz;
2164 }
2165
2166 } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2167 for (int i = 0; i < 3; i++) {
2168 OpAndArg oa = select_curve_op(&srcProfile->trc[i], i);
2169 if (oa.op != Op_noop) {
2170 *ops++ = oa.op;
2171 *args++ = oa.arg;
2172 }
2173 }
2174 } else {
2175 return false;
2176 }
2177
2178 // At this point our source colors are linear, either RGB (XYZ-type profiles)
2179 // or XYZ (A2B-type profiles). Unpremul is a linear operation (multiply by a
2180 // constant 1/a), so either way we can do it now if needed.
2181 if (srcAlpha == skcms_AlphaFormat_PremulLinear) {
2182 *ops++ = Op_unpremul;
2183 }
2184
2185 // A2B sources should already be in XYZD50 at this point.
2186 // Others still need to be transformed using their toXYZD50 matrix.
2187 // N.B. There are profiles that contain both A2B tags and toXYZD50 matrices.
2188 // If we use the A2B tags, we need to ignore the XYZD50 matrix entirely.
2189 assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2190 static const skcms_Matrix3x3 I = {{
2191 { 1.0f, 0.0f, 0.0f },
2192 { 0.0f, 1.0f, 0.0f },
2193 { 0.0f, 0.0f, 1.0f },
2194 }};
2195 const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2196
2197 // There's a chance the source and destination gamuts are identical,
2198 // in which case we can skip the gamut transform.
2199 if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2200 // Concat the entire gamut transform into from_xyz,
2201 // now slightly misnamed but it's a handy spot to stash the result.
2202 from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2203 *ops++ = Op_matrix_3x3;
2204 *args++ = &from_xyz;
2205 }
2206
2207 if (dstAlpha == skcms_AlphaFormat_PremulLinear) {
2208 *ops++ = Op_premul;
2209 }
2210
2211 // Encode back to dst RGB using its parametric transfer functions.
2212 if (!is_identity_tf(&inv_dst_tf_r)) { *ops++ = Op_tf_r; *args++ = &inv_dst_tf_r; }
2213 if (!is_identity_tf(&inv_dst_tf_g)) { *ops++ = Op_tf_g; *args++ = &inv_dst_tf_g; }
2214 if (!is_identity_tf(&inv_dst_tf_b)) { *ops++ = Op_tf_b; *args++ = &inv_dst_tf_b; }
2215 }
2216
2217 if (dstAlpha == skcms_AlphaFormat_Opaque) {
2218 *ops++ = Op_force_opaque;
2219 } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2220 *ops++ = Op_premul;
2221 }
2222 if (dstFmt & 1) {
2223 *ops++ = Op_swap_rb;
2224 }
2225 if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2226 *ops++ = Op_clamp;
2227 }
2228 switch (dstFmt >> 1) {
2229 default: return false;
2230 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_store_a8; break;
2231 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_store_g8; break;
2232 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_store_4444; break;
2233 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_store_565; break;
2234 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_store_888; break;
2235 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_store_8888; break;
2236 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_store_1010102; break;
2237 case skcms_PixelFormat_RGB_161616 >> 1: *ops++ = Op_store_161616; break;
2238 case skcms_PixelFormat_RGBA_16161616 >> 1: *ops++ = Op_store_16161616; break;
2239 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_store_hhh; break;
2240 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_store_hhhh; break;
2241 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_store_fff; break;
2242 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_store_ffff; break;
2243 }
2244
2245 void (*run)(const Op*, const void**, const char*, char*, int, size_t,size_t) = run_program;
2246#if defined(TEST_FOR_HSW)
2247 if (hsw_ok()) {
2248 run = run_program_hsw;
2249 }
2250#endif
2251 run(program, arguments, src, dst, n, src_bpp,dst_bpp);
2252 return true;
2253}
2254
2255static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2256#if defined(NDEBUG)
2257 (void)profile;
2258#else
2259 skcms_Matrix3x3 fromXYZD50;
2260 skcms_TransferFunction invR, invG, invB;
2261 assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2262#endif
2263}
2264
2265bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2266 skcms_Matrix3x3 fromXYZD50;
2267 if (!profile->has_trc || !profile->has_toXYZD50
2268 || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
2269 return false;
2270 }
2271
2272 skcms_TransferFunction tf[3];
2273 for (int i = 0; i < 3; i++) {
2274 skcms_TransferFunction inv;
2275 if (profile->trc[i].table_entries == 0
2276 && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
2277 tf[i] = profile->trc[i].parametric;
2278 continue;
2279 }
2280
2281 float max_error;
2282 // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
2283 if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
2284 return false;
2285 }
2286 }
2287
2288 for (int i = 0; i < 3; ++i) {
2289 profile->trc[i].table_entries = 0;
2290 profile->trc[i].parametric = tf[i];
2291 }
2292
2293 assert_usable_as_destination(profile);
2294 return true;
2295}
2296
2297bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
2298 // Operate on a copy of profile, so we can choose the best TF for the original curves
2299 skcms_ICCProfile result = *profile;
2300 if (!skcms_MakeUsableAsDestination(&result)) {
2301 return false;
2302 }
2303
2304 int best_tf = 0;
2305 float min_max_error = INFINITY_;
2306 for (int i = 0; i < 3; i++) {
2307 skcms_TransferFunction inv;
2308 skcms_TransferFunction_invert(&result.trc[i].parametric, &inv);
2309
2310 float err = 0;
2311 for (int j = 0; j < 3; ++j) {
2312 err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv));
2313 }
2314 if (min_max_error > err) {
2315 min_max_error = err;
2316 best_tf = i;
2317 }
2318 }
2319
2320 for (int i = 0; i < 3; i++) {
2321 result.trc[i].parametric = result.trc[best_tf].parametric;
2322 }
2323
2324 *profile = result;
2325 assert_usable_as_destination(profile);
2326 return true;
2327}