Blame - third_party/skcms/skcms.c - platform/external/skia

blob: 5b66128ab0f7c3b216af6c7a49e9da36882ea026 [file] [log] [blame]

Mike Klein	ded7a55	2018-04-10 10:05:31 -0400	[diff] [blame]	1	/*
				2	* Copyright 2018 Google Inc.
				3	*
				4	* Use of this source code is governed by a BSD-style license that can be
				5	* found in the LICENSE file.
				6	*/
				7
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com	5155d38	2018-07-02 19:08:45 +0000	[diff] [blame^]	8	#include "skcms.h"
				9	#include "skcms_internal.h"
				10	#include <assert.h>
				11	#include <float.h>
				12	#include <limits.h>
				13	#include <stdlib.h>
				14	#include <string.h>
Mike Klein	ded7a55	2018-04-10 10:05:31 -0400	[diff] [blame]	15
skcms-skia-autoroll@skia-buildbots.google.com.iam.gserviceaccount.com	5155d38	2018-07-02 19:08:45 +0000	[diff] [blame^]	16	static float minus_1_ulp(float x) {
				17	int32_t bits;
				18	memcpy(&bits, &x, sizeof(bits));
				19	bits = bits - 1;
				20	memcpy(&x, &bits, sizeof(bits));
				21	return x;
				22	}
				23
				24	float skcms_eval_curve(const skcms_Curve* curve, float x) {
				25	if (curve->table_entries == 0) {
				26	return skcms_TransferFunction_eval(&curve->parametric, x);
				27	}
				28
				29	float ix = fmaxf_(0, fminf_(x, 1)) * (curve->table_entries - 1);
				30	int lo = (int) ix,
				31	hi = (int)minus_1_ulp(ix + 1.0f);
				32	float t = ix - (float)lo;
				33
				34	float l, h;
				35	if (curve->table_8) {
				36	l = curve->table_8[lo] * (1/255.0f);
				37	h = curve->table_8[hi] * (1/255.0f);
				38	} else {
				39	uint16_t be_l, be_h;
				40	memcpy(&be_l, curve->table_16 + 2*lo, 2);
				41	memcpy(&be_h, curve->table_16 + 2*hi, 2);
				42	uint16_t le_l = ((be_l << 8) \| (be_l >> 8)) & 0xffff;
				43	uint16_t le_h = ((be_h << 8) \| (be_h >> 8)) & 0xffff;
				44	l = le_l * (1/65535.0f);
				45	h = le_h * (1/65535.0f);
				46	}
				47	return l + (h-l)*t;
				48	}
				49
				50	float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
				51	uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
				52	const float dx = 1.0f / (N - 1);
				53	float err = 0;
				54	for (uint32_t i = 0; i < N; i++) {
				55	float x = i * dx,
				56	y = skcms_eval_curve(curve, x);
				57	err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
				58	}
				59	return err;
				60	}
				61
				62	bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
				63	return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f);
				64	}
				65
				66	// Additional ICC signature values that are only used internally
				67	enum {
				68	// File signature
				69	skcms_Signature_acsp = 0x61637370,
				70
				71	// Tag signatures
				72	skcms_Signature_rTRC = 0x72545243,
				73	skcms_Signature_gTRC = 0x67545243,
				74	skcms_Signature_bTRC = 0x62545243,
				75	skcms_Signature_kTRC = 0x6B545243,
				76
				77	skcms_Signature_rXYZ = 0x7258595A,
				78	skcms_Signature_gXYZ = 0x6758595A,
				79	skcms_Signature_bXYZ = 0x6258595A,
				80
				81	skcms_Signature_A2B0 = 0x41324230,
				82	skcms_Signature_A2B1 = 0x41324231,
				83	skcms_Signature_mAB = 0x6D414220,
				84
				85	skcms_Signature_CHAD = 0x63686164,
				86
				87	// Type signatures
				88	skcms_Signature_curv = 0x63757276,
				89	skcms_Signature_mft1 = 0x6D667431,
				90	skcms_Signature_mft2 = 0x6D667432,
				91	skcms_Signature_para = 0x70617261,
				92	skcms_Signature_sf32 = 0x73663332,
				93	// XYZ is also a PCS signature, so it's defined in skcms.h
				94	// skcms_Signature_XYZ = 0x58595A20,
				95	};
				96
				97	static uint16_t read_big_u16(const uint8_t* ptr) {
				98	uint16_t be;
				99	memcpy(&be, ptr, sizeof(be));
				100	#if defined(_MSC_VER)
				101	return _byteswap_ushort(be);
				102	#else
				103	return __builtin_bswap16(be);
				104	#endif
				105	}
				106
				107	static uint32_t read_big_u32(const uint8_t* ptr) {
				108	uint32_t be;
				109	memcpy(&be, ptr, sizeof(be));
				110	#if defined(_MSC_VER)
				111	return _byteswap_ulong(be);
				112	#else
				113	return __builtin_bswap32(be);
				114	#endif
				115	}
				116
				117	static int32_t read_big_i32(const uint8_t* ptr) {
				118	return (int32_t)read_big_u32(ptr);
				119	}
				120
				121	static float read_big_fixed(const uint8_t* ptr) {
				122	return read_big_i32(ptr) * (1.0f / 65536.0f);
				123	}
				124
				125	// Maps to an in-memory profile so that fields line up to the locations specified
				126	// in ICC.1:2010, section 7.2
				127	typedef struct {
				128	uint8_t size [ 4];
				129	uint8_t cmm_type [ 4];
				130	uint8_t version [ 4];
				131	uint8_t profile_class [ 4];
				132	uint8_t data_color_space [ 4];
				133	uint8_t pcs [ 4];
				134	uint8_t creation_date_time [12];
				135	uint8_t signature [ 4];
				136	uint8_t platform [ 4];
				137	uint8_t flags [ 4];
				138	uint8_t device_manufacturer [ 4];
				139	uint8_t device_model [ 4];
				140	uint8_t device_attributes [ 8];
				141	uint8_t rendering_intent [ 4];
				142	uint8_t illuminant_X [ 4];
				143	uint8_t illuminant_Y [ 4];
				144	uint8_t illuminant_Z [ 4];
				145	uint8_t creator [ 4];
				146	uint8_t profile_id [16];
				147	uint8_t reserved [28];
				148	uint8_t tag_count [ 4]; // Technically not part of header, but required
				149	} header_Layout;
				150
				151	typedef struct {
				152	uint8_t signature [4];
				153	uint8_t offset [4];
				154	uint8_t size [4];
				155	} tag_Layout;
				156
				157	static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
				158	return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
				159	}
				160
				161	// s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
				162	// use of the type is for the CHAD tag that stores exactly nine values.
				163	typedef struct {
				164	uint8_t type [ 4];
				165	uint8_t reserved [ 4];
				166	uint8_t values [36];
				167	} sf32_Layout;
				168
				169	bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
				170	skcms_ICCTag tag;
				171	if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
				172	return false;
				173	}
				174
				175	if (tag.type != skcms_Signature_sf32 \|\| tag.size < SAFE_SIZEOF(sf32_Layout)) {
				176	return false;
				177	}
				178
				179	const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
				180	const uint8_t* values = sf32Tag->values;
				181	for (int r = 0; r < 3; ++r)
				182	for (int c = 0; c < 3; ++c, values += 4) {
				183	m->vals[r][c] = read_big_fixed(values);
				184	}
				185	return true;
				186	}
				187
				188	// XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
				189	// the type are for tags/data that store exactly one triple.
				190	typedef struct {
				191	uint8_t type [4];
				192	uint8_t reserved [4];
				193	uint8_t X [4];
				194	uint8_t Y [4];
				195	uint8_t Z [4];
				196	} XYZ_Layout;
				197
				198	static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
				199	if (tag->type != skcms_Signature_XYZ \|\| tag->size < SAFE_SIZEOF(XYZ_Layout)) {
				200	return false;
				201	}
				202
				203	const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
				204
				205	*x = read_big_fixed(xyzTag->X);
				206	*y = read_big_fixed(xyzTag->Y);
				207	*z = read_big_fixed(xyzTag->Z);
				208	return true;
				209	}
				210
				211	static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
				212	const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
				213	return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
				214	read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
				215	read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
				216	}
				217
				218	typedef struct {
				219	uint8_t type [4];
				220	uint8_t reserved_a [4];
				221	uint8_t function_type [2];
				222	uint8_t reserved_b [2];
				223	uint8_t parameters [ ]; // 1, 3, 4, 5, or 7 s15.16 parameters, depending on function_type
				224	} para_Layout;
				225
				226	static bool read_curve_para(const uint8_t* buf, uint32_t size,
				227	skcms_Curve* curve, uint32_t* curve_size) {
				228	if (size < SAFE_SIZEOF(para_Layout)) {
				229	return false;
				230	}
				231
				232	const para_Layout* paraTag = (const para_Layout*)buf;
				233
				234	enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
				235	uint16_t function_type = read_big_u16(paraTag->function_type);
				236	if (function_type > kGABCDEF) {
				237	return false;
				238	}
				239
				240	static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
				241	if (size < SAFE_SIZEOF(para_Layout) + curve_bytes[function_type]) {
				242	return false;
				243	}
				244
				245	if (curve_size) {
				246	*curve_size = SAFE_SIZEOF(para_Layout) + curve_bytes[function_type];
				247	}
				248
				249	curve->table_entries = 0;
				250	curve->parametric.a = 1.0f;
				251	curve->parametric.b = 0.0f;
				252	curve->parametric.c = 0.0f;
				253	curve->parametric.d = 0.0f;
				254	curve->parametric.e = 0.0f;
				255	curve->parametric.f = 0.0f;
				256	curve->parametric.g = read_big_fixed(paraTag->parameters);
				257
				258	switch (function_type) {
				259	case kGAB:
				260	curve->parametric.a = read_big_fixed(paraTag->parameters + 4);
				261	curve->parametric.b = read_big_fixed(paraTag->parameters + 8);
				262	if (curve->parametric.a == 0) {
				263	return false;
				264	}
				265	curve->parametric.d = -curve->parametric.b / curve->parametric.a;
				266	break;
				267	case kGABC:
				268	curve->parametric.a = read_big_fixed(paraTag->parameters + 4);
				269	curve->parametric.b = read_big_fixed(paraTag->parameters + 8);
				270	curve->parametric.e = read_big_fixed(paraTag->parameters + 12);
				271	if (curve->parametric.a == 0) {
				272	return false;
				273	}
				274	curve->parametric.d = -curve->parametric.b / curve->parametric.a;
				275	curve->parametric.f = curve->parametric.e;
				276	break;
				277	case kGABCD:
				278	curve->parametric.a = read_big_fixed(paraTag->parameters + 4);
				279	curve->parametric.b = read_big_fixed(paraTag->parameters + 8);
				280	curve->parametric.c = read_big_fixed(paraTag->parameters + 12);
				281	curve->parametric.d = read_big_fixed(paraTag->parameters + 16);
				282	break;
				283	case kGABCDEF:
				284	curve->parametric.a = read_big_fixed(paraTag->parameters + 4);
				285	curve->parametric.b = read_big_fixed(paraTag->parameters + 8);
				286	curve->parametric.c = read_big_fixed(paraTag->parameters + 12);
				287	curve->parametric.d = read_big_fixed(paraTag->parameters + 16);
				288	curve->parametric.e = read_big_fixed(paraTag->parameters + 20);
				289	curve->parametric.f = read_big_fixed(paraTag->parameters + 24);
				290	break;
				291	}
				292	return skcms_TransferFunction_isValid(&curve->parametric);
				293	}
				294
				295	typedef struct {
				296	uint8_t type [4];
				297	uint8_t reserved [4];
				298	uint8_t value_count [4];
				299	uint8_t parameters [ ]; // value_count parameters (8.8 if 1, uint16 (n*65535) if > 1)
				300	} curv_Layout;
				301
				302	static bool read_curve_curv(const uint8_t* buf, uint32_t size,
				303	skcms_Curve* curve, uint32_t* curve_size) {
				304	if (size < SAFE_SIZEOF(curv_Layout)) {
				305	return false;
				306	}
				307
				308	const curv_Layout* curvTag = (const curv_Layout*)buf;
				309
				310	uint32_t value_count = read_big_u32(curvTag->value_count);
				311	if (size < SAFE_SIZEOF(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
				312	return false;
				313	}
				314
				315	if (curve_size) {
				316	curve_size = SAFE_SIZEOF(curv_Layout) + value_count SAFE_SIZEOF(uint16_t);
				317	}
				318
				319	if (value_count < 2) {
				320	curve->table_entries = 0;
				321	curve->parametric.a = 1.0f;
				322	curve->parametric.b = 0.0f;
				323	curve->parametric.c = 0.0f;
				324	curve->parametric.d = 0.0f;
				325	curve->parametric.e = 0.0f;
				326	curve->parametric.f = 0.0f;
				327	if (value_count == 0) {
				328	// Empty tables are a shorthand for an identity curve
				329	curve->parametric.g = 1.0f;
				330	} else {
				331	// Single entry tables are a shorthand for simple gamma
				332	curve->parametric.g = read_big_u16(curvTag->parameters) * (1.0f / 256.0f);
				333	}
				334	} else {
				335	curve->table_8 = NULL;
				336	curve->table_16 = curvTag->parameters;
				337	curve->table_entries = value_count;
				338	}
				339
				340	return true;
				341	}
				342
				343	// Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
				344	// If curve_size is not NULL, writes the number of bytes used by the curve in (*curve_size).
				345	static bool read_curve(const uint8_t* buf, uint32_t size,
				346	skcms_Curve* curve, uint32_t* curve_size) {
				347	if (!buf \|\| size < 4 \|\| !curve) {
				348	return false;
				349	}
				350
				351	uint32_t type = read_big_u32(buf);
				352	if (type == skcms_Signature_para) {
				353	return read_curve_para(buf, size, curve, curve_size);
				354	} else if (type == skcms_Signature_curv) {
				355	return read_curve_curv(buf, size, curve, curve_size);
				356	}
				357
				358	return false;
				359	}
				360
				361	// mft1 and mft2 share a large chunk of data
				362	typedef struct {
				363	uint8_t type [ 4];
				364	uint8_t reserved_a [ 4];
				365	uint8_t input_channels [ 1];
				366	uint8_t output_channels [ 1];
				367	uint8_t grid_points [ 1];
				368	uint8_t reserved_b [ 1];
				369	uint8_t matrix [36];
				370	} mft_CommonLayout;
				371
				372	typedef struct {
				373	mft_CommonLayout common [ 1];
				374
				375	uint8_t tables [ ];
				376	} mft1_Layout;
				377
				378	typedef struct {
				379	mft_CommonLayout common [ 1];
				380
				381	uint8_t input_table_entries [ 2];
				382	uint8_t output_table_entries [ 2];
				383	uint8_t tables [ ];
				384	} mft2_Layout;
				385
				386	static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
				387	// MFT matrices are applied before the first set of curves, but must be identity unless the
				388	// input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
				389	// matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
				390	// field/flag.
				391	a2b->matrix_channels = 0;
				392
				393	a2b->input_channels = mftTag->input_channels[0];
				394	a2b->output_channels = mftTag->output_channels[0];
				395
				396	// We require exactly three (ie XYZ/Lab/RGB) output channels
				397	if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
				398	return false;
				399	}
				400	// We require at least one, and no more than four (ie CMYK) input channels
				401	if (a2b->input_channels < 1 \|\| a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
				402	return false;
				403	}
				404
				405	for (uint32_t i = 0; i < a2b->input_channels; ++i) {
				406	a2b->grid_points[i] = mftTag->grid_points[0];
				407	}
				408	// The grid only makes sense with at least two points along each axis
				409	if (a2b->grid_points[0] < 2) {
				410	return false;
				411	}
				412
				413	return true;
				414	}
				415
				416	static bool init_a2b_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
				417	uint32_t input_table_entries, uint32_t output_table_entries,
				418	skcms_A2B* a2b) {
				419	// byte_width is 1 or 2, [input\|output]_table_entries are in [2, 4096], so no overflow
				420	uint32_t byte_len_per_input_table = input_table_entries * byte_width;
				421	uint32_t byte_len_per_output_table = output_table_entries * byte_width;
				422
				423	// [input\|output]_channels are <= 4, so still no overflow
				424	uint32_t byte_len_all_input_tables = a2b->input_channels * byte_len_per_input_table;
				425	uint32_t byte_len_all_output_tables = a2b->output_channels * byte_len_per_output_table;
				426
				427	uint64_t grid_size = a2b->output_channels * byte_width;
				428	for (uint32_t axis = 0; axis < a2b->input_channels; ++axis) {
				429	grid_size *= a2b->grid_points[axis];
				430	}
				431
				432	if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
				433	return false;
				434	}
				435
				436	for (uint32_t i = 0; i < a2b->input_channels; ++i) {
				437	a2b->input_curves[i].table_entries = input_table_entries;
				438	if (byte_width == 1) {
				439	a2b->input_curves[i].table_8 = table_base + i * byte_len_per_input_table;
				440	a2b->input_curves[i].table_16 = NULL;
				441	} else {
				442	a2b->input_curves[i].table_8 = NULL;
				443	a2b->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
				444	}
				445	}
				446
				447	if (byte_width == 1) {
				448	a2b->grid_8 = table_base + byte_len_all_input_tables;
				449	a2b->grid_16 = NULL;
				450	} else {
				451	a2b->grid_8 = NULL;
				452	a2b->grid_16 = table_base + byte_len_all_input_tables;
				453	}
				454
				455	const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
				456	for (uint32_t i = 0; i < a2b->output_channels; ++i) {
				457	a2b->output_curves[i].table_entries = output_table_entries;
				458	if (byte_width == 1) {
				459	a2b->output_curves[i].table_8 = output_table_base + i * byte_len_per_output_table;
				460	a2b->output_curves[i].table_16 = NULL;
				461	} else {
				462	a2b->output_curves[i].table_8 = NULL;
				463	a2b->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
				464	}
				465	}
				466
				467	return true;
				468	}
				469
				470	static bool read_tag_mft1(const skcms_ICCTag* tag, skcms_A2B* a2b) {
				471	if (tag->size < SAFE_SIZEOF(mft1_Layout)) {
				472	return false;
				473	}
				474
				475	const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
				476	if (!read_mft_common(mftTag->common, a2b)) {
				477	return false;
				478	}
				479
				480	uint32_t input_table_entries = 256;
				481	uint32_t output_table_entries = 256;
				482
				483	return init_a2b_tables(mftTag->tables, tag->size - SAFE_SIZEOF(mft1_Layout), 1,
				484	input_table_entries, output_table_entries, a2b);
				485	}
				486
				487	static bool read_tag_mft2(const skcms_ICCTag* tag, skcms_A2B* a2b) {
				488	if (tag->size < SAFE_SIZEOF(mft2_Layout)) {
				489	return false;
				490	}
				491
				492	const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
				493	if (!read_mft_common(mftTag->common, a2b)) {
				494	return false;
				495	}
				496
				497	uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
				498	uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
				499
				500	// ICC spec mandates that 2 <= table_entries <= 4096
				501	if (input_table_entries < 2 \|\| input_table_entries > 4096 \|\|
				502	output_table_entries < 2 \|\| output_table_entries > 4096) {
				503	return false;
				504	}
				505
				506	return init_a2b_tables(mftTag->tables, tag->size - SAFE_SIZEOF(mft2_Layout), 2,
				507	input_table_entries, output_table_entries, a2b);
				508	}
				509
				510	static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
				511	uint32_t num_curves, skcms_Curve* curves) {
				512	for (uint32_t i = 0; i < num_curves; ++i) {
				513	if (curve_offset > size) {
				514	return false;
				515	}
				516
				517	uint32_t curve_bytes;
				518	if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
				519	return false;
				520	}
				521
				522	if (curve_bytes > UINT32_MAX - 3) {
				523	return false;
				524	}
				525	curve_bytes = (curve_bytes + 3) & ~3U;
				526
				527	uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
				528	curve_offset = (uint32_t)new_offset_64;
				529	if (new_offset_64 != curve_offset) {
				530	return false;
				531	}
				532	}
				533
				534	return true;
				535	}
				536
				537	typedef struct {
				538	uint8_t type [ 4];
				539	uint8_t reserved_a [ 4];
				540	uint8_t input_channels [ 1];
				541	uint8_t output_channels [ 1];
				542	uint8_t reserved_b [ 2];
				543	uint8_t b_curve_offset [ 4];
				544	uint8_t matrix_offset [ 4];
				545	uint8_t m_curve_offset [ 4];
				546	uint8_t clut_offset [ 4];
				547	uint8_t a_curve_offset [ 4];
				548	} mAB_Layout;
				549
				550	typedef struct {
				551	uint8_t grid_points [16];
				552	uint8_t grid_byte_width [ 1];
				553	uint8_t reserved [ 3];
				554	uint8_t data [ ];
				555	} mABCLUT_Layout;
				556
				557	static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
				558	if (tag->size < SAFE_SIZEOF(mAB_Layout)) {
				559	return false;
				560	}
				561
				562	const mAB_Layout* mABTag = (const mAB_Layout*)tag->buf;
				563
				564	a2b->input_channels = mABTag->input_channels[0];
				565	a2b->output_channels = mABTag->output_channels[0];
				566
				567	// We require exactly three (ie XYZ/Lab/RGB) output channels
				568	if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
				569	return false;
				570	}
				571	// We require no more than four (ie CMYK) input channels
				572	if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
				573	return false;
				574	}
				575
				576	uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
				577	uint32_t matrix_offset = read_big_u32(mABTag->matrix_offset);
				578	uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
				579	uint32_t clut_offset = read_big_u32(mABTag->clut_offset);
				580	uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
				581
				582	// "B" curves must be present
				583	if (0 == b_curve_offset) {
				584	return false;
				585	}
				586
				587	if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
				588	a2b->output_curves)) {
				589	return false;
				590	}
				591
				592	// "M" curves and Matrix must be used together
				593	if (0 != m_curve_offset) {
				594	if (0 == matrix_offset) {
				595	return false;
				596	}
				597	a2b->matrix_channels = a2b->output_channels;
				598	if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
				599	a2b->matrix_curves)) {
				600	return false;
				601	}
				602
				603	// Read matrix, which is stored as a row-major 3x3, followed by the fourth column
				604	if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
				605	return false;
				606	}
				607	float encoding_factor = pcs_is_xyz ? 65535 / 32768.0f : 1.0f;
				608	const uint8_t* mtx_buf = tag->buf + matrix_offset;
				609	a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
				610	a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
				611	a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
				612	a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
				613	a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
				614	a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
				615	a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
				616	a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
				617	a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
				618	a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
				619	a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
				620	a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
				621	} else {
				622	if (0 != matrix_offset) {
				623	return false;
				624	}
				625	a2b->matrix_channels = 0;
				626	}
				627
				628	// "A" curves and CLUT must be used together
				629	if (0 != a_curve_offset) {
				630	if (0 == clut_offset) {
				631	return false;
				632	}
				633	if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
				634	a2b->input_curves)) {
				635	return false;
				636	}
				637
				638	if (tag->size < clut_offset + SAFE_SIZEOF(mABCLUT_Layout)) {
				639	return false;
				640	}
				641	const mABCLUT_Layout* clut = (const mABCLUT_Layout*)(tag->buf + clut_offset);
				642
				643	if (clut->grid_byte_width[0] == 1) {
				644	a2b->grid_8 = clut->data;
				645	a2b->grid_16 = NULL;
				646	} else if (clut->grid_byte_width[0] == 2) {
				647	a2b->grid_8 = NULL;
				648	a2b->grid_16 = clut->data;
				649	} else {
				650	return false;
				651	}
				652
				653	uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0];
				654	for (uint32_t i = 0; i < a2b->input_channels; ++i) {
				655	a2b->grid_points[i] = clut->grid_points[i];
				656	// The grid only makes sense with at least two points along each axis
				657	if (a2b->grid_points[i] < 2) {
				658	return false;
				659	}
				660	grid_size *= a2b->grid_points[i];
				661	}
				662	if (tag->size < clut_offset + SAFE_SIZEOF(mABCLUT_Layout) + grid_size) {
				663	return false;
				664	}
				665	} else {
				666	if (0 != clut_offset) {
				667	return false;
				668	}
				669
				670	// If there is no CLUT, the number of input and output channels must match
				671	if (a2b->input_channels != a2b->output_channels) {
				672	return false;
				673	}
				674
				675	// Zero out the number of input channels to signal that we're skipping this stage
				676	a2b->input_channels = 0;
				677	}
				678
				679	return true;
				680	}
				681
				682	static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
				683	bool ok = false;
				684	if (tag->type == skcms_Signature_mft1) {
				685	ok = read_tag_mft1(tag, a2b);
				686	} else if (tag->type == skcms_Signature_mft2) {
				687	ok = read_tag_mft2(tag, a2b);
				688	} else if (tag->type == skcms_Signature_mAB) {
				689	ok = read_tag_mab(tag, a2b, pcs_is_xyz);
				690	}
				691	if (!ok) {
				692	return false;
				693	}
				694
				695	// Detect and canonicalize identity tables.
				696	skcms_Curve* curves[] = {
				697	a2b->input_channels > 0 ? a2b->input_curves + 0 : NULL,
				698	a2b->input_channels > 1 ? a2b->input_curves + 1 : NULL,
				699	a2b->input_channels > 2 ? a2b->input_curves + 2 : NULL,
				700	a2b->input_channels > 3 ? a2b->input_curves + 3 : NULL,
				701	a2b->matrix_channels > 0 ? a2b->matrix_curves + 0 : NULL,
				702	a2b->matrix_channels > 1 ? a2b->matrix_curves + 1 : NULL,
				703	a2b->matrix_channels > 2 ? a2b->matrix_curves + 2 : NULL,
				704	a2b->output_channels > 0 ? a2b->output_curves + 0 : NULL,
				705	a2b->output_channels > 1 ? a2b->output_curves + 1 : NULL,
				706	a2b->output_channels > 2 ? a2b->output_curves + 2 : NULL,
				707	};
				708
				709	for (int i = 0; i < ARRAY_COUNT(curves); i++) {
				710	skcms_Curve* curve = curves[i];
				711
				712	if (curve && curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
				713	int N = (int)curve->table_entries;
				714
				715	float c,d,f;
				716	if (N == skcms_fit_linear(curve, N, 1.0f/(2*N), &c,&d,&f)
				717	&& c == 1.0f
				718	&& f == 0.0f) {
				719	curve->table_entries = 0;
				720	curve->table_8 = NULL;
				721	curve->table_16 = NULL;
				722	curve->parametric = (skcms_TransferFunction){1,1,0,0,0,0,0};
				723	}
				724	}
				725	}
				726
				727	return true;
				728	}
				729
				730	void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
				731	if (!profile \|\| !profile->buffer \|\| !tag) { return; }
				732	if (idx > profile->tag_count) { return; }
				733	const tag_Layout* tags = get_tag_table(profile);
				734	tag->signature = read_big_u32(tags[idx].signature);
				735	tag->size = read_big_u32(tags[idx].size);
				736	tag->buf = read_big_u32(tags[idx].offset) + profile->buffer;
				737	tag->type = read_big_u32(tag->buf);
				738	}
				739
				740	bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
				741	if (!profile \|\| !profile->buffer \|\| !tag) { return false; }
				742	const tag_Layout* tags = get_tag_table(profile);
				743	for (uint32_t i = 0; i < profile->tag_count; ++i) {
				744	if (read_big_u32(tags[i].signature) == sig) {
				745	tag->signature = sig;
				746	tag->size = read_big_u32(tags[i].size);
				747	tag->buf = read_big_u32(tags[i].offset) + profile->buffer;
				748	tag->type = read_big_u32(tag->buf);
				749	return true;
				750	}
				751	}
				752	return false;
				753	}
				754
				755	static bool usable_as_src(const skcms_ICCProfile* profile) {
				756	return profile->has_A2B
				757	\|\| (profile->has_trc && profile->has_toXYZD50);
				758	}
				759
				760	bool skcms_Parse(const void* buf, size_t len, skcms_ICCProfile* profile) {
				761	assert(SAFE_SIZEOF(header_Layout) == 132);
				762
				763	if (!profile) {
				764	return false;
				765	}
				766	memset(profile, 0, SAFE_SIZEOF(*profile));
				767
				768	if (len < SAFE_SIZEOF(header_Layout)) {
				769	return false;
				770	}
				771
				772	// Byte-swap all header fields
				773	const header_Layout* header = buf;
				774	profile->buffer = buf;
				775	profile->size = read_big_u32(header->size);
				776	uint32_t version = read_big_u32(header->version);
				777	profile->data_color_space = read_big_u32(header->data_color_space);
				778	profile->pcs = read_big_u32(header->pcs);
				779	uint32_t signature = read_big_u32(header->signature);
				780	float illuminant_X = read_big_fixed(header->illuminant_X);
				781	float illuminant_Y = read_big_fixed(header->illuminant_Y);
				782	float illuminant_Z = read_big_fixed(header->illuminant_Z);
				783	profile->tag_count = read_big_u32(header->tag_count);
				784
				785	// Validate signature, size (smaller than buffer, large enough to hold tag table),
				786	// and major version
				787	uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
				788	if (signature != skcms_Signature_acsp \|\|
				789	profile->size > len \|\|
				790	profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size \|\|
				791	(version >> 24) > 4) {
				792	return false;
				793	}
				794
				795	// Validate that illuminant is D50 white
				796	if (fabsf_(illuminant_X - 0.9642f) > 0.0100f \|\|
				797	fabsf_(illuminant_Y - 1.0000f) > 0.0100f \|\|
				798	fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
				799	return false;
				800	}
				801
				802	// Validate that all tag entries have sane offset + size
				803	const tag_Layout* tags = get_tag_table(profile);
				804	for (uint32_t i = 0; i < profile->tag_count; ++i) {
				805	uint32_t tag_offset = read_big_u32(tags[i].offset);
				806	uint32_t tag_size = read_big_u32(tags[i].size);
				807	uint64_t tag_end = (uint64_t)tag_offset + (uint64_t)tag_size;
				808	if (tag_size < 4 \|\| tag_end > profile->size) {
				809	return false;
				810	}
				811	}
				812
				813	if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
				814	return false;
				815	}
				816
				817	bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
				818
				819	// Pre-parse commonly used tags.
				820	skcms_ICCTag kTRC;
				821	if (profile->data_color_space == skcms_Signature_Gray &&
				822	skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
				823	if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], NULL)) {
				824	// Malformed tag
				825	return false;
				826	}
				827	profile->trc[1] = profile->trc[0];
				828	profile->trc[2] = profile->trc[0];
				829	profile->has_trc = true;
				830
				831	if (pcs_is_xyz) {
				832	profile->toXYZD50.vals[0][0] = illuminant_X;
				833	profile->toXYZD50.vals[1][1] = illuminant_Y;
				834	profile->toXYZD50.vals[2][2] = illuminant_Z;
				835	profile->has_toXYZD50 = true;
				836	}
				837	} else {
				838	skcms_ICCTag rTRC, gTRC, bTRC;
				839	if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
				840	skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
				841	skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
				842	if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], NULL) \|\|
				843	!read_curve(gTRC.buf, gTRC.size, &profile->trc[1], NULL) \|\|
				844	!read_curve(bTRC.buf, bTRC.size, &profile->trc[2], NULL)) {
				845	// Malformed TRC tags
				846	return false;
				847	}
				848	profile->has_trc = true;
				849	}
				850
				851	skcms_ICCTag rXYZ, gXYZ, bXYZ;
				852	if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
				853	skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
				854	skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
				855	if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
				856	// Malformed XYZ tags
				857	return false;
				858	}
				859	profile->has_toXYZD50 = true;
				860	}
				861	}
				862
				863	skcms_ICCTag a2b_tag;
				864
				865	// For now, we're preferring A2B0, like Skia does and the ICC spec tells us to.
				866	// TODO: prefer A2B1 (relative colormetric) over A2B0 (perceptual)?
				867	// This breaks with the ICC spec, but we think it's a good idea, given that TRC curves
				868	// and all our known users are thinking exclusively in terms of relative colormetric.
				869	const uint32_t sigs[] = { skcms_Signature_A2B0, skcms_Signature_A2B1 };
				870	for (int i = 0; i < ARRAY_COUNT(sigs); i++) {
				871	if (skcms_GetTagBySignature(profile, sigs[i], &a2b_tag)) {
				872	if (!read_a2b(&a2b_tag, &profile->A2B, pcs_is_xyz)) {
				873	// Malformed A2B tag
				874	return false;
				875	}
				876	profile->has_A2B = true;
				877	break;
				878	}
				879	}
				880
				881	return usable_as_src(profile);
				882	}
				883
				884
				885	const skcms_ICCProfile* skcms_sRGB_profile() {
				886	static const skcms_ICCProfile sRGB_profile = {
				887	// These fields are moot when not a skcms_Parse()'d profile.
				888	.buffer = NULL,
				889	.size = 0,
				890	.tag_count = 0,
				891
				892	// We choose to represent sRGB with its canonical transfer function,
				893	// and with its canonical XYZD50 gamut matrix.
				894	.data_color_space = skcms_Signature_RGB,
				895	.pcs = skcms_Signature_XYZ,
				896	.has_trc = true,
				897	.has_toXYZD50 = true,
				898	.has_A2B = false,
				899
				900	.trc = {
				901	{{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0 }}},
				902	{{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0 }}},
				903	{{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0 }}},
				904	},
				905
				906	.toXYZD50 = {{
				907	{ 0.436065674f, 0.385147095f, 0.143066406f },
				908	{ 0.222488403f, 0.716873169f, 0.060607910f },
				909	{ 0.013916016f, 0.097076416f, 0.714096069f },
				910	}},
				911	};
				912	return &sRGB_profile;
				913	}
				914
				915	const skcms_ICCProfile* skcms_XYZD50_profile() {
				916	static const skcms_ICCProfile XYZD50_profile = {
				917	.buffer = NULL,
				918	.size = 0,
				919	.tag_count = 0,
				920
				921	.data_color_space = skcms_Signature_RGB,
				922	.pcs = skcms_Signature_XYZ,
				923	.has_trc = true,
				924	.has_toXYZD50 = true,
				925	.has_A2B = false,
				926
				927	.trc = {
				928	{{0, {1,1,0,0,0,0,0}}},
				929	{{0, {1,1,0,0,0,0,0}}},
				930	{{0, {1,1,0,0,0,0,0}}},
				931	},
				932
				933	.toXYZD50 = {{
				934	{1,0,0},
				935	{0,1,0},
				936	{0,0,1},
				937	}},
				938	};
				939
				940	return &XYZD50_profile;
				941	}
				942
				943	const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
				944	return &skcms_sRGB_profile()->trc[0].parametric;
				945	}
				946
				947	const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
				948	static const skcms_TransferFunction sRGB_inv =
				949	{ (float)(1/2.4), 1.137119f, 0, 12.92f, 0.0031308f, -0.055f, 0 };
				950	return &sRGB_inv;
				951	}
				952
				953	const skcms_TransferFunction* skcms_Identity_TransferFunction() {
				954	static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
				955	return &identity;
				956	}
				957
				958	const uint8_t skcms_252_random_bytes[] = {
				959	8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
				960	119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
				961	154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
				962	194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
				963	108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
				964	70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
				965	137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
				966	9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
				967	129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
				968	140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
				969	219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
				970	123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
				971	189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
				972	174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
				973	2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
				974	112, 36, 224, 136, 202, 76, 94, 98, 175, 213
				975	};
				976
				977	bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
				978	// For now this is the essentially the same strategy we use in test_only.c
				979	// for our skcms_Transform() smoke tests:
				980	// 1) transform A to XYZD50
				981	// 2) transform B to XYZD50
				982	// 3) return true if they're similar enough
				983	// Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
				984
				985	// Here are 252 of a random shuffle of all possible bytes.
				986	// 252 is evenly divisible by 3 and 4. Only 192, 10, 241, and 43 are missing.
				987
				988	if (A->data_color_space != B->data_color_space) {
				989	return false;
				990	}
				991
				992	// Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
				993	skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
				994	size_t npixels = 84;
				995	if (A->data_color_space == skcms_Signature_CMYK) {
				996	fmt = skcms_PixelFormat_RGBA_8888;
				997	npixels = 63;
				998	}
				999
				1000	uint8_t dstA[252],
				1001	dstB[252];
				1002	if (!skcms_Transform(
				1003	skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, A,
				1004	dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
				1005	npixels)) {
				1006	return false;
				1007	}
				1008	if (!skcms_Transform(
				1009	skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, B,
				1010	dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
				1011	npixels)) {
				1012	return false;
				1013	}
				1014
				1015	for (size_t i = 0; i < 252; i++) {
				1016	if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
				1017	return false;
				1018	}
				1019	}
				1020	return true;
				1021	}
				1022
				1023	bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
				1024	const skcms_TransferFunction* inv_tf) {
				1025	if (!profile \|\| !profile->has_trc) {
				1026	return false;
				1027	}
				1028
				1029	return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
				1030	skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
				1031	skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
				1032	}
				1033
				1034	static bool is_zero_to_one(float x) {
				1035	return 0 <= x && x <= 1;
				1036	}
				1037
				1038	bool skcms_PrimariesToXYZD50(float rx, float ry,
				1039	float gx, float gy,
				1040	float bx, float by,
				1041	float wx, float wy,
				1042	skcms_Matrix3x3* toXYZD50) {
				1043	if (!is_zero_to_one(rx) \|\| !is_zero_to_one(ry) \|\|
				1044	!is_zero_to_one(gx) \|\| !is_zero_to_one(gy) \|\|
				1045	!is_zero_to_one(bx) \|\| !is_zero_to_one(by) \|\|
				1046	!is_zero_to_one(wx) \|\| !is_zero_to_one(wy) \|\|
				1047	!toXYZD50) {
				1048	return false;
				1049	}
				1050
				1051	// First, we need to convert xy values (primaries) to XYZ.
				1052	skcms_Matrix3x3 primaries = {{
				1053	{ rx, gx, bx },
				1054	{ ry, gy, by },
				1055	{ 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
				1056	}};
				1057	skcms_Matrix3x3 primaries_inv;
				1058	if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
				1059	return false;
				1060	}
				1061
				1062	// Assumes that Y is 1.0f.
				1063	skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
				1064	skcms_Vector3 XYZ = skcms_MV_mul(&primaries_inv, &wXYZ);
				1065
				1066	skcms_Matrix3x3 toXYZ = {{
				1067	{ XYZ.vals[0], 0, 0 },
				1068	{ 0, XYZ.vals[1], 0 },
				1069	{ 0, 0, XYZ.vals[2] },
				1070	}};
				1071	toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
				1072
				1073	// Now convert toXYZ matrix to toXYZD50.
				1074	skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
				1075
				1076	// Calculate the chromatic adaptation matrix. We will use the Bradford method, thus
				1077	// the matrices below. The Bradford method is used by Adobe and is widely considered
				1078	// to be the best.
				1079	skcms_Matrix3x3 xyz_to_lms = {{
				1080	{ 0.8951f, 0.2664f, -0.1614f },
				1081	{ -0.7502f, 1.7135f, 0.0367f },
				1082	{ 0.0389f, -0.0685f, 1.0296f },
				1083	}};
				1084	skcms_Matrix3x3 lms_to_xyz = {{
				1085	{ 0.9869929f, -0.1470543f, 0.1599627f },
				1086	{ 0.4323053f, 0.5183603f, 0.0492912f },
				1087	{ -0.0085287f, 0.0400428f, 0.9684867f },
				1088	}};
				1089
				1090	skcms_Vector3 srcCone = skcms_MV_mul(&xyz_to_lms, &wXYZ);
				1091	skcms_Vector3 dstCone = skcms_MV_mul(&xyz_to_lms, &wXYZD50);
				1092
				1093	skcms_Matrix3x3 DXtoD50 = {{
				1094	{ dstCone.vals[0] / srcCone.vals[0], 0, 0 },
				1095	{ 0, dstCone.vals[1] / srcCone.vals[1], 0 },
				1096	{ 0, 0, dstCone.vals[2] / srcCone.vals[2] },
				1097	}};
				1098	DXtoD50 = skcms_Matrix3x3_concat(&DXtoD50, &xyz_to_lms);
				1099	DXtoD50 = skcms_Matrix3x3_concat(&lms_to_xyz, &DXtoD50);
				1100
				1101	*toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
				1102	return true;
				1103	}
				1104
				1105
				1106	bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
				1107	double a00 = src->vals[0][0],
				1108	a01 = src->vals[1][0],
				1109	a02 = src->vals[2][0],
				1110	a10 = src->vals[0][1],
				1111	a11 = src->vals[1][1],
				1112	a12 = src->vals[2][1],
				1113	a20 = src->vals[0][2],
				1114	a21 = src->vals[1][2],
				1115	a22 = src->vals[2][2];
				1116
				1117	double b0 = a00a11 - a01a10,
				1118	b1 = a00a12 - a02a10,
				1119	b2 = a01a12 - a02a11,
				1120	b3 = a20,
				1121	b4 = a21,
				1122	b5 = a22;
				1123
				1124	double determinant = b0*b5
				1125	- b1*b4
				1126	+ b2*b3;
				1127
				1128	if (determinant == 0) {
				1129	return false;
				1130	}
				1131
				1132	double invdet = 1.0 / determinant;
				1133	if (invdet > +FLT_MAX \|\| invdet < -FLT_MAX \|\| !isfinitef_((float)invdet)) {
				1134	return false;
				1135	}
				1136
				1137	b0 *= invdet;
				1138	b1 *= invdet;
				1139	b2 *= invdet;
				1140	b3 *= invdet;
				1141	b4 *= invdet;
				1142	b5 *= invdet;
				1143
				1144	dst->vals[0][0] = (float)( a11b5 - a12b4 );
				1145	dst->vals[1][0] = (float)( a02b4 - a01b5 );
				1146	dst->vals[2][0] = (float)( + b2 );
				1147	dst->vals[0][1] = (float)( a12b3 - a10b5 );
				1148	dst->vals[1][1] = (float)( a00b5 - a02b3 );
				1149	dst->vals[2][1] = (float)( - b1 );
				1150	dst->vals[0][2] = (float)( a10b4 - a11b3 );
				1151	dst->vals[1][2] = (float)( a01b3 - a00b4 );
				1152	dst->vals[2][2] = (float)( + b0 );
				1153
				1154	for (int r = 0; r < 3; ++r)
				1155	for (int c = 0; c < 3; ++c) {
				1156	if (!isfinitef_(dst->vals[r][c])) {
				1157	return false;
				1158	}
				1159	}
				1160	return true;
				1161	}
				1162
				1163	skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
				1164	skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
				1165	for (int r = 0; r < 3; r++)
				1166	for (int c = 0; c < 3; c++) {
				1167	m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
				1168	+ A->vals[r][1] * B->vals[1][c]
				1169	+ A->vals[r][2] * B->vals[2][c];
				1170	}
				1171	return m;
				1172	}
				1173
				1174	skcms_Vector3 skcms_MV_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
				1175	skcms_Vector3 dst = {{0,0,0}};
				1176	for (int row = 0; row < 3; ++row) {
				1177	dst.vals[row] = m->vals[row][0] * v->vals[0]
				1178	+ m->vals[row][1] * v->vals[1]
				1179	+ m->vals[row][2] * v->vals[2];
				1180	}
				1181	return dst;
				1182	}
				1183
				1184	#if defined(__clang__) \|\| defined(__GNUC__)
				1185	#define small_memcpy __builtin_memcpy
				1186	#else
				1187	#define small_memcpy memcpy
				1188	#endif
				1189
				1190	float log2f_(float x) {
				1191	// The first approximation of log2(x) is its exponent 'e', minus 127.
				1192	int32_t bits;
				1193	small_memcpy(&bits, &x, sizeof(bits));
				1194
				1195	float e = (float)bits * (1.0f / (1<<23));
				1196
				1197	// If we use the mantissa too we can refine the error signficantly.
				1198	int32_t m_bits = (bits & 0x007fffff) \| 0x3f000000;
				1199	float m;
				1200	small_memcpy(&m, &m_bits, sizeof(m));
				1201
				1202	return (e - 124.225514990f
				1203	- 1.498030302f*m
				1204	- 1.725879990f/(0.3520887068f + m));
				1205	}
				1206
				1207	float exp2f_(float x) {
				1208	float fract = x - floorf_(x);
				1209
				1210	float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
				1211	- 1.490129070f*fract
				1212	+ 27.728023300f/(4.84252568f - fract));
				1213	if (fbits > INT_MAX) {
				1214	return INFINITY_;
				1215	} else if (fbits < INT_MIN) {
				1216	return -INFINITY_;
				1217	}
				1218	int32_t bits = (int32_t)fbits;
				1219	small_memcpy(&x, &bits, sizeof(x));
				1220	return x;
				1221	}
				1222
				1223	float powf_(float x, float y) {
				1224	return (x == 0) \|\| (x == 1) ? x
				1225	: exp2f_(log2f_(x) * y);
				1226	}
				1227
				1228	float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
				1229	float sign = x < 0 ? -1.0f : 1.0f;
				1230	x *= sign;
				1231
				1232	return sign * (x < tf->d ? tf->c * x + tf->f
				1233	: powf_(tf->a * x + tf->b, tf->g) + tf->e);
				1234	}
				1235
				1236	bool skcms_TransferFunction_isValid(const skcms_TransferFunction* tf) {
				1237	// Reject obviously malformed inputs
				1238	if (!isfinitef_(tf->a + tf->b + tf->c + tf->d + tf->e + tf->f + tf->g)) {
				1239	return false;
				1240	}
				1241
				1242	// All of these parameters should be non-negative
				1243	if (tf->a < 0 \|\| tf->c < 0 \|\| tf->d < 0 \|\| tf->g < 0) {
				1244	return false;
				1245	}
				1246
				1247	return true;
				1248	}
				1249
				1250	// TODO: Adjust logic here? This still assumes that purely linear inputs will have D > 1, which
				1251	// we never generate. It also emits inverted linear using the same formulation. Standardize on
				1252	// G == 1 here, too?
				1253	bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
				1254	// Original equation is: y = (ax + b)^g + e for x >= d
				1255	// y = cx + f otherwise
				1256	//
				1257	// so 1st inverse is: (y - e)^(1/g) = ax + b
				1258	// x = ((y - e)^(1/g) - b) / a
				1259	//
				1260	// which can be re-written as: x = (1/a)(y - e)^(1/g) - b/a
				1261	// x = ((1/a)^g)^(1/g) * (y - e)^(1/g) - b/a
				1262	// x = ([(1/a)^g]y + [-((1/a)^g)e]) ^ [1/g] + [-b/a]
				1263	//
				1264	// and 2nd inverse is: x = (y - f) / c
				1265	// which can be re-written as: x = [1/c]y + [-f/c]
				1266	//
				1267	// and now both can be expressed in terms of the same parametric form as the
				1268	// original - parameters are enclosed in square brackets.
				1269	skcms_TransferFunction tf_inv = { 0, 0, 0, 0, 0, 0, 0 };
				1270
				1271	// This rejects obviously malformed inputs, as well as decreasing functions
				1272	if (!skcms_TransferFunction_isValid(src)) {
				1273	return false;
				1274	}
				1275
				1276	// There are additional constraints to be invertible
				1277	bool has_nonlinear = (src->d <= 1);
				1278	bool has_linear = (src->d > 0);
				1279
				1280	// Is the linear section not invertible?
				1281	if (has_linear && src->c == 0) {
				1282	return false;
				1283	}
				1284
				1285	// Is the nonlinear section not invertible?
				1286	if (has_nonlinear && (src->a == 0 \|\| src->g == 0)) {
				1287	return false;
				1288	}
				1289
				1290	// If both segments are present, they need to line up
				1291	if (has_linear && has_nonlinear) {
				1292	float l_at_d = src->c * src->d + src->f;
				1293	float n_at_d = powf_(src->a * src->d + src->b, src->g) + src->e;
				1294	if (fabsf_(l_at_d - n_at_d) > (1 / 512.0f)) {
				1295	return false;
				1296	}
				1297	}
				1298
				1299	// Invert linear segment
				1300	if (has_linear) {
				1301	tf_inv.c = 1.0f / src->c;
				1302	tf_inv.f = -src->f / src->c;
				1303	}
				1304
				1305	// Invert nonlinear segment
				1306	if (has_nonlinear) {
				1307	tf_inv.g = 1.0f / src->g;
				1308	tf_inv.a = powf_(1.0f / src->a, src->g);
				1309	tf_inv.b = -tf_inv.a * src->e;
				1310	tf_inv.e = -src->b / src->a;
				1311	}
				1312
				1313	if (!has_linear) {
				1314	tf_inv.d = 0;
				1315	} else if (!has_nonlinear) {
				1316	// Any value larger than 1 works
				1317	tf_inv.d = 2.0f;
				1318	} else {
				1319	tf_inv.d = src->c * src->d + src->f;
				1320	}
				1321
				1322	*dst = tf_inv;
				1323	return true;
				1324	}
				1325
				1326	// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
				1327
				1328	// From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
				1329	//
				1330	// tf(x) = cx + f x < d
				1331	// tf(x) = (ax + b)^g + e x ≥ d
				1332	//
				1333	// When fitting, we add the additional constraint that both pieces meet at d:
				1334	//
				1335	// cd + f = (ad + b)^g + e
				1336	//
				1337	// Solving for e and folding it through gives an alternate formulation of the non-linear piece:
				1338	//
				1339	// tf(x) = cx + f x < d
				1340	// tf(x) = (ax + b)^g - (ad + b)^g + cd + f x ≥ d
				1341	//
				1342	// Our overall strategy is then:
				1343	// For a couple tolerances,
				1344	// - skcms_fit_linear(): fit c,d,f iteratively to as many points as our tolerance allows
				1345	// - invert c,d,f
				1346	// - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
				1347	// (and by constraint, inverted e) to the inverse of the table.
				1348	// Return the parameters with least maximum error.
				1349	//
				1350	// To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
				1351	// of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
				1352	//
				1353	// let y = Table(x)
				1354	// r(x) = x - f_inv(y)
				1355	//
				1356	// ∂r/∂g = ln(ay + b)*(ay + b)^g
				1357	// - ln(ad + b)*(ad + b)^g
				1358	// ∂r/∂a = yg(ay + b)^(g-1)
				1359	// - dg(ad + b)^(g-1)
				1360	// ∂r/∂b = g(ay + b)^(g-1)
				1361	// - g(ad + b)^(g-1)
				1362
				1363	// Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
				1364	// and fill out the gradient of the residual into dfdP.
				1365	static float rg_nonlinear(float x,
				1366	const skcms_Curve* curve,
				1367	const skcms_TransferFunction* tf,
				1368	const float P[3],
				1369	float dfdP[3]) {
				1370	const float y = skcms_eval_curve(curve, x);
				1371
				1372	const float g = P[0], a = P[1], b = P[2],
				1373	c = tf->c, d = tf->d, f = tf->f;
				1374
				1375	const float Y = fmaxf_(a*y + b, 0.0f),
				1376	D = a*d + b;
				1377	assert (D >= 0);
				1378
				1379	// The gradient.
				1380	dfdP[0] = 0.69314718flog2f_(Y)powf_(Y, g)
				1381	- 0.69314718flog2f_(D)powf_(D, g);
				1382	dfdP[1] = ygpowf_(Y, g-1)
				1383	- dgpowf_(D, g-1);
				1384	dfdP[2] = g*powf_(Y, g-1)
				1385	- g*powf_(D, g-1);
				1386
				1387	// The residual.
				1388	const float f_inv = powf_(Y, g)
				1389	- powf_(D, g)
				1390	+ c*d + f;
				1391	return x - f_inv;
				1392	}
				1393
				1394	int skcms_fit_linear(const skcms_Curve* curve, int N, float tol, float* c, float* d, float* f) {
				1395	assert(N > 1);
				1396	// We iteratively fit the first points to the TF's linear piece.
				1397	// We want the cx + f line to pass through the first and last points we fit exactly.
				1398	//
				1399	// As we walk along the points we find the minimum and maximum slope of the line before the
				1400	// error would exceed our tolerance. We stop when the range [slope_min, slope_max] becomes
				1401	// emtpy, when we definitely can't add any more points.
				1402	//
				1403	// Some points' error intervals may intersect the running interval but not lie fully
				1404	// within it. So we keep track of the last point we saw that is a valid end point candidate,
				1405	// and once the search is done, back up to build the line through that point.
				1406	const float dx = 1.0f / (N - 1);
				1407
				1408	int lin_points = 1;
				1409	*f = skcms_eval_curve(curve, 0);
				1410
				1411	float slope_min = -INFINITY_;
				1412	float slope_max = +INFINITY_;
				1413	for (int i = 1; i < N; ++i) {
				1414	float x = i * dx;
				1415	float y = skcms_eval_curve(curve, x);
				1416
				1417	float slope_max_i = (y + tol - *f) / x,
				1418	slope_min_i = (y - tol - *f) / x;
				1419	if (slope_max_i < slope_min \|\| slope_max < slope_min_i) {
				1420	// Slope intervals would no longer overlap.
				1421	break;
				1422	}
				1423	slope_max = fminf_(slope_max, slope_max_i);
				1424	slope_min = fmaxf_(slope_min, slope_min_i);
				1425
				1426	float cur_slope = (y - *f) / x;
				1427	if (slope_min <= cur_slope && cur_slope <= slope_max) {
				1428	lin_points = i + 1;
				1429	*c = cur_slope;
				1430	}
				1431	}
				1432
				1433	// Set D to the last point that met our tolerance.
				1434	d = (lin_points - 1) dx;
				1435	return lin_points;
				1436	}
				1437
				1438	static bool gauss_newton_step(const skcms_Curve* curve,
				1439	const skcms_TransferFunction* tf,
				1440	float P[3],
				1441	float x0, float dx, int N) {
				1442	// We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
				1443	//
				1444	// We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
				1445	// where r(P) is the residual vector
				1446	// and Jf is the Jacobian matrix of f(), ∂r/∂P.
				1447	//
				1448	// Let's review the shape of each of these expressions:
				1449	// r(P) is [N x 1], a column vector with one entry per value of x tested
				1450	// Jf is [N x 3], a matrix with an entry for each (x,P) pair
				1451	// Jf^T is [3 x N], the transpose of Jf
				1452	//
				1453	// Jf^T Jf is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
				1454	// and so is its inverse (Jf^T Jf)^-1
				1455	// Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
				1456	//
				1457	// Our implementation strategy to get to the final ∆P is
				1458	// 1) evaluate Jf^T Jf, call that lhs
				1459	// 2) evaluate Jf^T r(P), call that rhs
				1460	// 3) invert lhs
				1461	// 4) multiply inverse lhs by rhs
				1462	//
				1463	// This is a friendly implementation strategy because we don't have to have any
				1464	// buffers that scale with N, and equally nice don't have to perform any matrix
				1465	// operations that are variable size.
				1466	//
				1467	// Other implementation strategies could trade this off, e.g. evaluating the
				1468	// pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
				1469	// the residuals. That would probably require implementing singular value
				1470	// decomposition, and would create a [3 x N] matrix to be multiplied by the
				1471	// [N x 1] residual vector, but on the upside I think that'd eliminate the
				1472	// possibility of this gauss_newton_step() function ever failing.
				1473
				1474	// 0) start off with lhs and rhs safely zeroed.
				1475	skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
				1476	skcms_Vector3 rhs = { {0,0,0} };
				1477
				1478	// 1,2) evaluate lhs and evaluate rhs
				1479	// We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
				1480	// so we'll have to update lhs and rhs at the same time.
				1481	for (int i = 0; i < N; i++) {
				1482	float x = x0 + i*dx;
				1483
				1484	float dfdP[3] = {0,0,0};
				1485	float resid = rg_nonlinear(x,curve,tf,P, dfdP);
				1486
				1487	for (int r = 0; r < 3; r++) {
				1488	for (int c = 0; c < 3; c++) {
				1489	lhs.vals[r][c] += dfdP[r] * dfdP[c];
				1490	}
				1491	rhs.vals[r] += dfdP[r] * resid;
				1492	}
				1493	}
				1494
				1495	// If any of the 3 P parameters are unused, this matrix will be singular.
				1496	// Detect those cases and fix them up to indentity instead, so we can invert.
				1497	for (int k = 0; k < 3; k++) {
				1498	if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
				1499	lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
				1500	lhs.vals[k][k] = 1;
				1501	}
				1502	}
				1503
				1504	// 3) invert lhs
				1505	skcms_Matrix3x3 lhs_inv;
				1506	if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
				1507	return false;
				1508	}
				1509
				1510	// 4) multiply inverse lhs by rhs
				1511	skcms_Vector3 dP = skcms_MV_mul(&lhs_inv, &rhs);
				1512	P[0] += dP.vals[0];
				1513	P[1] += dP.vals[1];
				1514	P[2] += dP.vals[2];
				1515	return isfinitef_(P[0]) && isfinitef_(P[1]) && isfinitef_(P[2]);
				1516	}
				1517
				1518
				1519	// Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
				1520	static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
				1521	float P[3] = { tf->g, tf->a, tf->b };
				1522
				1523	// No matter where we start, dx should always represent N even steps from 0 to 1.
				1524	const float dx = 1.0f / (N-1);
				1525
				1526	for (int j = 0; j < 3/TODO: tune/; j++) {
				1527	// These extra constraints a >= 0 and ad+b >= 0 are not modeled in the optimization.
				1528	// We don't really know how to fix up a if it goes negative.
				1529	if (P[1] < 0) {
				1530	return false;
				1531	}
				1532	// If ad+b goes negative, we feel just barely not uneasy enough to tweak b so ad+b is zero.
				1533	if (P[1] * tf->d + P[2] < 0) {
				1534	P[2] = -P[1] * tf->d;
				1535	}
				1536	assert (P[1] >= 0 &&
				1537	P[1] * tf->d + P[2] >= 0);
				1538
				1539	if (!gauss_newton_step(curve, tf,
				1540	P,
				1541	L*dx, dx, N-L)) {
				1542	return false;
				1543	}
				1544	}
				1545
				1546	// We need to apply our fixups one last time
				1547	if (P[1] < 0) {
				1548	return false;
				1549	}
				1550	if (P[1] * tf->d + P[2] < 0) {
				1551	P[2] = -P[1] * tf->d;
				1552	}
				1553
				1554	tf->g = P[0];
				1555	tf->a = P[1];
				1556	tf->b = P[2];
				1557	tf->e = tf->c*tf->d + tf->f
				1558	- powf_(tf->a*tf->d + tf->b, tf->g);
				1559	return true;
				1560	}
				1561
				1562	bool skcms_ApproximateCurve(const skcms_Curve* curve,
				1563	skcms_TransferFunction* approx,
				1564	float* max_error) {
				1565	if (!curve \|\| !approx \|\| !max_error) {
				1566	return false;
				1567	}
				1568
				1569	if (curve->table_entries == 0) {
				1570	// No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
				1571	return false;
				1572	}
				1573
				1574	if (curve->table_entries == 1 \|\| curve->table_entries > (uint32_t)INT_MAX) {
				1575	// We need at least two points, and must put some reasonable cap on the maximum number.
				1576	return false;
				1577	}
				1578
				1579	int N = (int)curve->table_entries;
				1580	const float dx = 1.0f / (N - 1);
				1581
				1582	*max_error = INFINITY_;
				1583	const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
				1584	for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
				1585	skcms_TransferFunction tf,
				1586	tf_inv;
				1587	int L = skcms_fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d, &tf.f);
				1588
				1589	if (L == N) {
				1590	// If the entire data set was linear, move the coefficients to the nonlinear portion
				1591	// with G == 1. This lets use a canonical representation with d == 0.
				1592	tf.g = 1;
				1593	tf.a = tf.c;
				1594	tf.b = tf.f;
				1595	tf.c = tf.d = tf.e = tf.f = 0;
				1596	} else if (L == N - 1) {
				1597	// Degenerate case with only two points in the nonlinear segment. Solve directly.
				1598	tf.g = 1;
				1599	tf.a = (skcms_eval_curve(curve, (N-1)*dx) -
				1600	skcms_eval_curve(curve, (N-2)*dx))
				1601	/ dx;
				1602	tf.b = skcms_eval_curve(curve, (N-2)*dx)
				1603	- tf.a * (N-2)*dx;
				1604	tf.e = 0;
				1605	} else {
				1606	// Start by guessing a gamma-only curve through the midpoint.
				1607	int mid = (L + N) / 2;
				1608	float mid_x = mid / (N - 1.0f);
				1609	float mid_y = skcms_eval_curve(curve, mid_x);
				1610	tf.g = log2f_(mid_y) / log2f_(mid_x);;
				1611	tf.a = 1;
				1612	tf.b = 0;
				1613	tf.e = tf.c*tf.d + tf.f
				1614	- powf_(tf.a*tf.d + tf.b, tf.g);
				1615
				1616
				1617	if (!skcms_TransferFunction_invert(&tf, &tf_inv) \|\|
				1618	!fit_nonlinear(curve, L,N, &tf_inv)) {
				1619	continue;
				1620	}
				1621
				1622	// We fit tf_inv, so calculate tf to keep in sync.
				1623	if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
				1624	continue;
				1625	}
				1626	}
				1627
				1628	// We find our error by roundtripping the table through tf_inv.
				1629	//
				1630	// (The most likely use case for this approximation is to be inverted and
				1631	// used as the transfer function for a destination color space.)
				1632	//
				1633	// We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
				1634	// invertible, so re-verify that here (and use the new inverse for testing).
				1635	if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
				1636	continue;
				1637	}
				1638
				1639	float err = skcms_MaxRoundtripError(curve, &tf_inv);
				1640	if (*max_error > err) {
				1641	*max_error = err;
				1642	*approx = tf;
				1643	}
				1644	}
				1645	return isfinitef_(*max_error);
				1646	}
				1647
				1648	// Without this wasm would try to use the N=4 128-bit vector code path,
				1649	// which while ideal, causes tons of compiler problems. This would be
				1650	// a good thing to revisit as emcc matures (currently 1.38.5).
				1651	#if 1 && defined(__EMSCRIPTEN_major__)
				1652	#if !defined(SKCMS_PORTABLE)
				1653	#define SKCMS_PORTABLE
				1654	#endif
				1655	#endif
				1656
				1657	extern bool g_skcms_dump_profile;
				1658	bool g_skcms_dump_profile = false;
				1659
				1660	#if !defined(NDEBUG) && defined(__clang__)
				1661	// Basic profiling tools to time each Op. Not at all thread safe.
				1662
				1663	#include <stdio.h>
				1664	#include <stdlib.h>
				1665
				1666	#if defined(__arm__) \|\| defined(__aarch64__)
				1667	#include <time.h>
				1668	static const char* now_units = "ticks";
				1669	static uint64_t now() { return (uint64_t)clock(); }
				1670	#else
				1671	static const char* now_units = "cycles";
				1672	static uint64_t now() { return __builtin_readcyclecounter(); }
				1673	#endif
				1674
				1675	#define M(op) +1
				1676	static uint64_t counts[FOREACH_Op(M)];
				1677	#undef M
				1678
				1679	static void profile_dump_stats() {
				1680	#define M(op) #op,
				1681	static const char* names[] = { FOREACH_Op(M) };
				1682	#undef M
				1683	for (int i = 0; i < ARRAY_COUNT(counts); i++) {
				1684	if (counts[i]) {
				1685	fprintf(stderr, "%16s: %12llu %s\n",
				1686	names[i], (unsigned long long)counts[i], now_units);
				1687	}
				1688	}
				1689	}
				1690
				1691	static inline Op profile_next_op(Op op) {
				1692	if (__builtin_expect(g_skcms_dump_profile, false)) {
				1693	static uint64_t start = 0;
				1694	static uint64_t* current = NULL;
				1695
				1696	if (!current) {
				1697	atexit(profile_dump_stats);
				1698	} else {
				1699	*current += now() - start;
				1700	}
				1701
				1702	current = &counts[op];
				1703	start = now();
				1704	}
				1705	return op;
				1706	}
				1707	#else
				1708	static inline Op profile_next_op(Op op) {
				1709	(void)g_skcms_dump_profile;
				1710	return op;
				1711	}
				1712	#endif
				1713
				1714	#if defined(__clang__)
				1715	typedef float __attribute__((ext_vector_type(4))) Fx4;
				1716	typedef int32_t __attribute__((ext_vector_type(4))) I32x4;
				1717	typedef uint64_t __attribute__((ext_vector_type(4))) U64x4;
				1718	typedef uint32_t __attribute__((ext_vector_type(4))) U32x4;
				1719	typedef uint16_t __attribute__((ext_vector_type(4))) U16x4;
				1720	typedef uint8_t __attribute__((ext_vector_type(4))) U8x4;
				1721
				1722	typedef float __attribute__((ext_vector_type(8))) Fx8;
				1723	typedef int32_t __attribute__((ext_vector_type(8))) I32x8;
				1724	typedef uint64_t __attribute__((ext_vector_type(8))) U64x8;
				1725	typedef uint32_t __attribute__((ext_vector_type(8))) U32x8;
				1726	typedef uint16_t __attribute__((ext_vector_type(8))) U16x8;
				1727	typedef uint8_t __attribute__((ext_vector_type(8))) U8x8;
				1728
				1729	typedef float __attribute__((ext_vector_type(16))) Fx16;
				1730	typedef int32_t __attribute__((ext_vector_type(16))) I32x16;
				1731	typedef uint64_t __attribute__((ext_vector_type(16))) U64x16;
				1732	typedef uint32_t __attribute__((ext_vector_type(16))) U32x16;
				1733	typedef uint16_t __attribute__((ext_vector_type(16))) U16x16;
				1734	typedef uint8_t __attribute__((ext_vector_type(16))) U8x16;
				1735	#elif defined(__GNUC__)
				1736	typedef float __attribute__((vector_size(16))) Fx4;
				1737	typedef int32_t __attribute__((vector_size(16))) I32x4;
				1738	typedef uint64_t __attribute__((vector_size(32))) U64x4;
				1739	typedef uint32_t __attribute__((vector_size(16))) U32x4;
				1740	typedef uint16_t __attribute__((vector_size( 8))) U16x4;
				1741	typedef uint8_t __attribute__((vector_size( 4))) U8x4;
				1742
				1743	typedef float __attribute__((vector_size(32))) Fx8;
				1744	typedef int32_t __attribute__((vector_size(32))) I32x8;
				1745	typedef uint64_t __attribute__((vector_size(64))) U64x8;
				1746	typedef uint32_t __attribute__((vector_size(32))) U32x8;
				1747	typedef uint16_t __attribute__((vector_size(16))) U16x8;
				1748	typedef uint8_t __attribute__((vector_size( 8))) U8x8;
				1749
				1750	typedef float __attribute__((vector_size( 64))) Fx16;
				1751	typedef int32_t __attribute__((vector_size( 64))) I32x16;
				1752	typedef uint64_t __attribute__((vector_size(128))) U64x16;
				1753	typedef uint32_t __attribute__((vector_size( 64))) U32x16;
				1754	typedef uint16_t __attribute__((vector_size( 32))) U16x16;
				1755	typedef uint8_t __attribute__((vector_size( 16))) U8x16;
				1756	#endif
				1757
				1758	// First, instantiate our default exec_ops() implementation using the default compiliation target.
				1759
				1760	#if defined(SKCMS_PORTABLE) \|\| !(defined(__clang__) \|\| defined(__GNUC__))
				1761	#define N 1
				1762
				1763	#define F float
				1764	#define U64 uint64_t
				1765	#define U32 uint32_t
				1766	#define I32 int32_t
				1767	#define U16 uint16_t
				1768	#define U8 uint8_t
				1769
				1770	#define F0 0.0f
				1771	#define F1 1.0f
				1772
				1773	#elif defined(__AVX512F__)
				1774	#define N 16
				1775
				1776	#define F Fx16
				1777	#define U64 U64x16
				1778	#define U32 U32x16
				1779	#define I32 I32x16
				1780	#define U16 U16x16
				1781	#define U8 U8x16
				1782
				1783	#define F0 (F){0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0}
				1784	#define F1 (F){1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1}
				1785	#elif defined(__AVX__)
				1786	#define N 8
				1787
				1788	#define F Fx8
				1789	#define U64 U64x8
				1790	#define U32 U32x8
				1791	#define I32 I32x8
				1792	#define U16 U16x8
				1793	#define U8 U8x8
				1794
				1795	#define F0 (F){0,0,0,0, 0,0,0,0}
				1796	#define F1 (F){1,1,1,1, 1,1,1,1}
				1797	#else
				1798	#define N 4
				1799
				1800	#define F Fx4
				1801	#define U64 U64x4
				1802	#define U32 U32x4
				1803	#define I32 I32x4
				1804	#define U16 U16x4
				1805	#define U8 U8x4
				1806
				1807	#define F0 (F){0,0,0,0}
				1808	#define F1 (F){1,1,1,1}
				1809	#endif
				1810
				1811	#define NS(id) id
				1812	#define ATTR
				1813	#include "src/Transform_inl.h"
				1814	#undef N
				1815	#undef F
				1816	#undef U64
				1817	#undef U32
				1818	#undef I32
				1819	#undef U16
				1820	#undef U8
				1821	#undef F0
				1822	#undef F1
				1823	#undef NS
				1824	#undef ATTR
				1825
				1826	// Now, instantiate any other versions of run_program() we may want for runtime detection.
				1827	#if !defined(SKCMS_PORTABLE) && (defined(__clang__) \|\| defined(__GNUC__)) \
				1828	&& defined(__x86_64__) && !defined(__AVX2__)
				1829	#define N 8
				1830	#define F Fx8
				1831	#define U64 U64x8
				1832	#define U32 U32x8
				1833	#define I32 I32x8
				1834	#define U16 U16x8
				1835	#define U8 U8x8
				1836	#define F0 (F){0,0,0,0, 0,0,0,0}
				1837	#define F1 (F){1,1,1,1, 1,1,1,1}
				1838
				1839	#define NS(id) id ## _hsw
				1840	#define ATTR __attribute__((target("avx2,f16c")))
				1841
				1842	// We check these guards to see if we have support for these features.
				1843	// They're likely _not_ defined here in our baseline build config.
				1844	#ifndef __AVX__
				1845	#define __AVX__ 1
				1846	#define UNDEF_AVX
				1847	#endif
				1848	#ifndef __F16C__
				1849	#define __F16C__ 1
				1850	#define UNDEF_F16C
				1851	#endif
				1852	#ifndef __AVX2__
				1853	#define __AVX2__ 1
				1854	#define UNDEF_AVX2
				1855	#endif
				1856
				1857	#include "src/Transform_inl.h"
				1858
				1859	#undef N
				1860	#undef F
				1861	#undef U64
				1862	#undef U32
				1863	#undef I32
				1864	#undef U16
				1865	#undef U8
				1866	#undef F0
				1867	#undef F1
				1868	#undef NS
				1869	#undef ATTR
				1870
				1871	#ifdef UNDEF_AVX
				1872	#undef __AVX__
				1873	#undef UNDEF_AVX
				1874	#endif
				1875	#ifdef UNDEF_F16C
				1876	#undef __F16C__
				1877	#undef UNDEF_F16C
				1878	#endif
				1879	#ifdef UNDEF_AVX2
				1880	#undef __AVX2__
				1881	#undef UNDEF_AVX2
				1882	#endif
				1883
				1884	#define TEST_FOR_HSW
				1885
				1886	static bool hsw_ok_ = false;
				1887	static void check_hsw_ok() {
				1888	// See http://www.sandpile.org/x86/cpuid.htm
				1889
				1890	// First, a basic cpuid(1).
				1891	uint32_t eax, ebx, ecx, edx;
				1892	__asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
				1893	: "0"(1), "2"(0));
				1894
				1895	// Sanity check for prerequisites.
				1896	if ((edx & (1<<25)) != (1<<25)) { return; } // SSE
				1897	if ((edx & (1<<26)) != (1<<26)) { return; } // SSE2
				1898	if ((ecx & (1<< 0)) != (1<< 0)) { return; } // SSE3
				1899	if ((ecx & (1<< 9)) != (1<< 9)) { return; } // SSSE3
				1900	if ((ecx & (1<<19)) != (1<<19)) { return; } // SSE4.1
				1901	if ((ecx & (1<<20)) != (1<<20)) { return; } // SSE4.2
				1902
				1903	if ((ecx & (3<<26)) != (3<<26)) { return; } // XSAVE + OSXSAVE
				1904
				1905	{
				1906	uint32_t eax_xgetbv, edx_xgetbv;
				1907	__asm__ __volatile__("xgetbv" : "=a"(eax_xgetbv), "=d"(edx_xgetbv) : "c"(0));
				1908	if ((eax_xgetbv & (3<<1)) != (3<<1)) { return; } // XMM+YMM state saved?
				1909	}
				1910
				1911	if ((ecx & (1<<28)) != (1<<28)) { return; } // AVX
				1912	if ((ecx & (1<<29)) != (1<<29)) { return; } // F16C
				1913	if ((ecx & (1<<12)) != (1<<12)) { return; } // FMA (TODO: not currently used)
				1914
				1915	// Call cpuid(7) to check for our final AVX2 feature bit!
				1916	__asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
				1917	: "0"(7), "2"(0));
				1918	if ((ebx & (1<< 5)) != (1<< 5)) { return; } // AVX2
				1919
				1920	hsw_ok_ = true;
				1921	}
				1922
				1923	#if defined(_MSC_VER)
				1924	#include <Windows.h>
				1925	INIT_ONCE check_hsw_ok_once = INIT_ONCE_STATIC_INIT;
				1926
				1927	static BOOL check_hsw_ok_InitOnce_wrapper(INIT_ONCE* once, void* param, void** ctx) {
				1928	(void)once;
				1929	(void)param;
				1930	(void)ctx;
				1931	check_hsw_ok();
				1932	return TRUE;
				1933	}
				1934
				1935	static bool hsw_ok() {
				1936	InitOnceExecuteOnce(&check_hsw_ok_once, check_hsw_ok_InitOnce_wrapper, NULL, NULL);
				1937	return hsw_ok_;
				1938	}
				1939	#else
				1940	#include <pthread.h>
				1941	static pthread_once_t check_hsw_ok_once = PTHREAD_ONCE_INIT;
				1942
				1943	static bool hsw_ok() {
				1944	pthread_once(&check_hsw_ok_once, check_hsw_ok);
				1945	return hsw_ok_;
				1946	}
				1947	#endif
				1948
				1949	#endif
				1950
				1951	static bool is_identity_tf(const skcms_TransferFunction* tf) {
				1952	return tf->g == 1 && tf->a == 1
				1953	&& tf->b == 0 && tf->c == 0 && tf->d == 0 && tf->e == 0 && tf->f == 0;
				1954	}
				1955
				1956	typedef struct {
				1957	Op op;
				1958	const void* arg;
				1959	} OpAndArg;
				1960
				1961	static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
				1962	static const struct { Op parametric, table_8, table_16; } ops[] = {
				1963	{ Op_tf_r, Op_table_8_r, Op_table_16_r },
				1964	{ Op_tf_g, Op_table_8_g, Op_table_16_g },
				1965	{ Op_tf_b, Op_table_8_b, Op_table_16_b },
				1966	{ Op_tf_a, Op_table_8_a, Op_table_16_a },
				1967	};
				1968
				1969	if (curve->table_entries == 0) {
				1970	return is_identity_tf(&curve->parametric)
				1971	? (OpAndArg){ Op_noop, NULL }
				1972	: (OpAndArg){ ops[channel].parametric, &curve->parametric };
				1973	} else if (curve->table_8) {
				1974	return (OpAndArg){ ops[channel].table_8, curve };
				1975	} else if (curve->table_16) {
				1976	return (OpAndArg){ ops[channel].table_16, curve };
				1977	}
				1978
				1979	assert(false);
				1980	return (OpAndArg){Op_noop,NULL};
				1981	}
				1982
				1983	static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
				1984	switch (fmt >> 1) { // ignore rgb/bgr
				1985	case skcms_PixelFormat_A_8 >> 1: return 1;
				1986	case skcms_PixelFormat_G_8 >> 1: return 1;
				1987	case skcms_PixelFormat_ABGR_4444 >> 1: return 2;
				1988	case skcms_PixelFormat_RGB_565 >> 1: return 2;
				1989	case skcms_PixelFormat_RGB_888 >> 1: return 3;
				1990	case skcms_PixelFormat_RGBA_8888 >> 1: return 4;
				1991	case skcms_PixelFormat_RGBA_1010102 >> 1: return 4;
				1992	case skcms_PixelFormat_RGB_161616 >> 1: return 6;
				1993	case skcms_PixelFormat_RGBA_16161616 >> 1: return 8;
				1994	case skcms_PixelFormat_RGB_hhh >> 1: return 6;
				1995	case skcms_PixelFormat_RGBA_hhhh >> 1: return 8;
				1996	case skcms_PixelFormat_RGB_fff >> 1: return 12;
				1997	case skcms_PixelFormat_RGBA_ffff >> 1: return 16;
				1998	}
				1999	assert(false);
				2000	return 0;
				2001	}
				2002
				2003	static bool prep_for_destination(const skcms_ICCProfile* profile,
				2004	skcms_Matrix3x3* fromXYZD50,
				2005	skcms_TransferFunction* invR,
				2006	skcms_TransferFunction* invG,
				2007	skcms_TransferFunction* invB) {
				2008	// We only support destinations with parametric transfer functions
				2009	// and with gamuts that can be transformed from XYZD50.
				2010	return profile->has_trc
				2011	&& profile->has_toXYZD50
				2012	&& profile->trc[0].table_entries == 0
				2013	&& profile->trc[1].table_entries == 0
				2014	&& profile->trc[2].table_entries == 0
				2015	&& skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
				2016	&& skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
				2017	&& skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
				2018	&& skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
				2019	}
				2020
				2021	bool skcms_Transform(const void* src,
				2022	skcms_PixelFormat srcFmt,
				2023	skcms_AlphaFormat srcAlpha,
				2024	const skcms_ICCProfile* srcProfile,
				2025	void* dst,
				2026	skcms_PixelFormat dstFmt,
				2027	skcms_AlphaFormat dstAlpha,
				2028	const skcms_ICCProfile* dstProfile,
				2029	size_t nz) {
				2030	const size_t dst_bpp = bytes_per_pixel(dstFmt),
				2031	src_bpp = bytes_per_pixel(srcFmt);
				2032	// Let's just refuse if the request is absurdly big.
				2033	if (nz * dst_bpp > INT_MAX \|\| nz * src_bpp > INT_MAX) {
				2034	return false;
				2035	}
				2036	int n = (int)nz;
				2037
				2038	// Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
				2039	if (!srcProfile) {
				2040	srcProfile = skcms_sRGB_profile();
				2041	}
				2042	if (!dstProfile) {
				2043	dstProfile = skcms_sRGB_profile();
				2044	}
				2045
				2046	// We can't transform in place unless the PixelFormats are the same size.
				2047	if (dst == src && (dstFmt >> 1) != (srcFmt >> 1)) {
				2048	return false;
				2049	}
				2050	// TODO: this check lazilly disallows U16 <-> F16, but that would actually be fine.
				2051	// TODO: more careful alias rejection (like, dst == src + 1)?
				2052
				2053	Op program [32];
				2054	const void* arguments[32];
				2055
				2056	Op* ops = program;
				2057	const void** args = arguments;
				2058
				2059	skcms_TransferFunction inv_dst_tf_r, inv_dst_tf_g, inv_dst_tf_b;
				2060	skcms_Matrix3x3 from_xyz;
				2061
				2062	switch (srcFmt >> 1) {
				2063	default: return false;
				2064	case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_load_a8; break;
				2065	case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_load_g8; break;
				2066	case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_load_4444; break;
				2067	case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_load_565; break;
				2068	case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_load_888; break;
				2069	case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_load_8888; break;
				2070	case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_load_1010102; break;
				2071	case skcms_PixelFormat_RGB_161616 >> 1: *ops++ = Op_load_161616; break;
				2072	case skcms_PixelFormat_RGBA_16161616 >> 1: *ops++ = Op_load_16161616; break;
				2073	case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_load_hhh; break;
				2074	case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_load_hhhh; break;
				2075	case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_load_fff; break;
				2076	case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_load_ffff; break;
				2077	}
				2078	if (srcFmt & 1) {
				2079	*ops++ = Op_swap_rb;
				2080	}
				2081	skcms_ICCProfile gray_dst_profile;
				2082	if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
				2083	// When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
				2084	// luminance (Y) by the destination transfer function.
				2085	gray_dst_profile = *dstProfile;
				2086	skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
				2087	dstProfile = &gray_dst_profile;
				2088	}
				2089
				2090	if (srcProfile->data_color_space == skcms_Signature_CMYK) {
				2091	// Photoshop creates CMYK images as inverse CMYK.
				2092	// These happen to be the only ones we've _ever_ seen.
				2093	*ops++ = Op_invert;
				2094	// With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
				2095	srcAlpha = skcms_AlphaFormat_Unpremul;
				2096	}
				2097
				2098	if (srcAlpha == skcms_AlphaFormat_Opaque) {
				2099	*ops++ = Op_force_opaque;
				2100	} else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
				2101	*ops++ = Op_unpremul;
				2102	}
				2103
				2104	// TODO: We can skip this work if both srcAlpha and dstAlpha are PremulLinear, and the profiles
				2105	// are the same. Also, if dstAlpha is PremulLinear, and SrcAlpha is Opaque.
				2106	if (dstProfile != srcProfile \|\|
				2107	srcAlpha == skcms_AlphaFormat_PremulLinear \|\|
				2108	dstAlpha == skcms_AlphaFormat_PremulLinear) {
				2109
				2110	if (!prep_for_destination(dstProfile,
				2111	&from_xyz, &inv_dst_tf_r, &inv_dst_tf_b, &inv_dst_tf_g)) {
				2112	return false;
				2113	}
				2114
				2115	if (srcProfile->has_A2B) {
				2116	if (srcProfile->A2B.input_channels) {
				2117	for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) {
				2118	OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i);
				2119	if (oa.op != Op_noop) {
				2120	*ops++ = oa.op;
				2121	*args++ = oa.arg;
				2122	}
				2123	}
				2124	switch (srcProfile->A2B.input_channels) {
				2125	case 3: *ops++ = srcProfile->A2B.grid_8 ? Op_clut_3D_8 : Op_clut_3D_16; break;
				2126	case 4: *ops++ = srcProfile->A2B.grid_8 ? Op_clut_4D_8 : Op_clut_4D_16; break;
				2127	default: return false;
				2128	}
				2129	*args++ = &srcProfile->A2B;
				2130	}
				2131
				2132	if (srcProfile->A2B.matrix_channels == 3) {
				2133	for (int i = 0; i < 3; i++) {
				2134	OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i);
				2135	if (oa.op != Op_noop) {
				2136	*ops++ = oa.op;
				2137	*args++ = oa.arg;
				2138	}
				2139	}
				2140
				2141	static const skcms_Matrix3x4 I = {{
				2142	{1,0,0,0},
				2143	{0,1,0,0},
				2144	{0,0,1,0},
				2145	}};
				2146	if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
				2147	*ops++ = Op_matrix_3x4;
				2148	*args++ = &srcProfile->A2B.matrix;
				2149	}
				2150	}
				2151
				2152	if (srcProfile->A2B.output_channels == 3) {
				2153	for (int i = 0; i < 3; i++) {
				2154	OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i);
				2155	if (oa.op != Op_noop) {
				2156	*ops++ = oa.op;
				2157	*args++ = oa.arg;
				2158	}
				2159	}
				2160	}
				2161
				2162	if (srcProfile->pcs == skcms_Signature_Lab) {
				2163	*ops++ = Op_lab_to_xyz;
				2164	}
				2165
				2166	} else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
				2167	for (int i = 0; i < 3; i++) {
				2168	OpAndArg oa = select_curve_op(&srcProfile->trc[i], i);
				2169	if (oa.op != Op_noop) {
				2170	*ops++ = oa.op;
				2171	*args++ = oa.arg;
				2172	}
				2173	}
				2174	} else {
				2175	return false;
				2176	}
				2177
				2178	// At this point our source colors are linear, either RGB (XYZ-type profiles)
				2179	// or XYZ (A2B-type profiles). Unpremul is a linear operation (multiply by a
				2180	// constant 1/a), so either way we can do it now if needed.
				2181	if (srcAlpha == skcms_AlphaFormat_PremulLinear) {
				2182	*ops++ = Op_unpremul;
				2183	}
				2184
				2185	// A2B sources should already be in XYZD50 at this point.
				2186	// Others still need to be transformed using their toXYZD50 matrix.
				2187	// N.B. There are profiles that contain both A2B tags and toXYZD50 matrices.
				2188	// If we use the A2B tags, we need to ignore the XYZD50 matrix entirely.
				2189	assert (srcProfile->has_A2B \|\| srcProfile->has_toXYZD50);
				2190	static const skcms_Matrix3x3 I = {{
				2191	{ 1.0f, 0.0f, 0.0f },
				2192	{ 0.0f, 1.0f, 0.0f },
				2193	{ 0.0f, 0.0f, 1.0f },
				2194	}};
				2195	const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
				2196
				2197	// There's a chance the source and destination gamuts are identical,
				2198	// in which case we can skip the gamut transform.
				2199	if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
				2200	// Concat the entire gamut transform into from_xyz,
				2201	// now slightly misnamed but it's a handy spot to stash the result.
				2202	from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
				2203	*ops++ = Op_matrix_3x3;
				2204	*args++ = &from_xyz;
				2205	}
				2206
				2207	if (dstAlpha == skcms_AlphaFormat_PremulLinear) {
				2208	*ops++ = Op_premul;
				2209	}
				2210
				2211	// Encode back to dst RGB using its parametric transfer functions.
				2212	if (!is_identity_tf(&inv_dst_tf_r)) { ops++ = Op_tf_r; args++ = &inv_dst_tf_r; }
				2213	if (!is_identity_tf(&inv_dst_tf_g)) { ops++ = Op_tf_g; args++ = &inv_dst_tf_g; }
				2214	if (!is_identity_tf(&inv_dst_tf_b)) { ops++ = Op_tf_b; args++ = &inv_dst_tf_b; }
				2215	}
				2216
				2217	if (dstAlpha == skcms_AlphaFormat_Opaque) {
				2218	*ops++ = Op_force_opaque;
				2219	} else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
				2220	*ops++ = Op_premul;
				2221	}
				2222	if (dstFmt & 1) {
				2223	*ops++ = Op_swap_rb;
				2224	}
				2225	if (dstFmt < skcms_PixelFormat_RGB_hhh) {
				2226	*ops++ = Op_clamp;
				2227	}
				2228	switch (dstFmt >> 1) {
				2229	default: return false;
				2230	case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_store_a8; break;
				2231	case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_store_g8; break;
				2232	case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_store_4444; break;
				2233	case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_store_565; break;
				2234	case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_store_888; break;
				2235	case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_store_8888; break;
				2236	case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_store_1010102; break;
				2237	case skcms_PixelFormat_RGB_161616 >> 1: *ops++ = Op_store_161616; break;
				2238	case skcms_PixelFormat_RGBA_16161616 >> 1: *ops++ = Op_store_16161616; break;
				2239	case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_store_hhh; break;
				2240	case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_store_hhhh; break;
				2241	case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_store_fff; break;
				2242	case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_store_ffff; break;
				2243	}
				2244
				2245	void (run)(const Op, const void*, const char, char*, int, size_t,size_t) = run_program;
				2246	#if defined(TEST_FOR_HSW)
				2247	if (hsw_ok()) {
				2248	run = run_program_hsw;
				2249	}
				2250	#endif
				2251	run(program, arguments, src, dst, n, src_bpp,dst_bpp);
				2252	return true;
				2253	}
				2254
				2255	static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
				2256	#if defined(NDEBUG)
				2257	(void)profile;
				2258	#else
				2259	skcms_Matrix3x3 fromXYZD50;
				2260	skcms_TransferFunction invR, invG, invB;
				2261	assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
				2262	#endif
				2263	}
				2264
				2265	bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
				2266	skcms_Matrix3x3 fromXYZD50;
				2267	if (!profile->has_trc \|\| !profile->has_toXYZD50
				2268	\|\| !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
				2269	return false;
				2270	}
				2271
				2272	skcms_TransferFunction tf[3];
				2273	for (int i = 0; i < 3; i++) {
				2274	skcms_TransferFunction inv;
				2275	if (profile->trc[i].table_entries == 0
				2276	&& skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
				2277	tf[i] = profile->trc[i].parametric;
				2278	continue;
				2279	}
				2280
				2281	float max_error;
				2282	// Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
				2283	if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
				2284	return false;
				2285	}
				2286	}
				2287
				2288	for (int i = 0; i < 3; ++i) {
				2289	profile->trc[i].table_entries = 0;
				2290	profile->trc[i].parametric = tf[i];
				2291	}
				2292
				2293	assert_usable_as_destination(profile);
				2294	return true;
				2295	}
				2296
				2297	bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
				2298	// Operate on a copy of profile, so we can choose the best TF for the original curves
				2299	skcms_ICCProfile result = *profile;
				2300	if (!skcms_MakeUsableAsDestination(&result)) {
				2301	return false;
				2302	}
				2303
				2304	int best_tf = 0;
				2305	float min_max_error = INFINITY_;
				2306	for (int i = 0; i < 3; i++) {
				2307	skcms_TransferFunction inv;
				2308	skcms_TransferFunction_invert(&result.trc[i].parametric, &inv);
				2309
				2310	float err = 0;
				2311	for (int j = 0; j < 3; ++j) {
				2312	err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv));
				2313	}
				2314	if (min_max_error > err) {
				2315	min_max_error = err;
				2316	best_tf = i;
				2317	}
				2318	}
				2319
				2320	for (int i = 0; i < 3; i++) {
				2321	result.trc[i].parametric = result.trc[best_tf].parametric;
				2322	}
				2323
				2324	*profile = result;
				2325	assert_usable_as_destination(profile);
				2326	return true;
				2327	}