Blame - src/qs8-requantization/gemmlowp-scalar.c - platform/external/XNNPACK

blob: 119e81edb9beb9c16b3c4e22eb01f671eb17ce05 [file] [log] [blame]

Marat Dukhan	2e23d2b	2020-07-29 16:01:37 -0700	[diff] [blame]	1	// Copyright (c) Facebook, Inc. and its affiliates.
				2	// All rights reserved.
				3	//
				4	// Copyright 2019 Google LLC
				5	//
				6	// This source code is licensed under the BSD-style license found in the
				7	// LICENSE file in the root directory of this source tree.
				8
				9	#include <assert.h>
				10	#include <stdint.h>
				11	#include <stddef.h>
				12
				13	#include <fp16/bitcasts.h>
				14
Marat Dukhan	f86ee8b	2021-05-24 23:41:31 -0700	[diff] [blame]	15	#include <xnnpack/math.h>
Marat Dukhan	2e23d2b	2020-07-29 16:01:37 -0700	[diff] [blame]	16	#include <xnnpack/requantization-stubs.h>
				17
				18
Marat Dukhan	9976cd8	2021-05-24 23:15:45 -0700	[diff] [blame]	19	void xnn_qs8_requantize_gemmlowp__scalar(
Marat Dukhan	2e23d2b	2020-07-29 16:01:37 -0700	[diff] [blame]	20	size_t n,
				21	const int32_t* input,
				22	float scale,
				23	int8_t zero_point,
				24	int8_t qmin,
				25	int8_t qmax,
				26	int8_t* output)
				27	{
				28	assert(n % 4 == 0);
				29	assert(scale < 1.0f);
				30	assert(scale >= 0x1.0p-32f);
				31
				32	// Compute requantization parameters.
				33	const uint32_t scale_bits = fp32_to_bits(scale);
				34
				35	// Multiplier is in [0x40000000, 0x7FFFFF80] range.
				36	const int32_t multiplier = (int32_t)(((scale_bits & UINT32_C(0x007FFFFF)) \| UINT32_C(0x00800000)) << 7);
				37	assert(multiplier >= INT32_C(0x40000000));
				38	assert(multiplier <= INT32_C(0x7FFFFF80));
				39
				40	// Shift is in [0, 31] range.
				41	const int32_t shift = 127 + 31 - 32 - (fp32_to_bits(scale) >> 23);
				42	assert(shift >= 0);
				43	assert(shift < 32);
				44
				45	const int64_t q31rounding = INT64_C(0x40000000);
				46	const int32_t remainder_mask = (int32_t)((UINT32_C(1) << shift) - UINT32_C(1));
				47	const int32_t threshold = (int32_t)((uint32_t) remainder_mask >> 1);
				48	const int32_t smin = (int32_t) qmin - (int32_t) zero_point;
				49	const int32_t smax = (int32_t) qmax - (int32_t) zero_point;
				50	for (; n != 0; n -= 4) {
				51	const int32_t x = input[0];
				52	const int32_t y = input[1];
				53	const int32_t z = input[2];
				54	const int32_t w = input[3];
				55	input += 4;
				56
				57	// Compute full 64-bit product of signed 32-bit factors.
				58	//
				59	// Note: multiplier can be treated as either signed or unsigned.
				60	const int64_t x_product = (int64_t) x * (int64_t) multiplier;
				61	const int64_t y_product = (int64_t) y * (int64_t) multiplier;
				62	const int64_t z_product = (int64_t) z * (int64_t) multiplier;
				63	const int64_t w_product = (int64_t) w * (int64_t) multiplier;
				64
				65	// Get the Q31 multiplication result by extracting bits 31-62 of the product, with rounding up.
				66	// Add rounding value (0x40000000) and then shift right by 31 bits and extract the low 32-bit word.
				67	// Note: casts to unsigned types are needed to avoid undefined behavior.
				68	// Given the multiplier range, the result of Q31 multiplication is in [-2147483520, 2147483519] range.
				69	const int32_t x_q31product = (int32_t) (uint32_t) ((uint64_t) (x_product + q31rounding) >> 31);
				70	const int32_t y_q31product = (int32_t) (uint32_t) ((uint64_t) (y_product + q31rounding) >> 31);
				71	const int32_t z_q31product = (int32_t) (uint32_t) ((uint64_t) (z_product + q31rounding) >> 31);
				72	const int32_t w_q31product = (int32_t) (uint32_t) ((uint64_t) (w_product + q31rounding) >> 31);
				73
				74	// Arithmetically shift the adjusted product right with rounding.
				75	// Rounding is performed towards closest integer, with midpoints rounded away from zero.
				76	//
				77	// Shift with correct rounding could be efficiently implemented by pre-adding rounding constant, but with input in
				78	// [-2147483520, 2147483519] range and rounding constant up to 2**30 we can't rule out overflow. This limitation
				79	// leaves us with 3 options:
				80	// 1. Extend input to 64-bit signed integer, perform addition and shift on 64-bit integers, then truncate result
				81	// to 32 bits.
				82	// 2. Detect overflow and handle this situation separately. Note that overflow is possible only when input is
				83	// positive, and even when addition of a rounding constant overflows 32-bit signed integer, it still doesn't
				84	// overflow 32-bit unsigned integer. Thus, in case of signed overflow, we can compute the result using unsigned
				85	// arithmetics, specifically using logical shift right instead of arithmetic shift right.
				86	// 3. Performs arithmetic shift as is, which will produce division result rounded down. Then compute remainder of
				87	// this division by a power of 2, and adjust the result. Result needs adjustment (increment by 1) when
				88	// - input is positive, shift is non-zero, and remainder >= 2**(shift - 1), e.g. 10 >> 2 needs adjustment
				89	// - input is negative, shift is non-zero, and remainder > 2**(shift - 1), e.g. -10 >> 2 doesn't need adjustment
				90	// These conditions can be generalized as
				91	// remainder + (input <= 0) > 2**(shift - 1)
				92	// or equivalently
				93	// remainder - (input < 0) > ((2**shift - 1) >> 1)
				94	// When shift is 0, remainder is 0 as well, the last condition is always false, and no adjustment is done.
				95	//
				96	// Among these options, option 3 is the most performant across the board, although option 1 is promising for 64-bit
				97	// instruction sets.
				98	const int32_t x_remainder = (x_q31product & remainder_mask) - (int32_t) (x_q31product < 0);
				99	const int32_t y_remainder = (y_q31product & remainder_mask) - (int32_t) (y_q31product < 0);
				100	const int32_t z_remainder = (z_q31product & remainder_mask) - (int32_t) (z_q31product < 0);
				101	const int32_t w_remainder = (w_q31product & remainder_mask) - (int32_t) (w_q31product < 0);
				102
				103	const int32_t x_scaled = asr_s32(x_q31product, shift) + (int32_t) (x_remainder > threshold);
				104	const int32_t y_scaled = asr_s32(y_q31product, shift) + (int32_t) (y_remainder > threshold);
				105	const int32_t z_scaled = asr_s32(z_q31product, shift) + (int32_t) (z_remainder > threshold);
				106	const int32_t w_scaled = asr_s32(w_q31product, shift) + (int32_t) (w_remainder > threshold);
				107
				108	// Clamp scaled value with zero point between (qmin - zero point) and (qmax - zero point).
Marat Dukhan	062bee3	2021-05-27 20:31:07 -0700	[diff] [blame^]	109	const int32_t x_clamped = math_min_s32(math_max_s32(x_scaled, smin), smax);
				110	const int32_t y_clamped = math_min_s32(math_max_s32(y_scaled, smin), smax);
				111	const int32_t z_clamped = math_min_s32(math_max_s32(z_scaled, smin), smax);
				112	const int32_t w_clamped = math_min_s32(math_max_s32(w_scaled, smin), smax);
Marat Dukhan	2e23d2b	2020-07-29 16:01:37 -0700	[diff] [blame]	113
				114	// Add zero point to clamped value.
				115	// The result is guaranteed to be in [qmin, qmax] range.
				116	//
				117	// This addition can be safely done before clamping, because scaled values are in [-2147483520, 2147483519]
				118	// range, so addition of zero point (which is in [-128, 127] range) can not overflow signed 32-bit integer.
				119	const int32_t x_biased = x_clamped + zero_point;
				120	const int32_t y_biased = y_clamped + zero_point;
				121	const int32_t z_biased = z_clamped + zero_point;
				122	const int32_t w_biased = w_clamped + zero_point;
				123
				124	output[0] = (int8_t) x_biased;
				125	output[1] = (int8_t) y_biased;
				126	output[2] = (int8_t) z_biased;
				127	output[3] = (int8_t) w_biased;
				128	output += 4;
				129	}
				130	}