Blame - src/qs8-requantization/fp32-scalar-lrintf.c - platform/external/XNNPACK

blob: 29937fe59126b0561d4d0e76c2784ee12f25735c [file] [log] [blame]

Marat Dukhan	2e23d2b	2020-07-29 16:01:37 -0700	[diff] [blame^]	1	// Copyright (c) Facebook, Inc. and its affiliates.
				2	// All rights reserved.
				3	//
				4	// Copyright 2019 Google LLC
				5	//
				6	// This source code is licensed under the BSD-style license found in the
				7	// LICENSE file in the root directory of this source tree.
				8
				9	#include <assert.h>
				10	#include <math.h>
				11	#include <stdint.h>
				12	#include <stddef.h>
				13
				14	#include <fp16/bitcasts.h>
				15
				16	#include <xnnpack/requantization-stubs.h>
				17
				18
				19	void xnn_qs8_requantize_fp32__scalar_lrintf(
				20	size_t n,
				21	const int32_t* input,
				22	float scale,
				23	int8_t zero_point,
				24	int8_t qmin,
				25	int8_t qmax,
				26	int8_t* output)
				27	{
				28	assert(n % 4 == 0);
				29	assert(scale < 1.0f);
				30	assert(scale >= 0x1.0p-32f);
				31
				32	const long lmin = (long) ((int32_t) qmin - (int32_t) zero_point);
				33	const long lmax = (long) ((int32_t) qmax - (int32_t) zero_point);
				34	for (; n != 0; n -= 4) {
				35	const int32_t x = input[0];
				36	const int32_t y = input[1];
				37	const int32_t z = input[2];
				38	const int32_t w = input[3];
				39	input += 4;
				40
				41	const float x_scaled = (float) x * scale;
				42	const float y_scaled = (float) y * scale;
				43	const float z_scaled = (float) z * scale;
				44	const float w_scaled = (float) w * scale;
				45
				46	const long x_rounded = lrintf(x_scaled);
				47	const long y_rounded = lrintf(y_scaled);
				48	const long z_rounded = lrintf(z_scaled);
				49	const long w_rounded = lrintf(w_scaled);
				50
				51	const int32_t x_clamped = (int32_t) (x_rounded < lmin ? lmin : x_rounded > lmax ? lmax : x_rounded);
				52	const int32_t y_clamped = (int32_t) (y_rounded < lmin ? lmin : y_rounded > lmax ? lmax : y_rounded);
				53	const int32_t z_clamped = (int32_t) (z_rounded < lmin ? lmin : z_rounded > lmax ? lmax : z_rounded);
				54	const int32_t w_clamped = (int32_t) (w_rounded < lmin ? lmin : w_rounded > lmax ? lmax : w_rounded);
				55
				56	const int32_t x_biased = x_clamped + (int32_t) zero_point;
				57	const int32_t y_biased = y_clamped + (int32_t) zero_point;
				58	const int32_t z_biased = z_clamped + (int32_t) zero_point;
				59	const int32_t w_biased = w_clamped + (int32_t) zero_point;
				60
				61	output[0] = (int8_t) x_biased;
				62	output[1] = (int8_t) y_biased;
				63	output[2] = (int8_t) z_biased;
				64	output[3] = (int8_t) w_biased;
				65	output += 4;
				66	}
				67	}