XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 1 | // Copyright (c) Facebook, Inc. and its affiliates. |
| 2 | // All rights reserved. |
| 3 | // |
| 4 | // Copyright 2019 Google LLC |
| 5 | // |
| 6 | // This source code is licensed under the BSD-style license found in the |
| 7 | // LICENSE file in the root directory of this source tree. |
| 8 | |
| 9 | #pragma once |
| 10 | |
| 11 | #if defined(__cplusplus) && (__cplusplus >= 201103L) |
| 12 | #include <climits> |
| 13 | #include <cstdint> |
| 14 | #include <cstdbool> |
| 15 | #include <cassert> |
| 16 | #else |
| 17 | #include <limits.h> |
| 18 | #include <stdint.h> |
| 19 | #include <stdbool.h> |
| 20 | #include <assert.h> |
| 21 | #endif |
| 22 | |
| 23 | #include <fp16.h> |
| 24 | |
Marat Dukhan | c72fa1e | 2019-11-27 11:54:03 -0800 | [diff] [blame] | 25 | #include <xnnpack/common.h> |
| 26 | |
| 27 | |
Marat Dukhan | f42facc | 2020-03-08 15:14:53 -0700 | [diff] [blame] | 28 | #if defined(__clang__) |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 29 | #if __clang_major__ == 3 && __clang_minor__ >= 7 || __clang_major__ > 3 |
| 30 | #define XNN_IGNORE_SHIFT_BASE_UB __attribute__((__no_sanitize__("shift-base"))) |
| 31 | #else |
| 32 | #define XNN_IGNORE_SHIFT_BASE_UB |
| 33 | #endif |
| 34 | #elif defined(__GNUC__) |
| 35 | #if __GNUC__ >= 8 |
| 36 | #define XNN_IGNORE_SHIFT_BASE_UB __attribute__((__no_sanitize__("shift-base"))) |
| 37 | #elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9 || __GNUC__ > 4 |
Marat Dukhan | 80fc932 | 2019-09-29 21:06:36 -0700 | [diff] [blame] | 38 | // 4.9 <= gcc < 8 support ubsan, but doesn't support no_sanitize attribute |
XNNPACK Team | b455b12 | 2019-09-27 18:10:33 -0700 | [diff] [blame] | 39 | #define XNN_IGNORE_SHIFT_BASE_UB |
| 40 | #ifndef XNN_USE_SHIFT_BASE_UB_WORKAROUND |
| 41 | #define XNN_USE_SHIFT_BASE_UB_WORKAROUND 1 |
| 42 | #endif |
| 43 | #else |
| 44 | #define XNN_IGNORE_SHIFT_BASE_UB |
| 45 | #endif |
| 46 | #else |
| 47 | #define XNN_IGNORE_SHIFT_BASE_UB |
| 48 | #endif |
| 49 | |
// Arithmetic (sign-propagating) right shift of a signed 32-bit value by n.
//
// Right-shifting a negative signed integer is implementation-defined in
// ISO C, and sanitizer builds may flag it; when the workaround is enabled
// (see XNN_USE_SHIFT_BASE_UB_WORKAROUND above), the shift is emulated with
// well-defined operations, otherwise the native shift is used and the
// "shift-base" check is suppressed via XNN_IGNORE_SHIFT_BASE_UB.
//
// NOTE(review): presumably callers guarantee n < 32 (shifting by >= the
// type width is undefined) — confirm against call sites.
XNN_IGNORE_SHIFT_BASE_UB
inline static int32_t asr_s32(int32_t x, uint32_t n) {
#ifdef XNN_USE_SHIFT_BASE_UB_WORKAROUND
  #if XNN_ARCH_X86_64 || XNN_ARCH_ARM64
    // 64-bit targets: sign-extend to 64 bits, then shift as unsigned. The
    // sign bits replicated into the upper half make the low 32 bits of the
    // result equal to an arithmetic shift, with no shift on a negative base.
    return (int32_t) ((uint64_t) (int64_t) x >> n);
  #else
    // Portable branch: for negative x, ~x is non-negative, so ~x >> n is a
    // shift of a non-negative value; complementing back yields the
    // arithmetic-shift result.
    return x >= 0 ? x >> n : ~(~x >> n);
  #endif
#else
  // Native shift; relies on the compiler sign-extending negative values
  // (true on all supported compilers).
  return x >> n;
#endif
}
| 62 | |
// Arithmetic (sign-propagating) right shift of a signed 64-bit value by n.
// Same rationale as asr_s32: when the sanitizer workaround is enabled, the
// shift of a negative value is emulated via complement (~x is non-negative,
// so ~x >> n is well-defined); otherwise the native shift is used.
//
// NOTE(review): presumably callers guarantee n < 64 — confirm at call sites.
XNN_IGNORE_SHIFT_BASE_UB
inline static int64_t asr_s64(int64_t x, uint32_t n) {
#ifdef XNN_USE_SHIFT_BASE_UB_WORKAROUND
  return x >= 0 ? x >> n : ~(~x >> n);
#else
  return x >> n;
#endif
}
| 71 | |
// Requantizes a 32-bit value to uint8: multiplies by scale using exact
// fixed-point arithmetic (round-half-away-from-zero), clamps the result to
// [qmin, qmax] expressed relative to zero_point, and re-biases.
//
// Preconditions (asserted): 2^-32 <= scale < 1.0.
inline static uint8_t scalar_requantize_precise(
  int32_t value,
  float scale,
  uint8_t zero_point,
  uint8_t qmin,
  uint8_t qmax)
{
  assert(scale < 1.0f);
  assert(scale >= 0x1.0p-32f);

  // Decompose scale into a 24-bit fixed-point mantissa (implicit leading
  // one restored) and a right-shift amount derived from the IEEE exponent.
  const uint32_t scale_bits = fp32_to_bits(scale);
  const uint32_t mantissa = (scale_bits & UINT32_C(0x007FFFFF)) | UINT32_C(0x00800000);
  const uint32_t rshift = 127 + 23 - (scale_bits >> 23);
  // The scale preconditions bound the shift to [24, 56).
  assert(rshift >= 24);
  assert(rshift < 56);

  // Work on the magnitude in unsigned arithmetic so no signed operation can
  // overflow (including value == INT32_MIN).
  const bool is_negative = value < 0;
  const uint32_t magnitude = is_negative ? -(uint32_t) value : (uint32_t) value;

  // Full 32x32 -> 64-bit product, then shift right with rounding: midpoints
  // round up, i.e. away from zero once the sign is restored.
  const uint64_t half = UINT64_C(1) << (rshift - 1);
  const uint32_t scaled_magnitude =
    (uint32_t) (((uint64_t) magnitude * (uint64_t) mantissa + half) >> rshift);

  // Reapply the sign of the input.
  const int32_t scaled = (int32_t) (is_negative ? -scaled_magnitude : scaled_magnitude);

  // Clamp in the zero-point-relative domain [qmin - zp, qmax - zp].
  const int32_t smin = (int32_t) (uint32_t) qmin - (int32_t) (uint32_t) zero_point;
  const int32_t smax = (int32_t) (uint32_t) qmax - (int32_t) (uint32_t) zero_point;
  const int32_t clamped = scaled < smin ? smin : scaled > smax ? smax : scaled;

  // Shift back by the zero point; the clamp guarantees the uint8 range.
  return (uint8_t) (clamped + (int32_t) (uint32_t) zero_point);
}