blob: 8599b5062ec2ee698bcd10c0ec808df93dc8ae3f [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

9#include <algorithm>
10#include <cfloat>
11#include <cmath>
12#include <functional>
13#include <random>
14#include <vector>
15
16#include <cpuinfo.h>
17
Frank Barchardbb4c18b2019-09-30 11:05:52 -070018#include <benchmark/benchmark.h>
Marat Dukhan629a33e2019-10-01 10:39:14 -070019#include <fp16/fp16.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070020#include "bench/gemm.h"
Frank Barchardbb4c18b2019-09-30 11:05:52 -070021#include "bench/utils.h"
XNNPACK Teamb455b122019-09-27 18:10:33 -070022#include <xnnpack/AlignedAllocator.h>
Marat Dukhan1dadbf72019-10-01 10:46:20 -070023#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070024#include <xnnpack/gemm.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070025#include <xnnpack/pack.h>
Marat Dukhaneeaa7bd2019-10-25 17:31:25 -070026#include <xnnpack/params-init.h>
Frank Barcharde0601b52019-10-25 17:43:34 -070027#include <xnnpack/params.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070028
XNNPACK Teamb455b122019-09-27 18:10:33 -070029
30static void GEMMBenchmark(benchmark::State& state,
Frank Barcharde92f8592020-05-04 10:08:44 -070031 xnn_f16_gemm_minmax_ukernel_function gemm,
Marat Dukhanc4302c22022-01-06 19:27:03 -080032 size_t mr, size_t nr, size_t kr, size_t sr,
33 xnn_init_f16_scaleminmax_params_fn init_params,
34 benchmark::utils::IsaCheckFunction isa_check = nullptr)
XNNPACK Teamb455b122019-09-27 18:10:33 -070035{
Marat Dukhanc4302c22022-01-06 19:27:03 -080036 if (isa_check && !isa_check(state)) {
Frank Barchard40f50e12020-05-29 22:21:56 -070037 return;
38 }
XNNPACK Teamb455b122019-09-27 18:10:33 -070039
40 const size_t mc = state.range(0);
41 const size_t nc = state.range(1);
42 const size_t kc = state.range(2);
43
Marat Dukhan42323232019-10-23 02:09:02 -070044 const size_t nc_stride = benchmark::utils::RoundUp(nc, nr);
Marat Dukhanfbd67a72022-01-31 18:03:50 -080045 const size_t kc_stride = benchmark::utils::RoundUp(kc, kr * sr);
XNNPACK Teamb455b122019-09-27 18:10:33 -070046
47 std::random_device random_device;
48 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -070049 auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070050 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
51
52 std::vector<uint16_t> a(mc * kc);
53 std::generate(a.begin(), a.end(), std::ref(f16rng));
54 std::vector<uint16_t> k(nc * kc);
55 std::generate(k.begin(), k.end(), std::ref(f16rng));
56 std::vector<uint16_t> b(nc);
57 std::generate(b.begin(), b.end(), std::ref(f16rng));
58
59 const size_t w_elements = nc_stride * kc_stride + nc_stride;
60 const size_t c_elements = mc * nc;
61 const size_t num_buffers = 1 +
Marat Dukhan42323232019-10-23 02:09:02 -070062 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
XNNPACK Teamb455b122019-09-27 18:10:33 -070063 sizeof(uint16_t) * (w_elements + c_elements));
64
Marat Dukhane13e6392021-07-26 22:22:35 -070065 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> w(w_elements * num_buffers);
XNNPACK Teamb455b122019-09-27 18:10:33 -070066 std::fill(w.begin(), w.end(), 0);
Marat Dukhan0b043742021-06-02 18:29:11 -070067 xnn_pack_f16_gemm_goi_w(1 /* groups */, nc, kc, nr, kr, sr, k.data(), b.data(), w.data(), 0, nullptr);
XNNPACK Teamb455b122019-09-27 18:10:33 -070068 std::vector<uint16_t> c(c_elements * num_buffers);
69 std::fill(c.begin(), c.end(), UINT16_C(0x7E00) /* NaN */);
70
Frank Barchard40d20fe2020-05-05 00:37:45 -070071 // Prepare minmax parameters.
72 xnn_f16_scaleminmax_params params;
Marat Dukhanc4302c22022-01-06 19:27:03 -080073 init_params(&params,
74 UINT16_C(0x3C00) /* 1.0 */, UINT16_C(0xFC00) /* -inf */, UINT16_C(0x7C00) /* inf */);
XNNPACK Teamb455b122019-09-27 18:10:33 -070075
76 size_t buffer_index = 0;
77 for (auto _ : state) {
78 // Use circular buffers (exceeding cache size) and prefetch to control cache state:
79 // - A is always in L1 cache (if fits, otherwise L2, L3, etc)
80 // - W is not in cache (for any cache level)
81 // - C is not in cache (for any cache level)
82 state.PauseTiming();
Marat Dukhan42323232019-10-23 02:09:02 -070083 benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(uint16_t));
XNNPACK Teamb455b122019-09-27 18:10:33 -070084 buffer_index = (buffer_index + 1) % num_buffers;
85 state.ResumeTiming();
86
87 for (uint32_t m = 0; m < mc; m += mr) {
88 const uint32_t mb = min(mc - m, mr);
89 for (uint32_t n = 0; n < nc; n += nr) {
90 const uint32_t nb = min(nc - n, nr);
Frank Barcharde92f8592020-05-04 10:08:44 -070091 gemm(
Marat Dukhanb1864632019-11-25 16:34:17 -080092 mb, nb, kc * sizeof(uint16_t),
XNNPACK Teamb455b122019-09-27 18:10:33 -070093 a.data() + m * kc, kc * sizeof(uint16_t),
94 w.data() + (nc_stride * buffer_index + n) * (kc_stride + 1),
95 c.data() + (mc * buffer_index + m) * nc + n, nc * sizeof(uint16_t), nr * sizeof(uint16_t),
Frank Barchard77acbf22020-05-01 10:08:26 -070096 &params);
XNNPACK Teamb455b122019-09-27 18:10:33 -070097 }
98 }
99 }
100
Marat Dukhand713e8a2020-12-04 14:23:12 -0800101 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
102 if (cpu_frequency != 0) {
103 state.counters["cpufreq"] = cpu_frequency;
104 }
105
XNNPACK Teamb455b122019-09-27 18:10:33 -0700106 state.counters["FLOPS"] = benchmark::Counter(
107 uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate);
108}
109
XNNPACK Teamb455b122019-09-27 18:10:33 -0700110
#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
  // Wrappers for the AArch64 assembly f16 GEMM microkernels. Each wrapper pins
  // the microkernel's tile parameters (mr, nr, kr=1, sr=1) and gates execution
  // on NEON-FP16-ARITH support.
  static void f16_gemm_1x16__aarch64_neonfp16arith_ld32(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32, 1, 16, 1, 1,
      xnn_init_f16_scaleminmax_neon_params, benchmark::utils::CheckNEONFP16ARITH);
  }
  static void f16_gemm_4x16__aarch64_neonfp16arith_ld32(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32, 4, 16, 1, 1,
      xnn_init_f16_scaleminmax_neon_params, benchmark::utils::CheckNEONFP16ARITH);
  }
  static void f16_gemm_6x16__aarch64_neonfp16arith_ld32(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32, 6, 16, 1, 1,
      xnn_init_f16_scaleminmax_neon_params, benchmark::utils::CheckNEONFP16ARITH);
  }
  static void f16_gemm_6x16__aarch64_neonfp16arith_cortex_a55(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a55, 6, 16, 1, 1,
      xnn_init_f16_scaleminmax_neon_params, benchmark::utils::CheckNEONFP16ARITH);
  }
  static void f16_gemm_6x16__aarch64_neonfp16arith_cortex_a75(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_cortex_a75, 6, 16, 1, 1,
      xnn_init_f16_scaleminmax_neon_params, benchmark::utils::CheckNEONFP16ARITH);
  }
  static void f16_gemm_1x8__aarch64_neonfp16arith_ld64(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64, 1, 8, 1, 1,
      xnn_init_f16_scaleminmax_neon_params, benchmark::utils::CheckNEONFP16ARITH);
  }
  static void f16_gemm_4x8__aarch64_neonfp16arith_ld64(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64, 4, 8, 1, 1,
      xnn_init_f16_scaleminmax_neon_params, benchmark::utils::CheckNEONFP16ARITH);
  }
  static void f16_gemm_6x8__aarch64_neonfp16arith_ld64(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64, 6, 8, 1, 1,
      xnn_init_f16_scaleminmax_neon_params, benchmark::utils::CheckNEONFP16ARITH);
  }
  static void f16_gemm_8x8__aarch64_neonfp16arith_ld64(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64, 8, 8, 1, 1,
      xnn_init_f16_scaleminmax_neon_params, benchmark::utils::CheckNEONFP16ARITH);
  }

  BENCHMARK_GEMM(f16_gemm_1x16__aarch64_neonfp16arith_ld32)
  BENCHMARK_GEMM(f16_gemm_4x16__aarch64_neonfp16arith_ld32)
  BENCHMARK_GEMM(f16_gemm_6x16__aarch64_neonfp16arith_ld32)
  BENCHMARK_GEMM(f16_gemm_6x16__aarch64_neonfp16arith_cortex_a55)
  BENCHMARK_GEMM(f16_gemm_6x16__aarch64_neonfp16arith_cortex_a75)
  BENCHMARK_GEMM(f16_gemm_1x8__aarch64_neonfp16arith_ld64)
  BENCHMARK_GEMM(f16_gemm_4x8__aarch64_neonfp16arith_ld64)
  BENCHMARK_GEMM(f16_gemm_6x8__aarch64_neonfp16arith_ld64)
  BENCHMARK_GEMM(f16_gemm_8x8__aarch64_neonfp16arith_ld64)
#endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
159
#if XNN_ARCH_ARM64
  // Wrappers for the NEON-FP16-ARITH intrinsics f16 GEMM microkernels
  // (kr=1, sr=1), gated on NEON-FP16-ARITH support.
  static void f16_gemm_1x8__neonfp16arith_ld64(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64, 1, 8, 1, 1,
      xnn_init_f16_scaleminmax_neon_params, benchmark::utils::CheckNEONFP16ARITH);
  }
  static void f16_gemm_4x8__neonfp16arith_ld64(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64, 4, 8, 1, 1,
      xnn_init_f16_scaleminmax_neon_params, benchmark::utils::CheckNEONFP16ARITH);
  }
  static void f16_gemm_6x8__neonfp16arith_ld64(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64, 6, 8, 1, 1,
      xnn_init_f16_scaleminmax_neon_params, benchmark::utils::CheckNEONFP16ARITH);
  }
  static void f16_gemm_8x8__neonfp16arith_ld64(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64, 8, 8, 1, 1,
      xnn_init_f16_scaleminmax_neon_params, benchmark::utils::CheckNEONFP16ARITH);
  }
  static void f16_gemm_1x16__neonfp16arith_ld64(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64, 1, 16, 1, 1,
      xnn_init_f16_scaleminmax_neon_params, benchmark::utils::CheckNEONFP16ARITH);
  }
  static void f16_gemm_4x16__neonfp16arith_ld64(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64, 4, 16, 1, 1,
      xnn_init_f16_scaleminmax_neon_params, benchmark::utils::CheckNEONFP16ARITH);
  }
  static void f16_gemm_6x16__neonfp16arith_ld64(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64, 6, 16, 1, 1,
      xnn_init_f16_scaleminmax_neon_params, benchmark::utils::CheckNEONFP16ARITH);
  }
  static void f16_gemm_8x16__neonfp16arith_ld64(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64, 8, 16, 1, 1,
      xnn_init_f16_scaleminmax_neon_params, benchmark::utils::CheckNEONFP16ARITH);
  }

  BENCHMARK_GEMM(f16_gemm_1x8__neonfp16arith_ld64)
  BENCHMARK_GEMM(f16_gemm_4x8__neonfp16arith_ld64)
  BENCHMARK_GEMM(f16_gemm_6x8__neonfp16arith_ld64)
  BENCHMARK_GEMM(f16_gemm_8x8__neonfp16arith_ld64)
  BENCHMARK_GEMM(f16_gemm_1x16__neonfp16arith_ld64)
  BENCHMARK_GEMM(f16_gemm_4x16__neonfp16arith_ld64)
  BENCHMARK_GEMM(f16_gemm_6x16__neonfp16arith_ld64)
  BENCHMARK_GEMM(f16_gemm_8x16__neonfp16arith_ld64)
#endif  // XNN_ARCH_ARM64
Frank Barchardbddfbcd2020-04-15 12:32:41 -0700203
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Wrappers for the x86 AVX2-broadcast f16 GEMM microkernels (kr=1, sr=1),
  // gated on AVX2 support.
  static void f16_gemm_1x8__avx2_broadcast(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_1x8__avx2_broadcast, 1, 8, 1, 1,
      xnn_init_f16_scaleminmax_avx_params, benchmark::utils::CheckAVX2);
  }
  static void f16_gemm_4x8__avx2_broadcast(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_4x8__avx2_broadcast, 4, 8, 1, 1,
      xnn_init_f16_scaleminmax_avx_params, benchmark::utils::CheckAVX2);
  }
  static void f16_gemm_5x8__avx2_broadcast(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_5x8__avx2_broadcast, 5, 8, 1, 1,
      xnn_init_f16_scaleminmax_avx_params, benchmark::utils::CheckAVX2);
  }
  static void f16_gemm_6x8__avx2_broadcast(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_6x8__avx2_broadcast, 6, 8, 1, 1,
      xnn_init_f16_scaleminmax_avx_params, benchmark::utils::CheckAVX2);
  }
  static void f16_gemm_7x8__avx2_broadcast(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_7x8__avx2_broadcast, 7, 8, 1, 1,
      xnn_init_f16_scaleminmax_avx_params, benchmark::utils::CheckAVX2);
  }
  static void f16_gemm_1x16__avx2_broadcast(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_1x16__avx2_broadcast, 1, 16, 1, 1,
      xnn_init_f16_scaleminmax_avx_params, benchmark::utils::CheckAVX2);
  }
  static void f16_gemm_3x16__avx2_broadcast(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_3x16__avx2_broadcast, 3, 16, 1, 1,
      xnn_init_f16_scaleminmax_avx_params, benchmark::utils::CheckAVX2);
  }
  static void f16_gemm_4x16__avx2_broadcast(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_4x16__avx2_broadcast, 4, 16, 1, 1,
      xnn_init_f16_scaleminmax_avx_params, benchmark::utils::CheckAVX2);
  }
  static void f16_gemm_5x16__avx2_broadcast(benchmark::State& state, const char* net) {
    GEMMBenchmark(state, xnn_f16_gemm_minmax_ukernel_5x16__avx2_broadcast, 5, 16, 1, 1,
      xnn_init_f16_scaleminmax_avx_params, benchmark::utils::CheckAVX2);
  }

  BENCHMARK_GEMM(f16_gemm_1x8__avx2_broadcast)
  BENCHMARK_GEMM(f16_gemm_4x8__avx2_broadcast)
  BENCHMARK_GEMM(f16_gemm_5x8__avx2_broadcast)
  BENCHMARK_GEMM(f16_gemm_6x8__avx2_broadcast)
  BENCHMARK_GEMM(f16_gemm_7x8__avx2_broadcast)
  BENCHMARK_GEMM(f16_gemm_1x16__avx2_broadcast)
  BENCHMARK_GEMM(f16_gemm_3x16__avx2_broadcast)
  BENCHMARK_GEMM(f16_gemm_4x16__avx2_broadcast)
  BENCHMARK_GEMM(f16_gemm_5x16__avx2_broadcast)
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
252
XNNPACK Teamb455b122019-09-27 18:10:33 -0700253#ifndef XNNPACK_BENCHMARK_NO_MAIN
254BENCHMARK_MAIN();
255#endif