| /* |
| * Copyright (C) 2013 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifndef __BANDWIDTH_H__ |
| #define __BANDWIDTH_H__ |
| |
| #include <stdlib.h> |
| #include <string.h> |
| |
| #include "utils/Compat.h" |
| #include "memtest.h" |
| |
| // Bandwidth Class definitions. |
| class BandwidthBenchmark { |
| public: |
| BandwidthBenchmark() |
| : _size(0), |
| _num_warm_loops(DEFAULT_NUM_WARM_LOOPS), |
| _num_loops(DEFAULT_NUM_LOOPS) {} |
| virtual ~BandwidthBenchmark() {} |
| |
| bool run() { |
| if (_size == 0) { |
| return false; |
| } |
| if (!canRun()) { |
| return false; |
| } |
| |
| bench(_num_warm_loops); |
| |
| nsecs_t t = system_time(); |
| bench(_num_loops); |
| t = system_time() - t; |
| |
| _mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC); |
| |
| return true; |
| } |
| |
| bool canRun() { return !usesNeon() || isNeonSupported(); } |
| |
| virtual bool setSize(size_t size) = 0; |
| |
| virtual const char *getName() = 0; |
| |
| virtual bool verify() = 0; |
| |
| virtual bool usesNeon() { return false; } |
| |
| bool isNeonSupported() { |
| #if defined(__ARM_NEON__) |
| return true; |
| #else |
| return false; |
| #endif |
| } |
| |
| // Accessors/mutators. |
| double mb_per_sec() { return _mb_per_sec; } |
| size_t num_warm_loops() { return _num_warm_loops; } |
| size_t num_loops() { return _num_loops; } |
| size_t size() { return _size; } |
| |
| void set_num_warm_loops(size_t num_warm_loops) { |
| _num_warm_loops = num_warm_loops; |
| } |
| void set_num_loops(size_t num_loops) { _num_loops = num_loops; } |
| |
| // Static constants |
| static const unsigned int DEFAULT_NUM_WARM_LOOPS = 1000000; |
| static const unsigned int DEFAULT_NUM_LOOPS = 20000000; |
| |
| protected: |
| virtual void bench(size_t num_loops) = 0; |
| |
| double _mb_per_sec; |
| size_t _size; |
| size_t _num_warm_loops; |
| size_t _num_loops; |
| |
| private: |
| // Static constants |
| static const CONSTEXPR double _NUM_NS_PER_SEC = 1000000000.0; |
| static const CONSTEXPR double _BYTES_PER_MB = 1024.0* 1024.0; |
| }; |
| |
| class CopyBandwidthBenchmark : public BandwidthBenchmark { |
| public: |
| CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { } |
| |
| bool setSize(size_t size) { |
| if (_src) { |
| free(_src); |
| } |
| if (_dst) { |
| free(_dst); |
| } |
| |
| if (size == 0) { |
| _size = DEFAULT_COPY_SIZE; |
| } else { |
| _size = size; |
| } |
| |
| _src = reinterpret_cast<char*>(memalign(64, _size)); |
| if (!_src) { |
| perror("Failed to allocate memory for test."); |
| return false; |
| } |
| _dst = reinterpret_cast<char*>(memalign(64, _size)); |
| if (!_dst) { |
| perror("Failed to allocate memory for test."); |
| return false; |
| } |
| |
| return true; |
| } |
| virtual ~CopyBandwidthBenchmark() { |
| if (_src) { |
| free(_src); |
| _src = NULL; |
| } |
| if (_dst) { |
| free(_dst); |
| _dst = NULL; |
| } |
| } |
| |
| bool verify() { |
| memset(_src, 0x23, _size); |
| memset(_dst, 0, _size); |
| bench(1); |
| if (memcmp(_src, _dst, _size) != 0) { |
| printf("Buffers failed to compare after one loop.\n"); |
| return false; |
| } |
| |
| memset(_src, 0x23, _size); |
| memset(_dst, 0, _size); |
| _num_loops = 2; |
| bench(2); |
| if (memcmp(_src, _dst, _size) != 0) { |
| printf("Buffers failed to compare after two loops.\n"); |
| return false; |
| } |
| |
| return true; |
| } |
| |
| protected: |
| char *_src; |
| char *_dst; |
| |
| static const unsigned int DEFAULT_COPY_SIZE = 8000; |
| }; |
| |
| class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark { |
| public: |
| CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { } |
| virtual ~CopyLdrdStrdBenchmark() {} |
| |
| const char *getName() { return "ldrd/strd"; } |
| |
| protected: |
| // Copy using ldrd/strd instructions. |
| void bench(size_t num_loops) { |
| asm volatile( |
| "stmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n" |
| |
| "mov r0, %0\n" |
| "mov r1, %1\n" |
| "mov r2, %2\n" |
| "mov r3, %3\n" |
| |
| "0:\n" |
| "mov r4, r2, lsr #6\n" |
| |
| "1:\n" |
| "ldrd r6, r7, [r0]\n" |
| "strd r6, r7, [r1]\n" |
| "ldrd r6, r7, [r0, #8]\n" |
| "strd r6, r7, [r1, #8]\n" |
| "ldrd r6, r7, [r0, #16]\n" |
| "strd r6, r7, [r1, #16]\n" |
| "ldrd r6, r7, [r0, #24]\n" |
| "strd r6, r7, [r1, #24]\n" |
| "ldrd r6, r7, [r0, #32]\n" |
| "strd r6, r7, [r1, #32]\n" |
| "ldrd r6, r7, [r0, #40]\n" |
| "strd r6, r7, [r1, #40]\n" |
| "ldrd r6, r7, [r0, #48]\n" |
| "strd r6, r7, [r1, #48]\n" |
| "ldrd r6, r7, [r0, #56]\n" |
| "strd r6, r7, [r1, #56]\n" |
| |
| "add r0, r0, #64\n" |
| "add r1, r1, #64\n" |
| "subs r4, r4, #1\n" |
| "bgt 1b\n" |
| |
| "sub r0, r0, r2\n" |
| "sub r1, r1, r2\n" |
| "subs r3, r3, #1\n" |
| "bgt 0b\n" |
| |
| "ldmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n" |
| :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); |
| } |
| }; |
| |
| class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark { |
| public: |
| CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { } |
| virtual ~CopyLdmiaStmiaBenchmark() {} |
| |
| const char *getName() { return "ldmia/stmia"; } |
| |
| protected: |
| // Copy using ldmia/stmia instructions. |
| void bench(size_t num_loops) { |
| asm volatile( |
| "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n" |
| |
| "mov r0, %0\n" |
| "mov r1, %1\n" |
| "mov r2, %2\n" |
| "mov r3, %3\n" |
| |
| "0:\n" |
| "mov r4, r2, lsr #6\n" |
| |
| "1:\n" |
| "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n" |
| "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n" |
| "subs r4, r4, #1\n" |
| "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n" |
| "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n" |
| "bgt 1b\n" |
| |
| "sub r0, r0, r2\n" |
| "sub r1, r1, r2\n" |
| "subs r3, r3, #1\n" |
| "bgt 0b\n" |
| |
| "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n" |
| :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); |
| } |
| }; |
| |
| class CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark { |
| public: |
| CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { } |
| virtual ~CopyVld1Vst1Benchmark() {} |
| |
| const char *getName() { return "vld1/vst1"; } |
| |
| bool usesNeon() { return true; } |
| |
| protected: |
| // Copy using vld1/vst1 instructions. |
| void bench(size_t num_loops) { |
| #if defined(__ARM_NEON__) |
| asm volatile( |
| "stmfd sp!, {r0,r1,r2,r3,r4}\n" |
| |
| "mov r0, %0\n" |
| "mov r1, %1\n" |
| "mov r2, %2\n" |
| "mov r3, %3\n" |
| |
| "0:\n" |
| "mov r4, r2, lsr #6\n" |
| |
| "1:\n" |
| "vld1.8 {d0-d3}, [r0]!\n" |
| "vld1.8 {d4-d7}, [r0]!\n" |
| "subs r4, r4, #1\n" |
| "vst1.8 {d0-d3}, [r1:128]!\n" |
| "vst1.8 {d4-d7}, [r1:128]!\n" |
| "bgt 1b\n" |
| |
| "sub r0, r0, r2\n" |
| "sub r1, r1, r2\n" |
| "subs r3, r3, #1\n" |
| "bgt 0b\n" |
| |
| "ldmfd sp!, {r0,r1,r2,r3,r4}\n" |
| :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); |
| #endif |
| } |
| }; |
| |
| class CopyVldrVstrBenchmark : public CopyBandwidthBenchmark { |
| public: |
| CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { } |
| virtual ~CopyVldrVstrBenchmark() {} |
| |
| const char *getName() { return "vldr/vstr"; } |
| |
| bool usesNeon() { return true; } |
| |
| protected: |
| // Copy using vldr/vstr instructions. |
| void bench(size_t num_loops) { |
| #if defined(__ARM_NEON__) |
| asm volatile( |
| "stmfd sp!, {r0,r1,r2,r3,r4}\n" |
| |
| "mov r0, %0\n" |
| "mov r1, %1\n" |
| "mov r2, %2\n" |
| "mov r3, %3\n" |
| |
| "0:\n" |
| "mov r4, r2, lsr #6\n" |
| |
| "1:\n" |
| "vldr d0, [r0, #0]\n" |
| "subs r4, r4, #1\n" |
| "vldr d1, [r0, #8]\n" |
| "vstr d0, [r1, #0]\n" |
| "vldr d0, [r0, #16]\n" |
| "vstr d1, [r1, #8]\n" |
| "vldr d1, [r0, #24]\n" |
| "vstr d0, [r1, #16]\n" |
| "vldr d0, [r0, #32]\n" |
| "vstr d1, [r1, #24]\n" |
| "vldr d1, [r0, #40]\n" |
| "vstr d0, [r1, #32]\n" |
| "vldr d0, [r0, #48]\n" |
| "vstr d1, [r1, #40]\n" |
| "vldr d1, [r0, #56]\n" |
| "vstr d0, [r1, #48]\n" |
| "add r0, r0, #64\n" |
| "vstr d1, [r1, #56]\n" |
| "add r1, r1, #64\n" |
| "bgt 1b\n" |
| |
| "sub r0, r0, r2\n" |
| "sub r1, r1, r2\n" |
| "subs r3, r3, #1\n" |
| "bgt 0b\n" |
| |
| "ldmfd sp!, {r0,r1,r2,r3,r4}\n" |
| :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); |
| #endif |
| } |
| }; |
| |
| class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark { |
| public: |
| CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { } |
| virtual ~CopyVldmiaVstmiaBenchmark() {} |
| |
| const char *getName() { return "vldmia/vstmia"; } |
| |
| bool usesNeon() { return true; } |
| |
| protected: |
| // Copy using vldmia/vstmia instructions. |
| void bench(size_t num_loops) { |
| #if defined(__ARM_NEON__) |
| asm volatile( |
| "stmfd sp!, {r0,r1,r2,r3,r4}\n" |
| |
| "mov r0, %0\n" |
| "mov r1, %1\n" |
| "mov r2, %2\n" |
| "mov r3, %3\n" |
| |
| "0:\n" |
| "mov r4, r2, lsr #6\n" |
| |
| "1:\n" |
| "vldmia r0!, {d0-d7}\n" |
| "subs r4, r4, #1\n" |
| "vstmia r1!, {d0-d7}\n" |
| "bgt 1b\n" |
| |
| "sub r0, r0, r2\n" |
| "sub r1, r1, r2\n" |
| "subs r3, r3, #1\n" |
| "bgt 0b\n" |
| |
| "ldmfd sp!, {r0,r1,r2,r3,r4}\n" |
| :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); |
| #endif |
| } |
| }; |
| |
| class MemcpyBenchmark : public CopyBandwidthBenchmark { |
| public: |
| MemcpyBenchmark() : CopyBandwidthBenchmark() { } |
| virtual ~MemcpyBenchmark() {} |
| |
| const char *getName() { return "memcpy"; } |
| |
| protected: |
| void bench(size_t num_loops) { |
| for (size_t i = 0; i < num_loops; i++) { |
| memcpy(_dst, _src, _size); |
| } |
| } |
| }; |
| |
| class SingleBufferBandwidthBenchmark : public BandwidthBenchmark { |
| public: |
| SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { } |
| virtual ~SingleBufferBandwidthBenchmark() { |
| if (_buffer) { |
| free(_buffer); |
| _buffer = NULL; |
| } |
| } |
| |
| bool setSize(size_t size) { |
| if (_buffer) { |
| free(_buffer); |
| _buffer = NULL; |
| } |
| |
| if (_size == 0) { |
| _size = DEFAULT_SINGLE_BUFFER_SIZE; |
| } else { |
| _size = size; |
| } |
| |
| _buffer = reinterpret_cast<char*>(memalign(64, _size)); |
| if (!_buffer) { |
| perror("Failed to allocate memory for test."); |
| return false; |
| } |
| memset(_buffer, 0, _size); |
| |
| return true; |
| } |
| |
| bool verify() { return true; } |
| |
| protected: |
| char *_buffer; |
| |
| static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000; |
| }; |
| |
| class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark { |
| public: |
| WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { } |
| virtual ~WriteBandwidthBenchmark() { } |
| |
| bool verify() { |
| memset(_buffer, 0, _size); |
| bench(1); |
| for (size_t i = 0; i < _size; i++) { |
| if (_buffer[i] != 1) { |
| printf("Buffer failed to compare after one loop.\n"); |
| return false; |
| } |
| } |
| |
| memset(_buffer, 0, _size); |
| bench(2); |
| for (size_t i = 0; i < _size; i++) { |
| if (_buffer[i] != 2) { |
| printf("Buffer failed to compare after two loops.\n"); |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| }; |
| |
| class WriteStrdBenchmark : public WriteBandwidthBenchmark { |
| public: |
| WriteStrdBenchmark() : WriteBandwidthBenchmark() { } |
| virtual ~WriteStrdBenchmark() {} |
| |
| const char *getName() { return "strd"; } |
| |
| protected: |
| // Write a given value using strd. |
| void bench(size_t num_loops) { |
| asm volatile( |
| "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n" |
| |
| "mov r0, %0\n" |
| "mov r1, %1\n" |
| "mov r2, %2\n" |
| |
| "mov r4, #0\n" |
| "mov r5, #0\n" |
| |
| "0:\n" |
| "mov r3, r1, lsr #5\n" |
| |
| "add r4, r4, #0x01010101\n" |
| "mov r5, r4\n" |
| |
| "1:\n" |
| "subs r3, r3, #1\n" |
| "strd r4, r5, [r0]\n" |
| "strd r4, r5, [r0, #8]\n" |
| "strd r4, r5, [r0, #16]\n" |
| "strd r4, r5, [r0, #24]\n" |
| "add r0, r0, #32\n" |
| "bgt 1b\n" |
| |
| "sub r0, r0, r1\n" |
| "subs r2, r2, #1\n" |
| "bgt 0b\n" |
| |
| "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n" |
| :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); |
| } |
| }; |
| |
| class WriteStmiaBenchmark : public WriteBandwidthBenchmark { |
| public: |
| WriteStmiaBenchmark() : WriteBandwidthBenchmark() { } |
| virtual ~WriteStmiaBenchmark() {} |
| |
| const char *getName() { return "stmia"; } |
| |
| protected: |
| // Write a given value using stmia. |
| void bench(size_t num_loops) { |
| asm volatile( |
| "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n" |
| |
| "mov r0, %0\n" |
| "mov r1, %1\n" |
| "mov r2, %2\n" |
| |
| "mov r4, #0\n" |
| |
| "0:\n" |
| "mov r3, r1, lsr #5\n" |
| |
| "add r4, r4, #0x01010101\n" |
| "mov r5, r4\n" |
| "mov r6, r4\n" |
| "mov r7, r4\n" |
| "mov r8, r4\n" |
| "mov r9, r4\n" |
| "mov r10, r4\n" |
| "mov r11, r4\n" |
| |
| "1:\n" |
| "subs r3, r3, #1\n" |
| "stmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n" |
| "bgt 1b\n" |
| |
| "sub r0, r0, r1\n" |
| "subs r2, r2, #1\n" |
| "bgt 0b\n" |
| |
| "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n" |
| :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); |
| } |
| }; |
| |
| class WriteVst1Benchmark : public WriteBandwidthBenchmark { |
| public: |
| WriteVst1Benchmark() : WriteBandwidthBenchmark() { } |
| virtual ~WriteVst1Benchmark() {} |
| |
| const char *getName() { return "vst1"; } |
| |
| bool usesNeon() { return true; } |
| |
| protected: |
| // Write a given value using vst. |
| void bench(size_t num_loops) { |
| #if defined(__ARM_NEON__) |
| asm volatile( |
| "stmfd sp!, {r0,r1,r2,r3,r4}\n" |
| |
| "mov r0, %0\n" |
| "mov r1, %1\n" |
| "mov r2, %2\n" |
| "mov r4, #0\n" |
| |
| "0:\n" |
| "mov r3, r1, lsr #5\n" |
| |
| "add r4, r4, #1\n" |
| "vdup.8 d0, r4\n" |
| "vmov d1, d0\n" |
| "vmov d2, d0\n" |
| "vmov d3, d0\n" |
| |
| "1:\n" |
| "subs r3, r3, #1\n" |
| "vst1.8 {d0-d3}, [r0:128]!\n" |
| "bgt 1b\n" |
| |
| "sub r0, r0, r1\n" |
| "subs r2, r2, #1\n" |
| "bgt 0b\n" |
| |
| "ldmfd sp!, {r0,r1,r2,r3,r4}\n" |
| :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); |
| #endif |
| } |
| }; |
| |
| class WriteVstrBenchmark : public WriteBandwidthBenchmark { |
| public: |
| WriteVstrBenchmark() : WriteBandwidthBenchmark() { } |
| virtual ~WriteVstrBenchmark() {} |
| |
| const char *getName() { return "vstr"; } |
| |
| bool usesNeon() { return true; } |
| |
| protected: |
| // Write a given value using vst. |
| void bench(size_t num_loops) { |
| #if defined(__ARM_NEON__) |
| asm volatile( |
| "stmfd sp!, {r0,r1,r2,r3,r4}\n" |
| |
| "mov r0, %0\n" |
| "mov r1, %1\n" |
| "mov r2, %2\n" |
| "mov r4, #0\n" |
| |
| "0:\n" |
| "mov r3, r1, lsr #5\n" |
| |
| "add r4, r4, #1\n" |
| "vdup.8 d0, r4\n" |
| "vmov d1, d0\n" |
| "vmov d2, d0\n" |
| "vmov d3, d0\n" |
| |
| "1:\n" |
| "vstr d0, [r0, #0]\n" |
| "subs r3, r3, #1\n" |
| "vstr d1, [r0, #8]\n" |
| "vstr d0, [r0, #16]\n" |
| "vstr d1, [r0, #24]\n" |
| "add r0, r0, #32\n" |
| "bgt 1b\n" |
| |
| "sub r0, r0, r1\n" |
| "subs r2, r2, #1\n" |
| "bgt 0b\n" |
| |
| "ldmfd sp!, {r0,r1,r2,r3,r4}\n" |
| :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); |
| #endif |
| } |
| }; |
| |
| class WriteVstmiaBenchmark : public WriteBandwidthBenchmark { |
| public: |
| WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { } |
| virtual ~WriteVstmiaBenchmark() {} |
| |
| const char *getName() { return "vstmia"; } |
| |
| bool usesNeon() { return true; } |
| |
| protected: |
| // Write a given value using vstmia. |
| void bench(size_t num_loops) { |
| #if defined(__ARM_NEON__) |
| asm volatile( |
| "stmfd sp!, {r0,r1,r2,r3,r4}\n" |
| |
| "mov r0, %0\n" |
| "mov r1, %1\n" |
| "mov r2, %2\n" |
| "mov r4, #0\n" |
| |
| "0:\n" |
| "mov r3, r1, lsr #5\n" |
| |
| "add r4, r4, #1\n" |
| "vdup.8 d0, r4\n" |
| "vmov d1, d0\n" |
| "vmov d2, d0\n" |
| "vmov d3, d0\n" |
| |
| "1:\n" |
| "subs r3, r3, #1\n" |
| "vstmia r0!, {d0-d3}\n" |
| "bgt 1b\n" |
| |
| "sub r0, r0, r1\n" |
| "subs r2, r2, #1\n" |
| "bgt 0b\n" |
| |
| "ldmfd sp!, {r0,r1,r2,r3,r4}\n" |
| :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); |
| #endif |
| } |
| }; |
| |
| class MemsetBenchmark : public WriteBandwidthBenchmark { |
| public: |
| MemsetBenchmark() : WriteBandwidthBenchmark() { } |
| virtual ~MemsetBenchmark() {} |
| |
| const char *getName() { return "memset"; } |
| |
| protected: |
| void bench(size_t num_loops) { |
| for (size_t i = 0; i < num_loops; i++) { |
| memset(_buffer, (i % 255) + 1, _size); |
| } |
| } |
| }; |
| |
| class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark { |
| public: |
| ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { } |
| virtual ~ReadLdrdBenchmark() {} |
| |
| const char *getName() { return "ldrd"; } |
| |
| protected: |
| // Write a given value using strd. |
| void bench(size_t num_loops) { |
| asm volatile( |
| "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n" |
| |
| "mov r0, %0\n" |
| "mov r1, %1\n" |
| "mov r2, %2\n" |
| |
| "0:\n" |
| "mov r3, r1, lsr #5\n" |
| |
| "1:\n" |
| "subs r3, r3, #1\n" |
| "ldrd r4, r5, [r0]\n" |
| "ldrd r4, r5, [r0, #8]\n" |
| "ldrd r4, r5, [r0, #16]\n" |
| "ldrd r4, r5, [r0, #24]\n" |
| "add r0, r0, #32\n" |
| "bgt 1b\n" |
| |
| "sub r0, r0, r1\n" |
| "subs r2, r2, #1\n" |
| "bgt 0b\n" |
| |
| "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n" |
| :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); |
| } |
| }; |
| |
| class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark { |
| public: |
| ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { } |
| virtual ~ReadLdmiaBenchmark() {} |
| |
| const char *getName() { return "ldmia"; } |
| |
| protected: |
| // Write a given value using stmia. |
| void bench(size_t num_loops) { |
| asm volatile( |
| "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n" |
| |
| "mov r0, %0\n" |
| "mov r1, %1\n" |
| "mov r2, %2\n" |
| |
| "0:\n" |
| "mov r3, r1, lsr #5\n" |
| |
| "1:\n" |
| "subs r3, r3, #1\n" |
| "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n" |
| "bgt 1b\n" |
| |
| "sub r0, r0, r1\n" |
| "subs r2, r2, #1\n" |
| "bgt 0b\n" |
| |
| "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n" |
| :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); |
| } |
| }; |
| |
| class ReadVld1Benchmark : public SingleBufferBandwidthBenchmark { |
| public: |
| ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { } |
| virtual ~ReadVld1Benchmark() {} |
| |
| const char *getName() { return "vld1"; } |
| |
| bool usesNeon() { return true; } |
| |
| protected: |
| // Write a given value using vst. |
| void bench(size_t num_loops) { |
| #if defined(__ARM_NEON__) |
| asm volatile( |
| "stmfd sp!, {r0,r1,r2,r3}\n" |
| |
| "mov r0, %0\n" |
| "mov r1, %1\n" |
| "mov r2, %2\n" |
| |
| "0:\n" |
| "mov r3, r1, lsr #5\n" |
| |
| "1:\n" |
| "subs r3, r3, #1\n" |
| "vld1.8 {d0-d3}, [r0:128]!\n" |
| "bgt 1b\n" |
| |
| "sub r0, r0, r1\n" |
| "subs r2, r2, #1\n" |
| "bgt 0b\n" |
| |
| "ldmfd sp!, {r0,r1,r2,r3}\n" |
| :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); |
| #endif |
| } |
| }; |
| |
| class ReadVldrBenchmark : public SingleBufferBandwidthBenchmark { |
| public: |
| ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { } |
| virtual ~ReadVldrBenchmark() {} |
| |
| const char *getName() { return "vldr"; } |
| |
| bool usesNeon() { return true; } |
| |
| protected: |
| // Write a given value using vst. |
| void bench(size_t num_loops) { |
| #if defined(__ARM_NEON__) |
| asm volatile( |
| "stmfd sp!, {r0,r1,r2,r3}\n" |
| |
| "mov r0, %0\n" |
| "mov r1, %1\n" |
| "mov r2, %2\n" |
| |
| "0:\n" |
| "mov r3, r1, lsr #5\n" |
| |
| "1:\n" |
| "vldr d0, [r0, #0]\n" |
| "subs r3, r3, #1\n" |
| "vldr d1, [r0, #8]\n" |
| "vldr d0, [r0, #16]\n" |
| "vldr d1, [r0, #24]\n" |
| "add r0, r0, #32\n" |
| "bgt 1b\n" |
| |
| "sub r0, r0, r1\n" |
| "subs r2, r2, #1\n" |
| "bgt 0b\n" |
| |
| "ldmfd sp!, {r0,r1,r2,r3}\n" |
| :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); |
| #endif |
| } |
| }; |
| |
| |
| class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark { |
| public: |
| ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { } |
| virtual ~ReadVldmiaBenchmark() {} |
| |
| const char *getName() { return "vldmia"; } |
| |
| bool usesNeon() { return true; } |
| |
| protected: |
| // Write a given value using vstmia. |
| void bench(size_t num_loops) { |
| #if defined(__ARM_NEON__) |
| asm volatile( |
| "stmfd sp!, {r0,r1,r2,r3}\n" |
| |
| "mov r0, %0\n" |
| "mov r1, %1\n" |
| "mov r2, %2\n" |
| |
| "0:\n" |
| "mov r3, r1, lsr #5\n" |
| |
| "1:\n" |
| "subs r3, r3, #1\n" |
| "vldmia r0!, {d0-d3}\n" |
| "bgt 1b\n" |
| |
| "sub r0, r0, r1\n" |
| "subs r2, r2, #1\n" |
| "bgt 0b\n" |
| |
| "ldmfd sp!, {r0,r1,r2,r3}\n" |
| :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); |
| #endif |
| } |
| }; |
| |
| #endif // __BANDWIDTH_H__ |