/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
| 10 | |
| 11 | #include "libyuv/basic_types.h" |
| 12 | |
| 13 | #include "libyuv/compare_row.h" |
| 14 | #include "libyuv/row.h" |
| 15 | |
| 16 | #ifdef __cplusplus |
| 17 | namespace libyuv { |
| 18 | extern "C" { |
| 19 | #endif |
| 20 | |
| 21 | #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) |
| 22 | |
| 23 | uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { |
| 24 | volatile uint32 sse; |
| 25 | asm volatile ( |
| 26 | "eor v16.16b, v16.16b, v16.16b \n" |
| 27 | "eor v18.16b, v18.16b, v18.16b \n" |
| 28 | "eor v17.16b, v17.16b, v17.16b \n" |
| 29 | "eor v19.16b, v19.16b, v19.16b \n" |
| 30 | |
| 31 | "1: \n" |
| 32 | MEMACCESS(0) |
| 33 | "ld1 {v0.16b}, [%0], #16 \n" |
| 34 | MEMACCESS(1) |
| 35 | "ld1 {v1.16b}, [%1], #16 \n" |
| 36 | "subs %w2, %w2, #16 \n" |
| 37 | "usubl v2.8h, v0.8b, v1.8b \n" |
| 38 | "usubl2 v3.8h, v0.16b, v1.16b \n" |
| 39 | "smlal v16.4s, v2.4h, v2.4h \n" |
| 40 | "smlal v17.4s, v3.4h, v3.4h \n" |
| 41 | "smlal2 v18.4s, v2.8h, v2.8h \n" |
| 42 | "smlal2 v19.4s, v3.8h, v3.8h \n" |
| 43 | "b.gt 1b \n" |
| 44 | |
| 45 | "add v16.4s, v16.4s, v17.4s \n" |
| 46 | "add v18.4s, v18.4s, v19.4s \n" |
| 47 | "add v19.4s, v16.4s, v18.4s \n" |
| 48 | "addv s0, v19.4s \n" |
| 49 | "fmov %w3, s0 \n" |
| 50 | : "+r"(src_a), |
| 51 | "+r"(src_b), |
| 52 | "+r"(count), |
| 53 | "=r"(sse) |
| 54 | : |
| 55 | : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"); |
| 56 | return sse; |
| 57 | } |
| 58 | |
| 59 | #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) |
| 60 | |
| 61 | #ifdef __cplusplus |
| 62 | } // extern "C" |
| 63 | } // namespace libyuv |
| 64 | #endif |