/*
 * Copyright 2017 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
11#include "libyuv/basic_types.h"
12
13#include "libyuv/compare_row.h"
14#include "libyuv/row.h"
15
16// This module is for GCC MSA
17#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
18#include "libyuv/macros_msa.h"
19
20#ifdef __cplusplus
21namespace libyuv {
22extern "C" {
23#endif
24
25uint32_t HammingDistance_MSA(const uint8_t* src_a,
26 const uint8_t* src_b,
27 int count) {
28 uint32_t diff = 0u;
29 int i;
30 v16u8 src0, src1, src2, src3;
31 v2i64 vec0 = {0}, vec1 = {0};
32
33 for (i = 0; i < count; i += 32) {
34 src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0);
35 src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16);
36 src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0);
37 src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16);
38 src0 ^= src2;
39 src1 ^= src3;
40 vec0 += __msa_pcnt_d((v2i64)src0);
41 vec1 += __msa_pcnt_d((v2i64)src1);
42 src_a += 32;
43 src_b += 32;
44 }
45
46 vec0 += vec1;
47 diff = (uint32_t)__msa_copy_u_w((v4i32)vec0, 0);
48 diff += (uint32_t)__msa_copy_u_w((v4i32)vec0, 2);
49 return diff;
50}
51
52uint32_t SumSquareError_MSA(const uint8_t* src_a,
53 const uint8_t* src_b,
54 int count) {
55 uint32_t sse = 0u;
56 int i;
57 v16u8 src0, src1, src2, src3;
58 v8i16 vec0, vec1, vec2, vec3;
59 v4i32 reg0 = {0}, reg1 = {0}, reg2 = {0}, reg3 = {0};
60 v2i64 tmp0;
61
62 for (i = 0; i < count; i += 32) {
63 src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0);
64 src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16);
65 src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0);
66 src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16);
67 vec0 = (v8i16)__msa_ilvr_b((v16i8)src2, (v16i8)src0);
68 vec1 = (v8i16)__msa_ilvl_b((v16i8)src2, (v16i8)src0);
69 vec2 = (v8i16)__msa_ilvr_b((v16i8)src3, (v16i8)src1);
70 vec3 = (v8i16)__msa_ilvl_b((v16i8)src3, (v16i8)src1);
71 vec0 = __msa_hsub_u_h((v16u8)vec0, (v16u8)vec0);
72 vec1 = __msa_hsub_u_h((v16u8)vec1, (v16u8)vec1);
73 vec2 = __msa_hsub_u_h((v16u8)vec2, (v16u8)vec2);
74 vec3 = __msa_hsub_u_h((v16u8)vec3, (v16u8)vec3);
75 reg0 = __msa_dpadd_s_w(reg0, vec0, vec0);
76 reg1 = __msa_dpadd_s_w(reg1, vec1, vec1);
77 reg2 = __msa_dpadd_s_w(reg2, vec2, vec2);
78 reg3 = __msa_dpadd_s_w(reg3, vec3, vec3);
79 src_a += 32;
80 src_b += 32;
81 }
82
83 reg0 += reg1;
84 reg2 += reg3;
85 reg0 += reg2;
86 tmp0 = __msa_hadd_s_d(reg0, reg0);
87 sse = (uint32_t)__msa_copy_u_w((v4i32)tmp0, 0);
88 sse += (uint32_t)__msa_copy_u_w((v4i32)tmp0, 2);
89 return sse;
90}
91
92#ifdef __cplusplus
93} // extern "C"
94} // namespace libyuv
95#endif
96
97#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)