fbarchard@google.com | ca41005 | 2012-10-14 06:01:19 +0000 | [diff] [blame] | 1 | /* |
fbarchard@google.com | bb6bddc | 2012-10-14 06:41:17 +0000 | [diff] [blame^] | 2 | * Copyright (c) 2012 The LibYuv project authors. All Rights Reserved. |
fbarchard@google.com | ca41005 | 2012-10-14 06:01:19 +0000 | [diff] [blame] | 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
| 11 | #include "libyuv/row.h" |
fbarchard@google.com | bb6bddc | 2012-10-14 06:41:17 +0000 | [diff] [blame^] | 12 | |
fbarchard@google.com | ca41005 | 2012-10-14 06:01:19 +0000 | [diff] [blame] | 13 | #ifdef __cplusplus |
| 14 | namespace libyuv { |
| 15 | extern "C" { |
| 16 | #endif |
| 17 | |
| 18 | #if !defined(YUV_DISABLE_ASM) && defined(__mips__) |
| 19 | #ifdef HAS_SPLITUV_MIPS_DSPR2 |
fbarchard@google.com | ca41005 | 2012-10-14 06:01:19 +0000 | [diff] [blame] | 20 | void SplitUV_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, |
| 21 | int width) { |
fbarchard@google.com | bb6bddc | 2012-10-14 06:41:17 +0000 | [diff] [blame^] | 22 | |
| 23 | __asm__ __volatile__ ( |
| 24 | ".set push \n" |
| 25 | ".set noreorder \n" |
| 26 | |
| 27 | "srl $t4, %[width], 4 \n" // multiplies of 16 |
| 28 | "blez $t4, 2f \n" |
| 29 | " andi %[width], %[width], 0xf \n" // residual |
| 30 | "andi $t0, %[src_uv], 0x3 \n" |
| 31 | "andi $t1, %[dst_u], 0x3 \n" |
| 32 | "andi $t2, %[dst_v], 0x3 \n" |
| 33 | "or $t0, $t0, $t1 \n" |
| 34 | "or $t0, $t0, $t2 \n" |
| 35 | |
| 36 | "beqz $t0, 12f \n" // test if aligned |
| 37 | " nop \n" |
| 38 | |
| 39 | // src and dst are unaligned |
| 40 | "1: \n" |
| 41 | "addiu $t4, $t4, -1 \n" |
| 42 | "lwr $t0, 0(%[src_uv]) \n" |
| 43 | "lwl $t0, 3(%[src_uv]) \n" // V1 | U1 | V0 | U0 |
| 44 | "lwr $t1, 4(%[src_uv]) \n" |
| 45 | "lwl $t1, 7(%[src_uv]) \n" // V3 | U3 | V2 | U2 |
| 46 | "lwr $t2, 8(%[src_uv]) \n" |
| 47 | "lwl $t2, 11(%[src_uv]) \n" // V5 | U5 | V4 | U4 |
| 48 | "lwr $t3, 12(%[src_uv]) \n" |
| 49 | "lwl $t3, 15(%[src_uv]) \n" // V7 | U7 | V6 | U6 |
| 50 | "lwr $t5, 16(%[src_uv]) \n" |
| 51 | "lwl $t5, 19(%[src_uv]) \n" // V9 | U9 | V8 | U8 |
| 52 | "lwr $t6, 20(%[src_uv]) \n" |
| 53 | "lwl $t6, 23(%[src_uv]) \n" // V11 | U11 | V10 | U10 |
| 54 | "lwr $t7, 24(%[src_uv]) \n" |
| 55 | "lwl $t7, 27(%[src_uv]) \n" // V13 | U13 | V12 | U12 |
| 56 | "lwr $t8, 28(%[src_uv]) \n" |
| 57 | "lwl $t8, 31(%[src_uv]) \n" // V15 | U15 | V14 | U14 |
| 58 | |
| 59 | "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0 |
| 60 | "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0 |
| 61 | "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4 |
| 62 | "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4 |
| 63 | "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8 |
| 64 | "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8 |
| 65 | "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12 |
| 66 | "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12 |
| 67 | "addiu %[src_uv], %[src_uv], 32 \n" |
| 68 | |
| 69 | "swr $t9, 0(%[dst_v]) \n" |
| 70 | "swl $t9, 3(%[dst_v]) \n" |
| 71 | "swr $t0, 0(%[dst_u]) \n" |
| 72 | "swl $t0, 3(%[dst_u]) \n" |
| 73 | "swr $t1, 4(%[dst_v]) \n" |
| 74 | "swl $t1, 7(%[dst_v]) \n" |
| 75 | "swr $t2, 4(%[dst_u]) \n" |
| 76 | "swl $t2, 7(%[dst_u]) \n" |
| 77 | "swr $t3, 8(%[dst_v]) \n" |
| 78 | "swl $t3, 11(%[dst_v]) \n" |
| 79 | "swr $t5, 8(%[dst_u]) \n" |
| 80 | "swl $t5, 11(%[dst_u]) \n" |
| 81 | "swr $t6, 12(%[dst_v]) \n" |
| 82 | "swl $t6, 15(%[dst_v]) \n" |
| 83 | "swr $t7, 12(%[dst_u]) \n" |
| 84 | "swl $t7, 15(%[dst_u]) \n" |
| 85 | "addiu %[dst_u], %[dst_u], 16 \n" |
| 86 | "bgtz $t4, 1b \n" |
| 87 | " addiu %[dst_v], %[dst_v], 16 \n" |
| 88 | |
| 89 | "beqz %[width], 3f \n" |
| 90 | " nop \n" |
| 91 | "b 2f \n" |
| 92 | " nop \n" |
| 93 | |
| 94 | // src and dst are aligned |
| 95 | "12: \n" |
| 96 | "addiu $t4, $t4, -1 \n" |
| 97 | "lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0 |
| 98 | "lw $t1, 4(%[src_uv]) \n" // V3 | U3 | V2 | U2 |
| 99 | "lw $t2, 8(%[src_uv]) \n" // V5 | U5 | V4 | U4 |
| 100 | "lw $t3, 12(%[src_uv]) \n" // V7 | U7 | V6 | U6 |
| 101 | "lw $t5, 16(%[src_uv]) \n" // V9 | U9 | V8 | U8 |
| 102 | "lw $t6, 20(%[src_uv]) \n" // V11 | U11 | V10 | U10 |
| 103 | "lw $t7, 24(%[src_uv]) \n" // V13 | U13 | V12 | U12 |
| 104 | "lw $t8, 28(%[src_uv]) \n" // V15 | U15 | V14 | U14 |
| 105 | |
| 106 | "addiu %[src_uv], %[src_uv], 32 \n" |
| 107 | "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0 |
| 108 | "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0 |
| 109 | "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4 |
| 110 | "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4 |
| 111 | "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8 |
| 112 | "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8 |
| 113 | "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12 |
| 114 | "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12 |
| 115 | |
| 116 | "sw $t9, 0(%[dst_v]) \n" |
| 117 | "sw $t0, 0(%[dst_u]) \n" |
| 118 | "sw $t1, 4(%[dst_v]) \n" |
| 119 | "sw $t2, 4(%[dst_u]) \n" |
| 120 | "sw $t3, 8(%[dst_v]) \n" |
| 121 | "sw $t5, 8(%[dst_u]) \n" |
| 122 | "sw $t6, 12(%[dst_v]) \n" |
| 123 | "sw $t7, 12(%[dst_u]) \n" |
| 124 | "addiu %[dst_v], %[dst_v], 16 \n" |
| 125 | "bgtz $t4, 12b \n" |
| 126 | " addiu %[dst_u], %[dst_u], 16 \n" |
| 127 | |
| 128 | "beqz %[width], 3f \n" |
| 129 | " nop \n" |
| 130 | |
| 131 | "2: \n" |
| 132 | "lbu $t0, 0(%[src_uv]) \n" |
| 133 | "lbu $t1, 1(%[src_uv]) \n" |
| 134 | "addiu %[src_uv], %[src_uv], 2 \n" |
| 135 | "addiu %[width], %[width], -1 \n" |
| 136 | "sb $t0, 0(%[dst_u]) \n" |
| 137 | "sb $t1, 0(%[dst_v]) \n" |
| 138 | "addiu %[dst_u], %[dst_u], 1 \n" |
| 139 | "bgtz %[width], 2b \n" |
| 140 | " addiu %[dst_v], %[dst_v], 1 \n" |
| 141 | |
| 142 | "3: \n" |
| 143 | ".set pop \n" |
| 144 | : [src_uv] "+r" (src_uv), |
| 145 | [width] "+r" (width), |
| 146 | [dst_u] "+r" (dst_u), |
| 147 | [dst_v] "+r" (dst_v) |
| 148 | : |
| 149 | : "t0", "t1", "t2", "t3", |
| 150 | "t4", "t5", "t6", "t7", "t8", "t9" |
fbarchard@google.com | ca41005 | 2012-10-14 06:01:19 +0000 | [diff] [blame] | 151 | ); |
| 152 | } |
| 153 | #endif // HAS_SPLITUV_MIPS_DSPR2 |
fbarchard@google.com | bb6bddc | 2012-10-14 06:41:17 +0000 | [diff] [blame^] | 154 | |
fbarchard@google.com | ca41005 | 2012-10-14 06:01:19 +0000 | [diff] [blame] | 155 | #endif // __mips__ |
| 156 | |
| 157 | #ifdef __cplusplus |
| 158 | } // extern "C" |
| 159 | } // namespace libyuv |
| 160 | #endif |