| @/****************************************************************************** |
| @ * |
| @ * Copyright (C) 2015 The Android Open Source Project |
| @ * |
| @ * Licensed under the Apache License, Version 2.0 (the "License"); |
| @ * you may not use this file except in compliance with the License. |
| @ * You may obtain a copy of the License at: |
| @ * |
| @ * http://www.apache.org/licenses/LICENSE-2.0 |
| @ * |
| @ * Unless required by applicable law or agreed to in writing, software |
| @ * distributed under the License is distributed on an "AS IS" BASIS, |
| @ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| @ * See the License for the specific language governing permissions and |
| @ * limitations under the License. |
| @ * |
| @ ***************************************************************************** |
| @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
| @*/ |
| @** |
| @****************************************************************************** |
| @* @file |
| @* ih264_intra_pred_chroma_a9q.s |
| @* |
| @* @brief |
| @* Contains function definitions for intra chroma prediction . |
| @* |
| @* @author |
| @* Ittiam |
| @* |
| @* @par List of Functions: |
| @* |
| @* - ih264_intra_pred_chroma_8x8_mode_horz_a9q() |
| @* - ih264_intra_pred_chroma_8x8_mode_vert_a9q() |
| @* - ih264_intra_pred_chroma_8x8_mode_dc_a9q() |
| @* - ih264_intra_pred_chroma_8x8_mode_plane_a9q() |
| @* |
| @* @remarks |
| @* None |
| @* |
| @******************************************************************************* |
| @* |
| |
| @* All the functions here are replicated from ih264_chroma_intra_pred_filters.c |
| @ |
| |
| .text |
| .p2align 2 |
| |
| @ Coefficient tables read by the plane predictor. They are defined outside |
| @ this file (.extern) and given hidden visibility. |
| .extern ih264_gai1_intrapred_chroma_plane_coeffs1 |
| .hidden ih264_gai1_intrapred_chroma_plane_coeffs1 |
| .extern ih264_gai1_intrapred_chroma_plane_coeffs2 |
| .hidden ih264_gai1_intrapred_chroma_plane_coeffs2 |
| @ Position-independent offset of coeffs1: the instruction at scrlblc1 adds pc |
| @ to this word. The extra -8 matches pc reading 8 bytes past the executing |
| @ instruction (ARM state), so the sum is the table's absolute address. |
| scratch_chroma_intrapred_addr1: |
| .long ih264_gai1_intrapred_chroma_plane_coeffs1 - scrlblc1 - 8 |
| |
| @ Same scheme for coeffs2, resolved by the "add r12, r12, pc" at scrlblc2. |
| scratch_intrapred_chroma_plane_addr1: |
| .long ih264_gai1_intrapred_chroma_plane_coeffs2 - scrlblc2 - 8 |
| @** |
| @******************************************************************************* |
| @* |
| @*ih264_intra_pred_chroma_8x8_mode_dc |
| @* |
| @* @brief |
| @* Perform Intra prediction for chroma_8x8 mode:DC |
| @* |
| @* @par Description: |
| @* Perform Intra prediction for chroma_8x8 mode:DC ,described in sec 8.3.4.1 |
| @* |
| @* @param[in] pu1_src |
| @* UWORD8 pointer to the source containing alternate U and V samples |
| @* |
| @* @param[out] pu1_dst |
| @* UWORD8 pointer to the destination with alternate U and V samples |
| @* |
| @* @param[in] src_strd |
| @* integer source stride |
| @* |
| @* @param[in] dst_strd |
| @* integer destination stride |
| @* |
| @* @param[in] ui_neighboravailability |
| @* availability of neighbouring pixels |
| @* |
| @* @returns |
| @* |
| @* @remarks |
| @* None |
| @* |
| @******************************************************************************* |
| @void ih264_intra_pred_chroma_8x8_mode_dc(UWORD8 *pu1_src, |
| @ UWORD8 *pu1_dst, |
| @ WORD32 src_strd, |
| @ WORD32 dst_strd, |
| @ WORD32 ui_neighboravailability) |
| |
| @**************Variables Vs Registers***************************************** |
| @ r0 => *pu1_src |
| @ r1 => *pu1_dst |
| @ r2 => src_strd |
| @ r3 => dst_strd |
| @ r4 => ui_neighboravailability |
| |
| .global ih264_intra_pred_chroma_8x8_mode_dc_a9q |
| |
| @ DC prediction for an 8x8 chroma block written as 8 rows of 16 bytes. |
| @ In: r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd, |
| @ [sp] on entry = ui_neighboravailability (bit 0 = left, bit 2 = top). |
| @ r2 (src_strd) is never used as a stride; it is overwritten as scratch. |
| @ Reads 16 bytes at pu1_src (left) and/or 16 bytes at pu1_src + 18 (top), |
| @ depending on which neighbours are flagged available. |
| @ Out: q4 is stored to rows 0-3 and q5 to rows 4-7. Each 8-byte half of a |
| @ row is one 16-bit sample pair (two bytes) replicated four times. |
| ih264_intra_pred_chroma_8x8_mode_dc_a9q: |
| |
| stmfd sp!, {r4, r14} @store register values to stack |
| ldr r4, [sp, #8] @r4 => ui_neighboravailability (stack argument, just above the 8 bytes pushed) |
| vpush {d8-d15} @d8-d15 (q4-q7) are written below, so preserve them for the caller |
| |
| ands r2, r4, #0x01 @CHECKING IF LEFT_AVAILABLE ELSE BRANCHING TO ONLY TOP AVAILABLE |
| beq top_available |
| ands r2, r4, #0x04 @CHECKING IF TOP_AVAILABLE ELSE BRANCHING TO ONLY LEFT AVAILABLE |
| beq left_available |
| |
| vld1.u8 {q0}, [r0] @BOTH LEFT AND TOP AVAILABLE: d0 = left[0..7], d1 = left[8..15] |
| add r0, r0, #18 |
| vld1.u8 {q1}, [r0] @d2 = top[0..7], d3 = top[8..15] |
| vaddl.u8 q2, d1, d2 @left[8..15] + top[0..7], widened to 16 bit (rows 0-3, first half) |
| vaddl.u8 q3, d0, d3 @left[0..7] + top[8..15], widened to 16 bit (rows 4-7, second half) |
| vmovl.u8 q1, d3 @top[8..15] alone (rows 0-3, second half) |
| vmovl.u8 q0, d0 @left[0..7] alone (rows 4-7, first half) |
| |
| @ Fold each 8-lane vector to 4 lanes; even/odd lanes stay separate. |
| vadd.u16 d12, d4, d5 |
| vadd.u16 d13, d2, d3 |
| vadd.u16 d15, d6, d7 |
| vadd.u16 d14, d0, d1 |
| |
| @ A 32-bit pairwise add sums both 16-bit halves of each word at once, so the |
| @ even-lane and odd-lane totals land side by side. |
| vpadd.u32 d12, d12, d15 @d12 = {q2 totals, q3 totals}: 8 samples per total |
| vpadd.u32 d14, d13, d14 @d14 = {top[8..15] totals, left[0..7] totals}: 4 samples per total |
| vqrshrun.s16 d12, q6, #3 @rounded /8; only the low 4 result bytes are used |
| vqrshrun.s16 d14, q7, #2 @rounded /4; only the low 4 result bytes are used |
| vdup.u16 d8, d12[0] @rows 0-3, first 8 bytes |
| vdup.u16 d9, d14[0] @rows 0-3, last 8 bytes |
| vdup.u16 d10, d14[1] @rows 4-7, first 8 bytes |
| vdup.u16 d11, d12[1] @rows 4-7, last 8 bytes |
| b str_pred |
| |
| top_available: @ONLY TOP AVAILABLE |
| ands r2, r4, #0x04 @CHECKING TOP AVAILABILITY OR ELSE BRANCH TO NONE AVAILABLE |
| beq none_available |
| |
| add r0, r0, #18 |
| vld1.u8 {q0}, [r0] @d0 = top[0..7], d1 = top[8..15] |
| vmovl.u8 q1, d0 |
| vmovl.u8 q2, d1 |
| vadd.u16 d0, d2, d3 |
| vadd.u16 d1, d4, d5 |
| vpaddl.u32 q0, q0 @each d register: low two 16-bit lanes = even-lane / odd-lane totals |
| vqrshrun.s16 d0, q0, #2 @rounded /4; 16-bit lane 0 comes from top[0..7], lane 2 from top[8..15] |
| vdup.u16 d8, d0[0] |
| vdup.u16 d9, d0[2] |
| vmov q5, q4 @rows 4-7 use the same values as rows 0-3 |
| b str_pred |
| |
| left_available: @ONLY LEFT AVAILABLE |
| vld1.u8 {q0}, [r0] @d0 = left[0..7], d1 = left[8..15] |
| vmovl.u8 q1, d0 |
| vmovl.u8 q2, d1 |
| vadd.u16 d0, d2, d3 |
| vadd.u16 d1, d4, d5 |
| vpaddl.u32 q0, q0 |
| vqrshrun.s16 d0, q0, #2 @rounded /4; 16-bit lane 0 comes from left[0..7], lane 2 from left[8..15] |
| vdup.u16 q5, d0[0] @rows 4-7 take the value derived from left[0..7] |
| vdup.u16 q4, d0[2] @rows 0-3 take the value derived from left[8..15] |
| b str_pred |
| |
| none_available: @NONE AVAILABLE |
| vmov.u8 q4, #128 @every output byte becomes 128 |
| vmov.u8 q5, #128 |
| |
| str_pred: |
| @ Write the block: 4 rows from q4, then 4 rows from q5; r1 advances by r3 per row. |
| vst1.8 {q4}, [r1], r3 |
| vst1.8 {q4}, [r1], r3 |
| vst1.8 {q4}, [r1], r3 |
| vst1.8 {q4}, [r1], r3 |
| vst1.8 {q5}, [r1], r3 |
| vst1.8 {q5}, [r1], r3 |
| vst1.8 {q5}, [r1], r3 |
| vst1.8 {q5}, [r1], r3 |
| |
| vpop {d8-d15} |
| ldmfd sp!, {r4, pc} @Restoring registers from stack |
| |
| |
| |
| @****************************************************************************** |
| |
| |
| @** |
| @******************************************************************************* |
| @* |
| @*ih264_intra_pred_chroma_8x8_mode_horz |
| @* |
| @* @brief |
| @* Perform Intra prediction for chroma_8x8 mode:Horizontal |
| @* |
| @* @par Description: |
| @* Perform Intra prediction for chroma_8x8 mode:Horizontal ,described in sec 8.3.4.2 |
| @* |
| @* @param[in] pu1_src |
| @* UWORD8 pointer to the source containing alternate U and V samples |
| @* |
| @* @param[out] pu1_dst |
| @* UWORD8 pointer to the destination with alternate U and V samples |
| @* |
| @* @param[in] src_strd |
| @* integer source stride |
| @* |
| @* @param[in] dst_strd |
| @* integer destination stride |
| @* |
| @* @param[in] ui_neighboravailability |
| @* availability of neighbouring pixels(Not used in this function) |
| @* |
| @* @returns |
| @* |
| @* @remarks |
| @* None |
| @* |
| @******************************************************************************* |
| @* |
| @void ih264_intra_pred_chroma_8x8_mode_horz(UWORD8 *pu1_src, |
| @ UWORD8 *pu1_dst, |
| @ WORD32 src_strd, |
| @ WORD32 dst_strd, |
| @ WORD32 ui_neighboravailability) |
| @**************Variables Vs Registers***************************************** |
| @ r0 => *pu1_src |
| @ r1 => *pu1_dst |
| @ r2 => src_strd |
| @ r3 => dst_strd |
| @ stack => ui_neighboravailability (not read by this function) |
| |
| |
| .global ih264_intra_pred_chroma_8x8_mode_horz_a9q |
| |
| @ Horizontal prediction for an 8x8 chroma block written as 8 rows of 16 bytes. |
| @ In: r0 = pu1_src (16 bytes are read), r1 = pu1_dst, r3 = dst_strd. |
| @ r2 (src_strd) is overwritten and used as the loop counter. |
| @ Out: output row k is the 16-bit pair at pu1_src[2*(7-k)] replicated 8 times. |
| @ Leaf routine: only r0-r3 and q0-q2 are touched, so nothing is saved on the |
| @ stack and the return is a plain "bx lr". |
| ih264_intra_pred_chroma_8x8_mode_horz_a9q: |
| |
| vld1.u8 {q0}, [r0] @q0 = 8 source pairs |
| mov r2, #6 @3 loop passes, the counter drops by 2 per pass |
| |
| vdup.u16 q1, d1[3] @row 0 = pair 7 |
| vdup.u16 q2, d1[2] @row 1 = pair 6 |
| vst1.8 {q1}, [r1], r3 |
| |
| loop_8x8_horz: |
| vext.8 q0, q0, q0, #12 @rotate q0 by two pairs so the next two pairs sit in d1[3] and d1[2] |
| vst1.8 {q2}, [r1], r3 @store the odd row prepared on the previous pass |
| vdup.u16 q1, d1[3] |
| subs r2, #2 |
| vdup.u16 q2, d1[2] |
| vst1.8 {q1}, [r1], r3 |
| bne loop_8x8_horz |
| |
| vst1.8 {q2}, [r1], r3 @row 7 (pair 0); q0 is not needed again, so no further rotate |
| |
| bx lr |
| |
| |
| |
| |
| @** |
| @******************************************************************************* |
| @* |
| @*ih264_intra_pred_chroma_8x8_mode_vert |
| @* |
| @* @brief |
| @* Perform Intra prediction for chroma_8x8 mode:vertical |
| @* |
| @* @par Description: |
| @*Perform Intra prediction for chroma_8x8 mode:vertical ,described in sec 8.3.4.3 |
| @* |
| @* @param[in] pu1_src |
| @* UWORD8 pointer to the source containing alternate U and V samples |
| @* |
| @* @param[out] pu1_dst |
| @* UWORD8 pointer to the destination with alternate U and V samples |
| @* |
| @* @param[in] src_strd |
| @* integer source stride |
| @* |
| @* @param[in] dst_strd |
| @* integer destination stride |
| @* |
| @* @param[in] ui_neighboravailability |
| @* availability of neighbouring pixels(Not used in this function) |
| @* |
| @* @returns |
| @* |
| @* @remarks |
| @* None |
| @* |
| @******************************************************************************* |
| @void ih264_intra_pred_chroma_8x8_mode_vert(UWORD8 *pu1_src, |
| @ UWORD8 *pu1_dst, |
| @ WORD32 src_strd, |
| @ WORD32 dst_strd, |
| @ WORD32 ui_neighboravailability) |
| |
| @**************Variables Vs Registers***************************************** |
| @ r0 => *pu1_src |
| @ r1 => *pu1_dst |
| @ r2 => src_strd |
| @ r3 => dst_strd |
| @ stack => ui_neighboravailability (not read by this function) |
| |
| |
| .global ih264_intra_pred_chroma_8x8_mode_vert_a9q |
| |
| @ Vertical prediction for an 8x8 chroma block written as 8 rows of 16 bytes. |
| @ In: r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd (r2 is not used). |
| @ Out: the 16 bytes at pu1_src + 18 are copied to each of the 8 output rows. |
| @ Leaf routine: only r0, r1 and q0 are modified, so no core registers are |
| @ saved or restored and the return is a plain "bx lr". |
| ih264_intra_pred_chroma_8x8_mode_vert_a9q: |
| |
| add r0, r0, #18 @skip to the row of samples that gets replicated |
| vld1.8 {q0}, [r0] |
| |
| vst1.8 {q0}, [r1], r3 |
| vst1.8 {q0}, [r1], r3 |
| vst1.8 {q0}, [r1], r3 |
| vst1.8 {q0}, [r1], r3 |
| vst1.8 {q0}, [r1], r3 |
| vst1.8 {q0}, [r1], r3 |
| vst1.8 {q0}, [r1], r3 |
| vst1.8 {q0}, [r1], r3 |
| |
| bx lr |
| |
| |
| |
| |
| @****************************************************************************** |
| |
| |
| @** |
| @******************************************************************************* |
| @* |
| @*ih264_intra_pred_chroma_8x8_mode_plane |
| @* |
| @* @brief |
| @* Perform Intra prediction for chroma_8x8 mode:PLANE |
| @* |
| @* @par Description: |
| @* Perform Intra prediction for chroma_8x8 mode:PLANE ,described in sec 8.3.4.4 |
| @* |
| @* @param[in] pu1_src |
| @* UWORD8 pointer to the source containing alternate U and V samples |
| @* |
| @* @param[out] pu1_dst |
| @* UWORD8 pointer to the destination with alternate U and V samples |
| @* |
| @* @param[in] src_strd |
| @* integer source stride |
| @* |
| @* @param[in] dst_strd |
| @* integer destination stride |
| @* |
| @* @param[in] ui_neighboravailability |
| @* availability of neighbouring pixels |
| @* |
| @* @returns |
| @* |
| @* @remarks |
| @* None |
| @* |
| @******************************************************************************* |
| @void ih264_intra_pred_chroma_8x8_mode_plane(UWORD8 *pu1_src, |
| @ UWORD8 *pu1_dst, |
| @ WORD32 src_strd, |
| @ WORD32 dst_strd, |
| @ WORD32 ui_neighboravailability) |
| |
| @**************Variables Vs Registers***************************************** |
| @ r0 => *pu1_src |
| @ r1 => *pu1_dst |
| @ r2 => src_strd |
| @ r3 => dst_strd |
| @ stack => ui_neighboravailability (not read by this function) |
| |
| .global ih264_intra_pred_chroma_8x8_mode_plane_a9q |
| @ Plane prediction for an 8x8 chroma block written as 8 rows of 16 bytes. |
| @ In: r0 = pu1_src (bytes 0..33 are read), r1 = pu1_dst, r3 = dst_strd. |
| @ Outline: build two weighted gradients per interleaved component from |
| @ src[0..17] and src[16..33], scale them by 34 with a rounding >>6, add the |
| @ base (src[0]+src[32])<<4 resp. (src[1]+src[33])<<4, and emit each row with a |
| @ rounding >>5 saturated to 8 bits. |
| @ NOTE(review): the DC routine in this file treats src[0..15] as the left |
| @ neighbours and src[18..33] as the top neighbours; "left"/"top" below assume |
| @ the same layout - confirm against the caller. |
| @ NOTE(review): the contents of the two coefficient tables are not visible in |
| @ this file. |
| ih264_intra_pred_chroma_8x8_mode_plane_a9q: |
| |
| stmfd sp!, {r4-r10, r12, lr} |
| vpush {d8-d15} |
| |
| vld1.32 d0, [r0] @d0 = src[0..7] |
| add r10, r0, #10 |
| vld1.32 d1, [r10] @d1 = src[10..17] |
| add r10, r10, #6 |
| vrev64.16 d5, d0 @d5 = the four pairs of d0 in reverse order |
| vld1.32 d2, [r10]! @d2 = src[16..23]; writeback leaves r10 = src + 24 |
| add r10, r10, #2 |
| vrev64.16 d7, d2 @d7 = the four pairs of d2 in reverse order |
| vld1.32 d3, [r10] @d3 = src[26..33] |
| sub r5, r3, #8 @NOTE(review): r5 is never read afterwards |
| ldr r12, scratch_chroma_intrapred_addr1 |
| scrlblc1: |
| add r12, r12, pc @r12 = address of ih264_gai1_intrapred_chroma_plane_coeffs1 |
| vsubl.u8 q5, d5, d1 @left-side differences, widened to 16 bit |
| vld1.64 {q4}, [r12] @ q4 (d8:d9) = 16 bytes of weights from coeffs1, used as 8 x 16-bit |
| vsubl.u8 q6, d3, d7 @top-side differences, widened to 16 bit |
| vmul.s16 q7, q5, q4 |
| vmul.s16 q8, q6, q4 |
| vuzp.16 q7, q8 @de-interleave: d14/d15 = even lanes of left/top, d16/d17 = odd lanes of left/top |
| |
| @ Two pairwise-add rounds leave the sum of all four lanes in every lane. |
| vpadd.s16 d14, d14 |
| vpadd.s16 d15, d15 |
| vpadd.s16 d16, d16 |
| vpadd.s16 d17, d17 |
| vpadd.s16 d14, d14 |
| vpadd.s16 d15, d15 |
| vpadd.s16 d16, d16 |
| vpadd.s16 d17, d17 |
| |
| mov r6, #34 |
| vdup.16 q9, r6 |
| |
| @ gradient = (34 * sum + 32) >> 6, computed in 32 bit and narrowed to 16 bit |
| vmull.s16 q11, d14, d18 |
| vmull.s16 q12, d15, d18 |
| vmull.s16 q13, d16, d18 |
| vmull.s16 q14, d17, d18 |
| |
| vrshrn.s32 d10, q11, #6 @even lanes, left-side gradient |
| vrshrn.s32 d12, q12, #6 @even lanes, top-side gradient |
| vrshrn.s32 d13, q13, #6 @odd lanes, left-side gradient |
| vrshrn.s32 d14, q14, #6 @odd lanes, top-side gradient |
| |
| |
| ldrb r6, [r0], #1 @r6 = src[0] |
| add r10, r0, #31 @r10 = src + 32 (r0 was already advanced by 1) |
| ldrb r8, [r0], #1 @r8 = src[1] |
| ldrb r7, [r10], #1 @r7 = src[32] |
| ldrb r9, [r10], #1 @r9 = src[33] |
| |
| add r6, r6, r7 |
| add r8, r8, r9 |
| lsl r6, r6, #4 @r6 = (src[0] + src[32]) << 4 |
| lsl r8, r8, #4 @r8 = (src[1] + src[33]) << 4 |
| |
| vdup.16 q0, r6 |
| vdup.16 q1, r8 |
| vdup.16 q2, d12[0] |
| vdup.16 q3, d10[0] |
| |
| vdup.16 q12, d14[0] |
| vdup.16 q13, d13[0] |
| @ Re-interleave even-lane and odd-lane values to match the output layout. |
| vzip.16 q2, q12 @q2 = top-side gradient per lane |
| vzip.16 q3, q13 @q3 = left-side gradient per lane |
| vzip.16 q0, q1 @q0 = base term per lane |
| |
| ldr r12, scratch_intrapred_chroma_plane_addr1 |
| scrlblc2: |
| add r12, r12, pc @r12 = address of ih264_gai1_intrapred_chroma_plane_coeffs2 |
| vld1.64 {q4}, [r12] @q4 = 8 x 16-bit position coefficients |
| vmov.16 q5, q4 |
| vmov q11, q4 @q11 keeps the un-zipped coefficients for the per-row scalars |
| vzip.16 q4, q5 @q4 = coefficients 0-3, each doubled; q5 = coefficients 4-7, each doubled |
| |
| vmul.s16 q6, q2, q4 |
| vmul.s16 q8, q2, q5 |
| vadd.s16 q6, q0, q6 @q6 = row-independent part for the first 8 output bytes |
| vadd.s16 q8, q0, q8 @q8 = row-independent part for the last 8 output bytes |
| |
| |
| @ Per row k: out = sat8((q6|q8 + q3 * coeff[k] + 16) >> 5), with coeff[k] |
| @ taken from q11 (d22[0..3], d23[0..3]). Rows are software-pipelined two at a |
| @ time, alternating the q10/q2/q9/q12/q0 and q15/q7/q4/q1/q13 register sets. |
| @ NOTE(review): each vmul pair (q2/q9 and q7/q4) uses identical operands, so |
| @ both results are equal. |
| vdup.16 q10, d22[0] |
| vmul.s16 q2, q3, q10 |
| vdup.16 q15, d22[1] |
| vmul.s16 q9, q3, q10 |
| vmul.s16 q7, q3, q15 |
| vmul.s16 q4, q3, q15 |
| vadd.s16 q12, q6, q2 |
| vadd.s16 q0, q8, q9 |
| vadd.s16 q1, q6, q7 |
| vqrshrun.s16 d28, q12, #5 |
| vadd.s16 q13, q8, q4 |
| vqrshrun.s16 d29, q0, #5 |
| vdup.16 q10, d22[2] |
| vst1.8 {q14}, [r1], r3 @row 0 |
| vqrshrun.s16 d28, q1, #5 |
| vqrshrun.s16 d29, q13, #5 |
| vmul.s16 q2, q3, q10 |
| vmul.s16 q9, q3, q10 |
| vst1.8 {q14}, [r1], r3 @row 1 |
| vadd.s16 q12, q6, q2 |
| vadd.s16 q0, q8, q9 |
| vdup.16 q15, d22[3] |
| vqrshrun.s16 d28, q12, #5 |
| vqrshrun.s16 d29, q0, #5 |
| vmul.s16 q7, q3, q15 |
| vmul.s16 q4, q3, q15 |
| vst1.8 {q14}, [r1], r3 @row 2 |
| vadd.s16 q1, q6, q7 |
| vadd.s16 q13, q8, q4 |
| vdup.16 q10, d23[0] |
| vqrshrun.s16 d28, q1, #5 |
| vqrshrun.s16 d29, q13, #5 |
| vmul.s16 q2, q3, q10 |
| vmul.s16 q9, q3, q10 |
| vst1.8 {q14}, [r1], r3 @row 3 |
| vadd.s16 q12, q6, q2 |
| vadd.s16 q0, q8, q9 |
| vdup.16 q15, d23[1] |
| vqrshrun.s16 d28, q12, #5 |
| vqrshrun.s16 d29, q0, #5 |
| vmul.s16 q7, q3, q15 |
| vmul.s16 q4, q3, q15 |
| vst1.8 {q14}, [r1], r3 @row 4 |
| vadd.s16 q1, q6, q7 |
| vadd.s16 q13, q8, q4 |
| vdup.16 q10, d23[2] |
| vqrshrun.s16 d28, q1, #5 |
| vqrshrun.s16 d29, q13, #5 |
| vmul.s16 q2, q3, q10 |
| vmul.s16 q9, q3, q10 |
| vst1.8 {q14}, [r1], r3 @row 5 |
| vadd.s16 q12, q6, q2 |
| vadd.s16 q0, q8, q9 |
| vdup.16 q15, d23[3] |
| vqrshrun.s16 d28, q12, #5 |
| vqrshrun.s16 d29, q0, #5 |
| vmul.s16 q7, q3, q15 |
| vmul.s16 q4, q3, q15 |
| vst1.8 {q14}, [r1], r3 @row 6 |
| vadd.s16 q1, q6, q7 |
| vadd.s16 q13, q8, q4 |
| vqrshrun.s16 d28, q1, #5 |
| vqrshrun.s16 d29, q13, #5 |
| vst1.8 {q14}, [r1], r3 @row 7 |
| |
| |
| |
| end_func_plane: |
| |
| vpop {d8-d15} |
| ldmfd sp!, {r4-r10, r12, pc} |
| |
| |
| |
| |