| @/****************************************************************************** |
| @ * |
| @ * Copyright (C) 2015 The Android Open Source Project |
| @ * |
| @ * Licensed under the Apache License, Version 2.0 (the "License"); |
| @ * you may not use this file except in compliance with the License. |
| @ * You may obtain a copy of the License at: |
| @ * |
| @ * http://www.apache.org/licenses/LICENSE-2.0 |
| @ * |
| @ * Unless required by applicable law or agreed to in writing, software |
| @ * distributed under the License is distributed on an "AS IS" BASIS, |
| @ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| @ * See the License for the specific language governing permissions and |
| @ * limitations under the License. |
| @ * |
| @ ***************************************************************************** |
| @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
| @*/ |
| @** |
| @****************************************************************************** |
| @* @file |
| @* ih264_intra_pred_luma_8x8_a9q.s |
| @* |
| @* @brief |
| @* Contains function definitions for intra 8x8 Luma prediction . |
| @* |
| @* @author |
| @* Ittiam |
| @* |
| @* @par List of Functions: |
| @* |
| @* -ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q |
| @* -ih264_intra_pred_luma_8x8_mode_vert_a9q |
| @* -ih264_intra_pred_luma_8x8_mode_horz_a9q |
| @* -ih264_intra_pred_luma_8x8_mode_dc_a9q |
| @* -ih264_intra_pred_luma_8x8_mode_diag_dl_a9q |
| @* -ih264_intra_pred_luma_8x8_mode_diag_dr_a9q |
| @* -ih264_intra_pred_luma_8x8_mode_vert_r_a9q |
| @* -ih264_intra_pred_luma_8x8_mode_horz_d_a9q |
| @* -ih264_intra_pred_luma_8x8_mode_vert_l_a9q |
| @* -ih264_intra_pred_luma_8x8_mode_horz_u_a9q |
| @* |
| @* @remarks |
| @* None |
| @* |
| @******************************************************************************* |
| @* |
| |
| @* All the functions here are replicated from ih264_intra_pred_filters.c |
| @ |
| |
| .text |
| .p2align 2 |
| |
| .extern ih264_gai1_intrapred_luma_8x8_horz_u |
| .hidden ih264_gai1_intrapred_luma_8x8_horz_u |
| scratch_intrapred_addr_8x8: |
| .long ih264_gai1_intrapred_luma_8x8_horz_u - scrlb8x8l2 - 8 |
| |
| @** |
| @******************************************************************************* |
| @* |
| @*ih264_intra_pred_luma_8x8_mode_ref_filtering |
| @* |
| @* @brief |
| @* Reference sample filtering process for Intra_8x8 sample prediction |
| @* |
| @* @par Description: |
| @* Perform Reference sample filtering process for Intra_8x8 sample prediction ,described in sec 8.3.2.2.1 |
| @* |
| @* @param[in] pu1_src |
| @* UWORD8 pointer to the source |
| @* |
| @* @param[out] pu1_dst |
| @* UWORD8 pointer to the destination |
| @* |
| @* @param[in] src_strd |
| @* integer source stride [Not used] |
| @* |
| @* @param[in] dst_strd |
| @* integer destination stride[Not used] |
| @* |
| @* @param[in] ui_neighboravailability |
| @* availability of neighbouring pixels[Not used] |
| @* |
| @* @returns |
| @* |
| @* @remarks |
| @* None |
| @* |
| @******************************************************************************* |
| @void ih264_intra_pred_luma_8x8_mode_ref_filtering(UWORD8 *pu1_src, |
| @ UWORD8 *pu1_dst) |
| |
| @**************Variables Vs Registers***************************************** |
| @ r0 => *pu1_src |
| @ r1 => *pu1_dst |
| |
| |
| .global ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q |
| |
| ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q: |
| |
| stmfd sp!, {r4-r12, r14} @store register values to stack |
| vpush {d8-d15} |
| |
| vld1.u8 {q0}, [r0]! @ |
| vld1.u8 {q1}, [r0] |
| add r0, r0, #8 @ |
| vext.8 q2, q0, q1, #1 |
| vext.8 q3, q1, q1, #1 |
| vext.8 q4, q2, q3, #1 |
| vext.8 q5, q3, q3, #1 |
| vld1.8 {d10[7]}, [r0] @ LOADING SRC[24] AGIN TO THE END FOR p'[ 15, -1 ] = ( p[ 14, -1 ] + 3 * p[ 15, -1 ] + 2 ) >> 2 |
| vaddl.u8 q10, d0, d4 |
| vaddl.u8 q7, d0, d0 @ SPECIAL CASE FOR p'[ -1 ,7 ] = ( p[ -1, 6 ] + 3 * p[ -1, 7 ] + 2 ) >> 2 |
| vadd.u16 q7, q10, q7 |
| vaddl.u8 q11, d1, d5 |
| vqrshrun.s16 d14, q7, #2 |
| vaddl.u8 q12, d4, d8 |
| vaddl.u8 q13, d5, d9 |
| vst1.8 {d14[0]}, [r1]! |
| vadd.u16 q12, q10, q12 |
| vadd.u16 q13, q11, q13 |
| vaddl.u8 q9, d2, d6 |
| vaddl.u8 q8, d6, d10 |
| vqrshrun.s16 d4, q12, #2 |
| vqrshrun.s16 d5, q13, #2 |
| vadd.u16 q6, q8, q9 |
| vst1.8 {q2}, [r1]! |
| vqrshrun.s16 d6, q6, #2 |
| vst1.8 {d6}, [r1] |
| |
| |
| end_func_ref_filt: |
| |
| vpop {d8-d15} |
| ldmfd sp!, {r4-r12, pc} @Restoring registers from stack |
| |
| |
| |
| |
| |
| |
| @** |
| @******************************************************************************* |
| @* |
| @*ih264_intra_pred_luma_8x8_mode_vert |
| @* |
| @* @brief |
| @* Perform Intra prediction for luma_8x8 mode:vertical |
| @* |
| @* @par Description: |
| @* Perform Intra prediction for luma_8x8 mode:vertical ,described in sec 8.3.2.2.2 |
| @* |
| @* @param[in] pu1_src |
| @* UWORD8 pointer to the source |
| @* |
| @* @param[out] pu1_dst |
| @* UWORD8 pointer to the destination |
| @* |
| @* @param[in] src_strd |
| @* integer source stride |
| @* |
| @* @param[in] dst_strd |
| @* integer destination stride |
| @* |
| @* @param[in] ui_neighboravailability |
| @* availability of neighbouring pixels(Not used in this function) |
| @* |
| @* @returns |
| @* |
| @* @remarks |
| @* None |
| @* |
| @******************************************************************************* |
| @void ih264_intra_pred_luma_8x8_mode_vert(UWORD8 *pu1_src, |
| @ UWORD8 *pu1_dst, |
| @ WORD32 src_strd, |
| @ WORD32 dst_strd, |
| @ WORD32 ui_neighboravailability) |
| |
| @**************Variables Vs Registers***************************************** |
| @ r0 => *pu1_src |
| @ r1 => *pu1_dst |
| @ r2 => src_strd |
| @ r3 => dst_strd |
| @ r4 => ui_neighboravailability |
| |
| |
| .global ih264_intra_pred_luma_8x8_mode_vert_a9q |
| |
| ih264_intra_pred_luma_8x8_mode_vert_a9q: |
| |
| stmfd sp!, {r4-r12, r14} @store register values to stack |
| |
| add r0, r0, #9 |
| vld1.8 d0, [r0] |
| |
| vst1.8 d0, [r1], r3 |
| vst1.8 d0, [r1], r3 |
| vst1.8 d0, [r1], r3 |
| vst1.8 d0, [r1], r3 |
| vst1.8 d0, [r1], r3 |
| vst1.8 d0, [r1], r3 |
| vst1.8 d0, [r1], r3 |
| vst1.8 d0, [r1], r3 |
| |
| ldmfd sp!, {r4-r12, pc} @Restoring registers from stack |
| |
| |
| |
| |
| |
| @****************************************************************************** |
| |
| |
| @** |
| @******************************************************************************* |
| @* |
| @*ih264_intra_pred_luma_8x8_mode_horz |
| @* |
| @* @brief |
| @* Perform Intra prediction for luma_8x8 mode:horizontal |
| @* |
| @* @par Description: |
| @* Perform Intra prediction for luma_8x8 mode:horizontal ,described in sec 8.3.2.2.2 |
| @* |
| @* @param[in] pu1_src |
| @* UWORD8 pointer to the source |
| @* |
| @* @param[out] pu1_dst |
| @* UWORD8 pointer to the destination |
| @* |
| @* @param[in] src_strd |
| @* integer source stride |
| @* |
| @* @param[in] dst_strd |
| @* integer destination stride |
| @* |
| @* @param[in] ui_neighboravailability |
| @* availability of neighbouring pixels(Not used in this function) |
| @* |
| @* @returns |
| @* |
| @* @remarks |
| @* None |
| @* |
| @******************************************************************************* |
| @* |
| @void ih264_intra_pred_luma_8x8_mode_horz(UWORD8 *pu1_src, |
| @ UWORD8 *pu1_dst, |
| @ WORD32 src_strd, |
| @ WORD32 dst_strd, |
| @ WORD32 ui_neighboravailability) |
| @**************Variables Vs Registers***************************************** |
| @ r0 => *pu1_src |
| @ r1 => *pu1_dst |
| @ r2 => src_strd |
| @ r3 => dst_strd |
| @ r4 => ui_neighboravailability |
| |
| |
| .global ih264_intra_pred_luma_8x8_mode_horz_a9q |
| |
| ih264_intra_pred_luma_8x8_mode_horz_a9q: |
| |
| stmfd sp!, {r14} @store register values to stack |
| |
| vld1.u8 {d0}, [r0] |
| mov r2, #6 |
| |
| vdup.u8 d1, d0[7] |
| vdup.u8 d2, d0[6] |
| vst1.8 {d1}, [r1], r3 |
| |
| loop_8x8_horz: |
| vext.8 d0, d0, d0, #6 |
| vst1.8 {d2}, [r1], r3 |
| vdup.u8 d1, d0[7] |
| subs r2, #2 |
| vdup.u8 d2, d0[6] |
| vst1.8 {d1}, [r1], r3 |
| bne loop_8x8_horz |
| |
| vext.8 d0, d0, d0, #6 |
| vst1.8 {d2}, [r1], r3 |
| |
| ldmfd sp!, {pc} @restoring registers from stack |
| |
| |
| |
| |
| |
| @****************************************************************************** |
| |
| |
| @** |
| @******************************************************************************* |
| @* |
| @*ih264_intra_pred_luma_8x8_mode_dc |
| @* |
| @* @brief |
| @* Perform Intra prediction for luma_8x8 mode:DC |
| @* |
| @* @par Description: |
| @* Perform Intra prediction for luma_8x8 mode:DC ,described in sec 8.3.2.2.3 |
| @* |
| @* @param[in] pu1_src |
| @* UWORD8 pointer to the source |
| @* |
| @* @param[out] pu1_dst |
| @* UWORD8 pointer to the destination |
| @* |
| @* @param[in] src_strd |
| @* integer source stride |
| @* |
| @* @param[in] dst_strd |
| @* integer destination stride |
| @* |
| @* @param[in] ui_neighboravailability |
| @* availability of neighbouring pixels |
| @* |
| @* @returns |
| @* |
| @* @remarks |
| @* None |
| @* |
| @******************************************************************************* |
| @void ih264_intra_pred_luma_8x8_mode_dc(UWORD8 *pu1_src, |
| @ UWORD8 *pu1_dst, |
| @ WORD32 src_strd, |
| @ WORD32 dst_strd, |
| @ WORD32 ui_neighboravailability) |
| |
| @**************Variables Vs Registers***************************************** |
| @ r0 => *pu1_src |
| @ r1 => *pu1_dst |
| @ r2 => src_strd |
| @ r3 => dst_strd |
| @ r4 => ui_neighboravailability |
| |
| |
| .global ih264_intra_pred_luma_8x8_mode_dc_a9q |
| |
| ih264_intra_pred_luma_8x8_mode_dc_a9q: |
| |
| stmfd sp!, {r4, r14} @store register values to stack |
| ldr r4, [sp, #8] @r4 => ui_neighboravailability |
| |
| ands r2, r4, #0x01 @CHECKING IF LEFT_AVAILABLE ELSE BRANCHING TO ONLY TOP AVAILABLE |
| beq top_available |
| ands r2, r4, #0x04 @CHECKING IF TOP_AVAILABLE ELSE BRANCHING TO ONLY LEFT AVAILABLE |
| beq left_available |
| |
| vld1.u8 {d0}, [r0] @BOTH LEFT AND TOP AVAILABLE |
| add r0, r0, #9 |
| vld1.u8 {d1}, [r0] |
| vpaddl.u8 q0, q0 |
| vadd.u16 d0, d0, d1 |
| vpaddl.u16 d0, d0 |
| vpaddl.u32 d0, d0 |
| vqrshrun.s16 d0, q0, #4 |
| vdup.u8 d0, d0[0] |
| b str_pred |
| |
| top_available: @ONLY TOP AVAILABLE |
| ands r2, r4, #0x04 @CHECKING TOP AVAILABILTY OR ELSE BRANCH TO NONE AVAILABLE |
| beq none_available |
| |
| add r0, r0, #9 |
| vld1.u8 {d0}, [r0] |
| vpaddl.u8 d0, d0 |
| vpaddl.u16 d0, d0 |
| vpaddl.u32 d0, d0 |
| vqrshrun.s16 d0, q0, #3 |
| vdup.u8 d0, d0[0] |
| b str_pred |
| |
| left_available: @ONLY LEFT AVAILABLE |
| vld1.u8 {d0}, [r0] |
| vpaddl.u8 d0, d0 |
| vpaddl.u16 d0, d0 |
| vpaddl.u32 d0, d0 |
| vqrshrun.s16 d0, q0, #3 |
| vdup.u8 d0, d0[0] |
| b str_pred |
| |
| none_available: @NONE AVAILABLE |
| vmov.u8 q0, #128 |
| |
| str_pred: |
| vst1.8 {d0}, [r1], r3 |
| vst1.8 {d0}, [r1], r3 |
| vst1.8 {d0}, [r1], r3 |
| vst1.8 {d0}, [r1], r3 |
| vst1.8 {d0}, [r1], r3 |
| vst1.8 {d0}, [r1], r3 |
| vst1.8 {d0}, [r1], r3 |
| vst1.8 {d0}, [r1], r3 |
| |
| ldmfd sp!, {r4, pc} @Restoring registers from stack |
| |
| |
| |
| |
| |
| |
| @** |
| @******************************************************************************* |
| @* |
| @*ih264_intra_pred_luma_8x8_mode_diag_dl |
| @* |
| @* @brief |
| @* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left |
| @* |
| @* @par Description: |
| @* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left ,described in sec 8.3.2.2.4 |
| @* |
| @* @param[in] pu1_src |
| @* UWORD8 pointer to the source |
| @* |
| @* @param[out] pu1_dst |
| @* UWORD8 pointer to the destination |
| @* |
| @* @param[in] src_strd |
| @* integer source stride |
| @* |
| @* @param[in] dst_strd |
| @* integer destination stride |
| @* |
| @* @param[in] ui_neighboravailability |
| @* availability of neighbouring pixels |
| @* |
| @* @returns |
| @* |
| @* @remarks |
| @* None |
| @* |
| @******************************************************************************* |
| @void ih264_intra_pred_luma_8x8_mode_diag_dl(UWORD8 *pu1_src, |
| @ UWORD8 *pu1_dst, |
| @ WORD32 src_strd, |
| @ WORD32 dst_strd, |
| @ WORD32 ui_neighboravailability) |
| |
| @**************Variables Vs Registers***************************************** |
| @ r0 => *pu1_src |
| @ r1 => *pu1_dst |
| @ r2 => src_strd |
| @ r3 => dst_strd |
| @ r4 => ui_neighboravailability |
| |
| .global ih264_intra_pred_luma_8x8_mode_diag_dl_a9q |
| |
| ih264_intra_pred_luma_8x8_mode_diag_dl_a9q: |
| |
| stmfd sp!, {r4-r12, r14} @store register values to stack |
| |
| add r0, r0, #9 |
| sub r5, r3, #4 |
| add r6, r0, #15 |
| vld1.8 {q0}, [r0] |
| vext.8 q2, q0, q0, #2 |
| vext.8 q1, q0, q0, #1 |
| vld1.8 {d5[6]}, [r6] |
| @ q1 = q0 shifted to left once |
| @ q2 = q1 shifted to left once |
| vaddl.u8 q10, d0, d2 @Adding for FILT121 |
| vaddl.u8 q11, d1, d3 |
| vaddl.u8 q12, d2, d4 |
| vaddl.u8 q13, d3, d5 |
| vadd.u16 q12, q10, q12 |
| vadd.u16 q13, q11, q13 |
| |
| vqrshrun.s16 d4, q12, #2 |
| vqrshrun.s16 d5, q13, #2 |
| @Q2 has all FILT121 values |
| vst1.8 {d4}, [r1], r3 |
| vext.8 q9, q2, q2, #1 |
| vext.8 q8, q9, q9, #1 |
| vst1.8 {d18}, [r1], r3 |
| vext.8 q15, q8, q8, #1 |
| vst1.8 {d16}, [r1], r3 |
| vst1.8 {d30}, [r1], r3 |
| vst1.32 {d4[1]}, [r1]! |
| vst1.32 {d5[0]}, [r1], r5 |
| vst1.32 {d18[1]}, [r1]! |
| vst1.32 {d19[0]}, [r1], r5 |
| vst1.32 {d16[1]}, [r1]! |
| vst1.32 {d17[0]}, [r1], r5 |
| vst1.32 {d30[1]}, [r1]! |
| vst1.32 {d31[0]}, [r1], r5 |
| |
| |
| end_func_diag_dl: |
| ldmfd sp!, {r4-r12, pc} @Restoring registers from stack |
| |
| |
| |
| |
| @** |
| @******************************************************************************* |
| @* |
| @*ih264_intra_pred_luma_8x8_mode_diag_dr |
| @* |
| @* @brief |
| @* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right |
| @* |
| @* @par Description: |
| @* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right ,described in sec 8.3.2.2.5 |
| @* |
| @* @param[in] pu1_src |
| @* UWORD8 pointer to the source |
| @* |
| @* @param[out] pu1_dst |
| @* UWORD8 pointer to the destination |
| @* |
| @* @param[in] src_strd |
| @* integer source stride |
| @* |
| @* @param[in] dst_strd |
| @* integer destination stride |
| @* |
| @* @param[in] ui_neighboravailability |
| @* availability of neighbouring pixels |
| @* |
| @* @returns |
| @* |
| @* @remarks |
| @* None |
| @* |
| @******************************************************************************* |
| @void ih264_intra_pred_luma_8x8_mode_diag_dr(UWORD8 *pu1_src, |
| @ UWORD8 *pu1_dst, |
| @ WORD32 src_strd, |
| @ WORD32 dst_strd, |
| @ WORD32 ui_neighboravailability) |
| |
| @**************Variables Vs Registers***************************************** |
| @ r0 => *pu1_src |
| @ r1 => *pu1_dst |
| @ r2 => src_strd |
| @ r3 => dst_strd |
| @ r4 => ui_neighboravailability |
| |
| |
| .global ih264_intra_pred_luma_8x8_mode_diag_dr_a9q |
| |
| ih264_intra_pred_luma_8x8_mode_diag_dr_a9q: |
| |
| stmfd sp!, {r4-r12, r14} @store register values to stack |
| |
| |
| vld1.u8 {q0}, [r0] |
| add r0, r0, #1 |
| vld1.u8 {q1}, [r0] |
| vext.8 q2, q1, q1, #1 |
| @ q1 = q0 shifted to left once |
| @ q2 = q1 shifted to left once |
| vaddl.u8 q10, d0, d2 @Adding for FILT121 |
| vaddl.u8 q11, d1, d3 |
| vaddl.u8 q12, d2, d4 |
| vaddl.u8 q13, d3, d5 |
| vadd.u16 q12, q10, q12 |
| vadd.u16 q13, q11, q13 |
| vqrshrun.s16 d4, q12, #2 |
| vqrshrun.s16 d5, q13, #2 |
| @Q2 has all FILT121 values |
| sub r5, r3, #4 |
| vext.8 q9, q2, q2, #15 |
| vst1.8 {d19}, [r1], r3 |
| vext.8 q8, q9, q9, #15 |
| vst1.8 {d17}, [r1], r3 |
| vext.8 q15, q8, q8, #15 |
| vst1.8 {d31}, [r1], r3 |
| vst1.32 {d4[1]}, [r1]! |
| vst1.32 {d5[0]}, [r1], r5 |
| vst1.32 {d18[1]}, [r1]! |
| vst1.32 {d19[0]}, [r1], r5 |
| vst1.32 {d16[1]}, [r1]! |
| vst1.32 {d17[0]}, [r1], r5 |
| vst1.32 {d30[1]}, [r1]! |
| vst1.32 {d31[0]}, [r1], r5 |
| vst1.8 {d4}, [r1], r3 |
| |
| end_func_diag_dr: |
| ldmfd sp!, {r4-r12, pc} @Restoring registers from stack |
| |
| |
| |
| |
| @** |
| @******************************************************************************* |
| @* |
| @*ih264_intra_pred_luma_8x8_mode_vert_r |
| @* |
| @* @brief |
| @* Perform Intra prediction for luma_8x8 mode:Vertical_Right |
| @* |
| @* @par Description: |
| @* Perform Intra prediction for luma_8x8 mode:Vertical_Right ,described in sec 8.3.2.2.6 |
| @* |
| @* @param[in] pu1_src |
| @* UWORD8 pointer to the source |
| @* |
| @* @param[out] pu1_dst |
| @* UWORD8 pointer to the destination |
| @* |
| @* @param[in] src_strd |
| @* integer source stride |
| @* |
| @* @param[in] dst_strd |
| @* integer destination stride |
| @* |
| @* @param[in] ui_neighboravailability |
| @* availability of neighbouring pixels |
| @* |
| @* @returns |
| @* |
| @* @remarks |
| @* None |
| @* |
| @******************************************************************************* |
| @void ih264_intra_pred_luma_8x8_mode_vert_r(UWORD8 *pu1_src, |
| @ UWORD8 *pu1_dst, |
| @ WORD32 src_strd, |
| @ WORD32 dst_strd, |
| @ WORD32 ui_neighboravailability) |
| |
| @**************Variables Vs Registers***************************************** |
| @ r0 => *pu1_src |
| @ r1 => *pu1_dst |
| @ r2 => src_strd |
| @ r3 => dst_strd |
| @ r4 => ui_neighboravailability |
| |
| |
| .global ih264_intra_pred_luma_8x8_mode_vert_r_a9q |
| |
| ih264_intra_pred_luma_8x8_mode_vert_r_a9q: |
| |
| stmfd sp!, {r4-r12, r14} @store register values to stack |
| |
| vld1.u8 {q0}, [r0] |
| add r0, r0, #1 |
| vld1.u8 {q1}, [r0] |
| vext.8 q2, q1, q1, #1 |
| @ q1 = q0 shifted to left once |
| @ q2 = q1 shifted to left once |
| vaddl.u8 q10, d0, d2 |
| vaddl.u8 q11, d1, d3 |
| vaddl.u8 q12, d2, d4 |
| vaddl.u8 q13, d3, d5 |
| vadd.u16 q12, q10, q12 |
| vadd.u16 q13, q11, q13 |
| |
| vqrshrun.s16 d4, q10, #1 |
| vqrshrun.s16 d5, q11, #1 |
| vqrshrun.s16 d6, q12, #2 |
| vqrshrun.s16 d7, q13, #2 |
| @Q2 has all FILT11 values |
| @Q3 has all FILT121 values |
| sub r5, r3, #6 |
| sub r6, r3, #4 |
| vst1.8 {d5}, [r1], r3 @ row 0 |
| vext.8 q9, q3, q3, #15 |
| vmov.8 q11, q9 |
| vext.8 q8, q2, q2, #1 |
| vst1.8 {d19}, [r1], r3 @row 1 |
| |
| vmov.8 q15, q8 |
| vext.8 q10, q2, q2, #15 |
| vuzp.8 q8, q9 |
| @row 2 |
| vext.8 q14, q8, q8, #1 |
| vst1.8 {d21}, [r1] |
| vst1.8 {d6[6]}, [r1], r3 |
| @row 3 |
| |
| vst1.16 {d29[1]}, [r1]! |
| vst1.32 {d7[0]}, [r1]! |
| vst1.16 {d7[2]}, [r1], r5 |
| @row 4 |
| vst1.16 {d19[1]}, [r1]! |
| vst1.32 {d5[0]}, [r1]! |
| vst1.16 {d5[2]}, [r1], r5 |
| |
| @row 5 |
| vext.8 q13, q9, q9, #1 |
| vst1.16 {d17[1]}, [r1]! |
| vst1.32 {d23[0]}, [r1]! |
| vst1.16 {d23[2]}, [r1], r5 |
| |
| |
| @row 6 |
| vst1.16 {d27[0]}, [r1]! |
| vst1.8 {d27[2]}, [r1]! |
| vst1.8 {d5[0]}, [r1]! |
| vst1.32 {d31[0]}, [r1], r6 |
| @row 7 |
| vst1.32 {d29[0]}, [r1]! |
| vst1.32 {d7[0]}, [r1]! |
| |
| |
| |
| end_func_vert_r: |
| ldmfd sp!, {r4-r12, pc} @Restoring registers from stack |
| |
| |
| |
| |
| @** |
| @******************************************************************************* |
| @* |
| @*ih264_intra_pred_luma_8x8_mode_horz_d |
| @* |
| @* @brief |
| @* Perform Intra prediction for luma_8x8 mode:Horizontal_Down |
| @* |
| @* @par Description: |
| @* Perform Intra prediction for luma_8x8 mode:Horizontal_Down ,described in sec 8.3.2.2.7 |
| @* |
| @* @param[in] pu1_src |
| @* UWORD8 pointer to the source |
| @* |
| @* @param[out] pu1_dst |
| @* UWORD8 pointer to the destination |
| @* |
| @* @param[in] src_strd |
| @* integer source stride |
| @* |
| @* @param[in] dst_strd |
| @* integer destination stride |
| @* |
| @* @param[in] ui_neighboravailability |
| @* availability of neighbouring pixels |
| @* |
| @* @returns |
| @* |
| @* @remarks |
| @* None |
| @* |
| @******************************************************************************* |
| @void ih264_intra_pred_luma_8x8_mode_horz_d(UWORD8 *pu1_src, |
| @ UWORD8 *pu1_dst, |
| @ WORD32 src_strd, |
| @ WORD32 dst_strd, |
| @ WORD32 ui_neighboravailability) |
| |
| @**************Variables Vs Registers***************************************** |
| @ r0 => *pu1_src |
| @ r1 => *pu1_dst |
| @ r2 => src_strd |
| @ r3 => dst_strd |
| @ r4 => ui_neighboravailability |
| |
| .global ih264_intra_pred_luma_8x8_mode_horz_d_a9q |
| |
| ih264_intra_pred_luma_8x8_mode_horz_d_a9q: |
| |
| stmfd sp!, {r4-r12, r14} @store register values to stack |
| vpush {d8-d15} |
| |
| vld1.u8 {q0}, [r0] |
| add r0, r0, #1 |
| vld1.u8 {q1}, [r0] |
| vext.8 q2, q1, q1, #1 |
| @ q1 = q0 shifted to left once |
| @ q2 = q1 shifted to left once |
| vaddl.u8 q10, d0, d2 |
| vaddl.u8 q11, d1, d3 |
| vaddl.u8 q12, d2, d4 |
| vaddl.u8 q13, d3, d5 |
| vadd.u16 q12, q10, q12 |
| vadd.u16 q13, q11, q13 |
| |
| vqrshrun.s16 d4, q10, #1 |
| vqrshrun.s16 d5, q11, #1 |
| vqrshrun.s16 d6, q12, #2 |
| vqrshrun.s16 d7, q13, #2 |
| @Q2 has all FILT11 values |
| @Q3 has all FILT121 values |
| vmov.8 q4, q2 |
| vmov.8 q5, q3 |
| sub r6, r3, #6 |
| vtrn.8 q4, q5 @ |
| vmov.8 q6, q4 |
| vmov.8 q7, q5 |
| sub r5, r3, #4 |
| vtrn.16 q6, q7 |
| vext.8 q8, q3, q3, #14 |
| @ROW 0 |
| vst1.8 {d17}, [r1] |
| vst1.16 {d10[3]}, [r1], r3 |
| |
| @ROW 1 |
| vst1.32 {d14[1]}, [r1]! |
| vst1.32 {d7[0]}, [r1], r5 |
| @ROW 2 |
| vst1.16 {d10[2]}, [r1]! |
| vst1.32 {d14[1]}, [r1]! |
| vst1.16 {d7[0]}, [r1], r6 |
| @ROW 3 |
| vst1.32 {d12[1]}, [r1]! |
| vst1.32 {d14[1]}, [r1], r5 |
| @ROW 4 |
| vst1.16 {d14[1]}, [r1]! |
| vst1.32 {d12[1]}, [r1]! |
| vst1.16 {d14[2]}, [r1], r6 |
| @ROW 5 |
| vst1.32 {d14[0]}, [r1]! |
| vst1.32 {d12[1]}, [r1], r5 |
| @ROW 6 |
| vst1.16 {d10[0]}, [r1]! |
| vst1.16 {d8[1]}, [r1]! |
| vst1.16 {d14[1]}, [r1]! |
| vst1.16 {d12[2]}, [r1], r6 |
| @ROW 7 |
| vst1.32 {d12[0]}, [r1]! |
| vst1.32 {d14[0]}, [r1], r5 |
| |
| end_func_horz_d: |
| vpop {d8-d15} |
| ldmfd sp!, {r4-r12, pc} @Restoring registers from stack |
| |
| |
| |
| |
| |
| @** |
| @******************************************************************************* |
| @* |
| @*ih264_intra_pred_luma_8x8_mode_vert_l |
| @* |
| @* @brief |
| @* Perform Intra prediction for luma_8x8 mode:Vertical_Left |
| @* |
| @* @par Description: |
| @* Perform Intra prediction for luma_8x8 mode:Vertical_Left ,described in sec 8.3.2.2.8 |
| @* |
| @* @param[in] pu1_src |
| @* UWORD8 pointer to the source |
| @* |
| @* @param[out] pu1_dst |
| @* UWORD8 pointer to the destination |
| @* |
| @* @param[in] src_strd |
| @* integer source stride |
| @* |
| @* @param[in] dst_strd |
| @* integer destination stride |
| @* |
| @* @param[in] ui_neighboravailability |
| @* availability of neighbouring pixels |
| @* |
| @* @returns |
| @* |
| @* @remarks |
| @* None |
| @* |
| @******************************************************************************* |
| @void ih264_intra_pred_luma_8x8_mode_vert_l(UWORD8 *pu1_src, |
| @ UWORD8 *pu1_dst, |
| @ WORD32 src_strd, |
| @ WORD32 dst_strd, |
| @ WORD32 ui_neighboravailability) |
| |
| @**************Variables Vs Registers***************************************** |
| @ r0 => *pu1_src |
| @ r1 => *pu1_dst |
| @ r2 => src_strd |
| @ r3 => dst_strd |
| @ r4 => ui_neighboravailability |
| |
| |
| .global ih264_intra_pred_luma_8x8_mode_vert_l_a9q |
| |
| ih264_intra_pred_luma_8x8_mode_vert_l_a9q: |
| |
| stmfd sp!, {r4-r12, r14} @Restoring registers from stack |
| vpush {d8-d15} |
| |
| add r0, r0, #9 |
| vld1.u8 {q0}, [r0] |
| add r0, r0, #1 |
| vld1.u8 {q1}, [r0] |
| vext.8 q2, q1, q1, #1 |
| vaddl.u8 q10, d0, d2 |
| vaddl.u8 q11, d1, d3 |
| vaddl.u8 q12, d2, d4 |
| vaddl.u8 q13, d3, d5 |
| vadd.u16 q12, q10, q12 |
| vadd.u16 q13, q11, q13 |
| |
| vqrshrun.s16 d4, q10, #1 |
| vqrshrun.s16 d5, q11, #1 |
| vqrshrun.s16 d6, q12, #2 |
| vext.8 q4, q2, q2, #1 |
| vqrshrun.s16 d7, q13, #2 |
| @Q2 has all FILT11 values |
| @Q3 has all FILT121 values |
| |
| vext.8 q5, q3, q3, #1 |
| @ROW 0,1 |
| vst1.8 {d4}, [r1], r3 |
| vst1.8 {d6}, [r1], r3 |
| |
| vext.8 q6, q4, q4, #1 |
| vext.8 q7, q5, q5, #1 |
| @ROW 2,3 |
| vst1.8 {d8}, [r1], r3 |
| vst1.8 {d10}, [r1], r3 |
| |
| vext.8 q8, q6, q6, #1 |
| vext.8 q9, q7, q7, #1 |
| @ROW 4,5 |
| vst1.8 {d12}, [r1], r3 |
| vst1.8 {d14}, [r1], r3 |
| @ROW 6,7 |
| vst1.8 {d16}, [r1], r3 |
| vst1.8 {d18}, [r1], r3 |
| |
| end_func_vert_l: |
| vpop {d8-d15} |
| ldmfd sp!, {r4-r12, pc} @Restoring registers from stack |
| |
| |
| |
| |
| |
| @** |
| @******************************************************************************* |
| @* |
| @*ih264_intra_pred_luma_8x8_mode_horz_u |
| @* |
| @* @brief |
| @* Perform Intra prediction for luma_8x8 mode:Horizontal_Up |
| @* |
| @* @par Description: |
| @* Perform Intra prediction for luma_8x8 mode:Horizontal_Up ,described in sec 8.3.2.2.9 |
| @* |
| @* @param[in] pu1_src |
| @* UWORD8 pointer to the source |
| @* |
| @* @param[out] pu1_dst |
| @* UWORD8 pointer to the destination |
| @* |
| @* @param[in] src_strd |
| @* integer source stride |
| @* |
| @* @param[in] dst_strd |
| @* integer destination stride |
| @* |
| @* @param[in] ui_neighboravailability |
| @* availability of neighbouring pixels |
| @* |
| @* @returns |
| @* |
| @* @remarks |
| @* None |
| @* |
| @******************************************************************************* |
| @void ih264_intra_pred_luma_8x8_mode_horz_u(UWORD8 *pu1_src, |
| @ UWORD8 *pu1_dst, |
| @ WORD32 src_strd, |
| @ WORD32 dst_strd, |
| @ WORD32 ui_neighboravailability) |
| |
| @**************Variables Vs Registers***************************************** |
| @ r0 => *pu1_src |
| @ r1 => *pu1_dst |
| @ r2 => src_strd |
| @ r3 => dst_strd |
| @ r4 => ui_neighboravailability |
| |
| .global ih264_intra_pred_luma_8x8_mode_horz_u_a9q |
| |
| ih264_intra_pred_luma_8x8_mode_horz_u_a9q: |
| |
| stmfd sp!, {r4-r12, r14} @store register values to stack |
| vpush {d8-d15} |
| |
| vld1.u8 {q0}, [r0] |
| vld1.u8 {d1[7]}, [r0] |
| vext.8 q1, q0, q0, #1 |
| vext.8 q2, q1, q1, #1 |
| @ LOADING V TABLE |
| ldr r12, scratch_intrapred_addr_8x8 |
| scrlb8x8l2: |
| add r12, r12, pc |
| vaddl.u8 q10, d0, d2 |
| vaddl.u8 q11, d1, d3 |
| vaddl.u8 q12, d2, d4 |
| vaddl.u8 q13, d3, d5 |
| vadd.u16 q12, q10, q12 |
| vadd.u16 q13, q11, q13 |
| vld1.u8 {q5}, [r12] |
| vqrshrun.s16 d4, q10, #1 |
| vqrshrun.s16 d5, q11, #1 |
| vqrshrun.s16 d6, q12, #2 |
| vqrshrun.s16 d7, q13, #2 |
| @Q2 has all FILT11 values |
| @Q3 has all FILT121 values |
| vtbl.u8 d12, {q2, q3}, d10 |
| vdup.u8 q7, d5[7] @ |
| vtbl.u8 d13, {q2, q3}, d11 |
| vext.8 q8, q6, q7, #2 |
| vext.8 q9, q8, q7, #2 |
| vst1.8 {d12}, [r1], r3 |
| vext.8 q10, q9, q7, #2 |
| vst1.8 {d16}, [r1], r3 |
| vst1.8 {d18}, [r1], r3 |
| vst1.8 {d20}, [r1], r3 |
| vst1.8 {d13}, [r1], r3 |
| vst1.8 {d17}, [r1], r3 |
| vst1.8 {d19}, [r1], r3 |
| vst1.8 {d21}, [r1], r3 |
| |
| |
| end_func_horz_u: |
| vpop {d8-d15} |
| ldmfd sp!, {r4-r12, pc} @Restoring registers from stack |
| |
| |
| |
| |
| |
| |
| |
| |