| ///***************************************************************************** |
| //* |
| //* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
| //* |
| //* Licensed under the Apache License, Version 2.0 (the "License"); |
| //* you may not use this file except in compliance with the License. |
| //* You may obtain a copy of the License at: |
| //* |
| //* http://www.apache.org/licenses/LICENSE-2.0 |
| //* |
| //* Unless required by applicable law or agreed to in writing, software |
| //* distributed under the License is distributed on an "AS IS" BASIS, |
| //* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| //* See the License for the specific language governing permissions and |
| //* limitations under the License. |
| //* |
| //*****************************************************************************/ |
| ///** |
| // ******************************************************************************* |
| // * //file |
| // * ihevc_padding_neon.s |
| // * |
| // * //brief |
| // * contains function definitions padding |
| // * |
| // * //author |
| // * naveen sr |
| // * |
| // * //par list of functions: |
| // * - ihevc_pad_left_luma() |
| // * - ihevc_pad_left_chroma() |
| // * |
| // * //remarks |
| // * none |
| // * |
| // ******************************************************************************* |
| //*/ |
| |
| ///** |
| //******************************************************************************* |
| //* |
| //* //brief |
| //* padding (luma block) at the left of a 2d array |
| //* |
| //* //par description: |
| //* the left column of a 2d array is replicated for pad_size times at the left |
| //* |
| //* |
| //* //param[in] pu1_src |
| //* uword8 pointer to the source (left-most pixel of the first row) |
| //* |
| //* //param[in] src_strd |
| //* integer source stride |
| //* |
| //* //param[in] ht |
| //* integer height of the array |
| //* |
| //* //param[in] pad_size |
| //* integer padding size of the array |
| //* |
| //* //returns |
| //* |
| //* //remarks |
| //* none |
| //* |
| //******************************************************************************* |
| //*/ |
| //.if pad_left_luma == c |
| //void ihevc_pad_left_luma(uword8 *pu1_src, |
| // word32 src_strd, |
| // word32 ht, |
| // word32 pad_size) |
| //**************variables vs registers************************* |
| // x0 => *pu1_src |
| // x1 => src_strd |
| // x2 => ht |
| // x3 => pad_size |
| |
| .text |
| .align 4 |
| |
| .globl ihevc_pad_left_luma_av8 |
| |
| .type ihevc_pad_left_luma_av8, %function |
| |
| //----------------------------------------------------------------------- |
| // void ihevc_pad_left_luma_av8(uword8 *pu1_src, word32 src_strd, |
| // word32 ht, word32 pad_size) |
| // |
| // for every row, the byte at pu1_src[0] is replicated into the 80 bytes |
| // that start at pu1_src - pad_size; four rows are handled per iteration. |
| // |
| // in : x0 = pu1_src, w1 = src_strd, w2 = ht, w3 = pad_size |
| // note : the store length is fixed at 80 bytes (5 x 16) whatever pad_size |
| // is, so pad_size is expected to be 80; the loop only ends when ht |
| // counts down to exactly zero in steps of 4, so ht must be a |
| // positive multiple of 4 |
| // clobb: x0-x11, v0, v2, v4, v6, flags |
| //----------------------------------------------------------------------- |
| ihevc_pad_left_luma_av8: |
| // word32 arguments only define the low 32 bits of their registers (AAPCS64), |
| // so widen them once before they are used in 64-bit arithmetic |
| sxtw x1,w1 // x1 = src_strd |
| sxtw x2,w2 // x2 = ht |
| sxtw x3,w3 // x3 = pad_size |
| |
| loop_start_luma_left: |
| // x4 = first byte of the padding area of row 0 |
| sub x4,x0,x3 |
| |
| // fetch the left-most pixel of four consecutive rows; x0 ends on row 4 |
| ldrb w8,[x0] |
| add x0,x0,x1 |
| ldrb w9,[x0] |
| add x0,x0,x1 |
| ldrb w10,[x0] |
| add x0,x0,x1 |
| ldrb w11,[x0] |
| add x0,x0,x1 |
| |
| // broadcast each pixel across a full 16-byte vector |
| dup v0.16b,w8 |
| dup v2.16b,w9 |
| dup v4.16b,w10 |
| dup v6.16b,w11 |
| |
| add x5,x4,x1 // x5 = padding area of row 1 (taken before x4 advances) |
| |
| // row 0: 5 x 16 = 80 bytes |
| st1 {v0.16b},[x4],#16 |
| st1 {v0.16b},[x4],#16 |
| st1 {v0.16b},[x4],#16 |
| st1 {v0.16b},[x4],#16 |
| st1 {v0.16b},[x4] |
| |
| add x6,x5,x1 // x6 = padding area of row 2 |
| |
| // row 1: 80 bytes |
| st1 {v2.16b},[x5],#16 |
| st1 {v2.16b},[x5],#16 |
| st1 {v2.16b},[x5],#16 |
| st1 {v2.16b},[x5],#16 |
| st1 {v2.16b},[x5] |
| |
| add x7,x6,x1 // x7 = padding area of row 3 |
| |
| // row 2: 80 bytes |
| st1 {v4.16b},[x6],#16 |
| st1 {v4.16b},[x6],#16 |
| st1 {v4.16b},[x6],#16 |
| st1 {v4.16b},[x6],#16 |
| st1 {v4.16b},[x6] |
| |
| subs x2, x2,#4 // four rows done; the stores below leave the flags intact |
| |
| // row 3: 80 bytes |
| st1 {v6.16b},[x7],#16 |
| st1 {v6.16b},[x7],#16 |
| st1 {v6.16b},[x7],#16 |
| st1 {v6.16b},[x7],#16 |
| st1 {v6.16b},[x7] |
| |
| // 4 rows * 80 bytes written in this iteration |
| |
| bne loop_start_luma_left // repeat until the row counter reaches zero |
| |
| ret |
| |
| |
| |
| |
| |
| ///** |
| //******************************************************************************* |
| //* |
| //* //brief |
| //* padding (chroma block) at the left of a 2d array |
| //* |
| //* //par description: |
| //* the left column of a 2d array is replicated for pad_size times at the left |
| //* |
| //* |
| //* //param[in] pu1_src |
| //* uword8 pointer to the source (left-most pixel pair of the first row) |
| //* |
| //* //param[in] src_strd |
| //* integer source stride |
| //* |
| //* //param[in] ht |
| //* integer height of the array |
| //* |
| //* //param[in] pad_size |
| //* integer padding size of the array |
| //* |
| //* //returns |
| //* |
| //* //remarks |
| //* none |
| //* |
| //******************************************************************************* |
| //*/ |
| //.if pad_left_chroma == c |
| //void ihevc_pad_left_chroma(uword8 *pu1_src, |
| // word32 src_strd, |
| // word32 ht, |
| // word32 pad_size) |
| //{ |
| // x0 => *pu1_src |
| // x1 => src_strd |
| // x2 => ht |
| // x3 => pad_size |
| |
| |
| |
| .globl ihevc_pad_left_chroma_av8 |
| |
| .type ihevc_pad_left_chroma_av8, %function |
| |
| //----------------------------------------------------------------------- |
| // void ihevc_pad_left_chroma_av8(uword8 *pu1_src, word32 src_strd, |
| // word32 ht, word32 pad_size) |
| // |
| // for every row, the two bytes at pu1_src[0..1] (presumably one |
| // interleaved chroma pair) are replicated, as a pair, into the 80 bytes |
| // that start at pu1_src - pad_size; four rows are handled per iteration. |
| // |
| // in : x0 = pu1_src, w1 = src_strd, w2 = ht, w3 = pad_size |
| // note : the store length is fixed at 80 bytes (5 x 16) whatever pad_size |
| // is, so pad_size is expected to be 80; the loop only ends when ht |
| // counts down to exactly zero in steps of 4, so ht must be a |
| // positive multiple of 4 |
| // clobb: x0-x11, v0, v2, v4, v6, flags |
| //----------------------------------------------------------------------- |
| ihevc_pad_left_chroma_av8: |
| // word32 arguments only define the low 32 bits of their registers (AAPCS64), |
| // so widen them once before they are used in 64-bit arithmetic |
| sxtw x1,w1 // x1 = src_strd |
| sxtw x2,w2 // x2 = ht |
| sxtw x3,w3 // x3 = pad_size |
| |
| loop_start_chroma_left: |
| // x4 = first byte of the padding area of row 0 |
| sub x4,x0,x3 |
| |
| // fetch the left-most 16-bit pair of four consecutive rows; x0 ends on row 4 |
| ldrh w8,[x0] |
| add x0,x0,x1 |
| ldrh w9,[x0] |
| add x0,x0,x1 |
| ldrh w10,[x0] |
| add x0,x0,x1 |
| ldrh w11,[x0] |
| add x0,x0,x1 |
| |
| // broadcast each pair into all eight halfword lanes of a vector |
| dup v0.8h,w8 |
| dup v2.8h,w9 |
| dup v4.8h,w10 |
| dup v6.8h,w11 |
| |
| add x5,x4,x1 // x5 = padding area of row 1 (taken before x4 advances) |
| |
| // row 0: 5 x 16 = 80 bytes |
| st1 {v0.16b},[x4],#16 |
| st1 {v0.16b},[x4],#16 |
| st1 {v0.16b},[x4],#16 |
| st1 {v0.16b},[x4],#16 |
| st1 {v0.16b},[x4] |
| |
| add x6,x5,x1 // x6 = padding area of row 2 |
| |
| // row 1: 80 bytes |
| st1 {v2.16b},[x5],#16 |
| st1 {v2.16b},[x5],#16 |
| st1 {v2.16b},[x5],#16 |
| st1 {v2.16b},[x5],#16 |
| st1 {v2.16b},[x5] |
| |
| add x7,x6,x1 // x7 = padding area of row 3 |
| |
| // row 2: 80 bytes |
| st1 {v4.16b},[x6],#16 |
| st1 {v4.16b},[x6],#16 |
| st1 {v4.16b},[x6],#16 |
| st1 {v4.16b},[x6],#16 |
| st1 {v4.16b},[x6] |
| |
| subs x2, x2,#4 // four rows done; the stores below leave the flags intact |
| |
| // row 3: 80 bytes |
| st1 {v6.16b},[x7],#16 |
| st1 {v6.16b},[x7],#16 |
| st1 {v6.16b},[x7],#16 |
| st1 {v6.16b},[x7],#16 |
| st1 {v6.16b},[x7] |
| |
| // 4 rows * 80 bytes written in this iteration |
| |
| bne loop_start_chroma_left // repeat until the row counter reaches zero |
| |
| ret |
| |
| |
| |
| |
| |
| ///** |
| //******************************************************************************* |
| //* |
| //* //brief |
| //* padding (luma block) at the right of a 2d array |
| //* |
| //* //par description: |
| //* the right column of a 2d array is replicated for pad_size times at the right |
| //* |
| //* |
| //* //param[in] pu1_src |
| //* uword8 pointer to the first byte to be padded (one past the last pixel of the first row) |
| //* |
| //* //param[in] src_strd |
| //* integer source stride |
| //* |
| //* //param[in] ht |
| //* integer height of the array |
| //* |
| //* //param[in] pad_size |
| //* integer padding size of the array |
| //* |
| //* //returns |
| //* |
| //* //remarks |
| //* none |
| //* |
| //******************************************************************************* |
| //*/ |
| //.if pad_right_luma == c |
| //void ihevc_pad_right_luma(uword8 *pu1_src, |
| // word32 src_strd, |
| // word32 ht, |
| // word32 pad_size) |
| //{ |
| // word32 row// |
| // |
| // for(row = 0// row < ht// row++) |
| // { |
| // memset(pu1_src, *(pu1_src -1), pad_size)// |
| // |
| // pu1_src += src_strd// |
| // } |
| //} |
| // |
| // x0 => *pu1_src |
| // x1 => src_strd |
| // x2 => ht |
| // x3 => pad_size |
| |
| |
| |
| .globl ihevc_pad_right_luma_av8 |
| |
| .type ihevc_pad_right_luma_av8, %function |
| |
| //----------------------------------------------------------------------- |
| // void ihevc_pad_right_luma_av8(uword8 *pu1_src, word32 src_strd, |
| // word32 ht, word32 pad_size) |
| // |
| // for every row, the byte at pu1_src[-1] is replicated into the 80 bytes |
| // that start at pu1_src; four rows are handled per iteration. |
| // |
| // in : x0 = pu1_src, w1 = src_strd, w2 = ht, w3 = pad_size |
| // note : pad_size is never read - the store length is fixed at 80 bytes |
| // (5 x 16); the loop only ends when ht counts down to exactly zero |
| // in steps of 4, so ht must be a positive multiple of 4 |
| // clobb: x0-x2, x4-x11, v0, v2, v4, v6, flags |
| //----------------------------------------------------------------------- |
| ihevc_pad_right_luma_av8: |
| // word32 arguments only define the low 32 bits of their registers (AAPCS64), |
| // so widen them once before they are used in 64-bit arithmetic |
| sxtw x1,w1 // x1 = src_strd |
| sxtw x2,w2 // x2 = ht |
| |
| loop_start_luma_right: |
| // x4 = first byte of the padding area of row 0 |
| mov x4,x0 |
| |
| // fetch the right-most pixel of four consecutive rows; x0 ends on row 4 |
| ldrb w8,[x0, #-1] |
| add x0,x0,x1 |
| ldrb w9,[x0, #-1] |
| add x0,x0,x1 |
| ldrb w10,[x0, #-1] |
| add x0,x0,x1 |
| ldrb w11,[x0, #-1] |
| add x0,x0,x1 |
| |
| // padding areas of rows 1..3 (derived before x4 is advanced by the stores) |
| add x5,x4,x1 |
| add x6,x5,x1 |
| add x7,x6,x1 |
| |
| // broadcast each pixel across a full 16-byte vector |
| dup v0.16b,w8 |
| dup v2.16b,w9 |
| dup v4.16b,w10 |
| dup v6.16b,w11 |
| |
| // row 0: 5 x 16 = 80 bytes |
| st1 {v0.16b},[x4],#16 |
| st1 {v0.16b},[x4],#16 |
| st1 {v0.16b},[x4],#16 |
| st1 {v0.16b},[x4],#16 |
| st1 {v0.16b},[x4] |
| |
| |
| // row 1: 80 bytes |
| st1 {v2.16b},[x5],#16 |
| st1 {v2.16b},[x5],#16 |
| st1 {v2.16b},[x5],#16 |
| st1 {v2.16b},[x5],#16 |
| st1 {v2.16b},[x5] |
| |
| subs x2, x2,#4 // four rows done; the stores below leave the flags intact |
| |
| // row 2: 80 bytes |
| st1 {v4.16b},[x6],#16 |
| st1 {v4.16b},[x6],#16 |
| st1 {v4.16b},[x6],#16 |
| st1 {v4.16b},[x6],#16 |
| st1 {v4.16b},[x6] |
| |
| // row 3: 80 bytes |
| st1 {v6.16b},[x7],#16 |
| st1 {v6.16b},[x7],#16 |
| st1 {v6.16b},[x7],#16 |
| st1 {v6.16b},[x7],#16 |
| st1 {v6.16b},[x7] |
| |
| |
| // 4 rows * 80 bytes written in this iteration |
| |
| |
| bne loop_start_luma_right // repeat until the row counter reaches zero |
| |
| ret |
| |
| |
| |
| |
| |
| ///** |
| //******************************************************************************* |
| //* |
| //* //brief |
| //* padding (chroma block) at the right of a 2d array |
| //* |
| //* //par description: |
| //* the right column of a 2d array is replicated for pad_size times at the right |
| //* |
| //* |
| //* //param[in] pu1_src |
| //* uword8 pointer to the first byte to be padded (one past the last pixel pair of the first row) |
| //* |
| //* //param[in] src_strd |
| //* integer source stride |
| //* |
| //* //param[in] ht |
| //* integer height of the array |
| //* |
| //* //param[in] pad_size |
| //* integer padding size of the array |
| //* |
| //* //returns |
| //* |
| //* //remarks |
| //* none |
| //* |
| //******************************************************************************* |
| //*/ |
| //.if pad_right_chroma == c |
| //void ihevc_pad_right_chroma(uword8 *pu1_src, |
| // word32 src_strd, |
| // word32 ht, |
| // word32 pad_size) |
| // x0 => *pu1_src |
| // x1 => src_strd |
| // x2 => ht |
| // x3 => pad_size |
| |
| |
| |
| .globl ihevc_pad_right_chroma_av8 |
| |
| .type ihevc_pad_right_chroma_av8, %function |
| |
| //----------------------------------------------------------------------- |
| // void ihevc_pad_right_chroma_av8(uword8 *pu1_src, word32 src_strd, |
| // word32 ht, word32 pad_size) |
| // |
| // for every row, the two bytes at pu1_src[-2..-1] (presumably one |
| // interleaved chroma pair) are replicated, as a pair, into the 80 bytes |
| // that start at pu1_src; four rows are handled per iteration. |
| // |
| // in : x0 = pu1_src, w1 = src_strd, w2 = ht, w3 = pad_size |
| // note : pad_size is never read - the store length is fixed at 80 bytes |
| // (5 x 16); the loop only ends when ht counts down to exactly zero |
| // in steps of 4, so ht must be a positive multiple of 4 |
| // clobb: x0-x2, x4-x11, v0, v2, v4, v6, flags |
| //----------------------------------------------------------------------- |
| ihevc_pad_right_chroma_av8: |
| // word32 arguments only define the low 32 bits of their registers (AAPCS64), |
| // so widen them once before they are used in 64-bit arithmetic |
| sxtw x1,w1 // x1 = src_strd |
| sxtw x2,w2 // x2 = ht |
| |
| loop_start_chroma_right: |
| // x4 = first byte of the padding area of row 0 |
| mov x4,x0 |
| |
| // fetch the right-most 16-bit pair of four consecutive rows; x0 ends on row 4 |
| ldrh w8,[x0, #-2] |
| add x0,x0,x1 |
| ldrh w9,[x0, #-2] |
| add x0,x0,x1 |
| ldrh w10,[x0, #-2] |
| add x0,x0,x1 |
| ldrh w11,[x0, #-2] |
| add x0,x0,x1 |
| |
| // broadcast each pair into all eight halfword lanes of a vector |
| dup v0.8h,w8 |
| dup v2.8h,w9 |
| dup v4.8h,w10 |
| dup v6.8h,w11 |
| |
| add x5,x4,x1 // x5 = padding area of row 1 (taken before x4 advances) |
| |
| // row 0: 5 x 16 = 80 bytes |
| st1 {v0.16b},[x4],#16 |
| st1 {v0.16b},[x4],#16 |
| st1 {v0.16b},[x4],#16 |
| st1 {v0.16b},[x4],#16 |
| st1 {v0.16b},[x4] |
| |
| add x6,x5,x1 // x6 = padding area of row 2 |
| |
| // row 1: 80 bytes |
| st1 {v2.16b},[x5],#16 |
| st1 {v2.16b},[x5],#16 |
| st1 {v2.16b},[x5],#16 |
| st1 {v2.16b},[x5],#16 |
| st1 {v2.16b},[x5] |
| |
| add x7,x6,x1 // x7 = padding area of row 3 |
| |
| // row 2: 80 bytes |
| st1 {v4.16b},[x6],#16 |
| st1 {v4.16b},[x6],#16 |
| st1 {v4.16b},[x6],#16 |
| st1 {v4.16b},[x6],#16 |
| st1 {v4.16b},[x6] |
| |
| subs x2, x2,#4 // four rows done; the stores below leave the flags intact |
| |
| // row 3: 80 bytes |
| st1 {v6.16b},[x7],#16 |
| st1 {v6.16b},[x7],#16 |
| st1 {v6.16b},[x7],#16 |
| st1 {v6.16b},[x7],#16 |
| st1 {v6.16b},[x7] |
| |
| // 4 rows * 80 bytes written in this iteration |
| |
| bne loop_start_chroma_right // repeat until the row counter reaches zero |
| |
| ret |
| |
| |
| |
| |
| |
| |
| |
| |