blob: 42ee5ac28bbce0bed5424aa182727bdaa73e9165 [file] [log] [blame]
Harish Mahendrakar0d8951c2014-05-16 10:31:13 -07001/******************************************************************************
2*
3* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*
5* Licensed under the Apache License, Version 2.0 (the "License");
6* you may not use this file except in compliance with the License.
7* You may obtain a copy of the License at:
8*
9* http://www.apache.org/licenses/LICENSE-2.0
10*
11* Unless required by applicable law or agreed to in writing, software
12* distributed under the License is distributed on an "AS IS" BASIS,
13* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14* See the License for the specific language governing permissions and
15* limitations under the License.
16*
17******************************************************************************/
18/**
19*******************************************************************************
20* @file
21* ihevc_padding_atom_intr.c
22*
23* @brief
24* Contains function definitions for Padding
25*
26* @author
27* Srinivas T
28*
29* @par List of Functions:
30* - ihevc_pad_left_luma_ssse3()
31* - ihevc_pad_left_chroma_ssse3()
32* - ihevc_pad_right_luma_ssse3()
33* - ihevc_pad_right_chroma_ssse3()
34*
35* @remarks
36* None
37*
38*******************************************************************************
39*/
40
41#include <string.h>
42#include <assert.h>
43#include "ihevc_typedefs.h"
44#include "ihevc_func_selector.h"
45#include "ihevc_platform_macros.h"
46#include "ihevc_mem_fns.h"
47#include "ihevc_debug.h"
48
49#include <immintrin.h>
50
51
52/**
53*******************************************************************************
54*
55* @brief
56* Padding (luma block) at the left of a 2d array
57*
58* @par Description:
59* The left column of a 2d array is replicated for pad_size times at the left
60*
61*
62* @param[in] pu1_src
63* UWORD8 pointer to the source
64*
65* @param[in] src_strd
66* integer source stride
67*
68* @param[in] ht
69* integer height of the array
70*
71* @param[in] wd
72* integer width of the array
73*
74* @param[in] pad_size
75* integer -padding size of the array
76*
77* @param[in] ht
78* integer height of the array
79*
80* @param[in] wd
81* integer width of the array
82*
83* @returns
84*
85* @remarks
86* None
87*
88*******************************************************************************
89*/
90
91void ihevc_pad_left_luma_ssse3(UWORD8 *pu1_src,
92 WORD32 src_strd,
93 WORD32 ht,
94 WORD32 pad_size)
95{
96 WORD32 row;
97 WORD32 i;
98 UWORD8 *pu1_dst;
99 __m128i const0_16x8b;
100
101 const0_16x8b = _mm_setzero_si128();
102
103 ASSERT(pad_size % 8 == 0);
104
105 for(row = 0; row < ht; row++)
106 {
107 __m128i src_temp0_16x8b;
108
109 src_temp0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
110 pu1_dst = pu1_src - pad_size;
111 src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
112 for(i = 0; i < pad_size; i += 8)
113 {
114 _mm_storel_epi64((__m128i *)(pu1_dst + i), src_temp0_16x8b);
115 }
116 pu1_src += src_strd;
117 }
118
119}
120
121
122
123/**
124*******************************************************************************
125*
126* @brief
127* Padding (chroma block) at the left of a 2d array
128*
129* @par Description:
130* The left column of a 2d array is replicated for pad_size times at the left
131*
132*
133* @param[in] pu1_src
134* UWORD8 pointer to the source
135*
136* @param[in] src_strd
137* integer source stride
138*
139* @param[in] ht
140* integer height of the array
141*
142* @param[in] wd
143* integer width of the array (each colour component)
144*
145* @param[in] pad_size
146* integer -padding size of the array
147*
148* @param[in] ht
149* integer height of the array
150*
151* @param[in] wd
152* integer width of the array
153*
154* @returns
155*
156* @remarks
157* None
158*
159*******************************************************************************
160*/
161
162void ihevc_pad_left_chroma_ssse3(UWORD8 *pu1_src,
163 WORD32 src_strd,
164 WORD32 ht,
165 WORD32 pad_size)
166{
167 WORD32 row;
168 WORD32 col;
169 UWORD8 *pu1_dst;
170 __m128i const0_16x8b, const1_16x8b;
171 const0_16x8b = _mm_setzero_si128();
172 const1_16x8b = _mm_set1_epi8(1);
173 const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b);
174
175 ASSERT(pad_size % 8 == 0);
176 for(row = 0; row < ht; row++)
177 {
178 __m128i src_temp0_16x8b;
179
180 src_temp0_16x8b = _mm_loadu_si128((__m128i *)pu1_src);
181 pu1_dst = pu1_src - pad_size;
182 src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
183
184 for(col = 0; col < pad_size; col += 8)
185 {
186 _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
187 }
188 pu1_src += src_strd;
189 }
190
191}
192
193
194
195/**
196*******************************************************************************
197*
198* @brief
199* Padding (luma block) at the right of a 2d array
200*
201* @par Description:
202* The right column of a 2d array is replicated for pad_size times at the right
203*
204*
205* @param[in] pu1_src
206* UWORD8 pointer to the source
207*
208* @param[in] src_strd
209* integer source stride
210*
211* @param[in] ht
212* integer height of the array
213*
214* @param[in] wd
215* integer width of the array
216*
217* @param[in] pad_size
218* integer -padding size of the array
219*
220* @param[in] ht
221* integer height of the array
222*
223* @param[in] wd
224* integer width of the array
225*
226* @returns
227*
228* @remarks
229* None
230*
231*******************************************************************************
232*/
233
234void ihevc_pad_right_luma_ssse3(UWORD8 *pu1_src,
235 WORD32 src_strd,
236 WORD32 ht,
237 WORD32 pad_size)
238{
239 WORD32 row;
240 WORD32 col;
241 UWORD8 *pu1_dst;
242 __m128i const0_16x8b;
243
244 ASSERT(pad_size % 8 == 0);
245
246 for(row = 0; row < ht; row++)
247 {
248 __m128i src_temp0_16x8b;
249
250 src_temp0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src - 1));
251 const0_16x8b = _mm_setzero_si128();
252 pu1_dst = pu1_src;
253 src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
254 for(col = 0; col < pad_size; col += 8)
255 {
256 _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
257 }
258 pu1_src += src_strd;
259 }
260
261}
262
263
264
265/**
266*******************************************************************************
267*
268* @brief
269* Padding (chroma block) at the right of a 2d array
270*
271* @par Description:
272* The right column of a 2d array is replicated for pad_size times at the right
273*
274*
275* @param[in] pu1_src
276* UWORD8 pointer to the source
277*
278* @param[in] src_strd
279* integer source stride
280*
281* @param[in] ht
282* integer height of the array
283*
284* @param[in] wd
285* integer width of the array (each colour component)
286*
287* @param[in] pad_size
288* integer -padding size of the array
289*
290* @param[in] ht
291* integer height of the array
292*
293* @param[in] wd
294* integer width of the array
295*
296* @returns
297*
298* @remarks
299* None
300*
301*******************************************************************************
302*/
303
304void ihevc_pad_right_chroma_ssse3(UWORD8 *pu1_src,
305 WORD32 src_strd,
306 WORD32 ht,
307 WORD32 pad_size)
308{
309 WORD32 row;
310 WORD32 col;
311 UWORD8 *pu1_dst;
312 __m128i const0_16x8b, const1_16x8b;
313 const0_16x8b = _mm_setzero_si128();
314 const1_16x8b = _mm_set1_epi8(1);
315 const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b);
316
317 ASSERT(pad_size % 8 == 0);
318
319 for(row = 0; row < ht; row++)
320 {
321 __m128i src_temp0_16x8b;
322
323 src_temp0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src - 2));
324 pu1_dst = pu1_src;
325 src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
326 for(col = 0; col < pad_size; col += 8)
327 {
328 _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
329 }
330
331 pu1_src += src_strd;
332 }
333}
334