blob: f8c44df8e03a1b3592cc97c70a0480dce4be252c [file] [log] [blame]
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301/******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*/
20
21/**
22******************************************************************************
23* @file ime_distortion_metrics.c
24*
25* @brief
26* This file contains definitions of routines that compute distortion
27* between two macro/sub blocks of identical dimensions
28*
29* @author
30* Ittiam
31*
32* @par List of Functions:
33* - ime_sub_pel_compute_sad_16x16()
34* - ime_calculate_sad4_prog()
35* - ime_calculate_sad3_prog()
36* - ime_calculate_sad2_prog()
37* - ime_compute_sad_16x16()
38* - ime_compute_sad_16x16_fast()
39* - ime_compute_sad_16x16_ea8()
40* - ime_compute_sad_8x8()
41* - ime_compute_sad_4x4()
42* - ime_compute_sad_16x8()
43* - ime_compute_satqd_16x16_lumainter()
44* - ime_compute_satqd_8x16_chroma()
45* - ime_compute_satqd_16x16_lumaintra()
46*
47*
48* @remarks
49* None
50*
51*******************************************************************************
52*/
53
54/*****************************************************************************/
55/* File Includes */
56/*****************************************************************************/
57
58/* System include files */
59#include <stdio.h>
60#include <stdlib.h>
61#include <string.h>
62
63/* User include files */
64#include "ime_typedefs.h"
65#include "ime_defs.h"
66#include "ime_macros.h"
67#include "ime_statistics.h"
68#include "ime_platform_macros.h"
69#include "ime_distortion_metrics.h"
70
71
72/*****************************************************************************/
73/* Function Definitions */
74/*****************************************************************************/
75
76/**
77******************************************************************************
78*
79* @brief computes distortion (SAD) at all subpel points about the src location
80*
81* @par Description
82* This functions computes SAD at all points at a subpel distance from the
83* current source location.
84*
85* @param[in] pu1_src
86* UWORD8 pointer to the source
87*
88* @param[out] pu1_ref_half_x
89* UWORD8 pointer to half pel buffer
90*
91* @param[out] pu1_ref_half_y
92* UWORD8 pointer to half pel buffer
93*
94* @param[out] pu1_ref_half_xy
95* UWORD8 pointer to half pel buffer
96*
97* @param[in] src_strd
98* integer source stride
99*
100* @param[in] ref_strd
101* integer ref stride
102*
103* @param[out] pi4_sad
104* integer evaluated sad
105* pi4_sad[0] - half x
106* pi4_sad[1] - half x - 1
107* pi4_sad[2] - half y
108* pi4_sad[3] - half y - 1
109* pi4_sad[4] - half xy
110* pi4_sad[5] - half xy - 1
111* pi4_sad[6] - half xy - strd
112* pi4_sad[7] - half xy - 1 - strd
113*
114* @remarks
115*
116******************************************************************************
117*/
118void ime_sub_pel_compute_sad_16x16(UWORD8 *pu1_src,
119 UWORD8 *pu1_ref_half_x,
120 UWORD8 *pu1_ref_half_y,
121 UWORD8 *pu1_ref_half_xy,
122 WORD32 src_strd,
123 WORD32 ref_strd,
124 WORD32 *pi4_sad)
125{
126 UWORD8 *pu1_ref_half_x_left = pu1_ref_half_x - 1;
127 UWORD8 *pu1_ref_half_y_top = pu1_ref_half_y - ref_strd;
128 UWORD8 *pu1_ref_half_xy_left = pu1_ref_half_xy - 1;
129 UWORD8 *pu1_ref_half_xy_top = pu1_ref_half_xy - ref_strd;
130 UWORD8 *pu1_ref_half_xy_top_left = pu1_ref_half_xy - ref_strd - 1;
131
132 WORD32 row, col;
133
134 memset(pi4_sad, 0, 8 * sizeof(WORD32));
135
136 for(row = 0; row < MB_SIZE; row++)
137 {
138 for(col = 0; col < MB_SIZE; col++)
139 {
140 WORD32 src;
141 WORD32 diff;
142
143 src = pu1_src[col];
144
145 diff = src - pu1_ref_half_x[col];
146 pi4_sad[0] += ABS(diff);
147
148 diff = src - pu1_ref_half_x_left[col];
149 pi4_sad[1] += ABS(diff);
150
151 diff = src - pu1_ref_half_y[col];
152 pi4_sad[2] += ABS(diff);
153
154 diff = src - pu1_ref_half_y_top[col];
155 pi4_sad[3] += ABS(diff);
156
157 diff = src - pu1_ref_half_xy[col];
158 pi4_sad[4] += ABS(diff);
159
160 diff = src - pu1_ref_half_xy_left[col];
161 pi4_sad[5] += ABS(diff);
162
163 diff = src - pu1_ref_half_xy_top[col];
164 pi4_sad[6] += ABS(diff);
165
166 diff = src - pu1_ref_half_xy_top_left[col];
167 pi4_sad[7] += ABS(diff);
168 }
169
170 pu1_src += src_strd;
171
172 pu1_ref_half_x += ref_strd;
173 pu1_ref_half_x_left += ref_strd;
174
175 pu1_ref_half_y += ref_strd;
176 pu1_ref_half_y_top += ref_strd;
177
178 pu1_ref_half_xy += ref_strd;
179 pu1_ref_half_xy_left += ref_strd;
180 pu1_ref_half_xy_top += ref_strd;
181 pu1_ref_half_xy_top_left += ref_strd;
182 }
183}
184
185/**
186*******************************************************************************
187*
188* @brief compute sad
189*
190* @par Description: This function computes the sad at vertices of diamond grid
191* centered at reference pointer and at unit distance from it.
192*
193* @param[in] pu1_ref
194* UWORD8 pointer to the reference
195*
196* @param[out] pu1_src
197* UWORD8 pointer to the source
198*
199* @param[in] ref_strd
200* integer reference stride
201*
202* @param[in] src_strd
203* integer source stride
204*
205* @param[out] pi4_sad
206* pointer to integer array evaluated sad
207*
208* @returns sad at all evaluated vertexes
209*
210* @remarks none
211*
212*******************************************************************************
213*/
214void ime_calculate_sad4_prog(UWORD8 *pu1_ref,
215 UWORD8 *pu1_src,
216 WORD32 ref_strd,
217 WORD32 src_strd,
218 WORD32 *pi4_sad)
219{
220
221 /* reference ptrs at unit 1 distance in diamond pattern centered at pu1_ref */
222 UWORD8 *left_ptr = pu1_ref - 1;
223 UWORD8 *right_ptr = pu1_ref + 1;
224 UWORD8 *top_ptr = pu1_ref - ref_strd;
225 UWORD8 *bot_ptr = pu1_ref + ref_strd;
226
227 /* temp var */
228 WORD32 count2, count3;
229 UWORD32 u4_ref_buf_offset = ref_strd - MB_SIZE;
230 UWORD32 u4_cur_buf_offset = src_strd - MB_SIZE;
231
232 memset(pi4_sad, 0, 4 * sizeof(WORD32));
233
234 for(count2 = MB_SIZE; count2 > 0; count2--)
235 {
236 for(count3 = MB_SIZE; count3 > 0 ; count3--)
237 {
238 WORD32 src;
239 WORD32 diff;
240
241 src = *pu1_src++;
242
243 diff = src - *left_ptr++;
244 pi4_sad[0] += ABS(diff);
245
246 diff = src - *right_ptr++;
247 pi4_sad[1] += ABS(diff);
248
249 diff = src - *top_ptr++;
250 pi4_sad[2] += ABS(diff);
251
252 diff = src - *bot_ptr++;
253 pi4_sad[3] += ABS(diff);
254 }
255
256 bot_ptr += u4_ref_buf_offset;
257 left_ptr += u4_ref_buf_offset;
258 right_ptr += u4_ref_buf_offset;
259 top_ptr += u4_ref_buf_offset;
260
261 pu1_src += u4_cur_buf_offset;
262 }
263
264}
265
266/**
267*******************************************************************************
268*
269* @brief compute sad
270*
271* @par Description: This function computes the sad at vertices of diamond grid
272* centered at reference pointer and at unit distance from it.
273*
274* @param[in] pu1_ref1, pu1_ref2, pu1_ref3
275* UWORD8 pointer to the reference
276*
277* @param[out] pu1_src
278* UWORD8 pointer to the source
279*
280* @param[in] ref_strd
281* integer reference stride
282*
283* @param[in] src_strd
284* integer source stride
285*
286* @param[out] pi4_sad
287* pointer to integer array evaluated sad
288*
289* @returns sad at all evaluated vertexes
290*
291* @remarks none
292*
293*******************************************************************************
294*/
295void ime_calculate_sad3_prog(UWORD8 *pu1_ref1,
296 UWORD8 *pu1_ref2,
297 UWORD8 *pu1_ref3,
298 UWORD8 *pu1_src,
299 WORD32 ref_strd,
300 WORD32 src_strd,
301 WORD32 *pi4_sad)
302{
303 /* temp var */
304 WORD32 i;
305 UWORD32 u4_ref_buf_offset = ref_strd - MB_SIZE;
306 UWORD32 u4_cur_buf_offset = src_strd - MB_SIZE;
307
308 for(i = 16; i > 0; i--)
309 {
310 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
311 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
312 USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
313 pu1_src += 4;
314 pu1_ref1 += 4;
315 pu1_ref2 += 4;
316 pu1_ref3 += 4;
317
318 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
319 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
320 USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
321 pu1_src += 4;
322 pu1_ref1 += 4;
323 pu1_ref2 += 4;
324 pu1_ref3 += 4;
325
326 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
327 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
328 USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
329 pu1_src += 4;
330 pu1_ref1 += 4;
331 pu1_ref2 += 4;
332 pu1_ref3 += 4;
333
334 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
335 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
336 USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
337 pu1_src += 4;
338 pu1_ref1 += 4;
339 pu1_ref2 += 4;
340 pu1_ref3 += 4;
341
342 pu1_src += u4_cur_buf_offset;
343 pu1_ref1 += u4_ref_buf_offset;
344 pu1_ref2 += u4_ref_buf_offset;
345 pu1_ref3 += u4_ref_buf_offset;
346 }
347
348}
349
350/**
351*******************************************************************************
352*
353* @brief compute sad
354*
355* @par Description: This function computes the sad at vertices of diamond grid
356* centered at reference pointer and at unit distance from it.
357*
358* @param[in] pu1_ref1, pu1_ref2
359* UWORD8 pointer to the reference
360*
361* @param[out] pu1_src
362* UWORD8 pointer to the source
363*
364* @param[in] ref_strd
365* integer reference stride
366*
367* @param[in] src_strd
368* integer source stride
369*
370* @param[out] pi4_sad
371* pointer to integer array evaluated sad
372*
373* @returns sad at all evaluated vertexes
374*
375* @remarks none
376*
377*******************************************************************************
378*/
379void ime_calculate_sad2_prog(UWORD8 *pu1_ref1,
380 UWORD8 *pu1_ref2,
381 UWORD8 *pu1_src,
382 WORD32 ref_strd,
383 WORD32 src_strd,
384 WORD32 *pi4_sad)
385{
386 /* temp var */
387 WORD32 i;
388 UWORD32 u4_ref_buf_offset = ref_strd - MB_SIZE;
389 UWORD32 u4_cur_buf_offset = src_strd - MB_SIZE;
390
391 for(i = 16; i > 0; i--)
392 {
393 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
394 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
395 pu1_src += 4;
396 pu1_ref1 += 4;
397 pu1_ref2 += 4;
398
399 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
400 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
401 pu1_src += 4;
402 pu1_ref1 += 4;
403 pu1_ref2 += 4;
404
405 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
406 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
407 pu1_src += 4;
408 pu1_ref1 += 4;
409 pu1_ref2 += 4;
410
411 USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
412 USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
413 pu1_src += 4;
414 pu1_ref1 += 4;
415 pu1_ref2 += 4;
416
417 pu1_src += u4_cur_buf_offset;
418 pu1_ref1 += u4_ref_buf_offset;
419 pu1_ref2 += u4_ref_buf_offset;
420 }
421
422}
423
424/**
425******************************************************************************
426*
427* @brief computes distortion (SAD) between 2 16x16 blocks
428*
429* @par Description
430* This functions computes SAD between 2 16x16 blocks. There is a provision
431* for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
432* compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
433*
434* @param[in] pu1_src
435* UWORD8 pointer to the source
436*
437* @param[out] pu1_dst
438* UWORD8 pointer to the destination
439*
440* @param[in] src_strd
441* integer source stride
442*
443* @param[in] dst_strd
444* integer destination stride
445*
446* @param[in] i4_max_sad
447* integer maximum allowed distortion
448*
449* @param[out] pi4_mb_distortion
450* integer evaluated sad
451*
452* @remarks
453*
454******************************************************************************
455*/
456void ime_compute_sad_16x16(UWORD8 *pu1_src,
457 UWORD8 *pu1_est,
458 WORD32 src_strd,
459 WORD32 est_strd,
460 WORD32 i4_max_sad,
461 WORD32 *pi4_mb_distortion)
462{
463 WORD32 i4_sad = 0;
464 UWORD32 u4_src_offset = src_strd - 16;
465 UWORD32 u4_est_offset = est_strd - 16;
466 UWORD32 i;
467
468GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16);
469
470 for(i = 16; i > 0; i--)
471 {
472 USADA8(pu1_src, pu1_est, i4_sad);
473 pu1_src += 4;
474 pu1_est += 4;
475
476 USADA8(pu1_src, pu1_est, i4_sad);
477 pu1_src += 4;
478 pu1_est += 4;
479
480 USADA8(pu1_src, pu1_est, i4_sad);
481 pu1_src += 4;
482 pu1_est += 4;
483
484 USADA8(pu1_src, pu1_est, i4_sad);
485 pu1_src += 4;
486 pu1_est += 4;
487
488 /* early exit */
489 if(i4_max_sad < i4_sad)
490 {
491
492GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16-i);
493
494 *pi4_mb_distortion = i4_sad;
495 return ;
496 }
497 pu1_src += u4_src_offset;
498 pu1_est += u4_est_offset;
499 }
500
501 *pi4_mb_distortion = i4_sad;
502 return ;
503}
504
505/**
506******************************************************************************
507*
508* @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
509*
510* @par Description
511* This functions computes SAD between 2 16x16 blocks. There is a provision
512* for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
513* compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
514*
515* @param[in] pu1_src
516* UWORD8 pointer to the source
517*
518* @param[out] pu1_dst
519* UWORD8 pointer to the destination
520*
521* @param[in] src_strd
522* integer source stride
523*
524* @param[in] dst_strd
525* integer destination stride
526*
527* @param[in] i4_max_sad
528* integer maximum allowed distortion
529*
530* @param[out] pi4_mb_distortion
531* integer evaluated sad
532*
533* @remarks
534*
535******************************************************************************
536*/
537void ime_compute_sad_16x16_fast(UWORD8 *pu1_src,
538 UWORD8 *pu1_est,
539 WORD32 src_strd,
540 WORD32 est_strd,
541 WORD32 i4_max_sad,
542 WORD32 *pi4_mb_distortion)
543{
544
545 WORD32 i4_sad = 0;
546 UWORD32 u4_src_offset = 2 * src_strd - 16;
547 UWORD32 u4_est_offset = 2 * est_strd - 16;
548 UWORD32 i;
549
550 UNUSED(i4_max_sad);
551
552 for(i = 16; i > 0; i-= 2)
553 {
554 USADA8(pu1_src, pu1_est, i4_sad);
555 pu1_src += 4;
556 pu1_est += 4;
557
558 USADA8(pu1_src, pu1_est, i4_sad);
559 pu1_src += 4;
560 pu1_est += 4;
561
562 USADA8(pu1_src, pu1_est, i4_sad);
563 pu1_src += 4;
564 pu1_est += 4;
565
566 USADA8(pu1_src, pu1_est, i4_sad);
567 pu1_src += 4;
568 pu1_est += 4;
569
570 pu1_src += u4_src_offset;
571 pu1_est += u4_est_offset;
572 }
573
574 *pi4_mb_distortion = (i4_sad << 1);
575 return ;
576}
577
578/**
579******************************************************************************
580*
581* @brief computes distortion (SAD) between 2 8x8 blocks
582*
583* @par Description
584* This functions computes SAD between 2 8x8 blocks. There is a provision
585* for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
586* compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
587*
588* @param[in] pu1_src
589* UWORD8 pointer to the source
590*
591* @param[out] pu1_dst
592* UWORD8 pointer to the destination
593*
594* @param[in] src_strd
595* integer source stride
596*
597* @param[in] dst_strd
598* integer destination stride
599*
600* @param[in] u4_max_sad
601* integer maximum allowed distortion
602*
603* @param[out] i4_sad
604* integer evaluated sad
605*
606* @remarks
607*
608******************************************************************************
609 */
610
611void ime_compute_sad_8x8(UWORD8 *pu1_src,
612 UWORD8 *pu1_est,
613 WORD32 src_strd,
614 WORD32 est_strd,
615 WORD32 i4_max_sad,
616 WORD32 *pi4_mb_distortion)
617{
618 WORD32 i4_sad = 0;
619 UWORD32 u4_src_offset = src_strd - 8;
620 UWORD32 u4_est_offset = est_strd - 8;
621 UWORD32 i, j;
622 WORD16 temp;
623
624 for(i = 8; i > 0; i--)
625 {
626 for(j = 8; j > 0; j--)
627 {
628 /* SAD */
629 temp = *pu1_src++ - *pu1_est++;
630 i4_sad += ABS(temp);
631 }
632 /* early exit */
633 if(i4_max_sad < i4_sad)
634 {
635 *pi4_mb_distortion = i4_sad;
636 return;
637 }
638 pu1_src += u4_src_offset;
639 pu1_est += u4_est_offset;
640 }
641 *pi4_mb_distortion = i4_sad;
642}
643
644/**
645******************************************************************************
646*
647* @brief computes distortion (SAD) between 2 4x4 blocks
648*
649* @par Description
650* This functions computes SAD between 2 4x4 blocks. There is a provision
651* for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
652* compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
653*
654* @param[in] pu1_src
655* UWORD8 pointer to the source
656*
657* @param[out] pu1_dst
658* UWORD8 pointer to the destination
659*
660* @param[in] src_strd
661* integer source stride
662*
663* @param[in] dst_strd
664* integer destination stride
665*
666* @param[in] u4_max_sad
667* integer maximum allowed distortion
668*
669* @param[out] pi4_mb_distortion
670* integer evaluated sad
671*
672* @remarks
673*
674******************************************************************************
675*/
676void ime_compute_sad_4x4
677 (
678 UWORD8 *pu1_src,
679 UWORD8 *pu1_est,
680 WORD32 src_strd,
681 WORD32 est_strd,
682 WORD32 i4_max_sad,
683 WORD32 *pi4_mb_distortion
684 )
685{
686 WORD32 i4_sad = 0;
687
688 UNUSED(i4_max_sad);
689
690 USADA8(pu1_src, pu1_est, i4_sad);
691 pu1_src += src_strd;
692 pu1_est += est_strd;
693
694 USADA8(pu1_src, pu1_est, i4_sad);
695 pu1_src += src_strd;
696 pu1_est += est_strd;
697
698 USADA8(pu1_src, pu1_est, i4_sad);
699 pu1_src += src_strd;
700 pu1_est += est_strd;
701
702 USADA8(pu1_src, pu1_est, i4_sad);
703 *pi4_mb_distortion = i4_sad;
704}
705
706
707/**
708******************************************************************************
709*
710* @brief computes distortion (SAD) between 2 16x8 blocks
711*
712*
713* @par Description
714* This functions computes SAD between 2 16x8 blocks. There is a provision
715* for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
716* compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
717*
718* @param[in] pu1_src
719* UWORD8 pointer to the source
720*
721* @param[out] pu1_dst
722* UWORD8 pointer to the destination
723*
724* @param[in] src_strd
725* integer source stride
726*
727* @param[in] dst_strd
728* integer destination stride
729*
730* @param[in] u4_max_sad
731* integer maximum allowed distortion
732*
733* @param[out] pi4_mb_distortion
734* integer evaluated sad
735*
736* @remarks
737*
738******************************************************************************
739*/
740void ime_compute_sad_16x8
741 (
742 UWORD8 *pu1_src,
743 UWORD8 *pu1_est,
744 WORD32 src_strd,
745 WORD32 est_strd,
746 WORD32 i4_max_sad,
747 WORD32 *pi4_mb_distortion
748 )
749{
750 WORD32 i4_sad = 0;
751 UWORD32 u4_src_offset = src_strd - 16;
752 UWORD32 u4_est_offset = est_strd - 16;
753 UWORD32 i, j;
754 WORD16 temp;
755
756GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8);
757
758 for(i = 8; i > 0; i--)
759 {
760 for(j = 16; j > 0; j--)
761 {
762 /* SAD */
763 temp = *pu1_src++ - *pu1_est++;
764 i4_sad += ABS(temp);
765 }
766 /* early exit */
767 if(i4_max_sad < i4_sad)
768 {
769
770GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8-i);
771
772 *pi4_mb_distortion = i4_sad;
773
774 return;
775 }
776 pu1_src += u4_src_offset;
777 pu1_est += u4_est_offset;
778 }
779
780 *pi4_mb_distortion = i4_sad;
781 return;
782
783}
784
785/**
786******************************************************************************
787*
788* @brief computes distortion (SAD) between 2 16x16 blocks
789*
790* @par Description
791* This functions computes SAD between 2 16x16 blocks. There is a provision
792* for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
793* compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
794*
795* @param[in] pu1_src
796* UWORD8 pointer to the source
797*
798* @param[out] pu1_dst
799* UWORD8 pointer to the destination
800*
801* @param[in] src_strd
802* integer source stride
803*
804* @param[in] dst_strd
805* integer destination stride
806*
807* @param[in] i4_max_sad
808* integer maximum allowed distortion
809*
810* @param[out] pi4_mb_distortion
811* integer evaluated sad
812*
813* @remarks
814*
815******************************************************************************
816*/
817void ime_compute_sad_16x16_ea8(UWORD8 *pu1_src,
818 UWORD8 *pu1_est,
819 WORD32 src_strd,
820 WORD32 est_strd,
821 WORD32 i4_max_sad,
822 WORD32 *pi4_mb_distortion)
823{
824 WORD32 i4_sad = 0;
825 UWORD32 u4_src_offset = src_strd - 16;
826 UWORD32 u4_est_offset = est_strd - 16;
827 UWORD32 i, j;
828 WORD16 temp;
829 UWORD8 *pu1_src_temp = pu1_src + src_strd;
830 UWORD8 *pu1_est_temp = pu1_est + est_strd;
831
832 for(i = 16; i > 0; i -= 2)
833 {
834 for(j = 16; j > 0; j--)
835 {
836 /* SAD */
837 temp = *pu1_src++ - *pu1_est++;
838 i4_sad += ABS(temp);
839 }
840
841 pu1_src += (u4_src_offset + src_strd);
842 pu1_est += (u4_est_offset + est_strd);
843
844 }
845
846 /* early exit */
847 if(i4_max_sad < i4_sad)
848 {
849 *pi4_mb_distortion = i4_sad;
850 return;
851 }
852
853 pu1_src = pu1_src_temp;
854 pu1_est = pu1_est_temp;
855
856 for(i = 16; i > 0; i -= 2)
857 {
858 for(j = 16; j > 0; j--)
859 {
860 /* SAD */
861 temp = *pu1_src++ - *pu1_est++;
862 i4_sad += ABS(temp);
863 }
864
865 pu1_src += u4_src_offset + src_strd;
866 pu1_est += u4_est_offset + est_strd;
867 }
868
869 *pi4_mb_distortion = i4_sad;
870 return;
871}
872
873
874/**
875*******************************************************************************
876*
877* @brief This function computes SAD between two 16x16 blocks
878* It also computes if the block will be zero after H264 transform and quant for
879* Intra 16x16 blocks
880*
881* @param[in] pu1_src
882* UWORD8 pointer to the source
883*
884* @param[out] pu1_dst
885* UWORD8 pointer to the destination
886*
887* @param[in] src_strd
888* integer source stride
889*
890* @param[in] dst_strd
891* integer destination stride
892*
893* @param[in] pu2_thrsh
894* Threshold for each element of transofrmed quantized block
895*
896* @param[out] pi4_mb_distortion
897* integer evaluated sad
898*
899* @param[out] pu4_is_zero
900* Poitner to store if the block is zero after transform and quantization
901*
902* @remarks
903*
904******************************************************************************
905*/
906void ime_compute_satqd_16x16_lumainter(UWORD8 *pu1_src,
907 UWORD8 *pu1_est,
908 WORD32 src_strd,
909 WORD32 est_strd,
910 UWORD16 *pu2_thrsh,
911 WORD32 *pi4_mb_distortion,
912 UWORD32 *pu4_is_non_zero)
913{
914 UWORD32 i,j;
915 WORD16 s1,s2,s3,s4,sad_1,sad_2,ls1,ls2,ls3,ls4,ls5,ls6,ls7,ls8;
916 UWORD8 *pu1_src_lp,*pu1_est_lp;
917 UWORD32 sad = 0;
918
919 (*pi4_mb_distortion) = 0;
920 for(i=0;i<4;i++)
921 {
922 for(j=0;j<4;j++)
923 {
924 pu1_src_lp = pu1_src + 4*j;
925 pu1_est_lp = pu1_est + 4*j;
926
927 s1 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
928 s4 = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
929
930 pu1_src_lp += src_strd;
931 pu1_est_lp += est_strd;
932
933 s2 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
934 s3 = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
935
936 pu1_src_lp += src_strd;
937 pu1_est_lp += est_strd;
938
939 s2 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
940 s3 += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
941
942 pu1_src_lp += src_strd;
943 pu1_est_lp += est_strd;
944
945 s1 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
946 s4 += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
947
948 sad_1 = s1+s2+s3+s4;
949
950 if(sad == 0)
951 {
952 sad_2 = sad_1<<1;
953
954 ls1 = sad_2 -(s2 + s3);
955 ls2 = sad_2 -(s1 + s4);
956 ls3 = sad_2 -(s3 + s4);
957 ls4 = sad_2 -(s3 - (s1<<1));
958 ls5 = sad_2 -(s4 - (s2<<1));
959 ls6 = sad_2 -(s1 + s2);
960 ls7 = sad_2 -(s2 - (s4<<1));
961 ls8 = sad_2 -(s1 - (s3<<1));
962
963 if(
964 pu2_thrsh[8] <= sad_1 ||
965 pu2_thrsh[0] <= ls2 ||
966 pu2_thrsh[1] <= ls1 ||
967 pu2_thrsh[2] <= ls8 ||
968 pu2_thrsh[3] <= ls5 ||
969
970 pu2_thrsh[4] <= ls6 ||
971 pu2_thrsh[5] <= ls3 ||
972 pu2_thrsh[6] <= ls7 ||
973 pu2_thrsh[7] <= ls4
974
975 )sad = 1;
976 }
977 (*pi4_mb_distortion) += sad_1;
978 }
979 pu1_src += (src_strd *4);
980 pu1_est += (est_strd *4);
981 }
982 *pu4_is_non_zero = sad;
983}
984
985
986/**
987******************************************************************************
988*
989* @brief computes distortion (SAD and SAQTD) between 2 16x8 (interleaved) chroma blocks
990*
991*
992* @par Description
993* This functions computes SAD between2 16x8 chroma blocks(interleaved)
994* It also checks if the SATDD(Sum of absolute transformed wuqntized differnce beteern the blocks
995* If SAQTD is zero, it gives back zero
996* Other wise sad is retrned
997* There is no provison for early exit
998*
999* The transform done here is the transform for chroma blocks in H264
1000*
1001* @param[in] pu1_src
1002* UWORD8 pointer to the source
1003*
1004* @param[out] pu1_dst
1005* UWORD8 pointer to the destination
1006*
1007* @param[in] src_strd
1008* integer source stride
1009*
1010* @param[in] dst_strd
1011* integer destination stride
1012*
1013* @param[in] pu2_thrsh
1014* Threshold for each element of transofrmed quantized block
1015*
1016* @param[out] pi4_mb_distortion
1017* integer evaluated sad
1018*
1019* @remarks
1020* Fucntion code is nit updated.
1021* Will require debugging and minor modifications
1022*
1023******************************************************************************
1024*/
1025void ime_compute_satqd_8x16_chroma(UWORD8 *pu1_src,
1026 UWORD8 *pu1_est,
1027 WORD32 src_strd,
1028 WORD32 est_strd,
1029 WORD32 max_sad,
1030 UWORD16 *thrsh)
1031{
1032 WORD32 i,j,plane;
1033 WORD16 s1,s2,s3,s4,sad_1,sad_2,ls1,ls2,ls3,ls4,ls5,ls6,ls7,ls8;
1034 UWORD8 *pu1_src_lp,*pu1_est_lp,*pu1_src_plane,*pu1_est_plane;
1035 WORD32 sad =0;
1036 UNUSED(max_sad);
1037
1038 pu1_src_plane = pu1_src;
1039 pu1_est_plane = pu1_est;
1040
1041 for(plane =0;plane<2;plane++)
1042 {
1043 for(i=0;i<4;i++)
1044 {
1045 for(j=0;j<4;j++)
1046 {
1047 pu1_src_lp = pu1_src + 8*j;
1048 pu1_est_lp = pu1_est + 8*j;
1049
1050 s1 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1051 s4 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1052
1053 pu1_src_lp += src_strd;
1054 pu1_est_lp += est_strd;
1055
1056 s2 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1057 s3 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1058
1059 pu1_src_lp += src_strd;
1060 pu1_est_lp += est_strd;
1061
1062 s2 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1063 s3 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1064
1065 pu1_src_lp += src_strd;
1066 pu1_est_lp += est_strd;
1067
1068 s1 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
1069 s4 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
1070
1071 sad_1 = s1+s2+s3+s4;
1072 sad_2 = sad_1<<1;
1073
1074 ls1 = sad_2 -(s2 + s3);
1075 ls2 = sad_2 -(s1 + s4);
1076 ls3 = sad_2 -(s3 + s4);
1077 ls4 = sad_2 -(s3 - (s1<<1));
1078 ls5 = sad_2 -(s4 - (s2<<1));
1079 ls6 = sad_2 -(s1 + s2);
1080 ls7 = sad_2 -(s2 - (s4<<1));
1081 ls8 = sad_2 -(s1 - (s3<<1));
1082
1083 if(
1084 //thrsh[0] > sad_1 && Chroma Dc is checked later
1085 thrsh[1] > ls1 &&
1086 thrsh[2] > sad_1 &&
1087 thrsh[3] > ls2 &&
1088
1089 thrsh[4] > ls3 &&
1090 thrsh[5] > ls4 &&
1091 thrsh[6] > ls3 &&
1092 thrsh[7] > ls5 &&
1093
1094 thrsh[8] > sad_1 &&
1095 thrsh[9] > ls1 &&
1096 thrsh[10]> sad_1 &&
1097 thrsh[11]> ls2 &&
1098
1099 thrsh[12]> ls6 &&
1100 thrsh[13]> ls7 &&
1101 thrsh[14]> ls6 &&
1102 thrsh[15]> ls8
1103 )
1104 {
1105 /*set current sad to be zero*/
1106 }
1107 else
1108 return ;
1109
1110 sad += sad_1;
1111 }
1112 pu1_src += (src_strd *4);
1113 pu1_est += (est_strd *4);
1114 }
1115 if(sad < (thrsh[0]<<1))sad = 0;
1116 else return ;
1117
1118 pu1_src = pu1_src_plane+1;
1119 pu1_est = pu1_est_plane+1;
1120 }
1121 return ;
1122}
1123
1124
1125/**
1126******************************************************************************
1127*
1128* @brief computes distortion (SAD and SAQTD) between 2 16x16 blocks
1129*
1130* @par Description
1131* This functions computes SAD between 2 16x16 blocks.
1132* It also checks if the SATDD(Sum of absolute transformed wuqntized differnce beteern the blocks
1133* If SAQTD is zero, it gives back zero
1134* Other wise sad is retrned
1135* There is no provison for early exit
1136*
1137* The transform done here is the transform for inter 16x16 blocks in H264
1138*
1139* @param[in] pu1_src
1140* UWORD8 pointer to the source
1141*
1142* @param[out] pu1_dst
1143* UWORD8 pointer to the destination
1144*
1145* @param[in] src_strd
1146* integer source stride
1147*
1148* @param[in] dst_strd
1149* integer destination stride
1150*
1151* @param[in] pu2_thrsh
1152* Threshold for each element of transofrmed quantized block
1153*
1154* @param[out] pi4_mb_distortion
1155* integer evaluated sad
1156*
1157* @remarks
1158*
1159******************************************************************************
1160*/
1161void ime_compute_satqd_16x16_lumaintra(UWORD8 *pu1_src,
1162 UWORD8 *pu1_est,
1163 WORD32 src_strd,
1164 WORD32 est_strd,
1165 WORD32 max_sad,
1166 UWORD16 *thrsh,
1167 WORD32 *pi4_mb_distortion,
1168 UWORD8 *sig_nz_sad)
1169{
1170 UWORD32 i,j;
1171 WORD16 s1[4],s2[4],s3[4],s4[4],sad[4];
1172 UWORD8 *pu1_src_lp,*pu1_est_lp;
1173 UWORD8 *sig_sad_dc;
1174 UWORD32 nz_sad_sig = 0;
1175 UNUSED(max_sad);
1176 *pi4_mb_distortion =0;
1177
1178 sig_sad_dc = sig_nz_sad;
1179 sig_nz_sad++;
1180
1181 for(i=0;i<4;i++)
1182 {
1183 for(j=0;j<4;j++)
1184 {
1185 pu1_src_lp = pu1_src + 4*j;
1186 pu1_est_lp = pu1_est + 4*j;
1187
1188 s1[j] = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1189 s4[j] = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1190
1191 pu1_src_lp += src_strd;
1192 pu1_est_lp += est_strd;
1193
1194 s2[j] = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1195 s3[j] = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1196
1197 pu1_src_lp += src_strd;
1198 pu1_est_lp += est_strd;
1199
1200 s2[j] += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1201 s3[j] += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1202
1203 pu1_src_lp += src_strd;
1204 pu1_est_lp += est_strd;
1205
1206 s1[j] += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
1207 s4[j] += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
1208
1209 sad[j] = ((s1[j]+s2[j]+s3[j]+s4[j])<<1);
1210 }
1211
1212 for(j=0;j<4;j++)
1213 {
1214
1215 if(
1216 //thrsh[0] > (sad[j] >> 1) &&Dc goes in the other part
1217 thrsh[1] > (sad[j] -(s2[j] + s3[j])) &&
1218 thrsh[2] > (sad[j]>>1) &&
1219 thrsh[3] > (sad[j] -(s1[j] + s4[j])) &&
1220
1221 thrsh[4] > (sad[j] -(s3[j] + s4[j])) &&
1222 thrsh[5] > (sad[j] -(s3[j] - (s1[j]<<1))) &&
1223 thrsh[6] > (sad[j] -(s3[j] + s4[j])) &&
1224 thrsh[7] > (sad[j] -(s4[j] - (s2[j]<<1))) &&
1225
1226 thrsh[8] > (sad[j]>>1) &&
1227 thrsh[9] > (sad[j] -(s2[j] + s3[j])) &&
1228 thrsh[10]> (sad[j]>>1) &&
1229 thrsh[11]> (sad[j] -(s1[j] + s4[j])) &&
1230
1231 thrsh[12]> (sad[j] -(s1[j] + s2[j])) &&
1232 thrsh[13]> (sad[j] -(s2[j] - (s4[j]<<1))) &&
1233 thrsh[14]> (sad[j] -(s1[j] + s2[j])) &&
1234 thrsh[15]> (sad[j] -(s1[j] - (s3[j]<<1)))
1235 )
1236 {
1237 //sad[j] = 0; /*set current sad to be zero*/
1238 sig_nz_sad[j] = 0;/*Signal that the sad is zero*/
1239 }
1240 else
1241 {
1242 sig_nz_sad[j] = 1;/*signal that sad is non zero*/
1243 nz_sad_sig = 1;
1244 }
1245
1246 (*pi4_mb_distortion) += (sad[j]>>1);
1247 //if((*pi4_mb_distortion) >= max_sad)return; /*return or some thing*/
1248 }
1249
1250 sig_nz_sad += 4;
1251 pu1_src += (src_strd *4);
1252 pu1_est += (est_strd *4);
1253 }
1254
1255 if((*pi4_mb_distortion) < thrsh[0]<<2)
1256 {
1257 *sig_sad_dc = 0;
1258 if(nz_sad_sig == 0)(*pi4_mb_distortion) = 0;
1259 }
1260 else *sig_sad_dc = 1;
1261}
1262
Harinarayanan K K134291e2015-06-18 16:03:38 +05301263