/*
 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include "libyuv/cpu_id.h"
#include "libyuv/scale.h"
#include "../unit_test/unit_test.h"

// STRINGIZE(x) turns its argument into a string literal.
#define STRINGIZE(line) #line
// FILELINESTR(file, line) concatenates `file`, ":" and the stringized `line`
// into a single string literal. Because `line` is not an operand of # inside
// FILELINESTR itself, an argument such as __LINE__ is macro-expanded to its
// numeric value before STRINGIZE is applied to it.
#define FILELINESTR(file, line) file ":" STRINGIZE(line)

namespace libyuv {

Hangyu Kuangf047e7c2016-07-06 14:21:45 -070023// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -080024static int TestFilter(int src_width, int src_height,
25 int dst_width, int dst_height,
Hangyu Kuangf047e7c2016-07-06 14:21:45 -070026 FilterMode f, int benchmark_iterations,
27 int disable_cpu_flags, int benchmark_cpu_info) {
28 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
29 return 0;
30 }
Shri Borde7cd81492011-11-02 13:20:24 -070031
Hangyu Kuangf047e7c2016-07-06 14:21:45 -070032 int i, j;
33 const int b = 0; // 128 to test for padding/stride.
34 int src_width_uv = (Abs(src_width) + 1) >> 1;
35 int src_height_uv = (Abs(src_height) + 1) >> 1;
Shri Borde7cd81492011-11-02 13:20:24 -070036
Hangyu Kuangf047e7c2016-07-06 14:21:45 -070037 int64 src_y_plane_size = (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2);
38 int64 src_uv_plane_size = (src_width_uv + b * 2) * (src_height_uv + b * 2);
39
40 int src_stride_y = b * 2 + Abs(src_width);
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -080041 int src_stride_uv = b * 2 + src_width_uv;
Shri Borde7cd81492011-11-02 13:20:24 -070042
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -080043 align_buffer_page_end(src_y, src_y_plane_size)
44 align_buffer_page_end(src_u, src_uv_plane_size)
45 align_buffer_page_end(src_v, src_uv_plane_size)
Hangyu Kuangf047e7c2016-07-06 14:21:45 -070046 if (!src_y || !src_u || !src_v) {
47 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
48 return 0;
49 }
50 MemRandomize(src_y, src_y_plane_size);
51 MemRandomize(src_u, src_uv_plane_size);
52 MemRandomize(src_v, src_uv_plane_size);
Shri Borde7cd81492011-11-02 13:20:24 -070053
Hangyu Kuangf047e7c2016-07-06 14:21:45 -070054 int dst_width_uv = (dst_width + 1) >> 1;
55 int dst_height_uv = (dst_height + 1) >> 1;
Shri Borde7cd81492011-11-02 13:20:24 -070056
Hangyu Kuangf047e7c2016-07-06 14:21:45 -070057 int64 dst_y_plane_size = (dst_width + b * 2) * (dst_height + b * 2);
58 int64 dst_uv_plane_size = (dst_width_uv + b * 2) * (dst_height_uv + b * 2);
Shri Borde7cd81492011-11-02 13:20:24 -070059
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -080060 int dst_stride_y = b * 2 + dst_width;
61 int dst_stride_uv = b * 2 + dst_width_uv;
Shri Borde7cd81492011-11-02 13:20:24 -070062
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -080063 align_buffer_page_end(dst_y_c, dst_y_plane_size)
64 align_buffer_page_end(dst_u_c, dst_uv_plane_size)
65 align_buffer_page_end(dst_v_c, dst_uv_plane_size)
66 align_buffer_page_end(dst_y_opt, dst_y_plane_size)
67 align_buffer_page_end(dst_u_opt, dst_uv_plane_size)
68 align_buffer_page_end(dst_v_opt, dst_uv_plane_size)
Hangyu Kuangf047e7c2016-07-06 14:21:45 -070069 if (!dst_y_c || !dst_u_c || !dst_v_c ||
70 !dst_y_opt|| !dst_u_opt|| !dst_v_opt) {
71 printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
72 return 0;
73 }
Shri Borde7cd81492011-11-02 13:20:24 -070074
Hangyu Kuangf047e7c2016-07-06 14:21:45 -070075 MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
76 double c_time = get_time();
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -080077 I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
78 src_u + (src_stride_uv * b) + b, src_stride_uv,
79 src_v + (src_stride_uv * b) + b, src_stride_uv,
80 src_width, src_height,
81 dst_y_c + (dst_stride_y * b) + b, dst_stride_y,
82 dst_u_c + (dst_stride_uv * b) + b, dst_stride_uv,
83 dst_v_c + (dst_stride_uv * b) + b, dst_stride_uv,
84 dst_width, dst_height, f);
Hangyu Kuangf047e7c2016-07-06 14:21:45 -070085 c_time = (get_time() - c_time);
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -080086
Hangyu Kuangf047e7c2016-07-06 14:21:45 -070087 MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -080088 double opt_time = get_time();
89 for (i = 0; i < benchmark_iterations; ++i) {
90 I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
91 src_u + (src_stride_uv * b) + b, src_stride_uv,
92 src_v + (src_stride_uv * b) + b, src_stride_uv,
93 src_width, src_height,
94 dst_y_opt + (dst_stride_y * b) + b, dst_stride_y,
95 dst_u_opt + (dst_stride_uv * b) + b, dst_stride_uv,
96 dst_v_opt + (dst_stride_uv * b) + b, dst_stride_uv,
97 dst_width, dst_height, f);
98 }
99 opt_time = (get_time() - opt_time) / benchmark_iterations;
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -0800100 // Report performance of C vs OPT
101 printf("filter %d - %8d us C - %8d us OPT\n",
Hangyu Kuangf047e7c2016-07-06 14:21:45 -0700102 f,
103 static_cast<int>(c_time * 1e6),
104 static_cast<int>(opt_time * 1e6));
Shri Borde7cd81492011-11-02 13:20:24 -0700105
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -0800106 // C version may be a little off from the optimized. Order of
107 // operations may introduce rounding somewhere. So do a difference
108 // of the buffers and look to see that the max difference isn't
109 // over 2.
110 int max_diff = 0;
111 for (i = b; i < (dst_height + b); ++i) {
112 for (j = b; j < (dst_width + b); ++j) {
Hangyu Kuangf047e7c2016-07-06 14:21:45 -0700113 int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -0800114 dst_y_opt[(i * dst_stride_y) + j]);
115 if (abs_diff > max_diff) {
116 max_diff = abs_diff;
Shri Borde7cd81492011-11-02 13:20:24 -0700117 }
118 }
119 }
120
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -0800121 for (i = b; i < (dst_height_uv + b); ++i) {
122 for (j = b; j < (dst_width_uv + b); ++j) {
Hangyu Kuangf047e7c2016-07-06 14:21:45 -0700123 int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -0800124 dst_u_opt[(i * dst_stride_uv) + j]);
125 if (abs_diff > max_diff) {
126 max_diff = abs_diff;
127 }
Hangyu Kuangf047e7c2016-07-06 14:21:45 -0700128 abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -0800129 dst_v_opt[(i * dst_stride_uv) + j]);
130 if (abs_diff > max_diff) {
131 max_diff = abs_diff;
132 }
133 }
134 }
Shri Borde7cd81492011-11-02 13:20:24 -0700135
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -0800136 free_aligned_buffer_page_end(dst_y_c)
137 free_aligned_buffer_page_end(dst_u_c)
138 free_aligned_buffer_page_end(dst_v_c)
139 free_aligned_buffer_page_end(dst_y_opt)
140 free_aligned_buffer_page_end(dst_u_opt)
141 free_aligned_buffer_page_end(dst_v_opt)
142
143 free_aligned_buffer_page_end(src_y)
144 free_aligned_buffer_page_end(src_u)
145 free_aligned_buffer_page_end(src_v)
146
147 return max_diff;
Shri Borde7cd81492011-11-02 13:20:24 -0700148}
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -0800149
Hangyu Kuangf047e7c2016-07-06 14:21:45 -0700150// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
151// 0 = exact.
152static int TestFilter_16(int src_width, int src_height,
153 int dst_width, int dst_height,
154 FilterMode f, int benchmark_iterations) {
155 if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
156 return 0;
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -0800157 }
Hangyu Kuangf047e7c2016-07-06 14:21:45 -0700158
159 int i, j;
160 const int b = 0; // 128 to test for padding/stride.
161 int src_width_uv = (Abs(src_width) + 1) >> 1;
162 int src_height_uv = (Abs(src_height) + 1) >> 1;
163
164 int64 src_y_plane_size = (Abs(src_width) + b * 2) *
165 (Abs(src_height) + b * 2);
166 int64 src_uv_plane_size = (src_width_uv + b * 2) * (src_height_uv + b * 2);
167
168 int src_stride_y = b * 2 + Abs(src_width);
169 int src_stride_uv = b * 2 + src_width_uv;
170
171 align_buffer_page_end(src_y, src_y_plane_size)
172 align_buffer_page_end(src_u, src_uv_plane_size)
173 align_buffer_page_end(src_v, src_uv_plane_size)
174 align_buffer_page_end(src_y_16, src_y_plane_size * 2)
175 align_buffer_page_end(src_u_16, src_uv_plane_size * 2)
176 align_buffer_page_end(src_v_16, src_uv_plane_size * 2)
177 uint16* p_src_y_16 = reinterpret_cast<uint16*>(src_y_16);
178 uint16* p_src_u_16 = reinterpret_cast<uint16*>(src_u_16);
179 uint16* p_src_v_16 = reinterpret_cast<uint16*>(src_v_16);
180
181 MemRandomize(src_y, src_y_plane_size);
182 MemRandomize(src_u, src_uv_plane_size);
183 MemRandomize(src_v, src_uv_plane_size);
184
185 for (i = b; i < src_height + b; ++i) {
186 for (j = b; j < src_width + b; ++j) {
187 p_src_y_16[(i * src_stride_y) + j] = src_y[(i * src_stride_y) + j];
188 }
189 }
190
191 for (i = b; i < (src_height_uv + b); ++i) {
192 for (j = b; j < (src_width_uv + b); ++j) {
193 p_src_u_16[(i * src_stride_uv) + j] = src_u[(i * src_stride_uv) + j];
194 p_src_v_16[(i * src_stride_uv) + j] = src_v[(i * src_stride_uv) + j];
195 }
196 }
197
198 int dst_width_uv = (dst_width + 1) >> 1;
199 int dst_height_uv = (dst_height + 1) >> 1;
200
201 int dst_y_plane_size = (dst_width + b * 2) * (dst_height + b * 2);
202 int dst_uv_plane_size = (dst_width_uv + b * 2) * (dst_height_uv + b * 2);
203
204 int dst_stride_y = b * 2 + dst_width;
205 int dst_stride_uv = b * 2 + dst_width_uv;
206
207 align_buffer_page_end(dst_y_8, dst_y_plane_size)
208 align_buffer_page_end(dst_u_8, dst_uv_plane_size)
209 align_buffer_page_end(dst_v_8, dst_uv_plane_size)
210 align_buffer_page_end(dst_y_16, dst_y_plane_size * 2)
211 align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2)
212 align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2)
213
214 uint16* p_dst_y_16 = reinterpret_cast<uint16*>(dst_y_16);
215 uint16* p_dst_u_16 = reinterpret_cast<uint16*>(dst_u_16);
216 uint16* p_dst_v_16 = reinterpret_cast<uint16*>(dst_v_16);
217
218 I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
219 src_u + (src_stride_uv * b) + b, src_stride_uv,
220 src_v + (src_stride_uv * b) + b, src_stride_uv,
221 src_width, src_height,
222 dst_y_8 + (dst_stride_y * b) + b, dst_stride_y,
223 dst_u_8 + (dst_stride_uv * b) + b, dst_stride_uv,
224 dst_v_8 + (dst_stride_uv * b) + b, dst_stride_uv,
225 dst_width, dst_height, f);
226
227 for (i = 0; i < benchmark_iterations; ++i) {
228 I420Scale_16(p_src_y_16 + (src_stride_y * b) + b, src_stride_y,
229 p_src_u_16 + (src_stride_uv * b) + b, src_stride_uv,
230 p_src_v_16 + (src_stride_uv * b) + b, src_stride_uv,
231 src_width, src_height,
232 p_dst_y_16 + (dst_stride_y * b) + b, dst_stride_y,
233 p_dst_u_16 + (dst_stride_uv * b) + b, dst_stride_uv,
234 p_dst_v_16 + (dst_stride_uv * b) + b, dst_stride_uv,
235 dst_width, dst_height, f);
236 }
237
238 // Expect an exact match
239 int max_diff = 0;
240 for (i = b; i < (dst_height + b); ++i) {
241 for (j = b; j < (dst_width + b); ++j) {
242 int abs_diff = Abs(dst_y_8[(i * dst_stride_y) + j] -
243 p_dst_y_16[(i * dst_stride_y) + j]);
244 if (abs_diff > max_diff) {
245 max_diff = abs_diff;
246 }
247 }
248 }
249
250 for (i = b; i < (dst_height_uv + b); ++i) {
251 for (j = b; j < (dst_width_uv + b); ++j) {
252 int abs_diff = Abs(dst_u_8[(i * dst_stride_uv) + j] -
253 p_dst_u_16[(i * dst_stride_uv) + j]);
254 if (abs_diff > max_diff) {
255 max_diff = abs_diff;
256 }
257 abs_diff = Abs(dst_v_8[(i * dst_stride_uv) + j] -
258 p_dst_v_16[(i * dst_stride_uv) + j]);
259 if (abs_diff > max_diff) {
260 max_diff = abs_diff;
261 }
262 }
263 }
264
265 free_aligned_buffer_page_end(dst_y_8)
266 free_aligned_buffer_page_end(dst_u_8)
267 free_aligned_buffer_page_end(dst_v_8)
268 free_aligned_buffer_page_end(dst_y_16)
269 free_aligned_buffer_page_end(dst_u_16)
270 free_aligned_buffer_page_end(dst_v_16)
271
272 free_aligned_buffer_page_end(src_y)
273 free_aligned_buffer_page_end(src_u)
274 free_aligned_buffer_page_end(src_v)
275 free_aligned_buffer_page_end(src_y_16)
276 free_aligned_buffer_page_end(src_u_16)
277 free_aligned_buffer_page_end(src_v_16)
278
279 return max_diff;
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -0800280}
281
Hangyu Kuangf047e7c2016-07-06 14:21:45 -0700282// The following adjustments in dimensions ensure the scale factor will be
283// exactly achieved.
284// 2 is chroma subsample
285#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
286#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -0800287
Hangyu Kuangf047e7c2016-07-06 14:21:45 -0700288#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
289 TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter) { \
290 int diff = TestFilter(SX(benchmark_width_, nom, denom), \
291 SX(benchmark_height_, nom, denom), \
292 DX(benchmark_width_, nom, denom), \
293 DX(benchmark_height_, nom, denom), \
294 kFilter##filter, benchmark_iterations_, \
295 disable_cpu_flags_, benchmark_cpu_info_); \
296 EXPECT_LE(diff, max_diff); \
297 } \
298 TEST_F(LibYUVScaleTest, DISABLED_ScaleDownBy##name##_##filter##_16) { \
299 int diff = TestFilter_16(SX(benchmark_width_, nom, denom), \
300 SX(benchmark_height_, nom, denom), \
301 DX(benchmark_width_, nom, denom), \
302 DX(benchmark_height_, nom, denom), \
303 kFilter##filter, benchmark_iterations_); \
304 EXPECT_LE(diff, max_diff); \
305 }
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -0800306
Hangyu Kuangf047e7c2016-07-06 14:21:45 -0700307// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
308// filtering is different fixed point implementations for SSSE3, Neon and C.
309#define TEST_FACTOR(name, nom, denom, boxdiff) \
310 TEST_FACTOR1(name, None, nom, denom, 0) \
311 TEST_FACTOR1(name, Linear, nom, denom, 3) \
312 TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
313 TEST_FACTOR1(name, Box, nom, denom, boxdiff)
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -0800314
Hangyu Kuangf047e7c2016-07-06 14:21:45 -0700315TEST_FACTOR(2, 1, 2, 0)
316TEST_FACTOR(4, 1, 4, 0)
317TEST_FACTOR(8, 1, 8, 0)
318TEST_FACTOR(3by4, 3, 4, 1)
319TEST_FACTOR(3by8, 3, 8, 1)
320TEST_FACTOR(3, 1, 3, 0)
321#undef TEST_FACTOR1
322#undef TEST_FACTOR
323#undef SX
324#undef DX
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -0800325
Hangyu Kuangf047e7c2016-07-06 14:21:45 -0700326#define TEST_SCALETO1(name, width, height, filter, max_diff) \
327 TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \
328 int diff = TestFilter(benchmark_width_, benchmark_height_, \
329 width, height, \
330 kFilter##filter, benchmark_iterations_, \
331 disable_cpu_flags_, benchmark_cpu_info_); \
332 EXPECT_LE(diff, max_diff); \
333 } \
334 TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \
335 int diff = TestFilter(width, height, \
336 Abs(benchmark_width_), Abs(benchmark_height_), \
337 kFilter##filter, benchmark_iterations_, \
338 disable_cpu_flags_, benchmark_cpu_info_); \
339 EXPECT_LE(diff, max_diff); \
340 } \
341 TEST_F(LibYUVScaleTest, \
342 DISABLED_##name##To##width##x##height##_##filter##_16) { \
343 int diff = TestFilter_16(benchmark_width_, benchmark_height_, \
344 width, height, \
345 kFilter##filter, benchmark_iterations_); \
346 EXPECT_LE(diff, max_diff); \
347 } \
348 TEST_F(LibYUVScaleTest, \
349 DISABLED_##name##From##width##x##height##_##filter##_16) { \
350 int diff = TestFilter_16(width, height, \
351 Abs(benchmark_width_), Abs(benchmark_height_), \
352 kFilter##filter, benchmark_iterations_); \
353 EXPECT_LE(diff, max_diff); \
354 }
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -0800355
Hangyu Kuangf047e7c2016-07-06 14:21:45 -0700356// Test scale to a specified size with all 4 filters.
357#define TEST_SCALETO(name, width, height) \
358 TEST_SCALETO1(name, width, height, None, 0) \
359 TEST_SCALETO1(name, width, height, Linear, 0) \
360 TEST_SCALETO1(name, width, height, Bilinear, 0) \
361 TEST_SCALETO1(name, width, height, Box, 0)
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -0800362
Hangyu Kuangf047e7c2016-07-06 14:21:45 -0700363TEST_SCALETO(Scale, 1, 1)
364TEST_SCALETO(Scale, 320, 240)
365TEST_SCALETO(Scale, 352, 288)
366TEST_SCALETO(Scale, 569, 480)
367TEST_SCALETO(Scale, 640, 360)
368TEST_SCALETO(Scale, 1280, 720)
369#undef TEST_SCALETO1
370#undef TEST_SCALETO
Hendrik Dahlkamp33cfdeb2013-01-23 18:27:37 -0800371
}  // namespace libyuv