blob: 08e4f669483b0a6c66931cdd916c781d45bec7a9 [file] [log] [blame]
epoger@google.comec3ed6a2011-07-28 14:26:00 +00001
/*
 * Copyright 2009 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
8
epoger@google.comec3ed6a2011-07-28 14:26:00 +00009
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000010#include <emmintrin.h>
11#include "SkUtils_opts_SSE2.h"
rmistry@google.comfbfcd562012-08-23 18:09:54 +000012
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000013void sk_memset16_SSE2(uint16_t *dst, uint16_t value, int count)
14{
15 SkASSERT(dst != NULL && count >= 0);
16
17 // dst must be 2-byte aligned.
18 SkASSERT((((size_t) dst) & 0x01) == 0);
19
20 if (count >= 32) {
21 while (((size_t)dst) & 0x0F) {
22 *dst++ = value;
23 --count;
24 }
25 __m128i *d = reinterpret_cast<__m128i*>(dst);
26 __m128i value_wide = _mm_set1_epi16(value);
27 while (count >= 32) {
28 _mm_store_si128(d++, value_wide);
29 _mm_store_si128(d++, value_wide);
30 _mm_store_si128(d++, value_wide);
31 _mm_store_si128(d++, value_wide);
32 count -= 32;
33 }
34 dst = reinterpret_cast<uint16_t*>(d);
35 }
36 while (count > 0) {
37 *dst++ = value;
38 --count;
39 }
40}
rmistry@google.comfbfcd562012-08-23 18:09:54 +000041
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000042void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count)
43{
44 SkASSERT(dst != NULL && count >= 0);
45
46 // dst must be 4-byte aligned.
47 SkASSERT((((size_t) dst) & 0x03) == 0);
48
49 if (count >= 16) {
50 while (((size_t)dst) & 0x0F) {
51 *dst++ = value;
52 --count;
53 }
54 __m128i *d = reinterpret_cast<__m128i*>(dst);
55 __m128i value_wide = _mm_set1_epi32(value);
56 while (count >= 16) {
57 _mm_store_si128(d++, value_wide);
58 _mm_store_si128(d++, value_wide);
59 _mm_store_si128(d++, value_wide);
60 _mm_store_si128(d++, value_wide);
61 count -= 16;
62 }
63 dst = reinterpret_cast<uint32_t*>(d);
64 }
65 while (count > 0) {
66 *dst++ = value;
67 --count;
68 }
69}