/*
2 * Copyright 2020 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "src/core/SkMSAN.h"
9#include "src/core/SkOpts.h"
10
11#if defined(__x86_64__) || defined(_M_X64) // memset16 and memset32 could work on 32-bit x86 too.
12
// Reason string passed to sk_msan_mark_initialized() by each repsto() overload in this file.
// Both the characters and the pointer are const: nothing here ever repoints it.
static const char* const note = "MSAN can't see that rep sto initializes memory.";
14
15 #if defined(_MSC_VER)
16 #include <intrin.h>
17 static inline void repsto(uint16_t* dst, uint16_t v, size_t n) {
18 sk_msan_mark_initialized(dst,dst+n,note);
19 __stosw(dst, v, n);
20 }
21 static inline void repsto(uint32_t* dst, uint32_t v, size_t n) {
22 sk_msan_mark_initialized(dst,dst+n,note);
23 static_assert(sizeof(uint32_t) == sizeof(unsigned long));
24 __stosd(reinterpret_cast<unsigned long*>(dst), v, n);
25 }
26 static inline void repsto(uint64_t* dst, uint64_t v, size_t n) {
27 sk_msan_mark_initialized(dst,dst+n,note);
28 __stosq(dst, v, n);
29 }
30 #else
31 static inline void repsto(uint16_t* dst, uint16_t v, size_t n) {
32 sk_msan_mark_initialized(dst,dst+n,note);
33 asm volatile("rep stosw" : "+D"(dst), "+c"(n) : "a"(v) : "memory");
34 }
35 static inline void repsto(uint32_t* dst, uint32_t v, size_t n) {
36 sk_msan_mark_initialized(dst,dst+n,note);
37 asm volatile("rep stosl" : "+D"(dst), "+c"(n) : "a"(v) : "memory");
38 }
39 static inline void repsto(uint64_t* dst, uint64_t v, size_t n) {
40 sk_msan_mark_initialized(dst,dst+n,note);
41 asm volatile("rep stosq" : "+D"(dst), "+c"(n) : "a"(v) : "memory");
42 }
43 #endif
44
// ERMS (enhanced rep movsb/stosb) is ideal for large fills but has a relatively high
// setup cost, so we use the previous best routine for small inputs.  FSRM would make this moot.
// Signatures of the linear fills: (dst, value, element count).
typedef void (*memset16_fn)(uint16_t*, uint16_t, int);
typedef void (*memset32_fn)(uint32_t*, uint32_t, int);
typedef void (*memset64_fn)(uint64_t*, uint64_t, int);

// Signatures of the rectangular fills: (dst, value, elements per row, rowBytes, height).
typedef void (*rect_memset16_fn)(uint16_t*, uint16_t, int, size_t, int);
typedef void (*rect_memset32_fn)(uint32_t*, uint32_t, int, size_t, int);
typedef void (*rect_memset64_fn)(uint64_t*, uint64_t, int, size_t, int);

// Routines that were installed before Init_erms() ran.  They have static storage duration,
// so they start out null and stay null until Init_erms() fills them in.
static memset16_fn      g_memset16_prev;
static memset32_fn      g_memset32_prev;
static memset64_fn      g_memset64_prev;
static rect_memset16_fn g_rect_memset16_prev;
static rect_memset32_fn g_rect_memset32_prev;
static rect_memset64_fn g_rect_memset64_prev;
53
54 // Empirically determined with `nanobench -m memset`.
// True when a fill of `bytes` bytes is under the cutoff below which the previous
// routine is used instead of `rep stos`.
static bool small(size_t bytes) {
    const size_t kCutoffBytes = 1024;
    return bytes < kCutoffBytes;
}
56
57 #define SK_OPTS_NS erms
58 namespace SK_OPTS_NS {
59 static inline void memset16(uint16_t* dst, uint16_t v, int n) {
60 return small(sizeof(v)*n) ? g_memset16_prev(dst, v, n)
61 : repsto(dst, v, n);
62 }
63 static inline void memset32(uint32_t* dst, uint32_t v, int n) {
64 return small(sizeof(v)*n) ? g_memset32_prev(dst, v, n)
65 : repsto(dst, v, n);
66 }
67 static inline void memset64(uint64_t* dst, uint64_t v, int n) {
68 return small(sizeof(v)*n) ? g_memset64_prev(dst, v, n)
69 : repsto(dst, v, n);
70 }
71
72 static inline void rect_memset16(uint16_t* dst, uint16_t v, int n,
73 size_t rowBytes, int height) {
74 if (small(sizeof(v)*n)) {
75 return g_rect_memset16_prev(dst,v,n, rowBytes,height);
76 }
77 for (int stride = rowBytes/sizeof(v); height --> 0; dst += stride) {
78 repsto(dst, v, n);
79 }
80 }
81 static inline void rect_memset32(uint32_t* dst, uint32_t v, int n,
82 size_t rowBytes, int height) {
83 if (small(sizeof(v)*n)) {
84 return g_rect_memset32_prev(dst,v,n, rowBytes,height);
85 }
86 for (int stride = rowBytes/sizeof(v); height --> 0; dst += stride) {
87 repsto(dst, v, n);
88 }
89 }
90 static inline void rect_memset64(uint64_t* dst, uint64_t v, int n,
91 size_t rowBytes, int height) {
92 if (small(sizeof(v)*n)) {
93 return g_rect_memset64_prev(dst,v,n, rowBytes,height);
94 }
95 for (int stride = rowBytes/sizeof(v); height --> 0; dst += stride) {
96 repsto(dst, v, n);
97 }
98 }
99 } // namespace SK_OPTS_NS
100
101 namespace SkOpts {
102 void Init_erms() {
103 g_memset16_prev = memset16;
104 g_memset32_prev = memset32;
105 g_memset64_prev = memset64;
106 g_rect_memset16_prev = rect_memset16;
107 g_rect_memset32_prev = rect_memset32;
108 g_rect_memset64_prev = rect_memset64;
109
110 memset16 = SK_OPTS_NS::memset16;
111 memset32 = SK_OPTS_NS::memset32;
112 memset64 = SK_OPTS_NS::memset64;
113 rect_memset16 = SK_OPTS_NS::rect_memset16;
114 rect_memset32 = SK_OPTS_NS::rect_memset32;
115 rect_memset64 = SK_OPTS_NS::rect_memset64;
116 }
117 }
118#else
namespace SkOpts {
    // Build is not x86-64: no `rep stos` routines are compiled in, so this is a no-op.
    void Init_erms() {
    }
}
122#endif