// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// This file is an internal atomic implementation, use base/atomicops.h instead.

#ifndef BASE_ATOMICOPS_INTERNALS_X86_GCC_H_
#define BASE_ATOMICOPS_INTERNALS_X86_GCC_H_

// This struct is not part of the public API of this module; clients may not
// use it.
// Features of this x86.  Values may not be correct before main() is run,
// but are set conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_amd_lock_mb_bug;  // Processor has AMD memory-barrier bug; do lfence
                             // after acquire compare-and-swap.
  bool has_sse2;             // Processor has SSE2.
};
extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;
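
// Illustrative sketch only (not compiled, not part of this header): one way
// a feature struct like this could be populated at startup using GCC's
// <cpuid.h>.  The initializer name and the conservative defaults are
// hypothetical; the real detection code lives elsewhere in the library, and
// the AMD lock/memory-barrier bug check (specific Opteron revisions) is
// deliberately left as the conservative "assume it is present".
#if 0
#include <cpuid.h>

static void AtomicOps_x86CPUFeaturesInit() {  // hypothetical helper
  // Conservative defaults in case cpuid is unavailable: assume the AMD
  // barrier bug exists and assume SSE2 (mfence) does not.
  AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug = true;
  AtomicOps_Internalx86CPUFeatures.has_sse2 = false;

  unsigned int eax, ebx, ecx, edx;
  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
    // SSE2 support is reported in bit 26 of EDX for cpuid leaf 1.
    AtomicOps_Internalx86CPUFeatures.has_sse2 = (edx & (1u << 26)) != 0;
  }
}
#endif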

#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")

namespace base {
namespace subtle {

// 32-bit low-level operations on any platform.

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev;
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}
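
// Illustrative sketch only (not compiled, not part of this header): the usual
// retry-loop pattern built on NoBarrier_CompareAndSwap.  "AtomicStoreMax" is
// a hypothetical helper, not an operation this header provides.
#if 0
inline void AtomicStoreMax(volatile Atomic32* ptr, Atomic32 candidate) {
  Atomic32 observed = *ptr;  // plain read; the CAS below validates it
  while (candidate > observed) {
    // Try to install |candidate|; cmpxchg returns the value that was actually
    // in memory, so a mismatch tells us another thread raced us.
    Atomic32 prev = NoBarrier_CompareAndSwap(ptr, observed, candidate);
    if (prev == observed)
      break;           // we won the race
    observed = prev;   // someone else updated *ptr; re-check against it
  }
}
#endif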

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  return temp + increment;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}
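
// Illustrative sketch only (not compiled, not part of this header): a minimal
// reference count built on the increment operations above.  "RefCountedThing",
// "AddRef" and "Release" are hypothetical names used for the example.
#if 0
struct RefCountedThing {
  volatile Atomic32 refcount;
};

inline void AddRef(RefCountedThing* t) {
  // Taking an extra reference needs no ordering guarantees of its own.
  NoBarrier_AtomicIncrement(&t->refcount, 1);
}

inline bool Release(RefCountedThing* t) {
  // Dropping a reference must not be reordered ahead of earlier writes to the
  // object, so use the barrier variant; returns true when the count hit zero
  // and the caller should delete the object.
  return Barrier_AtomicIncrement(&t->refcount, -1) == 0;
}
#endif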

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}
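
// Illustrative sketch only (not compiled, not part of this header): a tiny
// test-and-set spinlock showing why the CAS variants carry acquire/release
// names.  "SpinLock", "SpinLockAcquire" and "SpinLockRelease" are
// hypothetical names used for the example.
#if 0
struct SpinLock {
  volatile Atomic32 state;  // 0 == unlocked, 1 == locked
};

inline void SpinLockAcquire(SpinLock* lock) {
  // Acquire semantics: operations inside the critical section may not be
  // observed before the lock is seen as taken.
  while (Acquire_CompareAndSwap(&lock->state, 0, 1) != 0) {
    // Spin.  A production lock would pause or yield here.
  }
}

inline void SpinLockRelease(SpinLock* lock) {
  // Release semantics: writes made inside the critical section must be
  // visible before the lock is seen as free.  (Release_Store, defined
  // further down in this header, would be the more usual choice.)
  Release_CompareAndSwap(&lock->state, 1, 0);
}
#endif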

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

#if defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because
// "mfence" is guaranteed to exist.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

#else

inline void MemoryBarrier() {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    __asm__ __volatile__("mfence" : : : "memory");
  } else {  // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);  // acts as a barrier on PIII
  }
}
#endif

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value;  // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;  // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}
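
// Illustrative sketch only (not compiled, not part of this header): the
// publish/consume pattern that the Release_Store/Acquire_Load pair is meant
// for.  The "payload"/"ready" globals and the two functions are hypothetical.
#if 0
Atomic32 payload = 0;         // written by the producer thread
volatile Atomic32 ready = 0;  // 0 == not published yet

inline void Publish(Atomic32 value) {
  payload = value;           // 1. write the data...
  Release_Store(&ready, 1);  // 2. ...then release-store the flag
}

inline bool TryConsume(Atomic32* out) {
  if (Acquire_Load(&ready) == 0)  // acquire-load the flag first...
    return false;
  *out = payload;                 // ...so this read is guaranteed to see the payload
  return true;
}
#endif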

#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();

  *ptr = value;  // An x86 store acts as a release barrier
                 // for current AMD/Intel chips as of Jan 2008.
                 // See also Acquire_Load(), below.

  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare.  Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr;  // An x86 load acts as an acquire barrier,
                          // for current AMD/Intel chips as of Jan 2008.
                          // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}
#endif  // defined(__x86_64__)

}  // namespace base::subtle
}  // namespace base

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // BASE_ATOMICOPS_INTERNALS_X86_GCC_H_