// Copyright (c) 2005, 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat <opensource@google.com>

#ifndef TCMALLOC_INTERNAL_SPINLOCK_H__
#define TCMALLOC_INTERNAL_SPINLOCK_H__

#if (CPU(X86) || CPU(X86_64) || CPU(PPC)) && (COMPILER(GCC) || COMPILER(MSVC))

#include <time.h>       /* For nanosleep() */

#if !OS(WINDOWS)
#include <sched.h>      /* For sched_yield(); not available on Windows */
#endif

#if HAVE(STDINT_H)
#include <stdint.h>
#elif HAVE(INTTYPES_H)
#include <inttypes.h>
#else
#include <sys/types.h>
#endif

#if OS(WINDOWS)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#endif

static void TCMalloc_SlowLock(volatile unsigned int* lockword);

// The following is a struct so that it can be initialized at compile time
struct TCMalloc_SpinLock {

  inline void Lock() {
    int r;
#if COMPILER(GCC)
#if CPU(X86) || CPU(X86_64)
    // Atomically exchange lockword_ with 1; the old value lands in r.
    __asm__ __volatile__
      ("xchgl %0, %1"
       : "=r" (r), "=m" (lockword_)
       : "0" (1), "m" (lockword_)
       : "memory");
#else
    // Load-reserve/store-conditional loop: atomically swap 1 into
    // lockword_, retrying if the reservation is lost; the old value
    // lands in r, and isync keeps later accesses after the acquire.
    volatile unsigned int* lockword_ptr = &lockword_;
    __asm__ __volatile__
      ("1: lwarx %0, 0, %1\n\t"
       "stwcx. %2, 0, %1\n\t"
       "bne- 1b\n\t"
       "isync"
       : "=&r" (r), "=r" (lockword_ptr)
       : "r" (1), "1" (lockword_ptr)
       : "memory");
#endif
#elif COMPILER(MSVC)
    __asm {
      mov eax, this    ; store &lockword_ (which is this+0) in eax
      mov ebx, 1       ; store 1 in ebx
      xchg [eax], ebx  ; exchange lockword_ and 1
      mov r, ebx       ; store old value of lockword_ in r
    }
#endif
    if (r) TCMalloc_SlowLock(&lockword_);
  }

  inline void Unlock() {
#if COMPILER(GCC)
#if CPU(X86) || CPU(X86_64)
    __asm__ __volatile__
      ("movl $0, %0"
       : "=m" (lockword_)
       : "m" (lockword_)
       : "memory");
#else
    __asm__ __volatile__
      ("isync\n\t"
       "eieio\n\t"
       "stw %1, %0"
#if OS(DARWIN) || CPU(PPC)
       : "=o" (lockword_)
#else
       : "=m" (lockword_)
#endif
       : "r" (0)
       : "memory");
#endif
#elif COMPILER(MSVC)
    __asm {
      mov eax, this  ; store &lockword_ (which is this+0) in eax
      mov [eax], 0   ; set lockword_ to 0
    }
#endif
  }
  // Report if we think the lock can be held by this thread.
  // When the lock is truly held by the invoking thread
  // we will always return true.
  // Intended to be used as CHECK(lock.IsHeld());
  inline bool IsHeld() const {
    return lockword_ != 0;
  }

  inline void Init() { lockword_ = 0; }

  volatile unsigned int lockword_;
};

#define SPINLOCK_INITIALIZER { 0 }
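
// Usage sketch: because TCMalloc_SpinLock is a plain struct, a lock can be
// defined at file scope and initialized at compile time, with no run-time
// constructor. The names below (counter_lock, counter, IncrementCounter)
// are illustrative only, not part of this interface:
//
//   static TCMalloc_SpinLock counter_lock = SPINLOCK_INITIALIZER;
//   static int counter = 0;
//
//   void IncrementCounter() {
//     counter_lock.Lock();
//     counter++;                  // guarded access to shared state
//     counter_lock.Unlock();
//   }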
135 | |
static void TCMalloc_SlowLock(volatile unsigned int* lockword) {
  // Yield immediately since the fast path failed
#if OS(WINDOWS)
  Sleep(0);
#else
  sched_yield();
#endif
  while (true) {
    int r;
#if COMPILER(GCC)
#if CPU(X86) || CPU(X86_64)
    __asm__ __volatile__
      ("xchgl %0, %1"
       : "=r" (r), "=m" (*lockword)
       : "0" (1), "m" (*lockword)
       : "memory");
#else
    int tmp = 1;
    __asm__ __volatile__
      ("1: lwarx %0, 0, %1\n\t"
       "stwcx. %2, 0, %1\n\t"
       "bne- 1b\n\t"
       "isync"
       : "=&r" (r), "=r" (lockword)
       : "r" (tmp), "1" (lockword)
       : "memory");
#endif
#elif COMPILER(MSVC)
    __asm {
      mov eax, lockword  ; assign lockword into eax
      mov ebx, 1         ; assign 1 into ebx
      xchg [eax], ebx    ; exchange *lockword and 1
      mov r, ebx         ; store old value of *lockword in r
    }
#endif
    if (!r) {
      return;
    }

    // This code was adapted from the ptmalloc2 implementation of
    // spinlocks, which would sched_yield() up to 50 times before
    // sleeping once for a few milliseconds.  Mike Burrows suggested
    // just doing one sched_yield() outside the loop and always
    // sleeping after that.  This change greatly improved the
    // performance of spinlocks under high contention.  A test program
    // with 10 threads on a dual Xeon (four virtual processors) went
    // from taking 30 seconds to 16 seconds.
179 | |
    // Sleep for a few milliseconds
#if OS(WINDOWS)
    Sleep(2);
#else
    struct timespec tm;
    tm.tv_sec = 0;
    tm.tv_nsec = 2000001;
    nanosleep(&tm, NULL);
#endif
  }
}

#else

#include <pthread.h>

// Portable version
struct TCMalloc_SpinLock {
  pthread_mutex_t private_lock_;

  inline void Init() {
    if (pthread_mutex_init(&private_lock_, NULL) != 0) CRASH();
  }
  inline void Finalize() {
    if (pthread_mutex_destroy(&private_lock_) != 0) CRASH();
  }
  inline void Lock() {
    if (pthread_mutex_lock(&private_lock_) != 0) CRASH();
  }
  inline void Unlock() {
    if (pthread_mutex_unlock(&private_lock_) != 0) CRASH();
  }
  // Approximates IsHeld() with pthread_mutex_trylock(): if the mutex is
  // already held (by any thread), trylock fails and we report true;
  // otherwise we just acquired it ourselves, so release it and report false.
  inline bool IsHeld() {
    if (pthread_mutex_trylock(&private_lock_))
      return true;

    Unlock();
    return false;
  }
};

#define SPINLOCK_INITIALIZER { PTHREAD_MUTEX_INITIALIZER }

#endif

// Corresponding locker object that arranges to acquire a spinlock for
// the duration of a C++ scope.
class TCMalloc_SpinLockHolder {
 private:
  TCMalloc_SpinLock* lock_;
 public:
  inline explicit TCMalloc_SpinLockHolder(TCMalloc_SpinLock* l)
      : lock_(l) { l->Lock(); }
  inline ~TCMalloc_SpinLockHolder() { lock_->Unlock(); }
};
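
// Usage sketch: the holder acquires the lock in its constructor and its
// destructor releases the lock when the scope exits, including on early
// return. The names below (stats_lock, stats_, UpdateStats) are
// illustrative only:
//
//   static TCMalloc_SpinLock stats_lock = SPINLOCK_INITIALIZER;
//   static int stats_ = 0;
//
//   void UpdateStats(int delta) {
//     TCMalloc_SpinLockHolder h(&stats_lock);
//     stats_ += delta;
//   }  // h is destroyed here, calling stats_lock.Unlock()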

// Short-hands for convenient use by tcmalloc.cc
typedef TCMalloc_SpinLock SpinLock;
typedef TCMalloc_SpinLockHolder SpinLockHolder;

#endif  // TCMALLOC_INTERNAL_SPINLOCK_H__