//===-- tsan_rtl_access.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Definitions of memory access and function entry/exit entry points.
//===----------------------------------------------------------------------===//

#include "tsan_rtl.h"

namespace __tsan {

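// Attempts to append a memory access event to the current trace part.
// Uses the compact EventAccess encoding when the PC delta fits into
// EventAccess::kPCBits and falls back to EventAccessExt otherwise.
// Returns false if the trace part is full and must be switched first.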
ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc,
                                             uptr addr, uptr size,
                                             AccessType typ) {
  DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
  if (!kCollectHistory)
    return true;
  EventAccess* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
  uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
  thr->trace_prev_pc = pc;
  if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
    ev->is_access = 1;
    ev->is_read = !!(typ & kAccessRead);
    ev->is_atomic = !!(typ & kAccessAtomic);
    ev->size_log = size_log;
    ev->pc_delta = pc_delta;
    DCHECK_EQ(ev->pc_delta, pc_delta);
    ev->addr = CompressAddr(addr);
    TraceRelease(thr, ev);
    return true;
  }
  auto* evex = reinterpret_cast<EventAccessExt*>(ev);
  evex->is_access = 0;
  evex->is_func = 0;
  evex->type = EventType::kAccessExt;
  evex->is_read = !!(typ & kAccessRead);
  evex->is_atomic = !!(typ & kAccessAtomic);
  evex->size_log = size_log;
  // Note: this is important, see comment in EventAccessExt.
  evex->_ = 0;
  evex->addr = CompressAddr(addr);
  evex->pc = pc;
  TraceRelease(thr, evex);
  return true;
}

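// Same as above, but records a range access (EventAccessRange) covering
// [addr, addr + size). Returns false if the trace part is full.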
ALWAYS_INLINE
bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                               AccessType typ) {
  if (!kCollectHistory)
    return true;
  EventAccessRange* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  thr->trace_prev_pc = pc;
  ev->is_access = 0;
  ev->is_func = 0;
  ev->type = EventType::kAccessRange;
  ev->is_read = !!(typ & kAccessRead);
  ev->is_free = !!(typ & kAccessFree);
  ev->size_lo = size;
  ev->pc = CompressAddr(pc);
  ev->addr = CompressAddr(addr);
  ev->size_hi = size >> EventAccessRange::kSizeLoBits;
  TraceRelease(thr, ev);
  return true;
}

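// Slow-path wrappers: if the fast TryTrace* attempt fails because the trace
// part is full, switch to a new part and retry (the retry must succeed).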
void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                            AccessType typ) {
  if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
  DCHECK(res);
}

void TraceFunc(ThreadState* thr, uptr pc) {
  if (LIKELY(TryTraceFunc(thr, pc)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceFunc(thr, pc);
  DCHECK(res);
}

NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) {
  TraceSwitchPart(thr);
  FuncEntry(thr, pc);
}

NOINLINE void TraceRestartFuncExit(ThreadState* thr) {
  TraceSwitchPart(thr);
  FuncExit(thr);
}

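// Records mutex lock/unlock events in the trace for report reconstruction.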
void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr,
                    StackID stk) {
  DCHECK(type == EventType::kLock || type == EventType::kRLock);
  if (!kCollectHistory)
    return;
  EventLock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = type;
  ev.pc = CompressAddr(pc);
  ev.stack_lo = stk;
  ev.stack_hi = stk >> EventLock::kStackIDLoBits;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceMutexUnlock(ThreadState* thr, uptr addr) {
  if (!kCollectHistory)
    return;
  EventUnlock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kUnlock;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

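// Records the thread's current slot id and epoch (EventTime) in the trace.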
void TraceTime(ThreadState* thr) {
  if (!kCollectHistory)
    return;
  FastState fast_state = thr->fast_state;
  EventTime ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kTime;
  ev.sid = static_cast<u64>(fast_state.sid());
  ev.epoch = static_cast<u64>(fast_state.epoch());
  ev._ = 0;
  TraceEvent(thr, ev);
}

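// Out-of-line race reporting helper, kept NOINLINE so that the hot race
// checks below stay small.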
NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                           Shadow old,
                           AccessType typ) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  // For the free shadow markers the first element (which contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, so take it.
  if (old.sid() == kFreeSid)
    old = Shadow(LoadShadow(&shadow_mem[1]));
  // This prevents trapping on this address in the future.
  for (uptr i = 0; i < kShadowCnt; i++)
    StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty);
  // See the comment in MemoryRangeFreed as to why the slot is locked
  // for free memory accesses. ReportRace must not be called with
  // the slot locked because of the fork. But MemoryRangeFreed is not
  // called during fork because fork sets ignore_reads_and_writes,
  // so simply unlocking the slot should be fine.
  if (typ & kAccessSlotLocked)
    SlotUnlock(thr);
  ReportRace(thr, shadow_mem, cur, Shadow(old), typ);
  if (typ & kAccessSlotLocked)
    SlotLock(thr);
}

#if !TSAN_VECTORIZE
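// Scalar fallbacks for ContainsSameAccess/CheckRaces, used when
// TSAN_VECTORIZE is disabled; the vectorized versions follow in the #else
// branch.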
ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1,
                        AccessType typ) {
  for (uptr i = 0; i < kShadowCnt; i++) {
    auto old = LoadShadow(&s[i]);
    if (!(typ & kAccessRead)) {
      if (old == cur.raw())
        return true;
      continue;
    }
    auto masked = static_cast<RawShadow>(static_cast<u32>(old) |
                                         static_cast<u32>(Shadow::kRodata));
    if (masked == cur.raw())
      return true;
    if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
      if (old == Shadow::kRodata)
        return true;
    }
  }
  return false;
}

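// Scans the kShadowCnt shadow slots for a conflicting access, stores the
// current access into a free or evictable slot, and returns true iff a race
// was reported.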
ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                int unused0, int unused1, AccessType typ) {
  bool stored = false;
  for (uptr idx = 0; idx < kShadowCnt; idx++) {
    RawShadow* sp = &shadow_mem[idx];
    Shadow old(LoadShadow(sp));
    if (LIKELY(old.raw() == Shadow::kEmpty)) {
      if (!(typ & kAccessCheckOnly) && !stored)
        StoreShadow(sp, cur.raw());
      return false;
    }
    if (LIKELY(!(cur.access() & old.access())))
      continue;
    if (LIKELY(cur.sid() == old.sid())) {
      if (!(typ & kAccessCheckOnly) &&
          LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) {
        StoreShadow(sp, cur.raw());
        stored = true;
      }
      continue;
    }
    if (LIKELY(old.IsBothReadsOrAtomic(typ)))
      continue;
    if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch()))
      continue;
    DoReportRace(thr, shadow_mem, cur, old, typ);
    return true;
  }
  // We did not find any races and had already stored
  // the current access info, so we are done.
  if (LIKELY(stored))
    return false;
  // Choose a random candidate slot and replace it.
  uptr index =
      atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt;
  StoreShadow(&shadow_mem[index], cur.raw());
  return false;
}

# define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0

#else /* !TSAN_VECTORIZE */

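// Vectorized implementations: all kShadowCnt shadow slots are processed at
// once as a single 128-bit vector.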
ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow,
                        m128 access, AccessType typ) {
  // Note: we could check if there is a larger access of the same type,
  // e.g. we just allocated/memset-ed a block (so it contains 8 byte writes)
  // and now do smaller reads/writes, these can also be considered as "same
  // access". However, it will make the check more expensive, so it's unclear
  // if it's worth it. But this would conserve trace space, so it's useful
  // besides the potential speed up.
  if (!(typ & kAccessRead)) {
    const m128 same = _mm_cmpeq_epi32(shadow, access);
    return _mm_movemask_epi8(same);
  }
  // For reads we need to reset the read bit in the shadow,
  // because we need to match a read with both reads and writes.
  // Shadow::kRodata has only the read bit set, so it does what we want.
  // We also abuse it for the rodata check to save a few cycles
  // since we already loaded Shadow::kRodata into a register.
  // Reads from rodata can't race.
  // Measurements show that they can be 10-20% of all memory accesses.
  // Shadow::kRodata has epoch 0 which cannot appear in shadow normally
  // (thread epochs start from 1). So the same read bit mask
  // serves as a rodata indicator.
  const m128 read_mask = _mm_set1_epi32(static_cast<u32>(Shadow::kRodata));
  const m128 masked_shadow = _mm_or_si128(shadow, read_mask);
  m128 same = _mm_cmpeq_epi32(masked_shadow, access);
  // Range memory accesses check Shadow::kRodata before calling this,
  // Shadow::kRodata is not possible for free memory accesses
  // and Go does not use Shadow::kRodata.
  if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
    const m128 ro = _mm_cmpeq_epi32(shadow, read_mask);
    same = _mm_or_si128(ro, same);
  }
  return _mm_movemask_epi8(same);
}

NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                            u32 race_mask, m128 shadow, AccessType typ) {
  // race_mask indicates which of the shadow elements raced with the current
  // access. Extract that element.
  CHECK_NE(race_mask, 0);
  u32 old;
  // Note: _mm_extract_epi32 index must be a constant value.
  switch (__builtin_ffs(race_mask) / 4) {
    case 0:
      old = _mm_extract_epi32(shadow, 0);
      break;
    case 1:
      old = _mm_extract_epi32(shadow, 1);
      break;
    case 2:
      old = _mm_extract_epi32(shadow, 2);
      break;
    case 3:
      old = _mm_extract_epi32(shadow, 3);
      break;
  }
  Shadow prev(static_cast<RawShadow>(old));
  // For the free shadow markers the first element (which contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, so take it.
  if (prev.sid() == kFreeSid)
    prev = Shadow(static_cast<RawShadow>(_mm_extract_epi32(shadow, 1)));
  DoReportRace(thr, shadow_mem, cur, prev, typ);
}

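// Vectorized race check: compares the current access against all shadow
// slots at once, stores the access on the no-race path (STORE) and extracts
// the racing slot on the slow path (SHARED).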
ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                m128 shadow, m128 access, AccessType typ) {
  // Note: empty/zero slots don't intersect with any access.
  const m128 zero = _mm_setzero_si128();
  const m128 mask_access = _mm_set1_epi32(0x000000ff);
  const m128 mask_sid = _mm_set1_epi32(0x0000ff00);
  const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000);
  const m128 access_and = _mm_and_si128(access, shadow);
  const m128 access_xor = _mm_xor_si128(access, shadow);
  const m128 intersect = _mm_and_si128(access_and, mask_access);
  const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero);
  const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid);
  const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero);
  const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic);
  const m128 no_race =
      _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic);
  const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero));
  if (UNLIKELY(race_mask))
    goto SHARED;

STORE : {
  if (typ & kAccessCheckOnly)
    return false;
  // We could also replace different sid's if access is the same,
  // rw weaker and happens before. However, just checking access below
  // is not enough because we also need to check that !both_read_or_atomic
  // (reads from different sids can be concurrent).
  // Theoretically we could replace smaller accesses with larger accesses,
  // but it's unclear if it's worth doing.
  const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff);
  const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid);
  const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero);
  const m128 access_read_atomic =
      _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30);
  const m128 rw_weaker =
      _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow);
  const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker);
  const int rewrite_mask = _mm_movemask_epi8(rewrite);
  int index = __builtin_ffs(rewrite_mask);
  if (UNLIKELY(index == 0)) {
    const m128 empty = _mm_cmpeq_epi32(shadow, zero);
    const int empty_mask = _mm_movemask_epi8(empty);
    index = __builtin_ffs(empty_mask);
    if (UNLIKELY(index == 0))
      index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16;
  }
  StoreShadow(&shadow_mem[index / 4], cur.raw());
  // We could zero other slots determined by rewrite_mask.
  // That would help other threads to evict better slots,
  // but it's unclear if it's worth it.
  return false;
}

SHARED:
  m128 thread_epochs = _mm_set1_epi32(0x7fffffff);
  // Need to unroll this because _mm_extract_epi8/_mm_insert_epi32
  // indexes must be constants.
# define LOAD_EPOCH(idx)                                                      \
  if (LIKELY(race_mask & (1 << (idx * 4)))) {                                 \
    u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1);                           \
    u16 epoch = static_cast<u16>(thr->clock.Get(static_cast<Sid>(sid)));      \
    thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx);   \
  }
  LOAD_EPOCH(0);
  LOAD_EPOCH(1);
  LOAD_EPOCH(2);
  LOAD_EPOCH(3);
# undef LOAD_EPOCH
  const m128 mask_epoch = _mm_set1_epi32(0x3fff0000);
  const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch);
  const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs);
  const int concurrent_mask = _mm_movemask_epi8(concurrent);
  if (LIKELY(concurrent_mask == 0))
    goto STORE;

  DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ);
  return true;
}

# define LOAD_CURRENT_SHADOW(cur, shadow_mem)                                 \
  const m128 access = _mm_set1_epi32(static_cast<u32>((cur).raw()));          \
  const m128 shadow = _mm_load_si128(reinterpret_cast<m128*>(shadow_mem))
#endif

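// Formats a single shadow value into buf (at least 64 bytes) for debug
// output.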
char* DumpShadow(char* buf, RawShadow raw) {
  if (raw == Shadow::kEmpty) {
    internal_snprintf(buf, 64, "0");
    return buf;
  }
  Shadow s(raw);
  AccessType typ;
  s.GetAccess(nullptr, nullptr, &typ);
  internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}",
                    static_cast<u32>(s.sid()), static_cast<u32>(s.epoch()),
                    s.access(), static_cast<u32>(typ));
  return buf;
}

// TryTrace* and TraceRestart* functions allow turning memory access and func
// entry/exit callbacks into leaf functions with all associated performance
// benefits. These hottest callbacks do only 2 slow path calls: report a race
// and trace part switching. Race reporting is easy to turn into a tail call, we
// just always return from the runtime after reporting a race. But trace part
// switching is harder because it needs to be in the middle of callbacks. To
// turn it into a tail call we immediately return after TraceRestart* functions,
// but TraceRestart* functions themselves recurse into the callback after
// switching the trace part. As a result the hottest callbacks contain only tail
// calls, which effectively makes them leaf functions (can use all registers,
// no frame setup, etc).
NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess(thr, pc, addr, size, typ);
}

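// Entry point for plain instrumented memory accesses of size 1/2/4/8 that
// are contained in a single shadow cell.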
ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                     uptr size, AccessType typ) {
  RawShadow* shadow_mem = MemToShadow(addr);
  UNUSED char memBuf[4][64];
  DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid,
           static_cast<int>(thr->fast_state.sid()),
           static_cast<int>(thr->fast_state.epoch()), (void*)addr, size,
           static_cast<int>(typ), DumpShadow(memBuf[0], shadow_mem[0]),
           DumpShadow(memBuf[1], shadow_mem[1]),
           DumpShadow(memBuf[2], shadow_mem[2]),
           DumpShadow(memBuf[3], shadow_mem[3]));

  FastState fast_state = thr->fast_state;
  Shadow cur(fast_state, addr, size, typ);

  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  if (!TryTraceMemoryAccess(thr, pc, addr, size, typ))
    return TraceRestartMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, AccessType typ);

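// 16-byte accesses span two shadow cells and are checked as two 8-byte
// halves.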
NOINLINE
void RestartMemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                           AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess16(thr, pc, addr, typ);
}

ALWAYS_INLINE USED void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                                       AccessType typ) {
  const uptr size = 16;
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  Shadow cur(fast_state, 0, 8, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  {
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartMemoryAccess16(thr, pc, addr, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  shadow_mem += kShadowCnt;
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccess16(thr, pc, addr, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

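// Unaligned accesses (up to 8 bytes) may straddle a shadow cell boundary,
// in which case the access is split and both cells are checked.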
NOINLINE
void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                  uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  UnalignedMemoryAccess(thr, pc, addr, size, typ);
}

ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc,
                                              uptr addr, uptr size,
                                              AccessType typ) {
  DCHECK_LE(size, 8);
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  uptr size1 = Min<uptr>(size, RoundUp(addr + 1, kShadowCell) - addr);
  {
    Shadow cur(fast_state, addr, size1, typ);
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  uptr size2 = size - size1;
  if (LIKELY(size2 == 0))
    return;
  shadow_mem += kShadowCnt;
  Shadow cur(fast_state, 0, size2, typ);
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

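// Fills shadow for the range [p, end) with v in the first slot of every
// cell and clears the remaining slots.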
void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) {
  DCHECK_LE(p, end);
  DCHECK(IsShadowMem(p));
  DCHECK(IsShadowMem(end));
  UNUSED const uptr kAlign = kShadowCnt * kShadowSize;
  DCHECK_EQ(reinterpret_cast<uptr>(p) % kAlign, 0);
  DCHECK_EQ(reinterpret_cast<uptr>(end) % kAlign, 0);
#if !TSAN_VECTORIZE
  for (; p < end; p += kShadowCnt) {
    p[0] = v;
    for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty;
  }
#else
  m128 vv = _mm_setr_epi32(
      static_cast<u32>(v), static_cast<u32>(Shadow::kEmpty),
      static_cast<u32>(Shadow::kEmpty), static_cast<u32>(Shadow::kEmpty));
  m128* vp = reinterpret_cast<m128*>(p);
  m128* vend = reinterpret_cast<m128*>(end);
  for (; vp < vend; vp++) _mm_store_si128(vp, vv);
#endif
}

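// Resets shadow for the application range [addr, addr + size) to val.
// For large ranges only the beginning and the end are written directly,
// the middle is re-mmaped to avoid touching lots of shadow memory.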
static void MemoryRangeSet(uptr addr, uptr size, RawShadow val) {
  if (size == 0)
    return;
  DCHECK_EQ(addr % kShadowCell, 0);
  DCHECK_EQ(size % kShadowCell, 0);
  // If a user passes some insane arguments (memset(0)),
  // let it just crash as usual.
  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
    return;
  RawShadow* begin = MemToShadow(addr);
  RawShadow* end = begin + size / kShadowCell * kShadowCnt;
  // Don't want to touch lots of shadow memory.
  // If a program maps a 10MB stack, there is no need to reset the whole range.
  // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
  if (SANITIZER_WINDOWS ||
      size <= common_flags()->clear_shadow_mmap_threshold) {
    ShadowSet(begin, end, val);
    return;
  }
  // The region is big, reset only the beginning and the end.
  const uptr kPageSize = GetPageSizeCached();
  // Set at least the first kPageSize/2 to the page boundary.
  RawShadow* mid1 =
      Min(end, reinterpret_cast<RawShadow*>(RoundUp(
                   reinterpret_cast<uptr>(begin) + kPageSize / 2, kPageSize)));
  ShadowSet(begin, mid1, val);
  // Reset the middle part.
  RawShadow* mid2 = RoundDown(end, kPageSize);
  if (mid2 > mid1) {
    if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1))
      Die();
  }
  // Set the ending.
  ShadowSet(mid2, end, val);
}

void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  uptr addr1 = RoundDown(addr, kShadowCell);
  uptr size1 = RoundUp(size + addr - addr1, kShadowCell);
  MemoryRangeSet(addr1, size1, Shadow::kEmpty);
}

void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  // Callers must lock the slot to ensure synchronization with the reset.
  // The problem with "freed" memory is that it's not "monotonic"
  // with respect to bug detection: freed memory is bad to access,
  // but then if the heap block is reallocated later, it's good to access.
  // As a result a garbage "freed" shadow can lead to a false positive
  // if it happens to match a real free in the thread trace,
  // but the heap block was reallocated before the current memory access,
  // so it's still good to access. It's not the case with data races.
  DCHECK(thr->slot_locked);
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  // Processing more than 1k (2k of shadow) is expensive,
  // can cause excessive memory consumption (the user does not necessarily
  // touch the whole range) and is most likely unnecessary.
  size = Min<uptr>(size, 1024);
  const AccessType typ = kAccessWrite | kAccessFree | kAccessSlotLocked |
                         kAccessCheckOnly | kAccessNoRodata;
  TraceMemoryAccessRange(thr, pc, addr, size, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  Shadow cur(thr->fast_state, 0, kShadowCell, typ);
#if TSAN_VECTORIZE
  const m128 access = _mm_set1_epi32(static_cast<u32>(cur.raw()));
  const m128 freed = _mm_setr_epi32(
      static_cast<u32>(Shadow::FreedMarker()),
      static_cast<u32>(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0);
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    const m128 shadow = _mm_load_si128((m128*)shadow_mem);
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
    _mm_store_si128((m128*)shadow_mem, freed);
  }
#else
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ)))
      return;
    StoreShadow(&shadow_mem[0], Shadow::FreedMarker());
    StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch()));
    StoreShadow(&shadow_mem[2], Shadow::kEmpty);
    StoreShadow(&shadow_mem[3], Shadow::kEmpty);
  }
#endif
}

void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  TraceMemoryAccessRange(thr, pc, addr, size, kAccessWrite);
  Shadow cur(thr->fast_state, 0, 8, kAccessWrite);
  MemoryRangeSet(addr, size, cur.raw());
}

void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr,
                                         uptr size) {
  if (thr->ignore_reads_and_writes == 0)
    MemoryRangeImitateWrite(thr, pc, addr, size);
  else
    MemoryResetRange(thr, pc, addr, size);
}

ALWAYS_INLINE
bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                          AccessType typ) {
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return false;
  return CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

template <bool is_read>
NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size) {
  TraceSwitchPart(thr);
  MemoryAccessRangeT<is_read>(thr, pc, addr, size);
}

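// Checks a range access: the range is traced once and then checked
// cell by cell, with the unaligned beginning and ending handled separately.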
template <bool is_read>
void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  const AccessType typ =
      (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata;
  RawShadow* shadow_mem = MemToShadow(addr);
  DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid,
           (void*)pc, (void*)addr, (int)size, is_read);

#if SANITIZER_DEBUG
  if (!IsAppMem(addr)) {
    Printf("Access to non app mem %zx\n", addr);
    DCHECK(IsAppMem(addr));
  }
  if (!IsAppMem(addr + size - 1)) {
    Printf("Access to non app mem %zx\n", addr + size - 1);
    DCHECK(IsAppMem(addr + size - 1));
  }
  if (!IsShadowMem(shadow_mem)) {
    Printf("Bad shadow addr %p (%zx)\n", static_cast<void*>(shadow_mem), addr);
    DCHECK(IsShadowMem(shadow_mem));
  }
  if (!IsShadowMem(shadow_mem + size * kShadowCnt - 1)) {
    Printf("Bad shadow addr %p (%zx)\n",
           static_cast<void*>(shadow_mem + size * kShadowCnt - 1),
           addr + size - 1);
    DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt - 1));
  }
#endif

  // Access to .rodata section, no races here.
  // Measurements show that it can be 10-20% of all memory accesses.
  // Check here once to not check for every access separately.
  // Note: we could (and should) do this only for the is_read case
  // (writes shouldn't go to .rodata). But it happens in Chromium tests:
  // https://bugs.chromium.org/p/chromium/issues/detail?id=1275581#c19
  // Details are unknown since it happens only on CI machines.
  if (*shadow_mem == Shadow::kRodata)
    return;

  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;

  if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccessRange<is_read>(thr, pc, addr, size);

  if (UNLIKELY(addr % kShadowCell)) {
    // Handle unaligned beginning, if any.
    uptr size1 = Min(size, RoundUp(addr, kShadowCell) - addr);
    size -= size1;
    Shadow cur(fast_state, addr, size1, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
    shadow_mem += kShadowCnt;
  }
  // Handle middle part, if any.
  Shadow cur(fast_state, 0, kShadowCell, typ);
  for (; size >= kShadowCell; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
  // Handle ending, if any.
  if (UNLIKELY(size)) {
    Shadow cur(fast_state, 0, size, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
}

template void MemoryAccessRangeT<true>(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size);
template void MemoryAccessRangeT<false>(ThreadState* thr, uptr pc, uptr addr,
                                        uptr size);

}  // namespace __tsan

#if !SANITIZER_GO
// Must be included in this file to make sure everything is inlined.
# include "tsan_interface.inc"
#endif