1 | //===-- tsan_vector_clock.cpp ---------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file is a part of ThreadSanitizer (TSan), a race detector. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | #include "tsan_vector_clock.h" |
13 | |
14 | #include "sanitizer_common/sanitizer_placement_new.h" |
15 | #include "tsan_mman.h" |
16 | |
17 | namespace __tsan { |
18 | |
#if TSAN_VECTORIZE
// Number of m128-sized chunks that cover kThreadSlotCount Epoch values. It is
// the loop bound for the vectorized code paths in this file.
const uptr kVectorClockSize =
    (kThreadSlotCount * sizeof(Epoch)) / sizeof(m128);
#endif
22 | |
23 | VectorClock::VectorClock() { Reset(); } |
24 | |
25 | void VectorClock::Reset() { |
26 | #if !TSAN_VECTORIZE |
27 | for (uptr i = 0; i < kThreadSlotCount; i++) |
28 | clk_[i] = kEpochZero; |
29 | #else |
30 | m128 z = _mm_setzero_si128(); |
31 | m128* vclk = reinterpret_cast<m128*>(clk_); |
32 | for (uptr i = 0; i < kVectorClockSize; i++) _mm_store_si128(p: &vclk[i], b: z); |
33 | #endif |
34 | } |
35 | |
36 | void VectorClock::Acquire(const VectorClock* src) { |
37 | if (!src) |
38 | return; |
39 | #if !TSAN_VECTORIZE |
40 | for (uptr i = 0; i < kThreadSlotCount; i++) |
41 | clk_[i] = max(clk_[i], src->clk_[i]); |
42 | #else |
43 | m128* __restrict vdst = reinterpret_cast<m128*>(clk_); |
44 | m128 const* __restrict vsrc = reinterpret_cast<m128 const*>(src->clk_); |
45 | for (uptr i = 0; i < kVectorClockSize; i++) { |
46 | m128 s = _mm_load_si128(p: &vsrc[i]); |
47 | m128 d = _mm_load_si128(p: &vdst[i]); |
48 | m128 m = _mm_max_epu16(V1: s, V2: d); |
49 | _mm_store_si128(p: &vdst[i], b: m); |
50 | } |
51 | #endif |
52 | } |
53 | |
54 | static VectorClock* AllocClock(VectorClock** dstp) { |
55 | if (UNLIKELY(!*dstp)) |
56 | *dstp = New<VectorClock>(); |
57 | return *dstp; |
58 | } |
59 | |
60 | void VectorClock::Release(VectorClock** dstp) const { |
61 | VectorClock* dst = AllocClock(dstp); |
62 | dst->Acquire(src: this); |
63 | } |
64 | |
65 | void VectorClock::ReleaseStore(VectorClock** dstp) const { |
66 | VectorClock* dst = AllocClock(dstp); |
67 | *dst = *this; |
68 | } |
69 | |
70 | VectorClock& VectorClock::operator=(const VectorClock& other) { |
71 | #if !TSAN_VECTORIZE |
72 | for (uptr i = 0; i < kThreadSlotCount; i++) |
73 | clk_[i] = other.clk_[i]; |
74 | #else |
75 | m128* __restrict vdst = reinterpret_cast<m128*>(clk_); |
76 | m128 const* __restrict vsrc = reinterpret_cast<m128 const*>(other.clk_); |
77 | for (uptr i = 0; i < kVectorClockSize; i++) { |
78 | m128 s = _mm_load_si128(p: &vsrc[i]); |
79 | _mm_store_si128(p: &vdst[i], b: s); |
80 | } |
81 | #endif |
82 | return *this; |
83 | } |
84 | |
85 | void VectorClock::ReleaseStoreAcquire(VectorClock** dstp) { |
86 | VectorClock* dst = AllocClock(dstp); |
87 | #if !TSAN_VECTORIZE |
88 | for (uptr i = 0; i < kThreadSlotCount; i++) { |
89 | Epoch tmp = dst->clk_[i]; |
90 | dst->clk_[i] = clk_[i]; |
91 | clk_[i] = max(clk_[i], tmp); |
92 | } |
93 | #else |
94 | m128* __restrict vdst = reinterpret_cast<m128*>(dst->clk_); |
95 | m128* __restrict vclk = reinterpret_cast<m128*>(clk_); |
96 | for (uptr i = 0; i < kVectorClockSize; i++) { |
97 | m128 t = _mm_load_si128(p: &vdst[i]); |
98 | m128 c = _mm_load_si128(p: &vclk[i]); |
99 | m128 m = _mm_max_epu16(V1: c, V2: t); |
100 | _mm_store_si128(p: &vdst[i], b: c); |
101 | _mm_store_si128(p: &vclk[i], b: m); |
102 | } |
103 | #endif |
104 | } |
105 | |
106 | void VectorClock::ReleaseAcquire(VectorClock** dstp) { |
107 | VectorClock* dst = AllocClock(dstp); |
108 | #if !TSAN_VECTORIZE |
109 | for (uptr i = 0; i < kThreadSlotCount; i++) { |
110 | dst->clk_[i] = max(dst->clk_[i], clk_[i]); |
111 | clk_[i] = dst->clk_[i]; |
112 | } |
113 | #else |
114 | m128* __restrict vdst = reinterpret_cast<m128*>(dst->clk_); |
115 | m128* __restrict vclk = reinterpret_cast<m128*>(clk_); |
116 | for (uptr i = 0; i < kVectorClockSize; i++) { |
117 | m128 c = _mm_load_si128(p: &vclk[i]); |
118 | m128 d = _mm_load_si128(p: &vdst[i]); |
119 | m128 m = _mm_max_epu16(V1: c, V2: d); |
120 | _mm_store_si128(p: &vdst[i], b: m); |
121 | _mm_store_si128(p: &vclk[i], b: m); |
122 | } |
123 | #endif |
124 | } |
125 | |
126 | } // namespace __tsan |
127 | |