// SPDX-License-Identifier: GPL-2.0
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/osq_lock.h>

/*
 * An MCS-like lock especially tailored for optimistic spinning by sleeping
 * lock implementations (mutex, rwsem, etc).
 *
 * Using a single MCS node per CPU is safe because sleeping locks should not be
 * called from interrupt context and we have preemption disabled while
 * spinning.
 */
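
/*
 * Illustrative usage sketch (not part of this file): a sleeping lock's
 * optimistic-spin slowpath is expected to bracket its spin phase with
 * osq_lock()/osq_unlock(), with preemption disabled, roughly like so
 * (the surrounding lock type and helpers are hypothetical):
 *
 *	preempt_disable();
 *	if (osq_lock(&sem->osq)) {		// we are the active spinner
 *		while (owner_is_running(sem) && !need_resched())
 *			cpu_relax();
 *		osq_unlock(&sem->osq);
 *	}
 *	preempt_enable();
 *
 * Only the queue head spins on the lock owner; every other contender spins
 * on its own per-cpu node below, which keeps the owner's cacheline quiet.
 */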

struct optimistic_spin_node {
	struct optimistic_spin_node *next, *prev;
	int locked; /* 1 if lock acquired */
	int cpu; /* encoded CPU # + 1 value */
};

static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node);

/*
 * We use the value 0 to represent "no CPU", thus the encoded value
 * will be the CPU number incremented by 1.
 */
static inline int encode_cpu(int cpu_nr)
{
	return cpu_nr + 1;
}

static inline int node_cpu(struct optimistic_spin_node *node)
{
	return node->cpu - 1;
}

static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val)
{
	int cpu_nr = encoded_cpu_val - 1;

	return per_cpu_ptr(&osq_node, cpu_nr);
}
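
/*
 * For reference (this lives in include/linux/osq_lock.h, shown here only as
 * a reminder): the queue itself is just the encoded tail CPU, so e.g. CPU#0
 * is stored as 1, and OSQ_UNLOCKED_VAL (0) means "empty":
 *
 *	struct optimistic_spin_queue {
 *		atomic_t tail;
 *	};
 */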

/*
 * Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
 * Can return NULL in case we were the last queued and we updated @lock instead.
 *
 * If osq_lock() is being cancelled there must be a previous node
 * and 'old_cpu' is its CPU #.
 * For osq_unlock() there is never a previous node and old_cpu is
 * set to OSQ_UNLOCKED_VAL.
 */
static inline struct optimistic_spin_node *
osq_wait_next(struct optimistic_spin_queue *lock,
	      struct optimistic_spin_node *node,
	      int old_cpu)
{
	int curr = encode_cpu(smp_processor_id());

	for (;;) {
		if (atomic_read(&lock->tail) == curr &&
		    atomic_cmpxchg_acquire(&lock->tail, curr, old_cpu) == curr) {
			/*
			 * We were the last queued, we moved @lock back. @prev
			 * will now observe @lock and will complete its
			 * unlock()/unqueue().
			 */
			return NULL;
		}

		/*
		 * We must xchg() the @node->next value, because if we were to
		 * leave it in, a concurrent unlock()/unqueue() from
		 * @node->next might complete Step-A and think its @prev is
		 * still valid.
		 *
		 * If the concurrent unlock()/unqueue() wins the race, we'll
		 * wait for either @lock to point to us, through its Step-B, or
		 * wait for a new @node->next from its Step-C.
		 */
		if (node->next) {
			struct optimistic_spin_node *next;

			next = xchg(&node->next, NULL);
			if (next)
				return next;
		}

		cpu_relax();
	}
}

bool osq_lock(struct optimistic_spin_queue *lock)
{
	struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
	struct optimistic_spin_node *prev, *next;
	int curr = encode_cpu(smp_processor_id());
	int old;

	node->locked = 0;
	node->next = NULL;
	node->cpu = curr;

	/*
	 * We need both ACQUIRE (pairs with corresponding RELEASE in
	 * unlock() uncontended, or fastpath) and RELEASE (to publish
	 * the node fields we just initialised) semantics when updating
	 * the lock tail.
	 */
	old = atomic_xchg(&lock->tail, curr);
	if (old == OSQ_UNLOCKED_VAL)
		return true;

	prev = decode_cpu(old);
	node->prev = prev;

	/*
	 * osq_lock()			unqueue
	 *
	 * node->prev = prev		osq_wait_next()
	 * WMB				MB
	 * prev->next = node		next->prev = prev // unqueue-C
	 *
	 * Here 'node->prev' and 'next->prev' are the same variable and we need
	 * to ensure these stores happen in-order to avoid corrupting the list.
	 */
	smp_wmb();

	WRITE_ONCE(prev->next, node);

	/*
	 * Normally @prev is untouchable after the above store; because at that
	 * moment unlock() can proceed and wipe the node element from stack.
	 *
	 * However, since our nodes are static per-cpu storage, we're
	 * guaranteed their existence -- this allows us to apply
	 * cmpxchg in an attempt to undo our queueing.
	 */

	/*
	 * Wait to acquire the lock, or for cancellation. Note that
	 * need_resched() will come with an IPI, which will wake
	 * smp_cond_load_relaxed() if it is implemented with a monitor-wait.
	 * vcpu_is_preempted() relies on polling, be careful.
	 */
	if (smp_cond_load_relaxed(&node->locked, VAL || need_resched() ||
				  vcpu_is_preempted(node_cpu(node->prev))))
		return true;

	/* unqueue */
	/*
	 * Step - A -- stabilize @prev
	 *
	 * Undo our @prev->next assignment; this will make @prev's
	 * unlock()/unqueue() wait for a next pointer since @lock points to us
	 * (or later).
	 */

	for (;;) {
		/*
		 * cpu_relax() below implies a compiler barrier which would
		 * prevent this comparison being optimized away.
		 */
		if (data_race(prev->next) == node &&
		    cmpxchg(&prev->next, node, NULL) == node)
			break;

		/*
		 * We can only fail the cmpxchg() racing against an unlock(),
		 * in which case we should observe @node->locked becoming
		 * true.
		 */
		if (smp_load_acquire(&node->locked))
			return true;

		cpu_relax();

		/*
		 * Or we race against a concurrent unqueue()'s step-B, in which
		 * case its step-C will write us a new @node->prev pointer.
		 */
		prev = READ_ONCE(node->prev);
	}

	/*
	 * Step - B -- stabilize @next
	 *
	 * Similar to unlock(), wait for @node->next or move @lock from @node
	 * back to @prev.
	 */

	next = osq_wait_next(lock, node, prev->cpu);
	if (!next)
		return false;

	/*
	 * Step - C -- unlink
	 *
	 * @prev is stable because it's still waiting for a new @prev->next
	 * pointer, @next is stable because our @node->next pointer is NULL and
	 * it will wait in Step-A.
	 */

	WRITE_ONCE(next->prev, prev);
	WRITE_ONCE(prev->next, next);

	return false;
}

void osq_unlock(struct optimistic_spin_queue *lock)
{
	struct optimistic_spin_node *node, *next;
	int curr = encode_cpu(smp_processor_id());

	/*
	 * Fast path for the uncontended case.
	 */
	if (likely(atomic_cmpxchg_release(&lock->tail, curr,
					  OSQ_UNLOCKED_VAL) == curr))
		return;

	/*
	 * Second most likely case.
	 */
	node = this_cpu_ptr(&osq_node);
	next = xchg(&node->next, NULL);
	if (next) {
		WRITE_ONCE(next->locked, 1);
		return;
	}

	next = osq_wait_next(lock, node, OSQ_UNLOCKED_VAL);
	if (next)
		WRITE_ONCE(next->locked, 1);
}