// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2012 Regents of the University of California
 * Copyright (C) 2017 SiFive
 * Copyright (C) 2021 Western Digital Corporation or its affiliates.
 */

#include <linux/bitops.h>
#include <linux/cpumask.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/static_key.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>

#ifdef CONFIG_MMU

DEFINE_STATIC_KEY_FALSE(use_asid_allocator);

static unsigned long asid_bits;
static unsigned long num_asids;
unsigned long asid_mask;

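/*
 * current_version is the allocator's generation counter: its ASID bits are
 * zero and the upper bits advance by num_asids on every rollover, so a
 * context is stale whenever its upper bits no longer match this value.
 */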
static atomic_long_t current_version;

static DEFINE_RAW_SPINLOCK(context_lock);
static cpumask_t context_tlb_flush_pending;
static unsigned long *context_asid_map;

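/*
 * active_context holds the CONTEXT (version | ASID) currently programmed on
 * each hart and is cleared to zero during a rollover; reserved_context keeps
 * the last CONTEXT a hart was running across a rollover so the same ASID can
 * be re-reserved for that mm in the new version.
 */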
static DEFINE_PER_CPU(atomic_long_t, active_context);
static DEFINE_PER_CPU(unsigned long, reserved_context);

static bool check_update_reserved_context(unsigned long cntx,
					   unsigned long newcntx)
{
	int cpu;
	bool hit = false;

	/*
	 * Iterate over the set of reserved CONTEXTs looking for a match.
	 * If we find one, then we can update our mm to use the new CONTEXT
	 * (i.e. the same CONTEXT in the current_version), but we can't
	 * exit the loop early, since we need to ensure that all copies
	 * of the old CONTEXT are updated to reflect the mm. Failure to do
	 * so could result in us missing the reserved CONTEXT in a future
	 * version.
	 */
	for_each_possible_cpu(cpu) {
		if (per_cpu(reserved_context, cpu) == cntx) {
			hit = true;
			per_cpu(reserved_context, cpu) = newcntx;
		}
	}

	return hit;
}

static void __flush_context(void)
{
	int i;
	unsigned long cntx;

	/* Must be called with context_lock held */
	lockdep_assert_held(&context_lock);

	/* Update the list of reserved ASIDs and the ASID bitmap. */
	bitmap_zero(context_asid_map, num_asids);

	/* Mark already active ASIDs as used */
	for_each_possible_cpu(i) {
		cntx = atomic_long_xchg_relaxed(&per_cpu(active_context, i), 0);
		/*
		 * If this CPU has already been through a rollover, but
		 * hasn't run another task in the meantime, we must preserve
		 * its reserved CONTEXT, as this is the only trace we have of
		 * the process it is still running.
		 */
		if (cntx == 0)
			cntx = per_cpu(reserved_context, i);

		__set_bit(cntx & asid_mask, context_asid_map);
		per_cpu(reserved_context, i) = cntx;
	}

	/* Mark ASID #0 as used because it is used at boot-time */
	__set_bit(0, context_asid_map);

	/* Queue a TLB invalidation for each CPU on next context-switch */
	cpumask_setall(&context_tlb_flush_pending);
}

static unsigned long __new_context(struct mm_struct *mm)
{
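	/*
	 * Position in the ASID bitmap where the next search starts; kept
	 * across calls so allocations cycle through the ASID space instead
	 * of always rescanning from the beginning.
	 */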
	static u32 cur_idx = 1;
	unsigned long cntx = atomic_long_read(&mm->context.id);
	unsigned long asid, ver = atomic_long_read(&current_version);

	/* Must be called with context_lock held */
	lockdep_assert_held(&context_lock);

	if (cntx != 0) {
		unsigned long newcntx = ver | (cntx & asid_mask);

		/*
		 * If our current CONTEXT was active during a rollover, we
		 * can continue to use it and this was just a false alarm.
		 */
		if (check_update_reserved_context(cntx, newcntx))
			return newcntx;

		/*
		 * We had a valid CONTEXT in a previous life, so try to
		 * re-use it if possible.
		 */
		if (!__test_and_set_bit(cntx & asid_mask, context_asid_map))
			return newcntx;
	}

	/*
	 * Allocate a free ASID. If we can't find one then increment
	 * current_version and flush all ASIDs.
	 */
	asid = find_next_zero_bit(context_asid_map, num_asids, cur_idx);
	if (asid != num_asids)
		goto set_asid;

	/* We're out of ASIDs, so increment current_version */
	ver = atomic_long_add_return_relaxed(num_asids, &current_version);

	/* Flush everything */
	__flush_context();

	/* We have more ASIDs than CPUs, so this will always succeed */
	asid = find_next_zero_bit(context_asid_map, num_asids, 1);

set_asid:
	__set_bit(asid, context_asid_map);
	cur_idx = asid;
	return asid | ver;
}

static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
{
	unsigned long flags;
	bool need_flush_tlb = false;
	unsigned long cntx, old_active_cntx;

	cntx = atomic_long_read(&mm->context.id);

	/*
	 * If our active_context is non-zero and the context matches the
	 * current_version, then we update the active_context entry with a
	 * relaxed cmpxchg.
	 *
	 * Following is how we handle racing with a concurrent rollover:
	 *
	 * - We get a zero back from the cmpxchg and end up waiting on the
	 *   lock. Taking the lock synchronises with the rollover and so
	 *   we are forced to see the updated version.
	 *
	 * - We get a valid context back from the cmpxchg, then we continue
	 *   using the old ASID because __flush_context() would have marked
	 *   the ASID of active_context as used, and at the next context
	 *   switch we will allocate a new context.
	 */
	old_active_cntx = atomic_long_read(&per_cpu(active_context, cpu));
	if (old_active_cntx &&
	    ((cntx & ~asid_mask) == atomic_long_read(&current_version)) &&
	    atomic_long_cmpxchg_relaxed(&per_cpu(active_context, cpu),
					old_active_cntx, cntx))
		goto switch_mm_fast;

	raw_spin_lock_irqsave(&context_lock, flags);

	/* Check that our ASID belongs to the current_version. */
	cntx = atomic_long_read(&mm->context.id);
	if ((cntx & ~asid_mask) != atomic_long_read(&current_version)) {
		cntx = __new_context(mm);
		atomic_long_set(&mm->context.id, cntx);
	}

	if (cpumask_test_and_clear_cpu(cpu, &context_tlb_flush_pending))
		need_flush_tlb = true;

	atomic_long_set(&per_cpu(active_context, cpu), cntx);

	raw_spin_unlock_irqrestore(&context_lock, flags);

switch_mm_fast:
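	/*
	 * Program satp with the new root page table PPN, this mm's ASID and
	 * the paging mode in a single CSR write.
	 */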
	csr_write(CSR_SATP, virt_to_pfn(mm->pgd) |
		  ((cntx & asid_mask) << SATP_ASID_SHIFT) |
		  satp_mode);

	if (need_flush_tlb)
		local_flush_tlb_all();
}

static void set_mm_noasid(struct mm_struct *mm)
{
	/* Switch the page table and blindly nuke the entire local TLB */
	csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | satp_mode);
	local_flush_tlb_all();
}

static inline void set_mm(struct mm_struct *prev,
			  struct mm_struct *next, unsigned int cpu)
{
	/*
	 * The mm_cpumask indicates which harts' TLBs contain the virtual
	 * address mapping of the mm. Compared to noasid, using asid
	 * can't guarantee that stale TLB entries are invalidated because
	 * the asid mechanism wouldn't flush the TLB on every switch_mm for
	 * performance. So when using asid, keep all CPUs' footprints in
	 * the cpumask until the mm is reset.
	 */
	cpumask_set_cpu(cpu, mm_cpumask(next));
	if (static_branch_unlikely(&use_asid_allocator)) {
		set_mm_asid(next, cpu);
	} else {
		cpumask_clear_cpu(cpu, mm_cpumask(prev));
		set_mm_noasid(next);
	}
}

static int __init asids_init(void)
{
	unsigned long old;

	/* Figure out the number of ASID bits implemented in HW */
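	/*
	 * Write all-ones into the ASID field of satp and read it back: the
	 * hart only retains the bits it implements, so fls_long() of the
	 * value that sticks gives the implemented ASID width (e.g. a hart
	 * with 16 ASID bits reads back 0xffff and yields asid_bits = 16).
	 */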
	old = csr_read(CSR_SATP);
	asid_bits = old | (SATP_ASID_MASK << SATP_ASID_SHIFT);
	csr_write(CSR_SATP, asid_bits);
	asid_bits = (csr_read(CSR_SATP) >> SATP_ASID_SHIFT) & SATP_ASID_MASK;
	asid_bits = fls_long(asid_bits);
	csr_write(CSR_SATP, old);

	/*
	 * In the process of determining the number of ASID bits (above)
	 * we polluted the TLB of the current HART, so flush the TLB to
	 * remove the unwanted TLB entries.
	 */
	local_flush_tlb_all();

	/* Pre-compute ASID details */
	if (asid_bits) {
		num_asids = 1 << asid_bits;
		asid_mask = num_asids - 1;
	}
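	/*
	 * If the hart implements no ASID bits, num_asids stays zero, the
	 * check below fails and the allocator remains disabled.
	 */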

	/*
	 * Use the ASID allocator only if the number of HW ASIDs is at
	 * least twice the number of possible CPUs.
	 */
	if (num_asids > (2 * num_possible_cpus())) {
		atomic_long_set(&current_version, num_asids);

		context_asid_map = bitmap_zalloc(num_asids, GFP_KERNEL);
		if (!context_asid_map)
			panic("Failed to allocate bitmap for %lu ASIDs\n",
			      num_asids);

		__set_bit(0, context_asid_map);

		static_branch_enable(&use_asid_allocator);

		pr_info("ASID allocator using %lu bits (%lu entries)\n",
			asid_bits, num_asids);
	} else {
		pr_info("ASID allocator disabled (%lu bits)\n", asid_bits);
	}

	return 0;
}
early_initcall(asids_init);
#else
static inline void set_mm(struct mm_struct *prev,
			  struct mm_struct *next, unsigned int cpu)
{
	/* Nothing to do here when there is no MMU */
}
#endif

/*
 * When necessary, performs a deferred icache flush for the given MM context,
 * on the local CPU. RISC-V has no direct mechanism for instruction cache
 * shootdowns, so instead we send an IPI that informs the remote harts they
 * need to flush their local instruction caches. To avoid pathologically slow
 * behavior in a common case (a bunch of single-hart processes on a many-hart
 * machine, i.e. 'make -j') we avoid the IPIs for harts that are not currently
 * executing a MM context and instead schedule a deferred local instruction
 * cache flush to be performed before execution resumes on each hart. This
 * actually performs that local instruction cache flush, which implicitly only
 * refers to the current hart.
 *
 * The "cpu" argument must be the current local CPU number.
 */
static inline void flush_icache_deferred(struct mm_struct *mm, unsigned int cpu)
{
#ifdef CONFIG_SMP
	cpumask_t *mask = &mm->context.icache_stale_mask;

	if (cpumask_test_cpu(cpu, mask)) {
		cpumask_clear_cpu(cpu, mask);
		/*
		 * Ensure the remote hart's writes are visible to this hart.
		 * This pairs with a barrier in flush_icache_mm.
		 */
		smp_mb();
		local_flush_icache_all();
	}

#endif
}

void switch_mm(struct mm_struct *prev, struct mm_struct *next,
	       struct task_struct *task)
{
	unsigned int cpu;

	if (unlikely(prev == next))
		return;

	membarrier_arch_switch_mm(prev, next, task);

	/*
	 * Mark the current MM context as inactive, and the next as
	 * active. This is at least used by the icache flushing
	 * routines in order to determine who should be flushed.
	 */
	cpu = smp_processor_id();

	set_mm(prev, next, cpu);

	flush_icache_deferred(next, cpu);
}