/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_TLBFLUSH_H
#define _ASM_X86_TLBFLUSH_H

#include <linux/mm_types.h>
#include <linux/mmu_notifier.h>
#include <linux/sched.h>

#include <asm/barrier.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
#include <asm/smp.h>
#include <asm/invpcid.h>
#include <asm/pti.h>
#include <asm/processor-flags.h>
#include <asm/pgtable.h>

DECLARE_PER_CPU(u64, tlbstate_untag_mask);

void __flush_tlb_all(void);

#define TLB_FLUSH_ALL		-1UL
#define TLB_GENERATION_INVALID	0

void cr4_update_irqsoff(unsigned long set, unsigned long clear);
unsigned long cr4_read_shadow(void);

/* Set in this cpu's CR4. */
static inline void cr4_set_bits_irqsoff(unsigned long mask)
{
	cr4_update_irqsoff(mask, 0);
}

/* Clear in this cpu's CR4. */
static inline void cr4_clear_bits_irqsoff(unsigned long mask)
{
	cr4_update_irqsoff(0, mask);
}

/* Set in this cpu's CR4. */
static inline void cr4_set_bits(unsigned long mask)
{
	unsigned long flags;

	local_irq_save(flags);
	cr4_set_bits_irqsoff(mask);
	local_irq_restore(flags);
}

/* Clear in this cpu's CR4. */
static inline void cr4_clear_bits(unsigned long mask)
{
	unsigned long flags;

	local_irq_save(flags);
	cr4_clear_bits_irqsoff(mask);
	local_irq_restore(flags);
}
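
/*
 * Usage sketch (illustrative only, not part of the upstream header): callers
 * that may run with interrupts enabled use the plain helpers, which wrap the
 * _irqsoff() variants in local_irq_save()/local_irq_restore(), e.g.:
 *
 *	cr4_set_bits(X86_CR4_TSD);
 *	...
 *	cr4_clear_bits(X86_CR4_TSD);
 *
 * Code that already runs with interrupts off can call
 * cr4_set_bits_irqsoff()/cr4_clear_bits_irqsoff() directly.
 */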

#ifndef MODULE
/*
 * 6 because 6 should be plenty and struct tlb_state will fit in two cache
 * lines.
 */
#define TLB_NR_DYN_ASIDS	6

struct tlb_context {
	u64 ctx_id;
	u64 tlb_gen;
};

struct tlb_state {
	/*
	 * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts
	 * are on.  This means that it may not match current->active_mm,
	 * which will contain the previous user mm when we're in lazy TLB
	 * mode even if we've already switched back to swapper_pg_dir.
	 *
	 * During switch_mm_irqs_off(), loaded_mm will be set to
	 * LOADED_MM_SWITCHING during the brief interrupts-off window
	 * when CR3 and loaded_mm would otherwise be inconsistent.  This
	 * is for nmi_uaccess_okay()'s benefit.
	 */
	struct mm_struct *loaded_mm;

#define LOADED_MM_SWITCHING ((struct mm_struct *)1UL)

	/* Last user mm for optimizing IBPB */
	union {
		struct mm_struct	*last_user_mm;
		unsigned long		last_user_mm_spec;
	};

	u16 loaded_mm_asid;
	u16 next_asid;

	/*
	 * If set we changed the page tables in such a way that we
	 * needed an invalidation of all contexts (aka. PCIDs / ASIDs).
	 * This tells us to go invalidate all the non-loaded ctxs[]
	 * on the next context switch.
	 *
	 * The current ctx was kept up-to-date as it ran and does not
	 * need to be invalidated.
	 */
	bool invalidate_other;

#ifdef CONFIG_ADDRESS_MASKING
	/*
	 * Active LAM mode.
	 *
	 * X86_CR3_LAM_U57/U48 shifted right by X86_CR3_LAM_U57_BIT, or 0 if
	 * LAM is disabled.
	 */
	u8 lam;
#endif

	/*
	 * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
	 * the corresponding user PCID needs a flush next time we
	 * switch to it; see SWITCH_TO_USER_CR3.
	 */
	unsigned short user_pcid_flush_mask;

	/*
	 * Access to this CR4 shadow and to H/W CR4 is protected by
	 * disabling interrupts when modifying either one.
	 */
	unsigned long cr4;

	/*
	 * This is a list of all contexts that might exist in the TLB.
	 * There is one per ASID that we use, and the ASID (what the
	 * CPU calls PCID) is the index into ctxs[].
	 *
	 * For each context, ctx_id indicates which mm the TLB's user
	 * entries came from.  As an invariant, the TLB will never
	 * contain entries that are out-of-date as of when that mm reached
	 * the tlb_gen in the list.
	 *
	 * To be clear, this means that it's legal for the TLB code to
	 * flush the TLB without updating tlb_gen.  This can happen
	 * (for now, at least) due to paravirt remote flushes.
	 *
	 * NB: context 0 is a bit special, since it's also used by
	 * various bits of init code.  This is fine -- code that
	 * isn't aware of PCID will end up harmlessly flushing
	 * context 0.
	 */
	struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
};
DECLARE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate);
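
/*
 * Access sketch (illustrative, not part of the upstream header): the per-CPU
 * TLB state is normally read with the this_cpu accessors, with interrupts
 * disabled so that loaded_mm/loaded_mm_asid cannot change under the reader:
 *
 *	struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
 *	u16 asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
 */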

struct tlb_state_shared {
	/*
	 * We can be in one of several states:
	 *
	 *  - Actively using an mm.  Our CPU's bit will be set in
	 *    mm_cpumask(loaded_mm) and is_lazy == false;
	 *
	 *  - Not using a real mm.  loaded_mm == &init_mm.  Our CPU's bit
	 *    will not be set in mm_cpumask(&init_mm) and is_lazy == false.
	 *
	 *  - Lazily using a real mm.  loaded_mm != &init_mm, our bit
	 *    is set in mm_cpumask(loaded_mm), but is_lazy == true.
	 *    We're heuristically guessing that the CR3 load we
	 *    skipped more than makes up for the overhead added by
	 *    lazy mode.
	 */
	bool is_lazy;
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);
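
/*
 * Illustrative check (an assumption-laden sketch, not part of the upstream
 * header): flushing code may avoid interrupting CPUs that are only lazily
 * holding on to an mm, since they reconcile their TLB state when they next
 * switch to a real mm, e.g.:
 *
 *	if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu))
 *		... the IPI can potentially be skipped for this CPU ...
 */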

bool nmi_uaccess_okay(void);
#define nmi_uaccess_okay nmi_uaccess_okay

/* Initialize cr4 shadow for this CPU. */
static inline void cr4_init_shadow(void)
{
	this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
}

extern unsigned long mmu_cr4_features;
extern u32 *trampoline_cr4_features;

/* How many pages can be invalidated with one INVLPGB. */
extern u16 invlpgb_count_max;

extern void initialize_tlbstate_and_flush(void);

/*
 * TLB flushing:
 *
 *  - flush_tlb_all() flushes all processes' TLBs
 *  - flush_tlb_mm(mm) flushes the specified mm context's TLB entries
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
 *  - flush_tlb_multi(cpumask, info) flushes TLBs on multiple cpus
 *
 * ..but the i386 has somewhat limited tlb flushing capabilities,
 * and page-granular flushes are available only on i486 and up.
 */
struct flush_tlb_info {
	/*
	 * We support several kinds of flushes.
	 *
	 * - Fully flush a single mm.  .mm will be set, .end will be
	 *   TLB_FLUSH_ALL, and .new_tlb_gen will be the tlb_gen to
	 *   which the IPI sender is trying to catch us up.
	 *
	 * - Partially flush a single mm.  .mm will be set, .start and
	 *   .end will indicate the range, and .new_tlb_gen will be set
	 *   such that the changes between generation .new_tlb_gen-1 and
	 *   .new_tlb_gen are entirely contained in the indicated range.
	 *
	 * - Fully flush all mms whose tlb_gens have been updated.  .mm
	 *   will be NULL, .end will be TLB_FLUSH_ALL, and .new_tlb_gen
	 *   will be zero.
	 */
	struct mm_struct	*mm;
	unsigned long		start;
	unsigned long		end;
	u64			new_tlb_gen;
	unsigned int		initiating_cpu;
	u8			stride_shift;
	u8			freed_tables;
	u8			trim_cpumask;
};
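
/*
 * Worked example (illustrative, not part of the upstream header): a partial
 * flush of one mm at 4k granularity would roughly populate the structure as
 *
 *	info->mm	   = mm;
 *	info->start	   = start;
 *	info->end	   = end;
 *	info->stride_shift = PAGE_SHIFT;
 *	info->new_tlb_gen  = inc_mm_tlb_gen(mm);
 *
 * whereas a full flush of the mm keeps .mm set but uses .end = TLB_FLUSH_ALL,
 * and a "flush everything that is stale" request additionally has .mm == NULL
 * and .new_tlb_gen == 0.
 */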

void flush_tlb_local(void);
void flush_tlb_one_user(unsigned long addr);
void flush_tlb_one_kernel(unsigned long addr);
void flush_tlb_multi(const struct cpumask *cpumask,
		     const struct flush_tlb_info *info);

static inline bool is_dyn_asid(u16 asid)
{
	return asid < TLB_NR_DYN_ASIDS;
}

static inline bool is_global_asid(u16 asid)
{
	return !is_dyn_asid(asid);
}

#ifdef CONFIG_BROADCAST_TLB_FLUSH
static inline u16 mm_global_asid(struct mm_struct *mm)
{
	u16 asid;

	if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
		return 0;

	asid = smp_load_acquire(&mm->context.global_asid);

	/* mm->context.global_asid is either 0, or a global ASID */
	VM_WARN_ON_ONCE(asid && is_dyn_asid(asid));

	return asid;
}

static inline void mm_init_global_asid(struct mm_struct *mm)
{
	if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
		mm->context.global_asid = 0;
		mm->context.asid_transition = false;
	}
}

static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid)
{
	/*
	 * Notably flush_tlb_mm_range() -> broadcast_tlb_flush() ->
	 * finish_asid_transition() needs to observe asid_transition = true
	 * once it observes global_asid.
	 */
	mm->context.asid_transition = true;
	smp_store_release(&mm->context.global_asid, asid);
}

static inline void mm_clear_asid_transition(struct mm_struct *mm)
{
	WRITE_ONCE(mm->context.asid_transition, false);
}

static inline bool mm_in_asid_transition(struct mm_struct *mm)
{
	if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
		return false;

	return mm && READ_ONCE(mm->context.asid_transition);
}
#else
static inline u16 mm_global_asid(struct mm_struct *mm) { return 0; }
static inline void mm_init_global_asid(struct mm_struct *mm) { }
static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) { }
static inline void mm_clear_asid_transition(struct mm_struct *mm) { }
static inline bool mm_in_asid_transition(struct mm_struct *mm) { return false; }
#endif /* CONFIG_BROADCAST_TLB_FLUSH */
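
/*
 * Ordering sketch (illustrative, not part of the upstream header): because
 * mm_assign_global_asid() publishes the ASID with a release store and
 * mm_global_asid() reads it back with an acquire load, a reader that observes
 * a non-zero global ASID is also guaranteed to observe the preceding
 * asid_transition = true store, e.g.:
 *
 *	u16 asid = mm_global_asid(mm);
 *
 *	if (asid && mm_in_asid_transition(mm))
 *		... some CPUs may still be running with a dynamic ASID ...
 */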

#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#endif

#define flush_tlb_mm(mm)						\
		flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true)

#define flush_tlb_range(vma, start, end)				\
	flush_tlb_mm_range((vma)->vm_mm, start, end,			\
			   ((vma)->vm_flags & VM_HUGETLB)		\
				? huge_page_shift(hstate_vma(vma))	\
				: PAGE_SHIFT, true)

extern void flush_tlb_all(void);
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
				unsigned long end, unsigned int stride_shift,
				bool freed_tables);
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);

static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
{
	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false);
}
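
/*
 * Usage sketch (illustrative, not part of the upstream header): callers pick
 * the narrowest primitive that covers the page-table change they just made:
 *
 *	flush_tlb_page(vma, addr);		one user page
 *	flush_tlb_range(vma, start, end);	a range within one VMA
 *	flush_tlb_mm(vma->vm_mm);		the whole address space
 *	flush_tlb_kernel_range(start, end);	kernel mappings
 */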

static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
{
	bool should_defer = false;

	/* If remote CPUs need to be flushed, defer and batch the flush */
	if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
		should_defer = true;
	put_cpu();

	return should_defer;
}

static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
{
	/*
	 * Bump the generation count.  This also serves as a full barrier
	 * that synchronizes with switch_mm(): callers are required to order
	 * their read of mm_cpumask after their writes to the paging
	 * structures.
	 */
	return atomic64_inc_return(&mm->context.tlb_gen);
}

static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
					     struct mm_struct *mm,
					     unsigned long start, unsigned long end)
{
	inc_mm_tlb_gen(mm);
	cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
	batch->unmapped_pages = true;
	mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}

static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
	flush_tlb_mm(mm);
}

extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);

static inline bool pte_flags_need_flush(unsigned long oldflags,
					unsigned long newflags,
					bool ignore_access)
{
	/*
	 * Flags that require a flush when cleared but not when they are set.
	 * Only include flags that would not trigger spurious page-faults.
	 * Non-present entries are not cached.  Hardware would set the
	 * dirty/access bit if needed without a fault.
	 */
	const pteval_t flush_on_clear = _PAGE_DIRTY | _PAGE_PRESENT |
					_PAGE_ACCESSED;
	const pteval_t software_flags = _PAGE_SOFTW1 | _PAGE_SOFTW2 |
					_PAGE_SOFTW3 | _PAGE_SOFTW4 |
					_PAGE_SAVED_DIRTY;
	const pteval_t flush_on_change = _PAGE_RW | _PAGE_USER | _PAGE_PWT |
			  _PAGE_PCD | _PAGE_PSE | _PAGE_GLOBAL | _PAGE_PAT |
			  _PAGE_PAT_LARGE | _PAGE_PKEY_BIT0 | _PAGE_PKEY_BIT1 |
			  _PAGE_PKEY_BIT2 | _PAGE_PKEY_BIT3 | _PAGE_NX;
	unsigned long diff = oldflags ^ newflags;

	BUILD_BUG_ON(flush_on_clear & software_flags);
	BUILD_BUG_ON(flush_on_clear & flush_on_change);
	BUILD_BUG_ON(flush_on_change & software_flags);

	/* Ignore software flags */
	diff &= ~software_flags;

	if (ignore_access)
		diff &= ~_PAGE_ACCESSED;

	/*
	 * Were any of the 'flush_on_clear' flags cleared between 'oldflags'
	 * and 'newflags'?
	 */
	if (diff & oldflags & flush_on_clear)
		return true;

	/* Flush on modified flags. */
	if (diff & flush_on_change)
		return true;

	/* Ensure there are no flags that were left behind */
	if (IS_ENABLED(CONFIG_DEBUG_VM) &&
	    (diff & ~(flush_on_clear | software_flags | flush_on_change))) {
		VM_WARN_ON_ONCE(1);
		return true;
	}

	return false;
}
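
/*
 * Worked examples (illustrative, not part of the upstream header):
 * write-protecting an entry clears _PAGE_RW, which is in 'flush_on_change',
 * so a flush is required; merely setting _PAGE_ACCESSED leaves nothing stale
 * in the TLB, so no flush is needed:
 *
 *	pte_flags_need_flush(_PAGE_PRESENT | _PAGE_RW,
 *			     _PAGE_PRESENT, false)			== true
 *	pte_flags_need_flush(_PAGE_PRESENT,
 *			     _PAGE_PRESENT | _PAGE_ACCESSED, false)	== false
 */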

/*
 * pte_needs_flush() checks whether permissions were demoted and require a
 * flush. It should only be used for userspace PTEs.
 */
static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
{
	/* !PRESENT -> * ; no need for flush */
	if (!(pte_flags(oldpte) & _PAGE_PRESENT))
		return false;

	/* PFN changed ; needs flush */
	if (pte_pfn(oldpte) != pte_pfn(newpte))
		return true;

	/*
	 * check PTE flags; ignore access-bit; see comment in
	 * ptep_clear_flush_young().
	 */
	return pte_flags_need_flush(pte_flags(oldpte), pte_flags(newpte),
				    true);
}
#define pte_needs_flush pte_needs_flush

/*
 * huge_pmd_needs_flush() checks whether permissions were demoted and require a
 * flush. It should only be used for userspace huge PMDs.
 */
static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
{
	/* !PRESENT -> * ; no need for flush */
	if (!(pmd_flags(oldpmd) & _PAGE_PRESENT))
		return false;

	/* PFN changed ; needs flush */
	if (pmd_pfn(oldpmd) != pmd_pfn(newpmd))
		return true;

	/*
	 * check PMD flags; do not ignore access-bit; see
	 * pmdp_clear_flush_young().
	 */
	return pte_flags_need_flush(pmd_flags(oldpmd), pmd_flags(newpmd),
				    false);
}
#define huge_pmd_needs_flush huge_pmd_needs_flush

#ifdef CONFIG_ADDRESS_MASKING
static inline u64 tlbstate_lam_cr3_mask(void)
{
	u64 lam = this_cpu_read(cpu_tlbstate.lam);

	return lam << X86_CR3_LAM_U57_BIT;
}
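
/*
 * Worked example (illustrative, not part of the upstream header): with
 * LAM_U57 active, cpu_tlbstate.lam holds
 *
 *	X86_CR3_LAM_U57 >> X86_CR3_LAM_U57_BIT	(i.e. 1)
 *
 * and this helper shifts it back, so the returned value can be OR'ed
 * directly into a new CR3 value.
 */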

static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask)
{
	this_cpu_write(cpu_tlbstate.lam, lam >> X86_CR3_LAM_U57_BIT);
	this_cpu_write(tlbstate_untag_mask, untag_mask);
}

#else

static inline u64 tlbstate_lam_cr3_mask(void)
{
	return 0;
}

static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask)
{
}
#endif
#endif /* !MODULE */

static inline void __native_tlb_flush_global(unsigned long cr4)
{
	native_write_cr4(cr4 ^ X86_CR4_PGE);
	native_write_cr4(cr4);
}
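
/*
 * Sketch (illustrative, not part of the upstream header): a plain CR3 write
 * leaves global (G-bit) translations in place, so a full flush toggles
 * CR4.PGE off and back on, which also invalidates global entries.  Callers
 * pass the shadowed CR4 value, e.g.:
 *
 *	__native_tlb_flush_global(this_cpu_read(cpu_tlbstate.cr4));
 */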
#endif /* _ASM_X86_TLBFLUSH_H */