kvm_host.h source code [linux/arch/x86/include/asm/kvm_host.h]

1	/* SPDX-License-Identifier: GPL-2.0-only */
2	/*
3	* Kernel-based Virtual Machine driver for Linux
4	*
5	* This header defines architecture specific interfaces, x86 version
6	*/
7
8	#ifndef _ASM_X86_KVM_HOST_H
9	#define _ASM_X86_KVM_HOST_H
10
11	#include <linux/types.h>
12	#include <linux/mm.h>
13	#include <linux/mmu_notifier.h>
14	#include <linux/tracepoint.h>
15	#include <linux/cpumask.h>
16	#include <linux/irq_work.h>
17	#include <linux/irq.h>
18	#include <linux/workqueue.h>
19
20	#include <linux/kvm.h>
21	#include <linux/kvm_para.h>
22	#include <linux/kvm_types.h>
23	#include <linux/perf_event.h>
24	#include <linux/pvclock_gtod.h>
25	#include <linux/clocksource.h>
26	#include <linux/irqbypass.h>
27	#include <linux/kfifo.h>
28	#include <linux/sched/vhost_task.h>
29	#include <linux/call_once.h>
30	#include <linux/atomic.h>
31
32	#include <asm/apic.h>
33	#include <asm/pvclock-abi.h>
34	#include <asm/desc.h>
35	#include <asm/mtrr.h>
36	#include <asm/msr-index.h>
37	#include <asm/msr.h>
38	#include <asm/asm.h>
39	#include <asm/irq_remapping.h>
40	#include <asm/kvm_page_track.h>
41	#include <asm/kvm_vcpu_regs.h>
42	#include <asm/reboot.h>
43	#include <hyperv/hvhdk.h>
44
/*
 * Arch feature flag; it carries no value and is only meant to be tested with
 * #ifdef.  NOTE(review): presumably consumed by generic KVM code to enable the
 * arch-specific vCPU debugfs hook -- confirm against the consumer.
 */
#define __KVM_HAVE_ARCH_VCPU_DEBUGFS

/*
 * CONFIG_KVM_MAX_NR_VCPUS is defined iff CONFIG_KVM!=n, provide a dummy max if
 * KVM is disabled (arbitrarily use the default from CONFIG_KVM_MAX_NR_VCPUS).
 */
#ifdef CONFIG_KVM_MAX_NR_VCPUS
#define KVM_MAX_VCPUS CONFIG_KVM_MAX_NR_VCPUS
#else
#define KVM_MAX_VCPUS 1024
#endif

/*
 * In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs
 * might be larger than the actual number of VCPUs because the
 * APIC ID encodes CPU topology information.
 *
 * In the worst case, we'll need less than one extra bit for the
 * Core ID, and less than one extra bit for the Package (Die) ID,
 * so ratio of 4 should be enough.
 */
#define KVM_VCPU_ID_RATIO 4
#define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO)
68
/* memory slots that are not exposed to userspace */
#define KVM_INTERNAL_MEM_SLOTS 3

/*
 * NOTE(review): the name suggests a default halt-polling window expressed in
 * nanoseconds; the consumer is not visible here -- confirm.
 */
#define KVM_HALT_POLL_NS_DEFAULT 200000

/* The irqchip pin count is simply the IOAPIC pin count. */
#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
75
/*
 * Masks of the flag values accepted for three userspace-configurable
 * features.  Each mask is the bitwise OR of every flag KVM recognizes for that
 * feature.
 */
#define KVM_DIRTY_LOG_MANUAL_CAPS	(KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
					 KVM_DIRTY_LOG_INITIALLY_SET)

#define KVM_BUS_LOCK_DETECTION_VALID_MODE	(KVM_BUS_LOCK_DETECTION_OFF | \
						 KVM_BUS_LOCK_DETECTION_EXIT)

#define KVM_X86_NOTIFY_VMEXIT_VALID_BITS	(KVM_X86_NOTIFY_VMEXIT_ENABLED | \
						 KVM_X86_NOTIFY_VMEXIT_USER)
84
/*
 * x86-specific vcpu->requests bit members.
 *
 * Each request is built from an arch request number via KVM_ARCH_REQ(), or via
 * KVM_ARCH_REQ_FLAGS() when it also carries KVM_REQUEST_* behaviour flags.
 * Numbers 0-32 and 34 are in use; 33 is currently unassigned.  KVM_REQ_SMI
 * only exists when CONFIG_KVM_SMM is enabled.
 */
#define KVM_REQ_MIGRATE_TIMER		KVM_ARCH_REQ(0)
#define KVM_REQ_REPORT_TPR_ACCESS	KVM_ARCH_REQ(1)
#define KVM_REQ_TRIPLE_FAULT		KVM_ARCH_REQ(2)
#define KVM_REQ_MMU_SYNC		KVM_ARCH_REQ(3)
#define KVM_REQ_CLOCK_UPDATE		KVM_ARCH_REQ(4)
#define KVM_REQ_LOAD_MMU_PGD		KVM_ARCH_REQ(5)
#define KVM_REQ_EVENT			KVM_ARCH_REQ(6)
#define KVM_REQ_APF_HALT		KVM_ARCH_REQ(7)
#define KVM_REQ_STEAL_UPDATE		KVM_ARCH_REQ(8)
#define KVM_REQ_NMI			KVM_ARCH_REQ(9)
#define KVM_REQ_PMU			KVM_ARCH_REQ(10)
#define KVM_REQ_PMI			KVM_ARCH_REQ(11)
#ifdef CONFIG_KVM_SMM
#define KVM_REQ_SMI			KVM_ARCH_REQ(12)
#endif
#define KVM_REQ_MASTERCLOCK_UPDATE	KVM_ARCH_REQ(13)
#define KVM_REQ_MCLOCK_INPROGRESS \
	KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_SCAN_IOAPIC \
	KVM_ARCH_REQ_FLAGS(15, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_GLOBAL_CLOCK_UPDATE	KVM_ARCH_REQ(16)
#define KVM_REQ_APIC_PAGE_RELOAD \
	KVM_ARCH_REQ_FLAGS(17, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_HV_CRASH		KVM_ARCH_REQ(18)
#define KVM_REQ_IOAPIC_EOI_EXIT		KVM_ARCH_REQ(19)
#define KVM_REQ_HV_RESET		KVM_ARCH_REQ(20)
#define KVM_REQ_HV_EXIT			KVM_ARCH_REQ(21)
#define KVM_REQ_HV_STIMER		KVM_ARCH_REQ(22)
#define KVM_REQ_LOAD_EOI_EXITMAP	KVM_ARCH_REQ(23)
#define KVM_REQ_GET_NESTED_STATE_PAGES	KVM_ARCH_REQ(24)
#define KVM_REQ_APICV_UPDATE \
	KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_TLB_FLUSH_CURRENT	KVM_ARCH_REQ(26)
#define KVM_REQ_TLB_FLUSH_GUEST \
	KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_APF_READY		KVM_ARCH_REQ(28)
#define KVM_REQ_MSR_FILTER_CHANGED	KVM_ARCH_REQ(29)
#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
	KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \
	KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_HV_TLB_FLUSH \
	KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE \
	KVM_ARCH_REQ_FLAGS(34, KVM_REQUEST_WAIT)
131
/*
 * Reserved-bit masks for the control registers: each mask is the complement
 * (at unsigned long width) of the set of bits listed for that register.
 */
#define CR0_RESERVED_BITS                                               \
	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))

#define CR4_RESERVED_BITS                                               \
	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
			  | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
			  | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
			  | X86_CR4_LAM_SUP))

#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
147
148
149
/* All-ones hpa_t sentinel and the predicate that tests a value against it. */
#define INVALID_PAGE (~(hpa_t)0)
#define VALID_PAGE(x) ((x) != INVALID_PAGE)
152
/*
 * KVM Hugepage definitions for x86.
 *
 * Page levels are 1-based: each level above the first adds 9 bits of GFN
 * shift, so KVM_HPAGE_GFN_SHIFT(1) == 0, (2) == 9, (3) == 18.  The remaining
 * helpers derive the byte shift, size, mask and 4K-page count for a level.
 */
#define KVM_MAX_HUGEPAGE_LEVEL	PG_LEVEL_1G
#define KVM_NR_PAGE_SIZES	(KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1)
#define KVM_HPAGE_GFN_SHIFT(x)	(((x) - 1) * 9)
#define KVM_HPAGE_SHIFT(x)	(PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
#define KVM_HPAGE_SIZE(x)	(1UL << KVM_HPAGE_SHIFT(x))
#define KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))
#define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE)
161
/*
 * MMU and per-vCPU sizing constants.  KVM_NUM_MMU_PAGES is derived from
 * KVM_MMU_HASH_SHIFT (1 << 12).  NOTE(review): the consumers of the other MMU
 * tunables are not visible in this header -- see the MMU code for their exact
 * meaning.
 */
#define KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO 50
#define KVM_MIN_ALLOC_MMU_PAGES 64UL
#define KVM_MMU_HASH_SHIFT 12
#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
#define KVM_MAX_CPUID_ENTRIES 256
/* Number of variable-range MTRRs; struct kvm_mtrr stores two u64s for each. */
#define KVM_NR_VAR_MTRR 8

/* Size of the per-vCPU apf.gfns[] array in struct kvm_vcpu_arch. */
#define ASYNC_PF_PER_VCPU 64
172
173	enum kvm_reg {
174	VCPU_REGS_RAX = __VCPU_REGS_RAX,
175	VCPU_REGS_RCX = __VCPU_REGS_RCX,
176	VCPU_REGS_RDX = __VCPU_REGS_RDX,
177	VCPU_REGS_RBX = __VCPU_REGS_RBX,
178	VCPU_REGS_RSP = __VCPU_REGS_RSP,
179	VCPU_REGS_RBP = __VCPU_REGS_RBP,
180	VCPU_REGS_RSI = __VCPU_REGS_RSI,
181	VCPU_REGS_RDI = __VCPU_REGS_RDI,
182	#ifdef CONFIG_X86_64
183	VCPU_REGS_R8 = __VCPU_REGS_R8,
184	VCPU_REGS_R9 = __VCPU_REGS_R9,
185	VCPU_REGS_R10 = __VCPU_REGS_R10,
186	VCPU_REGS_R11 = __VCPU_REGS_R11,
187	VCPU_REGS_R12 = __VCPU_REGS_R12,
188	VCPU_REGS_R13 = __VCPU_REGS_R13,
189	VCPU_REGS_R14 = __VCPU_REGS_R14,
190	VCPU_REGS_R15 = __VCPU_REGS_R15,
191	#endif
192	VCPU_REGS_RIP,
193	NR_VCPU_REGS,
194
195	VCPU_EXREG_PDPTR = NR_VCPU_REGS,
196	VCPU_EXREG_CR0,
197	VCPU_EXREG_CR3,
198	VCPU_EXREG_CR4,
199	VCPU_EXREG_RFLAGS,
200	VCPU_EXREG_SEGMENTS,
201	VCPU_EXREG_EXIT_INFO_1,
202	VCPU_EXREG_EXIT_INFO_2,
203	};
204
/* Segment register indices, numbered consecutively from 0 (ES) to 7 (LDTR). */
enum {
	VCPU_SREG_ES,
	VCPU_SREG_CS,
	VCPU_SREG_SS,
	VCPU_SREG_DS,
	VCPU_SREG_FS,
	VCPU_SREG_GS,
	VCPU_SREG_TR,
	VCPU_SREG_LDTR,
};
215
/*
 * Possible outcomes of a fastpath exit handler, numbered from 0.
 * NOTE(review): the precise action taken for each value is decided by the
 * run-loop code, which is not visible in this header.
 */
enum exit_fastpath_completion {
	EXIT_FASTPATH_NONE,
	EXIT_FASTPATH_REENTER_GUEST,
	EXIT_FASTPATH_EXIT_HANDLED,
	EXIT_FASTPATH_EXIT_USERSPACE,
};
typedef enum exit_fastpath_completion fastpath_t;
223
224	struct x86_emulate_ctxt;
225	struct x86_exception;
226	union kvm_smram;
227	enum x86_intercept;
228	enum x86_intercept_stage;
229
/* Number of breakpoint address registers; sizes the db[]/eff_db[] arrays. */
#define KVM_NR_DB_REGS	4

/* Individual DR6 status bits. */
#define DR6_BUS_LOCK   (1 << 11)
#define DR6_BD		(1 << 13)
#define DR6_BS		(1 << 14)
#define DR6_BT		(1 << 15)
#define DR6_RTM		(1 << 16)
/*
 * DR6_ACTIVE_LOW combines fixed-1 and active-low bits.
 * We can regard all the bits in DR6_FIXED_1 as active_low bits;
 * they will never be 0 for now, but when they are defined
 * in the future it will require no code change.
 *
 * DR6_ACTIVE_LOW is also used as the init/reset value for DR6.
 */
#define DR6_ACTIVE_LOW	0xffff0ff0
#define DR6_VOLATILE	0x0001e80f
#define DR6_FIXED_1	(DR6_ACTIVE_LOW & ~DR6_VOLATILE)

/* DR7 layout: low byte holds the breakpoint enables, plus GE/GD control bits. */
#define DR7_BP_EN_MASK	0x000000ff
#define DR7_GE		(1 << 9)
#define DR7_GD		(1 << 13)
#define DR7_FIXED_1	0x00000400
#define DR7_VOLATILE	0xffff2bff
254
/* Bitwise OR of every KVM_GUESTDBG_* control flag listed below. */
#define KVM_GUESTDBG_VALID_MASK \
	(KVM_GUESTDBG_ENABLE | \
	 KVM_GUESTDBG_SINGLESTEP | \
	 KVM_GUESTDBG_USE_HW_BP | \
	 KVM_GUESTDBG_USE_SW_BP | \
	 KVM_GUESTDBG_INJECT_BP | \
	 KVM_GUESTDBG_INJECT_DB | \
	 KVM_GUESTDBG_BLOCKIRQ)
263
/*
 * Page fault error code bits.  Bits 0-15 use BIT(); the bits from 31 upwards
 * use BIT_ULL() so that they stay well-defined in a 64-bit error code.
 */
#define PFERR_PRESENT_MASK	BIT(0)
#define PFERR_WRITE_MASK	BIT(1)
#define PFERR_USER_MASK		BIT(2)
#define PFERR_RSVD_MASK		BIT(3)
#define PFERR_FETCH_MASK	BIT(4)
#define PFERR_PK_MASK		BIT(5)
#define PFERR_SGX_MASK		BIT(15)
#define PFERR_GUEST_RMP_MASK	BIT_ULL(31)
#define PFERR_GUEST_FINAL_MASK	BIT_ULL(32)
#define PFERR_GUEST_PAGE_MASK	BIT_ULL(33)
#define PFERR_GUEST_ENC_MASK	BIT_ULL(34)
#define PFERR_GUEST_SIZEM_MASK	BIT_ULL(35)
#define PFERR_GUEST_VMPL_MASK	BIT_ULL(36)

/*
 * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP checks
 * when emulating instructions that trigger implicit access.
 */
#define PFERR_IMPLICIT_ACCESS	BIT_ULL(48)
/*
 * PRIVATE_ACCESS is a KVM-defined flag used to indicate that a fault occurred
 * when the guest was accessing private memory.
 */
#define PFERR_PRIVATE_ACCESS   BIT_ULL(49)
/* Both KVM-defined (synthetic) flags combined. */
#define PFERR_SYNTHETIC_MASK   (PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS)
289
/* apic attention bits (bit numbers, not masks) */
#define KVM_APIC_CHECK_VAPIC	0
/*
 * The following bit is set with PV-EOI, unset on EOI.
 * We detect PV-EOI changes by guest by comparing
 * this bit with PV-EOI in guest memory.
 * See the implementation in apic_update_pv_eoi.
 */
#define KVM_APIC_PV_EOI_PENDING	1
299
300	struct kvm_kernel_irq_routing_entry;
301
302	/*
303	* kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
304	* also includes TDP pages) to determine whether or not a page can be used in
305	* the given MMU context. This is a subset of the overall kvm_cpu_role to
306	* minimize the size of kvm_memory_slot.arch.gfn_write_track, i.e. allows
307	* allocating 2 bytes per gfn instead of 4 bytes per gfn.
308	*
309	* Upper-level shadow pages having gptes are tracked for write-protection via
310	* gfn_write_track. As above, gfn_write_track is a 16 bit counter, so KVM must
311	* not create more than 2^16-1 upper-level shadow pages at a single gfn,
312	* otherwise gfn_write_track will overflow and explosions will ensue.
313	*
314	* A unique shadow page (SP) for a gfn is created if and only if an existing SP
315	* cannot be reused. The ability to reuse a SP is tracked by its role, which
316	* incorporates various mode bits and properties of the SP. Roughly speaking,
317	* the number of unique SPs that can theoretically be created is 2^n, where n
318	* is the number of bits that are used to compute the role.
319	*
320	* But, even though there are 20 bits in the mask below, not all combinations
321	* of modes and flags are possible:
322	*
323	* - invalid shadow pages are not accounted, mirror pages are not shadowed,
324	* so the bits are effectively 18.
325	*
326	* - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging);
327	* execonly and ad_disabled are only used for nested EPT which has
328	* has_4_byte_gpte=0. Therefore, 2 bits are always unused.
329	*
330	* - the 4 bits of level are effectively limited to the values 2/3/4/5,
331	* as 4k SPs are not tracked (allowed to go unsync). In addition non-PAE
332	* paging has exactly one upper level, making level completely redundant
333	* when has_4_byte_gpte=1.
334	*
335	* - on top of this, smep_andnot_wp and smap_andnot_wp are only set if
336	* cr0_wp=0, therefore these three bits only give rise to 5 possibilities.
337	*
338	* Therefore, the maximum number of possible upper-level shadow pages for a
339	* single gfn is a bit less than 2^13.
340	*/
341	union kvm_mmu_page_role {
342	u32 word;
343	struct {
344	unsigned level:`4`;
345	unsigned has_4_byte_gpte:`1`;
346	unsigned quadrant:`2`;
347	unsigned direct:`1`;
348	unsigned access:`3`;
349	unsigned invalid:`1`;
350	unsigned efer_nx:`1`;
351	unsigned cr0_wp:`1`;
352	unsigned smep_andnot_wp:`1`;
353	unsigned smap_andnot_wp:`1`;
354	unsigned ad_disabled:`1`;
355	unsigned guest_mode:`1`;
356	unsigned passthrough:`1`;
357	unsigned is_mirror:`1`;
358	unsigned :`4`;
359
360	/*
361	* This is left at the top of the word so that
362	* kvm_memslots_for_spte_role can extract it with a
363	* simple shift. While there is room, give it a whole
364	* byte so it is also faster to load it from memory.
365	*/
366	unsigned smm:`8`;
367	};
368	};
369
370	/*
371	* kvm_mmu_extended_role complements kvm_mmu_page_role, tracking properties
372	* relevant to the current MMU configuration. When loading CR0, CR4, or EFER,
373	* including on nested transitions, if nothing in the full role changes then
374	* MMU re-configuration can be skipped. @valid bit is set on first usage so we
375	* don't treat all-zero structure as valid data.
376	*
377	* The properties that are tracked in the extended role but not the page role
378	* are for things that either (a) do not affect the validity of the shadow page
379	* or (b) are indirectly reflected in the shadow page's role. For example,
380	* CR4.PKE only affects permission checks for software walks of the guest page
381	* tables (because KVM doesn't support Protection Keys with shadow paging), and
382	* CR0.PG, CR4.PAE, and CR4.PSE are indirectly reflected in role.level.
383	*
384	* Note, SMEP and SMAP are not redundant with sm*p_andnot_wp in the page role.
385	* If CR0.WP=1, KVM can reuse shadow pages for the guest regardless of SMEP and
386	* SMAP, but the MMU's permission checks for software walks need to be SMEP and
387	* SMAP aware regardless of CR0.WP.
388	*/
389	union kvm_mmu_extended_role {
390	u32 word;
391	struct {
392	unsigned int valid:`1`;
393	unsigned int execonly:`1`;
394	unsigned int cr4_pse:`1`;
395	unsigned int cr4_pke:`1`;
396	unsigned int cr4_smap:`1`;
397	unsigned int cr4_smep:`1`;
398	unsigned int cr4_la57:`1`;
399	unsigned int efer_lma:`1`;
400	};
401	};
402
403	union kvm_cpu_role {
404	u64 as_u64;
405	struct {
406	union kvm_mmu_page_role base;
407	union kvm_mmu_extended_role ext;
408	};
409	};
410
411	struct kvm_rmap_head {
412	atomic_long_t val;
413	};
414
/*
 * Parameters of a port I/O request; embedded in struct kvm_vcpu_arch as "pio".
 * NOTE(review): @in presumably distinguishes reads (IN) from writes (OUT), and
 * @size/@count the element width and repeat count -- confirm in the emulator.
 */
struct kvm_pio_request {
	unsigned long count;
	int in;
	int port;
	int size;
};
421
422	#define PT64_ROOT_MAX_LEVEL 5
423
424	struct rsvd_bits_validate {
425	u64 rsvd_bits_mask[`2`][PT64_ROOT_MAX_LEVEL];
426	u64 bad_mt_xwr;
427	};
428
429	struct kvm_mmu_root_info {
430	gpa_t pgd;
431	hpa_t hpa;
432	};
433
434	#define KVM_MMU_ROOT_INFO_INVALID \
435	((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE })
436
437	#define KVM_MMU_NUM_PREV_ROOTS 3
438
439	#define KVM_MMU_ROOT_CURRENT BIT(0)
440	#define KVM_MMU_ROOT_PREVIOUS(i) BIT(1+i)
441	#define KVM_MMU_ROOTS_ALL (BIT(1 + KVM_MMU_NUM_PREV_ROOTS) - 1)
442
443	#define KVM_HAVE_MMU_RWLOCK
444
445	struct kvm_mmu_page;
446	struct kvm_page_fault;
447
448	/*
449	* x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
450	* and 2-level 32-bit). The kvm_mmu structure abstracts the details of the
451	* current mmu mode.
452	*/
453	struct kvm_mmu {
454	unsigned long (get_guest_pgd)(struct* kvm_vcpu *vcpu);
455	u64 (get_pdptr)(struct* kvm_vcpu vcpu, int* index);
456	int (page_fault)(struct* kvm_vcpu vcpu, struct* kvm_page_fault *fault);
457	void (inject_page_fault)(struct* kvm_vcpu *vcpu,
458	struct x86_exception *fault);
459	gpa_t (gva_to_gpa)(struct* kvm_vcpu vcpu, struct* kvm_mmu *mmu,
460	gpa_t gva_or_gpa, u64 access,
461	struct x86_exception *exception);
462	int (sync_spte)(struct* kvm_vcpu *vcpu,
463	struct kvm_mmu_page sp, int* i);
464	struct kvm_mmu_root_info root;
465	hpa_t mirror_root_hpa;
466	union kvm_cpu_role cpu_role;
467	union kvm_mmu_page_role root_role;
468
469	/*
470	* The pkru_mask indicates if protection key checks are needed. It
471	* consists of 16 domains indexed by page fault error code bits [4:1],
472	* with PFEC.RSVD replaced by ACC_USER_MASK from the page tables.
473	* Each domain has 2 bits which are ANDed with AD and WD from PKRU.
474	*/
475	u32 pkru_mask;
476
477	struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS];
478
479	/*
480	* Bitmap; bit set = permission fault
481	* Byte index: page fault error code [4:1]
482	* Bit index: pte permissions in ACC_* format
483	*/
484	u8 permissions[`16`];
485
486	u64 *pae_root;
487	u64 *pml4_root;
488	u64 *pml5_root;
489
490	/*
491	* check zero bits on shadow page table entries, these
492	* bits include not only hardware reserved bits but also
493	* the bits spte never used.
494	*/
495	struct rsvd_bits_validate shadow_zero_check;
496
497	struct rsvd_bits_validate guest_rsvd_check;
498
499	u64 pdptrs[`4`]; / pae /
500	};
501
/* Kind of performance counter: general purpose (0) or fixed (1). */
enum pmc_type {
	KVM_PMC_GP = 0,
	KVM_PMC_FIXED,
};
506
507	struct kvm_pmc {
508	enum pmc_type type;
509	u8 idx;
510	bool is_paused;
511	bool intr;
512	/*
513	* Base value of the PMC counter, relative to the consumed count in
514	* the associated perf_event. This value includes counter updates from
515	* the perf_event and emulated_count since the last time the counter
516	* was reprogrammed, but it is not the current value as seen by the
517	* guest or userspace.
518	*
519	* The count is relative to the associated perf_event so that KVM
520	* doesn't need to reprogram the perf_event every time the guest writes
521	* to the counter.
522	*/
523	u64 counter;
524	/*
525	* PMC events triggered by KVM emulation that haven't been fully
526	* processed, i.e. haven't undergone overflow detection.
527	*/
528	u64 emulated_counter;
529	u64 eventsel;
530	struct perf_event *perf_event;
531	struct kvm_vcpu *vcpu;
532	/*
533	* only for creating or reusing perf_event,
534	* eventsel value for general purpose counters,
535	* ctrl value for fixed counters.
536	*/
537	u64 current_config;
538	};
539
/* More counters may conflict with other existing Architectural MSRs */
/* Compile-time maximum; arguments may be evaluated twice, use constants only. */
#define KVM_MAX(a, b)	((a) >= (b) ? (a) : (b))
#define KVM_MAX_NR_INTEL_GP_COUNTERS	8
#define KVM_MAX_NR_AMD_GP_COUNTERS	6
#define KVM_MAX_NR_GP_COUNTERS	KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \
					KVM_MAX_NR_AMD_GP_COUNTERS)

#define KVM_MAX_NR_INTEL_FIXED_COUNTERS	3
#define KVM_MAX_NR_AMD_FIXED_COUNTERS	0
/*
 * The original names misspelled "COUNTERS" as "COUTNERS"; keep them as aliases
 * so that existing users continue to build.
 */
#define KVM_MAX_NR_INTEL_FIXED_COUTNERS	KVM_MAX_NR_INTEL_FIXED_COUNTERS
#define KVM_MAX_NR_AMD_FIXED_COUTNERS	KVM_MAX_NR_AMD_FIXED_COUNTERS
#define KVM_MAX_NR_FIXED_COUNTERS	KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUNTERS, \
						KVM_MAX_NR_AMD_FIXED_COUNTERS)
551
552	struct kvm_pmu {
553	u8 version;
554	unsigned nr_arch_gp_counters;
555	unsigned nr_arch_fixed_counters;
556	unsigned available_event_types;
557	u64 fixed_ctr_ctrl;
558	u64 fixed_ctr_ctrl_rsvd;
559	u64 global_ctrl;
560	u64 global_status;
561	u64 counter_bitmask[`2`];
562	u64 global_ctrl_rsvd;
563	u64 global_status_rsvd;
564	u64 reserved_bits;
565	u64 raw_event_mask;
566	struct kvm_pmc gp_counters[KVM_MAX_NR_GP_COUNTERS];
567	struct kvm_pmc fixed_counters[KVM_MAX_NR_FIXED_COUNTERS];
568
569	/*
570	* Overlay the bitmap with a 64-bit atomic so that all bits can be
571	* set in a single access, e.g. to reprogram all counters when the PMU
572	* filter changes.
573	*/
574	union {
575	DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
576	atomic64_t __reprogram_pmi;
577	};
578	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
579	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
580
581	u64 ds_area;
582	u64 pebs_enable;
583	u64 pebs_enable_rsvd;
584	u64 pebs_data_cfg;
585	u64 pebs_data_cfg_rsvd;
586
587	/*
588	* If a guest counter is cross-mapped to host counter with different
589	* index, its PEBS capability will be temporarily disabled.
590	*
591	* The user should make sure that this mask is updated
592	* after disabling interrupts and before perf_guest_get_msrs();
593	*/
594	u64 host_cross_mapped_mask;
595
596	/*
597	* The gate to release perf_events not marked in
598	* pmc_in_use only once in a vcpu time slice.
599	*/
600	bool need_cleanup;
601
602	/*
603	* The total number of programmed perf_events and it helps to avoid
604	* redundant check before cleanup if guest don't use vPMU at all.
605	*/
606	u8 event_count;
607	};
608
609	struct kvm_pmu_ops;
610
611	enum {
612	KVM_DEBUGREG_BP_ENABLED = BIT(`0`),
613	KVM_DEBUGREG_WONT_EXIT = BIT(`1`),
614	/*
615	* Guest debug registers (DR0-3, DR6 and DR7) are saved/restored by
616	* hardware on exit from or enter to guest. KVM needn't switch them.
617	* DR0-3, DR6 and DR7 are set to their architectural INIT value on VM
618	* exit, host values need to be restored.
619	*/
620	KVM_DEBUGREG_AUTO_SWITCH = BIT(`2`),
621	};
622
623	struct kvm_mtrr {
624	u64 var[KVM_NR_VAR_MTRR * `2`];
625	u64 fixed_64k;
626	u64 fixed_16k[`2`];
627	u64 fixed_4k[`8`];
628	u64 deftype;
629	};
630
631	/ Hyper-V SynIC timer /
632	struct kvm_vcpu_hv_stimer {
633	struct hrtimer timer;
634	int index;
635	union hv_stimer_config config;
636	u64 count;
637	u64 exp_time;
638	struct hv_message msg;
639	bool msg_pending;
640	};
641
642	/ Hyper-V synthetic interrupt controller (SynIC)/
643	struct kvm_vcpu_hv_synic {
644	u64 version;
645	u64 control;
646	u64 msg_page;
647	u64 evt_page;
648	atomic64_t sint[HV_SYNIC_SINT_COUNT];
649	atomic_t sint_to_gsi[HV_SYNIC_SINT_COUNT];
650	DECLARE_BITMAP(auto_eoi_bitmap, `256`);
651	DECLARE_BITMAP(vec_bitmap, `256`);
652	bool active;
653	bool dont_zero_synic_pages;
654	};
655
/* The maximum number of entries on the TLB flush fifo. */
#define KVM_HV_TLB_FLUSH_FIFO_SIZE (16)
/*
 * Note: the following 'magic' entry is made up by KVM to avoid putting
 * anything besides GVA on the TLB flush fifo.  It is theoretically possible
 * to observe a request to flush 4095 PFNs starting from 0xfffffffffffff000
 * which will look identical.  KVM's action to 'flush everything' instead of
 * flushing these particular addresses is, however, fully legitimate as
 * flushing more than requested is always OK.
 */
#define KVM_HV_TLB_FLUSHALL_ENTRY  ((u64)-1)
667
/*
 * Index of each per-vCPU Hyper-V TLB flush fifo (one for L1, one for L2);
 * HV_NR_TLB_FLUSH_FIFOS is the count and sizes the tlb_flush_fifo[] array.
 */
enum hv_tlb_flush_fifos {
	HV_L1_TLB_FLUSH_FIFO,
	HV_L2_TLB_FLUSH_FIFO,
	HV_NR_TLB_FLUSH_FIFOS,
};
673
674	struct kvm_vcpu_hv_tlb_flush_fifo {
675	spinlock_t write_lock;
676	DECLARE_KFIFO(entries, u64, KVM_HV_TLB_FLUSH_FIFO_SIZE);
677	};
678
679	/ Hyper-V per vcpu emulation context /
680	struct kvm_vcpu_hv {
681	struct kvm_vcpu *vcpu;
682	u32 vp_index;
683	u64 hv_vapic;
684	s64 runtime_offset;
685	struct kvm_vcpu_hv_synic synic;
686	struct kvm_hyperv_exit exit;
687	struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
688	DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
689	bool enforce_cpuid;
690	struct {
691	u32 features_eax; / HYPERV_CPUID_FEATURES.EAX /
692	u32 features_ebx; / HYPERV_CPUID_FEATURES.EBX /
693	u32 features_edx; / HYPERV_CPUID_FEATURES.EDX /
694	u32 enlightenments_eax; / HYPERV_CPUID_ENLIGHTMENT_INFO.EAX /
695	u32 enlightenments_ebx; / HYPERV_CPUID_ENLIGHTMENT_INFO.EBX /
696	u32 syndbg_cap_eax; / HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX /
697	u32 nested_eax; / HYPERV_CPUID_NESTED_FEATURES.EAX /
698	u32 nested_ebx; / HYPERV_CPUID_NESTED_FEATURES.EBX /
699	} cpuid_cache;
700
701	struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS];
702
703	/ Preallocated buffer for handling hypercalls passing sparse vCPU set /
704	u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS];
705
706	struct hv_vp_assist_page vp_assist_page;
707
708	struct {
709	u64 pa_page_gpa;
710	u64 vm_id;
711	u32 vp_id;
712	} nested;
713	};
714
715	struct kvm_hypervisor_cpuid {
716	u32 base;
717	u32 limit;
718	};
719
#ifdef CONFIG_KVM_XEN
/* Xen HVM per vcpu emulation context */
struct kvm_vcpu_xen {
	u64 hypercall_rip;
	u32 current_runstate;
	u8 upcall_vector;
	struct gfn_to_pfn_cache vcpu_info_cache;
	struct gfn_to_pfn_cache vcpu_time_info_cache;
	struct gfn_to_pfn_cache runstate_cache;
	struct gfn_to_pfn_cache runstate2_cache;
	u64 last_steal;
	u64 runstate_entry_time;
	u64 runstate_times[4];
	unsigned long evtchn_pending_sel;
	u32 vcpu_id; /* The Xen / ACPI vCPU ID */
	u32 timer_virq;
	u64 timer_expires; /* In guest epoch */
	atomic_t timer_pending;
	struct hrtimer timer;
	int poll_evtchn;
	struct timer_list poll_timer;
	struct kvm_hypervisor_cpuid cpuid;
};
#endif
744
745	struct kvm_queued_exception {
746	bool pending;
747	bool injected;
748	bool has_error_code;
749	u8 vector;
750	u32 error_code;
751	unsigned long payload;
752	bool has_payload;
753	};
754
755	/*
756	* Hardware-defined CPUID leafs that are either scattered by the kernel or are
757	* unknown to the kernel, but need to be directly used by KVM. Note, these
758	* word values conflict with the kernel's "bug" caps, but KVM doesn't use those.
759	*/
760	enum kvm_only_cpuid_leafs {
761	CPUID_12_EAX = NCAPINTS,
762	CPUID_7_1_EDX,
763	CPUID_8000_0007_EDX,
764	CPUID_8000_0022_EAX,
765	CPUID_7_2_EDX,
766	CPUID_24_0_EBX,
767	NR_KVM_CPU_CAPS,
768
769	NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
770	};
771
772	struct kvm_vcpu_arch {
773	/*
774	* rip and regs accesses must go through
775	* kvm_{register,rip}_{read,write} functions.
776	*/
777	unsigned long regs[NR_VCPU_REGS];
778	u32 regs_avail;
779	u32 regs_dirty;
780
781	unsigned long cr0;
782	unsigned long cr0_guest_owned_bits;
783	unsigned long cr2;
784	unsigned long cr3;
785	unsigned long cr4;
786	unsigned long cr4_guest_owned_bits;
787	unsigned long cr4_guest_rsvd_bits;
788	unsigned long cr8;
789	u32 host_pkru;
790	u32 pkru;
791	u32 hflags;
792	u64 efer;
793	u64 host_debugctl;
794	u64 apic_base;
795	struct kvm_lapic apic; /* kernel irqchip context /
796	bool load_eoi_exitmap_pending;
797	DECLARE_BITMAP(ioapic_handled_vectors, `256`);
798	unsigned long apic_attention;
799	int32_t apic_arb_prio;
800	int mp_state;
801	u64 ia32_misc_enable_msr;
802	u64 smbase;
803	u64 smi_count;
804	bool at_instruction_boundary;
805	bool tpr_access_reporting;
806	bool xfd_no_write_intercept;
807	u64 ia32_xss;
808	u64 microcode_version;
809	u64 arch_capabilities;
810	u64 perf_capabilities;
811
812	/*
813	* Paging state of the vcpu
814	*
815	* If the vcpu runs in guest mode with two level paging this still saves
816	* the paging mode of the l1 guest. This context is always used to
817	* handle faults.
818	*/
819	struct kvm_mmu *mmu;
820
821	/ Non-nested MMU for L1 /
822	struct kvm_mmu root_mmu;
823
824	/ L1 MMU when running nested /
825	struct kvm_mmu guest_mmu;
826
827	/*
828	* Paging state of an L2 guest (used for nested npt)
829	*
830	* This context will save all necessary information to walk page tables
831	* of an L2 guest. This context is only initialized for page table
832	* walking and not for faulting since we never handle l2 page faults on
833	* the host.
834	*/
835	struct kvm_mmu nested_mmu;
836
837	/*
838	* Pointer to the mmu context currently used for
839	* gva_to_gpa translations.
840	*/
841	struct kvm_mmu *walk_mmu;
842
843	struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
844	struct kvm_mmu_memory_cache mmu_shadow_page_cache;
845	struct kvm_mmu_memory_cache mmu_shadowed_info_cache;
846	struct kvm_mmu_memory_cache mmu_page_header_cache;
847	/*
848	* This cache is to allocate external page table. E.g. private EPT used
849	* by the TDX module.
850	*/
851	struct kvm_mmu_memory_cache mmu_external_spt_cache;
852
853	/*
854	* QEMU userspace and the guest each have their own FPU state.
855	* In vcpu_run, we switch between the user and guest FPU contexts.
856	* While running a VCPU, the VCPU thread will have the guest FPU
857	* context.
858	*
859	* Note that while the PKRU state lives inside the fpu registers,
860	* it is switched out separately at VMENTER and VMEXIT time. The
861	* "guest_fpstate" state here contains the guest FPU context, with the
862	* host PKRU bits.
863	*/
864	struct fpu_guest guest_fpu;
865
866	u64 xcr0;
867	u64 guest_supported_xcr0;
868
869	struct kvm_pio_request pio;
870	void *pio_data;
871	void *sev_pio_data;
872	unsigned sev_pio_count;
873
874	u8 event_exit_inst_len;
875
876	bool exception_from_userspace;
877
878	/ Exceptions to be injected to the guest. /
879	struct kvm_queued_exception exception;
880	/ Exception VM-Exits to be synthesized to L1. /
881	struct kvm_queued_exception exception_vmexit;
882
883	struct kvm_queued_interrupt {
884	bool injected;
885	bool soft;
886	u8 nr;
887	} interrupt;
888
889	int halt_request; / real mode on Intel only /
890
891	int cpuid_nent;
892	struct kvm_cpuid_entry2 *cpuid_entries;
893	bool cpuid_dynamic_bits_dirty;
894	bool is_amd_compatible;
895
896	/*
897	* cpu_caps holds the effective guest capabilities, i.e. the features
898	* the vCPU is allowed to use. Typically, but not always, features can
899	* be used by the guest if and only if both KVM and userspace want to
900	* expose the feature to the guest.
901	*
902	* A common exception is for virtualization holes, i.e. when KVM can't
903	* prevent the guest from using a feature, in which case the vCPU "has"
904	* the feature regardless of what KVM or userspace desires.
905	*
906	* Note, features that don't require KVM involvement in any way are
907	* NOT enforced/sanitized by KVM, i.e. are taken verbatim from the
908	* guest CPUID provided by userspace.
909	*/
910	u32 cpu_caps[NR_KVM_CPU_CAPS];
911
912	u64 reserved_gpa_bits;
913	int maxphyaddr;
914
915	/ emulate context /
916
917	struct x86_emulate_ctxt *emulate_ctxt;
918	bool emulate_regs_need_sync_to_vcpu;
919	bool emulate_regs_need_sync_from_vcpu;
920	int (complete_userspace_io)(struct* kvm_vcpu *vcpu);
921	unsigned long cui_linear_rip;
922
923	gpa_t time;
924	s8 pvclock_tsc_shift;
925	u32 pvclock_tsc_mul;
926	unsigned int hw_tsc_khz;
927	struct gfn_to_pfn_cache pv_time;
928	/ set guest stopped flag in pvclock flags field /
929	bool pvclock_set_guest_stopped_request;
930
931	struct {
932	u8 preempted;
933	u64 msr_val;
934	u64 last_steal;
935	struct gfn_to_hva_cache cache;
936	} st;
937
938	u64 l1_tsc_offset;
939	u64 tsc_offset; / current tsc offset /
940	u64 last_guest_tsc;
941	u64 last_host_tsc;
942	u64 tsc_offset_adjustment;
943	u64 this_tsc_nsec;
944	u64 this_tsc_write;
945	u64 this_tsc_generation;
946	bool tsc_catchup;
947	bool tsc_always_catchup;
948	s8 virtual_tsc_shift;
949	u32 virtual_tsc_mult;
950	u32 virtual_tsc_khz;
951	s64 ia32_tsc_adjust_msr;
952	u64 msr_ia32_power_ctl;
953	u64 l1_tsc_scaling_ratio;
954	u64 tsc_scaling_ratio; / current scaling ratio /
955
956	atomic_t nmi_queued; / unprocessed asynchronous NMIs /
957	/ Number of NMIs pending injection, not including hardware vNMIs. /
958	unsigned int nmi_pending;
959	bool nmi_injected; / Trying to inject an NMI this entry /
960	bool smi_pending; / SMI queued after currently running handler /
961	u8 handling_intr_from_guest;
962
963	struct kvm_mtrr mtrr_state;
964	u64 pat;
965
966	unsigned switch_db_regs;
967	unsigned long db[KVM_NR_DB_REGS];
968	unsigned long dr6;
969	unsigned long dr7;
970	unsigned long eff_db[KVM_NR_DB_REGS];
971	unsigned long guest_debug_dr7;
972	u64 msr_platform_info;
973	u64 msr_misc_features_enables;
974
975	u64 mcg_cap;
976	u64 mcg_status;
977	u64 mcg_ctl;
978	u64 mcg_ext_ctl;
979	u64 *mce_banks;
980	u64 *mci_ctl2_banks;
981
982	/ Cache MMIO info /
983	u64 mmio_gva;
984	unsigned mmio_access;
985	gfn_t mmio_gfn;
986	u64 mmio_gen;
987
988	struct kvm_pmu pmu;
989
990	/ used for guest single stepping over the given code position /
991	unsigned long singlestep_rip;
992
993	#ifdef CONFIG_KVM_HYPERV
994	bool hyperv_enabled;
995	struct kvm_vcpu_hv *hyperv;
996	#endif
997	#ifdef CONFIG_KVM_XEN
998	struct kvm_vcpu_xen xen;
999	#endif
1000	cpumask_var_t wbinvd_dirty_mask;
1001
1002	unsigned long last_retry_eip;
1003	unsigned long last_retry_addr;
1004
1005	struct {
1006	bool halted;
1007	gfn_t gfns[ASYNC_PF_PER_VCPU];
1008	struct gfn_to_hva_cache data;
1009	u64 msr_en_val; / MSR_KVM_ASYNC_PF_EN /
1010	u64 msr_int_val; / MSR_KVM_ASYNC_PF_INT /
1011	u16 vec;
1012	u32 id;
1013	u32 host_apf_flags;
1014	bool send_always;
1015	bool delivery_as_pf_vmexit;
1016	bool pageready_pending;
1017	} apf;
1018
1019	/ OSVW MSRs (AMD only) /
1020	struct {
1021	u64 length;
1022	u64 status;
1023	} osvw;
1024
1025	struct {
1026	u64 msr_val;
1027	struct gfn_to_hva_cache data;
1028	} pv_eoi;
1029
1030	u64 msr_kvm_poll_control;
1031
1032	/ pv related host specific info /
1033	struct {
1034	bool pv_unhalted;
1035	} pv;
1036
1037	int pending_ioapic_eoi;
1038	int pending_external_vector;
1039	int highest_stale_pending_ioapic_eoi;
1040
1041	/ be preempted when it's in kernel-mode(cpl=0) /
1042	bool preempted_in_kernel;
1043
1044	/ Flush the L1 Data cache for L1TF mitigation on VMENTER /
1045	bool l1tf_flush_l1d;
1046
1047	/ Host CPU on which VM-entry was most recently attempted /
1048	int last_vmentry_cpu;
1049
1050	/ AMD MSRC001_0015 Hardware Configuration /
1051	u64 msr_hwcr;
1052
1053	/ pv related cpuid info /
1054	struct {
1055	/*
1056	* value of the eax register in the KVM_CPUID_FEATURES CPUID
1057	* leaf.
1058	*/
1059	u32 features;
1060
1061	/*
1062	* indicates whether pv emulation should be disabled if features
1063	* are not present in the guest's cpuid
1064	*/
1065	bool enforce;
1066	} pv_cpuid;
1067
1068	/ Protected Guests /
1069	bool guest_state_protected;
1070	bool guest_tsc_protected;
1071
1072	/*
1073	* Set when PDPTS were loaded directly by the userspace without
1074	* reading the guest memory
1075	*/
1076	bool pdptrs_from_userspace;
1077
1078	#if IS_ENABLED(CONFIG_HYPERV)
1079	hpa_t hv_root_tdp;
1080	#endif
1081	};
1082
/*
 * Per-entry large page metadata; arrays of these hang off
 * kvm_arch_memory_slot.lpage_info.
 */
struct kvm_lpage_info {
	/* NOTE(review): presumably non-zero forbids a large mapping - confirm in the MMU code. */
	int disallow_lpage;
};
1086
1087	struct kvm_arch_memory_slot {
1088	struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
1089	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - `1`];
1090	unsigned short *gfn_write_track;
1091	};
1092
/*
 * Track the mode of the optimized logical map, as the rules for decoding the
 * destination vary per mode.  Enabling the optimized logical map requires all
 * software-enabled local APICs to be in the same mode, each addressable APIC to
 * be mapped to only one MDA, and each MDA to map to at most one APIC.
 */
enum kvm_apic_logical_mode {
	/* All local APICs are software disabled. */
	KVM_APIC_MODE_SW_DISABLED,
	/* All software enabled local APICs in xAPIC cluster addressing mode. */
	KVM_APIC_MODE_XAPIC_CLUSTER,
	/* All software enabled local APICs in xAPIC flat addressing mode. */
	KVM_APIC_MODE_XAPIC_FLAT,
	/* All software enabled local APICs in x2APIC mode. */
	KVM_APIC_MODE_X2APIC,
	/*
	 * Optimized map disabled, e.g. not all local APICs in the same logical
	 * mode, same logical ID assigned to multiple APICs, etc.
	 */
	KVM_APIC_MODE_MAP_DISABLED,
};
1114
1115	struct kvm_apic_map {
1116	struct rcu_head rcu;
1117	enum kvm_apic_logical_mode logical_mode;
1118	u32 max_apic_id;
1119	union {
1120	struct kvm_lapic *xapic_flat_map[`8`];
1121	struct kvm_lapic *xapic_cluster_map[`16`][`4`];
1122	};
1123	struct kvm_lapic *phys_map[];
1124	};
1125
1126	/ Hyper-V synthetic debugger (SynDbg)/
1127	struct kvm_hv_syndbg {
1128	struct {
1129	u64 control;
1130	u64 status;
1131	u64 send_page;
1132	u64 recv_page;
1133	u64 pending_page;
1134	} control;
1135	u64 options;
1136	};
1137
/* Current state of Hyper-V TSC page clocksource */
enum hv_tsc_page_status {
	/* TSC page was not set up or disabled */
	HV_TSC_PAGE_UNSET = 0,
	/* TSC page MSR was written by the guest, update pending */
	HV_TSC_PAGE_GUEST_CHANGED,
	/* TSC page update was triggered from the host side */
	HV_TSC_PAGE_HOST_CHANGED,
	/* TSC page was properly set up and is currently active */
	HV_TSC_PAGE_SET,
	/* TSC page was set up with an inaccessible GPA */
	HV_TSC_PAGE_BROKEN,
};
1151
#ifdef CONFIG_KVM_HYPERV
/* Hyper-V emulation context */
struct kvm_hv {
	struct mutex hv_lock;
	u64 hv_guest_os_id;
	u64 hv_hypercall;
	u64 hv_tsc_page;
	enum hv_tsc_page_status hv_tsc_page_status;

	/* Hyper-v based guest crash (NT kernel bugcheck) parameters */
	u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
	u64 hv_crash_ctl;

	struct ms_hyperv_tsc_page tsc_ref;

	struct idr conn_to_evt;

	u64 hv_reenlightenment_control;
	u64 hv_tsc_emulation_control;
	u64 hv_tsc_emulation_status;
	u64 hv_invtsc_control;

	/* How many vCPUs have VP index != vCPU index */
	atomic_t num_mismatched_vp_indexes;

	/*
	 * How many SynICs use 'AutoEOI' feature
	 * (protected by arch.apicv_update_lock)
	 */
	unsigned int synic_auto_eoi_used;

	struct kvm_hv_syndbg hv_syndbg;

	bool xsaves_xsavec_checked;
};
#endif
1188
1189	struct msr_bitmap_range {
1190	u32 flags;
1191	u32 nmsrs;
1192	u32 base;
1193	unsigned long *bitmap;
1194	};
1195
#ifdef CONFIG_KVM_XEN
/* Xen emulation context */
struct kvm_xen {
	struct mutex xen_lock;
	u32 xen_version;
	bool long_mode;
	bool runstate_update_flag;
	u8 upcall_vector;
	struct gfn_to_pfn_cache shinfo_cache;
	struct idr evtchn_ports;
	/* Bitmap with one bit per possible vCPU (KVM_MAX_VCPUS bits). */
	unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];

	struct kvm_xen_hvm_config hvm_config;
};
#endif
1211
/* How (and whether) the in-kernel interrupt controller was created. */
enum kvm_irqchip_mode {
	KVM_IRQCHIP_NONE,
	KVM_IRQCHIP_KERNEL,       /* created with KVM_CREATE_IRQCHIP */
	KVM_IRQCHIP_SPLIT,        /* created with KVM_CAP_SPLIT_IRQCHIP */
};
1217
1218	struct kvm_x86_msr_filter {
1219	u8 count;
1220	bool default_allow:`1`;
1221	struct msr_bitmap_range ranges[`16`];
1222	};
1223
1224	struct kvm_x86_pmu_event_filter {
1225	__u32 action;
1226	__u32 nevents;
1227	__u32 fixed_counter_bitmap;
1228	__u32 flags;
1229	__u32 nr_includes;
1230	__u32 nr_excludes;
1231	__u64 *includes;
1232	__u64 *excludes;
1233	__u64 events[];
1234	};
1235
/*
 * Reasons for inhibiting APIC acceleration.  Each enumerator is a bit number
 * (see __APICV_INHIBIT_REASON(), which wraps it in BIT()).
 */
enum kvm_apicv_inhibit {

	/********************************************************************/
	/* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */
	/********************************************************************/

	/*
	 * APIC acceleration is disabled by a module parameter
	 * and/or not supported in hardware.
	 */
	APICV_INHIBIT_REASON_DISABLED,

	/*
	 * APIC acceleration is inhibited because AutoEOI feature is
	 * being used by a HyperV guest.
	 */
	APICV_INHIBIT_REASON_HYPERV,

	/*
	 * APIC acceleration is inhibited because the userspace didn't yet
	 * enable the kernel/split irqchip.
	 */
	APICV_INHIBIT_REASON_ABSENT,

	/*
	 * APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ
	 * (out of band, debug measure of blocking all interrupts on this vCPU)
	 * was enabled, to avoid AVIC/APICv bypassing it.
	 */
	APICV_INHIBIT_REASON_BLOCKIRQ,

	/*
	 * APICv is disabled because not all vCPUs have a 1:1 mapping between
	 * APIC ID and vCPU, _and_ KVM is not applying its x2APIC hotplug hack.
	 */
	APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED,

	/*
	 * For simplicity, the APIC acceleration is inhibited
	 * first time either APIC ID or APIC base are changed by the guest
	 * from their reset values.
	 */
	APICV_INHIBIT_REASON_APIC_ID_MODIFIED,
	APICV_INHIBIT_REASON_APIC_BASE_MODIFIED,

	/******************************************************/
	/* INHIBITs that are relevant only to the AMD's AVIC. */
	/******************************************************/

	/*
	 * AVIC is inhibited on a vCPU because it runs a nested guest.
	 *
	 * This is needed because unlike APICv, the peers of this vCPU
	 * cannot use the doorbell mechanism to signal interrupts via AVIC when
	 * a vCPU runs nested.
	 */
	APICV_INHIBIT_REASON_NESTED,

	/*
	 * On SVM, the wait for the IRQ window is implemented with pending vIRQ,
	 * which cannot be injected when the AVIC is enabled, thus AVIC
	 * is inhibited while KVM waits for IRQ window.
	 */
	APICV_INHIBIT_REASON_IRQWIN,

	/*
	 * PIT (i8254) 're-inject' mode, relies on EOI intercept,
	 * which AVIC doesn't support for edge triggered interrupts.
	 */
	APICV_INHIBIT_REASON_PIT_REINJ,

	/*
	 * AVIC is disabled because SEV doesn't support it.
	 */
	APICV_INHIBIT_REASON_SEV,

	/*
	 * AVIC is disabled because not all vCPUs with a valid LDR have a 1:1
	 * mapping between logical ID and vCPU.
	 */
	APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED,

	/* Number of reasons above; must stay last. */
	NR_APICV_INHIBIT_REASONS,
};
1319
/*
 * Expand to a { bit mask, "NAME" } initializer pair for one inhibit reason;
 * the name is the stringified suffix of the APICV_INHIBIT_REASON_* enumerator.
 */
#define __APICV_INHIBIT_REASON(reason)			\
	{ BIT(APICV_INHIBIT_REASON_##reason), #reason }

/*
 * Comma-separated list of { mask, name } pairs, one per inhibit reason, in
 * the same order as enum kvm_apicv_inhibit.
 */
#define APICV_INHIBIT_REASONS				\
	__APICV_INHIBIT_REASON(DISABLED),		\
	__APICV_INHIBIT_REASON(HYPERV),			\
	__APICV_INHIBIT_REASON(ABSENT),			\
	__APICV_INHIBIT_REASON(BLOCKIRQ),		\
	__APICV_INHIBIT_REASON(PHYSICAL_ID_ALIASED),	\
	__APICV_INHIBIT_REASON(APIC_ID_MODIFIED),	\
	__APICV_INHIBIT_REASON(APIC_BASE_MODIFIED),	\
	__APICV_INHIBIT_REASON(NESTED),			\
	__APICV_INHIBIT_REASON(IRQWIN),			\
	__APICV_INHIBIT_REASON(PIT_REINJ),		\
	__APICV_INHIBIT_REASON(SEV),			\
	__APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED)
1336
1337	struct kvm_arch {
1338	unsigned long n_used_mmu_pages;
1339	unsigned long n_requested_mmu_pages;
1340	unsigned long n_max_mmu_pages;
1341	unsigned int indirect_shadow_pages;
1342	u8 mmu_valid_gen;
1343	u8 vm_type;
1344	bool has_private_mem;
1345	bool has_protected_state;
1346	bool pre_fault_allowed;
1347	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
1348	struct list_head active_mmu_pages;
1349	/*
1350	* A list of kvm_mmu_page structs that, if zapped, could possibly be
1351	* replaced by an NX huge page. A shadow page is on this list if its
1352	* existence disallows an NX huge page (nx_huge_page_disallowed is set)
1353	* and there are no other conditions that prevent a huge page, e.g.
1354	* the backing host page is huge, dirtly logging is not enabled for its
1355	* memslot, etc... Note, zapping shadow pages on this list doesn't
1356	* guarantee an NX huge page will be created in its stead, e.g. if the
1357	* guest attempts to execute from the region then KVM obviously can't
1358	* create an NX huge page (without hanging the guest).
1359	*/
1360	struct list_head possible_nx_huge_pages;
1361	#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
1362	struct kvm_page_track_notifier_head track_notifier_head;
1363	#endif
1364	/*
1365	* Protects marking pages unsync during page faults, as TDP MMU page
1366	* faults only take mmu_lock for read. For simplicity, the unsync
1367	* pages lock is always taken when marking pages unsync regardless of
1368	* whether mmu_lock is held for read or write.
1369	*/
1370	spinlock_t mmu_unsync_pages_lock;
1371
1372	u64 shadow_mmio_value;
1373
1374	#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
1375	atomic_t noncoherent_dma_count;
1376	#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
1377	atomic_t assigned_device_count;
1378	struct kvm_pic *vpic;
1379	struct kvm_ioapic *vioapic;
1380	struct kvm_pit *vpit;
1381	atomic_t vapics_in_nmi_mode;
1382	struct mutex apic_map_lock;
1383	struct kvm_apic_map __rcu *apic_map;
1384	atomic_t apic_map_dirty;
1385
1386	bool apic_access_memslot_enabled;
1387	bool apic_access_memslot_inhibited;
1388
1389	/ Protects apicv_inhibit_reasons /
1390	struct rw_semaphore apicv_update_lock;
1391	unsigned long apicv_inhibit_reasons;
1392
1393	gpa_t wall_clock;
1394
1395	bool mwait_in_guest;
1396	bool hlt_in_guest;
1397	bool pause_in_guest;
1398	bool cstate_in_guest;
1399
1400	unsigned long irq_sources_bitmap;
1401	s64 kvmclock_offset;
1402
1403	/*
1404	* This also protects nr_vcpus_matched_tsc which is read from a
1405	* preemption-disabled region, so it must be a raw spinlock.
1406	*/
1407	raw_spinlock_t tsc_write_lock;
1408	u64 last_tsc_nsec;
1409	u64 last_tsc_write;
1410	u32 last_tsc_khz;
1411	u64 last_tsc_offset;
1412	u64 cur_tsc_nsec;
1413	u64 cur_tsc_write;
1414	u64 cur_tsc_offset;
1415	u64 cur_tsc_generation;
1416	int nr_vcpus_matched_tsc;
1417
1418	u32 default_tsc_khz;
1419	bool user_set_tsc;
1420	u64 apic_bus_cycle_ns;
1421
1422	seqcount_raw_spinlock_t pvclock_sc;
1423	bool use_master_clock;
1424	u64 master_kernel_ns;
1425	u64 master_cycle_now;
1426	struct delayed_work kvmclock_update_work;
1427	struct delayed_work kvmclock_sync_work;
1428
1429	/ reads protected by irq_srcu, writes by irq_lock /
1430	struct hlist_head mask_notifier_list;
1431
1432	#ifdef CONFIG_KVM_HYPERV
1433	struct kvm_hv hyperv;
1434	#endif
1435
1436	#ifdef CONFIG_KVM_XEN
1437	struct kvm_xen xen;
1438	#endif
1439
1440	bool backwards_tsc_observed;
1441	bool boot_vcpu_runs_old_kvmclock;
1442	u32 bsp_vcpu_id;
1443
1444	u64 disabled_quirks;
1445
1446	enum kvm_irqchip_mode irqchip_mode;
1447	u8 nr_reserved_ioapic_pins;
1448
1449	bool disabled_lapic_found;
1450
1451	bool x2apic_format;
1452	bool x2apic_broadcast_quirk_disabled;
1453
1454	bool guest_can_read_msr_platform_info;
1455	bool exception_payload_enabled;
1456
1457	bool triple_fault_event;
1458
1459	bool bus_lock_detection_enabled;
1460	bool enable_pmu;
1461
1462	u32 notify_window;
1463	u32 notify_vmexit_flags;
1464	/*
1465	* If exit_on_emulation_error is set, and the in-kernel instruction
1466	* emulator fails to emulate an instruction, allow userspace
1467	* the opportunity to look at it.
1468	*/
1469	bool exit_on_emulation_error;
1470
1471	/ Deflect RDMSR and WRMSR to user space when they trigger a #GP /
1472	u32 user_space_msr_mask;
1473	struct kvm_x86_msr_filter __rcu *msr_filter;
1474
1475	u32 hypercall_exit_enabled;
1476
1477	/ Guest can access the SGX PROVISIONKEY. /
1478	bool sgx_provisioning_allowed;
1479
1480	struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
1481	struct vhost_task *nx_huge_page_recovery_thread;
1482	u64 nx_huge_page_last;
1483	struct once nx_once;
1484
1485	#ifdef CONFIG_X86_64
1486	#ifdef CONFIG_KVM_PROVE_MMU
1487	/*
1488	* The number of TDP MMU pages across all roots. Used only to sanity
1489	* check that KVM isn't leaking TDP MMU pages.
1490	*/
1491	atomic64_t tdp_mmu_pages;
1492	#endif
1493
1494	/*
1495	* List of struct kvm_mmu_pages being used as roots.
1496	* All struct kvm_mmu_pages in the list should have
1497	* tdp_mmu_page set.
1498	*
1499	* For reads, this list is protected by:
1500	* RCU alone or
1501	* the MMU lock in read mode + RCU or
1502	* the MMU lock in write mode
1503	*
1504	* For writes, this list is protected by tdp_mmu_pages_lock; see
1505	* below for the details.
1506	*
1507	* Roots will remain in the list until their tdp_mmu_root_count
1508	* drops to zero, at which point the thread that decremented the
1509	* count to zero should removed the root from the list and clean
1510	* it up, freeing the root after an RCU grace period.
1511	*/
1512	struct list_head tdp_mmu_roots;
1513
1514	/*
1515	* Protects accesses to the following fields when the MMU lock
1516	* is held in read mode:
1517	* - tdp_mmu_roots (above)
1518	* - the link field of kvm_mmu_page structs used by the TDP MMU
1519	* - possible_nx_huge_pages;
1520	* - the possible_nx_huge_page_link field of kvm_mmu_page structs used
1521	* by the TDP MMU
1522	* Because the lock is only taken within the MMU lock, strictly
1523	* speaking it is redundant to acquire this lock when the thread
1524	* holds the MMU lock in write mode. However it often simplifies
1525	* the code to do so.
1526	*/
1527	spinlock_t tdp_mmu_pages_lock;
1528	#endif /* CONFIG_X86_64 */
1529
1530	/*
1531	* If set, at least one shadow root has been allocated. This flag
1532	* is used as one input when determining whether certain memslot
1533	* related allocations are necessary.
1534	*/
1535	bool shadow_root_allocated;
1536
1537	#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
1538	/*
1539	* If set, the VM has (or had) an external write tracking user, and
1540	* thus all write tracking metadata has been allocated, even if KVM
1541	* itself isn't using write tracking.
1542	*/
1543	bool external_write_tracking_enabled;
1544	#endif
1545
1546	#if IS_ENABLED(CONFIG_HYPERV)
1547	hpa_t hv_root_tdp;
1548	spinlock_t hv_root_tdp_lock;
1549	struct hv_partition_assist_pg *hv_pa_pg;
1550	#endif
1551	/*
1552	* VM-scope maximum vCPU ID. Used to determine the size of structures
1553	* that increase along with the maximum vCPU ID, in which case, using
1554	* the global KVM_MAX_VCPU_IDS may lead to significant memory waste.
1555	*/
1556	u32 max_vcpu_ids;
1557
1558	bool disable_nx_huge_pages;
1559
1560	/*
1561	* Memory caches used to allocate shadow pages when performing eager
1562	* page splitting. No need for a shadowed_info_cache since eager page
1563	* splitting only allocates direct shadow pages.
1564	*
1565	* Protected by kvm->slots_lock.
1566	*/
1567	struct kvm_mmu_memory_cache split_shadow_page_cache;
1568	struct kvm_mmu_memory_cache split_page_header_cache;
1569
1570	/*
1571	* Memory cache used to allocate pte_list_desc structs while splitting
1572	* huge pages. In the worst case, to split one huge page, 512
1573	* pte_list_desc structs are needed to add each lower level leaf sptep
1574	* to the rmap plus 1 to extend the parent_ptes rmap of the lower level
1575	* page table.
1576	*
1577	* Protected by kvm->slots_lock.
1578	*/
1579	#define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
1580	struct kvm_mmu_memory_cache split_desc_cache;
1581
1582	gfn_t gfn_direct_bits;
1583
1584	/*
1585	* Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A Zero
1586	* value indicates CPU dirty logging is unsupported or disabled in
1587	* current VM.
1588	*/
1589	int cpu_dirty_log_size;
1590	};
1591
1592	struct kvm_vm_stat {
1593	struct kvm_vm_stat_generic generic;
1594	u64 mmu_shadow_zapped;
1595	u64 mmu_pte_write;
1596	u64 mmu_pde_zapped;
1597	u64 mmu_flooded;
1598	u64 mmu_recycled;
1599	u64 mmu_cache_miss;
1600	u64 mmu_unsync;
1601	union {
1602	struct {
1603	atomic64_t pages_4k;
1604	atomic64_t pages_2m;
1605	atomic64_t pages_1g;
1606	};
1607	atomic64_t pages[KVM_NR_PAGE_SIZES];
1608	};
1609	u64 nx_lpage_splits;
1610	u64 max_mmu_page_hash_collisions;
1611	u64 max_mmu_rmap_size;
1612	};
1613
1614	struct kvm_vcpu_stat {
1615	struct kvm_vcpu_stat_generic generic;
1616	u64 pf_taken;
1617	u64 pf_fixed;
1618	u64 pf_emulate;
1619	u64 pf_spurious;
1620	u64 pf_fast;
1621	u64 pf_mmio_spte_created;
1622	u64 pf_guest;
1623	u64 tlb_flush;
1624	u64 invlpg;
1625
1626	u64 exits;
1627	u64 io_exits;
1628	u64 mmio_exits;
1629	u64 signal_exits;
1630	u64 irq_window_exits;
1631	u64 nmi_window_exits;
1632	u64 l1d_flush;
1633	u64 halt_exits;
1634	u64 request_irq_exits;
1635	u64 irq_exits;
1636	u64 host_state_reload;
1637	u64 fpu_reload;
1638	u64 insn_emulation;
1639	u64 insn_emulation_fail;
1640	u64 hypercalls;
1641	u64 irq_injections;
1642	u64 nmi_injections;
1643	u64 req_event;
1644	u64 nested_run;
1645	u64 directed_yield_attempted;
1646	u64 directed_yield_successful;
1647	u64 preemption_reported;
1648	u64 preemption_other;
1649	u64 guest_mode;
1650	u64 notify_window_exits;
1651	};
1652
1653	struct x86_instruction_info;
1654
1655	struct msr_data {
1656	bool host_initiated;
1657	u32 index;
1658	u64 data;
1659	};
1660
1661	struct kvm_lapic_irq {
1662	u32 vector;
1663	u16 delivery_mode;
1664	u16 dest_mode;
1665	bool level;
1666	u16 trig_mode;
1667	u32 shorthand;
1668	u32 dest_id;
1669	bool msi_redir_hint;
1670	};
1671
1672	static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
1673	{
1674	return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
1675	}
1676
1677	struct kvm_x86_ops {
1678	const char *name;
1679
1680	int (check_processor_compatibility)(void*);
1681
1682	int (enable_virtualization_cpu)(void*);
1683	void (disable_virtualization_cpu)(void*);
1684	cpu_emergency_virt_cb *emergency_disable_virtualization_cpu;
1685
1686	void (hardware_unsetup)(void*);
1687	bool (has_emulated_msr)(struct* kvm *kvm, u32 index);
1688	void (vcpu_after_set_cpuid)(struct* kvm_vcpu *vcpu);
1689
1690	unsigned int vm_size;
1691	int (vm_init)(struct* kvm *kvm);
1692	void (vm_destroy)(struct* kvm *kvm);
1693	void (vm_pre_destroy)(struct* kvm *kvm);
1694
1695	/ Create, but do not attach this VCPU /
1696	int (vcpu_precreate)(struct* kvm *kvm);
1697	int (vcpu_create)(struct* kvm_vcpu *vcpu);
1698	void (vcpu_free)(struct* kvm_vcpu *vcpu);
1699	void (vcpu_reset)(struct* kvm_vcpu *vcpu, bool init_event);
1700
1701	void (prepare_switch_to_guest)(struct* kvm_vcpu *vcpu);
1702	void (vcpu_load)(struct* kvm_vcpu vcpu, int* cpu);
1703	void (vcpu_put)(struct* kvm_vcpu *vcpu);
1704
1705	void (update_exception_bitmap)(struct* kvm_vcpu *vcpu);
1706	int (get_msr)(struct* kvm_vcpu vcpu, struct* msr_data *msr);
1707	int (set_msr)(struct* kvm_vcpu vcpu, struct* msr_data *msr);
1708	u64 (get_segment_base)(struct* kvm_vcpu vcpu, int* seg);
1709	void (get_segment)(struct* kvm_vcpu *vcpu,
1710	struct kvm_segment var, int* seg);
1711	int (get_cpl)(struct* kvm_vcpu *vcpu);
1712	int (get_cpl_no_cache)(struct* kvm_vcpu *vcpu);
1713	void (set_segment)(struct* kvm_vcpu *vcpu,
1714	struct kvm_segment var, int* seg);
1715	void (get_cs_db_l_bits)(struct* kvm_vcpu vcpu, int* db, int* *l);
1716	bool (is_valid_cr0)(struct* kvm_vcpu vcpu, unsigned* long cr0);
1717	void (set_cr0)(struct* kvm_vcpu vcpu, unsigned* long cr0);
1718	void (post_set_cr3)(struct* kvm_vcpu vcpu, unsigned* long cr3);
1719	bool (is_valid_cr4)(struct* kvm_vcpu vcpu, unsigned* long cr4);
1720	void (set_cr4)(struct* kvm_vcpu vcpu, unsigned* long cr4);
1721	int (set_efer)(struct* kvm_vcpu *vcpu, u64 efer);
1722	void (get_idt)(struct* kvm_vcpu vcpu, struct* desc_ptr *dt);
1723	void (set_idt)(struct* kvm_vcpu vcpu, struct* desc_ptr *dt);
1724	void (get_gdt)(struct* kvm_vcpu vcpu, struct* desc_ptr *dt);
1725	void (set_gdt)(struct* kvm_vcpu vcpu, struct* desc_ptr *dt);
1726	void (sync_dirty_debug_regs)(struct* kvm_vcpu *vcpu);
1727	void (set_dr6)(struct* kvm_vcpu vcpu, unsigned* long value);
1728	void (set_dr7)(struct* kvm_vcpu vcpu, unsigned* long value);
1729	void (cache_reg)(struct* kvm_vcpu vcpu, enum* kvm_reg reg);
1730	unsigned long (get_rflags)(struct* kvm_vcpu *vcpu);
1731	void (set_rflags)(struct* kvm_vcpu vcpu, unsigned* long rflags);
1732	bool (get_if_flag)(struct* kvm_vcpu *vcpu);
1733
1734	void (flush_tlb_all)(struct* kvm_vcpu *vcpu);
1735	void (flush_tlb_current)(struct* kvm_vcpu *vcpu);
1736	#if IS_ENABLED(CONFIG_HYPERV)
1737	int (flush_remote_tlbs)(struct* kvm *kvm);
1738	int (flush_remote_tlbs_range)(struct* kvm *kvm, gfn_t gfn,
1739	gfn_t nr_pages);
1740	#endif
1741
1742	/*
1743	* Flush any TLB entries associated with the given GVA.
1744	* Does not need to flush GPA->HPA mappings.
1745	* Can potentially get non-canonical addresses through INVLPGs, which
1746	* the implementation may choose to ignore if appropriate.
1747	*/
1748	void (flush_tlb_gva)(struct* kvm_vcpu *vcpu, gva_t addr);
1749
1750	/*
1751	* Flush any TLB entries created by the guest. Like tlb_flush_gva(),
1752	* does not need to flush GPA->HPA mappings.
1753	*/
1754	void (flush_tlb_guest)(struct* kvm_vcpu *vcpu);
1755
1756	int (vcpu_pre_run)(struct* kvm_vcpu *vcpu);
1757	enum exit_fastpath_completion (vcpu_run)(struct* kvm_vcpu *vcpu,
1758	bool force_immediate_exit);
1759	int (handle_exit)(struct* kvm_vcpu *vcpu,
1760	enum exit_fastpath_completion exit_fastpath);
1761	int (skip_emulated_instruction)(struct* kvm_vcpu *vcpu);
1762	void (update_emulated_instruction)(struct* kvm_vcpu *vcpu);
1763	void (set_interrupt_shadow)(struct* kvm_vcpu vcpu, int* mask);
1764	u32 (get_interrupt_shadow)(struct* kvm_vcpu *vcpu);
1765	void (patch_hypercall)(struct* kvm_vcpu *vcpu,
1766	unsigned char *hypercall_addr);
1767	void (inject_irq)(struct* kvm_vcpu *vcpu, bool reinjected);
1768	void (inject_nmi)(struct* kvm_vcpu *vcpu);
1769	void (inject_exception)(struct* kvm_vcpu *vcpu);
1770	void (cancel_injection)(struct* kvm_vcpu *vcpu);
1771	int (interrupt_allowed)(struct* kvm_vcpu *vcpu, bool for_injection);
1772	int (nmi_allowed)(struct* kvm_vcpu *vcpu, bool for_injection);
1773	bool (get_nmi_mask)(struct* kvm_vcpu *vcpu);
1774	void (set_nmi_mask)(struct* kvm_vcpu *vcpu, bool masked);
1775	/ Whether or not a virtual NMI is pending in hardware. /
1776	bool (is_vnmi_pending)(struct* kvm_vcpu *vcpu);
1777	/*
1778	* Attempt to pend a virtual NMI in hardware. Returns %true on success
1779	* to allow using static_call_ret0 as the fallback.
1780	*/
1781	bool (set_vnmi_pending)(struct* kvm_vcpu *vcpu);
1782	void (enable_nmi_window)(struct* kvm_vcpu *vcpu);
1783	void (enable_irq_window)(struct* kvm_vcpu *vcpu);
1784	void (update_cr8_intercept)(struct* kvm_vcpu vcpu, int* tpr, int irr);
1785
1786	const bool x2apic_icr_is_split;
1787	const unsigned long required_apicv_inhibits;
1788	bool allow_apicv_in_x2apic_without_x2apic_virtualization;
1789	void (refresh_apicv_exec_ctrl)(struct* kvm_vcpu *vcpu);
1790	void (hwapic_isr_update)(struct* kvm_vcpu vcpu, int* isr);
1791	void (load_eoi_exitmap)(struct* kvm_vcpu vcpu, u64 eoi_exit_bitmap);
1792	void (set_virtual_apic_mode)(struct* kvm_vcpu *vcpu);
1793	void (set_apic_access_page_addr)(struct* kvm_vcpu *vcpu);
1794	void (deliver_interrupt)(struct* kvm_lapic apic, int* delivery_mode,
1795	int trig_mode, int vector);
1796	int (sync_pir_to_irr)(struct* kvm_vcpu *vcpu);
1797	int (set_tss_addr)(struct* kvm kvm, unsigned* int addr);
1798	int (set_identity_map_addr)(struct* kvm *kvm, u64 ident_addr);
1799	u8 (get_mt_mask)(struct* kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
1800
1801	void (load_mmu_pgd)(struct* kvm_vcpu *vcpu, hpa_t root_hpa,
1802	int root_level);
1803
1804	/ Update external mapping with page table link. /
1805	int (link_external_spt)(struct* kvm kvm, gfn_t gfn, enum* pg_level level,
1806	void *external_spt);
1807	/ Update the external page table from spte getting set. /
1808	int (set_external_spte)(struct* kvm kvm, gfn_t gfn, enum* pg_level level,
1809	kvm_pfn_t pfn_for_gfn);
1810
1811	/ Update external page tables for page table about to be freed. /
1812	int (free_external_spt)(struct* kvm kvm, gfn_t gfn, enum* pg_level level,
1813	void *external_spt);
1814
1815	/ Update external page table from spte getting removed, and flush TLB. /
1816	int (remove_external_spte)(struct* kvm kvm, gfn_t gfn, enum* pg_level level,
1817	kvm_pfn_t pfn_for_gfn);
1818
1819	bool (has_wbinvd_exit)(void*);
1820
1821	u64 (get_l2_tsc_offset)(struct* kvm_vcpu *vcpu);
1822	u64 (get_l2_tsc_multiplier)(struct* kvm_vcpu *vcpu);
1823	void (write_tsc_offset)(struct* kvm_vcpu *vcpu);
1824	void (write_tsc_multiplier)(struct* kvm_vcpu *vcpu);
1825
1826	/*
1827	* Retrieve somewhat arbitrary exit/entry information. Intended to
1828	* be used only from within tracepoints or error paths.
1829	*/
1830	void (get_exit_info)(struct* kvm_vcpu vcpu, u32 reason,
1831	u64 info1, u64 info2,
1832	u32 intr_info, u32 error_code);
1833
1834	void (get_entry_info)(struct* kvm_vcpu *vcpu,
1835	u32 intr_info, u32 error_code);
1836
1837	int (check_intercept)(struct* kvm_vcpu *vcpu,
1838	struct x86_instruction_info *info,
1839	enum x86_intercept_stage stage,
1840	struct x86_exception *exception);
1841	void (handle_exit_irqoff)(struct* kvm_vcpu *vcpu);
1842
1843	void (update_cpu_dirty_logging)(struct* kvm_vcpu *vcpu);
1844
1845	const struct kvm_x86_nested_ops *nested_ops;
1846
1847	void (vcpu_blocking)(struct* kvm_vcpu *vcpu);
1848	void (vcpu_unblocking)(struct* kvm_vcpu *vcpu);
1849
1850	int (pi_update_irte)(struct* kvm kvm, unsigned* int host_irq,
1851	uint32_t guest_irq, bool set);
1852	void (pi_start_assignment)(struct* kvm *kvm);
1853	void (apicv_pre_state_restore)(struct* kvm_vcpu *vcpu);
1854	void (apicv_post_state_restore)(struct* kvm_vcpu *vcpu);
1855	bool (dy_apicv_has_pending_interrupt)(struct* kvm_vcpu *vcpu);
1856	bool (protected_apic_has_interrupt)(struct* kvm_vcpu *vcpu);
1857
1858	int (set_hv_timer)(struct* kvm_vcpu *vcpu, u64 guest_deadline_tsc,
1859	bool *expired);
1860	void (cancel_hv_timer)(struct* kvm_vcpu *vcpu);
1861
1862	void (setup_mce)(struct* kvm_vcpu *vcpu);
1863
1864	#ifdef CONFIG_KVM_SMM
1865	int (smi_allowed)(struct* kvm_vcpu *vcpu, bool for_injection);
1866	int (enter_smm)(struct* kvm_vcpu vcpu, union* kvm_smram *smram);
1867	int (leave_smm)(struct* kvm_vcpu vcpu, const* union kvm_smram *smram);
1868	void (enable_smi_window)(struct* kvm_vcpu *vcpu);
1869	#endif
1870
1871	int (dev_get_attr)(u32 group, u64 attr, u64 val);
1872	int (mem_enc_ioctl)(struct* kvm kvm, void* __user *argp);
1873	int (vcpu_mem_enc_ioctl)(struct* kvm_vcpu vcpu, void* __user *argp);
1874	int (mem_enc_register_region)(struct* kvm kvm, struct* kvm_enc_region *argp);
1875	int (mem_enc_unregister_region)(struct* kvm kvm, struct* kvm_enc_region *argp);
1876	int (vm_copy_enc_context_from)(struct* kvm kvm, unsigned* int source_fd);
1877	int (vm_move_enc_context_from)(struct* kvm kvm, unsigned* int source_fd);
1878	void (guest_memory_reclaimed)(struct* kvm *kvm);
1879
1880	int (get_feature_msr)(u32 msr, u64 data);
1881
1882	int (check_emulate_instruction)(struct* kvm_vcpu vcpu, int* emul_type,
1883	void insn, int* insn_len);
1884
1885	bool (apic_init_signal_blocked)(struct* kvm_vcpu *vcpu);
1886	int (enable_l2_tlb_flush)(struct* kvm_vcpu *vcpu);
1887
1888	void (migrate_timers)(struct* kvm_vcpu *vcpu);
1889	void (msr_filter_changed)(struct* kvm_vcpu *vcpu);
1890	int (complete_emulated_msr)(struct* kvm_vcpu vcpu, int* err);
1891
1892	void (vcpu_deliver_sipi_vector)(struct* kvm_vcpu *vcpu, u8 vector);
1893
1894	/*
1895	* Returns vCPU specific APICv inhibit reasons
1896	*/
1897	unsigned long (vcpu_get_apicv_inhibit_reasons)(struct* kvm_vcpu *vcpu);
1898
1899	gva_t (get_untagged_addr)(struct* kvm_vcpu vcpu, gva_t gva, unsigned* int flags);
1900	void (alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
1901	int (gmem_prepare)(struct* kvm kvm, kvm_pfn_t pfn, gfn_t gfn, int* max_order);
1902	void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end);
1903	int (private_max_mapping_level)(struct* kvm *kvm, kvm_pfn_t pfn);
1904	};
1905
1906	struct kvm_x86_nested_ops {
1907	void (leave_nested)(struct* kvm_vcpu *vcpu);
1908	bool (is_exception_vmexit)(struct* kvm_vcpu *vcpu, u8 vector,
1909	u32 error_code);
1910	int (check_events)(struct* kvm_vcpu *vcpu);
1911	bool (has_events)(struct* kvm_vcpu *vcpu, bool for_injection);
1912	void (triple_fault)(struct* kvm_vcpu *vcpu);
1913	int (get_state)(struct* kvm_vcpu *vcpu,
1914	struct kvm_nested_state __user *user_kvm_nested_state,
1915	unsigned user_data_size);
1916	int (set_state)(struct* kvm_vcpu *vcpu,
1917	struct kvm_nested_state __user *user_kvm_nested_state,
1918	struct kvm_nested_state *kvm_state);
1919	bool (get_nested_state_pages)(struct* kvm_vcpu *vcpu);
1920	int (write_log_dirty)(struct* kvm_vcpu *vcpu, gpa_t l2_gpa);
1921
1922	int (enable_evmcs)(struct* kvm_vcpu *vcpu,
1923	uint16_t *vmcs_version);
1924	uint16_t (get_evmcs_version)(struct* kvm_vcpu *vcpu);
1925	void (hv_inject_synthetic_vmexit_post_tlb_flush)(struct* kvm_vcpu *vcpu);
1926	};
1927
/*
 * Setup-time hooks plus pointers to the runtime and PMU op tables; this is
 * the argument type of kvm_x86_vendor_init().
 */
struct kvm_x86_init_ops {
	int (*hardware_setup)(void);
	unsigned int (*handle_intel_pt_intr)(void);

	struct kvm_x86_ops *runtime_ops;
	struct kvm_pmu_ops *pmu_ops;
};
1935
1936	struct kvm_arch_async_pf {
1937	u32 token;
1938	gfn_t gfn;
1939	unsigned long cr3;
1940	bool direct_map;
1941	u64 error_code;
1942	};
1943
1944	extern u32 __read_mostly kvm_nr_uret_msrs;
1945	extern bool __read_mostly allow_smaller_maxphyaddr;
1946	extern bool __read_mostly enable_apicv;
1947	extern bool __read_mostly enable_device_posted_irqs;
1948	extern struct kvm_x86_ops kvm_x86_ops;
1949
1950	#define kvm_x86_call(func) static_call(kvm_x86_##func)
1951	#define kvm_pmu_call(func) static_call(kvm_x86_pmu_##func)
1952
1953	#define KVM_X86_OP(func) \
1954	DECLARE_STATIC_CALL(kvm_x86_##func, (((struct kvm_x86_ops )0)->func));
1955	#define KVM_X86_OP_OPTIONAL KVM_X86_OP
1956	#define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP
1957	#include <asm/kvm-x86-ops.h>
1958
1959	int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops);
1960	void kvm_x86_vendor_exit(void);
1961
1962	#define __KVM_HAVE_ARCH_VM_ALLOC
1963	static inline struct kvm kvm_arch_alloc_vm(void*)
1964	{
1965	return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT \| __GFP_ZERO);
1966	}
1967
1968	#define __KVM_HAVE_ARCH_VM_FREE
1969	void kvm_arch_free_vm(struct kvm *kvm);
1970
1971	#if IS_ENABLED(CONFIG_HYPERV)
1972	#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
1973	static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
1974	{
1975	if (kvm_x86_ops.flush_remote_tlbs &&
1976	!kvm_x86_call(flush_remote_tlbs)(kvm))
1977	return `0`;
1978	else
1979	return -ENOTSUPP;
1980	}
1981
1982	#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
1983	static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn,
1984	u64 nr_pages)
1985	{
1986	if (!kvm_x86_ops.flush_remote_tlbs_range)
1987	return -EOPNOTSUPP;
1988
1989	return kvm_x86_call(flush_remote_tlbs_range)(kvm, gfn, nr_pages);
1990	}
1991	#endif /* CONFIG_HYPERV */
1992
enum kvm_intr_type {
	/* Values are arbitrary, but must be non-zero. */
	KVM_HANDLING_IRQ = 1,
	KVM_HANDLING_NMI,
};
1998
/*
 * Enable perf NMI and timer modes to work, and minimise false positives.
 *
 * Evaluates to true only when @vcpu is non-NULL, it is flagged as handling an
 * interrupt from the guest, and the current in_nmi() state agrees with that
 * flag being KVM_HANDLING_NMI.
 */
#define kvm_arch_pmi_in_guest(vcpu) \
	((vcpu) && (vcpu)->arch.handling_intr_from_guest && \
	 (!!in_nmi() == ((vcpu)->arch.handling_intr_from_guest == KVM_HANDLING_NMI)))
2003
2004	void __init kvm_mmu_x86_module_init(void);
2005	int kvm_mmu_vendor_module_init(void);
2006	void kvm_mmu_vendor_module_exit(void);
2007
2008	void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
2009	int kvm_mmu_create(struct kvm_vcpu *vcpu);
2010	void kvm_mmu_init_vm(struct kvm *kvm);
2011	void kvm_mmu_uninit_vm(struct kvm *kvm);
2012
2013	void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm,
2014	struct kvm_memory_slot *slot);
2015
2016	void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu);
2017	void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
2018	void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
2019	const struct kvm_memory_slot *memslot,
2020	int start_level);
2021	void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm,
2022	const struct kvm_memory_slot *memslot,
2023	int target_level);
2024	void kvm_mmu_try_split_huge_pages(struct kvm *kvm,
2025	const struct kvm_memory_slot *memslot,
2026	u64 start, u64 end,
2027	int target_level);
2028	void kvm_mmu_recover_huge_pages(struct kvm *kvm,
2029	const struct kvm_memory_slot *memslot);
2030	void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
2031	const struct kvm_memory_slot *memslot);
2032	void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
2033	void kvm_mmu_change_mmu_pages(struct kvm kvm, unsigned* long kvm_nr_mmu_pages);
2034	void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
2035
2036	int load_pdptrs(struct kvm_vcpu vcpu, unsigned* long cr3);
2037
2038	int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
2039	const void val, int* bytes);
2040
2041	struct kvm_irq_mask_notifier {
2042	void (func)(struct* kvm_irq_mask_notifier *kimn, bool masked);
2043	int irq;
2044	struct hlist_node link;
2045	};
2046
2047	void kvm_register_irq_mask_notifier(struct kvm kvm, int* irq,
2048	struct kvm_irq_mask_notifier *kimn);
2049	void kvm_unregister_irq_mask_notifier(struct kvm kvm, int* irq,
2050	struct kvm_irq_mask_notifier *kimn);
2051	void kvm_fire_mask_notifiers(struct kvm kvm, unsigned* irqchip, unsigned pin,
2052	bool mask);
2053
2054	extern bool tdp_enabled;
2055
2056	u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
2057
2058	/*
2059	* EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
2060	* userspace I/O) to indicate that the emulation context
2061	* should be reused as is, i.e. skip initialization of
2062	* emulation context, instruction fetch and decode.
2063	*
2064	* EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware.
2065	* Indicates that only select instructions (tagged with
2066	* EmulateOnUD) should be emulated (to minimize the emulator
2067	* attack surface). See also EMULTYPE_TRAP_UD_FORCED.
2068	*
2069	* EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to
2070	* decode the instruction length. For use only by
2071	* kvm_x86_ops.skip_emulated_instruction() implementations if
2072	* EMULTYPE_COMPLETE_USER_EXIT is not set.
2073	*
2074	* EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to
2075	* retry native execution under certain conditions.
2076	* Can only be set in conjunction with EMULTYPE_PF.
2077	*
2078	* EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was
2079	* triggered by KVM's magic "force emulation" prefix,
2080	* which is opt in via module param (off by default).
2081	* Bypasses EmulateOnUD restriction despite emulating
2082	* due to an intercepted #UD (see EMULTYPE_TRAP_UD).
2083	* Used to test the full emulator from userspace.
2084	*
2085	* EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware
2086	* backdoor emulation, which is opt in via module param.
2087	* VMware backdoor emulation handles select instructions
2088	* and reinjects the #GP for all other cases.
2089	*
2090	* EMULTYPE_PF - Set when an intercepted #PF triggers the emulation, in which case
2091	* the CR2/GPA value passed on the stack is valid.
2092	*
2093	* EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility
2094	* state and inject single-step #DBs after skipping
2095	* an instruction (after completing userspace I/O).
2096	*
2097	* EMULTYPE_WRITE_PF_TO_SP - Set when emulating an intercepted page fault that
2098	* is attempting to write a gfn that contains one or
2099	* more of the PTEs used to translate the write itself,
2100	* and the owning page table is being shadowed by KVM.
2101	* If emulation of the faulting instruction fails and
2102	* this flag is set, KVM will exit to userspace instead
2103	* of retrying emulation as KVM cannot make forward
2104	* progress.
2105	*
2106	* If emulation fails for a write to guest page tables,
2107	* KVM unprotects (zaps) the shadow page for the target
2108	* gfn and resumes the guest to retry the non-emulatable
2109	* instruction (on hardware). Unprotecting the gfn
2110	* doesn't allow forward progress for a self-changing
2111	* access because doing so also zaps the translation for
2112	* the gfn, i.e. retrying the instruction will hit a
2113	* !PRESENT fault, which results in a new shadow page
2114	* and sends KVM back to square one.
2115	*/
/* Emulation type flags; each occupies a distinct bit. */
#define EMULTYPE_NO_DECODE		(1 << 0)
#define EMULTYPE_TRAP_UD		(1 << 1)
#define EMULTYPE_SKIP			(1 << 2)
#define EMULTYPE_ALLOW_RETRY_PF		(1 << 3)
#define EMULTYPE_TRAP_UD_FORCED		(1 << 4)
#define EMULTYPE_VMWARE_GP		(1 << 5)
#define EMULTYPE_PF			(1 << 6)
#define EMULTYPE_COMPLETE_USER_EXIT	(1 << 7)
#define EMULTYPE_WRITE_PF_TO_SP		(1 << 8)

/* Returns true exactly when EMULTYPE_PF is clear in @emul_type. */
static inline bool kvm_can_emulate_event_vectoring(int emul_type)
{
	return (emul_type & EMULTYPE_PF) == 0;
}
2130
2131	int kvm_emulate_instruction(struct kvm_vcpu vcpu, int* emulation_type);
2132	int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
2133	void insn, int* insn_len);
2134	void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu,
2135	u64 *data, u8 ndata);
2136	void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu);
2137
2138	void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa);
2139
2140	void kvm_enable_efer_bits(u64);
2141	bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
2142	int kvm_get_msr_with_filter(struct kvm_vcpu vcpu, u32 index, u64 data);
2143	int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data);
2144	int __kvm_get_msr(struct kvm_vcpu vcpu, u32 index, u64 data, bool host_initiated);
2145	int kvm_get_msr(struct kvm_vcpu vcpu, u32 index, u64 data);
2146	int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
2147	int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
2148	int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu);
2149	int kvm_emulate_as_nop(struct kvm_vcpu *vcpu);
2150	int kvm_emulate_invd(struct kvm_vcpu *vcpu);
2151	int kvm_emulate_mwait(struct kvm_vcpu *vcpu);
2152	int kvm_handle_invalid_op(struct kvm_vcpu *vcpu);
2153	int kvm_emulate_monitor(struct kvm_vcpu *vcpu);
2154
2155	int kvm_fast_pio(struct kvm_vcpu vcpu, int* size, unsigned short port, int in);
2156	int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
2157	int kvm_emulate_halt(struct kvm_vcpu *vcpu);
2158	int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu);
2159	int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu);
2160	int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
2161
2162	void kvm_get_segment(struct kvm_vcpu vcpu, struct* kvm_segment var, int* seg);
2163	void kvm_set_segment(struct kvm_vcpu vcpu, struct* kvm_segment var, int* seg);
2164	int kvm_load_segment_descriptor(struct kvm_vcpu vcpu, u16 selector, int* seg);
2165	void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
2166
2167	int kvm_task_switch(struct kvm_vcpu vcpu, u16 tss_selector, int* idt_index,
2168	int reason, bool has_error_code, u32 error_code);
2169
2170	void kvm_post_set_cr0(struct kvm_vcpu vcpu, unsigned* long old_cr0, unsigned long cr0);
2171	void kvm_post_set_cr4(struct kvm_vcpu vcpu, unsigned* long old_cr4, unsigned long cr4);
2172	int kvm_set_cr0(struct kvm_vcpu vcpu, unsigned* long cr0);
2173	int kvm_set_cr3(struct kvm_vcpu vcpu, unsigned* long cr3);
2174	int kvm_set_cr4(struct kvm_vcpu vcpu, unsigned* long cr4);
2175	int kvm_set_cr8(struct kvm_vcpu vcpu, unsigned* long cr8);
2176	int kvm_set_dr(struct kvm_vcpu vcpu, int* dr, unsigned long val);
2177	unsigned long kvm_get_dr(struct kvm_vcpu vcpu, int* dr);
2178	unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
2179	void kvm_lmsw(struct kvm_vcpu vcpu, unsigned* long msw);
2180	int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu);
2181
2182	int kvm_get_msr_common(struct kvm_vcpu vcpu, struct* msr_data *msr);
2183	int kvm_set_msr_common(struct kvm_vcpu vcpu, struct* msr_data *msr);
2184
2185	unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
2186	void kvm_set_rflags(struct kvm_vcpu vcpu, unsigned* long rflags);
2187	int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu);
2188
2189	void kvm_queue_exception(struct kvm_vcpu vcpu, unsigned* nr);
2190	void kvm_queue_exception_e(struct kvm_vcpu vcpu, unsigned* nr, u32 error_code);
2191	void kvm_queue_exception_p(struct kvm_vcpu vcpu, unsigned* nr, unsigned long payload);
2192	void kvm_requeue_exception(struct kvm_vcpu vcpu, unsigned* int nr,
2193	bool has_error_code, u32 error_code);
2194	void kvm_inject_page_fault(struct kvm_vcpu vcpu, struct* x86_exception *fault);
2195	void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
2196	struct x86_exception *fault);
2197	bool kvm_require_cpl(struct kvm_vcpu vcpu, int* required_cpl);
2198	bool kvm_require_dr(struct kvm_vcpu vcpu, int* dr);
2199
/*
 * Update bit @irq_source_id in the bitmap at @irq_state according to @level
 * (set when non-zero, cleared when zero), then return 1 if any bit in the
 * first word of the bitmap is set and 0 otherwise.
 */
static inline int __kvm_irq_line_state(unsigned long *irq_state,
				       int irq_source_id, int level)
{
	/* Logical OR for level trig interrupt */
	if (level)
		__set_bit(irq_source_id, irq_state);
	else
		__clear_bit(irq_source_id, irq_state);

	return !!(*irq_state);
}
2211
2212	int kvm_pic_set_irq(struct kvm_pic pic, int* irq, int irq_source_id, int level);
2213	void kvm_pic_clear_all(struct kvm_pic pic, int* irq_source_id);
2214
2215	void kvm_inject_nmi(struct kvm_vcpu *vcpu);
2216	int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);
2217
2218	void kvm_update_dr7(struct kvm_vcpu *vcpu);
2219
2220	bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
2221	bool always_retry);
2222
2223	static inline bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu,
2224	gpa_t cr2_or_gpa)
2225	{
2226	return __kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa, always_retry: false);
2227	}
2228
2229	void kvm_mmu_free_roots(struct kvm kvm, struct* kvm_mmu *mmu,
2230	ulong roots_to_free);
2231	void kvm_mmu_free_guest_mode_roots(struct kvm kvm, struct* kvm_mmu *mmu);
2232	gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
2233	struct x86_exception *exception);
2234	gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
2235	struct x86_exception *exception);
2236	gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
2237	struct x86_exception *exception);
2238
2239	bool kvm_apicv_activated(struct kvm *kvm);
2240	bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu);
2241	void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
2242	void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
2243	enum kvm_apicv_inhibit reason, bool set);
2244	void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
2245	enum kvm_apicv_inhibit reason, bool set);
2246
2247	static inline void kvm_set_apicv_inhibit(struct kvm *kvm,
2248	enum kvm_apicv_inhibit reason)
2249	{
2250	kvm_set_or_clear_apicv_inhibit(kvm, reason, set: true);
2251	}
2252
2253	static inline void kvm_clear_apicv_inhibit(struct kvm *kvm,
2254	enum kvm_apicv_inhibit reason)
2255	{
2256	kvm_set_or_clear_apicv_inhibit(kvm, reason, set: false);
2257	}
2258
2259	int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
2260	void insn, int* insn_len);
2261	void kvm_mmu_print_sptes(struct kvm_vcpu vcpu, gpa_t gpa, const* char *msg);
2262	void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
2263	void kvm_mmu_invalidate_addr(struct kvm_vcpu vcpu, struct* kvm_mmu *mmu,
2264	u64 addr, unsigned long roots);
2265	void kvm_mmu_invpcid_gva(struct kvm_vcpu vcpu, gva_t gva, unsigned* long pcid);
2266	void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
2267
2268	void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
2269	int tdp_max_root_level, int tdp_huge_page_level);
2270
2271
/*
 * Private memory is reported from the VM's arch.has_private_mem flag when
 * CONFIG_KVM_PRIVATE_MEM is defined, and is a constant false otherwise.
 */
#ifdef CONFIG_KVM_PRIVATE_MEM
#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem)
#else
#define kvm_arch_has_private_mem(kvm) false
#endif

/* Read-only memory is reported exactly when arch.has_protected_state is clear. */
#define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state)
2279
2280	static inline u16 kvm_read_ldt(void)
2281	{
2282	u16 ldt;
2283	asm("sldt %0" : "=g"(ldt));
2284	return ldt;
2285	}
2286
2287	static inline void kvm_load_ldt(u16 sel)
2288	{
2289	asm("lldt %0" : : "rm"(sel));
2290	}
2291
#ifdef CONFIG_X86_64
/*
 * Read MSR @msr with rdmsrq() and return the result.  Only available when
 * CONFIG_X86_64 is defined.
 */
static inline unsigned long read_msr(unsigned long msr)
{
	u64 val;

	rdmsrq(msr, val);
	return val;
}
#endif
2301
2302	static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
2303	{
2304	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
2305	}
2306
/*
 * Size constants for the real-mode TSS.  RMODE_TSS_SIZE is the sum of the
 * base, redirection-bitmap and I/O-bitmap sizes plus one extra byte.
 * NOTE(review): the bitmap sizes look like one bit per 256 vectors and per
 * 65536 ports respectively -- confirm against the TSS layout.
 */
#define TSS_IOPB_BASE_OFFSET	0x66
#define TSS_BASE_SIZE		0x68
#define TSS_IOPB_SIZE		(65536 / 8)
#define TSS_REDIRECTION_SIZE	(256 / 8)
#define RMODE_TSS_SIZE \
	(TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1)
2313
/* Task switch reasons, with explicitly assigned values 0 through 3. */
enum {
	TASK_SWITCH_CALL = 0,
	TASK_SWITCH_IRET = 1,
	TASK_SWITCH_JMP = 2,
	TASK_SWITCH_GATE = 3,
};
2320
#define HF_GUEST_MASK		(1 << 0) /* VCPU is in guest-mode */

/*
 * With CONFIG_KVM_SMM the memslot address space is selected by HF_SMM_MASK
 * (per vCPU) or by role.smm (per SPTE role); without it, address space 0 is
 * always used for SPTE roles.
 */
#ifdef CONFIG_KVM_SMM
#define HF_SMM_MASK		(1 << 1)
#define HF_SMM_INSIDE_NMI_MASK	(1 << 2)

# define KVM_MAX_NR_ADDRESS_SPACES	2
/* SMM is currently unsupported for guests with private memory. */
# define kvm_arch_nr_memslot_as_ids(kvm) (kvm_arch_has_private_mem(kvm) ? 1 : 2)
# define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
#else
# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0)
#endif
2335
2336	int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
2337	int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
2338	int kvm_cpu_has_extint(struct kvm_vcpu *v);
2339	int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
2340	int kvm_cpu_get_extint(struct kvm_vcpu *v);
2341	int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
2342	void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
2343
2344	int kvm_pv_send_ipi(struct kvm kvm, unsigned* long ipi_bitmap_low,
2345	unsigned long ipi_bitmap_high, u32 min,
2346	unsigned long icr, int op_64_bit);
2347
2348	int kvm_add_user_return_msr(u32 msr);
2349	int kvm_find_user_return_msr(u32 msr);
2350	int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
2351	void kvm_user_return_msr_update_cache(unsigned int index, u64 val);
2352
2353	static inline bool kvm_is_supported_user_return_msr(u32 msr)
2354	{
2355	return kvm_find_user_return_msr(msr) >= `0`;
2356	}
2357
2358	u64 kvm_scale_tsc(u64 tsc, u64 ratio);
2359	u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
2360	u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier);
2361	u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier);
2362
2363	unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
2364	bool kvm_is_linear_rip(struct kvm_vcpu vcpu, unsigned* long linear_rip);
2365
2366	void kvm_make_scan_ioapic_request(struct kvm *kvm);
2367	void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
2368	unsigned long *vcpu_bitmap);
2369
2370	bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2371	struct kvm_async_pf *work);
2372	void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2373	struct kvm_async_pf *work);
2374	void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2375	struct kvm_async_pf *work);
2376	void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu);
2377	bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu);
2378	extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
2379
2380	int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
2381	int kvm_complete_insn_gp(struct kvm_vcpu vcpu, int* err);
2382
2383	void __user __x86_set_memory_region(struct* kvm kvm, int* id, gpa_t gpa,
2384	u32 size);
2385	bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
2386	bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
2387
2388	bool kvm_intr_is_single_vcpu(struct kvm kvm, struct* kvm_lapic_irq *irq,
2389	struct kvm_vcpu **dest_vcpu);
2390
2391	void kvm_set_msi_irq(struct kvm kvm, struct* kvm_kernel_irq_routing_entry *e,
2392	struct kvm_lapic_irq *irq);
2393
2394	static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
2395	{
2396	/ We can only post Fixed and LowPrio IRQs /
2397	return (irq->delivery_mode == APIC_DM_FIXED \|\|
2398	irq->delivery_mode == APIC_DM_LOWEST);
2399	}
2400
2401	static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
2402	{
2403	kvm_x86_call(vcpu_blocking)(vcpu);
2404	}
2405
2406	static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
2407	{
2408	kvm_x86_call(vcpu_unblocking)(vcpu);
2409	}
2410
2411	static inline int kvm_cpu_get_apicid(int mps_cpu)
2412	{
2413	#ifdef CONFIG_X86_LOCAL_APIC
2414	return default_cpu_present_to_apicid(mps_cpu);
2415	#else
2416	WARN_ON_ONCE(`1`);
2417	return BAD_APICID;
2418	#endif
2419	}
2420
int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);

/* Bitwise OR of the three KVM_CLOCK_* flags listed below. */
#define KVM_CLOCK_VALID_FLAGS						\
	(KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)

/* Bitwise OR of the KVM_X86_QUIRK_* bits listed below. */
#define KVM_X86_VALID_QUIRKS			\
	(KVM_X86_QUIRK_LINT0_REENABLED |	\
	 KVM_X86_QUIRK_CD_NW_CLEARED |		\
	 KVM_X86_QUIRK_LAPIC_MMIO_HOLE |	\
	 KVM_X86_QUIRK_OUT_7E_INC_RIP |		\
	 KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT |	\
	 KVM_X86_QUIRK_FIX_HYPERCALL_INSN |	\
	 KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS |	\
	 KVM_X86_QUIRK_SLOT_ZAP_ALL |		\
	 KVM_X86_QUIRK_STUFF_FEATURE_MSRS |	\
	 KVM_X86_QUIRK_IGNORE_GUEST_PAT)

/* The two quirk bits from the list above that form the conditional set. */
#define KVM_X86_CONDITIONAL_QUIRKS		\
	(KVM_X86_QUIRK_CD_NW_CLEARED |		\
	 KVM_X86_QUIRK_IGNORE_GUEST_PAT)
2441
/*
 * KVM previously used a u32 field in kvm_run to indicate the hypercall was
 * initiated from long mode.  KVM now sets bit 0 to indicate long mode, but
 * the remaining 31 bits of that former u32 (bits 31:1) must be zero to
 * preserve ABI.
 */
#define KVM_EXIT_HYPERCALL_MBZ		GENMASK_ULL(31, 1)
2448
2449	static inline bool kvm_arch_has_irq_bypass(void)
2450	{
2451	return enable_device_posted_irqs;
2452	}
2453
2454	#endif /* _ASM_X86_KVM_HOST_H */
2455

Provided by KDAB

Definitions

kvm_reg
exit_fastpath_completion
kvm_mmu_page_role
kvm_mmu_extended_role
kvm_cpu_role
kvm_rmap_head
kvm_pio_request
rsvd_bits_validate
kvm_mmu_root_info
kvm_mmu
pmc_type
kvm_pmc
kvm_pmu
kvm_mtrr
kvm_vcpu_hv_stimer
kvm_vcpu_hv_synic
hv_tlb_flush_fifos
kvm_vcpu_hv_tlb_flush_fifo
kvm_vcpu_hv
kvm_hypervisor_cpuid
kvm_vcpu_xen
kvm_queued_exception
kvm_only_cpuid_leafs
kvm_vcpu_arch
kvm_queued_interrupt
kvm_lpage_info
kvm_arch_memory_slot
kvm_apic_logical_mode
kvm_apic_map
kvm_hv_syndbg
hv_tsc_page_status
kvm_hv
msr_bitmap_range
kvm_xen
kvm_irqchip_mode
kvm_x86_msr_filter
kvm_x86_pmu_event_filter
kvm_apicv_inhibit
kvm_arch
kvm_vm_stat
kvm_vcpu_stat
msr_data
kvm_lapic_irq
kvm_lapic_irq_dest_mode
kvm_x86_ops
kvm_x86_nested_ops
kvm_x86_init_ops
kvm_arch_async_pf
kvm_arch_alloc_vm
kvm_arch_flush_remote_tlbs
kvm_arch_flush_remote_tlbs_range
kvm_intr_type
kvm_irq_mask_notifier
kvm_can_emulate_event_vectoring
__kvm_irq_line_state
kvm_mmu_unprotect_gfn_and_retry
kvm_set_apicv_inhibit
kvm_clear_apicv_inhibit
kvm_read_ldt
kvm_load_ldt
read_msr
kvm_inject_gp
kvm_is_supported_user_return_msr
kvm_irq_is_postable
kvm_arch_vcpu_blocking
kvm_arch_vcpu_unblocking
kvm_cpu_get_apicid

Improve your Profiling and Debugging skills

Find out more

Definitions

source code of linux/arch/x86/include/asm/kvm_host.h