From e7e80b66fb242a63cdfc3d3534cff62a5e293185 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 5 Jun 2024 16:19:09 -0700 Subject: [PATCH 01/10] x86/cpu: KVM: Add common defines for architectural memory types (PAT, MTRRs, etc.) Add defines for the architectural memory types that can be shoved into various MSRs and registers, e.g. MTRRs, PAT, VMX capabilities MSRs, EPTPs, etc. While most MSRs/registers support only a subset of all memory types, the values themselves are architectural and identical across all users. Leave the goofy MTRR_TYPE_* definitions as-is since they are in a uapi header, but add compile-time assertions to connect the dots (and sanity check that the msr-index.h values didn't get fat-fingered). Keep the VMX_EPTP_MT_* defines so that it's slightly more obvious that the EPTP holds a single memory type in 3 of its 64 bits; those bits just happen to be 2:0, i.e. don't need to be shifted. Opportunistically use X86_MEMTYPE_WB instead of an open coded '6' in setup_vmcs_config(). No functional change intended. Reviewed-by: Thomas Gleixner Acked-by: Kai Huang Reviewed-by: Xiaoyao Li Reviewed-by: Kai Huang Link: https://lore.kernel.org/r/20240605231918.2915961-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/msr-index.h | 15 ++++++++++++++- arch/x86/include/asm/vmx.h | 5 +++-- arch/x86/kernel/cpu/mtrr/mtrr.c | 6 ++++++ arch/x86/kvm/vmx/nested.c | 2 +- arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/mm/pat/memtype.c | 33 ++++++++++++-------------------- 6 files changed, 37 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 82c6a4d350e0..bb43a07616e6 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -36,6 +36,20 @@ #define EFER_FFXSR (1<<_EFER_FFXSR) #define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) +/* + * Architectural memory types that are common to MTRRs, PAT, VMX MSRs, etc. + * Most MSRs support/allow only a subset of memory types, but the values + * themselves are common across all relevant MSRs. + */ +#define X86_MEMTYPE_UC 0ull /* Uncacheable, a.k.a. Strong Uncacheable */ +#define X86_MEMTYPE_WC 1ull /* Write Combining */ +/* RESERVED 2 */ +/* RESERVED 3 */ +#define X86_MEMTYPE_WT 4ull /* Write Through */ +#define X86_MEMTYPE_WP 5ull /* Write Protected */ +#define X86_MEMTYPE_WB 6ull /* Write Back */ +#define X86_MEMTYPE_UC_MINUS 7ull /* Weak Uncacheabled (PAT only) */ + /* FRED MSRs */ #define MSR_IA32_FRED_RSP0 0x1cc /* Level 0 stack pointer */ #define MSR_IA32_FRED_RSP1 0x1cd /* Level 1 stack pointer */ @@ -1163,7 +1177,6 @@ #define VMX_BASIC_64 0x0001000000000000LLU #define VMX_BASIC_MEM_TYPE_SHIFT 50 #define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU -#define VMX_BASIC_MEM_TYPE_WB 6LLU #define VMX_BASIC_INOUT 0x0040000000000000LLU /* Resctrl MSRs: */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index d77a31039f24..e531d8d80a11 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -508,9 +508,10 @@ enum vmcs_field { #define VMX_EPTP_PWL_4 0x18ull #define VMX_EPTP_PWL_5 0x20ull #define VMX_EPTP_AD_ENABLE_BIT (1ull << 6) +/* The EPTP memtype is encoded in bits 2:0, i.e. doesn't need to be shifted. */ #define VMX_EPTP_MT_MASK 0x7ull -#define VMX_EPTP_MT_WB 0x6ull -#define VMX_EPTP_MT_UC 0x0ull +#define VMX_EPTP_MT_WB X86_MEMTYPE_WB +#define VMX_EPTP_MT_UC X86_MEMTYPE_UC #define VMX_EPT_READABLE_MASK 0x1ull #define VMX_EPT_WRITABLE_MASK 0x2ull #define VMX_EPT_EXECUTABLE_MASK 0x4ull diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index 2a2fc14955cd..989d368be04f 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -55,6 +55,12 @@ #include "mtrr.h" +static_assert(X86_MEMTYPE_UC == MTRR_TYPE_UNCACHABLE); +static_assert(X86_MEMTYPE_WC == MTRR_TYPE_WRCOMB); +static_assert(X86_MEMTYPE_WT == MTRR_TYPE_WRTHROUGH); +static_assert(X86_MEMTYPE_WP == MTRR_TYPE_WRPROT); +static_assert(X86_MEMTYPE_WB == MTRR_TYPE_WRBACK); + /* arch_phys_wc_add returns an MTRR register index plus this offset. */ #define MTRR_TO_PHYS_WC_OFFSET 1000 diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 2392a7ef254d..504fe5ffd47b 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -7070,7 +7070,7 @@ static void nested_vmx_setup_basic(struct nested_vmx_msrs *msrs) VMCS12_REVISION | VMX_BASIC_TRUE_CTLS | ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | - (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); + (X86_MEMTYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); if (cpu_has_vmx_basic_inout()) msrs->basic |= VMX_BASIC_INOUT; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f18c2d8c7476..a4d077db04cf 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2747,7 +2747,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf, #endif /* Require Write-Back (WB) memory type for VMCS accesses. */ - if (((vmx_msr_high >> 18) & 15) != 6) + if (((vmx_msr_high >> 18) & 15) != X86_MEMTYPE_WB) return -EIO; rdmsrl(MSR_IA32_VMX_MISC, misc_msr); diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index bdc2a240c2aa..15b888ebaf17 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -176,15 +176,6 @@ static inline void set_page_memtype(struct page *pg, } #endif -enum { - PAT_UC = 0, /* uncached */ - PAT_WC = 1, /* Write combining */ - PAT_WT = 4, /* Write Through */ - PAT_WP = 5, /* Write Protected */ - PAT_WB = 6, /* Write Back (default) */ - PAT_UC_MINUS = 7, /* UC, but can be overridden by MTRR */ -}; - #define CM(c) (_PAGE_CACHE_MODE_ ## c) static enum page_cache_mode __init pat_get_cache_mode(unsigned int pat_val, @@ -194,13 +185,13 @@ static enum page_cache_mode __init pat_get_cache_mode(unsigned int pat_val, char *cache_mode; switch (pat_val) { - case PAT_UC: cache = CM(UC); cache_mode = "UC "; break; - case PAT_WC: cache = CM(WC); cache_mode = "WC "; break; - case PAT_WT: cache = CM(WT); cache_mode = "WT "; break; - case PAT_WP: cache = CM(WP); cache_mode = "WP "; break; - case PAT_WB: cache = CM(WB); cache_mode = "WB "; break; - case PAT_UC_MINUS: cache = CM(UC_MINUS); cache_mode = "UC- "; break; - default: cache = CM(WB); cache_mode = "WB "; break; + case X86_MEMTYPE_UC: cache = CM(UC); cache_mode = "UC "; break; + case X86_MEMTYPE_WC: cache = CM(WC); cache_mode = "WC "; break; + case X86_MEMTYPE_WT: cache = CM(WT); cache_mode = "WT "; break; + case X86_MEMTYPE_WP: cache = CM(WP); cache_mode = "WP "; break; + case X86_MEMTYPE_WB: cache = CM(WB); cache_mode = "WB "; break; + case X86_MEMTYPE_UC_MINUS: cache = CM(UC_MINUS); cache_mode = "UC- "; break; + default: cache = CM(WB); cache_mode = "WB "; break; } memcpy(msg, cache_mode, 4); @@ -257,11 +248,11 @@ void pat_cpu_init(void) void __init pat_bp_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; -#define PAT(p0, p1, p2, p3, p4, p5, p6, p7) \ - (((u64)PAT_ ## p0) | ((u64)PAT_ ## p1 << 8) | \ - ((u64)PAT_ ## p2 << 16) | ((u64)PAT_ ## p3 << 24) | \ - ((u64)PAT_ ## p4 << 32) | ((u64)PAT_ ## p5 << 40) | \ - ((u64)PAT_ ## p6 << 48) | ((u64)PAT_ ## p7 << 56)) +#define PAT(p0, p1, p2, p3, p4, p5, p6, p7) \ + ((X86_MEMTYPE_ ## p0) | (X86_MEMTYPE_ ## p1 << 8) | \ + (X86_MEMTYPE_ ## p2 << 16) | (X86_MEMTYPE_ ## p3 << 24) | \ + (X86_MEMTYPE_ ## p4 << 32) | (X86_MEMTYPE_ ## p5 << 40) | \ + (X86_MEMTYPE_ ## p6 << 48) | (X86_MEMTYPE_ ## p7 << 56)) if (!IS_ENABLED(CONFIG_X86_PAT)) From beb2e446046f8dd96bbeed3267b5f26bf1926ef9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 5 Jun 2024 16:19:10 -0700 Subject: [PATCH 02/10] x86/cpu: KVM: Move macro to encode PAT value to common header Move pat/memtype.c's PAT() macro to msr-index.h as PAT_VALUE(), and use it in KVM to define the default (Power-On / RESET) PAT value instead of open coding an inscrutable magic number. No functional change intended. Reviewed-by: Xiaoyao Li Reviewed-by: Kai Huang Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240605231918.2915961-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/msr-index.h | 6 ++++++ arch/x86/kvm/x86.h | 3 ++- arch/x86/mm/pat/memtype.c | 13 +++---------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index bb43a07616e6..f6eb3460e169 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -377,6 +377,12 @@ #define MSR_IA32_CR_PAT 0x00000277 +#define PAT_VALUE(p0, p1, p2, p3, p4, p5, p6, p7) \ + ((X86_MEMTYPE_ ## p0) | (X86_MEMTYPE_ ## p1 << 8) | \ + (X86_MEMTYPE_ ## p2 << 16) | (X86_MEMTYPE_ ## p3 << 24) | \ + (X86_MEMTYPE_ ## p4 << 32) | (X86_MEMTYPE_ ## p5 << 40) | \ + (X86_MEMTYPE_ ## p6 << 48) | (X86_MEMTYPE_ ## p7 << 56)) + #define MSR_IA32_DEBUGCTLMSR 0x000001d9 #define MSR_IA32_LASTBRANCHFROMIP 0x000001db #define MSR_IA32_LASTBRANCHTOIP 0x000001dc diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 50596f6f8320..5585c2ef147a 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -103,7 +103,8 @@ static inline unsigned int __shrink_ple_window(unsigned int val, return max(val, min); } -#define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL +#define MSR_IA32_CR_PAT_DEFAULT \ + PAT_VALUE(WB, WT, UC_MINUS, UC, WB, WT, UC_MINUS, UC) void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu); int kvm_check_nested_events(struct kvm_vcpu *vcpu); diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 15b888ebaf17..6c4e29457c10 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -248,12 +248,6 @@ void pat_cpu_init(void) void __init pat_bp_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; -#define PAT(p0, p1, p2, p3, p4, p5, p6, p7) \ - ((X86_MEMTYPE_ ## p0) | (X86_MEMTYPE_ ## p1 << 8) | \ - (X86_MEMTYPE_ ## p2 << 16) | (X86_MEMTYPE_ ## p3 << 24) | \ - (X86_MEMTYPE_ ## p4 << 32) | (X86_MEMTYPE_ ## p5 << 40) | \ - (X86_MEMTYPE_ ## p6 << 48) | (X86_MEMTYPE_ ## p7 << 56)) - if (!IS_ENABLED(CONFIG_X86_PAT)) pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n"); @@ -284,7 +278,7 @@ void __init pat_bp_init(void) * NOTE: When WC or WP is used, it is redirected to UC- per * the default setup in __cachemode2pte_tbl[]. */ - pat_msr_val = PAT(WB, WT, UC_MINUS, UC, WB, WT, UC_MINUS, UC); + pat_msr_val = PAT_VALUE(WB, WT, UC_MINUS, UC, WB, WT, UC_MINUS, UC); } /* @@ -319,7 +313,7 @@ void __init pat_bp_init(void) * NOTE: When WT or WP is used, it is redirected to UC- per * the default setup in __cachemode2pte_tbl[]. */ - pat_msr_val = PAT(WB, WC, UC_MINUS, UC, WB, WC, UC_MINUS, UC); + pat_msr_val = PAT_VALUE(WB, WC, UC_MINUS, UC, WB, WC, UC_MINUS, UC); } else { /* * Full PAT support. We put WT in slot 7 to improve @@ -347,13 +341,12 @@ void __init pat_bp_init(void) * The reserved slots are unused, but mapped to their * corresponding types in the presence of PAT errata. */ - pat_msr_val = PAT(WB, WC, UC_MINUS, UC, WB, WP, UC_MINUS, WT); + pat_msr_val = PAT_VALUE(WB, WC, UC_MINUS, UC, WB, WP, UC_MINUS, WT); } memory_caching_control |= CACHE_PAT; init_cache_modes(pat_msr_val); -#undef PAT } static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */ From b6717d35d8597d19f44736f11963e3bd5b8881b4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 5 Jun 2024 16:19:11 -0700 Subject: [PATCH 03/10] KVM: x86: Stuff vCPU's PAT with default value at RESET, not creation Move the stuffing of the vCPU's PAT to the architectural "default" value from kvm_arch_vcpu_create() to kvm_vcpu_reset(), guarded by !init_event, to better capture that the default value is the value "Following Power-up or Reset". E.g. setting PAT only during creation would break if KVM were to expose a RESET ioctl() to userspace (which is unlikely, but that's not a good reason to have unintuitive code). No functional change. Reviewed-by: Xiaoyao Li Reviewed-by: Kai Huang Reviewed-by: Jim Mattson Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20240605231918.2915961-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 70219e406987..3ef6299ef823 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12260,8 +12260,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu); - vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; - kvm_async_pf_hash_reset(vcpu); vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap; @@ -12427,6 +12425,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (!init_event) { vcpu->arch.smbase = 0x30000; + vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; + vcpu->arch.msr_misc_features_enables = 0; vcpu->arch.ia32_misc_enable_msr = MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | MSR_IA32_MISC_ENABLE_BTS_UNAVAIL; From d7bfc9ffd58037ff86f9fd0c3cef77cccb555da3 Mon Sep 17 00:00:00 2001 From: Xin Li Date: Wed, 5 Jun 2024 16:19:12 -0700 Subject: [PATCH 04/10] KVM: VMX: Move MSR_IA32_VMX_BASIC bit defines to asm/vmx.h Move the bit defines for MSR_IA32_VMX_BASIC from msr-index.h to vmx.h so that they are colocated with other VMX MSR bit defines, and with the helpers that extract specific information from an MSR_IA32_VMX_BASIC value. Opportunistically use BIT_ULL() instead of open coding hex values. Opportunistically rename VMX_BASIC_64 to VMX_BASIC_32BIT_PHYS_ADDR_ONLY, as "VMX_BASIC_64" is widly misleading. The flag enumerates that addresses are limited to 32 bits, not that 64-bit addresses are allowed. Last but not least, opportunistically #define DUAL_MONITOR_TREATMENT so that all known single-bit feature flags are defined (this will allow replacing open-coded literals in the future). Cc: Shan Kang Cc: Kai Huang Signed-off-by: Xin Li [sean: split to separate patch, write changelog] Reviewed-by: Zhao Liu Reviewed-by: Kai Huang Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20240605231918.2915961-5-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/msr-index.h | 8 -------- arch/x86/include/asm/vmx.h | 7 +++++++ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index f6eb3460e169..ad0b579d1c01 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -1177,14 +1177,6 @@ #define MSR_IA32_VMX_VMFUNC 0x00000491 #define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492 -/* VMX_BASIC bits and bitmasks */ -#define VMX_BASIC_VMCS_SIZE_SHIFT 32 -#define VMX_BASIC_TRUE_CTLS (1ULL << 55) -#define VMX_BASIC_64 0x0001000000000000LLU -#define VMX_BASIC_MEM_TYPE_SHIFT 50 -#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU -#define VMX_BASIC_INOUT 0x0040000000000000LLU - /* Resctrl MSRs: */ /* - Intel: */ #define MSR_IA32_L3_QOS_CFG 0xc81 diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index e531d8d80a11..81b986e501a9 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -135,6 +135,13 @@ #define VMX_VMFUNC_EPTP_SWITCHING VMFUNC_CONTROL_BIT(EPTP_SWITCHING) #define VMFUNC_EPTP_ENTRIES 512 +#define VMX_BASIC_VMCS_SIZE_SHIFT 32 +#define VMX_BASIC_32BIT_PHYS_ADDR_ONLY BIT_ULL(48) +#define VMX_BASIC_DUAL_MONITOR_TREATMENT BIT_ULL(49) +#define VMX_BASIC_MEM_TYPE_SHIFT 50 +#define VMX_BASIC_INOUT BIT_ULL(54) +#define VMX_BASIC_TRUE_CTLS BIT_ULL(55) + static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) { return vmx_basic & GENMASK_ULL(30, 0); From 9df398ff7d2ab4fa2ecf6131044431dc94c5bdf6 Mon Sep 17 00:00:00 2001 From: Xin Li Date: Wed, 5 Jun 2024 16:19:13 -0700 Subject: [PATCH 05/10] KVM: VMX: Track CPU's MSR_IA32_VMX_BASIC as a single 64-bit value Track the "basic" capabilities VMX MSR as a single u64 in vmcs_config instead of splitting it across three fields, that obviously don't combine into a single 64-bit value, so that KVM can use the macros that define MSR bits using their absolute position. Replace all open coded shifts and masks, many of which are relative to the "high" half, with the appropriate macro. Opportunistically use VMX_BASIC_32BIT_PHYS_ADDR_ONLY instead of an open coded equivalent, and clean up the related comment to not reference a specific SDM section (to the surprise of no one, the comment is stale). No functional change intended (though obviously the code generation will be quite different). Cc: Shan Kang Cc: Kai Huang Signed-off-by: Xin Li [sean: split to separate patch, write changelog] Reviewed-by: Xiaoyao Li Reviewed-by: Kai Huang Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20240605231918.2915961-6-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/vmx.h | 5 +++++ arch/x86/kvm/vmx/capabilities.h | 6 ++---- arch/x86/kvm/vmx/vmx.c | 28 ++++++++++++++-------------- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 81b986e501a9..90963b14afaa 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -152,6 +152,11 @@ static inline u32 vmx_basic_vmcs_size(u64 vmx_basic) return (vmx_basic & GENMASK_ULL(44, 32)) >> 32; } +static inline u32 vmx_basic_vmcs_mem_type(u64 vmx_basic) +{ + return (vmx_basic & GENMASK_ULL(53, 50)) >> 50; +} + static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc) { return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 41a4533f9989..86ce8bb96bed 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -54,9 +54,7 @@ struct nested_vmx_msrs { }; struct vmcs_config { - int size; - u32 basic_cap; - u32 revision_id; + u64 basic; u32 pin_based_exec_ctrl; u32 cpu_based_exec_ctrl; u32 cpu_based_2nd_exec_ctrl; @@ -76,7 +74,7 @@ extern struct vmx_capability vmx_capability __ro_after_init; static inline bool cpu_has_vmx_basic_inout(void) { - return (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT); + return vmcs_config.basic & VMX_BASIC_INOUT; } static inline bool cpu_has_virtual_nmis(void) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a4d077db04cf..5cbd86c4386b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2605,13 +2605,13 @@ static u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr) static int setup_vmcs_config(struct vmcs_config *vmcs_conf, struct vmx_capability *vmx_cap) { - u32 vmx_msr_low, vmx_msr_high; u32 _pin_based_exec_control = 0; u32 _cpu_based_exec_control = 0; u32 _cpu_based_2nd_exec_control = 0; u64 _cpu_based_3rd_exec_control = 0; u32 _vmexit_control = 0; u32 _vmentry_control = 0; + u64 basic_msr; u64 misc_msr; int i; @@ -2734,29 +2734,29 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf, _vmexit_control &= ~x_ctrl; } - rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); + rdmsrl(MSR_IA32_VMX_BASIC, basic_msr); /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ - if ((vmx_msr_high & 0x1fff) > PAGE_SIZE) + if (vmx_basic_vmcs_size(basic_msr) > PAGE_SIZE) return -EIO; #ifdef CONFIG_X86_64 - /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */ - if (vmx_msr_high & (1u<<16)) + /* + * KVM expects to be able to shove all legal physical addresses into + * VMCS fields for 64-bit kernels, and per the SDM, "This bit is always + * 0 for processors that support Intel 64 architecture". + */ + if (basic_msr & VMX_BASIC_32BIT_PHYS_ADDR_ONLY) return -EIO; #endif /* Require Write-Back (WB) memory type for VMCS accesses. */ - if (((vmx_msr_high >> 18) & 15) != X86_MEMTYPE_WB) + if (vmx_basic_vmcs_mem_type(basic_msr) != X86_MEMTYPE_WB) return -EIO; rdmsrl(MSR_IA32_VMX_MISC, misc_msr); - vmcs_conf->size = vmx_msr_high & 0x1fff; - vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; - - vmcs_conf->revision_id = vmx_msr_low; - + vmcs_conf->basic = basic_msr; vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control; vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control; vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control; @@ -2903,13 +2903,13 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) if (!pages) return NULL; vmcs = page_address(pages); - memset(vmcs, 0, vmcs_config.size); + memset(vmcs, 0, vmx_basic_vmcs_size(vmcs_config.basic)); /* KVM supports Enlightened VMCS v1 only */ if (kvm_is_using_evmcs()) vmcs->hdr.revision_id = KVM_EVMCS_VERSION; else - vmcs->hdr.revision_id = vmcs_config.revision_id; + vmcs->hdr.revision_id = vmx_basic_vmcs_revision_id(vmcs_config.basic); if (shadow) vmcs->hdr.shadow_vmcs = 1; @@ -3002,7 +3002,7 @@ static __init int alloc_kvm_area(void) * physical CPU. */ if (kvm_is_using_evmcs()) - vmcs->hdr.revision_id = vmcs_config.revision_id; + vmcs->hdr.revision_id = vmx_basic_vmcs_revision_id(vmcs_config.basic); per_cpu(vmxarea, cpu) = vmcs; } From c97b106fa8aa500823695abfda7c9bdefb42a648 Mon Sep 17 00:00:00 2001 From: Xin Li Date: Wed, 5 Jun 2024 16:19:14 -0700 Subject: [PATCH 06/10] KVM: nVMX: Use macros and #defines in vmx_restore_vmx_basic() Use macros in vmx_restore_vmx_basic() instead of open coding everything using BIT_ULL() and GENMASK_ULL(). Opportunistically split feature bits and reserved bits into separate variables, and add a comment explaining the subset logic (it's not immediately obvious that the set of feature bits is NOT the set of _supported_ feature bits). Cc: Shan Kang Cc: Kai Huang Signed-off-by: Xin Li [sean: split to separate patch, write changelog, drop #defines] Reviewed-by: Zhao Liu Reviewed-by: Xiaoyao Li Reviewed-by: Kai Huang Link: https://lore.kernel.org/r/20240605231918.2915961-7-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/nested.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 504fe5ffd47b..eba5e94a3c7c 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -1251,21 +1251,32 @@ static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask) static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data) { - const u64 feature_and_reserved = - /* feature (except bit 48; see below) */ - BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) | - /* reserved */ - BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56); + const u64 feature_bits = VMX_BASIC_DUAL_MONITOR_TREATMENT | + VMX_BASIC_INOUT | + VMX_BASIC_TRUE_CTLS; + + const u64 reserved_bits = GENMASK_ULL(63, 56) | + GENMASK_ULL(47, 45) | + BIT_ULL(31); + u64 vmx_basic = vmcs_config.nested.basic; - if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved)) + BUILD_BUG_ON(feature_bits & reserved_bits); + + /* + * Except for 32BIT_PHYS_ADDR_ONLY, which is an anti-feature bit (has + * inverted polarity), the incoming value must not set feature bits or + * reserved bits that aren't allowed/supported by KVM. Fields, i.e. + * multi-bit values, are explicitly checked below. + */ + if (!is_bitwise_subset(vmx_basic, data, feature_bits | reserved_bits)) return -EINVAL; /* * KVM does not emulate a version of VMX that constrains physical * addresses of VMX structures (e.g. VMCS) to 32-bits. */ - if (data & BIT_ULL(48)) + if (data & VMX_BASIC_32BIT_PHYS_ADDR_ONLY) return -EINVAL; if (vmx_basic_vmcs_revision_id(vmx_basic) != From 92e648042c237c3bba223ab7f7e1a76f41ab3ef7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 5 Jun 2024 16:19:15 -0700 Subject: [PATCH 07/10] KVM: nVMX: Add a helper to encode VMCS info in MSR_IA32_VMX_BASIC Add a helper to encode the VMCS revision, size, and supported memory types in MSR_IA32_VMX_BASIC, i.e. when synthesizing KVM's supported BASIC MSR value, and delete the now unused VMCS size and memtype shift macros. For a variety of reasons, KVM has shifted (pun intended) to using helpers to *get* information from the VMX MSRs, as opposed to defined MASK and SHIFT macros for direct use. Provide a similar helper for the nested VMX code, which needs to *set* information, so that KVM isn't left with a mix of SHIFT macros and dedicated helpers. Reported-by: Xiaoyao Li Reviewed-by: Xiaoyao Li Reviewed-by: Kai Huang Link: https://lore.kernel.org/r/20240605231918.2915961-8-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/vmx.h | 7 +++++-- arch/x86/kvm/vmx/nested.c | 8 +++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 90963b14afaa..65aaf0577265 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -135,10 +135,8 @@ #define VMX_VMFUNC_EPTP_SWITCHING VMFUNC_CONTROL_BIT(EPTP_SWITCHING) #define VMFUNC_EPTP_ENTRIES 512 -#define VMX_BASIC_VMCS_SIZE_SHIFT 32 #define VMX_BASIC_32BIT_PHYS_ADDR_ONLY BIT_ULL(48) #define VMX_BASIC_DUAL_MONITOR_TREATMENT BIT_ULL(49) -#define VMX_BASIC_MEM_TYPE_SHIFT 50 #define VMX_BASIC_INOUT BIT_ULL(54) #define VMX_BASIC_TRUE_CTLS BIT_ULL(55) @@ -157,6 +155,11 @@ static inline u32 vmx_basic_vmcs_mem_type(u64 vmx_basic) return (vmx_basic & GENMASK_ULL(53, 50)) >> 50; } +static inline u64 vmx_basic_encode_vmcs_info(u32 revision, u16 size, u8 memtype) +{ + return revision | ((u64)size << 32) | ((u64)memtype << 50); +} + static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc) { return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index eba5e94a3c7c..335f1a650497 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -7077,12 +7077,10 @@ static void nested_vmx_setup_basic(struct nested_vmx_msrs *msrs) * guest, and the VMCS structure we give it - not about the * VMX support of the underlying hardware. */ - msrs->basic = - VMCS12_REVISION | - VMX_BASIC_TRUE_CTLS | - ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | - (X86_MEMTYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); + msrs->basic = vmx_basic_encode_vmcs_info(VMCS12_REVISION, VMCS12_SIZE, + X86_MEMTYPE_WB); + msrs->basic |= VMX_BASIC_TRUE_CTLS; if (cpu_has_vmx_basic_inout()) msrs->basic |= VMX_BASIC_INOUT; } From dc1e67f70f6d4e336da06cb61bf6669d6fa39869 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 5 Jun 2024 16:19:16 -0700 Subject: [PATCH 08/10] KVM VMX: Move MSR_IA32_VMX_MISC bit defines to asm/vmx.h Move the handful of MSR_IA32_VMX_MISC bit defines that are currently in msr-indx.h to vmx.h so that all of the VMX_MISC defines and wrappers can be found in a single location. Opportunistically use BIT_ULL() instead of open coding hex values, add defines for feature bits that are architecturally defined, and move the defines down in the file so that they are colocated with the helpers for getting fields from VMX_MISC. No functional change intended. Cc: Shan Kang Cc: Kai Huang Signed-off-by: Xin Li [sean: split to separate patch, write changelog] Reviewed-by: Zhao Liu Reviewed-by: Kai Huang Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20240605231918.2915961-9-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/msr-index.h | 5 ----- arch/x86/include/asm/vmx.h | 19 ++++++++++++------- arch/x86/kvm/vmx/capabilities.h | 4 ++-- arch/x86/kvm/vmx/nested.c | 2 +- arch/x86/kvm/vmx/nested.h | 2 +- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ad0b579d1c01..72c2c0ecb62c 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -1194,11 +1194,6 @@ #define MSR_IA32_SMBA_BW_BASE 0xc0000280 #define MSR_IA32_EVT_CFG_BASE 0xc0000400 -/* MSR_IA32_VMX_MISC bits */ -#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14) -#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) -#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F - /* AMD-V MSRs */ #define MSR_VM_CR 0xc0010114 #define MSR_VM_IGNNE 0xc0010115 diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 65aaf0577265..400819ccb42c 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -122,13 +122,6 @@ #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff -#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f -#define VMX_MISC_SAVE_EFER_LMA 0x00000020 -#define VMX_MISC_ACTIVITY_HLT 0x00000040 -#define VMX_MISC_ACTIVITY_WAIT_SIPI 0x00000100 -#define VMX_MISC_ZERO_LEN_INS 0x40000000 -#define VMX_MISC_MSR_LIST_MULTIPLIER 512 - /* VMFUNC functions */ #define VMFUNC_CONTROL_BIT(x) BIT((VMX_FEATURE_##x & 0x1f) - 28) @@ -160,6 +153,18 @@ static inline u64 vmx_basic_encode_vmcs_info(u32 revision, u16 size, u8 memtype) return revision | ((u64)size << 32) | ((u64)memtype << 50); } +#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK GENMASK_ULL(4, 0) +#define VMX_MISC_SAVE_EFER_LMA BIT_ULL(5) +#define VMX_MISC_ACTIVITY_HLT BIT_ULL(6) +#define VMX_MISC_ACTIVITY_SHUTDOWN BIT_ULL(7) +#define VMX_MISC_ACTIVITY_WAIT_SIPI BIT_ULL(8) +#define VMX_MISC_INTEL_PT BIT_ULL(14) +#define VMX_MISC_RDMSR_IN_SMM BIT_ULL(15) +#define VMX_MISC_VMXOFF_BLOCK_SMI BIT_ULL(28) +#define VMX_MISC_VMWRITE_SHADOW_RO_FIELDS BIT_ULL(29) +#define VMX_MISC_ZERO_LEN_INS BIT_ULL(30) +#define VMX_MISC_MSR_LIST_MULTIPLIER 512 + static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc) { return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 86ce8bb96bed..cb6588238f46 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -223,7 +223,7 @@ static inline bool cpu_has_vmx_vmfunc(void) static inline bool cpu_has_vmx_shadow_vmcs(void) { /* check if the cpu supports writing r/o exit information fields */ - if (!(vmcs_config.misc & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS)) + if (!(vmcs_config.misc & VMX_MISC_VMWRITE_SHADOW_RO_FIELDS)) return false; return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -365,7 +365,7 @@ static inline bool cpu_has_vmx_invvpid_global(void) static inline bool cpu_has_vmx_intel_pt(void) { - return (vmcs_config.misc & MSR_IA32_VMX_MISC_INTEL_PT) && + return (vmcs_config.misc & VMX_MISC_INTEL_PT) && (vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_PT_USE_GPA) && (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_RTIT_CTL); } diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 335f1a650497..72db3718d8b2 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -7062,7 +7062,7 @@ static void nested_vmx_setup_misc_data(struct vmcs_config *vmcs_conf, { msrs->misc_low = (u32)vmcs_conf->misc & VMX_MISC_SAVE_EFER_LMA; msrs->misc_low |= - MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | + VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | VMX_MISC_ACTIVITY_HLT | VMX_MISC_ACTIVITY_WAIT_SIPI; diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index cce4e2aa30fb..0782fe599757 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -109,7 +109,7 @@ static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu) static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu) { return to_vmx(vcpu)->nested.msrs.misc_low & - MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS; + VMX_MISC_VMWRITE_SHADOW_RO_FIELDS; } static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu) From 8f56b14e9fa0f91daf4d3be05988abf336255dc6 Mon Sep 17 00:00:00 2001 From: Xin Li Date: Wed, 5 Jun 2024 16:19:17 -0700 Subject: [PATCH 09/10] KVM: VMX: Open code VMX preemption timer rate mask in its accessor Use vmx_misc_preemption_timer_rate() to get the rate in hardware_setup(), and open code the rate's bitmask in vmx_misc_preemption_timer_rate() so that the function looks like all the helpers that grab values from VMX_BASIC and VMX_MISC MSR values. No functional change intended. Cc: Shan Kang Cc: Kai Huang Signed-off-by: Xin Li [sean: split to separate patch, write changelog] Reviewed-by: Kai Huang Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20240605231918.2915961-10-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/asm/vmx.h | 3 +-- arch/x86/kvm/vmx/vmx.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 400819ccb42c..f7fd4369b821 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -153,7 +153,6 @@ static inline u64 vmx_basic_encode_vmcs_info(u32 revision, u16 size, u8 memtype) return revision | ((u64)size << 32) | ((u64)memtype << 50); } -#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK GENMASK_ULL(4, 0) #define VMX_MISC_SAVE_EFER_LMA BIT_ULL(5) #define VMX_MISC_ACTIVITY_HLT BIT_ULL(6) #define VMX_MISC_ACTIVITY_SHUTDOWN BIT_ULL(7) @@ -167,7 +166,7 @@ static inline u64 vmx_basic_encode_vmcs_info(u32 revision, u16 size, u8 memtype) static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc) { - return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; + return vmx_misc & GENMASK_ULL(4, 0); } static inline int vmx_misc_cr3_count(u64 vmx_misc) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5cbd86c4386b..39a26ecf87ea 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -8517,7 +8517,7 @@ __init int vmx_hardware_setup(void) u64 use_timer_freq = 5000ULL * 1000 * 1000; cpu_preemption_timer_multi = - vmcs_config.misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; + vmx_misc_preemption_timer_rate(vmcs_config.misc); if (tsc_khz) use_timer_freq = (u64)tsc_khz * 1000; From 566975f6ecd85247bd8989884d7b909d5a456da1 Mon Sep 17 00:00:00 2001 From: Xin Li Date: Wed, 5 Jun 2024 16:19:18 -0700 Subject: [PATCH 10/10] KVM: nVMX: Use macros and #defines in vmx_restore_vmx_misc() Use macros in vmx_restore_vmx_misc() instead of open coding everything using BIT_ULL() and GENMASK_ULL(). Opportunistically split feature bits and reserved bits into separate variables, and add a comment explaining the subset logic (it's not immediately obvious that the set of feature bits is NOT the set of _supported_ feature bits). Cc: Shan Kang Cc: Kai Huang Signed-off-by: Xin Li [sean: split to separate patch, write changelog, drop #defines] Reviewed-by: Xiaoyao Li Reviewed-by: Kai Huang Reviewed-by: Zhao Liu Link: https://lore.kernel.org/r/20240605231918.2915961-11-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/nested.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 72db3718d8b2..97ecc2722c8f 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -1345,16 +1345,29 @@ vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data) { - const u64 feature_and_reserved_bits = - /* feature */ - BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) | - BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) | - /* reserved */ - GENMASK_ULL(13, 9) | BIT_ULL(31); + const u64 feature_bits = VMX_MISC_SAVE_EFER_LMA | + VMX_MISC_ACTIVITY_HLT | + VMX_MISC_ACTIVITY_SHUTDOWN | + VMX_MISC_ACTIVITY_WAIT_SIPI | + VMX_MISC_INTEL_PT | + VMX_MISC_RDMSR_IN_SMM | + VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | + VMX_MISC_VMXOFF_BLOCK_SMI | + VMX_MISC_ZERO_LEN_INS; + + const u64 reserved_bits = BIT_ULL(31) | GENMASK_ULL(13, 9); + u64 vmx_misc = vmx_control_msr(vmcs_config.nested.misc_low, vmcs_config.nested.misc_high); - if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits)) + BUILD_BUG_ON(feature_bits & reserved_bits); + + /* + * The incoming value must not set feature bits or reserved bits that + * aren't allowed/supported by KVM. Fields, i.e. multi-bit values, are + * explicitly checked below. + */ + if (!is_bitwise_subset(vmx_misc, data, feature_bits | reserved_bits)) return -EINVAL; if ((vmx->nested.msrs.pinbased_ctls_high &