From 90ac806c32952e4eb39fa21cc4e93f086a4047c3 Mon Sep 17 00:00:00 2001 From: Rachel Menge Date: Thu, 20 Jun 2024 18:50:40 -0400 Subject: [PATCH 1/9] Drivers: hv: Remove deprecated hv_fcopy declarations There are lingering hv_fcopy declarations which do not have definitions. The fcopy driver was removed in commit ec314f61e4fc ("Drivers: hv: Remove fcopy driver"). Therefore, remove the hv_fcopy declarations which are no longer needed or defined. Fixes: ec314f61e4fc ("Drivers: hv: Remove fcopy driver") Signed-off-by: Rachel Menge Reviewed-by: Dexuan Cui Reviewed-by: Saurabh Sengar Link: https://lore.kernel.org/r/20240620225040.700563-1-rachelmenge@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20240620225040.700563-1-rachelmenge@linux.microsoft.com> --- drivers/hv/hyperv_vmbus.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 76ac5185a01a..d2856023d53c 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -380,12 +380,6 @@ void hv_vss_deinit(void); int hv_vss_pre_suspend(void); int hv_vss_pre_resume(void); void hv_vss_onchannelcallback(void *context); - -int hv_fcopy_init(struct hv_util_service *srv); -void hv_fcopy_deinit(void); -int hv_fcopy_pre_suspend(void); -int hv_fcopy_pre_resume(void); -void hv_fcopy_onchannelcallback(void *context); void vmbus_initiate_unload(bool crash); static inline void hv_poll_channel(struct vmbus_channel *channel, From 7f828d5fff7d24752e1ecf6bebb6617a81f97b93 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 20 Jun 2024 23:16:14 -0700 Subject: [PATCH 2/9] clocksource: hyper-v: Use lapic timer in a TDX VM without paravisor In a TDX VM without paravisor, currently the default timer is the Hyper-V timer, which depends on the slow VM Reference Counter MSR: the Hyper-V TSC page is not enabled in such a VM because the VM uses Invariant TSC as a better clocksource and it's challenging to mark the Hyper-V TSC page shared in very early boot. 
Lower the rating of the Hyper-V timer so the local APIC timer becomes the default timer in such a VM, and print a warning in case Invariant TSC is unavailable in such a VM. This change should cause no perceivable performance difference. Cc: stable@vger.kernel.org # 6.6+ Reviewed-by: Roman Kisel Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20240621061614.8339-1-decui@microsoft.com Signed-off-by: Wei Liu Message-ID: <20240621061614.8339-1-decui@microsoft.com> --- arch/x86/kernel/cpu/mshyperv.c | 16 +++++++++++++++- drivers/clocksource/hyperv_timer.c | 16 +++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index e0fd57a8ba84..954b7cbfa2f0 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -449,9 +449,23 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED; if (!ms_hyperv.paravisor_present) { - /* To be supported: more work is required. */ + /* + * Mark the Hyper-V TSC page feature as disabled + * in a TDX VM without paravisor so that the + * Invariant TSC, which is a better clocksource + * anyway, is used instead. + */ ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE; + /* + * The Invariant TSC is expected to be available + * in a TDX VM without paravisor, but if not, + * print a warning message. The slower Hyper-V MSR-based + * Ref Counter should end up being the clocksource. + */ + if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)) + pr_warn("Hyper-V: Invariant TSC is unavailable\n"); + /* HV_MSR_CRASH_CTL is unsupported. 
*/ ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index b2a080647e41..99177835cade 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -137,7 +137,21 @@ static int hv_stimer_init(unsigned int cpu) ce->name = "Hyper-V clockevent"; ce->features = CLOCK_EVT_FEAT_ONESHOT; ce->cpumask = cpumask_of(cpu); - ce->rating = 1000; + + /* + * Lower the rating of the Hyper-V timer in a TDX VM without paravisor, + * so the local APIC timer (lapic_clockevent) is the default timer in + * such a VM. The Hyper-V timer is not preferred in such a VM because + * it depends on the slow VM Reference Counter MSR (the Hyper-V TSC + * page is not enabled in such a VM because the VM uses Invariant TSC + * as a better clocksource and it's challenging to mark the Hyper-V + * TSC page shared in very early boot). + */ + if (!ms_hyperv.paravisor_present && hv_isolation_type_tdx()) + ce->rating = 90; + else + ce->rating = 1000; + ce->set_state_shutdown = hv_ce_shutdown; ce->set_state_oneshot = hv_ce_set_oneshot; ce->set_next_event = hv_ce_set_next_event; From 3b85a2eacd3d886f4d4133a83cdfc2f3b48f06c0 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 18 Jun 2024 09:50:59 -0700 Subject: [PATCH 3/9] Documentation: hyperv: Add overview of Confidential Computing VM support Add documentation topic for Confidential Computing (CoCo) VM support in Linux guests on Hyper-V. 
Signed-off-by: Michael Kelley Reviewed-by: Easwar Hariharan Link: https://lore.kernel.org/r/20240618165059.10174-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240618165059.10174-1-mhklinux@outlook.com> --- Documentation/virt/hyperv/coco.rst | 260 ++++++++++++++++++++++++++++ Documentation/virt/hyperv/index.rst | 1 + 2 files changed, 261 insertions(+) create mode 100644 Documentation/virt/hyperv/coco.rst diff --git a/Documentation/virt/hyperv/coco.rst b/Documentation/virt/hyperv/coco.rst new file mode 100644 index 000000000000..c15d6fe34b4e --- /dev/null +++ b/Documentation/virt/hyperv/coco.rst @@ -0,0 +1,260 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Confidential Computing VMs +========================== +Hyper-V can create and run Linux guests that are Confidential Computing +(CoCo) VMs. Such VMs cooperate with the physical processor to better protect +the confidentiality and integrity of data in the VM's memory, even in the +face of a hypervisor/VMM that has been compromised and may behave maliciously. +CoCo VMs on Hyper-V share the generic CoCo VM threat model and security +objectives described in Documentation/security/snp-tdx-threat-model.rst. Note +that Hyper-V specific code in Linux refers to CoCo VMs as "isolated VMs" or +"isolation VMs". + +A Linux CoCo VM on Hyper-V requires the cooperation and interaction of the +following: + +* Physical hardware with a processor that supports CoCo VMs + +* The hardware runs a version of Windows/Hyper-V with support for CoCo VMs + +* The VM runs a version of Linux that supports being a CoCo VM + +The physical hardware requirements are as follows: + +* AMD processor with SEV-SNP. Hyper-V does not run guest VMs with AMD SME, + SEV, or SEV-ES encryption, and such encryption is not sufficient for a CoCo + VM on Hyper-V. + +* Intel processor with TDX + +To create a CoCo VM, the "Isolated VM" attribute must be specified to Hyper-V +when the VM is created. 
A VM cannot be changed from a CoCo VM to a normal VM, +or vice versa, after it is created. + +Operational Modes +----------------- +Hyper-V CoCo VMs can run in two modes. The mode is selected when the VM is +created and cannot be changed during the life of the VM. + +* Fully-enlightened mode. In this mode, the guest operating system is + enlightened to understand and manage all aspects of running as a CoCo VM. + +* Paravisor mode. In this mode, a paravisor layer between the guest and the + host provides some operations needed to run as a CoCo VM. The guest operating + system can have fewer CoCo enlightenments than is required in the + fully-enlightened case. + +Conceptually, fully-enlightened mode and paravisor mode may be treated as +points on a spectrum spanning the degree of guest enlightenment needed to run +as a CoCo VM. Fully-enlightened mode is one end of the spectrum. A full +implementation of paravisor mode is the other end of the spectrum, where all +aspects of running as a CoCo VM are handled by the paravisor, and a normal +guest OS with no knowledge of memory encryption or other aspects of CoCo VMs +can run successfully. However, the Hyper-V implementation of paravisor mode +does not go this far, and is somewhere in the middle of the spectrum. Some +aspects of CoCo VMs are handled by the Hyper-V paravisor while the guest OS +must be enlightened for other aspects. Unfortunately, there is no +standardized enumeration of feature/functions that might be provided in the +paravisor, and there is no standardized mechanism for a guest OS to query the +paravisor for the feature/functions it provides. The understanding of what +the paravisor provides is hard-coded in the guest OS. + +Paravisor mode has similarities to the `Coconut project`_, which aims to provide +a limited paravisor to provide services to the guest such as a virtual TPM. 
+However, the Hyper-V paravisor generally handles more aspects of CoCo VMs +than is currently envisioned for Coconut, and so is further toward the "no +guest enlightenments required" end of the spectrum. + +.. _Coconut project: https://github.com/coconut-svsm/svsm + +In the CoCo VM threat model, the paravisor is in the guest security domain +and must be trusted by the guest OS. By implication, the hypervisor/VMM must +protect itself against a potentially malicious paravisor just like it +protects against a potentially malicious guest. + +The hardware architectural approach to fully-enlightened vs. paravisor mode +varies depending on the underlying processor. + +* With AMD SEV-SNP processors, in fully-enlightened mode the guest OS runs in + VMPL 0 and has full control of the guest context. In paravisor mode, the + guest OS runs in VMPL 2 and the paravisor runs in VMPL 0. The paravisor + running in VMPL 0 has privileges that the guest OS in VMPL 2 does not have. + Certain operations require the guest to invoke the paravisor. Furthermore, in + paravisor mode the guest OS operates in "virtual Top Of Memory" (vTOM) mode + as defined by the SEV-SNP architecture. This mode simplifies guest management + of memory encryption when a paravisor is used. + +* With Intel TDX processor, in fully-enlightened mode the guest OS runs in an + L1 VM. In paravisor mode, TD partitioning is used. The paravisor runs in the + L1 VM, and the guest OS runs in a nested L2 VM. + +Hyper-V exposes a synthetic MSR to guests that describes the CoCo mode. This +MSR indicates if the underlying processor uses AMD SEV-SNP or Intel TDX, and +whether a paravisor is being used. It is straightforward to build a single +kernel image that can boot and run properly on either architecture, and in +either mode. + +Paravisor Effects +----------------- +Running in paravisor mode affects the following areas of generic Linux kernel +CoCo VM functionality: + +* Initial guest memory setup. 
When a new VM is created in paravisor mode, the + paravisor runs first and sets up the guest physical memory as encrypted. The + guest Linux does normal memory initialization, except for explicitly marking + appropriate ranges as decrypted (shared). In paravisor mode, Linux does not + perform the early boot memory setup steps that are particularly tricky with + AMD SEV-SNP in fully-enlightened mode. + +* #VC/#VE exception handling. In paravisor mode, Hyper-V configures the guest + CoCo VM to route #VC and #VE exceptions to VMPL 0 and the L1 VM, + respectively, and not the guest Linux. Consequently, these exception handlers + do not run in the guest Linux and are not a required enlightenment for a + Linux guest in paravisor mode. + +* CPUID flags. Both AMD SEV-SNP and Intel TDX provide a CPUID flag in the + guest indicating that the VM is operating with the respective hardware + support. While these CPUID flags are visible in fully-enlightened CoCo VMs, + the paravisor filters out these flags and the guest Linux does not see them. + Throughout the Linux kernel, explicitly testing these flags has mostly been + eliminated in favor of the cc_platform_has() function, with the goal of + abstracting the differences between SEV-SNP and TDX. But the + cc_platform_has() abstraction also allows the Hyper-V paravisor configuration + to selectively enable aspects of CoCo VM functionality even when the CPUID + flags are not set. The exception is early boot memory setup on SEV-SNP, which + tests the CPUID SEV-SNP flag. But not having the flag in Hyper-V paravisor + mode VM achieves the desired effect of not running SEV-SNP specific early + boot memory setup. + +* Device emulation. In paravisor mode, the Hyper-V paravisor provides + emulation of devices such as the IO-APIC and TPM. 
Because the emulation + happens in the paravisor in the guest context (instead of the hypervisor/VMM + context), MMIO accesses to these devices must be encrypted references instead + of the decrypted references that would be used in a fully-enlightened CoCo + VM. The __ioremap_caller() function has been enhanced to make a callback to + check whether a particular address range should be treated as encrypted + (private). See the "is_private_mmio" callback. + +* Encrypt/decrypt memory transitions. In a CoCo VM, transitioning guest + memory between encrypted and decrypted requires coordinating with the + hypervisor/VMM. This is done via callbacks invoked from + __set_memory_enc_pgtable(). In fully-enlightened mode, the normal SEV-SNP and + TDX implementations of these callbacks are used. In paravisor mode, a Hyper-V + specific set of callbacks is used. These callbacks invoke the paravisor so + that the paravisor can coordinate the transitions and inform the hypervisor + as necessary. See hv_vtom_init() where these callbacks are set up. + +* Interrupt injection. In fully-enlightened mode, a malicious hypervisor + could inject interrupts into the guest OS at times that violate x86/x64 + architectural rules. For full protection, the guest OS should include + enlightenments that use the interrupt injection management features provided + by CoCo-capable processors. In paravisor mode, the paravisor mediates + interrupt injection into the guest OS, and ensures that the guest OS only + sees interrupts that are "legal". The paravisor uses the interrupt injection + management features provided by the CoCo-capable physical processor, thereby + masking these complexities from the guest OS. + +Hyper-V Hypercalls +------------------ +When in fully-enlightened mode, hypercalls made by the Linux guest are routed +directly to the hypervisor, just as in a non-CoCo VM. But in paravisor mode, +normal hypercalls trap to the paravisor first, which may in turn invoke the +hypervisor. 
But the paravisor is idiosyncratic in this regard, and a few +hypercalls made by the Linux guest must always be routed directly to the +hypervisor. These hypercall sites test for a paravisor being present, and use +a special invocation sequence. See hv_post_message(), for example. + +Guest communication with Hyper-V +-------------------------------- +Separate from the generic Linux kernel handling of memory encryption in Linux +CoCo VMs, Hyper-V has VMBus and VMBus devices that communicate using memory +shared between the Linux guest and the host. This shared memory must be +marked decrypted to enable communication. Furthermore, since the threat model +includes a compromised and potentially malicious host, the guest must guard +against leaking any unintended data to the host through this shared memory. + +These Hyper-V and VMBus memory pages are marked as decrypted: + +* VMBus monitor pages + +* Synthetic interrupt controller (synic) related pages (unless supplied by + the paravisor) + +* Per-cpu hypercall input and output pages (unless running with a paravisor) + +* VMBus ring buffers. The direct mapping is marked decrypted in + __vmbus_establish_gpadl(). The secondary mapping created in + hv_ringbuffer_init() must also include the "decrypted" attribute. + +When the guest writes data to memory that is shared with the host, it must +ensure that only the intended data is written. Padding or unused fields must +be initialized to zeros before copying into the shared memory so that random +kernel data is not inadvertently given to the host. + +Similarly, when the guest reads memory that is shared with the host, it must +validate the data before acting on it so that a malicious host cannot induce +the guest to expose unintended data. Doing such validation can be tricky +because the host can modify the shared memory areas even while or after +validation is performed. 
For messages passed from the host to the guest in a +VMBus ring buffer, the length of the message is validated, and the message is +copied into a temporary (encrypted) buffer for further validation and +processing. The copying adds a small amount of overhead, but is the only way +to protect against a malicious host. See hv_pkt_iter_first(). + +Many drivers for VMBus devices have been "hardened" by adding code to fully +validate messages received over VMBus, instead of assuming that Hyper-V is +acting cooperatively. Such drivers are marked as "allowed_in_isolated" in the +vmbus_devs[] table. Other drivers for VMBus devices that are not needed in a +CoCo VM have not been hardened, and they are not allowed to load in a CoCo +VM. See vmbus_is_valid_offer() where such devices are excluded. + +Two VMBus devices depend on the Hyper-V host to do DMA data transfers: +storvsc for disk I/O and netvsc for network I/O. storvsc uses the normal +Linux kernel DMA APIs, and so bounce buffering through decrypted swiotlb +memory is done implicitly. netvsc has two modes for data transfers. The first +mode goes through send and receive buffer space that is explicitly allocated +by the netvsc driver, and is used for most smaller packets. These send and +receive buffers are marked decrypted by __vmbus_establish_gpadl(). Because +the netvsc driver explicitly copies packets to/from these buffers, the +equivalent of bounce buffering between encrypted and decrypted memory is +already part of the data path. The second mode uses the normal Linux kernel +DMA APIs, and is bounce buffered through swiotlb memory implicitly like in +storvsc. + +Finally, the VMBus virtual PCI driver needs special handling in a CoCo VM. +Linux PCI device drivers access PCI config space using standard APIs provided +by the Linux PCI subsystem. On Hyper-V, these functions directly access MMIO +space, and the access traps to Hyper-V for emulation. 
But in CoCo VMs, memory +encryption prevents Hyper-V from reading the guest instruction stream to +emulate the access. So in a CoCo VM, these functions must make a hypercall +with arguments explicitly describing the access. See +_hv_pcifront_read_config() and _hv_pcifront_write_config() and the +"use_calls" flag indicating to use hypercalls. + +load_unaligned_zeropad() +------------------------ +When transitioning memory between encrypted and decrypted, the caller of +set_memory_encrypted() or set_memory_decrypted() is responsible for ensuring +the memory isn't in use and isn't referenced while the transition is in +progress. The transition has multiple steps, and includes interaction with +the Hyper-V host. The memory is in an inconsistent state until all steps are +complete. A reference while the state is inconsistent could result in an +exception that can't be cleanly fixed up. + +However, the kernel load_unaligned_zeropad() mechanism may make stray +references that can't be prevented by the caller of set_memory_encrypted() or +set_memory_decrypted(), so there's specific code in the #VC or #VE exception +handler to fixup this case. But a CoCo VM running on Hyper-V may be +configured to run with a paravisor, with the #VC or #VE exception routed to +the paravisor. There's no architectural way to forward the exceptions back to +the guest kernel, and in such a case, the load_unaligned_zeropad() fixup code +in the #VC/#VE handlers doesn't run. + +To avoid this problem, the Hyper-V specific functions for notifying the +hypervisor of the transition mark pages as "not present" while a transition +is in progress. If load_unaligned_zeropad() causes a stray reference, a +normal page fault is generated instead of #VC or #VE, and the page-fault- +based handlers for load_unaligned_zeropad() fixup the reference. When the +encrypted/decrypted transition is complete, the pages are marked as "present" +again. See hv_vtom_clear_present() and hv_vtom_set_host_visibility(). 
diff --git a/Documentation/virt/hyperv/index.rst b/Documentation/virt/hyperv/index.rst index de447e11b4a5..79bc4080329e 100644 --- a/Documentation/virt/hyperv/index.rst +++ b/Documentation/virt/hyperv/index.rst @@ -11,3 +11,4 @@ Hyper-V Enlightenments vmbus clocks vpci + coco From 8fcc514809de41153b43ccbe1a0cdf7f72b78e7e Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 5 Jun 2024 19:55:59 -0700 Subject: [PATCH 4/9] x86/hyperv: Set X86_FEATURE_TSC_KNOWN_FREQ when Hyper-V provides frequency A Linux guest on Hyper-V gets the TSC frequency from a synthetic MSR, if available. In this case, set X86_FEATURE_TSC_KNOWN_FREQ so that Linux doesn't unnecessarily do refined TSC calibration when setting up the TSC clocksource. With this change, a message such as this is no longer output during boot when the TSC is used as the clocksource: [ 1.115141] tsc: Refined TSC clocksource calibration: 2918.408 MHz Furthermore, the guest and host will have exactly the same view of the TSC frequency, which is important for features such as the TSC deadline timer that are emulated by the Hyper-V host. 
Signed-off-by: Michael Kelley Reviewed-by: Roman Kisel Link: https://lore.kernel.org/r/20240606025559.1631-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240606025559.1631-1-mhklinux@outlook.com> --- arch/x86/kernel/cpu/mshyperv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 954b7cbfa2f0..6a9aa057f9ca 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -424,6 +424,7 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { x86_platform.calibrate_tsc = hv_get_tsc_khz; x86_platform.calibrate_cpu = hv_get_tsc_khz; + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); } if (ms_hyperv.priv_high & HV_ISOLATION) { From c6e2b45a544b45ce1a26858ded61a0dc4896d54a Mon Sep 17 00:00:00 2001 From: Anthony Nandaa Date: Tue, 2 Jul 2024 10:22:50 +0000 Subject: [PATCH 5/9] tools: hv: lsvmbus: change shebang to use python3 In many modern Linux distros, running `lsvmbus` returns the error: ``` /usr/bin/env: 'python': No such file or directory ``` because 'python' doesn't point anywhere. Now that python2 has reached EOL as of January 1, 2020 and is no longer maintained[1], these distros have python3 instead. Also, the script isn't executable by default because the permissions are set to mode 644. Fix this by updating the shebang in the `lsvmbus` to use python3 instead of python. Also fix the permissions to be 755 so that it is executable by default, which matches other similar scripts in `tools/hv`. The script is also tested and verified to be compatible with python3. 
[1] https://www.python.org/doc/sunset-python-2/ Signed-off-by: Anthony Nandaa Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20240702102250.13935-1-profnandaa@gmail.com Signed-off-by: Wei Liu Message-ID: <20240702102250.13935-1-profnandaa@gmail.com> --- tools/hv/lsvmbus | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 tools/hv/lsvmbus diff --git a/tools/hv/lsvmbus b/tools/hv/lsvmbus old mode 100644 new mode 100755 index 099f2c44dbed..f83698f14da2 --- a/tools/hv/lsvmbus +++ b/tools/hv/lsvmbus @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 import os From 4430556935db6808b63daf1bae91e9a4386e92bd Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Thu, 1 Aug 2024 14:22:35 -0700 Subject: [PATCH 6/9] Drivers: hv: vmbus: Fix the misplaced function description When hv_synic_disable_regs was introduced, it received the description of hv_synic_cleanup. Fix that. Fixes: dba61cda3046 ("Drivers: hv: vmbus: Break out synic enable and disable operations") Signed-off-by: Roman Kisel Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20240801212235.352220-1-romank@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20240801212235.352220-1-romank@linux.microsoft.com> --- drivers/hv/hv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index e0d676c74f14..36d9ba097ff5 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -342,9 +342,6 @@ int hv_synic_init(unsigned int cpu) return 0; } -/* - * hv_synic_cleanup - Cleanup routine for hv_synic_init(). - */ void hv_synic_disable_regs(unsigned int cpu) { struct hv_per_cpu_context *hv_cpu = @@ -436,6 +433,9 @@ static bool hv_synic_event_pending(void) return pending; } +/* + * hv_synic_cleanup - Cleanup routine for hv_synic_init(). 
+ */ int hv_synic_cleanup(unsigned int cpu) { struct vmbus_channel *channel, *sc; From b9af6418279c4cf73ca073f8ea024992b38be8ab Mon Sep 17 00:00:00 2001 From: "Anirudh Rayabharam (Microsoft)" Date: Wed, 28 Aug 2024 16:51:56 +0530 Subject: [PATCH 7/9] x86/hyperv: fix kexec crash due to VP assist page corruption commit 9636be85cc5b ("x86/hyperv: Fix hyperv_pcpu_input_arg handling when CPUs go online/offline") introduces a new cpuhp state for hyperv initialization. cpuhp_setup_state() returns the state number if state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN and 0 for all other states. For the hyperv case, since a new cpuhp state was introduced it would return 0. However, in hv_machine_shutdown(), the cpuhp_remove_state() call is conditioned upon "hyperv_init_cpuhp > 0". This will never be true and so hv_cpu_die() won't be called on all CPUs. This means the VP assist page won't be reset. When the kexec kernel tries to setup the VP assist page again, the hypervisor corrupts the memory region of the old VP assist page causing a panic in case the kexec kernel is using that memory elsewhere. This was originally fixed in commit dfe94d4086e4 ("x86/hyperv: Fix kexec panic/hang issues"). Get rid of hyperv_init_cpuhp entirely since we are no longer using a dynamic cpuhp state and use CPUHP_AP_HYPERV_ONLINE directly with cpuhp_remove_state(). 
Cc: stable@vger.kernel.org Fixes: 9636be85cc5b ("x86/hyperv: Fix hyperv_pcpu_input_arg handling when CPUs go online/offline") Signed-off-by: Anirudh Rayabharam (Microsoft) Reviewed-by: Vitaly Kuznetsov Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20240828112158.3538342-1-anirudh@anirudhrb.com Signed-off-by: Wei Liu Message-ID: <20240828112158.3538342-1-anirudh@anirudhrb.com> --- arch/x86/hyperv/hv_init.c | 5 +---- arch/x86/include/asm/mshyperv.h | 1 - arch/x86/kernel/cpu/mshyperv.c | 4 ++-- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 17a71e92a343..95eada2994e1 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -35,7 +35,6 @@ #include #include -int hyperv_init_cpuhp; u64 hv_current_partition_id = ~0ull; EXPORT_SYMBOL_GPL(hv_current_partition_id); @@ -607,8 +606,6 @@ void __init hyperv_init(void) register_syscore_ops(&hv_syscore_ops); - hyperv_init_cpuhp = cpuhp; - if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_ACCESS_PARTITION_ID) hv_get_partition_id(); @@ -637,7 +634,7 @@ void __init hyperv_init(void) clean_guest_os_id: wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); - cpuhp_remove_state(cpuhp); + cpuhp_remove_state(CPUHP_AP_HYPERV_ONLINE); free_ghcb_page: free_percpu(hv_ghcb_pg); free_vp_assist_page: diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 390c4d13956d..5f0bc6a6d025 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -40,7 +40,6 @@ static inline unsigned char hv_get_nmi_reason(void) } #if IS_ENABLED(CONFIG_HYPERV) -extern int hyperv_init_cpuhp; extern bool hyperv_paravisor_present; extern void *hv_hypercall_pg; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 6a9aa057f9ca..ead967479fa6 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -199,8 +199,8 @@ static void 
hv_machine_shutdown(void) * Call hv_cpu_die() on all the CPUs, otherwise later the hypervisor * corrupts the old VP Assist Pages and can crash the kexec kernel. */ - if (kexec_in_progress && hyperv_init_cpuhp > 0) - cpuhp_remove_state(hyperv_init_cpuhp); + if (kexec_in_progress) + cpuhp_remove_state(CPUHP_AP_HYPERV_ONLINE); /* The function calls stop_other_cpus(). */ native_machine_shutdown(); From 5e5cc1eb65256e6017e3deec04f9806f2f317853 Mon Sep 17 00:00:00 2001 From: zhang jiao Date: Mon, 2 Sep 2024 12:21:03 +0800 Subject: [PATCH 8/9] tools: hv: rm .*.cmd when make clean rm .*.cmd when make clean Signed-off-by: zhang jiao Reviewed-by: Saurabh Sengar Link: https://lore.kernel.org/r/20240902042103.5867-1-zhangjiao2@cmss.chinamobile.com Signed-off-by: Wei Liu Message-ID: <20240902042103.5867-1-zhangjiao2@cmss.chinamobile.com> --- tools/hv/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/hv/Makefile b/tools/hv/Makefile index 2e60e2c212cd..34ffcec264ab 100644 --- a/tools/hv/Makefile +++ b/tools/hv/Makefile @@ -52,7 +52,7 @@ $(OUTPUT)hv_fcopy_uio_daemon: $(HV_FCOPY_UIO_DAEMON_IN) clean: rm -f $(ALL_PROGRAMS) - find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete + find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.cmd' -delete install: $(ALL_PROGRAMS) install -d -m 755 $(DESTDIR)$(sbindir); \ From 895384881ec960aa4c602397a69f0a44a8169405 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Wed, 4 Sep 2024 09:15:53 +0800 Subject: [PATCH 9/9] hv: vmbus: Constify struct kobj_type and struct attribute_group vmbus_chan_group and vmbus_chan_ktype are not modified. They are only used in the helpers which take a const type parameter. Constifying these structures and moving them to a read-only section can increase overall security. 
``` [Before] text data bss dec hex filename 20568 4699 48 25315 62e3 drivers/hv/vmbus_drv.o [After] text data bss dec hex filename 20696 4571 48 25315 62e3 drivers/hv/vmbus_drv.o ``` Signed-off-by: Hongbo Li Reviewed-by: Naman Jain Link: https://lore.kernel.org/r/20240904011553.2010203-1-lihongbo22@huawei.com Signed-off-by: Wei Liu Message-ID: <20240904011553.2010203-1-lihongbo22@huawei.com> --- drivers/hv/vmbus_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 12a707ab73f8..7b1b20fa18f6 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1803,12 +1803,12 @@ static umode_t vmbus_chan_attr_is_visible(struct kobject *kobj, return attr->mode; } -static struct attribute_group vmbus_chan_group = { +static const struct attribute_group vmbus_chan_group = { .attrs = vmbus_chan_attrs, .is_visible = vmbus_chan_attr_is_visible }; -static struct kobj_type vmbus_chan_ktype = { +static const struct kobj_type vmbus_chan_ktype = { .sysfs_ops = &vmbus_chan_sysfs_ops, .release = vmbus_chan_release, };