summaryrefslogtreecommitdiffstats
path: root/0001-x86-hyperv-Suspend-resume-the-VP-assist-page-for-hib.patch
diff options
context:
space:
mode:
Diffstat (limited to '0001-x86-hyperv-Suspend-resume-the-VP-assist-page-for-hib.patch')
-rw-r--r-- 0001-x86-hyperv-Suspend-resume-the-VP-assist-page-for-hib.patch | 92
1 files changed, 92 insertions, 0 deletions
diff --git a/0001-x86-hyperv-Suspend-resume-the-VP-assist-page-for-hib.patch b/0001-x86-hyperv-Suspend-resume-the-VP-assist-page-for-hib.patch
new file mode 100644
index 000000000..be667bd95
--- /dev/null
+++ b/0001-x86-hyperv-Suspend-resume-the-VP-assist-page-for-hib.patch
@@ -0,0 +1,92 @@
+From 421f090c819d695942a470051cd624dc43deaf95 Mon Sep 17 00:00:00 2001
+From: Dexuan Cui <decui@microsoft.com>
+Date: Mon, 20 Apr 2020 19:46:11 -0700
+Subject: [PATCH] x86/hyperv: Suspend/resume the VP assist page for hibernation
+
+Unlike the other CPUs, CPU0 is never offlined during hibernation, so in the
+resume path, the "new" kernel's VP assist page is not suspended (i.e. not
+disabled), and later when we jump to the "old" kernel, the page is not
+properly re-enabled for CPU0 with the allocated page from the old kernel.
+
+So far, the VP assist page is used by hv_apic_eoi_write(), and is also
+used in the case of nested virtualization (running KVM atop Hyper-V).
+
+For hv_apic_eoi_write(), when the page is not properly re-enabled,
+hvp->apic_assist is always 0, so the HV_X64_MSR_EOI MSR is always written.
+This is not ideal with respect to performance, but Hyper-V can still
+correctly handle this according to the Hyper-V spec; nevertheless, Linux
+still must update the Hyper-V hypervisor with the correct VP assist page
+to prevent Hyper-V from writing to the stale page, which causes guest
+memory corruption and consequently may have caused the hangs and triple
+faults seen during the resume of non-boot CPUs.
+
+Fix the issue by calling hv_cpu_die()/hv_cpu_init() in the syscore ops.
+Without the fix, hibernation can fail at a rate of 1/300 ~ 1/500.
+With the fix, hibernation can pass a long-haul test of 2000 runs.
+
+In the case of nested virtualization, disabling/reenabling the assist
+page upon hibernation may be unsafe if there are active L2 guests.
+It looks like KVM should be enhanced to abort the hibernation request if
+there is any active L2 guest.
+
+Fixes: 05bd330a7fd8 ("x86/hyperv: Suspend/resume the hypercall page for hibernation")
+Cc: stable@vger.kernel.org
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
+Link: https://lore.kernel.org/r/1587437171-2472-1-git-send-email-decui@microsoft.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+---
+ arch/x86/hyperv/hv_init.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
+index 624f5d9b0f79..fd51bac11b46 100644
+--- a/arch/x86/hyperv/hv_init.c
++++ b/arch/x86/hyperv/hv_init.c
+@@ -73,7 +73,8 @@ static int hv_cpu_init(unsigned int cpu)
+ struct page *pg;
+
+ input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
+- pg = alloc_page(GFP_KERNEL);
++ /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
++ pg = alloc_page(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL);
+ if (unlikely(!pg))
+ return -ENOMEM;
+ *input_arg = page_address(pg);
+@@ -254,6 +255,7 @@ static int __init hv_pci_init(void)
+ static int hv_suspend(void)
+ {
+ union hv_x64_msr_hypercall_contents hypercall_msr;
++ int ret;
+
+ /*
+ * Reset the hypercall page as it is going to be invalidated
+@@ -270,12 +272,17 @@ static int hv_suspend(void)
+ hypercall_msr.enable = 0;
+ wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+
+- return 0;
++ ret = hv_cpu_die(0);
++ return ret;
+ }
+
+ static void hv_resume(void)
+ {
+ union hv_x64_msr_hypercall_contents hypercall_msr;
++ int ret;
++
++ ret = hv_cpu_init(0);
++ WARN_ON(ret);
+
+ /* Re-enable the hypercall page */
+ rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+@@ -288,6 +295,7 @@ static void hv_resume(void)
+ hv_hypercall_pg_saved = NULL;
+ }
+
++/* Note: when the ops are called, only CPU0 is online and IRQs are disabled. */
+ static struct syscore_ops hv_syscore_ops = {
+ .suspend = hv_suspend,
+ .resume = hv_resume,
+--
+2.26.2
+