From f5b42c3324494ea3f9bf795e2a7e4d3cbb06c607 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 6 Mar 2007 12:05:53 +0200 Subject: KVM: Fix guest sysenter on vmx The vmx code currently treats the guest's sysenter support msrs as 32-bit values, which breaks 32-bit compat mode userspace on 64-bit guests. Fix by using the native word width of the machine. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index c07178e6112..bfa0ce42ea9 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -371,10 +371,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) data = vmcs_read32(GUEST_SYSENTER_CS); break; case MSR_IA32_SYSENTER_EIP: - data = vmcs_read32(GUEST_SYSENTER_EIP); + data = vmcs_readl(GUEST_SYSENTER_EIP); break; case MSR_IA32_SYSENTER_ESP: - data = vmcs_read32(GUEST_SYSENTER_ESP); + data = vmcs_readl(GUEST_SYSENTER_ESP); break; default: msr = find_msr_entry(vcpu, msr_index); @@ -412,10 +412,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) vmcs_write32(GUEST_SYSENTER_CS, data); break; case MSR_IA32_SYSENTER_EIP: - vmcs_write32(GUEST_SYSENTER_EIP, data); + vmcs_writel(GUEST_SYSENTER_EIP, data); break; case MSR_IA32_SYSENTER_ESP: - vmcs_write32(GUEST_SYSENTER_ESP, data); + vmcs_writel(GUEST_SYSENTER_ESP, data); break; case MSR_IA32_TIME_STAMP_COUNTER: guest_write_tsc(data); -- cgit From 6af11b9e827aac1d664ccd31e94f122c7698416b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 19 Mar 2007 13:18:10 +0200 Subject: KVM: Prevent system selectors leaking into guest on real->protected mode transition on vmx Intel virtualization extensions do not support virtualizing real mode. So kvm uses virtualized vm86 mode to run real mode code. Unfortunately, this virtualized vm86 mode does not support the so called "big real" mode, where the segment selector and base do not agree with each other according to the real mode rules (base == selector << 4). To work around this, kvm checks whether a selector/base pair violates the virtualized vm86 rules, and if so, forces it into conformance. On a transition back to protected mode, if we see that the guest did not touch a forced segment, we restore it back to the original protected mode value. This pile of hacks breaks down if the gdt has changed in real mode, as it can cause a segment selector to point to a system descriptor instead of a normal data segment. In fact, this happens with the Windows bootloader and the qemu acpi bios, where a protected mode memcpy routine issues an innocent 'pop %es' and traps on an attempt to load a system descriptor. "Fix" by checking if the to-be-restored selector points at a system segment, and if so, coercing it into a normal data segment. The long term solution, of course, is to abandon vm86 mode and use emulation for big real mode. Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index bfa0ce42ea9..25b24719922 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -618,7 +618,7 @@ static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save) { struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; - if (vmcs_readl(sf->base) == save->base) { + if (vmcs_readl(sf->base) == save->base && (save->base & AR_S_MASK)) { vmcs_write16(sf->selector, save->selector); vmcs_writel(sf->base, save->base); vmcs_write32(sf->limit, save->limit); -- cgit From 6d9658df07789a124e5c1f8677afcd7773439f3e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 11 Mar 2007 13:52:33 +0100 Subject: KVM: always reload segment selectors failed VM entry on VMX might still change %fs or %gs, thus make sure that KVM always reloads the segment selectors. This is crutial on both x86 and x86_64: x86 has __KERNEL_PDA in %fs on which things like 'current' depends and x86_64 has 0 there and needs MSR_GS_BASE to work. Signed-off-by: Ingo Molnar --- drivers/kvm/vmx.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) (limited to 'drivers/kvm/vmx.c') diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 25b24719922..fbbf9d6b299 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1888,6 +1888,27 @@ again: [cr2]"i"(offsetof(struct kvm_vcpu, cr2)) : "cc", "memory" ); + /* + * Reload segment selectors ASAP. (it's needed for a functional + * kernel: x86 relies on having __KERNEL_PDA in %fs and x86_64 + * relies on having 0 in %gs for the CPU PDA to work.) + */ + if (fs_gs_ldt_reload_needed) { + load_ldt(ldt_sel); + load_fs(fs_sel); + /* + * If we have to reload gs, we must take care to + * preserve our gs base. + */ + local_irq_disable(); + load_gs(gs_sel); +#ifdef CONFIG_X86_64 + wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); +#endif + local_irq_enable(); + + reload_tss(); + } ++kvm_stat.exits; save_msrs(vcpu->guest_msrs, NR_BAD_MSRS); @@ -1905,22 +1926,6 @@ again: kvm_run->exit_reason = vmcs_read32(VM_INSTRUCTION_ERROR); r = 0; } else { - if (fs_gs_ldt_reload_needed) { - load_ldt(ldt_sel); - load_fs(fs_sel); - /* - * If we have to reload gs, we must take care to - * preserve our gs base. - */ - local_irq_disable(); - load_gs(gs_sel); -#ifdef CONFIG_X86_64 - wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); -#endif - local_irq_enable(); - - reload_tss(); - } /* * Profile KVM exit RIPs: */ -- cgit