diff options
Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r-- | arch/x86/kvm/vmx.c | 393 |
1 files changed, 226 insertions, 167 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7611af57682..bb481330716 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -91,6 +91,7 @@ struct vcpu_vmx { } rmode; int vpid; bool emulation_required; + enum emulation_result invalid_state_emulation_result; /* Support for vnmi-less CPUs */ int soft_vnmi_blocked; @@ -189,21 +190,21 @@ static inline int is_page_fault(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | INTR_INFO_VALID_MASK)) == - (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); + (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); } static inline int is_no_device(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | INTR_INFO_VALID_MASK)) == - (INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); + (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); } static inline int is_invalid_opcode(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | INTR_INFO_VALID_MASK)) == - (INTR_TYPE_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); + (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); } static inline int is_external_interrupt(u32 intr_info) @@ -480,8 +481,13 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) eb = (1u << PF_VECTOR) | (1u << UD_VECTOR); if (!vcpu->fpu_active) eb |= 1u << NM_VECTOR; - if (vcpu->guest_debug.enabled) - eb |= 1u << DB_VECTOR; + if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { + if (vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) + eb |= 1u << DB_VECTOR; + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) + eb |= 1u << BP_VECTOR; + } if (vcpu->arch.rmode.active) eb = ~0; if (vm_need_ept()) @@ -747,29 +753,33 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, bool has_error_code, u32 error_code) { struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 intr_info = nr | INTR_INFO_VALID_MASK; - if (has_error_code) + if (has_error_code) { vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); + intr_info |= INTR_INFO_DELIVER_CODE_MASK; + } if (vcpu->arch.rmode.active) { vmx->rmode.irq.pending = true; vmx->rmode.irq.vector = nr; vmx->rmode.irq.rip = kvm_rip_read(vcpu); - if (nr == BP_VECTOR) + if (nr == BP_VECTOR || nr == OF_VECTOR) vmx->rmode.irq.rip++; - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - nr | INTR_TYPE_SOFT_INTR - | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) - | INTR_INFO_VALID_MASK); + intr_info |= INTR_TYPE_SOFT_INTR; + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); return; } - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - nr | INTR_TYPE_EXCEPTION - | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) - | INTR_INFO_VALID_MASK); + if (nr == BP_VECTOR || nr == OF_VECTOR) { + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); + intr_info |= INTR_TYPE_SOFT_EXCEPTION; + } else + intr_info |= INTR_TYPE_HARD_EXCEPTION; + + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); } static bool vmx_exception_injected(struct kvm_vcpu *vcpu) @@ -856,11 +866,8 @@ static u64 guest_read_tsc(void) * writes 'guest_tsc' into guest's timestamp counter "register" * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc */ -static void guest_write_tsc(u64 guest_tsc) +static void guest_write_tsc(u64 guest_tsc, u64 host_tsc) { - u64 host_tsc; - - rdtscll(host_tsc); vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc); } @@ -925,14 +932,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct kvm_msr_entry *msr; + u64 host_tsc; int ret = 0; switch (msr_index) { -#ifdef CONFIG_X86_64 case MSR_EFER: vmx_load_host_state(vmx); ret = kvm_set_msr_common(vcpu, msr_index, data); break; +#ifdef CONFIG_X86_64 case MSR_FS_BASE: vmcs_writel(GUEST_FS_BASE, data); break; @@ -950,7 +958,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) vmcs_writel(GUEST_SYSENTER_ESP, data); break; case MSR_IA32_TIME_STAMP_COUNTER: - guest_write_tsc(data); + rdtscll(host_tsc); + guest_write_tsc(data, host_tsc); break; case MSR_P6_PERFCTR0: case MSR_P6_PERFCTR1: @@ -999,40 +1008,28 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) } } -static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) +static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - unsigned long dr7 = 0x400; - int old_singlestep; - - old_singlestep = vcpu->guest_debug.singlestep; - - vcpu->guest_debug.enabled = dbg->enabled; - if (vcpu->guest_debug.enabled) { - int i; + int old_debug = vcpu->guest_debug; + unsigned long flags; - dr7 |= 0x200; /* exact */ - for (i = 0; i < 4; ++i) { - if (!dbg->breakpoints[i].enabled) - continue; - vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address; - dr7 |= 2 << (i*2); /* global enable */ - dr7 |= 0 << (i*4+16); /* execution breakpoint */ - } + vcpu->guest_debug = dbg->control; + if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) + vcpu->guest_debug = 0; - vcpu->guest_debug.singlestep = dbg->singlestep; - } else - vcpu->guest_debug.singlestep = 0; - - if (old_singlestep && !vcpu->guest_debug.singlestep) { - unsigned long flags; + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) + vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]); + else + vmcs_writel(GUEST_DR7, vcpu->arch.dr7); - flags = vmcs_readl(GUEST_RFLAGS); + flags = vmcs_readl(GUEST_RFLAGS); + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + flags |= X86_EFLAGS_TF | X86_EFLAGS_RF; + else if (old_debug & KVM_GUESTDBG_SINGLESTEP) flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); - vmcs_writel(GUEST_RFLAGS, flags); - } + vmcs_writel(GUEST_RFLAGS, flags); update_exception_bitmap(vcpu); - vmcs_writel(GUEST_DR7, dr7); return 0; } @@ -1433,6 +1430,29 @@ continue_rmode: init_rmode(vcpu->kvm); } +static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); + + vcpu->arch.shadow_efer = efer; + if (!msr) + return; + if (efer & EFER_LMA) { + vmcs_write32(VM_ENTRY_CONTROLS, + vmcs_read32(VM_ENTRY_CONTROLS) | + VM_ENTRY_IA32E_MODE); + msr->data = efer; + } else { + vmcs_write32(VM_ENTRY_CONTROLS, + vmcs_read32(VM_ENTRY_CONTROLS) & + ~VM_ENTRY_IA32E_MODE); + + msr->data = efer & ~EFER_LME; + } + setup_msrs(vmx); +} + #ifdef CONFIG_X86_64 static void enter_lmode(struct kvm_vcpu *vcpu) @@ -1447,13 +1467,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu) (guest_tr_ar & ~AR_TYPE_MASK) | AR_TYPE_BUSY_64_TSS); } - vcpu->arch.shadow_efer |= EFER_LMA; - - find_msr_entry(to_vmx(vcpu), MSR_EFER)->data |= EFER_LMA | EFER_LME; - vmcs_write32(VM_ENTRY_CONTROLS, - vmcs_read32(VM_ENTRY_CONTROLS) - | VM_ENTRY_IA32E_MODE); + vmx_set_efer(vcpu, vcpu->arch.shadow_efer); } static void exit_lmode(struct kvm_vcpu *vcpu) @@ -1612,30 +1627,6 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) vmcs_writel(GUEST_CR4, hw_cr4); } -static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); - - vcpu->arch.shadow_efer = efer; - if (!msr) - return; - if (efer & EFER_LMA) { - vmcs_write32(VM_ENTRY_CONTROLS, - vmcs_read32(VM_ENTRY_CONTROLS) | - VM_ENTRY_IA32E_MODE); - msr->data = efer; - - } else { - vmcs_write32(VM_ENTRY_CONTROLS, - vmcs_read32(VM_ENTRY_CONTROLS) & - ~VM_ENTRY_IA32E_MODE); - - msr->data = efer & ~EFER_LME; - } - setup_msrs(vmx); -} - static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) { struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; @@ -1653,7 +1644,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, var->limit = vmcs_read32(sf->limit); var->selector = vmcs_read16(sf->selector); ar = vmcs_read32(sf->ar_bytes); - if (ar & AR_UNUSABLE_MASK) + if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) ar = 0; var->type = ar & 15; var->s = (ar >> 4) & 1; @@ -1788,14 +1779,16 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu) vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); cs_rpl = cs.selector & SELECTOR_RPL_MASK; + if (cs.unusable) + return false; if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK)) return false; if (!cs.s) return false; - if (!(~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK))) { + if (cs.type & AR_TYPE_WRITEABLE_MASK) { if (cs.dpl > cs_rpl) return false; - } else if (cs.type & AR_TYPE_CODE_MASK) { + } else { if (cs.dpl != cs_rpl) return false; } @@ -1814,7 +1807,9 @@ static bool stack_segment_valid(struct kvm_vcpu *vcpu) vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); ss_rpl = ss.selector & SELECTOR_RPL_MASK; - if ((ss.type != 3) || (ss.type != 7)) + if (ss.unusable) + return true; + if (ss.type != 3 && ss.type != 7) return false; if (!ss.s) return false; @@ -1834,6 +1829,8 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg) vmx_get_segment(vcpu, &var, seg); rpl = var.selector & SELECTOR_RPL_MASK; + if (var.unusable) + return true; if (!var.s) return false; if (!var.present) @@ -1855,9 +1852,11 @@ static bool tr_valid(struct kvm_vcpu *vcpu) vmx_get_segment(vcpu, &tr, VCPU_SREG_TR); + if (tr.unusable) + return false; if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */ return false; - if ((tr.type != 3) || (tr.type != 11)) /* TODO: Check if guest is in IA32e mode */ + if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */ return false; if (!tr.present) return false; @@ -1871,6 +1870,8 @@ static bool ldtr_valid(struct kvm_vcpu *vcpu) vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR); + if (ldtr.unusable) + return true; if (ldtr.selector & SELECTOR_TI_MASK) /* TI = 1 */ return false; if (ldtr.type != 2) @@ -2112,7 +2113,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) { u32 host_sysenter_cs, msr_low, msr_high; u32 junk; - u64 host_pat; + u64 host_pat, tsc_this, tsc_base; unsigned long a; struct descriptor_table dt; int i; @@ -2240,6 +2241,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); + tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc; + rdtscll(tsc_this); + if (tsc_this < vmx->vcpu.kvm->arch.vm_init_tsc) + tsc_base = tsc_this; + + guest_write_tsc(0, tsc_base); return 0; } @@ -2319,7 +2326,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) kvm_rip_write(vcpu, 0); kvm_register_write(vcpu, VCPU_REGS_RSP, 0); - /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */ vmcs_writel(GUEST_DR7, 0x400); vmcs_writel(GUEST_GDTR_BASE, 0); @@ -2332,8 +2338,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); - guest_write_tsc(0); - /* Special registers */ vmcs_write64(GUEST_IA32_DEBUGCTL, 0); @@ -2486,6 +2490,11 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu, { vmx_update_window_states(vcpu); + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_STI | + GUEST_INTR_STATE_MOV_SS); + if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { if (vcpu->arch.interrupt.pending) { enable_nmi_window(vcpu); @@ -2536,24 +2545,6 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) return 0; } -static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu) -{ - struct kvm_guest_debug *dbg = &vcpu->guest_debug; - - set_debugreg(dbg->bp[0], 0); - set_debugreg(dbg->bp[1], 1); - set_debugreg(dbg->bp[2], 2); - set_debugreg(dbg->bp[3], 3); - - if (dbg->singlestep) { - unsigned long flags; - - flags = vmcs_readl(GUEST_RFLAGS); - flags |= X86_EFLAGS_TF | X86_EFLAGS_RF; - vmcs_writel(GUEST_RFLAGS, flags); - } -} - static int handle_rmode_exception(struct kvm_vcpu *vcpu, int vec, u32 err_code) { @@ -2570,9 +2561,17 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, * the required debugging infrastructure rework. */ switch (vec) { - case DE_VECTOR: case DB_VECTOR: + if (vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) + return 0; + kvm_queue_exception(vcpu, vec); + return 1; case BP_VECTOR: + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) + return 0; + /* fall through */ + case DE_VECTOR: case OF_VECTOR: case BR_VECTOR: case UD_VECTOR: @@ -2589,8 +2588,8 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 intr_info, error_code; - unsigned long cr2, rip; + u32 intr_info, ex_no, error_code; + unsigned long cr2, rip, dr6; u32 vect_info; enum emulation_result er; @@ -2649,14 +2648,30 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } - if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) == - (INTR_TYPE_EXCEPTION | 1)) { + ex_no = intr_info & INTR_INFO_VECTOR_MASK; + switch (ex_no) { + case DB_VECTOR: + dr6 = vmcs_readl(EXIT_QUALIFICATION); + if (!(vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { + vcpu->arch.dr6 = dr6 | DR6_FIXED_1; + kvm_queue_exception(vcpu, DB_VECTOR); + return 1; + } + kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; + kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); + /* fall through */ + case BP_VECTOR: kvm_run->exit_reason = KVM_EXIT_DEBUG; - return 0; + kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; + kvm_run->debug.arch.exception = ex_no; + break; + default: + kvm_run->exit_reason = KVM_EXIT_EXCEPTION; + kvm_run->ex.exception = ex_no; + kvm_run->ex.error_code = error_code; + break; } - kvm_run->exit_reason = KVM_EXIT_EXCEPTION; - kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK; - kvm_run->ex.error_code = error_code; return 0; } @@ -2677,7 +2692,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { unsigned long exit_qualification; - int size, down, in, string, rep; + int size, in, string; unsigned port; ++vcpu->stat.io_exits; @@ -2693,8 +2708,6 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) size = (exit_qualification & 7) + 1; in = (exit_qualification & 8) != 0; - down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; - rep = (exit_qualification & 32) != 0; port = exit_qualification >> 16; skip_emulated_instruction(vcpu); @@ -2795,21 +2808,44 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) unsigned long val; int dr, reg; - /* - * FIXME: this code assumes the host is debugging the guest. - * need to deal with guest debugging itself too. - */ + dr = vmcs_readl(GUEST_DR7); + if (dr & DR7_GD) { + /* + * As the vm-exit takes precedence over the debug trap, we + * need to emulate the latter, either for the host or the + * guest debugging itself. + */ + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { + kvm_run->debug.arch.dr6 = vcpu->arch.dr6; + kvm_run->debug.arch.dr7 = dr; + kvm_run->debug.arch.pc = + vmcs_readl(GUEST_CS_BASE) + + vmcs_readl(GUEST_RIP); + kvm_run->debug.arch.exception = DB_VECTOR; + kvm_run->exit_reason = KVM_EXIT_DEBUG; + return 0; + } else { + vcpu->arch.dr7 &= ~DR7_GD; + vcpu->arch.dr6 |= DR6_BD; + vmcs_writel(GUEST_DR7, vcpu->arch.dr7); + kvm_queue_exception(vcpu, DB_VECTOR); + return 1; + } + } + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - dr = exit_qualification & 7; - reg = (exit_qualification >> 8) & 15; - if (exit_qualification & 16) { - /* mov from dr */ + dr = exit_qualification & DEBUG_REG_ACCESS_NUM; + reg = DEBUG_REG_ACCESS_REG(exit_qualification); + if (exit_qualification & TYPE_MOV_FROM_DR) { switch (dr) { + case 0 ... 3: + val = vcpu->arch.db[dr]; + break; case 6: - val = 0xffff0ff0; + val = vcpu->arch.dr6; break; case 7: - val = 0x400; + val = vcpu->arch.dr7; break; default: val = 0; @@ -2817,7 +2853,38 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_register_write(vcpu, reg, val); KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); } else { - /* mov to dr */ + val = vcpu->arch.regs[reg]; + switch (dr) { + case 0 ... 3: + vcpu->arch.db[dr] = val; + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) + vcpu->arch.eff_db[dr] = val; + break; + case 4 ... 5: + if (vcpu->arch.cr4 & X86_CR4_DE) + kvm_queue_exception(vcpu, UD_VECTOR); + break; + case 6: + if (val & 0xffffffff00000000ULL) { + kvm_queue_exception(vcpu, GP_VECTOR); + break; + } + vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; + break; + case 7: + if (val & 0xffffffff00000000ULL) { + kvm_queue_exception(vcpu, GP_VECTOR); + break; + } + vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { + vmcs_writel(GUEST_DR7, vcpu->arch.dr7); + vcpu->arch.switch_db_regs = + (val & DR7_BP_EN_MASK); + } + break; + } + KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)val, handler); } skip_emulated_instruction(vcpu); return 1; @@ -2968,17 +3035,25 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } tss_selector = exit_qualification; - return kvm_task_switch(vcpu, tss_selector, reason); + if (!kvm_task_switch(vcpu, tss_selector, reason)) + return 0; + + /* clear all local breakpoint enable flags */ + vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55); + + /* + * TODO: What about debug traps on tss switch? + * Are we supposed to inject them and update dr6? + */ + + return 1; } static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u64 exit_qualification; - enum emulation_result er; gpa_t gpa; - unsigned long hva; int gla_validity; - int r; exit_qualification = vmcs_read64(EXIT_QUALIFICATION); @@ -3001,32 +3076,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); - hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT); - if (!kvm_is_error_hva(hva)) { - r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0); - if (r < 0) { - printk(KERN_ERR "EPT: Not enough memory!\n"); - return -ENOMEM; - } - return 1; - } else { - /* must be MMIO */ - er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); - - if (er == EMULATE_FAIL) { - printk(KERN_ERR - "EPT: Fail to handle EPT violation vmexit!er is %d\n", - er); - printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", - (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), - (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS)); - printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", - (long unsigned int)exit_qualification); - return -ENOTSUPP; - } else if (er == EMULATE_DO_MMIO) - return 0; - } - return 1; + return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0); } static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) @@ -3046,7 +3096,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct vcpu_vmx *vmx = to_vmx(vcpu); - int err; + enum emulation_result err = EMULATE_DONE; preempt_enable(); local_irq_enable(); @@ -3071,10 +3121,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, local_irq_disable(); preempt_disable(); - /* Guest state should be valid now except if we need to - * emulate an MMIO */ - if (guest_state_valid(vcpu)) - vmx->emulation_required = 0; + vmx->invalid_state_emulation_result = err; } /* @@ -3123,8 +3170,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) /* If we need to emulate an MMIO from handle_invalid_guest_state * we just return 0 */ - if (vmx->emulation_required && emulate_invalid_guest_state) - return 0; + if (vmx->emulation_required && emulate_invalid_guest_state) { + if (guest_state_valid(vcpu)) + vmx->emulation_required = 0; + return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO; + } /* Access CR3 don't cause VMExit in paging mode, so we need * to sync with guest real CR3. */ @@ -3238,7 +3288,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) vmx->vcpu.arch.nmi_injected = false; } kvm_clear_exception_queue(&vmx->vcpu); - if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) { + if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION || + type == INTR_TYPE_SOFT_EXCEPTION)) { if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { error = vmcs_read32(IDT_VECTORING_ERROR_CODE); kvm_queue_exception_e(&vmx->vcpu, vector, error); @@ -3259,6 +3310,11 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) vmx_update_window_states(vcpu); + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_STI | + GUEST_INTR_STATE_MOV_SS); + if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { if (vcpu->arch.interrupt.pending) { enable_nmi_window(vcpu); @@ -3347,6 +3403,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) */ vmcs_writel(HOST_CR0, read_cr0()); + set_debugreg(vcpu->arch.dr6, 6); + asm( /* Store host registers */ "push %%"R"dx; push %%"R"bp;" @@ -3441,6 +3499,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); vcpu->arch.regs_dirty = 0; + get_debugreg(vcpu->arch.dr6, 6); + vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); if (vmx->rmode.irq.pending) fixup_rmode_irq(vmx); @@ -3595,7 +3655,6 @@ static struct kvm_x86_ops vmx_x86_ops = { .vcpu_put = vmx_vcpu_put, .set_guest_debug = set_guest_debug, - .guest_debug_pre = kvm_guest_debug_pre, .get_msr = vmx_get_msr, .set_msr = vmx_set_msr, .get_segment_base = vmx_get_segment_base, |