diff options
author | Josh Boyer <jwboyer@fedoraproject.org> | 2014-10-24 13:34:55 -0400 |
---|---|---|
committer | Josh Boyer <jwboyer@fedoraproject.org> | 2014-10-24 20:14:02 -0400 |
commit | 4ac9db0e269fe760faec68a3b163da77bf419134 (patch) | |
tree | 27f1865eb604ae68042905f482006a2139e04c31 | |
parent | 65c87507f926ee79f8c085183dae5e6c54aad115 (diff) | |
download | kernel-4ac9db0e269fe760faec68a3b163da77bf419134.tar.gz kernel-4ac9db0e269fe760faec68a3b163da77bf419134.tar.xz kernel-4ac9db0e269fe760faec68a3b163da77bf419134.zip |
CVE-2014-3610 kvm: noncanonical MSR writes (rhbz 1144883 1156543)
- CVE-2014-3611 kvm: PIT timer race condition (rhbz 1144878 1156537)
- CVE-2014-3646 kvm: vmx: invvpid vm exit not handled (rhbz 1144825 1156534)
- CVE-2014-8369 kvm: excessive pages un-pinning in kvm_iommu_map error path (rhbz 1156518 1156522)
- CVE-2014-8480 CVE-2014-8481 kvm: NULL pointer dereference during rip relative instruction emulation (rhbz 1156615 1156616)
-rw-r--r-- | KVM-emulate-avoid-accessing-NULL-ctxt-memopp.patch | 37 | ||||
-rw-r--r-- | KVM-x86-Check-non-canonical-addresses-upon-WRMSR.patch | 145 | ||||
-rw-r--r-- | KVM-x86-Decoding-guest-instructions-which-cross-page.patch | 38 | ||||
-rw-r--r-- | KVM-x86-Emulator-does-not-decode-clflush-well.patch | 77 | ||||
-rw-r--r-- | KVM-x86-Emulator-fixes-for-eip-canonical-checks-on-n.patch | 234 | ||||
-rw-r--r-- | KVM-x86-Fix-wrong-masking-on-relative-jump-call.patch | 64 | ||||
-rw-r--r-- | KVM-x86-Handle-errors-when-RIP-is-set-during-far-jum.patch | 249 | ||||
-rw-r--r-- | KVM-x86-Improve-thread-safety-in-pit.patch | 36 | ||||
-rw-r--r-- | KVM-x86-PREFETCH-and-HINT_NOP-should-have-SrcMem-fla.patch | 51 | ||||
-rw-r--r-- | KVM-x86-Prevent-host-from-panicking-on-shared-MSR-wr.patch | 89 | ||||
-rw-r--r-- | kernel.spec | 47 | ||||
-rw-r--r-- | kvm-fix-excessive-pages-un-pinning-in-kvm_iommu_map-.patch | 78 | ||||
-rw-r--r-- | kvm-vmx-handle-invvpid-vm-exit-gracefully.patch | 78 | ||||
-rw-r--r-- | kvm-x86-don-t-kill-guest-on-unknown-exit-reason.patch | 54 |
14 files changed, 1276 insertions, 1 deletions
diff --git a/KVM-emulate-avoid-accessing-NULL-ctxt-memopp.patch b/KVM-emulate-avoid-accessing-NULL-ctxt-memopp.patch new file mode 100644 index 000000000..3aeacadc6 --- /dev/null +++ b/KVM-emulate-avoid-accessing-NULL-ctxt-memopp.patch @@ -0,0 +1,37 @@ +From: Paolo Bonzini <pbonzini@redhat.com> +Date: Fri, 24 Oct 2014 17:07:21 +0200 +Subject: [PATCH] KVM: emulate: avoid accessing NULL ctxt->memopp + +A failure to decode the instruction can cause a NULL pointer access. +This is fixed simply by moving the "done" label as close as possible +to the return. + +This fixes CVE-2014-8481. + +Reported-by: Andy Lutomirski <luto@amacapital.net> +Cc: stable@vger.kernel.org +Fixes: 41061cdb98a0bec464278b4db8e894a3121671f5 +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +--- + arch/x86/kvm/emulate.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index 02c8ea804aaf..eb3b1c46f995 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -4580,10 +4580,10 @@ done_prefixes: + /* Decode and fetch the destination operand: register or memory. */ + rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask); + +-done: + if (ctxt->rip_relative) + ctxt->memopp->addr.mem.ea += ctxt->_eip; + ++done: + return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; + } + +-- +1.9.3 + diff --git a/KVM-x86-Check-non-canonical-addresses-upon-WRMSR.patch b/KVM-x86-Check-non-canonical-addresses-upon-WRMSR.patch new file mode 100644 index 000000000..61d0b6058 --- /dev/null +++ b/KVM-x86-Check-non-canonical-addresses-upon-WRMSR.patch @@ -0,0 +1,145 @@ +From: Nadav Amit <namit@cs.technion.ac.il> +Date: Fri, 24 Oct 2014 17:07:12 +0200 +Subject: [PATCH] KVM: x86: Check non-canonical addresses upon WRMSR + +Upon WRMSR, the CPU should inject #GP if a non-canonical value (address) is +written to certain MSRs. The behavior is "almost" identical for AMD and Intel +(ignoring MSRs that are not implemented in either architecture since they would +anyhow #GP). However, IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if +non-canonical address is written on Intel but not on AMD (which ignores the top +32-bits). + +Accordingly, this patch injects a #GP on the MSRs which behave identically on +Intel and AMD. To eliminate the differences between the architecutres, the +value which is written to IA32_SYSENTER_ESP and IA32_SYSENTER_EIP is turned to +canonical value before writing instead of injecting a #GP. + +Some references from Intel and AMD manuals: + +According to Intel SDM description of WRMSR instruction #GP is expected on +WRMSR "If the source register contains a non-canonical address and ECX +specifies one of the following MSRs: IA32_DS_AREA, IA32_FS_BASE, IA32_GS_BASE, +IA32_KERNEL_GS_BASE, IA32_LSTAR, IA32_SYSENTER_EIP, IA32_SYSENTER_ESP." + +According to AMD manual instruction manual: +LSTAR/CSTAR (SYSCALL): "The WRMSR instruction loads the target RIP into the +LSTAR and CSTAR registers. If an RIP written by WRMSR is not in canonical +form, a general-protection exception (#GP) occurs." +IA32_GS_BASE and IA32_FS_BASE (WRFSBASE/WRGSBASE): "The address written to the +base field must be in canonical form or a #GP fault will occur." +IA32_KERNEL_GS_BASE (SWAPGS): "The address stored in the KernelGSbase MSR must +be in canonical form." + +This patch fixes CVE-2014-3610. + +Cc: stable@vger.kernel.org +Signed-off-by: Nadav Amit <namit@cs.technion.ac.il> +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +--- + arch/x86/include/asm/kvm_host.h | 14 ++++++++++++++ + arch/x86/kvm/svm.c | 2 +- + arch/x86/kvm/vmx.c | 2 +- + arch/x86/kvm/x86.c | 27 ++++++++++++++++++++++++++- + 4 files changed, 42 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index 7d603a71ab3a..ccc94de4ac49 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -989,6 +989,20 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) + kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); + } + ++static inline u64 get_canonical(u64 la) ++{ ++ return ((int64_t)la << 16) >> 16; ++} ++ ++static inline bool is_noncanonical_address(u64 la) ++{ ++#ifdef CONFIG_X86_64 ++ return get_canonical(la) != la; ++#else ++ return false; ++#endif ++} ++ + #define TSS_IOPB_BASE_OFFSET 0x66 + #define TSS_BASE_SIZE 0x68 + #define TSS_IOPB_SIZE (65536 / 8) +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +index 65510f624dfe..00bed2c5e948 100644 +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -3251,7 +3251,7 @@ static int wrmsr_interception(struct vcpu_svm *svm) + msr.host_initiated = false; + + svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; +- if (svm_set_msr(&svm->vcpu, &msr)) { ++ if (kvm_set_msr(&svm->vcpu, &msr)) { + trace_kvm_msr_write_ex(ecx, data); + kvm_inject_gp(&svm->vcpu, 0); + } else { +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 0acac81f198b..148020a7dd98 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -5291,7 +5291,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu) + msr.data = data; + msr.index = ecx; + msr.host_initiated = false; +- if (vmx_set_msr(vcpu, &msr) != 0) { ++ if (kvm_set_msr(vcpu, &msr) != 0) { + trace_kvm_msr_write_ex(ecx, data); + kvm_inject_gp(vcpu, 0); + return 1; +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 34c8f94331f8..5a7195573a32 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -987,7 +987,6 @@ void kvm_enable_efer_bits(u64 mask) + } + EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); + +- + /* + * Writes msr value into into the appropriate "register". + * Returns 0 on success, non-0 otherwise. +@@ -995,8 +994,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); + */ + int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) + { ++ switch (msr->index) { ++ case MSR_FS_BASE: ++ case MSR_GS_BASE: ++ case MSR_KERNEL_GS_BASE: ++ case MSR_CSTAR: ++ case MSR_LSTAR: ++ if (is_noncanonical_address(msr->data)) ++ return 1; ++ break; ++ case MSR_IA32_SYSENTER_EIP: ++ case MSR_IA32_SYSENTER_ESP: ++ /* ++ * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if ++ * non-canonical address is written on Intel but not on ++ * AMD (which ignores the top 32-bits, because it does ++ * not implement 64-bit SYSENTER). ++ * ++ * 64-bit code should hence be able to write a non-canonical ++ * value on AMD. Making the address canonical ensures that ++ * vmentry does not fail on Intel after writing a non-canonical ++ * value, and that something deterministic happens if the guest ++ * invokes 64-bit SYSENTER. ++ */ ++ msr->data = get_canonical(msr->data); ++ } + return kvm_x86_ops->set_msr(vcpu, msr); + } ++EXPORT_SYMBOL_GPL(kvm_set_msr); + + /* + * Adapt set_msr() to msr_io()'s calling convention +-- +1.9.3 + diff --git a/KVM-x86-Decoding-guest-instructions-which-cross-page.patch b/KVM-x86-Decoding-guest-instructions-which-cross-page.patch new file mode 100644 index 000000000..86ed83f8f --- /dev/null +++ b/KVM-x86-Decoding-guest-instructions-which-cross-page.patch @@ -0,0 +1,38 @@ +From: Nadav Amit <namit@cs.technion.ac.il> +Date: Fri, 24 Oct 2014 17:07:20 +0200 +Subject: [PATCH] KVM: x86: Decoding guest instructions which cross page + boundary may fail + +Once an instruction crosses a page boundary, the size read from the second page +disregards the common case that part of the operand resides on the first page. +As a result, fetch of long insturctions may fail, and thereby cause the +decoding to fail as well. + +Cc: stable@vger.kernel.org +Fixes: 5cfc7e0f5e5e1adf998df94f8e36edaf5d30d38e +Signed-off-by: Nadav Amit <namit@cs.technion.ac.il> +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +--- + arch/x86/kvm/emulate.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index c0deaff8d9f0..02c8ea804aaf 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -778,8 +778,10 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size) + static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, + unsigned size) + { +- if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size)) +- return __do_insn_fetch_bytes(ctxt, size); ++ unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr; ++ ++ if (unlikely(done_size < size)) ++ return __do_insn_fetch_bytes(ctxt, size - done_size); + else + return X86EMUL_CONTINUE; + } +-- +1.9.3 + diff --git a/KVM-x86-Emulator-does-not-decode-clflush-well.patch b/KVM-x86-Emulator-does-not-decode-clflush-well.patch new file mode 100644 index 000000000..8681f5b1e --- /dev/null +++ b/KVM-x86-Emulator-does-not-decode-clflush-well.patch @@ -0,0 +1,77 @@ +From: Nadav Amit <namit@cs.technion.ac.il> +Date: Fri, 24 Oct 2014 17:07:22 +0200 +Subject: [PATCH] KVM: x86: Emulator does not decode clflush well + +Currently, all group15 instructions are decoded as clflush (e.g., mfence, +xsave). In addition, the clflush instruction requires no prefix (66/f2/f3) +would exist. If prefix exists it may encode a different instruction (e.g., +clflushopt). + +Creating a group for clflush, and different group for each prefix. + +This has been the case forever, but the next patch needs the cflush group +in order to fix a bug introduced in 3.17. + +Fixes: 41061cdb98a0bec464278b4db8e894a3121671f5 +Cc: stable@vger.kernel.org +Signed-off-by: Nadav Amit <namit@cs.technion.ac.il> +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +--- + arch/x86/kvm/emulate.c | 20 +++++++++++++++++--- + 1 file changed, 17 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index eb3b1c46f995..97da5034d812 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -3462,6 +3462,12 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt) + return X86EMUL_CONTINUE; + } + ++static int em_clflush(struct x86_emulate_ctxt *ctxt) ++{ ++ /* emulating clflush regardless of cpuid */ ++ return X86EMUL_CONTINUE; ++} ++ + static bool valid_cr(int nr) + { + switch (nr) { +@@ -3800,6 +3806,16 @@ static const struct opcode group11[] = { + X7(D(Undefined)), + }; + ++static const struct gprefix pfx_0f_ae_7 = { ++ I(0, em_clflush), N, N, N, ++}; ++ ++static const struct group_dual group15 = { { ++ N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7), ++}, { ++ N, N, N, N, N, N, N, N, ++} }; ++ + static const struct gprefix pfx_0f_6f_0f_7f = { + I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), + }; +@@ -4063,7 +4079,7 @@ static const struct opcode twobyte_table[256] = { + F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), + F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), + F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), +- D(ModRM), F(DstReg | SrcMem | ModRM, em_imul), ++ GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul), + /* 0xB0 - 0xB7 */ + I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg), + I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), +@@ -4993,8 +5009,6 @@ twobyte_insn: + case 0x90 ... 0x9f: /* setcc r/m8 */ + ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); + break; +- case 0xae: /* clflush */ +- break; + case 0xb6 ... 0xb7: /* movzx */ + ctxt->dst.bytes = ctxt->op_bytes; + ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val +-- +1.9.3 + diff --git a/KVM-x86-Emulator-fixes-for-eip-canonical-checks-on-n.patch b/KVM-x86-Emulator-fixes-for-eip-canonical-checks-on-n.patch new file mode 100644 index 000000000..a3df3979a --- /dev/null +++ b/KVM-x86-Emulator-fixes-for-eip-canonical-checks-on-n.patch @@ -0,0 +1,234 @@ +From: Nadav Amit <namit@cs.technion.ac.il> +Date: Fri, 24 Oct 2014 17:07:16 +0200 +Subject: [PATCH] KVM: x86: Emulator fixes for eip canonical checks on near + branches + +Before changing rip (during jmp, call, ret, etc.) the target should be asserted +to be canonical one, as real CPUs do. During sysret, both target rsp and rip +should be canonical. If any of these values is noncanonical, a #GP exception +should occur. The exception to this rule are syscall and sysenter instructions +in which the assigned rip is checked during the assignment to the relevant +MSRs. + +This patch fixes the emulator to behave as real CPUs do for near branches. +Far branches are handled by the next patch. + +This fixes CVE-2014-3647. + +Cc: stable@vger.kernel.org +Signed-off-by: Nadav Amit <namit@cs.technion.ac.il> +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +--- + arch/x86/kvm/emulate.c | 78 ++++++++++++++++++++++++++++++++++---------------- + 1 file changed, 54 insertions(+), 24 deletions(-) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index 047698974799..a1b9139169f6 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -564,7 +564,8 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt) + return emulate_exception(ctxt, NM_VECTOR, 0, false); + } + +-static inline void assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) ++static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, ++ int cs_l) + { + switch (ctxt->op_bytes) { + case 2: +@@ -574,16 +575,25 @@ static inline void assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) + ctxt->_eip = (u32)dst; + break; + case 8: ++ if ((cs_l && is_noncanonical_address(dst)) || ++ (!cs_l && (dst & ~(u32)-1))) ++ return emulate_gp(ctxt, 0); + ctxt->_eip = dst; + break; + default: + WARN(1, "unsupported eip assignment size\n"); + } ++ return X86EMUL_CONTINUE; ++} ++ ++static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) ++{ ++ return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64); + } + +-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) ++static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) + { +- assign_eip_near(ctxt, ctxt->_eip + rel); ++ return assign_eip_near(ctxt, ctxt->_eip + rel); + } + + static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg) +@@ -1998,13 +2008,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt) + case 2: /* call near abs */ { + long int old_eip; + old_eip = ctxt->_eip; +- ctxt->_eip = ctxt->src.val; ++ rc = assign_eip_near(ctxt, ctxt->src.val); ++ if (rc != X86EMUL_CONTINUE) ++ break; + ctxt->src.val = old_eip; + rc = em_push(ctxt); + break; + } + case 4: /* jmp abs */ +- ctxt->_eip = ctxt->src.val; ++ rc = assign_eip_near(ctxt, ctxt->src.val); + break; + case 5: /* jmp far */ + rc = em_jmp_far(ctxt); +@@ -2039,10 +2051,14 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) + + static int em_ret(struct x86_emulate_ctxt *ctxt) + { +- ctxt->dst.type = OP_REG; +- ctxt->dst.addr.reg = &ctxt->_eip; +- ctxt->dst.bytes = ctxt->op_bytes; +- return em_pop(ctxt); ++ int rc; ++ unsigned long eip; ++ ++ rc = emulate_pop(ctxt, &eip, ctxt->op_bytes); ++ if (rc != X86EMUL_CONTINUE) ++ return rc; ++ ++ return assign_eip_near(ctxt, eip); + } + + static int em_ret_far(struct x86_emulate_ctxt *ctxt) +@@ -2323,7 +2339,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) + { + const struct x86_emulate_ops *ops = ctxt->ops; + struct desc_struct cs, ss; +- u64 msr_data; ++ u64 msr_data, rcx, rdx; + int usermode; + u16 cs_sel = 0, ss_sel = 0; + +@@ -2339,6 +2355,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) + else + usermode = X86EMUL_MODE_PROT32; + ++ rcx = reg_read(ctxt, VCPU_REGS_RCX); ++ rdx = reg_read(ctxt, VCPU_REGS_RDX); ++ + cs.dpl = 3; + ss.dpl = 3; + ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); +@@ -2356,6 +2375,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) + ss_sel = cs_sel + 8; + cs.d = 0; + cs.l = 1; ++ if (is_noncanonical_address(rcx) || ++ is_noncanonical_address(rdx)) ++ return emulate_gp(ctxt, 0); + break; + } + cs_sel |= SELECTOR_RPL_MASK; +@@ -2364,8 +2386,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) + ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); + ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); + +- ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX); +- *reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX); ++ ctxt->_eip = rdx; ++ *reg_write(ctxt, VCPU_REGS_RSP) = rcx; + + return X86EMUL_CONTINUE; + } +@@ -2905,10 +2927,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt) + + static int em_call(struct x86_emulate_ctxt *ctxt) + { ++ int rc; + long rel = ctxt->src.val; + + ctxt->src.val = (unsigned long)ctxt->_eip; +- jmp_rel(ctxt, rel); ++ rc = jmp_rel(ctxt, rel); ++ if (rc != X86EMUL_CONTINUE) ++ return rc; + return em_push(ctxt); + } + +@@ -2940,11 +2965,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) + static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) + { + int rc; ++ unsigned long eip; + +- ctxt->dst.type = OP_REG; +- ctxt->dst.addr.reg = &ctxt->_eip; +- ctxt->dst.bytes = ctxt->op_bytes; +- rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes); ++ rc = emulate_pop(ctxt, &eip, ctxt->op_bytes); ++ if (rc != X86EMUL_CONTINUE) ++ return rc; ++ rc = assign_eip_near(ctxt, eip); + if (rc != X86EMUL_CONTINUE) + return rc; + rsp_increment(ctxt, ctxt->src.val); +@@ -3271,20 +3297,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt) + + static int em_loop(struct x86_emulate_ctxt *ctxt) + { ++ int rc = X86EMUL_CONTINUE; ++ + register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1); + if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) && + (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags))) +- jmp_rel(ctxt, ctxt->src.val); ++ rc = jmp_rel(ctxt, ctxt->src.val); + +- return X86EMUL_CONTINUE; ++ return rc; + } + + static int em_jcxz(struct x86_emulate_ctxt *ctxt) + { ++ int rc = X86EMUL_CONTINUE; ++ + if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) +- jmp_rel(ctxt, ctxt->src.val); ++ rc = jmp_rel(ctxt, ctxt->src.val); + +- return X86EMUL_CONTINUE; ++ return rc; + } + + static int em_in(struct x86_emulate_ctxt *ctxt) +@@ -4743,7 +4773,7 @@ special_insn: + break; + case 0x70 ... 0x7f: /* jcc (short) */ + if (test_cc(ctxt->b, ctxt->eflags)) +- jmp_rel(ctxt, ctxt->src.val); ++ rc = jmp_rel(ctxt, ctxt->src.val); + break; + case 0x8d: /* lea r16/r32, m */ + ctxt->dst.val = ctxt->src.addr.mem.ea; +@@ -4773,7 +4803,7 @@ special_insn: + break; + case 0xe9: /* jmp rel */ + case 0xeb: /* jmp rel short */ +- jmp_rel(ctxt, ctxt->src.val); ++ rc = jmp_rel(ctxt, ctxt->src.val); + ctxt->dst.type = OP_NONE; /* Disable writeback. */ + break; + case 0xf4: /* hlt */ +@@ -4898,7 +4928,7 @@ twobyte_insn: + break; + case 0x80 ... 0x8f: /* jnz rel, etc*/ + if (test_cc(ctxt->b, ctxt->eflags)) +- jmp_rel(ctxt, ctxt->src.val); ++ rc = jmp_rel(ctxt, ctxt->src.val); + break; + case 0x90 ... 0x9f: /* setcc r/m8 */ + ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); +-- +1.9.3 + diff --git a/KVM-x86-Fix-wrong-masking-on-relative-jump-call.patch b/KVM-x86-Fix-wrong-masking-on-relative-jump-call.patch new file mode 100644 index 000000000..4b173faf0 --- /dev/null +++ b/KVM-x86-Fix-wrong-masking-on-relative-jump-call.patch @@ -0,0 +1,64 @@ +From: Nadav Amit <namit@cs.technion.ac.il> +Date: Fri, 24 Oct 2014 17:07:15 +0200 +Subject: [PATCH] KVM: x86: Fix wrong masking on relative jump/call + +Relative jumps and calls do the masking according to the operand size, and not +according to the address size as the KVM emulator does today. + +This patch fixes KVM behavior. + +Cc: stable@vger.kernel.org +Signed-off-by: Nadav Amit <namit@cs.technion.ac.il> +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +--- + arch/x86/kvm/emulate.c | 27 ++++++++++++++++++++++----- + 1 file changed, 22 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index a46207a05835..047698974799 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -504,11 +504,6 @@ static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc) + masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc); + } + +-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) +-{ +- register_address_increment(ctxt, &ctxt->_eip, rel); +-} +- + static u32 desc_limit_scaled(struct desc_struct *desc) + { + u32 limit = get_desc_limit(desc); +@@ -569,6 +564,28 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt) + return emulate_exception(ctxt, NM_VECTOR, 0, false); + } + ++static inline void assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) ++{ ++ switch (ctxt->op_bytes) { ++ case 2: ++ ctxt->_eip = (u16)dst; ++ break; ++ case 4: ++ ctxt->_eip = (u32)dst; ++ break; ++ case 8: ++ ctxt->_eip = dst; ++ break; ++ default: ++ WARN(1, "unsupported eip assignment size\n"); ++ } ++} ++ ++static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) ++{ ++ assign_eip_near(ctxt, ctxt->_eip + rel); ++} ++ + static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg) + { + u16 selector; +-- +1.9.3 + diff --git a/KVM-x86-Handle-errors-when-RIP-is-set-during-far-jum.patch b/KVM-x86-Handle-errors-when-RIP-is-set-during-far-jum.patch new file mode 100644 index 000000000..4c60a1f51 --- /dev/null +++ b/KVM-x86-Handle-errors-when-RIP-is-set-during-far-jum.patch @@ -0,0 +1,249 @@ +From: Nadav Amit <namit@cs.technion.ac.il> +Date: Fri, 24 Oct 2014 17:07:17 +0200 +Subject: [PATCH] KVM: x86: Handle errors when RIP is set during far jumps + +Far jmp/call/ret may fault while loading a new RIP. Currently KVM does not +handle this case, and may result in failed vm-entry once the assignment is +done. The tricky part of doing so is that loading the new CS affects the +VMCS/VMCB state, so if we fail during loading the new RIP, we are left in +unconsistent state. Therefore, this patch saves on 64-bit the old CS +descriptor and restores it if loading RIP failed. + +This fixes CVE-2014-3647. + +Cc: stable@vger.kernel.org +Signed-off-by: Nadav Amit <namit@cs.technion.ac.il> +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +--- + arch/x86/kvm/emulate.c | 118 ++++++++++++++++++++++++++++++++++++------------- + 1 file changed, 88 insertions(+), 30 deletions(-) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index a1b9139169f6..c0deaff8d9f0 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -1443,7 +1443,9 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, + + /* Does not support long mode */ + static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, +- u16 selector, int seg, u8 cpl, bool in_task_switch) ++ u16 selector, int seg, u8 cpl, ++ bool in_task_switch, ++ struct desc_struct *desc) + { + struct desc_struct seg_desc, old_desc; + u8 dpl, rpl; +@@ -1584,6 +1586,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, + } + load: + ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); ++ if (desc) ++ *desc = seg_desc; + return X86EMUL_CONTINUE; + exception: + return emulate_exception(ctxt, err_vec, err_code, true); +@@ -1593,7 +1597,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, + u16 selector, int seg) + { + u8 cpl = ctxt->ops->cpl(ctxt); +- return __load_segment_descriptor(ctxt, selector, seg, cpl, false); ++ return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL); + } + + static void write_register_operand(struct operand *op) +@@ -1987,17 +1991,31 @@ static int em_iret(struct x86_emulate_ctxt *ctxt) + static int em_jmp_far(struct x86_emulate_ctxt *ctxt) + { + int rc; +- unsigned short sel; ++ unsigned short sel, old_sel; ++ struct desc_struct old_desc, new_desc; ++ const struct x86_emulate_ops *ops = ctxt->ops; ++ u8 cpl = ctxt->ops->cpl(ctxt); ++ ++ /* Assignment of RIP may only fail in 64-bit mode */ ++ if (ctxt->mode == X86EMUL_MODE_PROT64) ++ ops->get_segment(ctxt, &old_sel, &old_desc, NULL, ++ VCPU_SREG_CS); + + memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); + +- rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS); ++ rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false, ++ &new_desc); + if (rc != X86EMUL_CONTINUE) + return rc; + +- ctxt->_eip = 0; +- memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes); +- return X86EMUL_CONTINUE; ++ rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l); ++ if (rc != X86EMUL_CONTINUE) { ++ WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64); ++ /* assigning eip failed; restore the old cs */ ++ ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS); ++ return rc; ++ } ++ return rc; + } + + static int em_grp45(struct x86_emulate_ctxt *ctxt) +@@ -2064,21 +2082,34 @@ static int em_ret(struct x86_emulate_ctxt *ctxt) + static int em_ret_far(struct x86_emulate_ctxt *ctxt) + { + int rc; +- unsigned long cs; ++ unsigned long eip, cs; ++ u16 old_cs; + int cpl = ctxt->ops->cpl(ctxt); ++ struct desc_struct old_desc, new_desc; ++ const struct x86_emulate_ops *ops = ctxt->ops; ++ ++ if (ctxt->mode == X86EMUL_MODE_PROT64) ++ ops->get_segment(ctxt, &old_cs, &old_desc, NULL, ++ VCPU_SREG_CS); + +- rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes); ++ rc = emulate_pop(ctxt, &eip, ctxt->op_bytes); + if (rc != X86EMUL_CONTINUE) + return rc; +- if (ctxt->op_bytes == 4) +- ctxt->_eip = (u32)ctxt->_eip; + rc = emulate_pop(ctxt, &cs, ctxt->op_bytes); + if (rc != X86EMUL_CONTINUE) + return rc; + /* Outer-privilege level return is not implemented */ + if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl) + return X86EMUL_UNHANDLEABLE; +- rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS); ++ rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false, ++ &new_desc); ++ if (rc != X86EMUL_CONTINUE) ++ return rc; ++ rc = assign_eip_far(ctxt, eip, new_desc.l); ++ if (rc != X86EMUL_CONTINUE) { ++ WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64); ++ ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS); ++ } + return rc; + } + +@@ -2505,19 +2536,24 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, + * Now load segment descriptors. If fault happens at this stage + * it is handled in a context of new task + */ +- ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, true); ++ ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, ++ true, NULL); + if (ret != X86EMUL_CONTINUE) + return ret; +- ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true); ++ ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, ++ true, NULL); + if (ret != X86EMUL_CONTINUE) + return ret; +- ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true); ++ ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, ++ true, NULL); + if (ret != X86EMUL_CONTINUE) + return ret; +- ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true); ++ ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, ++ true, NULL); + if (ret != X86EMUL_CONTINUE) + return ret; +- ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true); ++ ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, ++ true, NULL); + if (ret != X86EMUL_CONTINUE) + return ret; + +@@ -2642,25 +2678,32 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, + * Now load segment descriptors. If fault happenes at this stage + * it is handled in a context of new task + */ +- ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl, true); ++ ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, ++ cpl, true, NULL); + if (ret != X86EMUL_CONTINUE) + return ret; +- ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true); ++ ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, ++ true, NULL); + if (ret != X86EMUL_CONTINUE) + return ret; +- ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true); ++ ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, ++ true, NULL); + if (ret != X86EMUL_CONTINUE) + return ret; +- ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true); ++ ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, ++ true, NULL); + if (ret != X86EMUL_CONTINUE) + return ret; +- ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true); ++ ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, ++ true, NULL); + if (ret != X86EMUL_CONTINUE) + return ret; +- ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, true); ++ ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, ++ true, NULL); + if (ret != X86EMUL_CONTINUE) + return ret; +- ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, true); ++ ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, ++ true, NULL); + if (ret != X86EMUL_CONTINUE) + return ret; + +@@ -2942,24 +2985,39 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) + u16 sel, old_cs; + ulong old_eip; + int rc; ++ struct desc_struct old_desc, new_desc; ++ const struct x86_emulate_ops *ops = ctxt->ops; ++ int cpl = ctxt->ops->cpl(ctxt); + +- old_cs = get_segment_selector(ctxt, VCPU_SREG_CS); + old_eip = ctxt->_eip; ++ ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS); + + memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); +- if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS)) ++ rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false, ++ &new_desc); ++ if (rc != X86EMUL_CONTINUE) + return X86EMUL_CONTINUE; + +- ctxt->_eip = 0; +- memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes); ++ rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l); ++ if (rc != X86EMUL_CONTINUE) ++ goto fail; + + ctxt->src.val = old_cs; + rc = em_push(ctxt); + if (rc != X86EMUL_CONTINUE) +- return rc; ++ goto fail; + + ctxt->src.val = old_eip; +- return em_push(ctxt); ++ rc = em_push(ctxt); ++ /* If we failed, we tainted the memory, but the very least we should ++ restore cs */ ++ if (rc != X86EMUL_CONTINUE) ++ goto fail; ++ return rc; ++fail: ++ ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS); ++ return rc; ++ + } + + static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) +-- +1.9.3 + diff --git a/KVM-x86-Improve-thread-safety-in-pit.patch b/KVM-x86-Improve-thread-safety-in-pit.patch new file mode 100644 index 000000000..e6ba59924 --- /dev/null +++ b/KVM-x86-Improve-thread-safety-in-pit.patch @@ -0,0 +1,36 @@ +From: Andy Honig <ahonig@google.com> +Date: Fri, 24 Oct 2014 17:07:14 +0200 +Subject: [PATCH] KVM: x86: Improve thread safety in pit + +There's a race condition in the PIT emulation code in KVM. In +__kvm_migrate_pit_timer the pit_timer object is accessed without +synchronization. If the race condition occurs at the wrong time this +can crash the host kernel. + +This fixes CVE-2014-3611. + +Cc: stable@vger.kernel.org +Signed-off-by: Andrew Honig <ahonig@google.com> +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +--- + arch/x86/kvm/i8254.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c +index 518d86471b76..298781d4cfb4 100644 +--- a/arch/x86/kvm/i8254.c ++++ b/arch/x86/kvm/i8254.c +@@ -262,8 +262,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) + return; + + timer = &pit->pit_state.timer; ++ mutex_lock(&pit->pit_state.lock); + if (hrtimer_cancel(timer)) + hrtimer_start_expires(timer, HRTIMER_MODE_ABS); ++ mutex_unlock(&pit->pit_state.lock); + } + + static void destroy_pit_timer(struct kvm_pit *pit) +-- +1.9.3 + diff --git a/KVM-x86-PREFETCH-and-HINT_NOP-should-have-SrcMem-fla.patch b/KVM-x86-PREFETCH-and-HINT_NOP-should-have-SrcMem-fla.patch new file mode 100644 index 000000000..9405ba130 --- /dev/null +++ b/KVM-x86-PREFETCH-and-HINT_NOP-should-have-SrcMem-fla.patch @@ -0,0 +1,51 @@ +From: Nadav Amit <namit@cs.technion.ac.il> +Date: Fri, 24 Oct 2014 17:07:23 +0200 +Subject: [PATCH] KVM: x86: PREFETCH and HINT_NOP should have SrcMem flag + +The decode phase of the x86 emulator assumes that every instruction with the +ModRM flag, and which can be used with RIP-relative addressing, has either +SrcMem or DstMem. This is not the case for several instructions - prefetch, +hint-nop and clflush. + +Adding SrcMem|NoAccess for prefetch and hint-nop and SrcMem for clflush. + +This fixes CVE-2014-8480. + +Fixes: 41061cdb98a0bec464278b4db8e894a3121671f5 +Cc: stable@vger.kernel.org +Signed-off-by: Nadav Amit <namit@cs.technion.ac.il> +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +--- + arch/x86/kvm/emulate.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index 97da5034d812..749f9fa38254 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -3807,7 +3807,7 @@ static const struct opcode group11[] = { + }; + + static const struct gprefix pfx_0f_ae_7 = { +- I(0, em_clflush), N, N, N, ++ I(SrcMem | ByteOp, em_clflush), N, N, N, + }; + + static const struct group_dual group15 = { { +@@ -4024,10 +4024,11 @@ static const struct opcode twobyte_table[256] = { + N, I(ImplicitOps | EmulateOnUD, em_syscall), + II(ImplicitOps | Priv, em_clts, clts), N, + DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, +- N, D(ImplicitOps | ModRM), N, N, ++ N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N, + /* 0x10 - 0x1F */ + N, N, N, N, N, N, N, N, +- D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM), ++ D(ImplicitOps | ModRM | SrcMem | NoAccess), ++ N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess), + /* 0x20 - 0x2F */ + DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read), + DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read), +-- +1.9.3 + diff --git a/KVM-x86-Prevent-host-from-panicking-on-shared-MSR-wr.patch b/KVM-x86-Prevent-host-from-panicking-on-shared-MSR-wr.patch new file mode 100644 index 000000000..d832e64cc --- /dev/null +++ b/KVM-x86-Prevent-host-from-panicking-on-shared-MSR-wr.patch @@ -0,0 +1,89 @@ +From: Andy Honig <ahonig@google.com> +Date: Fri, 24 Oct 2014 17:07:13 +0200 +Subject: [PATCH] KVM: x86: Prevent host from panicking on shared MSR writes. + +The previous patch blocked invalid writes directly when the MSR +is written. As a precaution, prevent future similar mistakes by +gracefulling handle GPs caused by writes to shared MSRs. + +Cc: stable@vger.kernel.org +Signed-off-by: Andrew Honig <ahonig@google.com> +[Remove parts obsoleted by Nadav's patch. - Paolo] +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +--- + arch/x86/include/asm/kvm_host.h | 2 +- + arch/x86/kvm/vmx.c | 7 +++++-- + arch/x86/kvm/x86.c | 11 ++++++++--- + 3 files changed, 14 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index ccc94de4ac49..6ed0c30d6a0c 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -1064,7 +1064,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address); + + void kvm_define_shared_msr(unsigned index, u32 msr); +-void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); ++int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); + + bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); + +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 148020a7dd98..7e2c098b59c9 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -2659,12 +2659,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) + default: + msr = find_msr_entry(vmx, msr_index); + if (msr) { ++ u64 old_msr_data = msr->data; + msr->data = data; + if (msr - vmx->guest_msrs < vmx->save_nmsrs) { + preempt_disable(); +- kvm_set_shared_msr(msr->index, msr->data, +- msr->mask); ++ ret = kvm_set_shared_msr(msr->index, msr->data, ++ msr->mask); + preempt_enable(); ++ if (ret) ++ msr->data = old_msr_data; + } + break; + } +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 5a7195573a32..0033df32a745 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -229,20 +229,25 @@ static void kvm_shared_msr_cpu_online(void) + shared_msr_update(i, shared_msrs_global.msrs[i]); + } + +-void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) ++int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) + { + unsigned int cpu = smp_processor_id(); + struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); ++ int err; + + if (((value ^ smsr->values[slot].curr) & mask) == 0) +- return; ++ return 0; + smsr->values[slot].curr = value; +- wrmsrl(shared_msrs_global.msrs[slot], value); ++ err = wrmsrl_safe(shared_msrs_global.msrs[slot], value); ++ if (err) ++ return 1; ++ + if (!smsr->registered) { + smsr->urn.on_user_return = kvm_on_user_return; + user_return_notifier_register(&smsr->urn); + smsr->registered = true; + } ++ return 0; + } + EXPORT_SYMBOL_GPL(kvm_set_shared_msr); + +-- +1.9.3 + diff --git a/kernel.spec b/kernel.spec index fd4fb2e20..adbf99c8f 100644 --- a/kernel.spec +++ b/kernel.spec @@ -42,7 +42,7 @@ Summary: The Linux kernel # For non-released -rc kernels, this will be appended after the rcX and # gitX tags, so a 3 here would become part of release "0.rcX.gitX.3" # -%global baserelease 1 +%global baserelease 3 %global fedora_build %{baserelease} # base_sublevel is the kernel version we're starting with and patching @@ -620,6 +620,25 @@ Patch26058: asus-nb-wmi-Add-wapf4-quirk-for-the-X550VB.patch #rhbz 1111138 Patch26059: i8042-Add-notimeout-quirk-for-Fujitsu-Lifebook-A544-.patch +# CVE-2014-3610 kvm: noncanonical MSR writes (rhbz 1144883 1156543) +# CVE-2014-3611 kvm: PIT timer race condition (rhbz 1144878 1156537) +# CVE-2014-3646 kvm: vmx: invvpid vm exit not handled (rhbz 1144825 1156534) +# CVE-2014-8369 kvm: excessive pages un-pinning in kvm_iommu_map error path (rhbz 1156518 1156522) +# CVE-2014-8480 CVE-2014-8481 kvm: NULL pointer dereference during rip relative instruction emulation (rhbz 1156615 1156616) +Patch26070: KVM-x86-Check-non-canonical-addresses-upon-WRMSR.patch +Patch26071: KVM-x86-Prevent-host-from-panicking-on-shared-MSR-wr.patch +Patch26072: KVM-x86-Improve-thread-safety-in-pit.patch +Patch26073: KVM-x86-Fix-wrong-masking-on-relative-jump-call.patch +Patch26074: KVM-x86-Emulator-fixes-for-eip-canonical-checks-on-n.patch +Patch26075: KVM-x86-Handle-errors-when-RIP-is-set-during-far-jum.patch +Patch26076: kvm-vmx-handle-invvpid-vm-exit-gracefully.patch +Patch26077: kvm-x86-don-t-kill-guest-on-unknown-exit-reason.patch +Patch26078: KVM-x86-Decoding-guest-instructions-which-cross-page.patch +Patch26079: KVM-emulate-avoid-accessing-NULL-ctxt-memopp.patch +Patch26080: KVM-x86-Emulator-does-not-decode-clflush-well.patch +Patch26081: KVM-x86-PREFETCH-and-HINT_NOP-should-have-SrcMem-fla.patch +Patch26082: kvm-fix-excessive-pages-un-pinning-in-kvm_iommu_map-.patch + # git clone ssh://git.fedorahosted.org/git/kernel-arm64.git, git diff master...devel Patch30000: kernel-arm64.patch @@ -1349,6 +1368,25 @@ ApplyPatch asus-nb-wmi-Add-wapf4-quirk-for-the-X550VB.patch #rhbz 1111138 ApplyPatch i8042-Add-notimeout-quirk-for-Fujitsu-Lifebook-A544-.patch +# CVE-2014-3610 kvm: noncanonical MSR writes (rhbz 1144883 1156543) +# CVE-2014-3611 kvm: PIT timer race condition (rhbz 1144878 1156537) +# CVE-2014-3646 kvm: vmx: invvpid vm exit not handled (rhbz 1144825 1156534) +# CVE-2014-8369 kvm: excessive pages un-pinning in kvm_iommu_map error path (rhbz 1156518 1156522) +# CVE-2014-8480 CVE-2014-8481 kvm: NULL pointer dereference during rip relative instruction emulation (rhbz 1156615 1156616) +ApplyPatch KVM-x86-Check-non-canonical-addresses-upon-WRMSR.patch +ApplyPatch KVM-x86-Prevent-host-from-panicking-on-shared-MSR-wr.patch +ApplyPatch KVM-x86-Improve-thread-safety-in-pit.patch +ApplyPatch KVM-x86-Fix-wrong-masking-on-relative-jump-call.patch +ApplyPatch KVM-x86-Emulator-fixes-for-eip-canonical-checks-on-n.patch +ApplyPatch KVM-x86-Handle-errors-when-RIP-is-set-during-far-jum.patch +ApplyPatch kvm-vmx-handle-invvpid-vm-exit-gracefully.patch +ApplyPatch kvm-x86-don-t-kill-guest-on-unknown-exit-reason.patch +ApplyPatch KVM-x86-Decoding-guest-instructions-which-cross-page.patch +ApplyPatch KVM-emulate-avoid-accessing-NULL-ctxt-memopp.patch +ApplyPatch KVM-x86-Emulator-does-not-decode-clflush-well.patch +ApplyPatch KVM-x86-PREFETCH-and-HINT_NOP-should-have-SrcMem-fla.patch +ApplyPatch kvm-fix-excessive-pages-un-pinning-in-kvm_iommu_map-.patch + %if 0%{?aarch64patches} ApplyPatch kernel-arm64.patch %ifnarch aarch64 # this is stupid, but i want to notice before secondary koji does. @@ -2217,6 +2255,13 @@ fi # ||----w | # || || %changelog +* Fri Oct 24 2014 Josh Boyer <jwboyer@fedoraproject.org> +- CVE-2014-3610 kvm: noncanonical MSR writes (rhbz 1144883 1156543) +- CVE-2014-3611 kvm: PIT timer race condition (rhbz 1144878 1156537) +- CVE-2014-3646 kvm: vmx: invvpid vm exit not handled (rhbz 1144825 1156534) +- CVE-2014-8369 kvm: excessive pages un-pinning in kvm_iommu_map error path (rhbz 1156518 1156522) +- CVE-2014-8480 CVE-2014-8481 kvm: NULL pointer dereference during rip relative instruction emulation (rhbz 1156615 1156616) + * Fri Oct 24 2014 Josh Boyer <jwboyer@fedoraproject.org> - 3.18.0-0.rc1.git3.1 - Linux v3.18-rc1-280-g816fb4175c29 - Add touchpad quirk for Fujitsu Lifebook A544/AH544 models (rhbz 1111138) diff --git a/kvm-fix-excessive-pages-un-pinning-in-kvm_iommu_map-.patch b/kvm-fix-excessive-pages-un-pinning-in-kvm_iommu_map-.patch new file mode 100644 index 000000000..3209468fa --- /dev/null +++ b/kvm-fix-excessive-pages-un-pinning-in-kvm_iommu_map-.patch @@ -0,0 +1,78 @@ +From: Quentin Casasnovas <quentin.casasnovas@oracle.com> +Date: Fri, 24 Oct 2014 17:07:24 +0200 +Subject: [PATCH] kvm: fix excessive pages un-pinning in kvm_iommu_map error + path. + +The third parameter of kvm_unpin_pages() when called from +kvm_iommu_map_pages() is wrong, it should be the number of pages to un-pin +and not the page size. + +This error was facilitated with an inconsistent API: kvm_pin_pages() takes +a size, but kvn_unpin_pages() takes a number of pages, so fix the problem +by matching the two. + +This was introduced by commit 350b8bd ("kvm: iommu: fix the third parameter +of kvm_iommu_put_pages (CVE-2014-3601)"), which fixes the lack of +un-pinning for pages intended to be un-pinned (i.e. memory leak) but +unfortunately potentially aggravated the number of pages we un-pin that +should have stayed pinned. As far as I understand though, the same +practical mitigations apply. + +This issue was found during review of Red Hat 6.6 patches to prepare +Ksplice rebootless updates. + +Thanks to Vegard for his time on a late Friday evening to help me in +understanding this code. + +Fixes: 350b8bd ("kvm: iommu: fix the third parameter of... (CVE-2014-3601)") +Cc: stable@vger.kernel.org +Signed-off-by: Quentin Casasnovas <quentin.casasnovas@oracle.com> +Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com> +Signed-off-by: Jamie Iles <jamie.iles@oracle.com> +Reviewed-by: Sasha Levin <sasha.levin@oracle.com> +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +--- + virt/kvm/iommu.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c +index e51d9f9b995f..c1e6ae989a43 100644 +--- a/virt/kvm/iommu.c ++++ b/virt/kvm/iommu.c +@@ -43,13 +43,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages); + + static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, +- unsigned long size) ++ unsigned long npages) + { + gfn_t end_gfn; + pfn_t pfn; + + pfn = gfn_to_pfn_memslot(slot, gfn); +- end_gfn = gfn + (size >> PAGE_SHIFT); ++ end_gfn = gfn + npages; + gfn += 1; + + if (is_error_noslot_pfn(pfn)) +@@ -119,7 +119,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) + * Pin all pages we are about to map in memory. This is + * important because we unmap and unpin in 4kb steps later. + */ +- pfn = kvm_pin_pages(slot, gfn, page_size); ++ pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT); + if (is_error_noslot_pfn(pfn)) { + gfn += 1; + continue; +@@ -131,7 +131,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) + if (r) { + printk(KERN_ERR "kvm_iommu_map_address:" + "iommu failed to map pfn=%llx\n", pfn); +- kvm_unpin_pages(kvm, pfn, page_size); ++ kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT); + goto unmap_pages; + } + +-- +1.9.3 + diff --git a/kvm-vmx-handle-invvpid-vm-exit-gracefully.patch b/kvm-vmx-handle-invvpid-vm-exit-gracefully.patch new file mode 100644 index 000000000..154cd4875 --- /dev/null +++ b/kvm-vmx-handle-invvpid-vm-exit-gracefully.patch @@ -0,0 +1,78 @@ +From: Petr Matousek <pmatouse@redhat.com> +Date: Fri, 24 Oct 2014 17:07:18 +0200 +Subject: [PATCH] kvm: vmx: handle invvpid vm exit gracefully + +On systems with invvpid instruction support (corresponding bit in +IA32_VMX_EPT_VPID_CAP MSR is set) guest invocation of invvpid +causes vm exit, which is currently not handled and results in +propagation of unknown exit to userspace. + +Fix this by installing an invvpid vm exit handler. + +This is CVE-2014-3646. + +Cc: stable@vger.kernel.org +Signed-off-by: Petr Matousek <pmatouse@redhat.com> +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +--- + arch/x86/include/uapi/asm/vmx.h | 2 ++ + arch/x86/kvm/vmx.c | 9 ++++++++- + 2 files changed, 10 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h +index 0e79420376eb..990a2fe1588d 100644 +--- a/arch/x86/include/uapi/asm/vmx.h ++++ b/arch/x86/include/uapi/asm/vmx.h +@@ -67,6 +67,7 @@ + #define EXIT_REASON_EPT_MISCONFIG 49 + #define EXIT_REASON_INVEPT 50 + #define EXIT_REASON_PREEMPTION_TIMER 52 ++#define EXIT_REASON_INVVPID 53 + #define EXIT_REASON_WBINVD 54 + #define EXIT_REASON_XSETBV 55 + #define EXIT_REASON_APIC_WRITE 56 +@@ -114,6 +115,7 @@ + { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ + { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ + { EXIT_REASON_INVD, "INVD" }, \ ++ { EXIT_REASON_INVVPID, "INVVPID" }, \ + { EXIT_REASON_INVPCID, "INVPCID" } + + #endif /* _UAPIVMX_H */ +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index 7e2c098b59c9..cf3cd079ec52 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -6746,6 +6746,12 @@ static int handle_invept(struct kvm_vcpu *vcpu) + return 1; + } + ++static int handle_invvpid(struct kvm_vcpu *vcpu) ++{ ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++} ++ + /* + * The exit handlers return 1 if the exit was handled fully and guest execution + * may resume. Otherwise they set the kvm_run parameter to indicate what needs +@@ -6791,6 +6797,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { + [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait, + [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, + [EXIT_REASON_INVEPT] = handle_invept, ++ [EXIT_REASON_INVVPID] = handle_invvpid, + }; + + static const int kvm_vmx_max_exit_handlers = +@@ -7026,7 +7033,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) + case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: + case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: + case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: +- case EXIT_REASON_INVEPT: ++ case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID: + /* + * VMX instructions trap unconditionally. This allows L1 to + * emulate them for its L2 guest, i.e., allows 3-level nesting! +-- +1.9.3 + diff --git a/kvm-x86-don-t-kill-guest-on-unknown-exit-reason.patch b/kvm-x86-don-t-kill-guest-on-unknown-exit-reason.patch new file mode 100644 index 000000000..8f7fb3f23 --- /dev/null +++ b/kvm-x86-don-t-kill-guest-on-unknown-exit-reason.patch @@ -0,0 +1,54 @@ +From: "Michael S. Tsirkin" <mst@redhat.com> +Date: Fri, 24 Oct 2014 17:07:19 +0200 +Subject: [PATCH] kvm: x86: don't kill guest on unknown exit reason + +KVM_EXIT_UNKNOWN is a kvm bug, we don't really know whether it was +triggered by a priveledged application. Let's not kill the guest: WARN +and inject #UD instead. + +Cc: stable@vger.kernel.org +Signed-off-by: Michael S. Tsirkin <mst@redhat.com> +Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> +--- + arch/x86/kvm/svm.c | 6 +++--- + arch/x86/kvm/vmx.c | 6 +++--- + 2 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c +index 00bed2c5e948..7527cefc5a43 100644 +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -3551,9 +3551,9 @@ static int handle_exit(struct kvm_vcpu *vcpu) + + if (exit_code >= ARRAY_SIZE(svm_exit_handlers) + || !svm_exit_handlers[exit_code]) { +- kvm_run->exit_reason = KVM_EXIT_UNKNOWN; +- kvm_run->hw.hardware_exit_reason = exit_code; +- return 0; ++ WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code); ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; + } + + return svm_exit_handlers[exit_code](svm); +diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c +index cf3cd079ec52..a8b76c4c95e2 100644 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -7174,10 +7174,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) + && kvm_vmx_exit_handlers[exit_reason]) + return kvm_vmx_exit_handlers[exit_reason](vcpu); + else { +- vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; +- vcpu->run->hw.hardware_exit_reason = exit_reason; ++ WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason); ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; + } +- return 0; + } + + static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) +-- +1.9.3 + |