diff options
Diffstat (limited to 'patch-5.18-redhat.patch')
-rw-r--r-- | patch-5.18-redhat.patch | 4645 |
1 files changed, 4591 insertions, 54 deletions
diff --git a/patch-5.18-redhat.patch b/patch-5.18-redhat.patch index 570f2adcb..522818b55 100644 --- a/patch-5.18-redhat.patch +++ b/patch-5.18-redhat.patch @@ -1,52 +1,168 @@ + Documentation/admin-guide/kernel-parameters.txt | 25 ++ Makefile | 4 + arch/arm/Kconfig | 4 +- arch/arm64/Kconfig | 3 +- arch/s390/include/asm/ipl.h | 1 + arch/s390/kernel/ipl.c | 5 + arch/s390/kernel/setup.c | 4 + + arch/x86/Kconfig | 103 ++++- + arch/x86/Makefile | 6 + arch/x86/boot/header.S | 4 + + arch/x86/entry/Makefile | 2 +- + arch/x86/entry/calling.h | 72 +++- + arch/x86/entry/entry.S | 22 + + arch/x86/entry/entry_32.S | 2 - + arch/x86/entry/entry_64.S | 88 +++- + arch/x86/entry/entry_64_compat.S | 21 +- + arch/x86/entry/vdso/Makefile | 1 + + arch/x86/entry/vsyscall/vsyscall_emu_64.S | 9 +- + arch/x86/include/asm/alternative.h | 1 + + arch/x86/include/asm/cpufeatures.h | 12 +- + arch/x86/include/asm/disabled-features.h | 21 +- arch/x86/include/asm/efi.h | 5 + - arch/x86/kernel/setup.c | 22 ++-- - drivers/acpi/apei/hest.c | 8 ++ - drivers/acpi/irq.c | 17 ++- - drivers/acpi/scan.c | 9 ++ - drivers/ata/libahci.c | 18 +++ - drivers/char/ipmi/ipmi_dmi.c | 15 +++ - drivers/char/ipmi/ipmi_msghandler.c | 16 ++- - drivers/firmware/efi/Kconfig | 12 ++ + arch/x86/include/asm/linkage.h | 8 + + arch/x86/include/asm/msr-index.h | 13 + + arch/x86/include/asm/nospec-branch.h | 68 ++- + arch/x86/include/asm/static_call.h | 19 +- + arch/x86/include/asm/traps.h | 2 +- + arch/x86/include/asm/unwind_hints.h | 14 +- + arch/x86/kernel/alternative.c | 69 +++ + arch/x86/kernel/asm-offsets.c | 6 + + arch/x86/kernel/cpu/amd.c | 46 +- + arch/x86/kernel/cpu/bugs.c | 475 ++++++++++++++++++--- + arch/x86/kernel/cpu/common.c | 61 ++- + arch/x86/kernel/cpu/cpu.h | 2 + + arch/x86/kernel/cpu/hygon.c | 6 + + arch/x86/kernel/cpu/scattered.c | 1 + + arch/x86/kernel/ftrace.c | 7 +- + arch/x86/kernel/head_64.S | 5 + + arch/x86/kernel/module.c | 8 +- + arch/x86/kernel/process.c | 2 +- + arch/x86/kernel/relocate_kernel_32.S | 25 +- + arch/x86/kernel/relocate_kernel_64.S | 23 +- + arch/x86/kernel/setup.c | 22 +- + arch/x86/kernel/static_call.c | 51 ++- + arch/x86/kernel/traps.c | 19 +- + arch/x86/kernel/vmlinux.lds.S | 9 +- + arch/x86/kvm/emulate.c | 28 +- + arch/x86/kvm/svm/vmenter.S | 18 + + arch/x86/kvm/vmx/capabilities.h | 4 +- + arch/x86/kvm/vmx/nested.c | 2 +- + arch/x86/kvm/vmx/run_flags.h | 8 + + arch/x86/kvm/vmx/vmenter.S | 194 +++++---- + arch/x86/kvm/vmx/vmx.c | 84 ++-- + arch/x86/kvm/vmx/vmx.h | 10 +- + arch/x86/kvm/vmx/vmx_ops.h | 2 +- + arch/x86/kvm/x86.c | 4 +- + arch/x86/lib/memmove_64.S | 7 +- + arch/x86/lib/retpoline.S | 79 +++- + arch/x86/mm/mem_encrypt_boot.S | 10 +- + arch/x86/net/bpf_jit_comp.c | 26 +- + arch/x86/xen/setup.c | 6 +- + arch/x86/xen/xen-asm.S | 30 +- + arch/x86/xen/xen-head.S | 1 + + arch/x86/xen/xen-ops.h | 6 +- + drivers/acpi/apei/hest.c | 8 + + drivers/acpi/irq.c | 17 +- + drivers/acpi/scan.c | 9 + + drivers/ata/libahci.c | 18 + + drivers/base/cpu.c | 8 + + drivers/char/ipmi/ipmi_dmi.c | 15 + + drivers/char/ipmi/ipmi_msghandler.c | 16 +- + drivers/firmware/efi/Kconfig | 12 + drivers/firmware/efi/Makefile | 1 + - drivers/firmware/efi/efi.c | 124 +++++++++++++++------ - drivers/firmware/efi/libstub/efistub.h | 74 ++++++++++++ - drivers/firmware/efi/libstub/x86-stub.c | 119 +++++++++++++++++++- - drivers/firmware/efi/secureboot.c | 38 +++++++ - drivers/firmware/sysfb.c | 18 ++- - drivers/gpu/drm/i915/display/intel_psr.c | 9 ++ - drivers/hid/hid-rmi.c | 64 ----------- - drivers/hwtracing/coresight/coresight-etm4x-core.c | 19 ++++ - drivers/input/rmi4/rmi_driver.c | 124 ++++++++++++--------- - drivers/iommu/iommu.c | 22 ++++ - drivers/nvme/host/core.c | 22 +++- - drivers/nvme/host/multipath.c | 19 ++-- + drivers/firmware/efi/efi.c | 124 ++++-- + drivers/firmware/efi/libstub/efistub.h | 74 ++++ + drivers/firmware/efi/libstub/x86-stub.c | 119 +++++- + drivers/firmware/efi/secureboot.c | 38 ++ + drivers/firmware/sysfb.c | 18 +- + drivers/gpu/drm/i915/display/intel_psr.c | 9 + + drivers/hid/hid-rmi.c | 64 --- + drivers/hwtracing/coresight/coresight-etm4x-core.c | 19 + + drivers/idle/intel_idle.c | 44 +- + drivers/input/rmi4/rmi_driver.c | 124 +++--- + drivers/iommu/iommu.c | 22 + + drivers/nvme/host/core.c | 22 +- + drivers/nvme/host/multipath.c | 19 +- drivers/nvme/host/nvme.h | 4 + - drivers/pci/quirks.c | 24 ++++ - drivers/usb/core/hub.c | 7 ++ - include/linux/efi.h | 24 ++-- + drivers/pci/quirks.c | 24 ++ + drivers/usb/core/hub.c | 7 + + include/linux/cpu.h | 2 + + include/linux/efi.h | 24 +- + include/linux/kvm_host.h | 2 +- include/linux/lsm_hook_defs.h | 2 + include/linux/lsm_hooks.h | 6 + + include/linux/objtool.h | 9 +- include/linux/rmi.h | 1 + include/linux/security.h | 5 + init/Kconfig | 2 +- kernel/module_signing.c | 9 +- - net/netfilter/nf_tables_api.c | 9 +- + scripts/Makefile.build | 1 + + scripts/link-vmlinux.sh | 3 + scripts/tags.sh | 2 + + security/Kconfig | 11 - security/integrity/platform_certs/load_uefi.c | 6 +- - security/lockdown/Kconfig | 13 +++ + security/lockdown/Kconfig | 13 + security/lockdown/lockdown.c | 1 + security/security.c | 6 + - 45 files changed, 729 insertions(+), 192 deletions(-) - + tools/arch/x86/include/asm/msr-index.h | 9 + + tools/include/linux/objtool.h | 9 +- + tools/objtool/arch/x86/decode.c | 5 + + tools/objtool/builtin-check.c | 4 +- + tools/objtool/check.c | 331 +++++++++++++- + tools/objtool/include/objtool/arch.h | 1 + + tools/objtool/include/objtool/builtin.h | 2 +- + tools/objtool/include/objtool/check.h | 24 +- + tools/objtool/include/objtool/elf.h | 1 + + tools/objtool/include/objtool/objtool.h | 1 + + tools/objtool/objtool.c | 1 + + 118 files changed, 2609 insertions(+), 614 deletions(-) + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index c4893782055b..eb92195ca015 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -5124,6 +5124,30 @@ + + retain_initrd [RAM] Keep initrd memory after extraction + ++ retbleed= [X86] Control mitigation of RETBleed (Arbitrary ++ Speculative Code Execution with Return Instructions) ++ vulnerability. ++ ++ off - no mitigation ++ auto - automatically select a migitation ++ auto,nosmt - automatically select a mitigation, ++ disabling SMT if necessary for ++ the full mitigation (only on Zen1 ++ and older without STIBP). ++ ibpb - mitigate short speculation windows on ++ basic block boundaries too. Safe, highest ++ perf impact. ++ unret - force enable untrained return thunks, ++ only effective on AMD f15h-f17h ++ based systems. ++ unret,nosmt - like unret, will disable SMT when STIBP ++ is not available. ++ ++ Selecting 'auto' will choose a mitigation method at run ++ time according to the CPU. ++ ++ Not specifying this option is equivalent to retbleed=auto. ++ + rfkill.default_state= + 0 "airplane mode". All wifi, bluetooth, wimax, gps, fm, + etc. communication is blocked by default. +@@ -5482,6 +5506,7 @@ + eibrs - enhanced IBRS + eibrs,retpoline - enhanced IBRS + Retpolines + eibrs,lfence - enhanced IBRS + LFENCE ++ ibrs - use IBRS to protect kernel + + Not specifying this option is equivalent to + spectre_v2=auto. diff --git a/Makefile b/Makefile -index 088b84f99203..53ce8dbdd481 100644 +index 323032d60ac3..bbb113602cc8 100644 --- a/Makefile +++ b/Makefile @@ -18,6 +18,10 @@ $(if $(filter __%, $(MAKECMDGOALS)), \ @@ -144,6 +260,144 @@ index 2cef49983e9e..c50998b4b554 100644 /* Have one command line that is parsed and saved in /proc/cmdline */ /* boot_command_line has been already set up in early.c */ *cmdline_p = boot_command_line; +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index b2c65f573353..4d1d87f76a74 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -457,27 +457,6 @@ config GOLDFISH + def_bool y + depends on X86_GOLDFISH + +-config RETPOLINE +- bool "Avoid speculative indirect branches in kernel" +- default y +- help +- Compile kernel with the retpoline compiler options to guard against +- kernel-to-user data leaks by avoiding speculative indirect +- branches. Requires a compiler with -mindirect-branch=thunk-extern +- support for full protection. The kernel may run slower. +- +-config CC_HAS_SLS +- def_bool $(cc-option,-mharden-sls=all) +- +-config SLS +- bool "Mitigate Straight-Line-Speculation" +- depends on CC_HAS_SLS && X86_64 +- default n +- help +- Compile the kernel with straight-line-speculation options to guard +- against straight line speculation. The kernel image might be slightly +- larger. +- + config X86_CPU_RESCTRL + bool "x86 CPU resource control support" + depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) +@@ -2449,6 +2428,88 @@ source "kernel/livepatch/Kconfig" + + endmenu + ++config CC_HAS_SLS ++ def_bool $(cc-option,-mharden-sls=all) ++ ++config CC_HAS_RETURN_THUNK ++ def_bool $(cc-option,-mfunction-return=thunk-extern) ++ ++menuconfig SPECULATION_MITIGATIONS ++ bool "Mitigations for speculative execution vulnerabilities" ++ default y ++ help ++ Say Y here to enable options which enable mitigations for ++ speculative execution hardware vulnerabilities. ++ ++ If you say N, all mitigations will be disabled. You really ++ should know what you are doing to say so. ++ ++if SPECULATION_MITIGATIONS ++ ++config PAGE_TABLE_ISOLATION ++ bool "Remove the kernel mapping in user mode" ++ default y ++ depends on (X86_64 || X86_PAE) ++ help ++ This feature reduces the number of hardware side channels by ++ ensuring that the majority of kernel addresses are not mapped ++ into userspace. ++ ++ See Documentation/x86/pti.rst for more details. ++ ++config RETPOLINE ++ bool "Avoid speculative indirect branches in kernel" ++ default y ++ help ++ Compile kernel with the retpoline compiler options to guard against ++ kernel-to-user data leaks by avoiding speculative indirect ++ branches. Requires a compiler with -mindirect-branch=thunk-extern ++ support for full protection. The kernel may run slower. ++ ++config RETHUNK ++ bool "Enable return-thunks" ++ depends on RETPOLINE && CC_HAS_RETURN_THUNK ++ default y ++ help ++ Compile the kernel with the return-thunks compiler option to guard ++ against kernel-to-user data leaks by avoiding return speculation. ++ Requires a compiler with -mfunction-return=thunk-extern ++ support for full protection. The kernel may run slower. ++ ++config CPU_UNRET_ENTRY ++ bool "Enable UNRET on kernel entry" ++ depends on CPU_SUP_AMD && RETHUNK ++ default y ++ help ++ Compile the kernel with support for the retbleed=unret mitigation. ++ ++config CPU_IBPB_ENTRY ++ bool "Enable IBPB on kernel entry" ++ depends on CPU_SUP_AMD ++ default y ++ help ++ Compile the kernel with support for the retbleed=ibpb mitigation. ++ ++config CPU_IBRS_ENTRY ++ bool "Enable IBRS on kernel entry" ++ depends on CPU_SUP_INTEL ++ default y ++ help ++ Compile the kernel with support for the spectre_v2=ibrs mitigation. ++ This mitigates both spectre_v2 and retbleed at great cost to ++ performance. ++ ++config SLS ++ bool "Mitigate Straight-Line-Speculation" ++ depends on CC_HAS_SLS && X86_64 ++ default n ++ help ++ Compile the kernel with straight-line-speculation options to guard ++ against straight line speculation. The kernel image might be slightly ++ larger. ++ ++endif ++ + config ARCH_HAS_ADD_PAGES + def_bool y + depends on ARCH_ENABLE_MEMORY_HOTPLUG +diff --git a/arch/x86/Makefile b/arch/x86/Makefile +index 63d50f65b828..fb0de637411c 100644 +--- a/arch/x86/Makefile ++++ b/arch/x86/Makefile +@@ -21,6 +21,12 @@ ifdef CONFIG_CC_IS_CLANG + RETPOLINE_CFLAGS := -mretpoline-external-thunk + RETPOLINE_VDSO_CFLAGS := -mretpoline + endif ++ ++ifdef CONFIG_RETHUNK ++RETHUNK_CFLAGS := -mfunction-return=thunk-extern ++RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS) ++endif ++ + export RETPOLINE_CFLAGS + export RETPOLINE_VDSO_CFLAGS + diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 6dbd7e9f74c9..0352e4589efa 100644 --- a/arch/x86/boot/header.S @@ -160,6 +414,646 @@ index 6dbd7e9f74c9..0352e4589efa 100644 #ifdef CONFIG_X86_32 .long 0 # SizeOfStackReserve .long 0 # SizeOfStackCommit +diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile +index 7fec5dcf6438..eeadbd7d92cc 100644 +--- a/arch/x86/entry/Makefile ++++ b/arch/x86/entry/Makefile +@@ -11,7 +11,7 @@ CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) + + CFLAGS_common.o += -fno-stack-protector + +-obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o ++obj-y := entry.o entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o + obj-y += common.o + + obj-y += vdso/ +diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h +index a4c061fb7c6e..b00a3a95fbfa 100644 +--- a/arch/x86/entry/calling.h ++++ b/arch/x86/entry/calling.h +@@ -7,6 +7,8 @@ + #include <asm/asm-offsets.h> + #include <asm/processor-flags.h> + #include <asm/ptrace-abi.h> ++#include <asm/msr.h> ++#include <asm/nospec-branch.h> + + /* + +@@ -119,27 +121,19 @@ For 32-bit we have the following conventions - kernel is built with + CLEAR_REGS + .endm + +-.macro POP_REGS pop_rdi=1 skip_r11rcx=0 ++.macro POP_REGS pop_rdi=1 + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx +- .if \skip_r11rcx +- popq %rsi +- .else + popq %r11 +- .endif + popq %r10 + popq %r9 + popq %r8 + popq %rax +- .if \skip_r11rcx +- popq %rsi +- .else + popq %rcx +- .endif + popq %rdx + popq %rsi + .if \pop_rdi +@@ -289,6 +283,66 @@ For 32-bit we have the following conventions - kernel is built with + + #endif + ++/* ++ * IBRS kernel mitigation for Spectre_v2. ++ * ++ * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers ++ * the regs it uses (AX, CX, DX). Must be called before the first RET ++ * instruction (NOTE! UNTRAIN_RET includes a RET instruction) ++ * ++ * The optional argument is used to save/restore the current value, ++ * which is used on the paranoid paths. ++ * ++ * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. ++ */ ++.macro IBRS_ENTER save_reg ++#ifdef CONFIG_CPU_IBRS_ENTRY ++ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS ++ movl $MSR_IA32_SPEC_CTRL, %ecx ++ ++.ifnb \save_reg ++ rdmsr ++ shl $32, %rdx ++ or %rdx, %rax ++ mov %rax, \save_reg ++ test $SPEC_CTRL_IBRS, %eax ++ jz .Ldo_wrmsr_\@ ++ lfence ++ jmp .Lend_\@ ++.Ldo_wrmsr_\@: ++.endif ++ ++ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx ++ movl %edx, %eax ++ shr $32, %rdx ++ wrmsr ++.Lend_\@: ++#endif ++.endm ++ ++/* ++ * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX) ++ * regs. Must be called after the last RET. ++ */ ++.macro IBRS_EXIT save_reg ++#ifdef CONFIG_CPU_IBRS_ENTRY ++ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS ++ movl $MSR_IA32_SPEC_CTRL, %ecx ++ ++.ifnb \save_reg ++ mov \save_reg, %rdx ++.else ++ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx ++ andl $(~SPEC_CTRL_IBRS), %edx ++.endif ++ ++ movl %edx, %eax ++ shr $32, %rdx ++ wrmsr ++.Lend_\@: ++#endif ++.endm ++ + /* + * Mitigate Spectre v1 for conditional swapgs code paths. + * +diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S +new file mode 100644 +index 000000000000..bfb7bcb362bc +--- /dev/null ++++ b/arch/x86/entry/entry.S +@@ -0,0 +1,22 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Common place for both 32- and 64-bit entry routines. ++ */ ++ ++#include <linux/linkage.h> ++#include <asm/export.h> ++#include <asm/msr-index.h> ++ ++.pushsection .noinstr.text, "ax" ++ ++SYM_FUNC_START(entry_ibpb) ++ movl $MSR_IA32_PRED_CMD, %ecx ++ movl $PRED_CMD_IBPB, %eax ++ xorl %edx, %edx ++ wrmsr ++ RET ++SYM_FUNC_END(entry_ibpb) ++/* For KVM */ ++EXPORT_SYMBOL_GPL(entry_ibpb); ++ ++.popsection +diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S +index 887420844066..e309e7156038 100644 +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -698,7 +698,6 @@ SYM_CODE_START(__switch_to_asm) + movl %ebx, PER_CPU_VAR(__stack_chk_guard) + #endif + +-#ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated +@@ -707,7 +706,6 @@ SYM_CODE_START(__switch_to_asm) + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +-#endif + + /* Restore flags or the incoming task to restore AC state. */ + popfl +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index d8376e5fe1af..2ea185d47cfd 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -85,7 +85,7 @@ + */ + + SYM_CODE_START(entry_SYSCALL_64) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + ENDBR + + swapgs +@@ -112,6 +112,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) + movq %rsp, %rdi + /* Sign extend the lower 32bit as syscall numbers are treated as int */ + movslq %eax, %rsi ++ ++ /* clobbers %rax, make sure it is after saving the syscall nr */ ++ IBRS_ENTER ++ UNTRAIN_RET ++ + call do_syscall_64 /* returns with IRQs disabled */ + + /* +@@ -191,8 +196,8 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) + * perf profiles. Nothing jumps here. + */ + syscall_return_via_sysret: +- /* rcx and r11 are already restored (see code above) */ +- POP_REGS pop_rdi=0 skip_r11rcx=1 ++ IBRS_EXIT ++ POP_REGS pop_rdi=0 + + /* + * Now all regs are restored except RSP and RDI. +@@ -245,7 +250,6 @@ SYM_FUNC_START(__switch_to_asm) + movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset + #endif + +-#ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated +@@ -254,7 +258,6 @@ SYM_FUNC_START(__switch_to_asm) + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +-#endif + + /* restore callee-saved registers */ + popq %r15 +@@ -318,6 +321,14 @@ SYM_CODE_END(ret_from_fork) + #endif + .endm + ++SYM_CODE_START_LOCAL(xen_error_entry) ++ UNWIND_HINT_FUNC ++ PUSH_AND_CLEAR_REGS save_ret=1 ++ ENCODE_FRAME_POINTER 8 ++ UNTRAIN_RET ++ RET ++SYM_CODE_END(xen_error_entry) ++ + /** + * idtentry_body - Macro to emit code calling the C function + * @cfunc: C function to be called +@@ -325,7 +336,18 @@ SYM_CODE_END(ret_from_fork) + */ + .macro idtentry_body cfunc has_error_code:req + +- call error_entry ++ /* ++ * Call error_entry() and switch to the task stack if from userspace. ++ * ++ * When in XENPV, it is already in the task stack, and it can't fault ++ * for native_iret() nor native_load_gs_index() since XENPV uses its ++ * own pvops for IRET and load_gs_index(). And it doesn't need to ++ * switch the CR3. So it can skip invoking error_entry(). ++ */ ++ ALTERNATIVE "call error_entry; movq %rax, %rsp", \ ++ "call xen_error_entry", X86_FEATURE_XENPV ++ ++ ENCODE_FRAME_POINTER + UNWIND_HINT_REGS + + movq %rsp, %rdi /* pt_regs pointer into 1st argument*/ +@@ -582,6 +604,7 @@ __irqentry_text_end: + + SYM_CODE_START_LOCAL(common_interrupt_return) + SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) ++ IBRS_EXIT + #ifdef CONFIG_DEBUG_ENTRY + /* Assert that pt_regs indicates user mode. */ + testb $3, CS(%rsp) +@@ -695,6 +718,7 @@ native_irq_return_ldt: + pushq %rdi /* Stash user RDI */ + swapgs /* to kernel GS */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */ ++ UNTRAIN_RET + + movq PER_CPU_VAR(espfix_waddr), %rdi + movq %rax, (0*8)(%rdi) /* user RAX */ +@@ -867,6 +891,9 @@ SYM_CODE_END(xen_failsafe_callback) + * 1 -> no SWAPGS on exit + * + * Y GSBASE value at entry, must be restored in paranoid_exit ++ * ++ * R14 - old CR3 ++ * R15 - old SPEC_CTRL + */ + SYM_CODE_START_LOCAL(paranoid_entry) + UNWIND_HINT_FUNC +@@ -911,7 +938,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) + * is needed here. + */ + SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx +- RET ++ jmp .Lparanoid_gsbase_done + + .Lparanoid_entry_checkgs: + /* EBX = 1 -> kernel GSBASE active, no restore required */ +@@ -930,8 +957,16 @@ SYM_CODE_START_LOCAL(paranoid_entry) + xorl %ebx, %ebx + swapgs + .Lparanoid_kernel_gsbase: +- + FENCE_SWAPGS_KERNEL_ENTRY ++.Lparanoid_gsbase_done: ++ ++ /* ++ * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like ++ * CR3 above, keep the old value in a callee saved register. ++ */ ++ IBRS_ENTER save_reg=%r15 ++ UNTRAIN_RET ++ + RET + SYM_CODE_END(paranoid_entry) + +@@ -953,9 +988,19 @@ SYM_CODE_END(paranoid_entry) + * 1 -> no SWAPGS on exit + * + * Y User space GSBASE, must be restored unconditionally ++ * ++ * R14 - old CR3 ++ * R15 - old SPEC_CTRL + */ + SYM_CODE_START_LOCAL(paranoid_exit) + UNWIND_HINT_REGS ++ ++ /* ++ * Must restore IBRS state before both CR3 and %GS since we need access ++ * to the per-CPU x86_spec_ctrl_shadow variable. ++ */ ++ IBRS_EXIT save_reg=%r15 ++ + /* + * The order of operations is important. RESTORE_CR3 requires + * kernel GSBASE. +@@ -984,13 +1029,15 @@ SYM_CODE_START_LOCAL(paranoid_exit) + SYM_CODE_END(paranoid_exit) + + /* +- * Save all registers in pt_regs, and switch GS if needed. ++ * Switch GS and CR3 if needed. + */ + SYM_CODE_START_LOCAL(error_entry) + UNWIND_HINT_FUNC + cld ++ + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 ++ + testb $3, CS+8(%rsp) + jz .Lerror_kernelspace + +@@ -1002,15 +1049,14 @@ SYM_CODE_START_LOCAL(error_entry) + FENCE_SWAPGS_USER_ENTRY + /* We have user CR3. Change to kernel CR3. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax ++ IBRS_ENTER ++ UNTRAIN_RET + ++ leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ + .Lerror_entry_from_usermode_after_swapgs: ++ + /* Put us onto the real thread stack. */ +- popq %r12 /* save return addr in %12 */ +- movq %rsp, %rdi /* arg0 = pt_regs pointer */ + call sync_regs +- movq %rax, %rsp /* switch stack */ +- ENCODE_FRAME_POINTER +- pushq %r12 + RET + + /* +@@ -1042,6 +1088,8 @@ SYM_CODE_START_LOCAL(error_entry) + */ + .Lerror_entry_done_lfence: + FENCE_SWAPGS_KERNEL_ENTRY ++ leaq 8(%rsp), %rax /* return pt_regs pointer */ ++ ANNOTATE_UNRET_END + RET + + .Lbstep_iret: +@@ -1057,14 +1105,16 @@ SYM_CODE_START_LOCAL(error_entry) + SWAPGS + FENCE_SWAPGS_USER_ENTRY + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax ++ IBRS_ENTER ++ UNTRAIN_RET + + /* + * Pretend that the exception came from user mode: set up pt_regs + * as if we faulted immediately after IRET. + */ +- mov %rsp, %rdi ++ leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ + call fixup_bad_iret +- mov %rax, %rsp ++ mov %rax, %rdi + jmp .Lerror_entry_from_usermode_after_swapgs + SYM_CODE_END(error_entry) + +@@ -1162,6 +1212,9 @@ SYM_CODE_START(asm_exc_nmi) + PUSH_AND_CLEAR_REGS rdx=(%rdx) + ENCODE_FRAME_POINTER + ++ IBRS_ENTER ++ UNTRAIN_RET ++ + /* + * At this point we no longer need to worry about stack damage + * due to nesting -- we're on the normal thread stack and we're +@@ -1386,6 +1439,9 @@ end_repeat_nmi: + movq $-1, %rsi + call exc_nmi + ++ /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */ ++ IBRS_EXIT save_reg=%r15 ++ + /* Always restore stashed CR3 value (see paranoid_entry) */ + RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 + +diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S +index 4fdb007cddbd..4f479cdc7a40 100644 +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -4,7 +4,6 @@ + * + * Copyright 2000-2002 Andi Kleen, SuSE Labs. + */ +-#include "calling.h" + #include <asm/asm-offsets.h> + #include <asm/current.h> + #include <asm/errno.h> +@@ -14,9 +13,12 @@ + #include <asm/irqflags.h> + #include <asm/asm.h> + #include <asm/smap.h> ++#include <asm/nospec-branch.h> + #include <linux/linkage.h> + #include <linux/err.h> + ++#include "calling.h" ++ + .section .entry.text, "ax" + + /* +@@ -47,7 +49,7 @@ + * 0(%ebp) arg6 + */ + SYM_CODE_START(entry_SYSENTER_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + ENDBR + /* Interrupts are off on entry. */ + SWAPGS +@@ -113,6 +115,9 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) + + cld + ++ IBRS_ENTER ++ UNTRAIN_RET ++ + /* + * SYSENTER doesn't filter flags, so we need to clear NT and AC + * ourselves. To save a few cycles, we can check whether +@@ -199,7 +204,7 @@ SYM_CODE_END(entry_SYSENTER_compat) + * 0(%esp) arg6 + */ + SYM_CODE_START(entry_SYSCALL_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + ENDBR + /* Interrupts are off on entry. */ + swapgs +@@ -256,6 +261,9 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) + + UNWIND_HINT_REGS + ++ IBRS_ENTER ++ UNTRAIN_RET ++ + movq %rsp, %rdi + call do_fast_syscall_32 + /* XEN PV guests always use IRET path */ +@@ -270,6 +278,8 @@ sysret32_from_system_call: + */ + STACKLEAK_ERASE + ++ IBRS_EXIT ++ + movq RBX(%rsp), %rbx /* pt_regs->rbx */ + movq RBP(%rsp), %rbp /* pt_regs->rbp */ + movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */ +@@ -343,7 +353,7 @@ SYM_CODE_END(entry_SYSCALL_compat) + * ebp arg6 + */ + SYM_CODE_START(entry_INT80_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + ENDBR + /* + * Interrupts are off on entry. +@@ -414,6 +424,9 @@ SYM_CODE_START(entry_INT80_compat) + + cld + ++ IBRS_ENTER ++ UNTRAIN_RET ++ + movq %rsp, %rdi + call do_int80_syscall_32 + jmp swapgs_restore_regs_and_return_to_usermode +diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile +index 693f8b9031fb..e893af5aa8f5 100644 +--- a/arch/x86/entry/vdso/Makefile ++++ b/arch/x86/entry/vdso/Makefile +@@ -92,6 +92,7 @@ endif + endif + + $(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) ++$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO + + # + # vDSO code runs in userspace and -pg doesn't help with profiling anyway. +diff --git a/arch/x86/entry/vsyscall/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S +index 15e35159ebb6..ef2dd1827243 100644 +--- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S ++++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S +@@ -19,17 +19,20 @@ __vsyscall_page: + + mov $__NR_gettimeofday, %rax + syscall +- RET ++ ret ++ int3 + + .balign 1024, 0xcc + mov $__NR_time, %rax + syscall +- RET ++ ret ++ int3 + + .balign 1024, 0xcc + mov $__NR_getcpu, %rax + syscall +- RET ++ ret ++ int3 + + .balign 4096, 0xcc + +diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h +index 9b10c8c76087..9542c582d546 100644 +--- a/arch/x86/include/asm/alternative.h ++++ b/arch/x86/include/asm/alternative.h +@@ -76,6 +76,7 @@ extern int alternatives_patched; + extern void alternative_instructions(void); + extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); + extern void apply_retpolines(s32 *start, s32 *end); ++extern void apply_returns(s32 *start, s32 *end); + extern void apply_ibt_endbr(s32 *start, s32 *end); + + struct module; +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index e17de69faa54..5d09ded0c491 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -203,8 +203,8 @@ + #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ + /* FREE! ( 7*32+10) */ + #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ +-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +-#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ ++#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ ++#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ + #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ + #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ + #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ +@@ -295,6 +295,12 @@ + #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ + #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ + #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ ++#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ ++#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ ++#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ ++#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ ++#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ ++#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ + + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ + #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ +@@ -315,6 +321,7 @@ + #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ + #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ + #define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ ++#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ + + /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ + #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ +@@ -444,5 +451,6 @@ + #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ + #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ + #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ ++#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ + + #endif /* _ASM_X86_CPUFEATURES_H */ +diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h +index 1231d63f836d..f7be189e9723 100644 +--- a/arch/x86/include/asm/disabled-features.h ++++ b/arch/x86/include/asm/disabled-features.h +@@ -56,6 +56,25 @@ + # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) + #endif + ++#ifdef CONFIG_RETPOLINE ++# define DISABLE_RETPOLINE 0 ++#else ++# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ ++ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) ++#endif ++ ++#ifdef CONFIG_RETHUNK ++# define DISABLE_RETHUNK 0 ++#else ++# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) ++#endif ++ ++#ifdef CONFIG_CPU_UNRET_ENTRY ++# define DISABLE_UNRET 0 ++#else ++# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) ++#endif ++ + #ifdef CONFIG_INTEL_IOMMU_SVM + # define DISABLE_ENQCMD 0 + #else +@@ -82,7 +101,7 @@ + #define DISABLED_MASK8 0 + #define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX) + #define DISABLED_MASK10 0 +-#define DISABLED_MASK11 0 ++#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET) + #define DISABLED_MASK12 0 + #define DISABLED_MASK13 0 + #define DISABLED_MASK14 0 diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 98938a68251c..bed74a0f2932 100644 --- a/arch/x86/include/asm/efi.h @@ -176,6 +1070,1611 @@ index 98938a68251c..bed74a0f2932 100644 #else /* CONFIG_EFI_MIXED */ static inline bool efi_is_64bit(void) +diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h +index 85865f1645bd..73ca20049835 100644 +--- a/arch/x86/include/asm/linkage.h ++++ b/arch/x86/include/asm/linkage.h +@@ -19,19 +19,27 @@ + #define __ALIGN_STR __stringify(__ALIGN) + #endif + ++#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) ++#define RET jmp __x86_return_thunk ++#else /* CONFIG_RETPOLINE */ + #ifdef CONFIG_SLS + #define RET ret; int3 + #else + #define RET ret + #endif ++#endif /* CONFIG_RETPOLINE */ + + #else /* __ASSEMBLY__ */ + ++#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) ++#define ASM_RET "jmp __x86_return_thunk\n\t" ++#else /* CONFIG_RETPOLINE */ + #ifdef CONFIG_SLS + #define ASM_RET "ret; int3\n\t" + #else + #define ASM_RET "ret\n\t" + #endif ++#endif /* CONFIG_RETPOLINE */ + + #endif /* __ASSEMBLY__ */ + +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index 4425d6773183..ad084326f24c 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -51,6 +51,8 @@ + #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ + #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ + #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ ++#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ ++#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) + + #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ + #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ +@@ -91,6 +93,7 @@ + #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a + #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ + #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ ++#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */ + #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ + #define ARCH_CAP_SSB_NO BIT(4) /* + * Not susceptible to Speculative Store Bypass +@@ -138,6 +141,13 @@ + * bit available to control VERW + * behavior. + */ ++#define ARCH_CAP_RRSBA BIT(19) /* ++ * Indicates RET may use predictors ++ * other than the RSB. With eIBRS ++ * enabled predictions in kernel mode ++ * are restricted to targets in ++ * kernel. ++ */ + + #define MSR_IA32_FLUSH_CMD 0x0000010b + #define L1D_FLUSH BIT(0) /* +@@ -552,6 +562,9 @@ + /* Fam 17h MSRs */ + #define MSR_F17H_IRPERF 0xc00000e9 + ++#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 ++#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) ++ + /* Fam 16h MSRs */ + #define MSR_F16H_L2I_PERF_CTL 0xc0010230 + #define MSR_F16H_L2I_PERF_CTR 0xc0010231 +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index da251a5645b0..bb05ed4f46bd 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -75,6 +75,23 @@ + .popsection + .endm + ++/* ++ * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions ++ * vs RETBleed validation. ++ */ ++#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE ++ ++/* ++ * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should ++ * eventually turn into it's own annotation. ++ */ ++.macro ANNOTATE_UNRET_END ++#ifdef CONFIG_DEBUG_ENTRY ++ ANNOTATE_RETPOLINE_SAFE ++ nop ++#endif ++.endm ++ + /* + * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple + * indirect jmp/call which may be susceptible to the Spectre variant 2 +@@ -105,10 +122,34 @@ + * monstrosity above, manually. + */ + .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req +-#ifdef CONFIG_RETPOLINE + ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr + __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) + .Lskip_rsb_\@: ++.endm ++ ++#ifdef CONFIG_CPU_UNRET_ENTRY ++#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret" ++#else ++#define CALL_ZEN_UNTRAIN_RET "" ++#endif ++ ++/* ++ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the ++ * return thunk isn't mapped into the userspace tables (then again, AMD ++ * typically has NO_MELTDOWN). ++ * ++ * While zen_untrain_ret() doesn't clobber anything but requires stack, ++ * entry_ibpb() will clobber AX, CX, DX. ++ * ++ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point ++ * where we have a stack but before any RET instruction. ++ */ ++.macro UNTRAIN_RET ++#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) ++ ANNOTATE_UNRET_END ++ ALTERNATIVE_2 "", \ ++ CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ ++ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB + #endif + .endm + +@@ -120,17 +161,20 @@ + _ASM_PTR " 999b\n\t" \ + ".popsection\n\t" + +-#ifdef CONFIG_RETPOLINE +- + typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; ++extern retpoline_thunk_t __x86_indirect_thunk_array[]; ++ ++extern void __x86_return_thunk(void); ++extern void zen_untrain_ret(void); ++extern void entry_ibpb(void); ++ ++#ifdef CONFIG_RETPOLINE + + #define GEN(reg) \ + extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; + #include <asm/GEN-for-each-reg.h> + #undef GEN + +-extern retpoline_thunk_t __x86_indirect_thunk_array[]; +- + #ifdef CONFIG_X86_64 + + /* +@@ -193,6 +237,7 @@ enum spectre_v2_mitigation { + SPECTRE_V2_EIBRS, + SPECTRE_V2_EIBRS_RETPOLINE, + SPECTRE_V2_EIBRS_LFENCE, ++ SPECTRE_V2_IBRS, + }; + + /* The indirect branch speculation control variants */ +@@ -235,6 +280,9 @@ static inline void indirect_branch_prediction_barrier(void) + + /* The Intel SPEC CTRL MSR base value cache */ + extern u64 x86_spec_ctrl_base; ++extern u64 x86_spec_ctrl_current; ++extern void write_spec_ctrl_current(u64 val, bool force); ++extern u64 spec_ctrl_current(void); + + /* + * With retpoline, we must use IBRS to restrict branch prediction +@@ -244,18 +292,16 @@ extern u64 x86_spec_ctrl_base; + */ + #define firmware_restrict_branch_speculation_start() \ + do { \ +- u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ +- \ + preempt_disable(); \ +- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ ++ alternative_msr_write(MSR_IA32_SPEC_CTRL, \ ++ spec_ctrl_current() | SPEC_CTRL_IBRS, \ + X86_FEATURE_USE_IBRS_FW); \ + } while (0) + + #define firmware_restrict_branch_speculation_end() \ + do { \ +- u64 val = x86_spec_ctrl_base; \ +- \ +- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ ++ alternative_msr_write(MSR_IA32_SPEC_CTRL, \ ++ spec_ctrl_current(), \ + X86_FEATURE_USE_IBRS_FW); \ + preempt_enable(); \ + } while (0) +diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h +index 2d8dacd02643..343b722ccaf2 100644 +--- a/arch/x86/include/asm/static_call.h ++++ b/arch/x86/include/asm/static_call.h +@@ -21,6 +21,16 @@ + * relative displacement across sections. + */ + ++/* ++ * The trampoline is 8 bytes and of the general form: ++ * ++ * jmp.d32 \func ++ * ud1 %esp, %ecx ++ * ++ * That trailing #UD provides both a speculation stop and serves as a unique ++ * 3 byte signature identifying static call trampolines. Also see tramp_ud[] ++ * and __static_call_fixup(). ++ */ + #define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \ + asm(".pushsection .static_call.text, \"ax\" \n" \ + ".align 4 \n" \ +@@ -28,7 +38,7 @@ + STATIC_CALL_TRAMP_STR(name) ": \n" \ + ANNOTATE_NOENDBR \ + insns " \n" \ +- ".byte 0x53, 0x43, 0x54 \n" \ ++ ".byte 0x0f, 0xb9, 0xcc \n" \ + ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \ + ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \ + ".popsection \n") +@@ -36,8 +46,13 @@ + #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)") + ++#ifdef CONFIG_RETHUNK ++#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ ++ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk") ++#else + #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop") ++#endif + + #define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \ + ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0) +@@ -48,4 +63,6 @@ + ".long " STATIC_CALL_KEY_STR(name) " - . \n" \ + ".popsection \n") + ++extern bool __static_call_fixup(void *tramp, u8 op, void *dest); ++ + #endif /* _ASM_STATIC_CALL_H */ +diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h +index 35317c5c551d..47ecfff2c83d 100644 +--- a/arch/x86/include/asm/traps.h ++++ b/arch/x86/include/asm/traps.h +@@ -13,7 +13,7 @@ + #ifdef CONFIG_X86_64 + asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); + asmlinkage __visible notrace +-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s); ++struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs); + void __init trap_init(void); + asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs); + #endif +diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h +index 8b33674288ea..f66fbe6537dd 100644 +--- a/arch/x86/include/asm/unwind_hints.h ++++ b/arch/x86/include/asm/unwind_hints.h +@@ -8,7 +8,11 @@ + #ifdef __ASSEMBLY__ + + .macro UNWIND_HINT_EMPTY +- UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1 ++ UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1 ++.endm ++ ++.macro UNWIND_HINT_ENTRY ++ UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1 + .endm + + .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 +@@ -52,6 +56,14 @@ + UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC + .endm + ++.macro UNWIND_HINT_SAVE ++ UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE ++.endm ++ ++.macro UNWIND_HINT_RESTORE ++ UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE ++.endm ++ + #else + + #define UNWIND_HINT_FUNC \ +diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c +index d374cb3cf024..46427b785bc8 100644 +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -115,6 +115,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) + } + + extern s32 __retpoline_sites[], __retpoline_sites_end[]; ++extern s32 __return_sites[], __return_sites_end[]; + extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[]; + extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; + extern s32 __smp_locks[], __smp_locks_end[]; +@@ -507,9 +508,76 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) + } + } + ++#ifdef CONFIG_RETHUNK ++/* ++ * Rewrite the compiler generated return thunk tail-calls. ++ * ++ * For example, convert: ++ * ++ * JMP __x86_return_thunk ++ * ++ * into: ++ * ++ * RET ++ */ ++static int patch_return(void *addr, struct insn *insn, u8 *bytes) ++{ ++ int i = 0; ++ ++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) ++ return -1; ++ ++ bytes[i++] = RET_INSN_OPCODE; ++ ++ for (; i < insn->length;) ++ bytes[i++] = INT3_INSN_OPCODE; ++ ++ return i; ++} ++ ++void __init_or_module noinline apply_returns(s32 *start, s32 *end) ++{ ++ s32 *s; ++ ++ for (s = start; s < end; s++) { ++ void *dest = NULL, *addr = (void *)s + *s; ++ struct insn insn; ++ int len, ret; ++ u8 bytes[16]; ++ u8 op; ++ ++ ret = insn_decode_kernel(&insn, addr); ++ if (WARN_ON_ONCE(ret < 0)) ++ continue; ++ ++ op = insn.opcode.bytes[0]; ++ if (op == JMP32_INSN_OPCODE) ++ dest = addr + insn.length + insn.immediate.value; ++ ++ if (__static_call_fixup(addr, op, dest) || ++ WARN_ON_ONCE(dest != &__x86_return_thunk)) ++ continue; ++ ++ DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", ++ addr, addr, insn.length, ++ addr + insn.length + insn.immediate.value); ++ ++ len = patch_return(addr, &insn, bytes); ++ if (len == insn.length) { ++ DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); ++ DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); ++ text_poke_early(addr, bytes, len); ++ } ++ } ++} ++#else ++void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } ++#endif /* CONFIG_RETHUNK */ ++ + #else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */ + + void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } ++void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } + + #endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */ + +@@ -860,6 +928,7 @@ void __init alternative_instructions(void) + * those can rewrite the retpoline thunks. + */ + apply_retpolines(__retpoline_sites, __retpoline_sites_end); ++ apply_returns(__return_sites, __return_sites_end); + + /* + * Then patch alternatives, such that those paravirt calls that are in +diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c +index 9fb0a2f8b62a..6434ea941348 100644 +--- a/arch/x86/kernel/asm-offsets.c ++++ b/arch/x86/kernel/asm-offsets.c +@@ -18,6 +18,7 @@ + #include <asm/bootparam.h> + #include <asm/suspend.h> + #include <asm/tlbflush.h> ++#include "../kvm/vmx/vmx.h" + + #ifdef CONFIG_XEN + #include <xen/interface/xen.h> +@@ -90,4 +91,9 @@ static void __used common(void) + OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); + OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); + OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); ++ ++ if (IS_ENABLED(CONFIG_KVM_INTEL)) { ++ BLANK(); ++ OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl); ++ } + } +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index 0c0b09796ced..35d5288394cb 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -862,6 +862,28 @@ static void init_amd_bd(struct cpuinfo_x86 *c) + clear_rdrand_cpuid_bit(c); + } + ++void init_spectral_chicken(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_CPU_UNRET_ENTRY ++ u64 value; ++ ++ /* ++ * On Zen2 we offer this chicken (bit) on the altar of Speculation. ++ * ++ * This suppresses speculation from the middle of a basic block, i.e. it ++ * suppresses non-branch predictions. ++ * ++ * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H ++ */ ++ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) { ++ if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) { ++ value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT; ++ wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value); ++ } ++ } ++#endif ++} ++ + static void init_amd_zn(struct cpuinfo_x86 *c) + { + set_cpu_cap(c, X86_FEATURE_ZEN); +@@ -870,12 +892,21 @@ static void init_amd_zn(struct cpuinfo_x86 *c) + node_reclaim_distance = 32; + #endif + +- /* +- * Fix erratum 1076: CPB feature bit not being set in CPUID. +- * Always set it, except when running under a hypervisor. +- */ +- if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB)) +- set_cpu_cap(c, X86_FEATURE_CPB); ++ /* Fix up CPUID bits, but only if not virtualised. */ ++ if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { ++ ++ /* Erratum 1076: CPB feature bit not being set in CPUID. */ ++ if (!cpu_has(c, X86_FEATURE_CPB)) ++ set_cpu_cap(c, X86_FEATURE_CPB); ++ ++ /* ++ * Zen3 (Fam19 model < 0x10) parts are not susceptible to ++ * Branch Type Confusion, but predate the allocation of the ++ * BTC_NO bit. ++ */ ++ if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO)) ++ set_cpu_cap(c, X86_FEATURE_BTC_NO); ++ } + } + + static void init_amd(struct cpuinfo_x86 *c) +@@ -907,7 +938,8 @@ static void init_amd(struct cpuinfo_x86 *c) + case 0x12: init_amd_ln(c); break; + case 0x15: init_amd_bd(c); break; + case 0x16: init_amd_jg(c); break; +- case 0x17: fallthrough; ++ case 0x17: init_spectral_chicken(c); ++ fallthrough; + case 0x19: init_amd_zn(c); break; + } + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index a8a9f6406331..f6dfa26ed88b 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -38,6 +38,8 @@ + + static void __init spectre_v1_select_mitigation(void); + static void __init spectre_v2_select_mitigation(void); ++static void __init retbleed_select_mitigation(void); ++static void __init spectre_v2_user_select_mitigation(void); + static void __init ssb_select_mitigation(void); + static void __init l1tf_select_mitigation(void); + static void __init mds_select_mitigation(void); +@@ -48,16 +50,40 @@ static void __init mmio_select_mitigation(void); + static void __init srbds_select_mitigation(void); + static void __init l1d_flush_select_mitigation(void); + +-/* The base value of the SPEC_CTRL MSR that always has to be preserved. */ ++/* The base value of the SPEC_CTRL MSR without task-specific bits set */ + u64 x86_spec_ctrl_base; + EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); ++ ++/* The current value of the SPEC_CTRL MSR with task-specific bits set */ ++DEFINE_PER_CPU(u64, x86_spec_ctrl_current); ++EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); ++ + static DEFINE_MUTEX(spec_ctrl_mutex); + + /* +- * The vendor and possibly platform specific bits which can be modified in +- * x86_spec_ctrl_base. ++ * Keep track of the SPEC_CTRL MSR value for the current task, which may differ ++ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). + */ +-static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; ++void write_spec_ctrl_current(u64 val, bool force) ++{ ++ if (this_cpu_read(x86_spec_ctrl_current) == val) ++ return; ++ ++ this_cpu_write(x86_spec_ctrl_current, val); ++ ++ /* ++ * When KERNEL_IBRS this MSR is written on return-to-user, unless ++ * forced the update can be delayed until that time. ++ */ ++ if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) ++ wrmsrl(MSR_IA32_SPEC_CTRL, val); ++} ++ ++u64 spec_ctrl_current(void) ++{ ++ return this_cpu_read(x86_spec_ctrl_current); ++} ++EXPORT_SYMBOL_GPL(spec_ctrl_current); + + /* + * AMD specific MSR info for Speculative Store Bypass control. +@@ -114,13 +140,21 @@ void __init check_bugs(void) + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + +- /* Allow STIBP in MSR_SPEC_CTRL if supported */ +- if (boot_cpu_has(X86_FEATURE_STIBP)) +- x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; +- + /* Select the proper CPU mitigations before patching alternatives: */ + spectre_v1_select_mitigation(); + spectre_v2_select_mitigation(); ++ /* ++ * retbleed_select_mitigation() relies on the state set by ++ * spectre_v2_select_mitigation(); specifically it wants to know about ++ * spectre_v2=ibrs. ++ */ ++ retbleed_select_mitigation(); ++ /* ++ * spectre_v2_user_select_mitigation() relies on the state set by ++ * retbleed_select_mitigation(); specifically the STIBP selection is ++ * forced for UNRET. ++ */ ++ spectre_v2_user_select_mitigation(); + ssb_select_mitigation(); + l1tf_select_mitigation(); + md_clear_select_mitigation(); +@@ -161,31 +195,17 @@ void __init check_bugs(void) + #endif + } + ++/* ++ * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is ++ * done in vmenter.S. ++ */ + void + x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + { +- u64 msrval, guestval, hostval = x86_spec_ctrl_base; ++ u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current(); + struct thread_info *ti = current_thread_info(); + +- /* Is MSR_SPEC_CTRL implemented ? */ + if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { +- /* +- * Restrict guest_spec_ctrl to supported values. Clear the +- * modifiable bits in the host base value and or the +- * modifiable bits from the guest value. +- */ +- guestval = hostval & ~x86_spec_ctrl_mask; +- guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; +- +- /* SSBD controlled in MSR_SPEC_CTRL */ +- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || +- static_cpu_has(X86_FEATURE_AMD_SSBD)) +- hostval |= ssbd_tif_to_spec_ctrl(ti->flags); +- +- /* Conditional STIBP enabled? */ +- if (static_branch_unlikely(&switch_to_cond_stibp)) +- hostval |= stibp_tif_to_spec_ctrl(ti->flags); +- + if (hostval != guestval) { + msrval = setguest ? guestval : hostval; + wrmsrl(MSR_IA32_SPEC_CTRL, msrval); +@@ -745,12 +765,180 @@ static int __init nospectre_v1_cmdline(char *str) + } + early_param("nospectre_v1", nospectre_v1_cmdline); + +-#undef pr_fmt +-#define pr_fmt(fmt) "Spectre V2 : " fmt +- + static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = + SPECTRE_V2_NONE; + ++#undef pr_fmt ++#define pr_fmt(fmt) "RETBleed: " fmt ++ ++enum retbleed_mitigation { ++ RETBLEED_MITIGATION_NONE, ++ RETBLEED_MITIGATION_UNRET, ++ RETBLEED_MITIGATION_IBPB, ++ RETBLEED_MITIGATION_IBRS, ++ RETBLEED_MITIGATION_EIBRS, ++}; ++ ++enum retbleed_mitigation_cmd { ++ RETBLEED_CMD_OFF, ++ RETBLEED_CMD_AUTO, ++ RETBLEED_CMD_UNRET, ++ RETBLEED_CMD_IBPB, ++}; ++ ++const char * const retbleed_strings[] = { ++ [RETBLEED_MITIGATION_NONE] = "Vulnerable", ++ [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", ++ [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB", ++ [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", ++ [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", ++}; ++ ++static enum retbleed_mitigation retbleed_mitigation __ro_after_init = ++ RETBLEED_MITIGATION_NONE; ++static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init = ++ RETBLEED_CMD_AUTO; ++ ++static int __ro_after_init retbleed_nosmt = false; ++ ++static int __init retbleed_parse_cmdline(char *str) ++{ ++ if (!str) ++ return -EINVAL; ++ ++ while (str) { ++ char *next = strchr(str, ','); ++ if (next) { ++ *next = 0; ++ next++; ++ } ++ ++ if (!strcmp(str, "off")) { ++ retbleed_cmd = RETBLEED_CMD_OFF; ++ } else if (!strcmp(str, "auto")) { ++ retbleed_cmd = RETBLEED_CMD_AUTO; ++ } else if (!strcmp(str, "unret")) { ++ retbleed_cmd = RETBLEED_CMD_UNRET; ++ } else if (!strcmp(str, "ibpb")) { ++ retbleed_cmd = RETBLEED_CMD_IBPB; ++ } else if (!strcmp(str, "nosmt")) { ++ retbleed_nosmt = true; ++ } else { ++ pr_err("Ignoring unknown retbleed option (%s).", str); ++ } ++ ++ str = next; ++ } ++ ++ return 0; ++} ++early_param("retbleed", retbleed_parse_cmdline); ++ ++#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" ++#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" ++ ++static void __init retbleed_select_mitigation(void) ++{ ++ bool mitigate_smt = false; ++ ++ if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) ++ return; ++ ++ switch (retbleed_cmd) { ++ case RETBLEED_CMD_OFF: ++ return; ++ ++ case RETBLEED_CMD_UNRET: ++ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) { ++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ } else { ++ pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n"); ++ goto do_cmd_auto; ++ } ++ break; ++ ++ case RETBLEED_CMD_IBPB: ++ if (!boot_cpu_has(X86_FEATURE_IBPB)) { ++ pr_err("WARNING: CPU does not support IBPB.\n"); ++ goto do_cmd_auto; ++ } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { ++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ } else { ++ pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); ++ goto do_cmd_auto; ++ } ++ break; ++ ++do_cmd_auto: ++ case RETBLEED_CMD_AUTO: ++ default: ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || ++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { ++ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) ++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB)) ++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ } ++ ++ /* ++ * The Intel mitigation (IBRS or eIBRS) was already selected in ++ * spectre_v2_select_mitigation(). 'retbleed_mitigation' will ++ * be set accordingly below. ++ */ ++ ++ break; ++ } ++ ++ switch (retbleed_mitigation) { ++ case RETBLEED_MITIGATION_UNRET: ++ setup_force_cpu_cap(X86_FEATURE_RETHUNK); ++ setup_force_cpu_cap(X86_FEATURE_UNRET); ++ ++ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && ++ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) ++ pr_err(RETBLEED_UNTRAIN_MSG); ++ ++ mitigate_smt = true; ++ break; ++ ++ case RETBLEED_MITIGATION_IBPB: ++ setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); ++ mitigate_smt = true; ++ break; ++ ++ default: ++ break; ++ } ++ ++ if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) && ++ (retbleed_nosmt || cpu_mitigations_auto_nosmt())) ++ cpu_smt_disable(false); ++ ++ /* ++ * Let IBRS trump all on Intel without affecting the effects of the ++ * retbleed= cmdline option. ++ */ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { ++ switch (spectre_v2_enabled) { ++ case SPECTRE_V2_IBRS: ++ retbleed_mitigation = RETBLEED_MITIGATION_IBRS; ++ break; ++ case SPECTRE_V2_EIBRS: ++ case SPECTRE_V2_EIBRS_RETPOLINE: ++ case SPECTRE_V2_EIBRS_LFENCE: ++ retbleed_mitigation = RETBLEED_MITIGATION_EIBRS; ++ break; ++ default: ++ pr_err(RETBLEED_INTEL_MSG); ++ } ++ } ++ ++ pr_info("%s\n", retbleed_strings[retbleed_mitigation]); ++} ++ ++#undef pr_fmt ++#define pr_fmt(fmt) "Spectre V2 : " fmt ++ + static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = + SPECTRE_V2_USER_NONE; + static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = +@@ -821,6 +1009,7 @@ enum spectre_v2_mitigation_cmd { + SPECTRE_V2_CMD_EIBRS, + SPECTRE_V2_CMD_EIBRS_RETPOLINE, + SPECTRE_V2_CMD_EIBRS_LFENCE, ++ SPECTRE_V2_CMD_IBRS, + }; + + enum spectre_v2_user_cmd { +@@ -861,13 +1050,15 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure) + pr_info("spectre_v2_user=%s forced on command line.\n", reason); + } + ++static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd; ++ + static enum spectre_v2_user_cmd __init +-spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) ++spectre_v2_parse_user_cmdline(void) + { + char arg[20]; + int ret, i; + +- switch (v2_cmd) { ++ switch (spectre_v2_cmd) { + case SPECTRE_V2_CMD_NONE: + return SPECTRE_V2_USER_CMD_NONE; + case SPECTRE_V2_CMD_FORCE: +@@ -893,15 +1084,16 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) + return SPECTRE_V2_USER_CMD_AUTO; + } + +-static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) ++static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) + { +- return (mode == SPECTRE_V2_EIBRS || +- mode == SPECTRE_V2_EIBRS_RETPOLINE || +- mode == SPECTRE_V2_EIBRS_LFENCE); ++ return mode == SPECTRE_V2_IBRS || ++ mode == SPECTRE_V2_EIBRS || ++ mode == SPECTRE_V2_EIBRS_RETPOLINE || ++ mode == SPECTRE_V2_EIBRS_LFENCE; + } + + static void __init +-spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) ++spectre_v2_user_select_mitigation(void) + { + enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; + bool smt_possible = IS_ENABLED(CONFIG_SMP); +@@ -914,7 +1106,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) + cpu_smt_control == CPU_SMT_NOT_SUPPORTED) + smt_possible = false; + +- cmd = spectre_v2_parse_user_cmdline(v2_cmd); ++ cmd = spectre_v2_parse_user_cmdline(); + switch (cmd) { + case SPECTRE_V2_USER_CMD_NONE: + goto set_mode; +@@ -962,12 +1154,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) + } + + /* +- * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not +- * required. ++ * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible, ++ * STIBP is not required. + */ + if (!boot_cpu_has(X86_FEATURE_STIBP) || + !smt_possible || +- spectre_v2_in_eibrs_mode(spectre_v2_enabled)) ++ spectre_v2_in_ibrs_mode(spectre_v2_enabled)) + return; + + /* +@@ -979,6 +1171,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) + boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) + mode = SPECTRE_V2_USER_STRICT_PREFERRED; + ++ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { ++ if (mode != SPECTRE_V2_USER_STRICT && ++ mode != SPECTRE_V2_USER_STRICT_PREFERRED) ++ pr_info("Selecting STIBP always-on mode to complement retbleed mitigation'\n"); ++ mode = SPECTRE_V2_USER_STRICT_PREFERRED; ++ } ++ + spectre_v2_user_stibp = mode; + + set_mode: +@@ -992,6 +1191,7 @@ static const char * const spectre_v2_strings[] = { + [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", + [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", + [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", ++ [SPECTRE_V2_IBRS] = "Mitigation: IBRS", + }; + + static const struct { +@@ -1009,6 +1209,7 @@ static const struct { + { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false }, + { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false }, + { "auto", SPECTRE_V2_CMD_AUTO, false }, ++ { "ibrs", SPECTRE_V2_CMD_IBRS, false }, + }; + + static void __init spec_v2_print_cond(const char *reason, bool secure) +@@ -1071,6 +1272,30 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) + return SPECTRE_V2_CMD_AUTO; + } + ++ if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) { ++ pr_err("%s selected but not compiled in. Switching to AUTO select\n", ++ mitigation_options[i].option); ++ return SPECTRE_V2_CMD_AUTO; ++ } ++ ++ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { ++ pr_err("%s selected but not Intel CPU. Switching to AUTO select\n", ++ mitigation_options[i].option); ++ return SPECTRE_V2_CMD_AUTO; ++ } ++ ++ if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) { ++ pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n", ++ mitigation_options[i].option); ++ return SPECTRE_V2_CMD_AUTO; ++ } ++ ++ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) { ++ pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n", ++ mitigation_options[i].option); ++ return SPECTRE_V2_CMD_AUTO; ++ } ++ + spec_v2_print_cond(mitigation_options[i].option, + mitigation_options[i].secure); + return cmd; +@@ -1086,6 +1311,22 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) + return SPECTRE_V2_RETPOLINE; + } + ++/* Disable in-kernel use of non-RSB RET predictors */ ++static void __init spec_ctrl_disable_kernel_rrsba(void) ++{ ++ u64 ia32_cap; ++ ++ if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) ++ return; ++ ++ ia32_cap = x86_read_arch_cap_msr(); ++ ++ if (ia32_cap & ARCH_CAP_RRSBA) { ++ x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; ++ write_spec_ctrl_current(x86_spec_ctrl_base, true); ++ } ++} ++ + static void __init spectre_v2_select_mitigation(void) + { + enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); +@@ -1110,6 +1351,15 @@ static void __init spectre_v2_select_mitigation(void) + break; + } + ++ if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) && ++ boot_cpu_has_bug(X86_BUG_RETBLEED) && ++ retbleed_cmd != RETBLEED_CMD_OFF && ++ boot_cpu_has(X86_FEATURE_IBRS) && ++ boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { ++ mode = SPECTRE_V2_IBRS; ++ break; ++ } ++ + mode = spectre_v2_select_retpoline(); + break; + +@@ -1126,6 +1376,10 @@ static void __init spectre_v2_select_mitigation(void) + mode = spectre_v2_select_retpoline(); + break; + ++ case SPECTRE_V2_CMD_IBRS: ++ mode = SPECTRE_V2_IBRS; ++ break; ++ + case SPECTRE_V2_CMD_EIBRS: + mode = SPECTRE_V2_EIBRS; + break; +@@ -1142,10 +1396,9 @@ static void __init spectre_v2_select_mitigation(void) + if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + +- if (spectre_v2_in_eibrs_mode(mode)) { +- /* Force it so VMEXIT will restore correctly */ ++ if (spectre_v2_in_ibrs_mode(mode)) { + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base, true); + } + + switch (mode) { +@@ -1153,6 +1406,10 @@ static void __init spectre_v2_select_mitigation(void) + case SPECTRE_V2_EIBRS: + break; + ++ case SPECTRE_V2_IBRS: ++ setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS); ++ break; ++ + case SPECTRE_V2_LFENCE: + case SPECTRE_V2_EIBRS_LFENCE: + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE); +@@ -1164,43 +1421,107 @@ static void __init spectre_v2_select_mitigation(void) + break; + } + ++ /* ++ * Disable alternate RSB predictions in kernel when indirect CALLs and ++ * JMPs gets protection against BHI and Intramode-BTI, but RET ++ * prediction from a non-RSB predictor is still a risk. ++ */ ++ if (mode == SPECTRE_V2_EIBRS_LFENCE || ++ mode == SPECTRE_V2_EIBRS_RETPOLINE || ++ mode == SPECTRE_V2_RETPOLINE) ++ spec_ctrl_disable_kernel_rrsba(); ++ + spectre_v2_enabled = mode; + pr_info("%s\n", spectre_v2_strings[mode]); + + /* +- * If spectre v2 protection has been enabled, unconditionally fill +- * RSB during a context switch; this protects against two independent +- * issues: ++ * If Spectre v2 protection has been enabled, fill the RSB during a ++ * context switch. In general there are two types of RSB attacks ++ * across context switches, for which the CALLs/RETs may be unbalanced. + * +- * - RSB underflow (and switch to BTB) on Skylake+ +- * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs ++ * 1) RSB underflow ++ * ++ * Some Intel parts have "bottomless RSB". When the RSB is empty, ++ * speculated return targets may come from the branch predictor, ++ * which could have a user-poisoned BTB or BHB entry. ++ * ++ * AMD has it even worse: *all* returns are speculated from the BTB, ++ * regardless of the state of the RSB. ++ * ++ * When IBRS or eIBRS is enabled, the "user -> kernel" attack ++ * scenario is mitigated by the IBRS branch prediction isolation ++ * properties, so the RSB buffer filling wouldn't be necessary to ++ * protect against this type of attack. ++ * ++ * The "user -> user" attack scenario is mitigated by RSB filling. ++ * ++ * 2) Poisoned RSB entry ++ * ++ * If the 'next' in-kernel return stack is shorter than 'prev', ++ * 'next' could be tricked into speculating with a user-poisoned RSB ++ * entry. ++ * ++ * The "user -> kernel" attack scenario is mitigated by SMEP and ++ * eIBRS. ++ * ++ * The "user -> user" scenario, also known as SpectreBHB, requires ++ * RSB clearing. ++ * ++ * So to mitigate all cases, unconditionally fill RSB on context ++ * switches. ++ * ++ * FIXME: Is this pointless for retbleed-affected AMD? + */ + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); + pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); + + /* +- * Retpoline means the kernel is safe because it has no indirect +- * branches. Enhanced IBRS protects firmware too, so, enable restricted +- * speculation around firmware calls only when Enhanced IBRS isn't +- * supported. ++ * Similar to context switches, there are two types of RSB attacks ++ * after vmexit: ++ * ++ * 1) RSB underflow ++ * ++ * 2) Poisoned RSB entry ++ * ++ * When retpoline is enabled, both are mitigated by filling/clearing ++ * the RSB. ++ * ++ * When IBRS is enabled, while #1 would be mitigated by the IBRS branch ++ * prediction isolation protections, RSB still needs to be cleared ++ * because of #2. Note that SMEP provides no protection here, unlike ++ * user-space-poisoned RSB entries. ++ * ++ * eIBRS, on the other hand, has RSB-poisoning protections, so it ++ * doesn't need RSB clearing after vmexit. ++ */ ++ if (boot_cpu_has(X86_FEATURE_RETPOLINE) || ++ boot_cpu_has(X86_FEATURE_KERNEL_IBRS)) ++ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); ++ ++ /* ++ * Retpoline protects the kernel, but doesn't protect firmware. IBRS ++ * and Enhanced IBRS protect firmware too, so enable IBRS around ++ * firmware calls only when IBRS / Enhanced IBRS aren't otherwise ++ * enabled. + * + * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because + * the user might select retpoline on the kernel command line and if + * the CPU supports Enhanced IBRS, kernel might un-intentionally not + * enable IBRS around firmware calls. + */ +- if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) { ++ if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); + pr_info("Enabling Restricted Speculation for firmware calls\n"); + } + + /* Set up IBPB and STIBP depending on the general spectre V2 command */ +- spectre_v2_user_select_mitigation(cmd); ++ spectre_v2_cmd = cmd; + } + + static void update_stibp_msr(void * __unused) + { +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++ u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); ++ write_spec_ctrl_current(val, true); + } + + /* Update x86_spec_ctrl_base in case SMT state changed. */ +@@ -1416,16 +1737,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) + break; + } + +- /* +- * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper +- * bit in the mask to allow guests to use the mitigation even in the +- * case where the host does not enable it. +- */ +- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || +- static_cpu_has(X86_FEATURE_AMD_SSBD)) { +- x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; +- } +- + /* + * We have three CPU feature flags that are in play here: + * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. +@@ -1443,7 +1754,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) + x86_amd_ssb_disable(); + } else { + x86_spec_ctrl_base |= SPEC_CTRL_SSBD; +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base, true); + } + } + +@@ -1694,7 +2005,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) + void x86_spec_ctrl_setup_ap(void) + { + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base, true); + + if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) + x86_amd_ssb_disable(); +@@ -1931,7 +2242,7 @@ static ssize_t mmio_stale_data_show_state(char *buf) + + static char *stibp_state(void) + { +- if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) ++ if (spectre_v2_in_ibrs_mode(spectre_v2_enabled)) + return ""; + + switch (spectre_v2_user_stibp) { +@@ -1987,6 +2298,24 @@ static ssize_t srbds_show_state(char *buf) + return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); + } + ++static ssize_t retbleed_show_state(char *buf) ++{ ++ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { ++ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && ++ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) ++ return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n"); ++ ++ return sprintf(buf, "%s; SMT %s\n", ++ retbleed_strings[retbleed_mitigation], ++ !sched_smt_active() ? "disabled" : ++ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || ++ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ? ++ "enabled with STIBP protection" : "vulnerable"); ++ } ++ ++ return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); ++} ++ + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, + char *buf, unsigned int bug) + { +@@ -2032,6 +2361,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr + case X86_BUG_MMIO_STALE_DATA: + return mmio_stale_data_show_state(buf); + ++ case X86_BUG_RETBLEED: ++ return retbleed_show_state(buf); ++ + default: + break; + } +@@ -2088,4 +2420,9 @@ ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *at + { + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); + } ++ ++ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) ++{ ++ return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED); ++} + #endif +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index af5d0c188f7b..1f43ddf2ffc3 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1231,48 +1231,60 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { + {} + }; + ++#define VULNBL(vendor, family, model, blacklist) \ ++ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist) ++ + #define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ + X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ + INTEL_FAM6_##model, steppings, \ + X86_FEATURE_ANY, issues) + ++#define VULNBL_AMD(family, blacklist) \ ++ VULNBL(AMD, family, X86_MODEL_ANY, blacklist) ++ ++#define VULNBL_HYGON(family, blacklist) \ ++ VULNBL(HYGON, family, X86_MODEL_ANY, blacklist) ++ + #define SRBDS BIT(0) + /* CPU is affected by X86_BUG_MMIO_STALE_DATA */ + #define MMIO BIT(1) + /* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */ + #define MMIO_SBDS BIT(2) ++/* CPU is affected by RETbleed, speculating where you would not expect it */ ++#define RETBLEED BIT(3) + + static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { + VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO), +- VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO), ++ VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO), ++ VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), +- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) | +- BIT(7) | BIT(0xB), MMIO), +- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), +- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO), +- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS), +- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO), +- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS), +- VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS), +- VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO), +- VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO), +- VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS), +- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), +- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO), +- VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), +- VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO), +- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), ++ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO), ++ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO), ++ VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), +- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), ++ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), ++ ++ VULNBL_AMD(0x15, RETBLEED), ++ VULNBL_AMD(0x16, RETBLEED), ++ VULNBL_AMD(0x17, RETBLEED), ++ VULNBL_HYGON(0x18, RETBLEED), + {} + }; + +@@ -1374,6 +1386,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) + !arch_cap_mmio_immune(ia32_cap)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + ++ if (!cpu_has(c, X86_FEATURE_BTC_NO)) { ++ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) ++ setup_force_cpu_bug(X86_BUG_RETBLEED); ++ } ++ + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) + return; + +diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h +index 2a8e584fc991..7c9b5893c30a 100644 +--- a/arch/x86/kernel/cpu/cpu.h ++++ b/arch/x86/kernel/cpu/cpu.h +@@ -61,6 +61,8 @@ static inline void tsx_init(void) { } + static inline void tsx_ap_init(void) { } + #endif /* CONFIG_CPU_SUP_INTEL */ + ++extern void init_spectral_chicken(struct cpuinfo_x86 *c); ++ + extern void get_cpu_cap(struct cpuinfo_x86 *c); + extern void get_cpu_address_sizes(struct cpuinfo_x86 *c); + extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); +diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c +index 3fcdda4c1e11..21fd425088fe 100644 +--- a/arch/x86/kernel/cpu/hygon.c ++++ b/arch/x86/kernel/cpu/hygon.c +@@ -302,6 +302,12 @@ static void init_hygon(struct cpuinfo_x86 *c) + /* get apicid instead of initial apic id from cpuid */ + c->apicid = hard_smp_processor_id(); + ++ /* ++ * XXX someone from Hygon needs to confirm this DTRT ++ * ++ init_spectral_chicken(c); ++ */ ++ + set_cpu_cap(c, X86_FEATURE_ZEN); + set_cpu_cap(c, X86_FEATURE_CPB); + +diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c +index 4143b1e4c5c6..fcfb03f5f89b 100644 +--- a/arch/x86/kernel/cpu/scattered.c ++++ b/arch/x86/kernel/cpu/scattered.c +@@ -27,6 +27,7 @@ static const struct cpuid_bit cpuid_bits[] = { + { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, + { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, ++ { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, + { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, + { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, + { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, +diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c +index 1e31c7d21597..6892ca67d9c6 100644 +--- a/arch/x86/kernel/ftrace.c ++++ b/arch/x86/kernel/ftrace.c +@@ -303,7 +303,7 @@ union ftrace_op_code_union { + } __attribute__((packed)); + }; + +-#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS) ++#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS)) + + static unsigned long + create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) +@@ -359,7 +359,10 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) + goto fail; + + ip = trampoline + size; +- memcpy(ip, retq, RET_SIZE); ++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) ++ __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE); ++ else ++ memcpy(ip, retq, sizeof(retq)); + + /* No need to test direct calls on created trampolines */ + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { +diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S +index b8e3019547a5..3178fd81f93f 100644 +--- a/arch/x86/kernel/head_64.S ++++ b/arch/x86/kernel/head_64.S +@@ -334,6 +334,8 @@ SYM_CODE_START_NOALIGN(vc_boot_ghcb) + UNWIND_HINT_IRET_REGS offset=8 + ENDBR + ++ ANNOTATE_UNRET_END ++ + /* Build pt_regs */ + PUSH_AND_CLEAR_REGS + +@@ -393,6 +395,7 @@ SYM_CODE_END(early_idt_handler_array) + + SYM_CODE_START_LOCAL(early_idt_handler_common) + UNWIND_HINT_IRET_REGS offset=16 ++ ANNOTATE_UNRET_END + /* + * The stack is the hardware frame, an error code or zero, and the + * vector number. +@@ -442,6 +445,8 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb) + UNWIND_HINT_IRET_REGS offset=8 + ENDBR + ++ ANNOTATE_UNRET_END ++ + /* Build pt_regs */ + PUSH_AND_CLEAR_REGS + +diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c +index b98ffcf4d250..67828d973389 100644 +--- a/arch/x86/kernel/module.c ++++ b/arch/x86/kernel/module.c +@@ -253,7 +253,7 @@ int module_finalize(const Elf_Ehdr *hdr, + { + const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, + *para = NULL, *orc = NULL, *orc_ip = NULL, +- *retpolines = NULL, *ibt_endbr = NULL; ++ *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL; + char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { +@@ -271,6 +271,8 @@ int module_finalize(const Elf_Ehdr *hdr, + orc_ip = s; + if (!strcmp(".retpoline_sites", secstrings + s->sh_name)) + retpolines = s; ++ if (!strcmp(".return_sites", secstrings + s->sh_name)) ++ returns = s; + if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name)) + ibt_endbr = s; + } +@@ -287,6 +289,10 @@ int module_finalize(const Elf_Ehdr *hdr, + void *rseg = (void *)retpolines->sh_addr; + apply_retpolines(rseg, rseg + retpolines->sh_size); + } ++ if (returns) { ++ void *rseg = (void *)returns->sh_addr; ++ apply_returns(rseg, rseg + returns->sh_size); ++ } + if (alt) { + /* patch .altinstructions */ + void *aseg = (void *)alt->sh_addr; +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index b370767f5b19..622dc3673c37 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, + } + + if (updmsr) +- wrmsrl(MSR_IA32_SPEC_CTRL, msr); ++ write_spec_ctrl_current(msr, false); + } + + static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) +diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S +index fcc8a7699103..c7c4b1917336 100644 +--- a/arch/x86/kernel/relocate_kernel_32.S ++++ b/arch/x86/kernel/relocate_kernel_32.S +@@ -7,10 +7,12 @@ + #include <linux/linkage.h> + #include <asm/page_types.h> + #include <asm/kexec.h> ++#include <asm/nospec-branch.h> + #include <asm/processor-flags.h> + + /* +- * Must be relocatable PIC code callable as a C function ++ * Must be relocatable PIC code callable as a C function, in particular ++ * there must be a plain RET and not jump to return thunk. + */ + + #define PTR(x) (x << 2) +@@ -91,7 +93,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) + movl %edi, %eax + addl $(identity_mapped - relocate_kernel), %eax + pushl %eax +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(relocate_kernel) + + SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) +@@ -159,12 +163,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) + xorl %edx, %edx + xorl %esi, %esi + xorl %ebp, %ebp +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + 1: + popl %edx + movl CP_PA_SWAP_PAGE(%edi), %esp + addl $PAGE_SIZE, %esp + 2: ++ ANNOTATE_RETPOLINE_SAFE + call *%edx + + /* get the re-entry point of the peer system */ +@@ -190,7 +197,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) + movl %edi, %eax + addl $(virtual_mapped - relocate_kernel), %eax + pushl %eax +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(identity_mapped) + + SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) +@@ -208,7 +217,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) + popl %edi + popl %esi + popl %ebx +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(virtual_mapped) + + /* Do the copies */ +@@ -271,7 +282,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) + popl %edi + popl %ebx + popl %ebp +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(swap_pages) + + .globl kexec_control_code_size +diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S +index c1d8626c53b6..4809c0dc4eb0 100644 +--- a/arch/x86/kernel/relocate_kernel_64.S ++++ b/arch/x86/kernel/relocate_kernel_64.S +@@ -13,7 +13,8 @@ + #include <asm/unwind_hints.h> + + /* +- * Must be relocatable PIC code callable as a C function ++ * Must be relocatable PIC code callable as a C function, in particular ++ * there must be a plain RET and not jump to return thunk. + */ + + #define PTR(x) (x << 3) +@@ -105,7 +106,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) + /* jump to identity mapped page */ + addq $(identity_mapped - relocate_kernel), %r8 + pushq %r8 +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(relocate_kernel) + + SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) +@@ -200,7 +203,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) + xorl %r14d, %r14d + xorl %r15d, %r15d + +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + + 1: + popq %rdx +@@ -219,7 +224,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) + call swap_pages + movq $virtual_mapped, %rax + pushq %rax +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(identity_mapped) + + SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) +@@ -241,7 +248,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) + popq %r12 + popq %rbp + popq %rbx +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(virtual_mapped) + + /* Do the copies */ +@@ -298,7 +307,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) + lea PAGE_SIZE(%rax), %rsi + jmp 0b + 3: +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(swap_pages) + + .globl kexec_control_code_size diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c95b9ac5a457..6640be279dae 100644 --- a/arch/x86/kernel/setup.c @@ -223,6 +2722,1143 @@ index c95b9ac5a457..6640be279dae 100644 reserve_initrd(); +diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c +index aa72cefdd5be..aaaba85d6d7f 100644 +--- a/arch/x86/kernel/static_call.c ++++ b/arch/x86/kernel/static_call.c +@@ -11,6 +11,13 @@ enum insn_type { + RET = 3, /* tramp / site cond-tail-call */ + }; + ++/* ++ * ud1 %esp, %ecx - a 3 byte #UD that is unique to trampolines, chosen such ++ * that there is no false-positive trampoline identification while also being a ++ * speculation stop. ++ */ ++static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc }; ++ + /* + * cs cs cs xorl %eax, %eax - a single 5 byte instruction that clears %[er]ax + */ +@@ -18,7 +25,8 @@ static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 }; + + static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; + +-static void __ref __static_call_transform(void *insn, enum insn_type type, void *func) ++static void __ref __static_call_transform(void *insn, enum insn_type type, ++ void *func, bool modinit) + { + const void *emulate = NULL; + int size = CALL_INSN_SIZE; +@@ -43,14 +51,17 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void + break; + + case RET: +- code = &retinsn; ++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) ++ code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk); ++ else ++ code = &retinsn; + break; + } + + if (memcmp(insn, code, size) == 0) + return; + +- if (unlikely(system_state == SYSTEM_BOOTING)) ++ if (system_state == SYSTEM_BOOTING || modinit) + return text_poke_early(insn, code, size); + + text_poke_bp(insn, code, size, emulate); +@@ -60,7 +71,7 @@ static void __static_call_validate(void *insn, bool tail, bool tramp) + { + u8 opcode = *(u8 *)insn; + +- if (tramp && memcmp(insn+5, "SCT", 3)) { ++ if (tramp && memcmp(insn+5, tramp_ud, 3)) { + pr_err("trampoline signature fail"); + BUG(); + } +@@ -104,14 +115,42 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) + + if (tramp) { + __static_call_validate(tramp, true, true); +- __static_call_transform(tramp, __sc_insn(!func, true), func); ++ __static_call_transform(tramp, __sc_insn(!func, true), func, false); + } + + if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) { + __static_call_validate(site, tail, false); +- __static_call_transform(site, __sc_insn(!func, tail), func); ++ __static_call_transform(site, __sc_insn(!func, tail), func, false); + } + + mutex_unlock(&text_mutex); + } + EXPORT_SYMBOL_GPL(arch_static_call_transform); ++ ++#ifdef CONFIG_RETHUNK ++/* ++ * This is called by apply_returns() to fix up static call trampolines, ++ * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as ++ * having a return trampoline. ++ * ++ * The problem is that static_call() is available before determining ++ * X86_FEATURE_RETHUNK and, by implication, running alternatives. ++ * ++ * This means that __static_call_transform() above can have overwritten the ++ * return trampoline and we now need to fix things up to be consistent. ++ */ ++bool __static_call_fixup(void *tramp, u8 op, void *dest) ++{ ++ if (memcmp(tramp+5, tramp_ud, 3)) { ++ /* Not a trampoline site, not our problem. */ ++ return false; ++ } ++ ++ mutex_lock(&text_mutex); ++ if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk) ++ __static_call_transform(tramp, RET, NULL, true); ++ mutex_unlock(&text_mutex); ++ ++ return true; ++} ++#endif +diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c +index 1563fb995005..4167215333fd 100644 +--- a/arch/x86/kernel/traps.c ++++ b/arch/x86/kernel/traps.c +@@ -892,14 +892,10 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r + } + #endif + +-struct bad_iret_stack { +- void *error_entry_ret; +- struct pt_regs regs; +-}; +- +-asmlinkage __visible noinstr +-struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) ++asmlinkage __visible noinstr struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs) + { ++ struct pt_regs tmp, *new_stack; ++ + /* + * This is called from entry_64.S early in handling a fault + * caused by a bad iret to user mode. To handle the fault +@@ -908,19 +904,18 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) + * just below the IRET frame) and we want to pretend that the + * exception came from the IRET target. + */ +- struct bad_iret_stack tmp, *new_stack = +- (struct bad_iret_stack *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; ++ new_stack = (struct pt_regs *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; + + /* Copy the IRET target to the temporary storage. */ +- __memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8); ++ __memcpy(&tmp.ip, (void *)bad_regs->sp, 5*8); + + /* Copy the remainder of the stack from the current stack. */ +- __memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip)); ++ __memcpy(&tmp, bad_regs, offsetof(struct pt_regs, ip)); + + /* Update the entry stack */ + __memcpy(new_stack, &tmp, sizeof(tmp)); + +- BUG_ON(!user_mode(&new_stack->regs)); ++ BUG_ON(!user_mode(new_stack)); + return new_stack; + } + #endif +diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S +index 7fda7f27e762..071faf2c8a77 100644 +--- a/arch/x86/kernel/vmlinux.lds.S ++++ b/arch/x86/kernel/vmlinux.lds.S +@@ -141,7 +141,7 @@ SECTIONS + + #ifdef CONFIG_RETPOLINE + __indirect_thunk_start = .; +- *(.text.__x86.indirect_thunk) ++ *(.text.__x86.*) + __indirect_thunk_end = .; + #endif + } :text =0xcccc +@@ -283,6 +283,13 @@ SECTIONS + *(.retpoline_sites) + __retpoline_sites_end = .; + } ++ ++ . = ALIGN(8); ++ .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) { ++ __return_sites = .; ++ *(.return_sites) ++ __return_sites_end = .; ++ } + #endif + + #ifdef CONFIG_X86_KERNEL_IBT +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index 89b11e7dca8a..db96bf7d1122 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -325,13 +325,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); + #define FOP_RET(name) \ + __FOP_RET(#name) + +-#define FOP_START(op) \ ++#define __FOP_START(op, align) \ + extern void em_##op(struct fastop *fake); \ + asm(".pushsection .text, \"ax\" \n\t" \ + ".global em_" #op " \n\t" \ +- ".align " __stringify(FASTOP_SIZE) " \n\t" \ ++ ".align " __stringify(align) " \n\t" \ + "em_" #op ":\n\t" + ++#define FOP_START(op) __FOP_START(op, FASTOP_SIZE) ++ + #define FOP_END \ + ".popsection") + +@@ -435,16 +437,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); + /* + * Depending on .config the SETcc functions look like: + * +- * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] +- * SETcc %al [3 bytes] +- * RET [1 byte] +- * INT3 [1 byte; CONFIG_SLS] +- * +- * Which gives possible sizes 4, 5, 8 or 9. When rounded up to the +- * next power-of-two alignment they become 4, 8 or 16 resp. ++ * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] ++ * SETcc %al [3 bytes] ++ * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] ++ * INT3 [1 byte; CONFIG_SLS] + */ +-#define SETCC_LENGTH (ENDBR_INSN_SIZE + 4 + IS_ENABLED(CONFIG_SLS)) +-#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS) << HAS_KERNEL_IBT) ++#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \ ++ IS_ENABLED(CONFIG_SLS)) ++#define SETCC_LENGTH (ENDBR_INSN_SIZE + 3 + RET_LENGTH) ++#define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) + static_assert(SETCC_LENGTH <= SETCC_ALIGN); + + #define FOP_SETCC(op) \ +@@ -453,9 +454,10 @@ static_assert(SETCC_LENGTH <= SETCC_ALIGN); + #op ": \n\t" \ + ASM_ENDBR \ + #op " %al \n\t" \ +- __FOP_RET(#op) ++ __FOP_RET(#op) \ ++ ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t" + +-FOP_START(setcc) ++__FOP_START(setcc, SETCC_ALIGN) + FOP_SETCC(seto) + FOP_SETCC(setno) + FOP_SETCC(setc) +diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S +index dfaeb47fcf2a..723f8534986c 100644 +--- a/arch/x86/kvm/svm/vmenter.S ++++ b/arch/x86/kvm/svm/vmenter.S +@@ -110,6 +110,15 @@ SYM_FUNC_START(__svm_vcpu_run) + mov %r15, VCPU_R15(%_ASM_AX) + #endif + ++ /* ++ * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be ++ * untrained as soon as we exit the VM and are back to the ++ * kernel. This should be done before re-enabling interrupts ++ * because interrupt handlers won't sanitize 'ret' if the return is ++ * from the kernel. ++ */ ++ UNTRAIN_RET ++ + /* + * Clear all general purpose registers except RSP and RAX to prevent + * speculative use of the guest's values, even those that are reloaded +@@ -190,6 +199,15 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) + FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE + #endif + ++ /* ++ * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be ++ * untrained as soon as we exit the VM and are back to the ++ * kernel. This should be done before re-enabling interrupts ++ * because interrupt handlers won't sanitize RET if the return is ++ * from the kernel. ++ */ ++ UNTRAIN_RET ++ + pop %_ASM_BX + + #ifdef CONFIG_X86_64 +diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h +index 3f430e218375..c0e24826a86f 100644 +--- a/arch/x86/kvm/vmx/capabilities.h ++++ b/arch/x86/kvm/vmx/capabilities.h +@@ -4,8 +4,8 @@ + + #include <asm/vmx.h> + +-#include "lapic.h" +-#include "x86.h" ++#include "../lapic.h" ++#include "../x86.h" + + extern bool __read_mostly enable_vpid; + extern bool __read_mostly flexpriority_enabled; +diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c +index ee7df31883cd..28ccf25c4124 100644 +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -3091,7 +3091,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) + } + + vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, +- vmx->loaded_vmcs->launched); ++ __vmx_vcpu_run_flags(vmx)); + + if (vmx->msr_autoload.host.nr) + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); +diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h +new file mode 100644 +index 000000000000..edc3f16cc189 +--- /dev/null ++++ b/arch/x86/kvm/vmx/run_flags.h +@@ -0,0 +1,8 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef __KVM_X86_VMX_RUN_FLAGS_H ++#define __KVM_X86_VMX_RUN_FLAGS_H ++ ++#define VMX_RUN_VMRESUME (1 << 0) ++#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) ++ ++#endif /* __KVM_X86_VMX_RUN_FLAGS_H */ +diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S +index 435c187927c4..4182c7ffc909 100644 +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -1,10 +1,13 @@ + /* SPDX-License-Identifier: GPL-2.0 */ + #include <linux/linkage.h> + #include <asm/asm.h> ++#include <asm/asm-offsets.h> + #include <asm/bitsperlong.h> + #include <asm/kvm_vcpu_regs.h> + #include <asm/nospec-branch.h> ++#include <asm/percpu.h> + #include <asm/segment.h> ++#include "run_flags.h" + + #define WORD_SIZE (BITS_PER_LONG / 8) + +@@ -30,73 +33,12 @@ + + .section .noinstr.text, "ax" + +-/** +- * vmx_vmenter - VM-Enter the current loaded VMCS +- * +- * %RFLAGS.ZF: !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME +- * +- * Returns: +- * %RFLAGS.CF is set on VM-Fail Invalid +- * %RFLAGS.ZF is set on VM-Fail Valid +- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit +- * +- * Note that VMRESUME/VMLAUNCH fall-through and return directly if +- * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump +- * to vmx_vmexit. +- */ +-SYM_FUNC_START_LOCAL(vmx_vmenter) +- /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */ +- je 2f +- +-1: vmresume +- RET +- +-2: vmlaunch +- RET +- +-3: cmpb $0, kvm_rebooting +- je 4f +- RET +-4: ud2 +- +- _ASM_EXTABLE(1b, 3b) +- _ASM_EXTABLE(2b, 3b) +- +-SYM_FUNC_END(vmx_vmenter) +- +-/** +- * vmx_vmexit - Handle a VMX VM-Exit +- * +- * Returns: +- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit +- * +- * This is vmx_vmenter's partner in crime. On a VM-Exit, control will jump +- * here after hardware loads the host's state, i.e. this is the destination +- * referred to by VMCS.HOST_RIP. +- */ +-SYM_FUNC_START(vmx_vmexit) +-#ifdef CONFIG_RETPOLINE +- ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE +- /* Preserve guest's RAX, it's used to stuff the RSB. */ +- push %_ASM_AX +- +- /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ +- FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE +- +- /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */ +- or $1, %_ASM_AX +- +- pop %_ASM_AX +-.Lvmexit_skip_rsb: +-#endif +- RET +-SYM_FUNC_END(vmx_vmexit) +- + /** + * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode +- * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) ++ * @vmx: struct vcpu_vmx * + * @regs: unsigned long * (to guest registers) +- * @launched: %true if the VMCS has been launched ++ * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH ++ * VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl + * + * Returns: + * 0 on VM-Exit, 1 on VM-Fail +@@ -115,24 +57,56 @@ SYM_FUNC_START(__vmx_vcpu_run) + #endif + push %_ASM_BX + ++ /* Save @vmx for SPEC_CTRL handling */ ++ push %_ASM_ARG1 ++ ++ /* Save @flags for SPEC_CTRL handling */ ++ push %_ASM_ARG3 ++ + /* + * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and + * @regs is needed after VM-Exit to save the guest's register values. + */ + push %_ASM_ARG2 + +- /* Copy @launched to BL, _ASM_ARG3 is volatile. */ ++ /* Copy @flags to BL, _ASM_ARG3 is volatile. */ + mov %_ASM_ARG3B, %bl + +- /* Adjust RSP to account for the CALL to vmx_vmenter(). */ +- lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2 ++ lea (%_ASM_SP), %_ASM_ARG2 + call vmx_update_host_rsp + ++ ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL ++ ++ /* ++ * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the ++ * host's, write the MSR. ++ * ++ * IMPORTANT: To avoid RSB underflow attacks and any other nastiness, ++ * there must not be any returns or indirect branches between this code ++ * and vmentry. ++ */ ++ mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI ++ movl VMX_spec_ctrl(%_ASM_DI), %edi ++ movl PER_CPU_VAR(x86_spec_ctrl_current), %esi ++ cmp %edi, %esi ++ je .Lspec_ctrl_done ++ mov $MSR_IA32_SPEC_CTRL, %ecx ++ xor %edx, %edx ++ mov %edi, %eax ++ wrmsr ++ ++.Lspec_ctrl_done: ++ ++ /* ++ * Since vmentry is serializing on affected CPUs, there's no need for ++ * an LFENCE to stop speculation from skipping the wrmsr. ++ */ ++ + /* Load @regs to RAX. */ + mov (%_ASM_SP), %_ASM_AX + + /* Check if vmlaunch or vmresume is needed */ +- testb %bl, %bl ++ testb $VMX_RUN_VMRESUME, %bl + + /* Load guest registers. Don't clobber flags. */ + mov VCPU_RCX(%_ASM_AX), %_ASM_CX +@@ -154,11 +128,37 @@ SYM_FUNC_START(__vmx_vcpu_run) + /* Load guest RAX. This kills the @regs pointer! */ + mov VCPU_RAX(%_ASM_AX), %_ASM_AX + +- /* Enter guest mode */ +- call vmx_vmenter ++ /* Check EFLAGS.ZF from 'testb' above */ ++ jz .Lvmlaunch ++ ++ /* ++ * After a successful VMRESUME/VMLAUNCH, control flow "magically" ++ * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting. ++ * So this isn't a typical function and objtool needs to be told to ++ * save the unwind state here and restore it below. ++ */ ++ UNWIND_HINT_SAVE ++ ++/* ++ * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at ++ * the 'vmx_vmexit' label below. ++ */ ++.Lvmresume: ++ vmresume ++ jmp .Lvmfail ++ ++.Lvmlaunch: ++ vmlaunch ++ jmp .Lvmfail + +- /* Jump on VM-Fail. */ +- jbe 2f ++ _ASM_EXTABLE(.Lvmresume, .Lfixup) ++ _ASM_EXTABLE(.Lvmlaunch, .Lfixup) ++ ++SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) ++ ++ /* Restore unwind state from before the VMRESUME/VMLAUNCH. */ ++ UNWIND_HINT_RESTORE ++ ENDBR + + /* Temporarily save guest's RAX. */ + push %_ASM_AX +@@ -185,21 +185,23 @@ SYM_FUNC_START(__vmx_vcpu_run) + mov %r15, VCPU_R15(%_ASM_AX) + #endif + +- /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */ +- xor %eax, %eax ++ /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */ ++ xor %ebx, %ebx + ++.Lclear_regs: + /* +- * Clear all general purpose registers except RSP and RAX to prevent ++ * Clear all general purpose registers except RSP and RBX to prevent + * speculative use of the guest's values, even those that are reloaded + * via the stack. In theory, an L1 cache miss when restoring registers + * could lead to speculative execution with the guest's values. + * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially + * free. RSP and RAX are exempt as RSP is restored by hardware during +- * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. ++ * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return ++ * value. + */ +-1: xor %ecx, %ecx ++ xor %eax, %eax ++ xor %ecx, %ecx + xor %edx, %edx +- xor %ebx, %ebx + xor %ebp, %ebp + xor %esi, %esi + xor %edi, %edi +@@ -216,8 +218,30 @@ SYM_FUNC_START(__vmx_vcpu_run) + + /* "POP" @regs. */ + add $WORD_SIZE, %_ASM_SP +- pop %_ASM_BX + ++ /* ++ * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before ++ * the first unbalanced RET after vmexit! ++ * ++ * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB ++ * entries and (in some cases) RSB underflow. ++ * ++ * eIBRS has its own protection against poisoned RSB, so it doesn't ++ * need the RSB filling sequence. But it does need to be enabled ++ * before the first unbalanced RET. ++ */ ++ ++ FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT ++ ++ pop %_ASM_ARG2 /* @flags */ ++ pop %_ASM_ARG1 /* @vmx */ ++ ++ call vmx_spec_ctrl_restore_host ++ ++ /* Put return value in AX */ ++ mov %_ASM_BX, %_ASM_AX ++ ++ pop %_ASM_BX + #ifdef CONFIG_X86_64 + pop %r12 + pop %r13 +@@ -230,9 +254,15 @@ SYM_FUNC_START(__vmx_vcpu_run) + pop %_ASM_BP + RET + +- /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */ +-2: mov $1, %eax +- jmp 1b ++.Lfixup: ++ cmpb $0, kvm_rebooting ++ jne .Lvmfail ++ ud2 ++.Lvmfail: ++ /* VM-Fail: set return value to 1 */ ++ mov $1, %_ASM_BX ++ jmp .Lclear_regs ++ + SYM_FUNC_END(__vmx_vcpu_run) + + +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c +index 9646ae886b4b..4b6a0268c78e 100644 +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -383,9 +383,9 @@ static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx) + if (!vmx->disable_fb_clear) + return; + +- rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr); ++ msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL); + msr |= FB_CLEAR_DIS; +- wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); ++ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + /* Cache the MSR value to avoid reading it later */ + vmx->msr_ia32_mcu_opt_ctrl = msr; + } +@@ -396,7 +396,7 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) + return; + + vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS; +- wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); ++ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); + } + + static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) +@@ -839,6 +839,24 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) + MSR_IA32_SPEC_CTRL); + } + ++unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) ++{ ++ unsigned int flags = 0; ++ ++ if (vmx->loaded_vmcs->launched) ++ flags |= VMX_RUN_VMRESUME; ++ ++ /* ++ * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free ++ * to change it directly without causing a vmexit. In that case read ++ * it after vmexit and store it in vmx->spec_ctrl. ++ */ ++ if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) ++ flags |= VMX_RUN_SAVE_SPEC_CTRL; ++ ++ return flags; ++} ++ + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, + unsigned long entry, unsigned long exit) + { +@@ -6814,6 +6832,31 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) + } + } + ++void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, ++ unsigned int flags) ++{ ++ u64 hostval = this_cpu_read(x86_spec_ctrl_current); ++ ++ if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) ++ return; ++ ++ if (flags & VMX_RUN_SAVE_SPEC_CTRL) ++ vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL); ++ ++ /* ++ * If the guest/host SPEC_CTRL values differ, restore the host value. ++ * ++ * For legacy IBRS, the IBRS bit always needs to be written after ++ * transitioning from a less privileged predictor mode, regardless of ++ * whether the guest/host values differ. ++ */ ++ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) || ++ vmx->spec_ctrl != hostval) ++ native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval); ++ ++ barrier_nospec(); ++} ++ + static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) + { + switch (to_vmx(vcpu)->exit_reason.basic) { +@@ -6827,7 +6870,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) + } + + static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, +- struct vcpu_vmx *vmx) ++ struct vcpu_vmx *vmx, ++ unsigned long flags) + { + guest_state_enter_irqoff(); + +@@ -6846,7 +6890,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, + native_write_cr2(vcpu->arch.cr2); + + vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, +- vmx->loaded_vmcs->launched); ++ flags); + + vcpu->arch.cr2 = native_read_cr2(); + +@@ -6945,36 +6989,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) + + kvm_wait_lapic_expire(vcpu); + +- /* +- * If this vCPU has touched SPEC_CTRL, restore the guest's value if +- * it's non-zero. Since vmentry is serialising on affected CPUs, there +- * is no need to worry about the conditional branch over the wrmsr +- * being speculatively taken. +- */ +- x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); +- + /* The actual VMENTER/EXIT is in the .noinstr.text section. */ +- vmx_vcpu_enter_exit(vcpu, vmx); +- +- /* +- * We do not use IBRS in the kernel. If this vCPU has used the +- * SPEC_CTRL MSR it may have left it on; save the value and +- * turn it off. This is much more efficient than blindly adding +- * it to the atomic save/restore list. Especially as the former +- * (Saving guest MSRs on vmexit) doesn't even exist in KVM. +- * +- * For non-nested case: +- * If the L01 MSR bitmap does not intercept the MSR, then we need to +- * save it. +- * +- * For nested case: +- * If the L02 MSR bitmap does not intercept the MSR, then we need to +- * save it. +- */ +- if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) +- vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); +- +- x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); ++ vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx)); + + /* All fields are clean at this point */ + if (static_branch_unlikely(&enable_evmcs)) { +diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h +index 8d2342ede0c5..1e7f9453894b 100644 +--- a/arch/x86/kvm/vmx/vmx.h ++++ b/arch/x86/kvm/vmx/vmx.h +@@ -8,11 +8,12 @@ + #include <asm/intel_pt.h> + + #include "capabilities.h" +-#include "kvm_cache_regs.h" ++#include "../kvm_cache_regs.h" + #include "posted_intr.h" + #include "vmcs.h" + #include "vmx_ops.h" +-#include "cpuid.h" ++#include "../cpuid.h" ++#include "run_flags.h" + + #define MSR_TYPE_R 1 + #define MSR_TYPE_W 2 +@@ -404,7 +405,10 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); + struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); + void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); + void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); +-bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); ++void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags); ++unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx); ++bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, ++ unsigned int flags); + int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr); + void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu); + +diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h +index 5e7f41225780..5cfc49ddb1b4 100644 +--- a/arch/x86/kvm/vmx/vmx_ops.h ++++ b/arch/x86/kvm/vmx/vmx_ops.h +@@ -8,7 +8,7 @@ + + #include "evmcs.h" + #include "vmcs.h" +-#include "x86.h" ++#include "../x86.h" + + asmlinkage void vmread_error(unsigned long field, bool fault); + __attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field, +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 558d1f2ab5b4..9caa902f0de1 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -12531,9 +12531,9 @@ void kvm_arch_end_assignment(struct kvm *kvm) + } + EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); + +-bool kvm_arch_has_assigned_device(struct kvm *kvm) ++bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm) + { +- return atomic_read(&kvm->arch.assigned_device_count); ++ return arch_atomic_read(&kvm->arch.assigned_device_count); + } + EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); + +diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S +index d83cba364e31..724bbf83eb5b 100644 +--- a/arch/x86/lib/memmove_64.S ++++ b/arch/x86/lib/memmove_64.S +@@ -39,7 +39,7 @@ SYM_FUNC_START(__memmove) + /* FSRM implies ERMS => no length checks, do the copy directly */ + .Lmemmove_begin_forward: + ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM +- ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS ++ ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS + + /* + * movsq instruction have many startup latency +@@ -205,6 +205,11 @@ SYM_FUNC_START(__memmove) + movb %r11b, (%rdi) + 13: + RET ++ ++.Lmemmove_erms: ++ movq %rdx, %rcx ++ rep movsb ++ RET + SYM_FUNC_END(__memmove) + EXPORT_SYMBOL(__memmove) + +diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S +index b2b2366885a2..073289a55f84 100644 +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -33,9 +33,9 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR + +- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ +- __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \ +- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE ++ ALTERNATIVE_2 __stringify(RETPOLINE \reg), \ ++ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \ ++ __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE) + + .endm + +@@ -67,3 +67,76 @@ SYM_CODE_END(__x86_indirect_thunk_array) + #define GEN(reg) EXPORT_THUNK(reg) + #include <asm/GEN-for-each-reg.h> + #undef GEN ++ ++/* ++ * This function name is magical and is used by -mfunction-return=thunk-extern ++ * for the compiler to generate JMPs to it. ++ */ ++#ifdef CONFIG_RETHUNK ++ ++ .section .text.__x86.return_thunk ++ ++/* ++ * Safety details here pertain to the AMD Zen{1,2} microarchitecture: ++ * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for ++ * alignment within the BTB. ++ * 2) The instruction at zen_untrain_ret must contain, and not ++ * end with, the 0xc3 byte of the RET. ++ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread ++ * from re-poisioning the BTB prediction. ++ */ ++ .align 64 ++ .skip 63, 0xcc ++SYM_FUNC_START_NOALIGN(zen_untrain_ret); ++ ++ /* ++ * As executed from zen_untrain_ret, this is: ++ * ++ * TEST $0xcc, %bl ++ * LFENCE ++ * JMP __x86_return_thunk ++ * ++ * Executing the TEST instruction has a side effect of evicting any BTB ++ * prediction (potentially attacker controlled) attached to the RET, as ++ * __x86_return_thunk + 1 isn't an instruction boundary at the moment. ++ */ ++ .byte 0xf6 ++ ++ /* ++ * As executed from __x86_return_thunk, this is a plain RET. ++ * ++ * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8. ++ * ++ * We subsequently jump backwards and architecturally execute the RET. ++ * This creates a correct BTB prediction (type=ret), but in the ++ * meantime we suffer Straight Line Speculation (because the type was ++ * no branch) which is halted by the INT3. ++ * ++ * With SMT enabled and STIBP active, a sibling thread cannot poison ++ * RET's prediction to a type of its choice, but can evict the ++ * prediction due to competitive sharing. If the prediction is ++ * evicted, __x86_return_thunk will suffer Straight Line Speculation ++ * which will be contained safely by the INT3. ++ */ ++SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL) ++ ret ++ int3 ++SYM_CODE_END(__x86_return_thunk) ++ ++ /* ++ * Ensure the TEST decoding / BTB invalidation is complete. ++ */ ++ lfence ++ ++ /* ++ * Jump back and execute the RET in the middle of the TEST instruction. ++ * INT3 is for SLS protection. ++ */ ++ jmp __x86_return_thunk ++ int3 ++SYM_FUNC_END(zen_untrain_ret) ++__EXPORT_THUNK(zen_untrain_ret) ++ ++EXPORT_SYMBOL(__x86_return_thunk) ++ ++#endif /* CONFIG_RETHUNK */ +diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S +index 3d1dba05fce4..9de3d900bc92 100644 +--- a/arch/x86/mm/mem_encrypt_boot.S ++++ b/arch/x86/mm/mem_encrypt_boot.S +@@ -65,7 +65,10 @@ SYM_FUNC_START(sme_encrypt_execute) + movq %rbp, %rsp /* Restore original stack pointer */ + pop %rbp + +- RET ++ /* Offset to __x86_return_thunk would be wrong here */ ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_FUNC_END(sme_encrypt_execute) + + SYM_FUNC_START(__enc_copy) +@@ -151,6 +154,9 @@ SYM_FUNC_START(__enc_copy) + pop %r12 + pop %r15 + +- RET ++ /* Offset to __x86_return_thunk would be wrong here */ ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + .L__enc_copy_end: + SYM_FUNC_END(__enc_copy) +diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c +index 4c71fa04e784..2dab2816b3f7 100644 +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -407,16 +407,30 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) + { + u8 *prog = *pprog; + +-#ifdef CONFIG_RETPOLINE + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { + EMIT_LFENCE(); + EMIT2(0xFF, 0xE0 + reg); + } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { + OPTIMIZER_HIDE_VAR(reg); + emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); +- } else +-#endif +- EMIT2(0xFF, 0xE0 + reg); ++ } else { ++ EMIT2(0xFF, 0xE0 + reg); ++ } ++ ++ *pprog = prog; ++} ++ ++static void emit_return(u8 **pprog, u8 *ip) ++{ ++ u8 *prog = *pprog; ++ ++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { ++ emit_jump(&prog, &__x86_return_thunk, ip); ++ } else { ++ EMIT1(0xC3); /* ret */ ++ if (IS_ENABLED(CONFIG_SLS)) ++ EMIT1(0xCC); /* int3 */ ++ } + + *pprog = prog; + } +@@ -1681,7 +1695,7 @@ st: if (is_imm8(insn->off)) + ctx->cleanup_addr = proglen; + pop_callee_regs(&prog, callee_regs_used); + EMIT1(0xC9); /* leave */ +- EMIT1(0xC3); /* ret */ ++ emit_return(&prog, image + addrs[i - 1] + (prog - temp)); + break; + + default: +@@ -2158,7 +2172,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i + if (flags & BPF_TRAMP_F_SKIP_FRAME) + /* skip our return address and return to parent */ + EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ +- EMIT1(0xC3); /* ret */ ++ emit_return(&prog, prog); + /* Make sure the trampoline generation logic doesn't overflow */ + if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) { + ret = -EFAULT; +diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c +index 81aa46f770c5..cfa99e8f054b 100644 +--- a/arch/x86/xen/setup.c ++++ b/arch/x86/xen/setup.c +@@ -918,7 +918,7 @@ void xen_enable_sysenter(void) + if (!boot_cpu_has(sysenter_feature)) + return; + +- ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target); ++ ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat); + if(ret != 0) + setup_clear_cpu_cap(sysenter_feature); + } +@@ -927,7 +927,7 @@ void xen_enable_syscall(void) + { + int ret; + +- ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); ++ ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64); + if (ret != 0) { + printk(KERN_ERR "Failed to set syscall callback: %d\n", ret); + /* Pretty fatal; 64-bit userspace has no other +@@ -936,7 +936,7 @@ void xen_enable_syscall(void) + + if (boot_cpu_has(X86_FEATURE_SYSCALL32)) { + ret = register_callback(CALLBACKTYPE_syscall32, +- xen_syscall32_target); ++ xen_entry_SYSCALL_compat); + if (ret != 0) + setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); + } +diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S +index caa9bc2fa100..6b4fdf6b9542 100644 +--- a/arch/x86/xen/xen-asm.S ++++ b/arch/x86/xen/xen-asm.S +@@ -121,7 +121,7 @@ SYM_FUNC_END(xen_read_cr2_direct); + + .macro xen_pv_trap name + SYM_CODE_START(xen_\name) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + ENDBR + pop %rcx + pop %r11 +@@ -234,8 +234,8 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode) + */ + + /* Normal 64-bit system call target */ +-SYM_CODE_START(xen_syscall_target) +- UNWIND_HINT_EMPTY ++SYM_CODE_START(xen_entry_SYSCALL_64) ++ UNWIND_HINT_ENTRY + ENDBR + popq %rcx + popq %r11 +@@ -249,13 +249,13 @@ SYM_CODE_START(xen_syscall_target) + movq $__USER_CS, 1*8(%rsp) + + jmp entry_SYSCALL_64_after_hwframe +-SYM_CODE_END(xen_syscall_target) ++SYM_CODE_END(xen_entry_SYSCALL_64) + + #ifdef CONFIG_IA32_EMULATION + + /* 32-bit compat syscall target */ +-SYM_CODE_START(xen_syscall32_target) +- UNWIND_HINT_EMPTY ++SYM_CODE_START(xen_entry_SYSCALL_compat) ++ UNWIND_HINT_ENTRY + ENDBR + popq %rcx + popq %r11 +@@ -269,11 +269,11 @@ SYM_CODE_START(xen_syscall32_target) + movq $__USER32_CS, 1*8(%rsp) + + jmp entry_SYSCALL_compat_after_hwframe +-SYM_CODE_END(xen_syscall32_target) ++SYM_CODE_END(xen_entry_SYSCALL_compat) + + /* 32-bit compat sysenter target */ +-SYM_CODE_START(xen_sysenter_target) +- UNWIND_HINT_EMPTY ++SYM_CODE_START(xen_entry_SYSENTER_compat) ++ UNWIND_HINT_ENTRY + ENDBR + /* + * NB: Xen is polite and clears TF from EFLAGS for us. This means +@@ -291,19 +291,19 @@ SYM_CODE_START(xen_sysenter_target) + movq $__USER32_CS, 1*8(%rsp) + + jmp entry_SYSENTER_compat_after_hwframe +-SYM_CODE_END(xen_sysenter_target) ++SYM_CODE_END(xen_entry_SYSENTER_compat) + + #else /* !CONFIG_IA32_EMULATION */ + +-SYM_CODE_START(xen_syscall32_target) +-SYM_CODE_START(xen_sysenter_target) +- UNWIND_HINT_EMPTY ++SYM_CODE_START(xen_entry_SYSCALL_compat) ++SYM_CODE_START(xen_entry_SYSENTER_compat) ++ UNWIND_HINT_ENTRY + ENDBR + lea 16(%rsp), %rsp /* strip %rcx, %r11 */ + mov $-ENOSYS, %rax + pushq $0 + jmp hypercall_iret +-SYM_CODE_END(xen_sysenter_target) +-SYM_CODE_END(xen_syscall32_target) ++SYM_CODE_END(xen_entry_SYSENTER_compat) ++SYM_CODE_END(xen_entry_SYSCALL_compat) + + #endif /* CONFIG_IA32_EMULATION */ +diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S +index 3a2cd93bf059..fa884fc73e07 100644 +--- a/arch/x86/xen/xen-head.S ++++ b/arch/x86/xen/xen-head.S +@@ -26,6 +26,7 @@ SYM_CODE_START(hypercall_page) + .rept (PAGE_SIZE / 32) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR ++ ANNOTATE_UNRET_SAFE + ret + /* + * Xen will write the hypercall page, and sort out ENDBR. +diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h +index fd0fec6e92f4..9a8bb972193d 100644 +--- a/arch/x86/xen/xen-ops.h ++++ b/arch/x86/xen/xen-ops.h +@@ -10,10 +10,10 @@ + /* These are code, but not functions. Defined in entry.S */ + extern const char xen_failsafe_callback[]; + +-void xen_sysenter_target(void); ++void xen_entry_SYSENTER_compat(void); + #ifdef CONFIG_X86_64 +-void xen_syscall_target(void); +-void xen_syscall32_target(void); ++void xen_entry_SYSCALL_64(void); ++void xen_entry_SYSCALL_compat(void); + #endif + + extern void *xen_initial_gdt; diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 6aef1ee5e1bd..8f146b1b4972 100644 --- a/drivers/acpi/apei/hest.c @@ -334,6 +3970,39 @@ index cf8c7fd59ada..28a8189be64f 100644 /* wait for engine to stop. This could be as long as 500 msec */ tmp = ata_wait_register(ap, port_mmio + PORT_CMD, PORT_CMD_LIST_ON, PORT_CMD_LIST_ON, 1, 500); +diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c +index a97776ea9d99..4c98849577d4 100644 +--- a/drivers/base/cpu.c ++++ b/drivers/base/cpu.c +@@ -570,6 +570,12 @@ ssize_t __weak cpu_show_mmio_stale_data(struct device *dev, + return sysfs_emit(buf, "Not affected\n"); + } + ++ssize_t __weak cpu_show_retbleed(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ return sysfs_emit(buf, "Not affected\n"); ++} ++ + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); + static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); + static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); +@@ -580,6 +586,7 @@ static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); + static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); + static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); + static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); ++static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); + + static struct attribute *cpu_root_vulnerabilities_attrs[] = { + &dev_attr_meltdown.attr, +@@ -592,6 +599,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { + &dev_attr_itlb_multihit.attr, + &dev_attr_srbds.attr, + &dev_attr_mmio_stale_data.attr, ++ &dev_attr_retbleed.attr, + NULL + }; + diff --git a/drivers/char/ipmi/ipmi_dmi.c b/drivers/char/ipmi/ipmi_dmi.c index bbf7029e224b..cf7faa970dd6 100644 --- a/drivers/char/ipmi/ipmi_dmi.c @@ -1096,6 +4765,129 @@ index 7f416a12000e..68be4afaa58a 100644 amba_driver_unregister(&etm4x_amba_driver); platform_driver_unregister(&etm4_platform_driver); etm4_pm_clear(); +diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c +index c5a019eab5ec..b463d85bfb35 100644 +--- a/drivers/idle/intel_idle.c ++++ b/drivers/idle/intel_idle.c +@@ -47,11 +47,13 @@ + #include <linux/tick.h> + #include <trace/events/power.h> + #include <linux/sched.h> ++#include <linux/sched/smt.h> + #include <linux/notifier.h> + #include <linux/cpu.h> + #include <linux/moduleparam.h> + #include <asm/cpu_device_id.h> + #include <asm/intel-family.h> ++#include <asm/nospec-branch.h> + #include <asm/mwait.h> + #include <asm/msr.h> + +@@ -105,6 +107,12 @@ static unsigned int mwait_substates __initdata; + */ + #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) + ++/* ++ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE ++ * above. ++ */ ++#define CPUIDLE_FLAG_IBRS BIT(16) ++ + /* + * MWAIT takes an 8-bit "hint" in EAX "suggesting" + * the C-state (top nibble) and sub-state (bottom nibble) +@@ -159,6 +167,24 @@ static __cpuidle int intel_idle_irq(struct cpuidle_device *dev, + return ret; + } + ++static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev, ++ struct cpuidle_driver *drv, int index) ++{ ++ bool smt_active = sched_smt_active(); ++ u64 spec_ctrl = spec_ctrl_current(); ++ int ret; ++ ++ if (smt_active) ++ wrmsrl(MSR_IA32_SPEC_CTRL, 0); ++ ++ ret = __intel_idle(dev, drv, index); ++ ++ if (smt_active) ++ wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); ++ ++ return ret; ++} ++ + /** + * intel_idle_s2idle - Ask the processor to enter the given idle state. + * @dev: cpuidle device of the target CPU. +@@ -680,7 +706,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { + { + .name = "C6", + .desc = "MWAIT 0x20", +- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 85, + .target_residency = 200, + .enter = &intel_idle, +@@ -688,7 +714,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { + { + .name = "C7s", + .desc = "MWAIT 0x33", +- .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 124, + .target_residency = 800, + .enter = &intel_idle, +@@ -696,7 +722,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { + { + .name = "C8", + .desc = "MWAIT 0x40", +- .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 200, + .target_residency = 800, + .enter = &intel_idle, +@@ -704,7 +730,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { + { + .name = "C9", + .desc = "MWAIT 0x50", +- .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 480, + .target_residency = 5000, + .enter = &intel_idle, +@@ -712,7 +738,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { + { + .name = "C10", + .desc = "MWAIT 0x60", +- .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 890, + .target_residency = 5000, + .enter = &intel_idle, +@@ -741,7 +767,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { + { + .name = "C6", + .desc = "MWAIT 0x20", +- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 133, + .target_residency = 600, + .enter = &intel_idle, +@@ -1686,6 +1712,12 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) + if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE) + drv->states[drv->state_count].enter = intel_idle_irq; + ++ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && ++ cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) { ++ WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE); ++ drv->states[drv->state_count].enter = intel_idle_ibrs; ++ } ++ + if ((disabled_states_mask & BIT(drv->state_count)) || + ((icpu->use_acpi || force_use_acpi) && + intel_idle_off_by_default(mwait_hint) && diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index 258d5fe3d395..f7298e3dc8f3 100644 --- a/drivers/input/rmi4/rmi_driver.c @@ -1517,6 +5309,19 @@ index 1460857026e0..7e1964891089 100644 /* Lock the device, then check to see if we were * disconnected while waiting for the lock to succeed. */ usb_lock_device(hdev); +diff --git a/include/linux/cpu.h b/include/linux/cpu.h +index 2c7477354744..314802f98b9d 100644 +--- a/include/linux/cpu.h ++++ b/include/linux/cpu.h +@@ -68,6 +68,8 @@ extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, + extern ssize_t cpu_show_mmio_stale_data(struct device *dev, + struct device_attribute *attr, + char *buf); ++extern ssize_t cpu_show_retbleed(struct device *dev, ++ struct device_attribute *attr, char *buf); + + extern __printf(4, 5) + struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/efi.h b/include/linux/efi.h index cc6d2be2ffd5..418d814d2eb7 100644 --- a/include/linux/efi.h @@ -1601,6 +5406,19 @@ index cc6d2be2ffd5..418d814d2eb7 100644 static inline enum efi_secureboot_mode efi_get_secureboot_mode(efi_get_variable_t *get_var) { +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index 34eed5f85ed6..88d94cf515e1 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -1511,7 +1511,7 @@ static inline void kvm_arch_end_assignment(struct kvm *kvm) + { + } + +-static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) ++static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm) + { + return false; + } diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index db924fe379c9..1169d78af2de 100644 --- a/include/linux/lsm_hook_defs.h @@ -1631,6 +5449,45 @@ index 419b5febc3ca..491323dfe4e0 100644 * Security hooks for perf events * * @perf_event_open: +diff --git a/include/linux/objtool.h b/include/linux/objtool.h +index c81ea2264ad8..376110ead758 100644 +--- a/include/linux/objtool.h ++++ b/include/linux/objtool.h +@@ -32,11 +32,16 @@ struct unwind_hint { + * + * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. + * Useful for code which doesn't have an ELF function annotation. ++ * ++ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. + */ + #define UNWIND_HINT_TYPE_CALL 0 + #define UNWIND_HINT_TYPE_REGS 1 + #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 + #define UNWIND_HINT_TYPE_FUNC 3 ++#define UNWIND_HINT_TYPE_ENTRY 4 ++#define UNWIND_HINT_TYPE_SAVE 5 ++#define UNWIND_HINT_TYPE_RESTORE 6 + + #ifdef CONFIG_STACK_VALIDATION + +@@ -122,7 +127,7 @@ struct unwind_hint { + * the debuginfo as necessary. It will also warn if it sees any + * inconsistencies. + */ +-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 ++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 + .Lunwind_hint_ip_\@: + .pushsection .discard.unwind_hints + /* struct unwind_hint */ +@@ -175,7 +180,7 @@ struct unwind_hint { + #define ASM_REACHABLE + #else + #define ANNOTATE_INTRA_FUNCTION_CALL +-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 ++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 + .endm + .macro STACK_FRAME_NON_STANDARD func:req + .endm diff --git a/include/linux/rmi.h b/include/linux/rmi.h index ab7eea01ab42..fff7c5f737fc 100644 --- a/include/linux/rmi.h @@ -1700,32 +5557,32 @@ index 8723ae70ea1f..fb2d773498c2 100644 + } + return ret; } -diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c -index 81243c834abb..a136148627e7 100644 ---- a/net/netfilter/nf_tables_api.c -+++ b/net/netfilter/nf_tables_api.c -@@ -5213,13 +5213,20 @@ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set, - struct nft_data *data, - struct nlattr *attr) - { -+ u32 dtype; - int err; - - err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr); - if (err < 0) - return err; - -- if (desc->type != NFT_DATA_VERDICT && desc->len != set->dlen) { -+ if (set->dtype == NFT_DATA_VERDICT) -+ dtype = NFT_DATA_VERDICT; -+ else -+ dtype = NFT_DATA_VALUE; -+ -+ if (dtype != desc->type || -+ set->dlen != desc->len) { - nft_data_release(data, desc->type); - return -EINVAL; - } +diff --git a/scripts/Makefile.build b/scripts/Makefile.build +index 33c1ed581522..2a0521f77e5f 100644 +--- a/scripts/Makefile.build ++++ b/scripts/Makefile.build +@@ -233,6 +233,7 @@ objtool_args = \ + $(if $(CONFIG_FRAME_POINTER),, --no-fp) \ + $(if $(CONFIG_GCOV_KERNEL), --no-unreachable) \ + $(if $(CONFIG_RETPOLINE), --retpoline) \ ++ $(if $(CONFIG_RETHUNK), --rethunk) \ + $(if $(CONFIG_X86_SMAP), --uaccess) \ + $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) \ + $(if $(CONFIG_SLS), --sls) +diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh +index 9361a1ef02c9..d4d028595fb4 100755 +--- a/scripts/link-vmlinux.sh ++++ b/scripts/link-vmlinux.sh +@@ -130,6 +130,9 @@ objtool_link() + + if is_enabled CONFIG_VMLINUX_VALIDATION; then + objtoolopt="${objtoolopt} --noinstr" ++ if is_enabled CONFIG_CPU_UNRET_ENTRY; then ++ objtoolopt="${objtoolopt} --unret" ++ fi + fi + + if [ -n "${objtoolopt}" ]; then diff --git a/scripts/tags.sh b/scripts/tags.sh index 16d475b3e203..4e333f14b84e 100755 --- a/scripts/tags.sh @@ -1739,6 +5596,28 @@ index 16d475b3e203..4e333f14b84e 100755 # Use make KBUILD_ABS_SRCTREE=1 {tags|cscope} # to force full paths for a non-O= build +diff --git a/security/Kconfig b/security/Kconfig +index 9b2c4925585a..34e2d7edd085 100644 +--- a/security/Kconfig ++++ b/security/Kconfig +@@ -54,17 +54,6 @@ config SECURITY_NETWORK + implement socket and networking access controls. + If you are unsure how to answer this question, answer N. + +-config PAGE_TABLE_ISOLATION +- bool "Remove the kernel mapping in user mode" +- default y +- depends on (X86_64 || X86_PAE) && !UML +- help +- This feature reduces the number of hardware side channels by +- ensuring that the majority of kernel addresses are not mapped +- into userspace. +- +- See Documentation/x86/pti.rst for more details. +- + config SECURITY_INFINIBAND + bool "Infiniband Security Hooks" + depends on SECURITY && INFINIBAND diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c index 093894a640dc..1c43a9462b4b 100644 --- a/security/integrity/platform_certs/load_uefi.c @@ -1816,3 +5695,661 @@ index aaf6566deb9f..86926966c15d 100644 #ifdef CONFIG_PERF_EVENTS int security_perf_event_open(struct perf_event_attr *attr, int type) { +diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h +index 4425d6773183..8a0a53cf360d 100644 +--- a/tools/arch/x86/include/asm/msr-index.h ++++ b/tools/arch/x86/include/asm/msr-index.h +@@ -51,6 +51,8 @@ + #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ + #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ + #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ ++#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ ++#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) + + #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ + #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ +@@ -138,6 +140,13 @@ + * bit available to control VERW + * behavior. + */ ++#define ARCH_CAP_RRSBA BIT(19) /* ++ * Indicates RET may use predictors ++ * other than the RSB. With eIBRS ++ * enabled predictions in kernel mode ++ * are restricted to targets in ++ * kernel. ++ */ + + #define MSR_IA32_FLUSH_CMD 0x0000010b + #define L1D_FLUSH BIT(0) /* +diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h +index c81ea2264ad8..376110ead758 100644 +--- a/tools/include/linux/objtool.h ++++ b/tools/include/linux/objtool.h +@@ -32,11 +32,16 @@ struct unwind_hint { + * + * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. + * Useful for code which doesn't have an ELF function annotation. ++ * ++ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. + */ + #define UNWIND_HINT_TYPE_CALL 0 + #define UNWIND_HINT_TYPE_REGS 1 + #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 + #define UNWIND_HINT_TYPE_FUNC 3 ++#define UNWIND_HINT_TYPE_ENTRY 4 ++#define UNWIND_HINT_TYPE_SAVE 5 ++#define UNWIND_HINT_TYPE_RESTORE 6 + + #ifdef CONFIG_STACK_VALIDATION + +@@ -122,7 +127,7 @@ struct unwind_hint { + * the debuginfo as necessary. It will also warn if it sees any + * inconsistencies. + */ +-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 ++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 + .Lunwind_hint_ip_\@: + .pushsection .discard.unwind_hints + /* struct unwind_hint */ +@@ -175,7 +180,7 @@ struct unwind_hint { + #define ASM_REACHABLE + #else + #define ANNOTATE_INTRA_FUNCTION_CALL +-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 ++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 + .endm + .macro STACK_FRAME_NON_STANDARD func:req + .endm +diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c +index 943cb41cddf7..1ecf50bbd554 100644 +--- a/tools/objtool/arch/x86/decode.c ++++ b/tools/objtool/arch/x86/decode.c +@@ -787,3 +787,8 @@ bool arch_is_retpoline(struct symbol *sym) + { + return !strncmp(sym->name, "__x86_indirect_", 15); + } ++ ++bool arch_is_rethunk(struct symbol *sym) ++{ ++ return !strcmp(sym->name, "__x86_return_thunk"); ++} +diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c +index fc6975ab8b06..cd4bbc98f8c1 100644 +--- a/tools/objtool/builtin-check.c ++++ b/tools/objtool/builtin-check.c +@@ -21,7 +21,7 @@ + + bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, + lto, vmlinux, mcount, noinstr, backup, sls, dryrun, +- ibt; ++ ibt, unret, rethunk; + + static const char * const check_usage[] = { + "objtool check [<options>] file.o", +@@ -37,6 +37,8 @@ const struct option check_options[] = { + OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"), + OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"), + OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"), ++ OPT_BOOLEAN(0, "rethunk", &rethunk, "validate and annotate rethunk usage"), ++ OPT_BOOLEAN(0, "unret", &unret, "validate entry unret placement"), + OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"), + OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"), + OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"), +diff --git a/tools/objtool/check.c b/tools/objtool/check.c +index f66e4ac0af94..57b7a68d3b66 100644 +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -374,7 +374,8 @@ static int decode_instructions(struct objtool_file *file) + sec->text = true; + + if (!strcmp(sec->name, ".noinstr.text") || +- !strcmp(sec->name, ".entry.text")) ++ !strcmp(sec->name, ".entry.text") || ++ !strncmp(sec->name, ".text.__x86.", 12)) + sec->noinstr = true; + + for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) { +@@ -747,6 +748,52 @@ static int create_retpoline_sites_sections(struct objtool_file *file) + return 0; + } + ++static int create_return_sites_sections(struct objtool_file *file) ++{ ++ struct instruction *insn; ++ struct section *sec; ++ int idx; ++ ++ sec = find_section_by_name(file->elf, ".return_sites"); ++ if (sec) { ++ WARN("file already has .return_sites, skipping"); ++ return 0; ++ } ++ ++ idx = 0; ++ list_for_each_entry(insn, &file->return_thunk_list, call_node) ++ idx++; ++ ++ if (!idx) ++ return 0; ++ ++ sec = elf_create_section(file->elf, ".return_sites", 0, ++ sizeof(int), idx); ++ if (!sec) { ++ WARN("elf_create_section: .return_sites"); ++ return -1; ++ } ++ ++ idx = 0; ++ list_for_each_entry(insn, &file->return_thunk_list, call_node) { ++ ++ int *site = (int *)sec->data->d_buf + idx; ++ *site = 0; ++ ++ if (elf_add_reloc_to_insn(file->elf, sec, ++ idx * sizeof(int), ++ R_X86_64_PC32, ++ insn->sec, insn->offset)) { ++ WARN("elf_add_reloc_to_insn: .return_sites"); ++ return -1; ++ } ++ ++ idx++; ++ } ++ ++ return 0; ++} ++ + static int create_ibt_endbr_seal_sections(struct objtool_file *file) + { + struct instruction *insn; +@@ -1081,6 +1128,11 @@ __weak bool arch_is_retpoline(struct symbol *sym) + return false; + } + ++__weak bool arch_is_rethunk(struct symbol *sym) ++{ ++ return false; ++} ++ + #define NEGATIVE_RELOC ((void *)-1L) + + static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn) +@@ -1248,6 +1300,20 @@ static void add_retpoline_call(struct objtool_file *file, struct instruction *in + annotate_call_site(file, insn, false); + } + ++static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add) ++{ ++ /* ++ * Return thunk tail calls are really just returns in disguise, ++ * so convert them accordingly. ++ */ ++ insn->type = INSN_RETURN; ++ insn->retpoline_safe = true; ++ ++ /* Skip the non-text sections, specially .discard ones */ ++ if (add && insn->sec->text) ++ list_add_tail(&insn->call_node, &file->return_thunk_list); ++} ++ + static bool same_function(struct instruction *insn1, struct instruction *insn2) + { + return insn1->func->pfunc == insn2->func->pfunc; +@@ -1300,6 +1366,9 @@ static int add_jump_destinations(struct objtool_file *file) + } else if (reloc->sym->retpoline_thunk) { + add_retpoline_call(file, insn); + continue; ++ } else if (reloc->sym->return_thunk) { ++ add_return_call(file, insn, true); ++ continue; + } else if (insn->func) { + /* + * External sibling call or internal sibling call with +@@ -1318,6 +1387,21 @@ static int add_jump_destinations(struct objtool_file *file) + + jump_dest = find_insn(file, dest_sec, dest_off); + if (!jump_dest) { ++ struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off); ++ ++ /* ++ * This is a special case for zen_untrain_ret(). ++ * It jumps to __x86_return_thunk(), but objtool ++ * can't find the thunk's starting RET ++ * instruction, because the RET is also in the ++ * middle of another instruction. Objtool only ++ * knows about the outer instruction. ++ */ ++ if (sym && sym->return_thunk) { ++ add_return_call(file, insn, false); ++ continue; ++ } ++ + WARN_FUNC("can't find jump dest instruction at %s+0x%lx", + insn->sec, insn->offset, dest_sec->name, + dest_off); +@@ -1947,16 +2031,35 @@ static int read_unwind_hints(struct objtool_file *file) + + insn->hint = true; + +- if (ibt && hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { ++ if (hint->type == UNWIND_HINT_TYPE_SAVE) { ++ insn->hint = false; ++ insn->save = true; ++ continue; ++ } ++ ++ if (hint->type == UNWIND_HINT_TYPE_RESTORE) { ++ insn->restore = true; ++ continue; ++ } ++ ++ if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { + struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset); + +- if (sym && sym->bind == STB_GLOBAL && +- insn->type != INSN_ENDBR && !insn->noendbr) { +- WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR", +- insn->sec, insn->offset); ++ if (sym && sym->bind == STB_GLOBAL) { ++ if (ibt && insn->type != INSN_ENDBR && !insn->noendbr) { ++ WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR", ++ insn->sec, insn->offset); ++ } ++ ++ insn->entry = 1; + } + } + ++ if (hint->type == UNWIND_HINT_TYPE_ENTRY) { ++ hint->type = UNWIND_HINT_TYPE_CALL; ++ insn->entry = 1; ++ } ++ + if (hint->type == UNWIND_HINT_TYPE_FUNC) { + insn->cfi = &func_cfi; + continue; +@@ -2030,8 +2133,10 @@ static int read_retpoline_hints(struct objtool_file *file) + } + + if (insn->type != INSN_JUMP_DYNAMIC && +- insn->type != INSN_CALL_DYNAMIC) { +- WARN_FUNC("retpoline_safe hint not an indirect jump/call", ++ insn->type != INSN_CALL_DYNAMIC && ++ insn->type != INSN_RETURN && ++ insn->type != INSN_NOP) { ++ WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret/nop", + insn->sec, insn->offset); + return -1; + } +@@ -2182,6 +2287,9 @@ static int classify_symbols(struct objtool_file *file) + if (arch_is_retpoline(func)) + func->retpoline_thunk = true; + ++ if (arch_is_rethunk(func)) ++ func->return_thunk = true; ++ + if (!strcmp(func->name, "__fentry__")) + func->fentry = true; + +@@ -3324,8 +3432,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, + return 1; + } + +- visited = 1 << state.uaccess; +- if (insn->visited) { ++ visited = VISITED_BRANCH << state.uaccess; ++ if (insn->visited & VISITED_BRANCH_MASK) { + if (!insn->hint && !insn_cfi_match(insn, &state.cfi)) + return 1; + +@@ -3339,6 +3447,35 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, + state.instr += insn->instr; + + if (insn->hint) { ++ if (insn->restore) { ++ struct instruction *save_insn, *i; ++ ++ i = insn; ++ save_insn = NULL; ++ ++ sym_for_each_insn_continue_reverse(file, func, i) { ++ if (i->save) { ++ save_insn = i; ++ break; ++ } ++ } ++ ++ if (!save_insn) { ++ WARN_FUNC("no corresponding CFI save for CFI restore", ++ sec, insn->offset); ++ return 1; ++ } ++ ++ if (!save_insn->visited) { ++ WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo", ++ sec, insn->offset); ++ return 1; ++ } ++ ++ insn->cfi = save_insn->cfi; ++ nr_cfi_reused++; ++ } ++ + state.cfi = *insn->cfi; + } else { + /* XXX track if we actually changed state.cfi */ +@@ -3554,6 +3691,145 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec) + return warnings; + } + ++/* ++ * Validate rethunk entry constraint: must untrain RET before the first RET. ++ * ++ * Follow every branch (intra-function) and ensure ANNOTATE_UNRET_END comes ++ * before an actual RET instruction. ++ */ ++static int validate_entry(struct objtool_file *file, struct instruction *insn) ++{ ++ struct instruction *next, *dest; ++ int ret, warnings = 0; ++ ++ for (;;) { ++ next = next_insn_to_validate(file, insn); ++ ++ if (insn->visited & VISITED_ENTRY) ++ return 0; ++ ++ insn->visited |= VISITED_ENTRY; ++ ++ if (!insn->ignore_alts && !list_empty(&insn->alts)) { ++ struct alternative *alt; ++ bool skip_orig = false; ++ ++ list_for_each_entry(alt, &insn->alts, list) { ++ if (alt->skip_orig) ++ skip_orig = true; ++ ++ ret = validate_entry(file, alt->insn); ++ if (ret) { ++ if (backtrace) ++ BT_FUNC("(alt)", insn); ++ return ret; ++ } ++ } ++ ++ if (skip_orig) ++ return 0; ++ } ++ ++ switch (insn->type) { ++ ++ case INSN_CALL_DYNAMIC: ++ case INSN_JUMP_DYNAMIC: ++ case INSN_JUMP_DYNAMIC_CONDITIONAL: ++ WARN_FUNC("early indirect call", insn->sec, insn->offset); ++ return 1; ++ ++ case INSN_JUMP_UNCONDITIONAL: ++ case INSN_JUMP_CONDITIONAL: ++ if (!is_sibling_call(insn)) { ++ if (!insn->jump_dest) { ++ WARN_FUNC("unresolved jump target after linking?!?", ++ insn->sec, insn->offset); ++ return -1; ++ } ++ ret = validate_entry(file, insn->jump_dest); ++ if (ret) { ++ if (backtrace) { ++ BT_FUNC("(branch%s)", insn, ++ insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : ""); ++ } ++ return ret; ++ } ++ ++ if (insn->type == INSN_JUMP_UNCONDITIONAL) ++ return 0; ++ ++ break; ++ } ++ ++ /* fallthrough */ ++ case INSN_CALL: ++ dest = find_insn(file, insn->call_dest->sec, ++ insn->call_dest->offset); ++ if (!dest) { ++ WARN("Unresolved function after linking!?: %s", ++ insn->call_dest->name); ++ return -1; ++ } ++ ++ ret = validate_entry(file, dest); ++ if (ret) { ++ if (backtrace) ++ BT_FUNC("(call)", insn); ++ return ret; ++ } ++ /* ++ * If a call returns without error, it must have seen UNTRAIN_RET. ++ * Therefore any non-error return is a success. ++ */ ++ return 0; ++ ++ case INSN_RETURN: ++ WARN_FUNC("RET before UNTRAIN", insn->sec, insn->offset); ++ return 1; ++ ++ case INSN_NOP: ++ if (insn->retpoline_safe) ++ return 0; ++ break; ++ ++ default: ++ break; ++ } ++ ++ if (!next) { ++ WARN_FUNC("teh end!", insn->sec, insn->offset); ++ return -1; ++ } ++ insn = next; ++ } ++ ++ return warnings; ++} ++ ++/* ++ * Validate that all branches starting at 'insn->entry' encounter UNRET_END ++ * before RET. ++ */ ++static int validate_unret(struct objtool_file *file) ++{ ++ struct instruction *insn; ++ int ret, warnings = 0; ++ ++ for_each_insn(file, insn) { ++ if (!insn->entry) ++ continue; ++ ++ ret = validate_entry(file, insn); ++ if (ret < 0) { ++ WARN_FUNC("Failed UNRET validation", insn->sec, insn->offset); ++ return ret; ++ } ++ warnings += ret; ++ } ++ ++ return warnings; ++} ++ + static int validate_retpoline(struct objtool_file *file) + { + struct instruction *insn; +@@ -3561,7 +3837,8 @@ static int validate_retpoline(struct objtool_file *file) + + for_each_insn(file, insn) { + if (insn->type != INSN_JUMP_DYNAMIC && +- insn->type != INSN_CALL_DYNAMIC) ++ insn->type != INSN_CALL_DYNAMIC && ++ insn->type != INSN_RETURN) + continue; + + if (insn->retpoline_safe) +@@ -3576,9 +3853,17 @@ static int validate_retpoline(struct objtool_file *file) + if (!strcmp(insn->sec->name, ".init.text") && !module) + continue; + +- WARN_FUNC("indirect %s found in RETPOLINE build", +- insn->sec, insn->offset, +- insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); ++ if (insn->type == INSN_RETURN) { ++ if (rethunk) { ++ WARN_FUNC("'naked' return found in RETHUNK build", ++ insn->sec, insn->offset); ++ } else ++ continue; ++ } else { ++ WARN_FUNC("indirect %s found in RETPOLINE build", ++ insn->sec, insn->offset, ++ insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); ++ } + + warnings++; + } +@@ -3911,6 +4196,17 @@ int check(struct objtool_file *file) + goto out; + warnings += ret; + ++ if (unret) { ++ /* ++ * Must be after validate_branch() and friends, it plays ++ * further games with insn->visited. ++ */ ++ ret = validate_unret(file); ++ if (ret < 0) ++ return ret; ++ warnings += ret; ++ } ++ + if (ibt) { + ret = validate_ibt(file); + if (ret < 0) +@@ -3937,6 +4233,13 @@ int check(struct objtool_file *file) + warnings += ret; + } + ++ if (rethunk) { ++ ret = create_return_sites_sections(file); ++ if (ret < 0) ++ goto out; ++ warnings += ret; ++ } ++ + if (mcount) { + ret = create_mcount_loc_sections(file); + if (ret < 0) +diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h +index 9b19cc304195..beb2f3aa94ff 100644 +--- a/tools/objtool/include/objtool/arch.h ++++ b/tools/objtool/include/objtool/arch.h +@@ -89,6 +89,7 @@ const char *arch_ret_insn(int len); + int arch_decode_hint_reg(u8 sp_reg, int *base); + + bool arch_is_retpoline(struct symbol *sym); ++bool arch_is_rethunk(struct symbol *sym); + + int arch_rewrite_retpolines(struct objtool_file *file); + +diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h +index c39dbfaef6dc..b6bb605faf3f 100644 +--- a/tools/objtool/include/objtool/builtin.h ++++ b/tools/objtool/include/objtool/builtin.h +@@ -10,7 +10,7 @@ + extern const struct option check_options[]; + extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, + lto, vmlinux, mcount, noinstr, backup, sls, dryrun, +- ibt; ++ ibt, unret, rethunk; + + extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); + +diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h +index f10d7374f388..036129cebeee 100644 +--- a/tools/objtool/include/objtool/check.h ++++ b/tools/objtool/include/objtool/check.h +@@ -46,16 +46,19 @@ struct instruction { + enum insn_type type; + unsigned long immediate; + +- u8 dead_end : 1, +- ignore : 1, +- ignore_alts : 1, +- hint : 1, +- retpoline_safe : 1, +- noendbr : 1; +- /* 2 bit hole */ ++ u16 dead_end : 1, ++ ignore : 1, ++ ignore_alts : 1, ++ hint : 1, ++ save : 1, ++ restore : 1, ++ retpoline_safe : 1, ++ noendbr : 1, ++ entry : 1; ++ /* 7 bit hole */ ++ + s8 instr; + u8 visited; +- /* u8 hole */ + + struct alt_group *alt_group; + struct symbol *call_dest; +@@ -69,6 +72,11 @@ struct instruction { + struct cfi_state *cfi; + }; + ++#define VISITED_BRANCH 0x01 ++#define VISITED_BRANCH_UACCESS 0x02 ++#define VISITED_BRANCH_MASK 0x03 ++#define VISITED_ENTRY 0x04 ++ + static inline bool is_static_jump(struct instruction *insn) + { + return insn->type == INSN_JUMP_CONDITIONAL || +diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h +index 82e57eb4b4c5..94a618e2a79e 100644 +--- a/tools/objtool/include/objtool/elf.h ++++ b/tools/objtool/include/objtool/elf.h +@@ -57,6 +57,7 @@ struct symbol { + u8 uaccess_safe : 1; + u8 static_call_tramp : 1; + u8 retpoline_thunk : 1; ++ u8 return_thunk : 1; + u8 fentry : 1; + u8 profiling_func : 1; + struct list_head pv_target; +diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h +index a6e72d916807..7f2d1b095333 100644 +--- a/tools/objtool/include/objtool/objtool.h ++++ b/tools/objtool/include/objtool/objtool.h +@@ -24,6 +24,7 @@ struct objtool_file { + struct list_head insn_list; + DECLARE_HASHTABLE(insn_hash, 20); + struct list_head retpoline_call_list; ++ struct list_head return_thunk_list; + struct list_head static_call_list; + struct list_head mcount_loc_list; + struct list_head endbr_list; +diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c +index 843ff3c2f28e..983687345d35 100644 +--- a/tools/objtool/objtool.c ++++ b/tools/objtool/objtool.c +@@ -126,6 +126,7 @@ struct objtool_file *objtool_open_read(const char *_objname) + INIT_LIST_HEAD(&file.insn_list); + hash_init(file.insn_hash); + INIT_LIST_HEAD(&file.retpoline_call_list); ++ INIT_LIST_HEAD(&file.return_thunk_list); + INIT_LIST_HEAD(&file.static_call_list); + INIT_LIST_HEAD(&file.mcount_loc_list); + INIT_LIST_HEAD(&file.endbr_list); |