summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorAnton Arapov <anton@redhat.com>2012-08-07 11:21:50 +0200
committerAnton Arapov <anton@redhat.com>2012-08-07 12:52:25 +0200
commit1d44b6f3fcf6058fb7c960b7558766967e8028f7 (patch)
tree53d88547c973ba048d233091a3f91f3173ad01df /arch/x86
parentd91eda5d7b0383e6a0c83e0146ff141ff3b1355b (diff)
downloadkernel-uprobes-1d44b6f3fcf6058fb7c960b7558766967e8028f7.tar.gz
kernel-uprobes-1d44b6f3fcf6058fb7c960b7558766967e8028f7.tar.xz
kernel-uprobes-1d44b6f3fcf6058fb7c960b7558766967e8028f7.zip
fedora kernel: 222b075b3ff0d9e88aa9353e3c80667756ed7361v3.5.0-4
Signed-off-by: Anton Arapov <anton@redhat.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kbuild2
-rw-r--r--arch/x86/Kconfig96
-rw-r--r--arch/x86/Makefile6
-rw-r--r--arch/x86/boot/compressed/eboot.c101
-rw-r--r--arch/x86/boot/compressed/eboot.h6
-rw-r--r--arch/x86/boot/header.S62
-rw-r--r--arch/x86/boot/main.c18
-rw-r--r--arch/x86/boot/tools/build.c154
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c801
-rw-r--r--arch/x86/ia32/ia32_signal.c20
-rw-r--r--arch/x86/ia32/ia32entry.S9
-rw-r--r--arch/x86/ia32/sys_ia32.c27
-rw-r--r--arch/x86/include/asm/acpi.h9
-rw-r--r--arch/x86/include/asm/apic.h23
-rw-r--r--arch/x86/include/asm/apicdef.h2
-rw-r--r--arch/x86/include/asm/asm.h38
-rw-r--r--arch/x86/include/asm/atomic64_32.h10
-rw-r--r--arch/x86/include/asm/bitops.h2
-rw-r--r--arch/x86/include/asm/boot.h2
-rw-r--r--arch/x86/include/asm/bootparam.h3
-rw-r--r--arch/x86/include/asm/compat.h2
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/current.h2
-rw-r--r--arch/x86/include/asm/desc.h1
-rw-r--r--arch/x86/include/asm/device.h4
-rw-r--r--arch/x86/include/asm/dma-contiguous.h13
-rw-r--r--arch/x86/include/asm/dma-mapping.h14
-rw-r--r--arch/x86/include/asm/fpu-internal.h6
-rw-r--r--arch/x86/include/asm/ftrace.h3
-rw-r--r--arch/x86/include/asm/gpio.h57
-rw-r--r--arch/x86/include/asm/hardirq.h9
-rw-r--r--arch/x86/include/asm/ia32.h6
-rw-r--r--arch/x86/include/asm/io_apic.h35
-rw-r--r--arch/x86/include/asm/irq_regs.h4
-rw-r--r--arch/x86/include/asm/irq_remapping.h118
-rw-r--r--arch/x86/include/asm/kbdleds.h17
-rw-r--r--arch/x86/include/asm/kdebug.h1
-rw-r--r--arch/x86/include/asm/kvm_emulate.h4
-rw-r--r--arch/x86/include/asm/kvm_host.h18
-rw-r--r--arch/x86/include/asm/kvm_para.h24
-rw-r--r--arch/x86/include/asm/mca.h43
-rw-r--r--arch/x86/include/asm/mca_dma.h201
-rw-r--r--arch/x86/include/asm/mmu_context.h12
-rw-r--r--arch/x86/include/asm/mmzone_32.h6
-rw-r--r--arch/x86/include/asm/mpspec.h2
-rw-r--r--arch/x86/include/asm/mpspec_def.h3
-rw-r--r--arch/x86/include/asm/msr-index.h5
-rw-r--r--arch/x86/include/asm/msr.h9
-rw-r--r--arch/x86/include/asm/nmi.h36
-rw-r--r--arch/x86/include/asm/nops.h4
-rw-r--r--arch/x86/include/asm/page_32_types.h4
-rw-r--r--arch/x86/include/asm/page_64_types.h4
-rw-r--r--arch/x86/include/asm/paravirt.h6
-rw-r--r--arch/x86/include/asm/percpu.h24
-rw-r--r--arch/x86/include/asm/perf_event.h12
-rw-r--r--arch/x86/include/asm/posix_types_32.h3
-rw-r--r--arch/x86/include/asm/processor.h12
-rw-r--r--arch/x86/include/asm/pvclock-abi.h1
-rw-r--r--arch/x86/include/asm/realmode.h62
-rw-r--r--arch/x86/include/asm/segment.h4
-rw-r--r--arch/x86/include/asm/sighandling.h2
-rw-r--r--arch/x86/include/asm/smp.h15
-rw-r--r--arch/x86/include/asm/spinlock.h2
-rw-r--r--arch/x86/include/asm/sta2x11.h12
-rw-r--r--arch/x86/include/asm/stackprotector.h4
-rw-r--r--arch/x86/include/asm/stat.h21
-rw-r--r--arch/x86/include/asm/syscall.h27
-rw-r--r--arch/x86/include/asm/thread_info.h41
-rw-r--r--arch/x86/include/asm/tlbflush.h10
-rw-r--r--arch/x86/include/asm/topology.h38
-rw-r--r--arch/x86/include/asm/trampoline.h39
-rw-r--r--arch/x86/include/asm/uaccess.h41
-rw-r--r--arch/x86/include/asm/uaccess_32.h17
-rw-r--r--arch/x86/include/asm/uaccess_64.h3
-rw-r--r--arch/x86/include/asm/uprobes.h2
-rw-r--r--arch/x86/include/asm/word-at-a-time.h34
-rw-r--r--arch/x86/include/asm/x86_init.h9
-rw-r--r--arch/x86/include/asm/xen/events.h1
-rw-r--r--arch/x86/include/asm/xen/page.h1
-rw-r--r--arch/x86/include/asm/xor_32.h6
-rw-r--r--arch/x86/include/asm/xor_64.h8
-rw-r--r--arch/x86/include/asm/xor_avx.h214
-rw-r--r--arch/x86/include/asm/xsave.h10
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/acpi/Makefile9
-rw-r--r--arch/x86/kernel/acpi/boot.c29
-rw-r--r--arch/x86/kernel/acpi/realmode/Makefile59
-rw-r--r--arch/x86/kernel/acpi/realmode/bioscall.S1
-rw-r--r--arch/x86/kernel/acpi/realmode/copy.S1
-rw-r--r--arch/x86/kernel/acpi/realmode/regs.c1
-rw-r--r--arch/x86/kernel/acpi/realmode/video-bios.c1
-rw-r--r--arch/x86/kernel/acpi/realmode/video-mode.c1
-rw-r--r--arch/x86/kernel/acpi/realmode/video-vesa.c1
-rw-r--r--arch/x86/kernel/acpi/realmode/video-vga.c1
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.lds.S62
-rw-r--r--arch/x86/kernel/acpi/sleep.c33
-rw-r--r--arch/x86/kernel/acpi/sleep.h2
-rw-r--r--arch/x86/kernel/acpi/wakeup_rm.S12
-rw-r--r--arch/x86/kernel/aperture_64.c6
-rw-r--r--arch/x86/kernel/apic/apic.c42
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c2
-rw-r--r--arch/x86/kernel/apic/apic_noop.c1
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c1
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c1
-rw-r--r--arch/x86/kernel/apic/es7000_32.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c404
-rw-r--r--arch/x86/kernel/apic/numaq_32.c1
-rw-r--r--arch/x86/kernel/apic/probe_32.c1
-rw-r--r--arch/x86/kernel/apic/summit_32.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c1
-rw-r--r--arch/x86/kernel/apm_32.c2
-rw-r--r--arch/x86/kernel/check.c20
-rw-r--r--arch/x86/kernel/cpu/common.c10
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c4
-rw-r--r--arch/x86/kernel/cpu/match.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-apei.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c10
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c94
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c9
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.pl25
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event.c18
-rw-r--r--arch/x86/kernel/cpu/perf_event.h2
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c11
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_ibs.c570
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c149
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c15
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c6
-rw-r--r--arch/x86/kernel/cpu/scattered.c2
-rw-r--r--arch/x86/kernel/dumpstack.c23
-rw-r--r--arch/x86/kernel/dumpstack_32.c2
-rw-r--r--arch/x86/kernel/dumpstack_64.c2
-rw-r--r--arch/x86/kernel/e820.c53
-rw-r--r--arch/x86/kernel/entry_32.S60
-rw-r--r--arch/x86/kernel/entry_64.S60
-rw-r--r--arch/x86/kernel/ftrace.c594
-rw-r--r--arch/x86/kernel/head32.c1
-rw-r--r--arch/x86/kernel/head64.c1
-rw-r--r--arch/x86/kernel/head_32.S228
-rw-r--r--arch/x86/kernel/head_64.S84
-rw-r--r--arch/x86/kernel/hpet.c66
-rw-r--r--arch/x86/kernel/i387.c2
-rw-r--r--arch/x86/kernel/init_task.c42
-rw-r--r--arch/x86/kernel/irq_32.c8
-rw-r--r--arch/x86/kernel/kgdb.c8
-rw-r--r--arch/x86/kernel/kprobes.c4
-rw-r--r--arch/x86/kernel/kvmclock.c15
-rw-r--r--arch/x86/kernel/mca_32.c476
-rw-r--r--arch/x86/kernel/microcode_core.c31
-rw-r--r--arch/x86/kernel/mpparse.c22
-rw-r--r--arch/x86/kernel/nmi.c107
-rw-r--r--arch/x86/kernel/nmi_selftest.c17
-rw-r--r--arch/x86/kernel/paravirt.c12
-rw-r--r--arch/x86/kernel/pci-calgary_64.c8
-rw-r--r--arch/x86/kernel/pci-dma.c17
-rw-r--r--arch/x86/kernel/pci-nommu.c8
-rw-r--r--arch/x86/kernel/process.c73
-rw-r--r--arch/x86/kernel/process_32.c11
-rw-r--r--arch/x86/kernel/process_64.c19
-rw-r--r--arch/x86/kernel/ptrace.c13
-rw-r--r--arch/x86/kernel/reboot.c270
-rw-r--r--arch/x86/kernel/setup.c42
-rw-r--r--arch/x86/kernel/signal.c74
-rw-r--r--arch/x86/kernel/smp.c100
-rw-r--r--arch/x86/kernel/smpboot.c227
-rw-r--r--arch/x86/kernel/tboot.c7
-rw-r--r--arch/x86/kernel/test_rodata.c10
-rw-r--r--arch/x86/kernel/time.c6
-rw-r--r--arch/x86/kernel/trampoline.c42
-rw-r--r--arch/x86/kernel/trampoline_32.S83
-rw-r--r--arch/x86/kernel/traps.c16
-rw-r--r--arch/x86/kernel/uprobes.c3
-rw-r--r--arch/x86/kernel/vmlinux.lds.S12
-rw-r--r--arch/x86/kernel/vsmp_64.c40
-rw-r--r--arch/x86/kernel/vsyscall_64.c39
-rw-r--r--arch/x86/kernel/x86_init.c8
-rw-r--r--arch/x86/kernel/xsave.c2
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/cpuid.c5
-rw-r--r--arch/x86/kvm/emulate.c293
-rw-r--r--arch/x86/kvm/i8254.c31
-rw-r--r--arch/x86/kvm/i8254.h7
-rw-r--r--arch/x86/kvm/lapic.c31
-rw-r--r--arch/x86/kvm/mmu.c351
-rw-r--r--arch/x86/kvm/mmu_audit.c10
-rw-r--r--arch/x86/kvm/paging_tmpl.h2
-rw-r--r--arch/x86/kvm/svm.c9
-rw-r--r--arch/x86/kvm/vmx.c41
-rw-r--r--arch/x86/kvm/x86.c280
-rw-r--r--arch/x86/kvm/x86.h2
-rw-r--r--arch/x86/lib/checksum_32.S9
-rw-r--r--arch/x86/lib/copy_user_64.S63
-rw-r--r--arch/x86/lib/copy_user_nocache_64.S50
-rw-r--r--arch/x86/lib/csum-copy_64.S16
-rw-r--r--arch/x86/lib/csum-wrappers_64.c2
-rw-r--r--arch/x86/lib/getuser.S9
-rw-r--r--arch/x86/lib/putuser.S12
-rw-r--r--arch/x86/lib/usercopy.c105
-rw-r--r--arch/x86/lib/usercopy_32.c271
-rw-r--r--arch/x86/lib/usercopy_64.c48
-rw-r--r--arch/x86/lib/x86-opcode-map.txt8
-rw-r--r--arch/x86/mm/extable.c142
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/mm/init.c38
-rw-r--r--arch/x86/mm/init_64.c23
-rw-r--r--arch/x86/mm/ioremap.c4
-rw-r--r--arch/x86/mm/numa.c32
-rw-r--r--arch/x86/mm/numa_emulation.c12
-rw-r--r--arch/x86/mm/pageattr.c2
-rw-r--r--arch/x86/mm/pat.c98
-rw-r--r--arch/x86/mm/srat.c7
-rw-r--r--arch/x86/mm/tlb.c16
-rw-r--r--arch/x86/pci/Makefile2
-rw-r--r--arch/x86/pci/acpi.c128
-rw-r--r--arch/x86/pci/amd_bus.c91
-rw-r--r--arch/x86/pci/broadcom_bus.c12
-rw-r--r--arch/x86/pci/bus_numa.c69
-rw-r--r--arch/x86/pci/bus_numa.h18
-rw-r--r--arch/x86/pci/common.c43
-rw-r--r--arch/x86/pci/fixup.c21
-rw-r--r--arch/x86/pci/i386.c2
-rw-r--r--arch/x86/pci/sta2x11-fixup.c366
-rw-r--r--arch/x86/platform/mrst/early_printk_mrst.c13
-rw-r--r--arch/x86/platform/mrst/mrst.c2
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-sci.c43
-rw-r--r--arch/x86/platform/visws/visws_quirks.c2
-rw-r--r--arch/x86/realmode/Makefile18
-rw-r--r--arch/x86/realmode/init.c115
-rw-r--r--arch/x86/realmode/rm/Makefile82
-rw-r--r--arch/x86/realmode/rm/bioscall.S1
-rw-r--r--arch/x86/realmode/rm/copy.S1
-rw-r--r--arch/x86/realmode/rm/header.S41
-rw-r--r--arch/x86/realmode/rm/realmode.h21
-rw-r--r--arch/x86/realmode/rm/realmode.lds.S76
-rw-r--r--arch/x86/realmode/rm/reboot_32.S (renamed from arch/x86/kernel/reboot_32.S)89
-rw-r--r--arch/x86/realmode/rm/regs.c1
-rw-r--r--arch/x86/realmode/rm/stack.S19
-rw-r--r--arch/x86/realmode/rm/trampoline_32.S74
-rw-r--r--arch/x86/realmode/rm/trampoline_64.S (renamed from arch/x86/kernel/trampoline_64.S)148
-rw-r--r--arch/x86/realmode/rm/trampoline_common.S7
-rw-r--r--arch/x86/realmode/rm/video-bios.c1
-rw-r--r--arch/x86/realmode/rm/video-mode.c1
-rw-r--r--arch/x86/realmode/rm/video-vesa.c1
-rw-r--r--arch/x86/realmode/rm/video-vga.c1
-rw-r--r--arch/x86/realmode/rm/wakemain.c (renamed from arch/x86/kernel/acpi/realmode/wakemain.c)3
-rw-r--r--arch/x86/realmode/rm/wakeup.h (renamed from arch/x86/kernel/acpi/realmode/wakeup.h)10
-rw-r--r--arch/x86/realmode/rm/wakeup_asm.S (renamed from arch/x86/kernel/acpi/realmode/wakeup.S)131
-rw-r--r--arch/x86/realmode/rmpiggy.S20
-rw-r--r--arch/x86/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/syscalls/syscall_64.tbl2
-rw-r--r--arch/x86/tools/gen-insn-attr-x86.awk14
-rw-r--r--arch/x86/tools/relocs.c7
-rw-r--r--arch/x86/um/asm/elf.h42
-rw-r--r--arch/x86/um/asm/ptrace.h34
-rw-r--r--arch/x86/um/asm/ptrace_32.h23
-rw-r--r--arch/x86/um/asm/ptrace_64.h26
-rw-r--r--arch/x86/um/checksum_32.S9
-rw-r--r--arch/x86/um/shared/sysdep/ptrace.h67
-rw-r--r--arch/x86/um/shared/sysdep/ptrace_32.h92
-rw-r--r--arch/x86/um/shared/sysdep/ptrace_64.h101
-rw-r--r--arch/x86/um/signal.c31
-rw-r--r--arch/x86/um/sys_call_table_32.c4
-rw-r--r--arch/x86/um/sys_call_table_64.c1
-rw-r--r--arch/x86/um/syscalls_32.c12
-rw-r--r--arch/x86/um/sysrq_32.c8
-rw-r--r--arch/x86/um/sysrq_64.c8
-rw-r--r--arch/x86/um/tls_32.c2
-rw-r--r--arch/x86/xen/Makefile2
-rw-r--r--arch/x86/xen/apic.c33
-rw-r--r--arch/x86/xen/debugfs.c104
-rw-r--r--arch/x86/xen/debugfs.h4
-rw-r--r--arch/x86/xen/enlighten.c15
-rw-r--r--arch/x86/xen/mmu.c27
-rw-r--r--arch/x86/xen/p2m.c140
-rw-r--r--arch/x86/xen/setup.c170
-rw-r--r--arch/x86/xen/smp.c131
-rw-r--r--arch/x86/xen/smp.h12
-rw-r--r--arch/x86/xen/spinlock.c12
-rw-r--r--arch/x86/xen/xen-asm_32.S6
-rw-r--r--arch/x86/xen/xen-ops.h5
282 files changed, 6785 insertions, 5491 deletions
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 0e9dec6cadd..e5287d8517a 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -1,4 +1,3 @@
-
obj-$(CONFIG_KVM) += kvm/
# Xen paravirtualization support
@@ -7,6 +6,7 @@ obj-$(CONFIG_XEN) += xen/
# lguest paravirtualization support
obj-$(CONFIG_LGUEST_GUEST) += lguest/
+obj-y += realmode/
obj-y += kernel/
obj-y += mm/
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1f5c30703ad..c70684f859e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -12,6 +12,7 @@ config X86_32
config X86_64
def_bool 64BIT
+ select X86_DEV_DMA_OPS
### Arch settings
config X86
@@ -31,6 +32,7 @@ config X86
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_WANT_FRAME_POINTERS
select HAVE_DMA_ATTRS
+ select HAVE_DMA_CONTIGUOUS if !SWIOTLB
select HAVE_KRETPROBES
select HAVE_OPTPROBES
select HAVE_FTRACE_MCOUNT_RECORD
@@ -40,7 +42,6 @@ config X86
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_FP_TEST
select HAVE_FUNCTION_TRACE_MCOUNT_TEST
- select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_KVM
select HAVE_ARCH_KGDB
@@ -77,11 +78,23 @@ config X86
select GENERIC_CLOCKEVENTS_MIN_ADJUST
select IRQ_FORCED_THREADING
select USE_GENERIC_SMP_HELPERS if SMP
- select HAVE_BPF_JIT if (X86_64 && NET)
+ select HAVE_BPF_JIT if X86_64
select CLKEVT_I8253
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_IOMAP
select DCACHE_WORD_ACCESS
+ select GENERIC_SMP_IDLE_THREAD
+ select HAVE_ARCH_SECCOMP_FILTER
+ select BUILDTIME_EXTABLE_SORT
+ select GENERIC_CMOS_UPDATE
+ select CLOCKSOURCE_WATCHDOG
+ select GENERIC_CLOCKEVENTS
+ select ARCH_CLOCKSOURCE_DATA if X86_64
+ select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
+ select GENERIC_TIME_VSYSCALL if X86_64
+ select KTIME_SCALAR if X86_32
+ select GENERIC_STRNCPY_FROM_USER
+ select GENERIC_STRNLEN_USER
config INSTRUCTION_DECODER
def_bool (KPROBES || PERF_EVENTS || UPROBES)
@@ -96,23 +109,6 @@ config ARCH_DEFCONFIG
default "arch/x86/configs/i386_defconfig" if X86_32
default "arch/x86/configs/x86_64_defconfig" if X86_64
-config GENERIC_CMOS_UPDATE
- def_bool y
-
-config CLOCKSOURCE_WATCHDOG
- def_bool y
-
-config GENERIC_CLOCKEVENTS
- def_bool y
-
-config ARCH_CLOCKSOURCE_DATA
- def_bool y
- depends on X86_64
-
-config GENERIC_CLOCKEVENTS_BROADCAST
- def_bool y
- depends on X86_64 || (X86_32 && X86_LOCAL_APIC)
-
config LOCKDEP_SUPPORT
def_bool y
@@ -160,16 +156,9 @@ config RWSEM_GENERIC_SPINLOCK
config RWSEM_XCHGADD_ALGORITHM
def_bool X86_XADD
-config ARCH_HAS_CPU_IDLE_WAIT
- def_bool y
-
config GENERIC_CALIBRATE_DELAY
def_bool y
-config GENERIC_TIME_VSYSCALL
- bool
- default X86_64
-
config ARCH_HAS_CPU_RELAX
def_bool y
@@ -236,9 +225,6 @@ config ARCH_HWEIGHT_CFLAGS
default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64
-config KTIME_SCALAR
- def_bool X86_32
-
config ARCH_CPU_PROBE_RELEASE
def_bool y
depends on HOTPLUG_CPU
@@ -261,8 +247,6 @@ config ZONE_DMA
If unsure, say Y.
-source "kernel/time/Kconfig"
-
config SMP
bool "Symmetric multi-processing support"
---help---
@@ -331,6 +315,7 @@ config X86_EXTENDED_PLATFORM
NUMAQ (IBM/Sequent)
RDC R-321x SoC
SGI 320/540 (Visual Workstation)
+ STA2X11-based (e.g. Northville)
Summit/EXA (IBM x440)
Unisys ES7000 IA32 series
Moorestown MID devices
@@ -377,6 +362,7 @@ config X86_VSMP
select PARAVIRT
depends on X86_64 && PCI
depends on X86_EXTENDED_PLATFORM
+ depends on SMP
---help---
Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is
supposed to run on these EM64T-based machines. Only choose this option
@@ -463,10 +449,10 @@ config X86_32_NON_STANDARD
depends on X86_32 && SMP
depends on X86_EXTENDED_PLATFORM
---help---
- This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default
- subarchitectures. It is intended for a generic binary kernel.
- if you select them all, kernel will probe it one by one. and will
- fallback to default.
+ This option compiles in the NUMAQ, Summit, bigsmp, ES7000,
+ STA2X11, default subarchitectures. It is intended for a generic
+ binary kernel. If you select them all, kernel will probe it
+ one by one and will fallback to default.
# Alphabetically sorted list of Non standard 32 bit platforms
@@ -506,6 +492,22 @@ config X86_VISWS
A kernel compiled for the Visual Workstation will run on general
PCs as well. See <file:Documentation/sgi-visws.txt> for details.
+config STA2X11
+ bool "STA2X11 Companion Chip Support"
+ depends on X86_32_NON_STANDARD && PCI
+ select X86_DEV_DMA_OPS
+ select X86_DMA_REMAP
+ select SWIOTLB
+ select MFD_STA2X11
+ select ARCH_REQUIRE_GPIOLIB
+ default n
+ ---help---
+ This adds support for boards based on the STA2X11 IO-Hub,
+ a.k.a. "ConneXt". The chip is used in place of the standard
+ PC chipset, so all "standard" peripherals are missing. If this
+ option is selected the kernel will still be able to boot on
+ standard PC machines.
+
config X86_SUMMIT
bool "Summit/EXA (IBM x440)"
depends on X86_32_NON_STANDARD
@@ -1242,10 +1244,6 @@ config NODES_SHIFT
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accommodate various tables.
-config HAVE_ARCH_BOOTMEM
- def_bool y
- depends on X86_32 && NUMA
-
config HAVE_ARCH_ALLOC_REMAP
def_bool y
depends on X86_32 && NUMA
@@ -1508,6 +1506,8 @@ config EFI_STUB
This kernel feature allows a bzImage to be loaded directly
by EFI firmware without the use of a bootloader.
+ See Documentation/x86/efi-stub.txt for more information.
+
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
@@ -2026,16 +2026,6 @@ config EISA
source "drivers/eisa/Kconfig"
-config MCA
- bool "MCA support"
- ---help---
- MicroChannel Architecture is found in some IBM PS/2 machines and
- laptops. It is a bus system similar to PCI or ISA. See
- <file:Documentation/mca.txt> (and especially the web page given
- there) before attempting to build an MCA bus kernel.
-
-source "drivers/mca/Kconfig"
-
config SCx200
tristate "NatSemi SCx200 support"
---help---
@@ -2218,6 +2208,14 @@ config HAVE_TEXT_POKE_SMP
bool
select STOP_MACHINE if SMP
+config X86_DEV_DMA_OPS
+ bool
+ depends on X86_64 || STA2X11
+
+config X86_DMA_REMAP
+ bool
+ depends on STA2X11
+
source "net/Kconfig"
source "drivers/Kconfig"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index b1c611e6da6..1f252143455 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -115,9 +115,10 @@ cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTI
# does binutils support specific instructions?
asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
+avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
-KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
-KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)
LDFLAGS := -m elf_$(UTS_MACHINE)
@@ -149,7 +150,6 @@ archheaders:
head-y := arch/x86/kernel/head_$(BITS).o
head-y += arch/x86/kernel/head$(BITS).o
head-y += arch/x86/kernel/head.o
-head-y += arch/x86/kernel/init_task.o
libs-y += arch/x86/lib/
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 0cdfc0d2315..4e85f5f8583 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -16,6 +16,26 @@
static efi_system_table_t *sys_table;
+static void efi_printk(char *str)
+{
+ char *s8;
+
+ for (s8 = str; *s8; s8++) {
+ struct efi_simple_text_output_protocol *out;
+ efi_char16_t ch[2] = { 0 };
+
+ ch[0] = *s8;
+ out = (struct efi_simple_text_output_protocol *)sys_table->con_out;
+
+ if (*s8 == '\n') {
+ efi_char16_t nl[2] = { '\r', 0 };
+ efi_call_phys2(out->output_string, out, nl);
+ }
+
+ efi_call_phys2(out->output_string, out, ch);
+ }
+}
+
static efi_status_t __get_map(efi_memory_desc_t **map, unsigned long *map_size,
unsigned long *desc_size)
{
@@ -531,8 +551,10 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
EFI_LOADER_DATA,
nr_initrds * sizeof(*initrds),
&initrds);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for initrds\n");
goto fail;
+ }
str = (char *)(unsigned long)hdr->cmd_line_ptr;
for (i = 0; i < nr_initrds; i++) {
@@ -575,32 +597,42 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
status = efi_call_phys3(boottime->handle_protocol,
image->device_handle, &fs_proto, &io);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to handle fs_proto\n");
goto free_initrds;
+ }
status = efi_call_phys2(io->open_volume, io, &fh);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to open volume\n");
goto free_initrds;
+ }
}
status = efi_call_phys5(fh->open, fh, &h, filename_16,
EFI_FILE_MODE_READ, (u64)0);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to open initrd file\n");
goto close_handles;
+ }
initrd->handle = h;
info_sz = 0;
status = efi_call_phys4(h->get_info, h, &info_guid,
&info_sz, NULL);
- if (status != EFI_BUFFER_TOO_SMALL)
+ if (status != EFI_BUFFER_TOO_SMALL) {
+ efi_printk("Failed to get initrd info size\n");
goto close_handles;
+ }
grow:
status = efi_call_phys3(sys_table->boottime->allocate_pool,
EFI_LOADER_DATA, info_sz, &info);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for initrd info\n");
goto close_handles;
+ }
status = efi_call_phys4(h->get_info, h, &info_guid,
&info_sz, info);
@@ -612,8 +644,10 @@ grow:
file_sz = info->file_size;
efi_call_phys1(sys_table->boottime->free_pool, info);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to get initrd info\n");
goto close_handles;
+ }
initrd->size = file_sz;
initrd_total += file_sz;
@@ -629,11 +663,14 @@ grow:
*/
status = high_alloc(initrd_total, 0x1000,
&initrd_addr, hdr->initrd_addr_max);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc highmem for initrds\n");
goto close_handles;
+ }
/* We've run out of free low memory. */
if (initrd_addr > hdr->initrd_addr_max) {
+ efi_printk("We've run out of free low memory\n");
status = EFI_INVALID_PARAMETER;
goto free_initrd_total;
}
@@ -652,8 +689,10 @@ grow:
status = efi_call_phys3(fh->read,
initrds[j].handle,
&chunksize, addr);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to read initrd\n");
goto free_initrd_total;
+ }
addr += chunksize;
size -= chunksize;
}
@@ -674,7 +713,7 @@ free_initrd_total:
low_free(initrd_total, initrd_addr);
close_handles:
- for (k = j; k < nr_initrds; k++)
+ for (k = j; k < i; k++)
efi_call_phys1(fh->close, initrds[k].handle);
free_initrds:
efi_call_phys1(sys_table->boottime->free_pool, initrds);
@@ -732,8 +771,10 @@ static efi_status_t make_boot_params(struct boot_params *boot_params,
options_size++; /* NUL termination */
status = low_alloc(options_size, 1, &cmdline);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for cmdline\n");
goto fail;
+ }
s1 = (u8 *)(unsigned long)cmdline;
s2 = (u16 *)options;
@@ -895,20 +936,32 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
status = efi_call_phys3(sys_table->boottime->handle_protocol,
handle, &proto, (void *)&image);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
goto fail;
+ }
status = low_alloc(0x4000, 1, (unsigned long *)&boot_params);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc lowmem for boot params\n");
goto fail;
+ }
memset(boot_params, 0x0, 0x4000);
- /* Copy first two sectors to boot_params */
- memcpy(boot_params, image->image_base, 1024);
-
hdr = &boot_params->hdr;
+ /* Copy the second sector to boot_params */
+ memcpy(&hdr->jump, image->image_base + 512, 512);
+
+ /*
+ * Fill out some of the header fields ourselves because the
+ * EFI firmware loader doesn't load the first sector.
+ */
+ hdr->root_flags = 1;
+ hdr->vid_mode = 0xffff;
+ hdr->boot_flag = 0xAA55;
+
/*
* The EFI firmware loader could have placed the kernel image
* anywhere in memory, but the kernel has various restrictions
@@ -925,8 +978,10 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
if (status != EFI_SUCCESS) {
status = low_alloc(hdr->init_size, hdr->kernel_alignment,
&start);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for kernel\n");
goto fail;
+ }
}
hdr->code32_start = (__u32)start;
@@ -937,19 +992,25 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
status = efi_call_phys3(sys_table->boottime->allocate_pool,
EFI_LOADER_DATA, sizeof(*gdt),
(void **)&gdt);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for gdt structure\n");
goto fail;
+ }
gdt->size = 0x800;
status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for gdt\n");
goto fail;
+ }
status = efi_call_phys3(sys_table->boottime->allocate_pool,
EFI_LOADER_DATA, sizeof(*idt),
(void **)&idt);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for idt structure\n");
goto fail;
+ }
idt->size = 0;
idt->address = 0;
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index 39251663e65..3b6e15627c5 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -58,4 +58,10 @@ struct efi_uga_draw_protocol {
void *blt;
};
+struct efi_simple_text_output_protocol {
+ void *reset;
+ void *output_string;
+ void *test_string;
+};
+
#endif /* BOOT_COMPRESSED_EBOOT_H */
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index f1bbeeb0914..efe5acfc79c 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -94,10 +94,10 @@ bs_die:
.section ".bsdata", "a"
bugger_off_msg:
- .ascii "Direct booting from floppy is no longer supported.\r\n"
- .ascii "Please use a boot loader program instead.\r\n"
+ .ascii "Direct floppy boot is not supported. "
+ .ascii "Use a boot loader program instead.\r\n"
.ascii "\n"
- .ascii "Remove disk and press any key to reboot . . .\r\n"
+ .ascii "Remove disk and press any key to reboot ...\r\n"
.byte 0
#ifdef CONFIG_EFI_STUB
@@ -111,7 +111,7 @@ coff_header:
#else
.word 0x8664 # x86-64
#endif
- .word 2 # nr_sections
+ .word 3 # nr_sections
.long 0 # TimeDateStamp
.long 0 # PointerToSymbolTable
.long 1 # NumberOfSymbols
@@ -147,7 +147,7 @@ optional_header:
# Filled in by build.c
.long 0x0000 # AddressOfEntryPoint
- .long 0x0000 # BaseOfCode
+ .long 0x0200 # BaseOfCode
#ifdef CONFIG_X86_32
.long 0 # data
#endif
@@ -158,8 +158,8 @@ extra_header_fields:
#else
.quad 0 # ImageBase
#endif
- .long 0x1000 # SectionAlignment
- .long 0x200 # FileAlignment
+ .long 0x20 # SectionAlignment
+ .long 0x20 # FileAlignment
.word 0 # MajorOperatingSystemVersion
.word 0 # MinorOperatingSystemVersion
.word 0 # MajorImageVersion
@@ -189,7 +189,7 @@ extra_header_fields:
.quad 0 # SizeOfHeapCommit
#endif
.long 0 # LoaderFlags
- .long 0x1 # NumberOfRvaAndSizes
+ .long 0x6 # NumberOfRvaAndSizes
.quad 0 # ExportTable
.quad 0 # ImportTable
@@ -200,8 +200,10 @@ extra_header_fields:
# Section table
section_table:
- .ascii ".text"
- .byte 0
+ #
+ # The offset & size fields are filled in by build.c.
+ #
+ .ascii ".setup"
.byte 0
.byte 0
.long 0
@@ -217,23 +219,40 @@ section_table:
#
# The EFI application loader requires a relocation section
- # because EFI applications are relocatable and not having
- # this section seems to confuse it. But since we don't need
- # the loader to fixup any relocs for us just fill it with a
- # single dummy reloc.
+ # because EFI applications must be relocatable. The .reloc
+ # offset & size fields are filled in by build.c.
#
.ascii ".reloc"
.byte 0
.byte 0
- .long reloc_end - reloc_start
- .long reloc_start
- .long reloc_end - reloc_start # SizeOfRawData
- .long reloc_start # PointerToRawData
+ .long 0
+ .long 0
+ .long 0 # SizeOfRawData
+ .long 0 # PointerToRawData
.long 0 # PointerToRelocations
.long 0 # PointerToLineNumbers
.word 0 # NumberOfRelocations
.word 0 # NumberOfLineNumbers
.long 0x42100040 # Characteristics (section flags)
+
+ #
+ # The offset & size fields are filled in by build.c.
+ #
+ .ascii ".text"
+ .byte 0
+ .byte 0
+ .byte 0
+ .long 0
+ .long 0x0 # startup_{32,64}
+ .long 0 # Size of initialized data
+ # on disk
+ .long 0x0 # startup_{32,64}
+ .long 0 # PointerToRelocations
+ .long 0 # PointerToLineNumbers
+ .word 0 # NumberOfRelocations
+ .word 0 # NumberOfLineNumbers
+ .long 0x60500020 # Characteristics (section flags)
+
#endif /* CONFIG_EFI_STUB */
# Kernel attributes; used by setup. This is part 1 of the
@@ -469,10 +488,3 @@ setup_corrupt:
.data
dummy: .long 0
-
- .section .reloc
-reloc_start:
- .long dummy - reloc_start
- .long 10
- .word 0
-reloc_end:
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 40358c8905b..cf6083d444f 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -57,14 +57,20 @@ static void copy_boot_params(void)
}
/*
- * Set the keyboard repeat rate to maximum. Unclear why this
+ * Query the keyboard lock status as given by the BIOS, and
+ * set the keyboard repeat rate to maximum. Unclear why the latter
* is done here; this might be possible to kill off as stale code.
*/
-static void keyboard_set_repeat(void)
+static void keyboard_init(void)
{
- struct biosregs ireg;
+ struct biosregs ireg, oreg;
initregs(&ireg);
- ireg.ax = 0x0305;
+
+ ireg.ah = 0x02; /* Get keyboard status */
+ intcall(0x16, &ireg, &oreg);
+ boot_params.kbd_status = oreg.al;
+
+ ireg.ax = 0x0305; /* Set keyboard repeat rate */
intcall(0x16, &ireg, NULL);
}
@@ -151,8 +157,8 @@ void main(void)
/* Detect memory layout */
detect_memory();
- /* Set keyboard repeat rate (why?) */
- keyboard_set_repeat();
+ /* Set keyboard repeat rate (why?) and query the lock flags */
+ keyboard_init();
/* Query MCA information */
query_mca();
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 24443a33208..4b8e165ee57 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -50,6 +50,8 @@ typedef unsigned int u32;
u8 buf[SETUP_SECT_MAX*512];
int is_big_kernel;
+#define PECOFF_RELOC_RESERVE 0x20
+
/*----------------------------------------------------------------------*/
static const u32 crctab32[] = {
@@ -133,11 +135,103 @@ static void usage(void)
die("Usage: build setup system [> image]");
}
-int main(int argc, char ** argv)
-{
#ifdef CONFIG_EFI_STUB
- unsigned int file_sz, pe_header;
+
+static void update_pecoff_section_header(char *section_name, u32 offset, u32 size)
+{
+ unsigned int pe_header;
+ unsigned short num_sections;
+ u8 *section;
+
+ pe_header = get_unaligned_le32(&buf[0x3c]);
+ num_sections = get_unaligned_le16(&buf[pe_header + 6]);
+
+#ifdef CONFIG_X86_32
+ section = &buf[pe_header + 0xa8];
+#else
+ section = &buf[pe_header + 0xb8];
#endif
+
+ while (num_sections > 0) {
+ if (strncmp((char*)section, section_name, 8) == 0) {
+ /* section header size field */
+ put_unaligned_le32(size, section + 0x8);
+
+ /* section header vma field */
+ put_unaligned_le32(offset, section + 0xc);
+
+ /* section header 'size of initialised data' field */
+ put_unaligned_le32(size, section + 0x10);
+
+ /* section header 'file offset' field */
+ put_unaligned_le32(offset, section + 0x14);
+
+ break;
+ }
+ section += 0x28;
+ num_sections--;
+ }
+}
+
+static void update_pecoff_setup_and_reloc(unsigned int size)
+{
+ u32 setup_offset = 0x200;
+ u32 reloc_offset = size - PECOFF_RELOC_RESERVE;
+ u32 setup_size = reloc_offset - setup_offset;
+
+ update_pecoff_section_header(".setup", setup_offset, setup_size);
+ update_pecoff_section_header(".reloc", reloc_offset, PECOFF_RELOC_RESERVE);
+
+ /*
+ * Modify .reloc section contents with a single entry. The
+ * relocation is applied to offset 10 of the relocation section.
+ */
+ put_unaligned_le32(reloc_offset + 10, &buf[reloc_offset]);
+ put_unaligned_le32(10, &buf[reloc_offset + 4]);
+}
+
+static void update_pecoff_text(unsigned int text_start, unsigned int file_sz)
+{
+ unsigned int pe_header;
+ unsigned int text_sz = file_sz - text_start;
+
+ pe_header = get_unaligned_le32(&buf[0x3c]);
+
+ /* Size of image */
+ put_unaligned_le32(file_sz, &buf[pe_header + 0x50]);
+
+ /*
+ * Size of code: Subtract the size of the first sector (512 bytes)
+ * which includes the header.
+ */
+ put_unaligned_le32(file_sz - 512, &buf[pe_header + 0x1c]);
+
+#ifdef CONFIG_X86_32
+ /*
+ * Address of entry point.
+ *
+ * The EFI stub entry point is +16 bytes from the start of
+ * the .text section.
+ */
+ put_unaligned_le32(text_start + 16, &buf[pe_header + 0x28]);
+#else
+ /*
+ * Address of entry point. startup_32 is at the beginning and
+ * the 64-bit entry point (startup_64) is always 512 bytes
+ * after. The EFI stub entry point is 16 bytes after that, as
+ * the first instruction allows legacy loaders to jump over
+ * the EFI stub initialisation
+ */
+ put_unaligned_le32(text_start + 528, &buf[pe_header + 0x28]);
+#endif /* CONFIG_X86_32 */
+
+ update_pecoff_section_header(".text", text_start, text_sz);
+}
+
+#endif /* CONFIG_EFI_STUB */
+
+int main(int argc, char ** argv)
+{
unsigned int i, sz, setup_sectors;
int c;
u32 sys_size;
@@ -163,6 +257,12 @@ int main(int argc, char ** argv)
die("Boot block hasn't got boot flag (0xAA55)");
fclose(file);
+#ifdef CONFIG_EFI_STUB
+ /* Reserve 0x20 bytes for .reloc section */
+ memset(buf+c, 0, PECOFF_RELOC_RESERVE);
+ c += PECOFF_RELOC_RESERVE;
+#endif
+
/* Pad unused space with zeros */
setup_sectors = (c + 511) / 512;
if (setup_sectors < SETUP_SECT_MIN)
@@ -170,6 +270,10 @@ int main(int argc, char ** argv)
i = setup_sectors*512;
memset(buf+c, 0, i-c);
+#ifdef CONFIG_EFI_STUB
+ update_pecoff_setup_and_reloc(i);
+#endif
+
/* Set the default root device */
put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]);
@@ -194,48 +298,8 @@ int main(int argc, char ** argv)
put_unaligned_le32(sys_size, &buf[0x1f4]);
#ifdef CONFIG_EFI_STUB
- file_sz = sz + i + ((sys_size * 16) - sz);
-
- pe_header = get_unaligned_le32(&buf[0x3c]);
-
- /* Size of code */
- put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]);
-
- /* Size of image */
- put_unaligned_le32(file_sz, &buf[pe_header + 0x50]);
-
-#ifdef CONFIG_X86_32
- /*
- * Address of entry point.
- *
- * The EFI stub entry point is +16 bytes from the start of
- * the .text section.
- */
- put_unaligned_le32(i + 16, &buf[pe_header + 0x28]);
-
- /* .text size */
- put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]);
-
- /* .text size of initialised data */
- put_unaligned_le32(file_sz, &buf[pe_header + 0xb8]);
-#else
- /*
- * Address of entry point. startup_32 is at the beginning and
- * the 64-bit entry point (startup_64) is always 512 bytes
- * after. The EFI stub entry point is 16 bytes after that, as
- * the first instruction allows legacy loaders to jump over
- * the EFI stub initialisation
- */
- put_unaligned_le32(i + 528, &buf[pe_header + 0x28]);
-
- /* .text size */
- put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]);
-
- /* .text size of initialised data */
- put_unaligned_le32(file_sz, &buf[pe_header + 0xc8]);
-
-#endif /* CONFIG_X86_32 */
-#endif /* CONFIG_EFI_STUB */
+ update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz));
+#endif
crc = partial_crc32(buf, i, crc);
if (fwrite(buf, 1, i, stdout) != i)
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index c799352e24f..c9bac53ceec 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -222,27 +222,6 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
}
}
-static struct crypto_alg aesni_alg = {
- .cra_name = "aes",
- .cra_driver_name = "aes-aesni",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
- .cra_alignmask = 0,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(aesni_alg.cra_list),
- .cra_u = {
- .cipher = {
- .cia_min_keysize = AES_MIN_KEY_SIZE,
- .cia_max_keysize = AES_MAX_KEY_SIZE,
- .cia_setkey = aes_set_key,
- .cia_encrypt = aes_encrypt,
- .cia_decrypt = aes_decrypt
- }
- }
-};
-
static void __aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
@@ -257,27 +236,6 @@ static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
aesni_dec(ctx, dst, src);
}
-static struct crypto_alg __aesni_alg = {
- .cra_name = "__aes-aesni",
- .cra_driver_name = "__driver-aes-aesni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
- .cra_alignmask = 0,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(__aesni_alg.cra_list),
- .cra_u = {
- .cipher = {
- .cia_min_keysize = AES_MIN_KEY_SIZE,
- .cia_max_keysize = AES_MAX_KEY_SIZE,
- .cia_setkey = aes_set_key,
- .cia_encrypt = __aes_encrypt,
- .cia_decrypt = __aes_decrypt
- }
- }
-};
-
static int ecb_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
@@ -326,28 +284,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc,
return err;
}
-static struct crypto_alg blk_ecb_alg = {
- .cra_name = "__ecb-aes-aesni",
- .cra_driver_name = "__driver-ecb-aes-aesni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .setkey = aes_set_key,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-};
-
static int cbc_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
@@ -396,28 +332,6 @@ static int cbc_decrypt(struct blkcipher_desc *desc,
return err;
}
-static struct crypto_alg blk_cbc_alg = {
- .cra_name = "__cbc-aes-aesni",
- .cra_driver_name = "__driver-cbc-aes-aesni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .setkey = aes_set_key,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-};
-
#ifdef CONFIG_X86_64
static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
struct blkcipher_walk *walk)
@@ -461,29 +375,6 @@ static int ctr_crypt(struct blkcipher_desc *desc,
return err;
}
-
-static struct crypto_alg blk_ctr_alg = {
- .cra_name = "__ctr-aes-aesni",
- .cra_driver_name = "__driver-ctr-aes-aesni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = aes_set_key,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-};
#endif
static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
@@ -551,281 +442,65 @@ static void ablk_exit(struct crypto_tfm *tfm)
cryptd_free_ablkcipher(ctx->cryptd_tfm);
}
-static void ablk_init_common(struct crypto_tfm *tfm,
- struct cryptd_ablkcipher *cryptd_tfm)
+static int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
{
struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct cryptd_ablkcipher *cryptd_tfm;
+
+ cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
ctx->cryptd_tfm = cryptd_tfm;
tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
crypto_ablkcipher_reqsize(&cryptd_tfm->base);
+
+ return 0;
}
static int ablk_ecb_init(struct crypto_tfm *tfm)
{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-aes-aesni", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
+ return ablk_init_common(tfm, "__driver-ecb-aes-aesni");
}
-static struct crypto_alg ablk_ecb_alg = {
- .cra_name = "ecb(aes)",
- .cra_driver_name = "ecb-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_ecb_alg.cra_list),
- .cra_init = ablk_ecb_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-};
-
static int ablk_cbc_init(struct crypto_tfm *tfm)
{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-aes-aesni", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
+ return ablk_init_common(tfm, "__driver-cbc-aes-aesni");
}
-static struct crypto_alg ablk_cbc_alg = {
- .cra_name = "cbc(aes)",
- .cra_driver_name = "cbc-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_cbc_alg.cra_list),
- .cra_init = ablk_cbc_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-};
-
#ifdef CONFIG_X86_64
static int ablk_ctr_init(struct crypto_tfm *tfm)
{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-aes-aesni", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
+ return ablk_init_common(tfm, "__driver-ctr-aes-aesni");
}
-static struct crypto_alg ablk_ctr_alg = {
- .cra_name = "ctr(aes)",
- .cra_driver_name = "ctr-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_ctr_alg.cra_list),
- .cra_init = ablk_ctr_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_encrypt,
- .geniv = "chainiv",
- },
- },
-};
-
#ifdef HAS_CTR
static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm)
{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher(
- "rfc3686(__driver-ctr-aes-aesni)", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
+ return ablk_init_common(tfm, "rfc3686(__driver-ctr-aes-aesni)");
}
-
-static struct crypto_alg ablk_rfc3686_ctr_alg = {
- .cra_name = "rfc3686(ctr(aes))",
- .cra_driver_name = "rfc3686-ctr-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_rfc3686_ctr_alg.cra_list),
- .cra_init = ablk_rfc3686_ctr_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE+CTR_RFC3686_NONCE_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE+CTR_RFC3686_NONCE_SIZE,
- .ivsize = CTR_RFC3686_IV_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- .geniv = "seqiv",
- },
- },
-};
#endif
#endif
#ifdef HAS_LRW
static int ablk_lrw_init(struct crypto_tfm *tfm)
{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("fpu(lrw(__driver-aes-aesni))",
- 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
+ return ablk_init_common(tfm, "fpu(lrw(__driver-aes-aesni))");
}
-
-static struct crypto_alg ablk_lrw_alg = {
- .cra_name = "lrw(aes)",
- .cra_driver_name = "lrw-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_lrw_alg.cra_list),
- .cra_init = ablk_lrw_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-};
#endif
#ifdef HAS_PCBC
static int ablk_pcbc_init(struct crypto_tfm *tfm)
{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("fpu(pcbc(__driver-aes-aesni))",
- 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
+ return ablk_init_common(tfm, "fpu(pcbc(__driver-aes-aesni))");
}
-
-static struct crypto_alg ablk_pcbc_alg = {
- .cra_name = "pcbc(aes)",
- .cra_driver_name = "pcbc-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_pcbc_alg.cra_list),
- .cra_init = ablk_pcbc_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-};
#endif
#ifdef HAS_XTS
static int ablk_xts_init(struct crypto_tfm *tfm)
{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("fpu(xts(__driver-aes-aesni))",
- 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
+ return ablk_init_common(tfm, "fpu(xts(__driver-aes-aesni))");
}
-
-static struct crypto_alg ablk_xts_alg = {
- .cra_name = "xts(aes)",
- .cra_driver_name = "xts-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_xts_alg.cra_list),
- .cra_init = ablk_xts_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = 2 * AES_MIN_KEY_SIZE,
- .max_keysize = 2 * AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-};
#endif
#ifdef CONFIG_X86_64
@@ -938,7 +613,7 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
struct aesni_rfc4106_gcm_ctx *child_ctx =
aesni_rfc4106_gcm_ctx_get(cryptd_child);
- u8 *new_key_mem = NULL;
+ u8 *new_key_align, *new_key_mem = NULL;
if (key_len < 4) {
crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
@@ -962,9 +637,9 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
if (!new_key_mem)
return -ENOMEM;
- new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
- memcpy(new_key_mem, key, key_len);
- key = new_key_mem;
+ new_key_align = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
+ memcpy(new_key_align, key, key_len);
+ key = new_key_align;
}
if (!irq_fpu_usable())
@@ -1050,32 +725,6 @@ static int rfc4106_decrypt(struct aead_request *req)
}
}
-static struct crypto_alg rfc4106_alg = {
- .cra_name = "rfc4106(gcm(aes))",
- .cra_driver_name = "rfc4106-gcm-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN,
- .cra_alignmask = 0,
- .cra_type = &crypto_nivaead_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(rfc4106_alg.cra_list),
- .cra_init = rfc4106_init,
- .cra_exit = rfc4106_exit,
- .cra_u = {
- .aead = {
- .setkey = rfc4106_set_key,
- .setauthsize = rfc4106_set_authsize,
- .encrypt = rfc4106_encrypt,
- .decrypt = rfc4106_decrypt,
- .geniv = "seqiv",
- .ivsize = 8,
- .maxauthsize = 16,
- },
- },
-};
-
static int __driver_rfc4106_encrypt(struct aead_request *req)
{
u8 one_entry_in_sg = 0;
@@ -1233,26 +882,316 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
}
return retval;
}
+#endif
-static struct crypto_alg __rfc4106_alg = {
+static struct crypto_alg aesni_algs[] = { {
+ .cra_name = "aes",
+ .cra_driver_name = "aes-aesni",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx) +
+ AESNI_ALIGN - 1,
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cia_setkey = aes_set_key,
+ .cia_encrypt = aes_encrypt,
+ .cia_decrypt = aes_decrypt
+ }
+ }
+}, {
+ .cra_name = "__aes-aesni",
+ .cra_driver_name = "__driver-aes-aesni",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx) +
+ AESNI_ALIGN - 1,
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cia_setkey = aes_set_key,
+ .cia_encrypt = __aes_encrypt,
+ .cia_decrypt = __aes_decrypt
+ }
+ }
+}, {
+ .cra_name = "__ecb-aes-aesni",
+ .cra_driver_name = "__driver-ecb-aes-aesni",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx) +
+ AESNI_ALIGN - 1,
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = aes_set_key,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+ },
+}, {
+ .cra_name = "__cbc-aes-aesni",
+ .cra_driver_name = "__driver-cbc-aes-aesni",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx) +
+ AESNI_ALIGN - 1,
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = aes_set_key,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+ },
+}, {
+ .cra_name = "ecb(aes)",
+ .cra_driver_name = "ecb-aes-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_ecb_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+}, {
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "cbc-aes-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_cbc_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+#ifdef CONFIG_X86_64
+}, {
+ .cra_name = "__ctr-aes-aesni",
+ .cra_driver_name = "__driver-ctr-aes-aesni",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx) +
+ AESNI_ALIGN - 1,
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = aes_set_key,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ },
+ },
+}, {
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "ctr-aes-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_ctr_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_encrypt,
+ .geniv = "chainiv",
+ },
+ },
+}, {
.cra_name = "__gcm-aes-aesni",
.cra_driver_name = "__driver-gcm-aes-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_AEAD,
.cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN,
+ .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) +
+ AESNI_ALIGN,
.cra_alignmask = 0,
.cra_type = &crypto_aead_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(__rfc4106_alg.cra_list),
.cra_u = {
.aead = {
.encrypt = __driver_rfc4106_encrypt,
.decrypt = __driver_rfc4106_decrypt,
},
},
-};
+}, {
+ .cra_name = "rfc4106(gcm(aes))",
+ .cra_driver_name = "rfc4106-gcm-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) +
+ AESNI_ALIGN,
+ .cra_alignmask = 0,
+ .cra_type = &crypto_nivaead_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = rfc4106_init,
+ .cra_exit = rfc4106_exit,
+ .cra_u = {
+ .aead = {
+ .setkey = rfc4106_set_key,
+ .setauthsize = rfc4106_set_authsize,
+ .encrypt = rfc4106_encrypt,
+ .decrypt = rfc4106_decrypt,
+ .geniv = "seqiv",
+ .ivsize = 8,
+ .maxauthsize = 16,
+ },
+ },
+#ifdef HAS_CTR
+}, {
+ .cra_name = "rfc3686(ctr(aes))",
+ .cra_driver_name = "rfc3686-ctr-aes-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_rfc3686_ctr_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE +
+ CTR_RFC3686_NONCE_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE +
+ CTR_RFC3686_NONCE_SIZE,
+ .ivsize = CTR_RFC3686_IV_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ .geniv = "seqiv",
+ },
+ },
+#endif
+#endif
+#ifdef HAS_LRW
+}, {
+ .cra_name = "lrw(aes)",
+ .cra_driver_name = "lrw-aes-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_lrw_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+#endif
+#ifdef HAS_PCBC
+}, {
+ .cra_name = "pcbc(aes)",
+ .cra_driver_name = "pcbc-aes-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_pcbc_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
#endif
+#ifdef HAS_XTS
+}, {
+ .cra_name = "xts(aes)",
+ .cra_driver_name = "xts-aes-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_xts_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+#endif
+} };
static const struct x86_cpu_id aesni_cpu_id[] = {
@@ -1263,120 +1202,24 @@ MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
static int __init aesni_init(void)
{
- int err;
+ int err, i;
if (!x86_match_cpu(aesni_cpu_id))
return -ENODEV;
- if ((err = crypto_fpu_init()))
- goto fpu_err;
- if ((err = crypto_register_alg(&aesni_alg)))
- goto aes_err;
- if ((err = crypto_register_alg(&__aesni_alg)))
- goto __aes_err;
- if ((err = crypto_register_alg(&blk_ecb_alg)))
- goto blk_ecb_err;
- if ((err = crypto_register_alg(&blk_cbc_alg)))
- goto blk_cbc_err;
- if ((err = crypto_register_alg(&ablk_ecb_alg)))
- goto ablk_ecb_err;
- if ((err = crypto_register_alg(&ablk_cbc_alg)))
- goto ablk_cbc_err;
-#ifdef CONFIG_X86_64
- if ((err = crypto_register_alg(&blk_ctr_alg)))
- goto blk_ctr_err;
- if ((err = crypto_register_alg(&ablk_ctr_alg)))
- goto ablk_ctr_err;
- if ((err = crypto_register_alg(&__rfc4106_alg)))
- goto __aead_gcm_err;
- if ((err = crypto_register_alg(&rfc4106_alg)))
- goto aead_gcm_err;
-#ifdef HAS_CTR
- if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg)))
- goto ablk_rfc3686_ctr_err;
-#endif
-#endif
-#ifdef HAS_LRW
- if ((err = crypto_register_alg(&ablk_lrw_alg)))
- goto ablk_lrw_err;
-#endif
-#ifdef HAS_PCBC
- if ((err = crypto_register_alg(&ablk_pcbc_alg)))
- goto ablk_pcbc_err;
-#endif
-#ifdef HAS_XTS
- if ((err = crypto_register_alg(&ablk_xts_alg)))
- goto ablk_xts_err;
-#endif
- return err;
+ err = crypto_fpu_init();
+ if (err)
+ return err;
-#ifdef HAS_XTS
-ablk_xts_err:
-#endif
-#ifdef HAS_PCBC
- crypto_unregister_alg(&ablk_pcbc_alg);
-ablk_pcbc_err:
-#endif
-#ifdef HAS_LRW
- crypto_unregister_alg(&ablk_lrw_alg);
-ablk_lrw_err:
-#endif
-#ifdef CONFIG_X86_64
-#ifdef HAS_CTR
- crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
-ablk_rfc3686_ctr_err:
-#endif
- crypto_unregister_alg(&rfc4106_alg);
-aead_gcm_err:
- crypto_unregister_alg(&__rfc4106_alg);
-__aead_gcm_err:
- crypto_unregister_alg(&ablk_ctr_alg);
-ablk_ctr_err:
- crypto_unregister_alg(&blk_ctr_alg);
-blk_ctr_err:
-#endif
- crypto_unregister_alg(&ablk_cbc_alg);
-ablk_cbc_err:
- crypto_unregister_alg(&ablk_ecb_alg);
-ablk_ecb_err:
- crypto_unregister_alg(&blk_cbc_alg);
-blk_cbc_err:
- crypto_unregister_alg(&blk_ecb_alg);
-blk_ecb_err:
- crypto_unregister_alg(&__aesni_alg);
-__aes_err:
- crypto_unregister_alg(&aesni_alg);
-aes_err:
-fpu_err:
- return err;
+ for (i = 0; i < ARRAY_SIZE(aesni_algs); i++)
+ INIT_LIST_HEAD(&aesni_algs[i].cra_list);
+
+ return crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
}
static void __exit aesni_exit(void)
{
-#ifdef HAS_XTS
- crypto_unregister_alg(&ablk_xts_alg);
-#endif
-#ifdef HAS_PCBC
- crypto_unregister_alg(&ablk_pcbc_alg);
-#endif
-#ifdef HAS_LRW
- crypto_unregister_alg(&ablk_lrw_alg);
-#endif
-#ifdef CONFIG_X86_64
-#ifdef HAS_CTR
- crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
-#endif
- crypto_unregister_alg(&rfc4106_alg);
- crypto_unregister_alg(&__rfc4106_alg);
- crypto_unregister_alg(&ablk_ctr_alg);
- crypto_unregister_alg(&blk_ctr_alg);
-#endif
- crypto_unregister_alg(&ablk_cbc_alg);
- crypto_unregister_alg(&ablk_ecb_alg);
- crypto_unregister_alg(&blk_cbc_alg);
- crypto_unregister_alg(&blk_ecb_alg);
- crypto_unregister_alg(&__aesni_alg);
- crypto_unregister_alg(&aesni_alg);
+ crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
crypto_fpu_exit();
}
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index a69245ba27e..673ac9b63d6 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -38,7 +38,7 @@
int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
{
int err = 0;
- bool ia32 = is_ia32_task();
+ bool ia32 = test_thread_flag(TIF_IA32);
if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
return -EFAULT;
@@ -67,6 +67,10 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
switch (from->si_code >> 16) {
case __SI_FAULT >> 16:
break;
+ case __SI_SYS >> 16:
+ put_user_ex(from->si_syscall, &to->si_syscall);
+ put_user_ex(from->si_arch, &to->si_arch);
+ break;
case __SI_CHLD >> 16:
if (ia32) {
put_user_ex(from->si_utime, &to->si_utime);
@@ -127,18 +131,8 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
{
sigset_t blocked;
-
- current->saved_sigmask = current->blocked;
-
- mask &= _BLOCKABLE;
siginitset(&blocked, mask);
- set_current_blocked(&blocked);
-
- current->state = TASK_INTERRUPTIBLE;
- schedule();
-
- set_restore_sigmask();
- return -ERESTARTNOHAND;
+ return sigsuspend(&blocked);
}
asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
@@ -279,7 +273,6 @@ asmlinkage long sys32_sigreturn(struct pt_regs *regs)
sizeof(frame->extramask))))
goto badframe;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (ia32_restore_sigcontext(regs, &frame->sc, &ax))
@@ -305,7 +298,6 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index e3e734005e1..20e5f7ba0e6 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -13,6 +13,7 @@
#include <asm/thread_info.h>
#include <asm/segment.h>
#include <asm/irqflags.h>
+#include <asm/asm.h>
#include <linux/linkage.h>
#include <linux/err.h>
@@ -146,9 +147,7 @@ ENTRY(ia32_sysenter_target)
/* no need to do an access_ok check here because rbp has been
32bit zero extended */
1: movl (%rbp),%ebp
- .section __ex_table,"a"
- .quad 1b,ia32_badarg
- .previous
+ _ASM_EXTABLE(1b,ia32_badarg)
orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
CFI_REMEMBER_STATE
@@ -303,9 +302,7 @@ ENTRY(ia32_cstar_target)
32bit zero extended */
/* hardware stack frame is complete now */
1: movl (%r8),%r9d
- .section __ex_table,"a"
- .quad 1b,ia32_badarg
- .previous
+ _ASM_EXTABLE(1b,ia32_badarg)
orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
CFI_REMEMBER_STATE
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index aec2202a596..4540bece094 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -71,8 +71,8 @@ static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
{
typeof(ubuf->st_uid) uid = 0;
typeof(ubuf->st_gid) gid = 0;
- SET_UID(uid, stat->uid);
- SET_GID(gid, stat->gid);
+ SET_UID(uid, from_kuid_munged(current_user_ns(), stat->uid));
+ SET_GID(gid, from_kgid_munged(current_user_ns(), stat->gid));
if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct stat64)) ||
__put_user(huge_encode_dev(stat->dev), &ubuf->st_dev) ||
__put_user(stat->ino, &ubuf->__st_ino) ||
@@ -287,11 +287,6 @@ asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act,
return ret;
}
-asmlinkage long sys32_alarm(unsigned int seconds)
-{
- return alarm_setitimer(seconds);
-}
-
asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr,
int options)
{
@@ -300,11 +295,6 @@ asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr,
/* 32-bit timeval and related flotsam. */
-asmlinkage long sys32_sysfs(int option, u32 arg1, u32 arg2)
-{
- return sys_sysfs(option, arg1, arg2);
-}
-
asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid,
struct compat_timespec __user *interval)
{
@@ -375,19 +365,6 @@ asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
}
-asmlinkage long sys32_personality(unsigned long personality)
-{
- int ret;
-
- if (personality(current->personality) == PER_LINUX32 &&
- personality == PER_LINUX)
- personality = PER_LINUX32;
- ret = sys_personality(personality);
- if (ret == PER_LINUX32)
- ret = PER_LINUX;
- return ret;
-}
-
asmlinkage long sys32_sendfile(int out_fd, int in_fd,
compat_off_t __user *offset, s32 count)
{
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 610001d385d..0c44630d178 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -29,7 +29,7 @@
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mpspec.h>
-#include <asm/trampoline.h>
+#include <asm/realmode.h>
#define COMPILER_DEPENDENT_INT64 long long
#define COMPILER_DEPENDENT_UINT64 unsigned long long
@@ -117,11 +117,8 @@ static inline void acpi_disable_pci(void)
/* Low-level suspend routine. */
extern int acpi_suspend_lowlevel(void);
-extern const unsigned char acpi_wakeup_code[];
-#define acpi_wakeup_address (__pa(TRAMPOLINE_SYM(acpi_wakeup_code)))
-
-/* early initialization routine */
-extern void acpi_reserve_wakeup_memory(void);
+/* Physical address to resume after wakeup */
+#define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start))
/*
* Check if the CPU can handle C2 and deeper
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index d8541017126..eaff4790ed9 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -138,6 +138,11 @@ static inline void native_apic_msr_write(u32 reg, u32 v)
wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0);
}
+static inline void native_apic_msr_eoi_write(u32 reg, u32 v)
+{
+ wrmsr(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0);
+}
+
static inline u32 native_apic_msr_read(u32 reg)
{
u64 msr;
@@ -351,6 +356,14 @@ struct apic {
/* apic ops */
u32 (*read)(u32 reg);
void (*write)(u32 reg, u32 v);
+ /*
+ * ->eoi_write() has the same signature as ->write().
+ *
+ * Drivers can support both ->eoi_write() and ->write() by passing the same
+ * callback value. Kernel can override ->eoi_write() and fall back
+ * on write for EOI.
+ */
+ void (*eoi_write)(u32 reg, u32 v);
u64 (*icr_read)(void);
void (*icr_write)(u32 low, u32 high);
void (*wait_icr_idle)(void);
@@ -426,6 +439,11 @@ static inline void apic_write(u32 reg, u32 val)
apic->write(reg, val);
}
+static inline void apic_eoi(void)
+{
+ apic->eoi_write(APIC_EOI, APIC_EOI_ACK);
+}
+
static inline u64 apic_icr_read(void)
{
return apic->icr_read();
@@ -450,6 +468,7 @@ static inline u32 safe_apic_wait_icr_idle(void)
static inline u32 apic_read(u32 reg) { return 0; }
static inline void apic_write(u32 reg, u32 val) { }
+static inline void apic_eoi(void) { }
static inline u64 apic_icr_read(void) { return 0; }
static inline void apic_icr_write(u32 low, u32 high) { }
static inline void apic_wait_icr_idle(void) { }
@@ -463,9 +482,7 @@ static inline void ack_APIC_irq(void)
* ack_APIC_irq() actually gets compiled as a single instruction
* ... yummie.
*/
-
- /* Docs say use 0 for future compatibility */
- apic_write(APIC_EOI, 0);
+ apic_eoi();
}
static inline unsigned default_get_apic_id(unsigned long x)
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 134bba00df0..c46bb99d5fb 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -37,7 +37,7 @@
#define APIC_ARBPRI_MASK 0xFFu
#define APIC_PROCPRI 0xA0
#define APIC_EOI 0xB0
-#define APIC_EIO_ACK 0x0
+#define APIC_EOI_ACK 0x0 /* Docs say 0 for future compat. */
#define APIC_RRR 0xC0
#define APIC_LDR 0xD0
#define APIC_LDR_MASK (0xFFu << 24)
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 9412d6558c8..1c2d247f65c 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -4,11 +4,9 @@
#ifdef __ASSEMBLY__
# define __ASM_FORM(x) x
# define __ASM_FORM_COMMA(x) x,
-# define __ASM_EX_SEC .section __ex_table, "a"
#else
# define __ASM_FORM(x) " " #x " "
# define __ASM_FORM_COMMA(x) " " #x ","
-# define __ASM_EX_SEC " .section __ex_table,\"a\"\n"
#endif
#ifdef CONFIG_X86_32
@@ -42,17 +40,33 @@
/* Exception table entry */
#ifdef __ASSEMBLY__
-# define _ASM_EXTABLE(from,to) \
- __ASM_EX_SEC ; \
- _ASM_ALIGN ; \
- _ASM_PTR from , to ; \
- .previous
+# define _ASM_EXTABLE(from,to) \
+ .pushsection "__ex_table","a" ; \
+ .balign 8 ; \
+ .long (from) - . ; \
+ .long (to) - . ; \
+ .popsection
+
+# define _ASM_EXTABLE_EX(from,to) \
+ .pushsection "__ex_table","a" ; \
+ .balign 8 ; \
+ .long (from) - . ; \
+ .long (to) - . + 0x7ffffff0 ; \
+ .popsection
#else
-# define _ASM_EXTABLE(from,to) \
- __ASM_EX_SEC \
- _ASM_ALIGN "\n" \
- _ASM_PTR #from "," #to "\n" \
- " .previous\n"
+# define _ASM_EXTABLE(from,to) \
+ " .pushsection \"__ex_table\",\"a\"\n" \
+ " .balign 8\n" \
+ " .long (" #from ") - .\n" \
+ " .long (" #to ") - .\n" \
+ " .popsection\n"
+
+# define _ASM_EXTABLE_EX(from,to) \
+ " .pushsection \"__ex_table\",\"a\"\n" \
+ " .balign 8\n" \
+ " .long (" #from ") - .\n" \
+ " .long (" #to ") - . + 0x7ffffff0\n" \
+ " .popsection\n"
#endif
#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 198119910da..b154de75c90 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -63,7 +63,7 @@ ATOMIC64_DECL(add_unless);
/**
* atomic64_cmpxchg - cmpxchg atomic64 variable
- * @p: pointer to type atomic64_t
+ * @v: pointer to type atomic64_t
* @o: expected value
* @n: new value
*
@@ -98,7 +98,7 @@ static inline long long atomic64_xchg(atomic64_t *v, long long n)
/**
* atomic64_set - set atomic64 variable
* @v: pointer to type atomic64_t
- * @n: value to assign
+ * @i: value to assign
*
* Atomically sets the value of @v to @n.
*/
@@ -200,7 +200,7 @@ static inline long long atomic64_sub(long long i, atomic64_t *v)
* atomic64_sub_and_test - subtract value from variable and test result
* @i: integer value to subtract
* @v: pointer to type atomic64_t
- *
+ *
* Atomically subtracts @i from @v and returns
* true if the result is zero, or false for all
* other cases.
@@ -224,9 +224,9 @@ static inline void atomic64_inc(atomic64_t *v)
/**
* atomic64_dec - decrement atomic64 variable
- * @ptr: pointer to type atomic64_t
+ * @v: pointer to type atomic64_t
*
- * Atomically decrements @ptr by 1.
+ * Atomically decrements @v by 1.
*/
static inline void atomic64_dec(atomic64_t *v)
{
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index b97596e2b68..a6983b27722 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -15,6 +15,8 @@
#include <linux/compiler.h>
#include <asm/alternative.h>
+#define BIT_64(n) (U64_C(1) << (n))
+
/*
* These have to be done with inline assembly: that way the bit-setting
* is guaranteed to be atomic. All bit operations return 0 if the bit
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 5e1a2eef3e7..b13fe63bdc5 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -19,7 +19,7 @@
#ifdef CONFIG_X86_64
#define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT
#else
-#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_ORDER)
+#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_SIZE_ORDER)
#endif
#define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index 2f90c51cc49..eb45aa6b1f2 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -112,7 +112,8 @@ struct boot_params {
__u8 e820_entries; /* 0x1e8 */
__u8 eddbuf_entries; /* 0x1e9 */
__u8 edd_mbr_sig_buf_entries; /* 0x1ea */
- __u8 _pad6[6]; /* 0x1eb */
+ __u8 kbd_status; /* 0x1eb */
+ __u8 _pad6[5]; /* 0x1ec */
struct setup_header hdr; /* setup header */ /* 0x1f1 */
__u8 _pad7[0x290-0x1f1-sizeof(struct setup_header)];
__u32 edd_mbr_sig_buffer[EDD_MBR_SIG_MAX]; /* 0x290 */
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index d6805798d6f..fedf32b73e6 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -229,7 +229,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
sp = task_pt_regs(current)->sp;
} else {
/* -128 for the x32 ABI redzone */
- sp = percpu_read(old_rsp) - 128;
+ sp = this_cpu_read(old_rsp) - 128;
}
return (void __user *)round_down(sp - len, 16);
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 340ee49961a..f91e80f4f18 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -176,7 +176,7 @@
#define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */
#define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */
#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */
+#define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */
#define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */
/* Virtualization flags: Linux defined, word 8 */
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 4d447b732d8..9476c04ee63 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -11,7 +11,7 @@ DECLARE_PER_CPU(struct task_struct *, current_task);
static __always_inline struct task_struct *get_current(void)
{
- return percpu_read_stable(current_task);
+ return this_cpu_read_stable(current_task);
}
#define current get_current()
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index e95822d683f..8bf1c06070d 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -6,6 +6,7 @@
#include <asm/mmu.h>
#include <linux/smp.h>
+#include <linux/percpu.h>
static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info)
{
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 63a2a03d7d5..93e1c55f14a 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -5,8 +5,8 @@ struct dev_archdata {
#ifdef CONFIG_ACPI
void *acpi_handle;
#endif
-#ifdef CONFIG_X86_64
-struct dma_map_ops *dma_ops;
+#ifdef CONFIG_X86_DEV_DMA_OPS
+ struct dma_map_ops *dma_ops;
#endif
#if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
void *iommu; /* hook for IOMMU specific extension */
diff --git a/arch/x86/include/asm/dma-contiguous.h b/arch/x86/include/asm/dma-contiguous.h
new file mode 100644
index 00000000000..c0924165997
--- /dev/null
+++ b/arch/x86/include/asm/dma-contiguous.h
@@ -0,0 +1,13 @@
+#ifndef ASMX86_DMA_CONTIGUOUS_H
+#define ASMX86_DMA_CONTIGUOUS_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <asm-generic/dma-contiguous.h>
+
+static inline void
+dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { }
+
+#endif
+#endif
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 4b4331d7193..f7b4c7903e7 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -13,6 +13,7 @@
#include <asm/io.h>
#include <asm/swiotlb.h>
#include <asm-generic/dma-coherent.h>
+#include <linux/dma-contiguous.h>
#ifdef CONFIG_ISA
# define ISA_DMA_BIT_MASK DMA_BIT_MASK(24)
@@ -30,7 +31,7 @@ extern struct dma_map_ops *dma_ops;
static inline struct dma_map_ops *get_dma_ops(struct device *dev)
{
-#ifdef CONFIG_X86_32
+#ifndef CONFIG_X86_DEV_DMA_OPS
return dma_ops;
#else
if (unlikely(!dev) || !dev->archdata.dma_ops)
@@ -62,6 +63,16 @@ extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag,
struct dma_attrs *attrs);
+extern void dma_generic_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_addr,
+ struct dma_attrs *attrs);
+
+#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */
+extern bool dma_capable(struct device *dev, dma_addr_t addr, size_t size);
+extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
+extern phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
+#else
+
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
if (!dev->dma_mask)
@@ -79,6 +90,7 @@ static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
{
return daddr;
}
+#endif /* CONFIG_X86_DMA_REMAP */
static inline void
dma_cache_sync(struct device *dev, void *vaddr, size_t size,
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 4fa88154e4d..75f4c6d6a33 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -290,14 +290,14 @@ static inline int __thread_has_fpu(struct task_struct *tsk)
static inline void __thread_clear_has_fpu(struct task_struct *tsk)
{
tsk->thread.fpu.has_fpu = 0;
- percpu_write(fpu_owner_task, NULL);
+ this_cpu_write(fpu_owner_task, NULL);
}
/* Must be paired with a 'clts' before! */
static inline void __thread_set_has_fpu(struct task_struct *tsk)
{
tsk->thread.fpu.has_fpu = 1;
- percpu_write(fpu_owner_task, tsk);
+ this_cpu_write(fpu_owner_task, tsk);
}
/*
@@ -344,7 +344,7 @@ typedef struct { int preload; } fpu_switch_t;
*/
static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
{
- return new == percpu_read_stable(fpu_owner_task) &&
+ return new == this_cpu_read_stable(fpu_owner_task) &&
cpu == new->thread.fpu.last_cpu;
}
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 268c783ab1c..b0767bc0874 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -34,6 +34,7 @@
#ifndef __ASSEMBLY__
extern void mcount(void);
+extern atomic_t modifying_ftrace_code;
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
@@ -50,6 +51,8 @@ struct dyn_arch_ftrace {
/* No extra data needed for x86 */
};
+int ftrace_int3_handler(struct pt_regs *regs);
+
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/x86/include/asm/gpio.h b/arch/x86/include/asm/gpio.h
index 91d915a6525..b3799d88ffc 100644
--- a/arch/x86/include/asm/gpio.h
+++ b/arch/x86/include/asm/gpio.h
@@ -1,53 +1,4 @@
-/*
- * Generic GPIO API implementation for x86.
- *
- * Derived from the generic GPIO API for powerpc:
- *
- * Copyright (c) 2007-2008 MontaVista Software, Inc.
- *
- * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef _ASM_X86_GPIO_H
-#define _ASM_X86_GPIO_H
-
-#include <asm-generic/gpio.h>
-
-#ifdef CONFIG_GPIOLIB
-
-/*
- * Just call gpiolib.
- */
-static inline int gpio_get_value(unsigned int gpio)
-{
- return __gpio_get_value(gpio);
-}
-
-static inline void gpio_set_value(unsigned int gpio, int value)
-{
- __gpio_set_value(gpio, value);
-}
-
-static inline int gpio_cansleep(unsigned int gpio)
-{
- return __gpio_cansleep(gpio);
-}
-
-static inline int gpio_to_irq(unsigned int gpio)
-{
- return __gpio_to_irq(gpio);
-}
-
-static inline int irq_to_gpio(unsigned int irq)
-{
- return -EINVAL;
-}
-
-#endif /* CONFIG_GPIOLIB */
-
-#endif /* _ASM_X86_GPIO_H */
+#ifndef __LINUX_GPIO_H
+#warning Include linux/gpio.h instead of asm/gpio.h
+#include <linux/gpio.h>
+#endif
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 382f75d735f..d3895dbf4dd 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -35,14 +35,15 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
#define __ARCH_IRQ_STAT
-#define inc_irq_stat(member) percpu_inc(irq_stat.member)
+#define inc_irq_stat(member) this_cpu_inc(irq_stat.member)
-#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)
+#define local_softirq_pending() this_cpu_read(irq_stat.__softirq_pending)
#define __ARCH_SET_SOFTIRQ_PENDING
-#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x))
-#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x))
+#define set_softirq_pending(x) \
+ this_cpu_write(irq_stat.__softirq_pending, (x))
+#define or_softirq_pending(x) this_cpu_or(irq_stat.__softirq_pending, (x))
extern void ack_bad_irq(unsigned int irq);
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index ee52760549f..b04cbdb138c 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -144,6 +144,12 @@ typedef struct compat_siginfo {
int _band; /* POLL_IN, POLL_OUT, POLL_MSG */
int _fd;
} _sigpoll;
+
+ struct {
+ unsigned int _call_addr; /* calling insn */
+ int _syscall; /* triggering system call number */
+ unsigned int _arch; /* AUDIT_ARCH_* of syscall */
+ } _sigsys;
} _sifields;
} compat_siginfo_t;
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 2c4943de515..73d8c5398ea 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -5,7 +5,7 @@
#include <asm/mpspec.h>
#include <asm/apicdef.h>
#include <asm/irq_vectors.h>
-
+#include <asm/x86_init.h>
/*
* Intel IO-APIC support for SMP and UP systems.
*
@@ -21,15 +21,6 @@
#define IO_APIC_REDIR_LEVEL_TRIGGER (1 << 15)
#define IO_APIC_REDIR_MASKED (1 << 16)
-struct io_apic_ops {
- void (*init) (void);
- unsigned int (*read) (unsigned int apic, unsigned int reg);
- void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
- void (*modify)(unsigned int apic, unsigned int reg, unsigned int value);
-};
-
-void __init set_io_apic_ops(const struct io_apic_ops *);
-
/*
* The structure of the IO-APIC:
*/
@@ -156,7 +147,6 @@ struct io_apic_irq_attr;
extern int io_apic_set_pci_routing(struct device *dev, int irq,
struct io_apic_irq_attr *irq_attr);
void setup_IO_APIC_irq_extra(u32 gsi);
-extern void ioapic_and_gsi_init(void);
extern void ioapic_insert_resources(void);
int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
@@ -185,12 +175,29 @@ extern void mp_save_irq(struct mpc_intsrc *m);
extern void disable_ioapic_support(void);
+extern void __init native_io_apic_init_mappings(void);
+extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg);
+extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val);
+extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+ return x86_io_apic_ops.read(apic, reg);
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+ x86_io_apic_ops.write(apic, reg, value);
+}
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+{
+ x86_io_apic_ops.modify(apic, reg, value);
+}
#else /* !CONFIG_X86_IO_APIC */
#define io_apic_assign_pci_irqs 0
#define setup_ioapic_ids_from_mpc x86_init_noop
static const int timer_through_8259 = 0;
-static inline void ioapic_and_gsi_init(void) { }
static inline void ioapic_insert_resources(void) { }
#define gsi_top (NR_IRQS_LEGACY)
static inline int mp_find_ioapic(u32 gsi) { return 0; }
@@ -212,6 +219,10 @@ static inline int restore_ioapic_entries(void)
static inline void mp_save_irq(struct mpc_intsrc *m) { };
static inline void disable_ioapic_support(void) { }
+#define native_io_apic_init_mappings NULL
+#define native_io_apic_read NULL
+#define native_io_apic_write NULL
+#define native_io_apic_modify NULL
#endif
#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h
index 77843225b7e..d82250b1deb 100644
--- a/arch/x86/include/asm/irq_regs.h
+++ b/arch/x86/include/asm/irq_regs.h
@@ -15,7 +15,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs);
static inline struct pt_regs *get_irq_regs(void)
{
- return percpu_read(irq_regs);
+ return this_cpu_read(irq_regs);
}
static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
@@ -23,7 +23,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
struct pt_regs *old_regs;
old_regs = get_irq_regs();
- percpu_write(irq_regs, new_regs);
+ this_cpu_write(irq_regs, new_regs);
return old_regs;
}
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 47d99934580..5fb9bbbd2f1 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -1,45 +1,101 @@
-#ifndef _ASM_X86_IRQ_REMAPPING_H
-#define _ASM_X86_IRQ_REMAPPING_H
+/*
+ * Copyright (C) 2012 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * This header file contains the interface of the interrupt remapping code to
+ * the x86 interrupt management code.
+ */
-#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
+#ifndef __X86_IRQ_REMAPPING_H
+#define __X86_IRQ_REMAPPING_H
+
+#include <asm/io_apic.h>
#ifdef CONFIG_IRQ_REMAP
-static void irq_remap_modify_chip_defaults(struct irq_chip *chip);
-static inline void prepare_irte(struct irte *irte, int vector,
- unsigned int dest)
+
+extern int irq_remapping_enabled;
+
+extern void setup_irq_remapping_ops(void);
+extern int irq_remapping_supported(void);
+extern int irq_remapping_prepare(void);
+extern int irq_remapping_enable(void);
+extern void irq_remapping_disable(void);
+extern int irq_remapping_reenable(int);
+extern int irq_remap_enable_fault_handling(void);
+extern int setup_ioapic_remapped_entry(int irq,
+ struct IO_APIC_route_entry *entry,
+ unsigned int destination,
+ int vector,
+ struct io_apic_irq_attr *attr);
+extern int set_remapped_irq_affinity(struct irq_data *data,
+ const struct cpumask *mask,
+ bool force);
+extern void free_remapped_irq(int irq);
+extern void compose_remapped_msi_msg(struct pci_dev *pdev,
+ unsigned int irq, unsigned int dest,
+ struct msi_msg *msg, u8 hpet_id);
+extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
+extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
+ int index, int sub_handle);
+extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
+
+#else /* CONFIG_IRQ_REMAP */
+
+#define irq_remapping_enabled 0
+
+static inline void setup_irq_remapping_ops(void) { }
+static inline int irq_remapping_supported(void) { return 0; }
+static inline int irq_remapping_prepare(void) { return -ENODEV; }
+static inline int irq_remapping_enable(void) { return -ENODEV; }
+static inline void irq_remapping_disable(void) { }
+static inline int irq_remapping_reenable(int eim) { return -ENODEV; }
+static inline int irq_remap_enable_fault_handling(void) { return -ENODEV; }
+static inline int setup_ioapic_remapped_entry(int irq,
+ struct IO_APIC_route_entry *entry,
+ unsigned int destination,
+ int vector,
+ struct io_apic_irq_attr *attr)
+{
+ return -ENODEV;
+}
+static inline int set_remapped_irq_affinity(struct irq_data *data,
+ const struct cpumask *mask,
+ bool force)
{
- memset(irte, 0, sizeof(*irte));
-
- irte->present = 1;
- irte->dst_mode = apic->irq_dest_mode;
- /*
- * Trigger mode in the IRTE will always be edge, and for IO-APIC, the
- * actual level or edge trigger will be setup in the IO-APIC
- * RTE. This will help simplify level triggered irq migration.
- * For more details, see the comments (in io_apic.c) explainig IO-APIC
- * irq migration in the presence of interrupt-remapping.
- */
- irte->trigger_mode = 0;
- irte->dlvry_mode = apic->irq_delivery_mode;
- irte->vector = vector;
- irte->dest_id = IRTE_DEST(dest);
- irte->redir_hint = 1;
+ return 0;
}
-static inline bool irq_remapped(struct irq_cfg *cfg)
+static inline void free_remapped_irq(int irq) { }
+static inline void compose_remapped_msi_msg(struct pci_dev *pdev,
+ unsigned int irq, unsigned int dest,
+ struct msi_msg *msg, u8 hpet_id)
{
- return cfg->irq_2_iommu.iommu != NULL;
}
-#else
-static void prepare_irte(struct irte *irte, int vector, unsigned int dest)
+static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
{
+ return -ENODEV;
}
-static inline bool irq_remapped(struct irq_cfg *cfg)
+static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
+ int index, int sub_handle)
{
- return false;
+ return -ENODEV;
}
-static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
+static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
{
+ return -ENODEV;
}
-#endif
+#endif /* CONFIG_IRQ_REMAP */
-#endif /* _ASM_X86_IRQ_REMAPPING_H */
+#endif /* __X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/kbdleds.h b/arch/x86/include/asm/kbdleds.h
new file mode 100644
index 00000000000..f27ac5ff597
--- /dev/null
+++ b/arch/x86/include/asm/kbdleds.h
@@ -0,0 +1,17 @@
+#ifndef _ASM_X86_KBDLEDS_H
+#define _ASM_X86_KBDLEDS_H
+
+/*
+ * Some laptops take the 789uiojklm,. keys as number pad when NumLock is on.
+ * This seems a good reason to start with NumLock off. That's why on X86 we
+ * ask the bios for the correct state.
+ */
+
+#include <asm/setup.h>
+
+static inline int kbd_defleds(void)
+{
+ return boot_params.kbd_status & 0x20 ? (1 << VC_NUMLOCK) : 0;
+}
+
+#endif /* _ASM_X86_KBDLEDS_H */
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index d73f1571bde..2c37aadcbc3 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -24,7 +24,6 @@ enum die_val {
extern void printk_address(unsigned long address, int reliable);
extern void die(const char *, struct pt_regs *,long);
extern int __must_check __die(const char *, struct pt_regs *, long);
-extern void show_registers(struct pt_regs *regs);
extern void show_trace(struct task_struct *t, struct pt_regs *regs,
unsigned long *sp, unsigned long bp);
extern void __show_regs(struct pt_regs *regs, int all);
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index c222e1a1b12..1ac46c22dd5 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -200,7 +200,7 @@ typedef u32 __attribute__((vector_size(16))) sse128_t;
/* Type, address-of, and value of an instruction's operand. */
struct operand {
- enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type;
+ enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_MM, OP_NONE } type;
unsigned int bytes;
union {
unsigned long orig_val;
@@ -213,12 +213,14 @@ struct operand {
unsigned seg;
} mem;
unsigned xmm;
+ unsigned mm;
} addr;
union {
unsigned long val;
u64 val64;
char valptr[sizeof(unsigned long) + 2];
sse128_t vec_val;
+ u64 mm_val;
};
};
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e216ba066e7..db7c1f2709a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -27,6 +27,7 @@
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/msr-index.h>
+#include <asm/asm.h>
#define KVM_MAX_VCPUS 254
#define KVM_SOFT_MAX_VCPUS 160
@@ -172,6 +173,9 @@ enum {
#define DR7_FIXED_1 0x00000400
#define DR7_VOLATILE 0xffff23ff
+/* apic attention bits */
+#define KVM_APIC_CHECK_VAPIC 0
+
/*
* We don't want allocation failures within the mmu code, so we preallocate
* enough memory for a single page fault in a cache.
@@ -237,8 +241,6 @@ struct kvm_mmu_page {
#endif
int write_flooding_count;
-
- struct rcu_head rcu;
};
struct kvm_pio_request {
@@ -337,6 +339,7 @@ struct kvm_vcpu_arch {
u64 efer;
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
+ unsigned long apic_attention;
int32_t apic_arb_prio;
int mp_state;
int sipi_vector;
@@ -536,8 +539,6 @@ struct kvm_arch {
u64 hv_guest_os_id;
u64 hv_hypercall;
- atomic_t reader_counter;
-
#ifdef CONFIG_KVM_MMU_AUDIT
int audit_point;
#endif
@@ -712,8 +713,9 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
-int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
- struct kvm_memory_slot *slot);
+void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ gfn_t gfn_offset, unsigned long mask);
void kvm_mmu_zap_all(struct kvm *kvm);
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
@@ -921,9 +923,7 @@ extern bool kvm_rebooting;
__ASM_SIZE(push) " $666b \n\t" \
"call kvm_spurious_fault \n\t" \
".popsection \n\t" \
- ".pushsection __ex_table, \"a\" \n\t" \
- _ASM_PTR " 666b, 667b \n\t" \
- ".popsection"
+ _ASM_EXTABLE(666b, 667b)
#define __kvm_handle_fault_on_reboot(insn) \
____kvm_handle_fault_on_reboot(insn, "")
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 183922e13de..63ab1661d00 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -95,6 +95,14 @@ struct kvm_vcpu_pv_apf_data {
extern void kvmclock_init(void);
extern int kvm_register_clock(char *txt);
+#ifdef CONFIG_KVM_CLOCK
+bool kvm_check_and_clear_guest_paused(void);
+#else
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+ return false;
+}
+#endif /* CONFIG_KVMCLOCK */
/* This instruction is vmcall. On non-VT architectures, it will generate a
* trap that we will then rewrite to the appropriate instruction.
@@ -173,14 +181,16 @@ static inline int kvm_para_available(void)
if (boot_cpu_data.cpuid_level < 0)
return 0; /* So we don't blow up on old processors */
- cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
- memcpy(signature + 0, &ebx, 4);
- memcpy(signature + 4, &ecx, 4);
- memcpy(signature + 8, &edx, 4);
- signature[12] = 0;
+ if (cpu_has_hypervisor) {
+ cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
+ memcpy(signature + 0, &ebx, 4);
+ memcpy(signature + 4, &ecx, 4);
+ memcpy(signature + 8, &edx, 4);
+ signature[12] = 0;
- if (strcmp(signature, "KVMKVMKVM") == 0)
- return 1;
+ if (strcmp(signature, "KVMKVMKVM") == 0)
+ return 1;
+ }
return 0;
}
diff --git a/arch/x86/include/asm/mca.h b/arch/x86/include/asm/mca.h
deleted file mode 100644
index eedbb6cc1ef..00000000000
--- a/arch/x86/include/asm/mca.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8 -*- */
-
-/* Platform specific MCA defines */
-#ifndef _ASM_X86_MCA_H
-#define _ASM_X86_MCA_H
-
-/* Maximal number of MCA slots - actually, some machines have less, but
- * they all have sufficient number of POS registers to cover 8.
- */
-#define MCA_MAX_SLOT_NR 8
-
-/* Most machines have only one MCA bus. The only multiple bus machines
- * I know have at most two */
-#define MAX_MCA_BUSSES 2
-
-#define MCA_PRIMARY_BUS 0
-#define MCA_SECONDARY_BUS 1
-
-/* Dummy slot numbers on primary MCA for integrated functions */
-#define MCA_INTEGSCSI (MCA_MAX_SLOT_NR)
-#define MCA_INTEGVIDEO (MCA_MAX_SLOT_NR+1)
-#define MCA_MOTHERBOARD (MCA_MAX_SLOT_NR+2)
-
-/* Dummy POS values for integrated functions */
-#define MCA_DUMMY_POS_START 0x10000
-#define MCA_INTEGSCSI_POS (MCA_DUMMY_POS_START+1)
-#define MCA_INTEGVIDEO_POS (MCA_DUMMY_POS_START+2)
-#define MCA_MOTHERBOARD_POS (MCA_DUMMY_POS_START+3)
-
-/* MCA registers */
-
-#define MCA_MOTHERBOARD_SETUP_REG 0x94
-#define MCA_ADAPTER_SETUP_REG 0x96
-#define MCA_POS_REG(n) (0x100+(n))
-
-#define MCA_ENABLED 0x01 /* POS 2, set if adapter enabled */
-
-/* Max number of adapters, including both slots and various integrated
- * things.
- */
-#define MCA_NUMADAPTERS (MCA_MAX_SLOT_NR+3)
-
-#endif /* _ASM_X86_MCA_H */
diff --git a/arch/x86/include/asm/mca_dma.h b/arch/x86/include/asm/mca_dma.h
deleted file mode 100644
index 45271aef82d..00000000000
--- a/arch/x86/include/asm/mca_dma.h
+++ /dev/null
@@ -1,201 +0,0 @@
-#ifndef _ASM_X86_MCA_DMA_H
-#define _ASM_X86_MCA_DMA_H
-
-#include <asm/io.h>
-#include <linux/ioport.h>
-
-/*
- * Microchannel specific DMA stuff. DMA on an MCA machine is fairly similar to
- * standard PC dma, but it certainly has its quirks. DMA register addresses
- * are in a different place and there are some added functions. Most of this
- * should be pretty obvious on inspection. Note that the user must divide
- * count by 2 when using 16-bit dma; that is not handled by these functions.
- *
- * Ramen Noodles are yummy.
- *
- * 1998 Tymm Twillman <tymm@computer.org>
- */
-
-/*
- * Registers that are used by the DMA controller; FN is the function register
- * (tell the controller what to do) and EXE is the execution register (how
- * to do it)
- */
-
-#define MCA_DMA_REG_FN 0x18
-#define MCA_DMA_REG_EXE 0x1A
-
-/*
- * Functions that the DMA controller can do
- */
-
-#define MCA_DMA_FN_SET_IO 0x00
-#define MCA_DMA_FN_SET_ADDR 0x20
-#define MCA_DMA_FN_GET_ADDR 0x30
-#define MCA_DMA_FN_SET_COUNT 0x40
-#define MCA_DMA_FN_GET_COUNT 0x50
-#define MCA_DMA_FN_GET_STATUS 0x60
-#define MCA_DMA_FN_SET_MODE 0x70
-#define MCA_DMA_FN_SET_ARBUS 0x80
-#define MCA_DMA_FN_MASK 0x90
-#define MCA_DMA_FN_RESET_MASK 0xA0
-#define MCA_DMA_FN_MASTER_CLEAR 0xD0
-
-/*
- * Modes (used by setting MCA_DMA_FN_MODE in the function register)
- *
- * Note that the MODE_READ is read from memory (write to device), and
- * MODE_WRITE is vice-versa.
- */
-
-#define MCA_DMA_MODE_XFER 0x04 /* read by default */
-#define MCA_DMA_MODE_READ 0x04 /* same as XFER */
-#define MCA_DMA_MODE_WRITE 0x08 /* OR with MODE_XFER to use */
-#define MCA_DMA_MODE_IO 0x01 /* DMA from IO register */
-#define MCA_DMA_MODE_16 0x40 /* 16 bit xfers */
-
-
-/**
- * mca_enable_dma - channel to enable DMA on
- * @dmanr: DMA channel
- *
- * Enable the MCA bus DMA on a channel. This can be called from
- * IRQ context.
- */
-
-static inline void mca_enable_dma(unsigned int dmanr)
-{
- outb(MCA_DMA_FN_RESET_MASK | dmanr, MCA_DMA_REG_FN);
-}
-
-/**
- * mca_disble_dma - channel to disable DMA on
- * @dmanr: DMA channel
- *
- * Enable the MCA bus DMA on a channel. This can be called from
- * IRQ context.
- */
-
-static inline void mca_disable_dma(unsigned int dmanr)
-{
- outb(MCA_DMA_FN_MASK | dmanr, MCA_DMA_REG_FN);
-}
-
-/**
- * mca_set_dma_addr - load a 24bit DMA address
- * @dmanr: DMA channel
- * @a: 24bit bus address
- *
- * Load the address register in the DMA controller. This has a 24bit
- * limitation (16Mb).
- */
-
-static inline void mca_set_dma_addr(unsigned int dmanr, unsigned int a)
-{
- outb(MCA_DMA_FN_SET_ADDR | dmanr, MCA_DMA_REG_FN);
- outb(a & 0xff, MCA_DMA_REG_EXE);
- outb((a >> 8) & 0xff, MCA_DMA_REG_EXE);
- outb((a >> 16) & 0xff, MCA_DMA_REG_EXE);
-}
-
-/**
- * mca_get_dma_addr - load a 24bit DMA address
- * @dmanr: DMA channel
- *
- * Read the address register in the DMA controller. This has a 24bit
- * limitation (16Mb). The return is a bus address.
- */
-
-static inline unsigned int mca_get_dma_addr(unsigned int dmanr)
-{
- unsigned int addr;
-
- outb(MCA_DMA_FN_GET_ADDR | dmanr, MCA_DMA_REG_FN);
- addr = inb(MCA_DMA_REG_EXE);
- addr |= inb(MCA_DMA_REG_EXE) << 8;
- addr |= inb(MCA_DMA_REG_EXE) << 16;
-
- return addr;
-}
-
-/**
- * mca_set_dma_count - load a 16bit transfer count
- * @dmanr: DMA channel
- * @count: count
- *
- * Set the DMA count for this channel. This can be up to 64Kbytes.
- * Setting a count of zero will not do what you expect.
- */
-
-static inline void mca_set_dma_count(unsigned int dmanr, unsigned int count)
-{
- count--; /* transfers one more than count -- correct for this */
-
- outb(MCA_DMA_FN_SET_COUNT | dmanr, MCA_DMA_REG_FN);
- outb(count & 0xff, MCA_DMA_REG_EXE);
- outb((count >> 8) & 0xff, MCA_DMA_REG_EXE);
-}
-
-/**
- * mca_get_dma_residue - get the remaining bytes to transfer
- * @dmanr: DMA channel
- *
- * This function returns the number of bytes left to transfer
- * on this DMA channel.
- */
-
-static inline unsigned int mca_get_dma_residue(unsigned int dmanr)
-{
- unsigned short count;
-
- outb(MCA_DMA_FN_GET_COUNT | dmanr, MCA_DMA_REG_FN);
- count = 1 + inb(MCA_DMA_REG_EXE);
- count += inb(MCA_DMA_REG_EXE) << 8;
-
- return count;
-}
-
-/**
- * mca_set_dma_io - set the port for an I/O transfer
- * @dmanr: DMA channel
- * @io_addr: an I/O port number
- *
- * Unlike the ISA bus DMA controllers the DMA on MCA bus can transfer
- * with an I/O port target.
- */
-
-static inline void mca_set_dma_io(unsigned int dmanr, unsigned int io_addr)
-{
- /*
- * DMA from a port address -- set the io address
- */
-
- outb(MCA_DMA_FN_SET_IO | dmanr, MCA_DMA_REG_FN);
- outb(io_addr & 0xff, MCA_DMA_REG_EXE);
- outb((io_addr >> 8) & 0xff, MCA_DMA_REG_EXE);
-}
-
-/**
- * mca_set_dma_mode - set the DMA mode
- * @dmanr: DMA channel
- * @mode: mode to set
- *
- * The DMA controller supports several modes. The mode values you can
- * set are-
- *
- * %MCA_DMA_MODE_READ when reading from the DMA device.
- *
- * %MCA_DMA_MODE_WRITE to writing to the DMA device.
- *
- * %MCA_DMA_MODE_IO to do DMA to or from an I/O port.
- *
- * %MCA_DMA_MODE_16 to do 16bit transfers.
- */
-
-static inline void mca_set_dma_mode(unsigned int dmanr, unsigned int mode)
-{
- outb(MCA_DMA_FN_SET_MODE | dmanr, MCA_DMA_REG_FN);
- outb(mode, MCA_DMA_REG_EXE);
-}
-
-#endif /* _ASM_X86_MCA_DMA_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 69021528b43..cdbf3677610 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -25,8 +25,8 @@ void destroy_context(struct mm_struct *mm);
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
#ifdef CONFIG_SMP
- if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
- percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
+ if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
#endif
}
@@ -37,8 +37,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
if (likely(prev != next)) {
#ifdef CONFIG_SMP
- percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
- percpu_write(cpu_tlbstate.active_mm, next);
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ this_cpu_write(cpu_tlbstate.active_mm, next);
#endif
cpumask_set_cpu(cpu, mm_cpumask(next));
@@ -56,8 +56,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
}
#ifdef CONFIG_SMP
else {
- percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
- BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) {
/* We were in lazy tlb mode and leave_mm disabled
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 55728e12147..eb05fb3b02f 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -61,10 +61,4 @@ static inline int pfn_valid(int pfn)
#endif /* CONFIG_DISCONTIGMEM */
-#ifdef CONFIG_NEED_MULTIPLE_NODES
-/* always use node 0 for bootmem on this numa platform */
-#define bootmem_arch_preferred_node(__bdata, size, align, goal, limit) \
- (NODE_DATA(0)->bdata)
-#endif /* CONFIG_NEED_MULTIPLE_NODES */
-
#endif /* _ASM_X86_MMZONE_32_H */
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 9c7d95f6174..3e2f42a4b87 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -40,7 +40,7 @@ extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
#endif /* CONFIG_X86_64 */
-#if defined(CONFIG_MCA) || defined(CONFIG_EISA)
+#ifdef CONFIG_EISA
extern int mp_bus_id_to_type[MAX_MP_BUSSES];
#endif
diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index c0a955a9a08..b31f8c09827 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -84,7 +84,7 @@ struct mpc_bus {
#define BUSTYPE_EISA "EISA"
#define BUSTYPE_ISA "ISA"
#define BUSTYPE_INTERN "INTERN" /* Internal BUS */
-#define BUSTYPE_MCA "MCA"
+#define BUSTYPE_MCA "MCA" /* Obsolete */
#define BUSTYPE_VL "VL" /* Local bus */
#define BUSTYPE_PCI "PCI"
#define BUSTYPE_PCMCIA "PCMCIA"
@@ -169,6 +169,5 @@ enum mp_bustype {
MP_BUS_ISA = 1,
MP_BUS_EISA,
MP_BUS_PCI,
- MP_BUS_MCA,
};
#endif /* _ASM_X86_MPSPEC_DEF_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ccb805966f6..957ec87385a 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -134,6 +134,8 @@
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
+#define MSR_AMD64_IBSFETCH_REG_COUNT 3
+#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
#define MSR_AMD64_IBSOPCTL 0xc0011033
#define MSR_AMD64_IBSOPRIP 0xc0011034
#define MSR_AMD64_IBSOPDATA 0xc0011035
@@ -141,8 +143,11 @@
#define MSR_AMD64_IBSOPDATA3 0xc0011037
#define MSR_AMD64_IBSDCLINAD 0xc0011038
#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
+#define MSR_AMD64_IBSOP_REG_COUNT 7
+#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
#define MSR_AMD64_IBSCTL 0xc001103a
#define MSR_AMD64_IBSBRTARGET 0xc001103b
+#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 95203d40ffd..084ef95274c 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -169,14 +169,7 @@ static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high)
return native_write_msr_safe(msr, low, high);
}
-/*
- * rdmsr with exception handling.
- *
- * Please note that the exception handling works only after we've
- * switched to the "smart" #GP handler in trap_init() which knows about
- * exception tables - using this macro earlier than that causes machine
- * hangs on boxes which do not implement the @msr in the first argument.
- */
+/* rdmsr with exception handling */
#define rdmsr_safe(msr, p1, p2) \
({ \
int __err; \
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index fd3f9f18cf3..dc580c42851 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -27,6 +27,8 @@ void arch_trigger_all_cpu_backtrace(void);
enum {
NMI_LOCAL=0,
NMI_UNKNOWN,
+ NMI_SERR,
+ NMI_IO_CHECK,
NMI_MAX
};
@@ -35,8 +37,38 @@ enum {
typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *);
-int register_nmi_handler(unsigned int, nmi_handler_t, unsigned long,
- const char *);
+struct nmiaction {
+ struct list_head list;
+ nmi_handler_t handler;
+ unsigned long flags;
+ const char *name;
+};
+
+#define register_nmi_handler(t, fn, fg, n) \
+({ \
+ static struct nmiaction fn##_na = { \
+ .handler = (fn), \
+ .name = (n), \
+ .flags = (fg), \
+ }; \
+ __register_nmi_handler((t), &fn##_na); \
+})
+
+/*
+ * For special handlers that register/unregister in the
+ * init section only. This should be considered rare.
+ */
+#define register_nmi_handler_initonly(t, fn, fg, n) \
+({ \
+ static struct nmiaction fn##_na __initdata = { \
+ .handler = (fn), \
+ .name = (n), \
+ .flags = (fg), \
+ }; \
+ __register_nmi_handler((t), &fn##_na); \
+})
+
+int __register_nmi_handler(unsigned int, struct nmiaction *);
void unregister_nmi_handler(unsigned int, const char *);
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h
index 405b4032a60..aff2b335610 100644
--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -87,7 +87,11 @@
#define P6_NOP8 0x0f,0x1f,0x84,0x00,0,0,0,0
#define P6_NOP5_ATOMIC P6_NOP5
+#ifdef __ASSEMBLY__
+#define _ASM_MK_NOP(x) .byte x
+#else
#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
+#endif
#if defined(CONFIG_MK7)
#define ASM_NOP1 _ASM_MK_NOP(K7_NOP1)
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index ade619ff9e2..ef17af01347 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -15,8 +15,8 @@
*/
#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
-#define THREAD_ORDER 1
-#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
+#define THREAD_SIZE_ORDER 1
+#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
#define STACKFAULT_STACK 0
#define DOUBLEFAULT_STACK 1
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 7639dbf5d22..320f7bb95f7 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -1,8 +1,8 @@
#ifndef _ASM_X86_PAGE_64_DEFS_H
#define _ASM_X86_PAGE_64_DEFS_H
-#define THREAD_ORDER 1
-#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
+#define THREAD_SIZE_ORDER 1
+#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
#define CURRENT_MASK (~(THREAD_SIZE - 1))
#define EXCEPTION_STACK_ORDER 0
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index aa0f9130836..6cbbabf5270 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -1023,10 +1023,8 @@ extern void default_banner(void);
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \
)
-#define GET_CR2_INTO_RCX \
- call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); \
- movq %rax, %rcx; \
- xorq %rax, %rax;
+#define GET_CR2_INTO_RAX \
+ call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
#define PARAVIRT_ADJUST_EXCEPTION_FRAME \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 7a11910a63c..d9b8e3f7f42 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -46,7 +46,7 @@
#ifdef CONFIG_SMP
#define __percpu_prefix "%%"__stringify(__percpu_seg)":"
-#define __my_cpu_offset percpu_read(this_cpu_off)
+#define __my_cpu_offset this_cpu_read(this_cpu_off)
/*
* Compared to the generic __my_cpu_offset version, the following
@@ -351,23 +351,15 @@ do { \
})
/*
- * percpu_read() makes gcc load the percpu variable every time it is
- * accessed while percpu_read_stable() allows the value to be cached.
- * percpu_read_stable() is more efficient and can be used if its value
+ * this_cpu_read() makes gcc load the percpu variable every time it is
+ * accessed while this_cpu_read_stable() allows the value to be cached.
+ * this_cpu_read_stable() is more efficient and can be used if its value
* is guaranteed to be valid across cpus. The current users include
* get_current() and get_thread_info() both of which are actually
* per-thread variables implemented as per-cpu variables and thus
* stable for the duration of the respective task.
*/
-#define percpu_read(var) percpu_from_op("mov", var, "m" (var))
-#define percpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var)))
-#define percpu_write(var, val) percpu_to_op("mov", var, val)
-#define percpu_add(var, val) percpu_add_op(var, val)
-#define percpu_sub(var, val) percpu_add_op(var, -(val))
-#define percpu_and(var, val) percpu_to_op("and", var, val)
-#define percpu_or(var, val) percpu_to_op("or", var, val)
-#define percpu_xor(var, val) percpu_to_op("xor", var, val)
-#define percpu_inc(var) percpu_unary_op("inc", var)
+#define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var)))
#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
@@ -512,7 +504,11 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
{
unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
- return ((1UL << (nr % BITS_PER_LONG)) & percpu_read(*a)) != 0;
+#ifdef CONFIG_X86_64
+ return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_8(*a)) != 0;
+#else
+ return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_4(*a)) != 0;
+#endif
}
static inline int x86_this_cpu_variable_test_bit(int nr,
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 2291895b183..588f52ea810 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -158,6 +158,7 @@ struct x86_pmu_capability {
#define IBS_CAPS_OPCNT (1U<<4)
#define IBS_CAPS_BRNTRGT (1U<<5)
#define IBS_CAPS_OPCNTEXT (1U<<6)
+#define IBS_CAPS_RIPINVALIDCHK (1U<<7)
#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
| IBS_CAPS_FETCHSAM \
@@ -170,21 +171,28 @@ struct x86_pmu_capability {
#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8)
#define IBSCTL_LVT_OFFSET_MASK 0x0F
-/* IbsFetchCtl bits/masks */
+/* ibs fetch bits/masks */
#define IBS_FETCH_RAND_EN (1ULL<<57)
#define IBS_FETCH_VAL (1ULL<<49)
#define IBS_FETCH_ENABLE (1ULL<<48)
#define IBS_FETCH_CNT 0xFFFF0000ULL
#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
-/* IbsOpCtl bits */
+/* ibs op bits/masks */
+/* lower 4 bits of the current count are ignored: */
+#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32)
#define IBS_OP_CNT_CTL (1ULL<<19)
#define IBS_OP_VAL (1ULL<<18)
#define IBS_OP_ENABLE (1ULL<<17)
#define IBS_OP_MAX_CNT 0x0000FFFFULL
#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
+#define IBS_RIP_INVALID (1ULL<<38)
+#ifdef CONFIG_X86_LOCAL_APIC
extern u32 get_ibs_caps(void);
+#else
+static inline u32 get_ibs_caps(void) { return 0; }
+#endif
#ifdef CONFIG_PERF_EVENTS
extern void perf_events_lapic_init(void);
diff --git a/arch/x86/include/asm/posix_types_32.h b/arch/x86/include/asm/posix_types_32.h
index 99f262e04b9..8e525059e7d 100644
--- a/arch/x86/include/asm/posix_types_32.h
+++ b/arch/x86/include/asm/posix_types_32.h
@@ -10,9 +10,6 @@
typedef unsigned short __kernel_mode_t;
#define __kernel_mode_t __kernel_mode_t
-typedef unsigned short __kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
typedef unsigned short __kernel_ipc_pid_t;
#define __kernel_ipc_pid_t __kernel_ipc_pid_t
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 4fa7dcceb6c..39bc5777211 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -544,13 +544,16 @@ static inline void load_sp0(struct tss_struct *tss,
* enable), so that any CPU's that boot up
* after us can get the correct flags.
*/
-extern unsigned long mmu_cr4_features;
+extern unsigned long mmu_cr4_features;
+extern u32 *trampoline_cr4_features;
static inline void set_in_cr4(unsigned long mask)
{
unsigned long cr4;
mmu_cr4_features |= mask;
+ if (trampoline_cr4_features)
+ *trampoline_cr4_features = mmu_cr4_features;
cr4 = read_cr4();
cr4 |= mask;
write_cr4(cr4);
@@ -561,6 +564,8 @@ static inline void clear_in_cr4(unsigned long mask)
unsigned long cr4;
mmu_cr4_features &= ~mask;
+ if (trampoline_cr4_features)
+ *trampoline_cr4_features = mmu_cr4_features;
cr4 = read_cr4();
cr4 &= ~mask;
write_cr4(cr4);
@@ -579,9 +584,6 @@ extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
-/* Prepare to copy thread state - unlazy all lazy state */
-extern void prepare_to_copy(struct task_struct *tsk);
-
unsigned long get_wchan(struct task_struct *p);
/*
@@ -974,8 +976,6 @@ extern bool cpu_has_amd_erratum(const int *);
#define cpu_has_amd_erratum(x) (false)
#endif /* CONFIG_CPU_SUP_AMD */
-void cpu_idle_wait(void);
-
extern unsigned long arch_align_stack(unsigned long sp);
extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h
index 35f2d1948ad..6167fd79818 100644
--- a/arch/x86/include/asm/pvclock-abi.h
+++ b/arch/x86/include/asm/pvclock-abi.h
@@ -40,5 +40,6 @@ struct pvclock_wall_clock {
} __attribute__((__packed__));
#define PVCLOCK_TSC_STABLE_BIT (1 << 0)
+#define PVCLOCK_GUEST_STOPPED (1 << 1)
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PVCLOCK_ABI_H */
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
new file mode 100644
index 00000000000..fce3f4ae5bd
--- /dev/null
+++ b/arch/x86/include/asm/realmode.h
@@ -0,0 +1,62 @@
+#ifndef _ARCH_X86_REALMODE_H
+#define _ARCH_X86_REALMODE_H
+
+#include <linux/types.h>
+#include <asm/io.h>
+
+/* This must match data at realmode.S */
+struct real_mode_header {
+ u32 text_start;
+ u32 ro_end;
+ /* SMP trampoline */
+ u32 trampoline_start;
+ u32 trampoline_status;
+ u32 trampoline_header;
+#ifdef CONFIG_X86_64
+ u32 trampoline_pgd;
+#endif
+ /* ACPI S3 wakeup */
+#ifdef CONFIG_ACPI_SLEEP
+ u32 wakeup_start;
+ u32 wakeup_header;
+#endif
+ /* APM/BIOS reboot */
+#ifdef CONFIG_X86_32
+ u32 machine_real_restart_asm;
+#endif
+};
+
+/* This must match data at trampoline_32/64.S */
+struct trampoline_header {
+#ifdef CONFIG_X86_32
+ u32 start;
+ u16 gdt_pad;
+ u16 gdt_limit;
+ u32 gdt_base;
+#else
+ u64 start;
+ u64 efer;
+ u32 cr4;
+#endif
+};
+
+extern struct real_mode_header *real_mode_header;
+extern unsigned char real_mode_blob_end[];
+
+extern unsigned long init_rsp;
+extern unsigned long initial_code;
+extern unsigned long initial_gs;
+
+extern unsigned char real_mode_blob[];
+extern unsigned char real_mode_relocs[];
+
+#ifdef CONFIG_X86_32
+extern unsigned char startup_32_smp[];
+extern unsigned char boot_gdt[];
+#else
+extern unsigned char secondary_startup_64[];
+#endif
+
+extern void __init setup_real_mode(void);
+
+#endif /* _ARCH_X86_REALMODE_H */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 165466233ab..c48a95035a7 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -205,13 +205,15 @@
#define IDT_ENTRIES 256
#define NUM_EXCEPTION_VECTORS 32
+/* Bitmask of exception vectors which push an error code on the stack */
+#define EXCEPTION_ERRCODE_MASK 0x00027d00
#define GDT_SIZE (GDT_ENTRIES * 8)
#define GDT_ENTRY_TLS_ENTRIES 3
#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
-extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][10];
+extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
/*
* Load a segment. Fall back on loading the zero
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index ada93b3b8c6..beff97f7df3 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -7,8 +7,6 @@
#include <asm/processor-flags.h>
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \
X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 0434c400287..f48394513c3 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -62,6 +62,8 @@ DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid);
/* Static state in head.S used to set up a CPU */
extern unsigned long stack_start; /* Initial stack pointer address */
+struct task_struct;
+
struct smp_ops {
void (*smp_prepare_boot_cpu)(void);
void (*smp_prepare_cpus)(unsigned max_cpus);
@@ -70,7 +72,7 @@ struct smp_ops {
void (*stop_other_cpus)(int wait);
void (*smp_send_reschedule)(int cpu);
- int (*cpu_up)(unsigned cpu);
+ int (*cpu_up)(unsigned cpu, struct task_struct *tidle);
int (*cpu_disable)(void);
void (*cpu_die)(unsigned int cpu);
void (*play_dead)(void);
@@ -113,9 +115,9 @@ static inline void smp_cpus_done(unsigned int max_cpus)
smp_ops.smp_cpus_done(max_cpus);
}
-static inline int __cpu_up(unsigned int cpu)
+static inline int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
- return smp_ops.cpu_up(cpu);
+ return smp_ops.cpu_up(cpu, tidle);
}
static inline int __cpu_disable(void)
@@ -152,7 +154,7 @@ void cpu_disable_common(void);
void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
void native_smp_cpus_done(unsigned int max_cpus);
-int native_cpu_up(unsigned int cpunum);
+int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_disable(void);
void native_cpu_die(unsigned int cpu);
void native_play_dead(void);
@@ -162,6 +164,7 @@ int wbinvd_on_all_cpus(void);
void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
+void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
void smp_store_cpu_info(int id);
#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
@@ -188,11 +191,11 @@ extern unsigned disabled_cpus __cpuinitdata;
* from the initial startup. We map APIC_BASE very early in page_setup(),
* so this is correct in the x86 case.
*/
-#define raw_smp_processor_id() (percpu_read(cpu_number))
+#define raw_smp_processor_id() (this_cpu_read(cpu_number))
extern int safe_smp_processor_id(void);
#elif defined(CONFIG_X86_64_SMP)
-#define raw_smp_processor_id() (percpu_read(cpu_number))
+#define raw_smp_processor_id() (this_cpu_read(cpu_number))
#define stack_smp_processor_id() \
({ \
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 76bfa2cf301..b315a33867f 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -20,10 +20,8 @@
#ifdef CONFIG_X86_32
# define LOCK_PTR_REG "a"
-# define REG_PTR_MODE "k"
#else
# define LOCK_PTR_REG "D"
-# define REG_PTR_MODE "q"
#endif
#if defined(CONFIG_X86_32) && \
diff --git a/arch/x86/include/asm/sta2x11.h b/arch/x86/include/asm/sta2x11.h
new file mode 100644
index 00000000000..e9d32df89cc
--- /dev/null
+++ b/arch/x86/include/asm/sta2x11.h
@@ -0,0 +1,12 @@
+/*
+ * Header file for STMicroelectronics ConneXt (STA2X11) IOHub
+ */
+#ifndef __ASM_STA2X11_H
+#define __ASM_STA2X11_H
+
+#include <linux/pci.h>
+
+/* This needs to be called from the MFD to configure its sub-devices */
+struct sta2x11_instance *sta2x11_get_instance(struct pci_dev *pdev);
+
+#endif /* __ASM_STA2X11_H */
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index b5d9533d2c3..6a998598f17 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -75,9 +75,9 @@ static __always_inline void boot_init_stack_canary(void)
current->stack_canary = canary;
#ifdef CONFIG_X86_64
- percpu_write(irq_stack_union.stack_canary, canary);
+ this_cpu_write(irq_stack_union.stack_canary, canary);
#else
- percpu_write(stack_canary.canary, canary);
+ this_cpu_write(stack_canary.canary, canary);
#endif
}
diff --git a/arch/x86/include/asm/stat.h b/arch/x86/include/asm/stat.h
index e0b1d9bbcbc..7b3ddc34858 100644
--- a/arch/x86/include/asm/stat.h
+++ b/arch/x86/include/asm/stat.h
@@ -25,6 +25,12 @@ struct stat {
unsigned long __unused5;
};
+/* We don't need to memset the whole thing just to initialize the padding */
+#define INIT_STRUCT_STAT_PADDING(st) do { \
+ st.__unused4 = 0; \
+ st.__unused5 = 0; \
+} while (0)
+
#define STAT64_HAS_BROKEN_ST_INO 1
/* This matches struct stat64 in glibc2.1, hence the absolutely
@@ -63,6 +69,12 @@ struct stat64 {
unsigned long long st_ino;
};
+/* We don't need to memset the whole thing just to initialize the padding */
+#define INIT_STRUCT_STAT64_PADDING(st) do { \
+ memset(&st.__pad0, 0, sizeof(st.__pad0)); \
+ memset(&st.__pad3, 0, sizeof(st.__pad3)); \
+} while (0)
+
#else /* __i386__ */
struct stat {
@@ -87,6 +99,15 @@ struct stat {
unsigned long st_ctime_nsec;
long __unused[3];
};
+
+/* We don't need to memset the whole thing just to initialize the padding */
+#define INIT_STRUCT_STAT_PADDING(st) do { \
+ st.__pad0 = 0; \
+ st.__unused[0] = 0; \
+ st.__unused[1] = 0; \
+ st.__unused[2] = 0; \
+} while (0)
+
#endif
/* for 32bit emulation and 32 bit kernels */
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 386b78686c4..1ace47b6259 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -13,9 +13,11 @@
#ifndef _ASM_X86_SYSCALL_H
#define _ASM_X86_SYSCALL_H
+#include <linux/audit.h>
#include <linux/sched.h>
#include <linux/err.h>
#include <asm/asm-offsets.h> /* For NR_syscalls */
+#include <asm/thread_info.h> /* for TS_COMPAT */
#include <asm/unistd.h>
extern const unsigned long sys_call_table[];
@@ -88,6 +90,12 @@ static inline void syscall_set_arguments(struct task_struct *task,
memcpy(&regs->bx + i, args, n * sizeof(args[0]));
}
+static inline int syscall_get_arch(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ return AUDIT_ARCH_I386;
+}
+
#else /* CONFIG_X86_64 */
static inline void syscall_get_arguments(struct task_struct *task,
@@ -212,6 +220,25 @@ static inline void syscall_set_arguments(struct task_struct *task,
}
}
+static inline int syscall_get_arch(struct task_struct *task,
+ struct pt_regs *regs)
+{
+#ifdef CONFIG_IA32_EMULATION
+ /*
+ * TS_COMPAT is set for 32-bit syscall entry and then
+ * remains set until we return to user mode.
+ *
+ * TIF_IA32 tasks should always have TS_COMPAT set at
+ * system call time.
+ *
+ * x32 tasks should be considered AUDIT_ARCH_X86_64.
+ */
+ if (task_thread_info(task)->status & TS_COMPAT)
+ return AUDIT_ARCH_I386;
+#endif
+ /* Both x32 and x86_64 are considered "64-bit". */
+ return AUDIT_ARCH_X86_64;
+}
#endif /* CONFIG_X86_32 */
#endif /* _ASM_X86_SYSCALL_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 0710c11305d..89f794f007e 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -157,24 +157,6 @@ struct thread_info {
#define PREEMPT_ACTIVE 0x10000000
-/* thread information allocation */
-#ifdef CONFIG_DEBUG_STACK_USAGE
-#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
-#else
-#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK)
-#endif
-
-#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
-
-#define alloc_thread_info_node(tsk, node) \
-({ \
- struct page *page = alloc_pages_node(node, THREAD_FLAGS, \
- THREAD_ORDER); \
- struct thread_info *ret = page ? page_address(page) : NULL; \
- \
- ret; \
-})
-
#ifdef CONFIG_X86_32
#define STACK_WARN (THREAD_SIZE/8)
@@ -224,7 +206,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
static inline struct thread_info *current_thread_info(void)
{
struct thread_info *ti;
- ti = (void *)(percpu_read_stable(kernel_stack) +
+ ti = (void *)(this_cpu_read_stable(kernel_stack) +
KERNEL_STACK_OFFSET - THREAD_SIZE);
return ti;
}
@@ -266,7 +248,23 @@ static inline void set_restore_sigmask(void)
{
struct thread_info *ti = current_thread_info();
ti->status |= TS_RESTORE_SIGMASK;
- set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags);
+ WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags));
+}
+static inline void clear_restore_sigmask(void)
+{
+ current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+}
+static inline bool test_restore_sigmask(void)
+{
+ return current_thread_info()->status & TS_RESTORE_SIGMASK;
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+ struct thread_info *ti = current_thread_info();
+ if (!(ti->status & TS_RESTORE_SIGMASK))
+ return false;
+ ti->status &= ~TS_RESTORE_SIGMASK;
+ return true;
}
static inline bool is_ia32_task(void)
@@ -284,8 +282,7 @@ static inline bool is_ia32_task(void)
#ifndef __ASSEMBLY__
extern void arch_task_cache_init(void);
-extern void free_thread_info(struct thread_info *ti);
extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
-#define arch_task_cache_init arch_task_cache_init
+extern void arch_release_task_struct(struct task_struct *tsk);
#endif
#endif /* _ASM_X86_THREAD_INFO_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index c0e108e0807..36a1a2ab87d 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -62,11 +62,7 @@ static inline void __flush_tlb_one(unsigned long addr)
__flush_tlb();
}
-#ifdef CONFIG_X86_32
-# define TLB_FLUSH_ALL 0xffffffff
-#else
-# define TLB_FLUSH_ALL -1ULL
-#endif
+#define TLB_FLUSH_ALL -1UL
/*
* TLB flushing:
@@ -156,8 +152,8 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
static inline void reset_lazy_tlbstate(void)
{
- percpu_write(cpu_tlbstate.state, 0);
- percpu_write(cpu_tlbstate.active_mm, &init_mm);
+ this_cpu_write(cpu_tlbstate.state, 0);
+ this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
}
#endif /* SMP */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index b9676ae37ad..095b21507b6 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -92,44 +92,6 @@ extern void setup_node_to_cpumask_map(void);
#define pcibus_to_node(bus) __pcibus_to_node(bus)
-#ifdef CONFIG_X86_32
-# define SD_CACHE_NICE_TRIES 1
-# define SD_IDLE_IDX 1
-#else
-# define SD_CACHE_NICE_TRIES 2
-# define SD_IDLE_IDX 2
-#endif
-
-/* sched_domains SD_NODE_INIT for NUMA machines */
-#define SD_NODE_INIT (struct sched_domain) { \
- .min_interval = 8, \
- .max_interval = 32, \
- .busy_factor = 32, \
- .imbalance_pct = 125, \
- .cache_nice_tries = SD_CACHE_NICE_TRIES, \
- .busy_idx = 3, \
- .idle_idx = SD_IDLE_IDX, \
- .newidle_idx = 0, \
- .wake_idx = 0, \
- .forkexec_idx = 0, \
- \
- .flags = 1*SD_LOAD_BALANCE \
- | 1*SD_BALANCE_NEWIDLE \
- | 1*SD_BALANCE_EXEC \
- | 1*SD_BALANCE_FORK \
- | 0*SD_BALANCE_WAKE \
- | 1*SD_WAKE_AFFINE \
- | 0*SD_PREFER_LOCAL \
- | 0*SD_SHARE_CPUPOWER \
- | 0*SD_POWERSAVINGS_BALANCE \
- | 0*SD_SHARE_PKG_RESOURCES \
- | 1*SD_SERIALIZE \
- | 0*SD_PREFER_SIBLING \
- , \
- .last_balance = jiffies, \
- .balance_interval = 1, \
-}
-
extern int __node_distance(int, int);
#define node_distance(a, b) __node_distance(a, b)
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h
deleted file mode 100644
index feca3118a73..00000000000
--- a/arch/x86/include/asm/trampoline.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef _ASM_X86_TRAMPOLINE_H
-#define _ASM_X86_TRAMPOLINE_H
-
-#ifndef __ASSEMBLY__
-
-#include <linux/types.h>
-#include <asm/io.h>
-
-/*
- * Trampoline 80x86 program as an array. These are in the init rodata
- * segment, but that's okay, because we only care about the relative
- * addresses of the symbols.
- */
-extern const unsigned char x86_trampoline_start [];
-extern const unsigned char x86_trampoline_end [];
-extern unsigned char *x86_trampoline_base;
-
-extern unsigned long init_rsp;
-extern unsigned long initial_code;
-extern unsigned long initial_gs;
-
-extern void __init setup_trampolines(void);
-
-extern const unsigned char trampoline_data[];
-extern const unsigned char trampoline_status[];
-
-#define TRAMPOLINE_SYM(x) \
- ((void *)(x86_trampoline_base + \
- ((const unsigned char *)(x) - x86_trampoline_start)))
-
-/* Address of the SMP trampoline */
-static inline unsigned long trampoline_address(void)
-{
- return virt_to_phys(TRAMPOLINE_SYM(trampoline_data));
-}
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _ASM_X86_TRAMPOLINE_H */
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index e0544597cfe..e1f3a17034f 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -32,9 +32,9 @@
#define segment_eq(a, b) ((a).seg == (b).seg)
-#define __addr_ok(addr) \
- ((unsigned long __force)(addr) < \
- (current_thread_info()->addr_limit.seg))
+#define user_addr_max() (current_thread_info()->addr_limit.seg)
+#define __addr_ok(addr) \
+ ((unsigned long __force)(addr) < user_addr_max())
/*
* Test whether a block of memory is a valid user space address.
@@ -46,14 +46,14 @@
* This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
*/
-#define __range_not_ok(addr, size) \
+#define __range_not_ok(addr, size, limit) \
({ \
unsigned long flag, roksum; \
__chk_user_ptr(addr); \
asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \
: "=&r" (flag), "=r" (roksum) \
: "1" (addr), "g" ((long)(size)), \
- "rm" (current_thread_info()->addr_limit.seg)); \
+ "rm" (limit)); \
flag; \
})
@@ -76,14 +76,16 @@
* checks that the pointer is in the user space range - after calling
* this function, memory access functions may still return -EFAULT.
*/
-#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0))
+#define access_ok(type, addr, size) \
+ (likely(__range_not_ok(addr, size, user_addr_max()) == 0))
/*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue. No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
+ * The exception table consists of pairs of addresses relative to the
+ * exception table enty itself: the first is the address of an
+ * instruction that is allowed to fault, and the second is the address
+ * at which the program should continue. No registers are modified,
+ * so it is entirely up to the continuation code to figure out what to
+ * do.
*
* All the routines below use bits of fixup code that are out of line
* with the main instruction path. This means when everything is well,
@@ -92,10 +94,14 @@
*/
struct exception_table_entry {
- unsigned long insn, fixup;
+ int insn, fixup;
};
+/* This is not the generic standard exception_table_entry format */
+#define ARCH_HAS_SORT_EXTABLE
+#define ARCH_HAS_SEARCH_EXTABLE
extern int fixup_exception(struct pt_regs *regs);
+extern int early_fixup_exception(unsigned long *ip);
/*
* These are the main single-value transfer routines. They automatically
@@ -202,8 +208,8 @@ extern int __get_user_bad(void);
asm volatile("1: movl %%eax,0(%1)\n" \
"2: movl %%edx,4(%1)\n" \
"3:\n" \
- _ASM_EXTABLE(1b, 2b - 1b) \
- _ASM_EXTABLE(2b, 3b - 2b) \
+ _ASM_EXTABLE_EX(1b, 2b) \
+ _ASM_EXTABLE_EX(2b, 3b) \
: : "A" (x), "r" (addr))
#define __put_user_x8(x, ptr, __ret_pu) \
@@ -408,7 +414,7 @@ do { \
#define __get_user_asm_ex(x, addr, itype, rtype, ltype) \
asm volatile("1: mov"itype" %1,%"rtype"0\n" \
"2:\n" \
- _ASM_EXTABLE(1b, 2b - 1b) \
+ _ASM_EXTABLE_EX(1b, 2b) \
: ltype(x) : "m" (__m(addr)))
#define __put_user_nocheck(x, ptr, size) \
@@ -450,7 +456,7 @@ struct __large_struct { unsigned long buf[100]; };
#define __put_user_asm_ex(x, addr, itype, rtype, ltype) \
asm volatile("1: mov"itype" %"rtype"0,%1\n" \
"2:\n" \
- _ASM_EXTABLE(1b, 2b - 1b) \
+ _ASM_EXTABLE_EX(1b, 2b) \
: : ltype(x), "m" (__m(addr)))
/*
@@ -560,6 +566,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n);
extern __must_check long
strncpy_from_user(char *dst, const char __user *src, long count);
+extern __must_check long strlen_user(const char __user *str);
+extern __must_check long strnlen_user(const char __user *str, long n);
+
/*
* movsl can be slow when source and dest are not both 8-byte aligned
*/
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 8084bc73b18..576e39bca6a 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -213,23 +213,6 @@ static inline unsigned long __must_check copy_from_user(void *to,
return n;
}
-/**
- * strlen_user: - Get the size of a string in user space.
- * @str: The string to measure.
- *
- * Context: User context only. This function may sleep.
- *
- * Get the size of a NUL-terminated string in user space.
- *
- * Returns the size of the string INCLUDING the terminating NUL.
- * On exception, returns 0.
- *
- * If there is a limit on the length of a valid string, you may wish to
- * consider using strnlen_user() instead.
- */
-#define strlen_user(str) strnlen_user(str, LONG_MAX)
-
-long strnlen_user(const char __user *str, long n);
unsigned long __must_check clear_user(void __user *mem, unsigned long len);
unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index fcd4b6f3ef0..8e796fbbf9c 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -208,9 +208,6 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
}
}
-__must_check long strnlen_user(const char __user *str, long n);
-__must_check long __strnlen_user(const char __user *str, long n);
-__must_check long strlen_user(const char __user *str);
__must_check unsigned long clear_user(void __user *mem, unsigned long len);
__must_check unsigned long __clear_user(void __user *mem, unsigned long len);
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index f3971bbcd1d..1e9bed14f7a 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -48,7 +48,7 @@ struct arch_uprobe_task {
#endif
};
-extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
+extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm);
extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h
index e58f03b206c..5b238981542 100644
--- a/arch/x86/include/asm/word-at-a-time.h
+++ b/arch/x86/include/asm/word-at-a-time.h
@@ -1,6 +1,8 @@
#ifndef _ASM_WORD_AT_A_TIME_H
#define _ASM_WORD_AT_A_TIME_H
+#include <linux/kernel.h>
+
/*
* This is largely generic for little-endian machines, but the
* optimal byte mask counting is probably going to be something
@@ -8,6 +10,11 @@
* bit count instruction, that might be better than the multiply
* and shift, for example.
*/
+struct word_at_a_time {
+ const unsigned long one_bits, high_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
#ifdef CONFIG_64BIT
@@ -35,12 +42,31 @@ static inline long count_masked_bytes(long mask)
#endif
-#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
+/* Return nonzero if it has a zero */
+static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
+{
+ unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
+ *bits = mask;
+ return mask;
+}
+
+static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
+{
+ return bits;
+}
+
+static inline unsigned long create_zero_mask(unsigned long bits)
+{
+ bits = (bits - 1) & ~bits;
+ return bits >> 7;
+}
+
+/* The mask we created is directly usable as a bytemask */
+#define zero_bytemask(mask) (mask)
-/* Return the high bit set in the first byte that is a zero */
-static inline unsigned long has_zero(unsigned long a)
+static inline unsigned long find_zero(unsigned long mask)
{
- return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80);
+ return count_masked_bytes(mask);
}
/*
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 764b66a4cf8..c090af10ac7 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -188,11 +188,18 @@ struct x86_msi_ops {
void (*restore_msi_irqs)(struct pci_dev *dev, int irq);
};
+struct x86_io_apic_ops {
+ void (*init) (void);
+ unsigned int (*read) (unsigned int apic, unsigned int reg);
+ void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
+ void (*modify)(unsigned int apic, unsigned int reg, unsigned int value);
+};
+
extern struct x86_init_ops x86_init;
extern struct x86_cpuinit_ops x86_cpuinit;
extern struct x86_platform_ops x86_platform;
extern struct x86_msi_ops x86_msi;
-
+extern struct x86_io_apic_ops x86_io_apic_ops;
extern void x86_init_noop(void);
extern void x86_init_uint_noop(unsigned int unused);
diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h
index 1df35417c41..cc146d51449 100644
--- a/arch/x86/include/asm/xen/events.h
+++ b/arch/x86/include/asm/xen/events.h
@@ -6,6 +6,7 @@ enum ipi_vector {
XEN_CALL_FUNCTION_VECTOR,
XEN_CALL_FUNCTION_SINGLE_VECTOR,
XEN_SPIN_UNLOCK_VECTOR,
+ XEN_IRQ_WORK_VECTOR,
XEN_NR_IPIS,
};
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index c34f96c2f7a..93971e841dd 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -44,6 +44,7 @@ extern unsigned long machine_to_phys_nr;
extern unsigned long get_phys_to_machine(unsigned long pfn);
extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+extern bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern unsigned long set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e);
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 133b40a0f49..454570891bd 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -861,6 +861,9 @@ static struct xor_block_template xor_block_pIII_sse = {
.do_5 = xor_sse_5,
};
+/* Also try the AVX routines */
+#include "xor_avx.h"
+
/* Also try the generic routines. */
#include <asm-generic/xor.h>
@@ -871,6 +874,7 @@ do { \
xor_speed(&xor_block_8regs_p); \
xor_speed(&xor_block_32regs); \
xor_speed(&xor_block_32regs_p); \
+ AVX_XOR_SPEED; \
if (cpu_has_xmm) \
xor_speed(&xor_block_pIII_sse); \
if (cpu_has_mmx) { \
@@ -883,6 +887,6 @@ do { \
We may also be able to load into the L1 only depending on how the cpu
deals with a load to a line that is being prefetched. */
#define XOR_SELECT_TEMPLATE(FASTEST) \
- (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
+ AVX_SELECT(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
#endif /* _ASM_X86_XOR_32_H */
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index 1549b5e261f..b9b2323e90f 100644
--- a/arch/x86/include/asm/xor_64.h
+++ b/arch/x86/include/asm/xor_64.h
@@ -347,15 +347,21 @@ static struct xor_block_template xor_block_sse = {
.do_5 = xor_sse_5,
};
+
+/* Also try the AVX routines */
+#include "xor_avx.h"
+
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES \
do { \
+ AVX_XOR_SPEED; \
xor_speed(&xor_block_sse); \
} while (0)
/* We force the use of the SSE xor block because it can write around L2.
We may also be able to load into the L1 only depending on how the cpu
deals with a load to a line that is being prefetched. */
-#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_sse)
+#define XOR_SELECT_TEMPLATE(FASTEST) \
+ AVX_SELECT(&xor_block_sse)
#endif /* _ASM_X86_XOR_64_H */
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
new file mode 100644
index 00000000000..2510d35f480
--- /dev/null
+++ b/arch/x86/include/asm/xor_avx.h
@@ -0,0 +1,214 @@
+#ifndef _ASM_X86_XOR_AVX_H
+#define _ASM_X86_XOR_AVX_H
+
+/*
+ * Optimized RAID-5 checksumming functions for AVX
+ *
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
+ *
+ * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#ifdef CONFIG_AS_AVX
+
+#include <linux/compiler.h>
+#include <asm/i387.h>
+
+#define ALIGN32 __aligned(32)
+
+#define YMM_SAVED_REGS 4
+
+#define YMMS_SAVE \
+do { \
+ preempt_disable(); \
+ cr0 = read_cr0(); \
+ clts(); \
+ asm volatile("vmovaps %%ymm0, %0" : "=m" (ymm_save[0]) : : "memory"); \
+ asm volatile("vmovaps %%ymm1, %0" : "=m" (ymm_save[32]) : : "memory"); \
+ asm volatile("vmovaps %%ymm2, %0" : "=m" (ymm_save[64]) : : "memory"); \
+ asm volatile("vmovaps %%ymm3, %0" : "=m" (ymm_save[96]) : : "memory"); \
+} while (0);
+
+#define YMMS_RESTORE \
+do { \
+ asm volatile("sfence" : : : "memory"); \
+ asm volatile("vmovaps %0, %%ymm3" : : "m" (ymm_save[96])); \
+ asm volatile("vmovaps %0, %%ymm2" : : "m" (ymm_save[64])); \
+ asm volatile("vmovaps %0, %%ymm1" : : "m" (ymm_save[32])); \
+ asm volatile("vmovaps %0, %%ymm0" : : "m" (ymm_save[0])); \
+ write_cr0(cr0); \
+ preempt_enable(); \
+} while (0);
+
+#define BLOCK4(i) \
+ BLOCK(32 * i, 0) \
+ BLOCK(32 * (i + 1), 1) \
+ BLOCK(32 * (i + 2), 2) \
+ BLOCK(32 * (i + 3), 3)
+
+#define BLOCK16() \
+ BLOCK4(0) \
+ BLOCK4(4) \
+ BLOCK4(8) \
+ BLOCK4(12)
+
+static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
+{
+ unsigned long cr0, lines = bytes >> 9;
+ char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+
+ YMMS_SAVE
+
+ while (lines--) {
+#undef BLOCK
+#define BLOCK(i, reg) \
+do { \
+ asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p1[i / sizeof(*p1)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p0[i / sizeof(*p0)])); \
+ asm volatile("vmovdqa %%ymm" #reg ", %0" : \
+ "=m" (p0[i / sizeof(*p0)])); \
+} while (0);
+
+ BLOCK16()
+
+ p0 = (unsigned long *)((uintptr_t)p0 + 512);
+ p1 = (unsigned long *)((uintptr_t)p1 + 512);
+ }
+
+ YMMS_RESTORE
+}
+
+static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
+ unsigned long *p2)
+{
+ unsigned long cr0, lines = bytes >> 9;
+ char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+
+ YMMS_SAVE
+
+ while (lines--) {
+#undef BLOCK
+#define BLOCK(i, reg) \
+do { \
+ asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p2[i / sizeof(*p2)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p1[i / sizeof(*p1)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p0[i / sizeof(*p0)])); \
+ asm volatile("vmovdqa %%ymm" #reg ", %0" : \
+ "=m" (p0[i / sizeof(*p0)])); \
+} while (0);
+
+ BLOCK16()
+
+ p0 = (unsigned long *)((uintptr_t)p0 + 512);
+ p1 = (unsigned long *)((uintptr_t)p1 + 512);
+ p2 = (unsigned long *)((uintptr_t)p2 + 512);
+ }
+
+ YMMS_RESTORE
+}
+
+static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
+ unsigned long *p2, unsigned long *p3)
+{
+ unsigned long cr0, lines = bytes >> 9;
+ char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+
+ YMMS_SAVE
+
+ while (lines--) {
+#undef BLOCK
+#define BLOCK(i, reg) \
+do { \
+ asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p3[i / sizeof(*p3)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p2[i / sizeof(*p2)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p1[i / sizeof(*p1)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p0[i / sizeof(*p0)])); \
+ asm volatile("vmovdqa %%ymm" #reg ", %0" : \
+ "=m" (p0[i / sizeof(*p0)])); \
+} while (0);
+
+ BLOCK16();
+
+ p0 = (unsigned long *)((uintptr_t)p0 + 512);
+ p1 = (unsigned long *)((uintptr_t)p1 + 512);
+ p2 = (unsigned long *)((uintptr_t)p2 + 512);
+ p3 = (unsigned long *)((uintptr_t)p3 + 512);
+ }
+
+ YMMS_RESTORE
+}
+
+static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
+ unsigned long *p2, unsigned long *p3, unsigned long *p4)
+{
+ unsigned long cr0, lines = bytes >> 9;
+ char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+
+ YMMS_SAVE
+
+ while (lines--) {
+#undef BLOCK
+#define BLOCK(i, reg) \
+do { \
+ asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p4[i / sizeof(*p4)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p3[i / sizeof(*p3)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p2[i / sizeof(*p2)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p1[i / sizeof(*p1)])); \
+ asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
+ "m" (p0[i / sizeof(*p0)])); \
+ asm volatile("vmovdqa %%ymm" #reg ", %0" : \
+ "=m" (p0[i / sizeof(*p0)])); \
+} while (0);
+
+ BLOCK16()
+
+ p0 = (unsigned long *)((uintptr_t)p0 + 512);
+ p1 = (unsigned long *)((uintptr_t)p1 + 512);
+ p2 = (unsigned long *)((uintptr_t)p2 + 512);
+ p3 = (unsigned long *)((uintptr_t)p3 + 512);
+ p4 = (unsigned long *)((uintptr_t)p4 + 512);
+ }
+
+ YMMS_RESTORE
+}
+
+static struct xor_block_template xor_block_avx = {
+ .name = "avx",
+ .do_2 = xor_avx_2,
+ .do_3 = xor_avx_3,
+ .do_4 = xor_avx_4,
+ .do_5 = xor_avx_5,
+};
+
+#define AVX_XOR_SPEED \
+do { \
+ if (cpu_has_avx) \
+ xor_speed(&xor_block_avx); \
+} while (0)
+
+#define AVX_SELECT(FASTEST) \
+ (cpu_has_avx ? &xor_block_avx : FASTEST)
+
+#else
+
+#define AVX_XOR_SPEED {}
+
+#define AVX_SELECT(FASTEST) (FASTEST)
+
+#endif
+#endif
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index c6ce2452f10..8a1b6f9b594 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -80,10 +80,7 @@ static inline int xsave_user(struct xsave_struct __user *buf)
"3: movl $-1,%[err]\n"
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- _ASM_ALIGN "\n"
- _ASM_PTR "1b,3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: [err] "=r" (err)
: "D" (buf), "a" (-1), "d" (-1), "0" (0)
: "memory");
@@ -106,10 +103,7 @@ static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask)
"3: movl $-1,%[err]\n"
" jmp 2b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- _ASM_ALIGN "\n"
- _ASM_PTR "1b,3b\n"
- ".previous"
+ _ASM_EXTABLE(1b,3b)
: [err] "=r" (err)
: "D" (xstate), "a" (lmask), "d" (hmask), "0" (0)
: "memory"); /* memory required? */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d23d83577d6..8215e5652d9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -2,7 +2,7 @@
# Makefile for the linux kernel.
#
-extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinux.lds
+extra-y := head_$(BITS).o head$(BITS).o head.o vmlinux.lds
CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
@@ -35,7 +35,6 @@ obj-y += tsc.o io_delay.o rtc.o
obj-y += pci-iommu_table.o
obj-y += resource.o
-obj-y += trampoline.o trampoline_$(BITS).o
obj-y += process.o
obj-y += i387.o xsave.o
obj-y += ptrace.o
@@ -48,8 +47,6 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
obj-y += acpi/
obj-y += reboot.o
-obj-$(CONFIG_X86_32) += reboot_32.o
-obj-$(CONFIG_MCA) += mca_32.o
obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_PCI) += early-quirks.o
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 6f35260bb3e..163b2258147 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -1,14 +1,7 @@
-subdir- := realmode
-
obj-$(CONFIG_ACPI) += boot.o
-obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_rm.o wakeup_$(BITS).o
+obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o
ifneq ($(CONFIG_ACPI_PROCESSOR),)
obj-y += cstate.o
endif
-$(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin
-
-$(obj)/realmode/wakeup.bin: FORCE
- $(Q)$(MAKE) $(build)=$(obj)/realmode
-
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 7c439fe4941..b2297e58c6e 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -422,12 +422,14 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
return 0;
}
- if (intsrc->source_irq == 0 && intsrc->global_irq == 2) {
+ if (intsrc->source_irq == 0) {
if (acpi_skip_timer_override) {
- printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
+ printk(PREFIX "BIOS IRQ0 override ignored.\n");
return 0;
}
- if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {
+
+ if ((intsrc->global_irq == 2) && acpi_fix_pin2_polarity
+ && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {
intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK;
printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n");
}
@@ -990,7 +992,7 @@ void __init mp_config_acpi_legacy_irqs(void)
int i;
struct mpc_intsrc mp_irq;
-#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+#ifdef CONFIG_EISA
/*
* Fabricate the legacy ISA bus (bus #31).
*/
@@ -1334,17 +1336,12 @@ static int __init dmi_disable_acpi(const struct dmi_system_id *d)
}
/*
- * Force ignoring BIOS IRQ0 pin2 override
+ * Force ignoring BIOS IRQ0 override
*/
static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
{
- /*
- * The ati_ixp4x0_rev() early PCI quirk should have set
- * the acpi_skip_timer_override flag already:
- */
if (!acpi_skip_timer_override) {
- WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n");
- pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n",
+ pr_notice("%s detected: Ignoring BIOS IRQ0 override\n",
d->ident);
acpi_skip_timer_override = 1;
}
@@ -1438,7 +1435,7 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = {
* is enabled. This input is incorrectly designated the
* ISA IRQ 0 via an interrupt source override even though
* it is wired to the output of the master 8259A and INTIN0
- * is not connected at all. Force ignoring BIOS IRQ0 pin2
+ * is not connected at all. Force ignoring BIOS IRQ0
* override in that cases.
*/
{
@@ -1473,6 +1470,14 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"),
},
},
+ {
+ .callback = dmi_ignore_irq0_timer_override,
+ .ident = "FUJITSU SIEMENS",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "AMILO PRO V2030"),
+ },
+ },
{}
};
diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile
deleted file mode 100644
index 6a564ac67ef..00000000000
--- a/arch/x86/kernel/acpi/realmode/Makefile
+++ /dev/null
@@ -1,59 +0,0 @@
-#
-# arch/x86/kernel/acpi/realmode/Makefile
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License. See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-
-always := wakeup.bin
-targets := wakeup.elf wakeup.lds
-
-wakeup-y += wakeup.o wakemain.o video-mode.o copy.o bioscall.o regs.o
-
-# The link order of the video-*.o modules can matter. In particular,
-# video-vga.o *must* be listed first, followed by video-vesa.o.
-# Hardware-specific drivers should follow in the order they should be
-# probed, and video-bios.o should typically be last.
-wakeup-y += video-vga.o
-wakeup-y += video-vesa.o
-wakeup-y += video-bios.o
-
-targets += $(wakeup-y)
-
-bootsrc := $(src)/../../../boot
-
-# ---------------------------------------------------------------------------
-
-# How to compile the 16-bit code. Note we always compile for -march=i386,
-# that way we can complain to the user if the CPU is insufficient.
-# Compile with _SETUP since this is similar to the boot-time setup code.
-KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D_WAKEUP -D__KERNEL__ \
- -I$(srctree)/$(bootsrc) \
- $(cflags-y) \
- -Wall -Wstrict-prototypes \
- -march=i386 -mregparm=3 \
- -include $(srctree)/$(bootsrc)/code16gcc.h \
- -fno-strict-aliasing -fomit-frame-pointer \
- $(call cc-option, -ffreestanding) \
- $(call cc-option, -fno-toplevel-reorder,\
- $(call cc-option, -fno-unit-at-a-time)) \
- $(call cc-option, -fno-stack-protector) \
- $(call cc-option, -mpreferred-stack-boundary=2)
-KBUILD_CFLAGS += $(call cc-option, -m32)
-KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
-GCOV_PROFILE := n
-
-WAKEUP_OBJS = $(addprefix $(obj)/,$(wakeup-y))
-
-LDFLAGS_wakeup.elf := -T
-
-CPPFLAGS_wakeup.lds += -P -C
-
-$(obj)/wakeup.elf: $(obj)/wakeup.lds $(WAKEUP_OBJS) FORCE
- $(call if_changed,ld)
-
-OBJCOPYFLAGS_wakeup.bin := -O binary
-
-$(obj)/wakeup.bin: $(obj)/wakeup.elf FORCE
- $(call if_changed,objcopy)
diff --git a/arch/x86/kernel/acpi/realmode/bioscall.S b/arch/x86/kernel/acpi/realmode/bioscall.S
deleted file mode 100644
index f51eb0bb56c..00000000000
--- a/arch/x86/kernel/acpi/realmode/bioscall.S
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../boot/bioscall.S"
diff --git a/arch/x86/kernel/acpi/realmode/copy.S b/arch/x86/kernel/acpi/realmode/copy.S
deleted file mode 100644
index dc59ebee69d..00000000000
--- a/arch/x86/kernel/acpi/realmode/copy.S
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../boot/copy.S"
diff --git a/arch/x86/kernel/acpi/realmode/regs.c b/arch/x86/kernel/acpi/realmode/regs.c
deleted file mode 100644
index 6206033ba20..00000000000
--- a/arch/x86/kernel/acpi/realmode/regs.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../boot/regs.c"
diff --git a/arch/x86/kernel/acpi/realmode/video-bios.c b/arch/x86/kernel/acpi/realmode/video-bios.c
deleted file mode 100644
index 7deabc144a2..00000000000
--- a/arch/x86/kernel/acpi/realmode/video-bios.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../boot/video-bios.c"
diff --git a/arch/x86/kernel/acpi/realmode/video-mode.c b/arch/x86/kernel/acpi/realmode/video-mode.c
deleted file mode 100644
index 328ad209f11..00000000000
--- a/arch/x86/kernel/acpi/realmode/video-mode.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../boot/video-mode.c"
diff --git a/arch/x86/kernel/acpi/realmode/video-vesa.c b/arch/x86/kernel/acpi/realmode/video-vesa.c
deleted file mode 100644
index 9dbb9672226..00000000000
--- a/arch/x86/kernel/acpi/realmode/video-vesa.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../boot/video-vesa.c"
diff --git a/arch/x86/kernel/acpi/realmode/video-vga.c b/arch/x86/kernel/acpi/realmode/video-vga.c
deleted file mode 100644
index bcc81255f37..00000000000
--- a/arch/x86/kernel/acpi/realmode/video-vga.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../boot/video-vga.c"
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
deleted file mode 100644
index d4f8010a5b1..00000000000
--- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * wakeup.ld
- *
- * Linker script for the real-mode wakeup code
- */
-#undef i386
-#include "wakeup.h"
-
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
-OUTPUT_ARCH(i386)
-ENTRY(_start)
-
-SECTIONS
-{
- . = 0;
- .jump : {
- *(.jump)
- } = 0x90909090
-
- . = WAKEUP_HEADER_OFFSET;
- .header : {
- *(.header)
- }
-
- . = ALIGN(16);
- .text : {
- *(.text*)
- } = 0x90909090
-
- . = ALIGN(16);
- .rodata : {
- *(.rodata*)
- }
-
- .videocards : {
- video_cards = .;
- *(.videocards)
- video_cards_end = .;
- }
-
- . = ALIGN(16);
- .data : {
- *(.data*)
- }
-
- . = ALIGN(16);
- .bss : {
- __bss_start = .;
- *(.bss)
- __bss_end = .;
- }
-
- .signature : {
- *(.signature)
- }
-
- _end = .;
-
- /DISCARD/ : {
- *(.note*)
- }
-}
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 146a49c763a..95bf99de905 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -14,8 +14,9 @@
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
+#include <asm/realmode.h>
-#include "realmode/wakeup.h"
+#include "../../realmode/rm/wakeup.h"
#include "sleep.h"
unsigned long acpi_realmode_flags;
@@ -36,13 +37,9 @@ asmlinkage void acpi_enter_s3(void)
*/
int acpi_suspend_lowlevel(void)
{
- struct wakeup_header *header;
- /* address in low memory of the wakeup routine. */
- char *acpi_realmode;
+ struct wakeup_header *header =
+ (struct wakeup_header *) __va(real_mode_header->wakeup_header);
- acpi_realmode = TRAMPOLINE_SYM(acpi_wakeup_code);
-
- header = (struct wakeup_header *)(acpi_realmode + WAKEUP_HEADER_OFFSET);
if (header->signature != WAKEUP_HEADER_SIGNATURE) {
printk(KERN_ERR "wakeup header does not match\n");
return -EINVAL;
@@ -50,27 +47,6 @@ int acpi_suspend_lowlevel(void)
header->video_mode = saved_video_mode;
- header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
-
- /*
- * Set up the wakeup GDT. We set these up as Big Real Mode,
- * that is, with limits set to 4 GB. At least the Lenovo
- * Thinkpad X61 is known to need this for the video BIOS
- * initialization quirk to work; this is likely to also
- * be the case for other laptops or integrated video devices.
- */
-
- /* GDT[0]: GDT self-pointer */
- header->wakeup_gdt[0] =
- (u64)(sizeof(header->wakeup_gdt) - 1) +
- ((u64)__pa(&header->wakeup_gdt) << 16);
- /* GDT[1]: big real mode-like code segment */
- header->wakeup_gdt[1] =
- GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff);
- /* GDT[2]: big real mode-like data segment */
- header->wakeup_gdt[2] =
- GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff);
-
#ifndef CONFIG_64BIT
store_gdt((struct desc_ptr *)&header->pmode_gdt);
@@ -95,7 +71,6 @@ int acpi_suspend_lowlevel(void)
header->pmode_cr3 = (u32)__pa(&initial_page_table);
saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
- header->trampoline_segment = trampoline_address() >> 4;
#ifdef CONFIG_SMP
stack_start = (unsigned long)temp_stack + sizeof(temp_stack);
early_gdt_descr.address =
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index d68677a2a01..5653a5791ec 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -2,8 +2,8 @@
* Variables and functions used by the code in sleep.c
*/
-#include <asm/trampoline.h>
#include <linux/linkage.h>
+#include <asm/realmode.h>
extern unsigned long saved_video_mode;
extern long saved_magic;
diff --git a/arch/x86/kernel/acpi/wakeup_rm.S b/arch/x86/kernel/acpi/wakeup_rm.S
deleted file mode 100644
index 63b8ab524f2..00000000000
--- a/arch/x86/kernel/acpi/wakeup_rm.S
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * Wrapper script for the realmode binary as a transport object
- * before copying to low memory.
- */
-#include <asm/page_types.h>
-
- .section ".x86_trampoline","a"
- .balign PAGE_SIZE
- .globl acpi_wakeup_code
-acpi_wakeup_code:
- .incbin "arch/x86/kernel/acpi/realmode/wakeup.bin"
- .size acpi_wakeup_code, .-acpi_wakeup_code
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 6e76c191a83..d5fd66f0d4c 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -20,7 +20,6 @@
#include <linux/bitops.h>
#include <linux/ioport.h>
#include <linux/suspend.h>
-#include <linux/kmemleak.h>
#include <asm/e820.h>
#include <asm/io.h>
#include <asm/iommu.h>
@@ -95,11 +94,6 @@ static u32 __init allocate_aperture(void)
return 0;
}
memblock_reserve(addr, aper_size);
- /*
- * Kmemleak should not scan this block as it may not be mapped via the
- * kernel direct mapping.
- */
- kmemleak_ignore(phys_to_virt(addr));
printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
aper_size >> 10, addr);
insert_aperture_resource((u32)addr, aper_size);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index edc24480469..39a222e094a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -35,6 +35,7 @@
#include <linux/smp.h>
#include <linux/mm.h>
+#include <asm/irq_remapping.h>
#include <asm/perf_event.h>
#include <asm/x86_init.h>
#include <asm/pgalloc.h>
@@ -1325,11 +1326,13 @@ void __cpuinit setup_local_APIC(void)
acked);
break;
}
- if (cpu_has_tsc) {
- rdtscll(ntsc);
- max_loops = (cpu_khz << 10) - (ntsc - tsc);
- } else
- max_loops--;
+ if (queued) {
+ if (cpu_has_tsc) {
+ rdtscll(ntsc);
+ max_loops = (cpu_khz << 10) - (ntsc - tsc);
+ } else
+ max_loops--;
+ }
} while (queued && max_loops > 0);
WARN_ON(max_loops <= 0);
@@ -1441,8 +1444,8 @@ void __init bsp_end_local_APIC_setup(void)
* Now that local APIC setup is completed for BP, configure the fault
* handling for interrupt remapping.
*/
- if (intr_remapping_enabled)
- enable_drhd_fault_handling();
+ if (irq_remapping_enabled)
+ irq_remap_enable_fault_handling();
}
@@ -1517,7 +1520,7 @@ void enable_x2apic(void)
int __init enable_IR(void)
{
#ifdef CONFIG_IRQ_REMAP
- if (!intr_remapping_supported()) {
+ if (!irq_remapping_supported()) {
pr_debug("intr-remapping not supported\n");
return -1;
}
@@ -1528,7 +1531,7 @@ int __init enable_IR(void)
return -1;
}
- return enable_intr_remapping();
+ return irq_remapping_enable();
#endif
return -1;
}
@@ -1537,10 +1540,13 @@ void __init enable_IR_x2apic(void)
{
unsigned long flags;
int ret, x2apic_enabled = 0;
- int dmar_table_init_ret;
+ int hardware_init_ret;
+
+ /* Make sure irq_remap_ops are initialized */
+ setup_irq_remapping_ops();
- dmar_table_init_ret = dmar_table_init();
- if (dmar_table_init_ret && !x2apic_supported())
+ hardware_init_ret = irq_remapping_prepare();
+ if (hardware_init_ret && !x2apic_supported())
return;
ret = save_ioapic_entries();
@@ -1556,7 +1562,7 @@ void __init enable_IR_x2apic(void)
if (x2apic_preenabled && nox2apic)
disable_x2apic();
- if (dmar_table_init_ret)
+ if (hardware_init_ret)
ret = -1;
else
ret = enable_IR();
@@ -2176,8 +2182,8 @@ static int lapic_suspend(void)
local_irq_save(flags);
disable_local_APIC();
- if (intr_remapping_enabled)
- disable_intr_remapping();
+ if (irq_remapping_enabled)
+ irq_remapping_disable();
local_irq_restore(flags);
return 0;
@@ -2193,7 +2199,7 @@ static void lapic_resume(void)
return;
local_irq_save(flags);
- if (intr_remapping_enabled) {
+ if (irq_remapping_enabled) {
/*
* IO-APIC and PIC have their own resume routines.
* We just mask them here to make sure the interrupt
@@ -2245,8 +2251,8 @@ static void lapic_resume(void)
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
- if (intr_remapping_enabled)
- reenable_intr_remapping(x2apic_mode);
+ if (irq_remapping_enabled)
+ irq_remapping_reenable(x2apic_mode);
local_irq_restore(flags);
}
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 359b6899a36..0e881c46e8c 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -227,6 +227,7 @@ static struct apic apic_flat = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
+ .eoi_write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
@@ -386,6 +387,7 @@ static struct apic apic_physflat = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
+ .eoi_write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 634ae6cdd5c..a6e4c6e06c0 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -181,6 +181,7 @@ struct apic apic_noop = {
.read = noop_apic_read,
.write = noop_apic_write,
+ .eoi_write = noop_apic_write,
.icr_read = noop_apic_icr_read,
.icr_write = noop_apic_icr_write,
.wait_icr_idle = noop_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 23e75422e01..6ec6d5d297c 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -295,6 +295,7 @@ static struct apic apic_numachip __refconst = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
+ .eoi_write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 0cdec7065af..31fbdbfbf96 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -248,6 +248,7 @@ static struct apic apic_bigsmp = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
+ .eoi_write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index e42d1d3b913..db4ab1be3c7 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -678,6 +678,7 @@ static struct apic __refdata apic_es7000_cluster = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
+ .eoi_write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
@@ -742,6 +743,7 @@ static struct apic __refdata apic_es7000 = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
+ .eoi_write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e88300d8e80..5f0ff597437 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -68,23 +68,21 @@
#define for_each_irq_pin(entry, head) \
for (entry = head; entry; entry = entry->next)
-static void __init __ioapic_init_mappings(void);
-
-static unsigned int __io_apic_read (unsigned int apic, unsigned int reg);
-static void __io_apic_write (unsigned int apic, unsigned int reg, unsigned int val);
-static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
-
-static struct io_apic_ops io_apic_ops = {
- .init = __ioapic_init_mappings,
- .read = __io_apic_read,
- .write = __io_apic_write,
- .modify = __io_apic_modify,
-};
-
-void __init set_io_apic_ops(const struct io_apic_ops *ops)
+#ifdef CONFIG_IRQ_REMAP
+static void irq_remap_modify_chip_defaults(struct irq_chip *chip);
+static inline bool irq_remapped(struct irq_cfg *cfg)
+{
+ return cfg->irq_2_iommu.iommu != NULL;
+}
+#else
+static inline bool irq_remapped(struct irq_cfg *cfg)
+{
+ return false;
+}
+static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
{
- io_apic_ops = *ops;
}
+#endif
/*
* Is the SiS APIC rmw bug present ?
@@ -142,7 +140,7 @@ int mp_irq_entries;
/* GSI interrupts */
static int nr_irqs_gsi = NR_IRQS_LEGACY;
-#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+#ifdef CONFIG_EISA
int mp_bus_id_to_type[MAX_MP_BUSSES];
#endif
@@ -313,21 +311,6 @@ static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
irq_free_desc(at);
}
-static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
-{
- return io_apic_ops.read(apic, reg);
-}
-
-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
-{
- io_apic_ops.write(apic, reg, value);
-}
-
-static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
-{
- io_apic_ops.modify(apic, reg, value);
-}
-
struct io_apic {
unsigned int index;
@@ -349,14 +332,14 @@ static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
writel(vector, &io_apic->eoi);
}
-static unsigned int __io_apic_read(unsigned int apic, unsigned int reg)
+unsigned int native_io_apic_read(unsigned int apic, unsigned int reg)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
writel(reg, &io_apic->index);
return readl(&io_apic->data);
}
-static void __io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
@@ -370,7 +353,7 @@ static void __io_apic_write(unsigned int apic, unsigned int reg, unsigned int va
*
* Older SiS APIC requires we rewrite the index register
*/
-static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
@@ -379,29 +362,6 @@ static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int v
writel(value, &io_apic->data);
}
-static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
-{
- struct irq_pin_list *entry;
- unsigned long flags;
-
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- for_each_irq_pin(entry, cfg->irq_2_pin) {
- unsigned int reg;
- int pin;
-
- pin = entry->pin;
- reg = io_apic_read(entry->apic, 0x10 + pin*2);
- /* Is the remote IRR bit set? */
- if (reg & IO_APIC_REDIR_REMOTE_IRR) {
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
- return true;
- }
- }
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return false;
-}
-
union entry_union {
struct { u32 w1, w2; };
struct IO_APIC_route_entry entry;
@@ -875,7 +835,7 @@ static int __init find_isa_irq_apic(int irq, int type)
return -1;
}
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+#ifdef CONFIG_EISA
/*
* EISA Edge/Level control register, ELCR
*/
@@ -912,12 +872,6 @@ static int EISA_ELCR(unsigned int irq)
#define default_PCI_trigger(idx) (1)
#define default_PCI_polarity(idx) (1)
-/* MCA interrupts are always polarity zero level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_MCA_trigger(idx) (1)
-#define default_MCA_polarity(idx) default_ISA_polarity(idx)
-
static int irq_polarity(int idx)
{
int bus = mp_irqs[idx].srcbus;
@@ -975,7 +929,7 @@ static int irq_trigger(int idx)
trigger = default_ISA_trigger(idx);
else
trigger = default_PCI_trigger(idx);
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+#ifdef CONFIG_EISA
switch (mp_bus_id_to_type[bus]) {
case MP_BUS_ISA: /* ISA pin */
{
@@ -992,11 +946,6 @@ static int irq_trigger(int idx)
/* set before the switch */
break;
}
- case MP_BUS_MCA: /* MCA pin */
- {
- trigger = default_MCA_trigger(idx);
- break;
- }
default:
{
printk(KERN_WARNING "broken BIOS!!\n");
@@ -1246,7 +1195,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
BUG_ON(!cfg->vector);
vector = cfg->vector;
- for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
+ for_each_cpu(cpu, cfg->domain)
per_cpu(vector_irq, cpu)[vector] = -1;
cfg->vector = 0;
@@ -1254,7 +1203,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
if (likely(!cfg->move_in_progress))
return;
- for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
+ for_each_cpu(cpu, cfg->old_domain) {
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
vector++) {
if (per_cpu(vector_irq, cpu)[vector] != irq)
@@ -1361,77 +1310,13 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
fasteoi ? "fasteoi" : "edge");
}
-
-static int setup_ir_ioapic_entry(int irq,
- struct IR_IO_APIC_route_entry *entry,
- unsigned int destination, int vector,
- struct io_apic_irq_attr *attr)
-{
- int index;
- struct irte irte;
- int ioapic_id = mpc_ioapic_id(attr->ioapic);
- struct intel_iommu *iommu = map_ioapic_to_ir(ioapic_id);
-
- if (!iommu) {
- pr_warn("No mapping iommu for ioapic %d\n", ioapic_id);
- return -ENODEV;
- }
-
- index = alloc_irte(iommu, irq, 1);
- if (index < 0) {
- pr_warn("Failed to allocate IRTE for ioapic %d\n", ioapic_id);
- return -ENOMEM;
- }
-
- prepare_irte(&irte, vector, destination);
-
- /* Set source-id of interrupt request */
- set_ioapic_sid(&irte, ioapic_id);
-
- modify_irte(irq, &irte);
-
- apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: "
- "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d "
- "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
- "Avail:%X Vector:%02X Dest:%08X "
- "SID:%04X SQ:%X SVT:%X)\n",
- attr->ioapic, irte.present, irte.fpd, irte.dst_mode,
- irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
- irte.avail, irte.vector, irte.dest_id,
- irte.sid, irte.sq, irte.svt);
-
- memset(entry, 0, sizeof(*entry));
-
- entry->index2 = (index >> 15) & 0x1;
- entry->zero = 0;
- entry->format = 1;
- entry->index = (index & 0x7fff);
- /*
- * IO-APIC RTE will be configured with virtual vector.
- * irq handler will do the explicit EOI to the io-apic.
- */
- entry->vector = attr->ioapic_pin;
- entry->mask = 0; /* enable IRQ */
- entry->trigger = attr->trigger;
- entry->polarity = attr->polarity;
-
- /* Mask level triggered irqs.
- * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
- */
- if (attr->trigger)
- entry->mask = 1;
-
- return 0;
-}
-
static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
unsigned int destination, int vector,
struct io_apic_irq_attr *attr)
{
- if (intr_remapping_enabled)
- return setup_ir_ioapic_entry(irq,
- (struct IR_IO_APIC_route_entry *)entry,
- destination, vector, attr);
+ if (irq_remapping_enabled)
+ return setup_ioapic_remapped_entry(irq, entry, destination,
+ vector, attr);
memset(entry, 0, sizeof(*entry));
@@ -1588,7 +1473,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
{
struct IO_APIC_route_entry entry;
- if (intr_remapping_enabled)
+ if (irq_remapping_enabled)
return;
memset(&entry, 0, sizeof(entry));
@@ -1674,7 +1559,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
printk(KERN_DEBUG ".... IRQ redirection table:\n");
- if (intr_remapping_enabled) {
+ if (irq_remapping_enabled) {
printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR"
" Pol Stat Indx2 Zero Vect:\n");
} else {
@@ -1683,7 +1568,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
}
for (i = 0; i <= reg_01.bits.entries; i++) {
- if (intr_remapping_enabled) {
+ if (irq_remapping_enabled) {
struct IO_APIC_route_entry entry;
struct IR_IO_APIC_route_entry *ir_entry;
@@ -2050,7 +1935,7 @@ void disable_IO_APIC(void)
* IOAPIC RTE as well as interrupt-remapping table entry).
* As this gets called during crash dump, keep this simple for now.
*/
- if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) {
+ if (ioapic_i8259.pin != -1 && !irq_remapping_enabled) {
struct IO_APIC_route_entry entry;
memset(&entry, 0, sizeof(entry));
@@ -2074,7 +1959,7 @@ void disable_IO_APIC(void)
* Use virtual wire A mode when interrupt remapping is enabled.
*/
if (cpu_has_apic || apic_from_smp_config())
- disconnect_bsp_APIC(!intr_remapping_enabled &&
+ disconnect_bsp_APIC(!irq_remapping_enabled &&
ioapic_i8259.pin != -1);
}
@@ -2390,71 +2275,6 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
return ret;
}
-#ifdef CONFIG_IRQ_REMAP
-
-/*
- * Migrate the IO-APIC irq in the presence of intr-remapping.
- *
- * For both level and edge triggered, irq migration is a simple atomic
- * update(of vector and cpu destination) of IRTE and flush the hardware cache.
- *
- * For level triggered, we eliminate the io-apic RTE modification (with the
- * updated vector information), by using a virtual vector (io-apic pin number).
- * Real vector that is used for interrupting cpu will be coming from
- * the interrupt-remapping table entry.
- *
- * As the migration is a simple atomic update of IRTE, the same mechanism
- * is used to migrate MSI irq's in the presence of interrupt-remapping.
- */
-static int
-ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
- bool force)
-{
- struct irq_cfg *cfg = data->chip_data;
- unsigned int dest, irq = data->irq;
- struct irte irte;
-
- if (!cpumask_intersects(mask, cpu_online_mask))
- return -EINVAL;
-
- if (get_irte(irq, &irte))
- return -EBUSY;
-
- if (assign_irq_vector(irq, cfg, mask))
- return -EBUSY;
-
- dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
-
- irte.vector = cfg->vector;
- irte.dest_id = IRTE_DEST(dest);
-
- /*
- * Atomically updates the IRTE with the new destination, vector
- * and flushes the interrupt entry cache.
- */
- modify_irte(irq, &irte);
-
- /*
- * After this point, all the interrupts will start arriving
- * at the new destination. So, time to cleanup the previous
- * vector allocation.
- */
- if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
-
- cpumask_copy(data->affinity, mask);
- return 0;
-}
-
-#else
-static inline int
-ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
- bool force)
-{
- return 0;
-}
-#endif
-
asmlinkage void smp_irq_move_cleanup_interrupt(void)
{
unsigned vector, me;
@@ -2552,6 +2372,29 @@ static void ack_apic_edge(struct irq_data *data)
atomic_t irq_mis_count;
#ifdef CONFIG_GENERIC_PENDING_IRQ
+static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
+{
+ struct irq_pin_list *entry;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
+ unsigned int reg;
+ int pin;
+
+ pin = entry->pin;
+ reg = io_apic_read(entry->apic, 0x10 + pin*2);
+ /* Is the remote IRR bit set? */
+ if (reg & IO_APIC_REDIR_REMOTE_IRR) {
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ return true;
+ }
+ }
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return false;
+}
+
static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
{
/* If we are moving the irq we need to mask it */
@@ -2699,7 +2542,7 @@ static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
chip->irq_eoi = ir_ack_apic_level;
#ifdef CONFIG_SMP
- chip->irq_set_affinity = ir_ioapic_set_affinity;
+ chip->irq_set_affinity = set_remapped_irq_affinity;
#endif
}
#endif /* CONFIG_IRQ_REMAP */
@@ -2912,7 +2755,7 @@ static inline void __init check_timer(void)
* 8259A.
*/
if (pin1 == -1) {
- if (intr_remapping_enabled)
+ if (irq_remapping_enabled)
panic("BIOS bug: timer not connected to IO-APIC");
pin1 = pin2;
apic1 = apic2;
@@ -2945,7 +2788,7 @@ static inline void __init check_timer(void)
clear_IO_APIC_pin(0, pin1);
goto out;
}
- if (intr_remapping_enabled)
+ if (irq_remapping_enabled)
panic("timer doesn't work through Interrupt-remapped IO-APIC");
local_irq_disable();
clear_IO_APIC_pin(apic1, pin1);
@@ -3169,7 +3012,7 @@ void destroy_irq(unsigned int irq)
irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
if (irq_remapped(cfg))
- free_irte(irq);
+ free_remapped_irq(irq);
raw_spin_lock_irqsave(&vector_lock, flags);
__clear_irq_vector(irq, cfg);
raw_spin_unlock_irqrestore(&vector_lock, flags);
@@ -3198,54 +3041,34 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
if (irq_remapped(cfg)) {
- struct irte irte;
- int ir_index;
- u16 sub_handle;
-
- ir_index = map_irq_to_irte_handle(irq, &sub_handle);
- BUG_ON(ir_index == -1);
-
- prepare_irte(&irte, cfg->vector, dest);
-
- /* Set source-id of interrupt request */
- if (pdev)
- set_msi_sid(&irte, pdev);
- else
- set_hpet_sid(&irte, hpet_id);
-
- modify_irte(irq, &irte);
+ compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id);
+ return err;
+ }
+ if (x2apic_enabled())
+ msg->address_hi = MSI_ADDR_BASE_HI |
+ MSI_ADDR_EXT_DEST_ID(dest);
+ else
msg->address_hi = MSI_ADDR_BASE_HI;
- msg->data = sub_handle;
- msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
- MSI_ADDR_IR_SHV |
- MSI_ADDR_IR_INDEX1(ir_index) |
- MSI_ADDR_IR_INDEX2(ir_index);
- } else {
- if (x2apic_enabled())
- msg->address_hi = MSI_ADDR_BASE_HI |
- MSI_ADDR_EXT_DEST_ID(dest);
- else
- msg->address_hi = MSI_ADDR_BASE_HI;
- msg->address_lo =
- MSI_ADDR_BASE_LO |
- ((apic->irq_dest_mode == 0) ?
- MSI_ADDR_DEST_MODE_PHYSICAL:
- MSI_ADDR_DEST_MODE_LOGICAL) |
- ((apic->irq_delivery_mode != dest_LowestPrio) ?
- MSI_ADDR_REDIRECTION_CPU:
- MSI_ADDR_REDIRECTION_LOWPRI) |
- MSI_ADDR_DEST_ID(dest);
+ msg->address_lo =
+ MSI_ADDR_BASE_LO |
+ ((apic->irq_dest_mode == 0) ?
+ MSI_ADDR_DEST_MODE_PHYSICAL:
+ MSI_ADDR_DEST_MODE_LOGICAL) |
+ ((apic->irq_delivery_mode != dest_LowestPrio) ?
+ MSI_ADDR_REDIRECTION_CPU:
+ MSI_ADDR_REDIRECTION_LOWPRI) |
+ MSI_ADDR_DEST_ID(dest);
+
+ msg->data =
+ MSI_DATA_TRIGGER_EDGE |
+ MSI_DATA_LEVEL_ASSERT |
+ ((apic->irq_delivery_mode != dest_LowestPrio) ?
+ MSI_DATA_DELIVERY_FIXED:
+ MSI_DATA_DELIVERY_LOWPRI) |
+ MSI_DATA_VECTOR(cfg->vector);
- msg->data =
- MSI_DATA_TRIGGER_EDGE |
- MSI_DATA_LEVEL_ASSERT |
- ((apic->irq_delivery_mode != dest_LowestPrio) ?
- MSI_DATA_DELIVERY_FIXED:
- MSI_DATA_DELIVERY_LOWPRI) |
- MSI_DATA_VECTOR(cfg->vector);
- }
return err;
}
@@ -3288,33 +3111,6 @@ static struct irq_chip msi_chip = {
.irq_retrigger = ioapic_retrigger_irq,
};
-/*
- * Map the PCI dev to the corresponding remapping hardware unit
- * and allocate 'nvec' consecutive interrupt-remapping table entries
- * in it.
- */
-static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
-{
- struct intel_iommu *iommu;
- int index;
-
- iommu = map_dev_to_ir(dev);
- if (!iommu) {
- printk(KERN_ERR
- "Unable to map PCI %s to iommu\n", pci_name(dev));
- return -ENOENT;
- }
-
- index = alloc_irte(iommu, irq, nvec);
- if (index < 0) {
- printk(KERN_ERR
- "Unable to allocate %d IRTE for PCI %s\n", nvec,
- pci_name(dev));
- return -ENOSPC;
- }
- return index;
-}
-
static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
{
struct irq_chip *chip = &msi_chip;
@@ -3345,7 +3141,6 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
int node, ret, sub_handle, index = 0;
unsigned int irq, irq_want;
struct msi_desc *msidesc;
- struct intel_iommu *iommu = NULL;
/* x86 doesn't support multiple MSI yet */
if (type == PCI_CAP_ID_MSI && nvec > 1)
@@ -3359,7 +3154,7 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
if (irq == 0)
return -1;
irq_want = irq + 1;
- if (!intr_remapping_enabled)
+ if (!irq_remapping_enabled)
goto no_ir;
if (!sub_handle) {
@@ -3367,23 +3162,16 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
* allocate the consecutive block of IRTE's
* for 'nvec'
*/
- index = msi_alloc_irte(dev, irq, nvec);
+ index = msi_alloc_remapped_irq(dev, irq, nvec);
if (index < 0) {
ret = index;
goto error;
}
} else {
- iommu = map_dev_to_ir(dev);
- if (!iommu) {
- ret = -ENOENT;
+ ret = msi_setup_remapped_irq(dev, irq, index,
+ sub_handle);
+ if (ret < 0)
goto error;
- }
- /*
- * setup the mapping between the irq and the IRTE
- * base index, the sub_handle pointing to the
- * appropriate interrupt remap table entry.
- */
- set_irte_irq(irq, iommu, index, sub_handle);
}
no_ir:
ret = setup_msi_irq(dev, msidesc, irq);
@@ -3501,15 +3289,8 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
struct msi_msg msg;
int ret;
- if (intr_remapping_enabled) {
- struct intel_iommu *iommu = map_hpet_to_ir(id);
- int index;
-
- if (!iommu)
- return -1;
-
- index = alloc_irte(iommu, irq, 1);
- if (index < 0)
+ if (irq_remapping_enabled) {
+ if (!setup_hpet_msi_remapped(irq, id))
return -1;
}
@@ -3888,8 +3669,8 @@ void __init setup_ioapic_dest(void)
else
mask = apic->target_cpus();
- if (intr_remapping_enabled)
- ir_ioapic_set_affinity(idata, mask, false);
+ if (irq_remapping_enabled)
+ set_remapped_irq_affinity(idata, mask, false);
else
ioapic_set_affinity(idata, mask, false);
}
@@ -3931,12 +3712,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
return res;
}
-void __init ioapic_and_gsi_init(void)
-{
- io_apic_ops.init();
-}
-
-static void __init __ioapic_init_mappings(void)
+void __init native_io_apic_init_mappings(void)
{
unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
struct resource *ioapic_res;
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 00d2422ca7c..f00a68cca37 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -530,6 +530,7 @@ static struct apic __refdata apic_numaq = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
+ .eoi_write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index ff2c1b9aac4..1b291da09e6 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -142,6 +142,7 @@ static struct apic apic_default = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
+ .eoi_write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index fea000b27f0..659897c0075 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -546,6 +546,7 @@ static struct apic apic_summit = {
.read = native_apic_mem_read,
.write = native_apic_mem_write,
+ .eoi_write = native_apic_mem_write,
.icr_read = native_apic_icr_read,
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 48f3103b3c9..ff35cff0e1a 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -260,6 +260,7 @@ static struct apic apic_x2apic_cluster = {
.read = native_apic_msr_read,
.write = native_apic_msr_write,
+ .eoi_write = native_apic_msr_eoi_write,
.icr_read = native_x2apic_icr_read,
.icr_write = native_x2apic_icr_write,
.wait_icr_idle = native_x2apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 991e315f422..c17e982db27 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -172,6 +172,7 @@ static struct apic apic_x2apic_phys = {
.read = native_apic_msr_read,
.write = native_apic_msr_write,
+ .eoi_write = native_apic_msr_eoi_write,
.icr_read = native_x2apic_icr_read,
.icr_write = native_x2apic_icr_write,
.wait_icr_idle = native_x2apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 87bfa69e216..c6d03f7a440 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -404,6 +404,7 @@ static struct apic __refdata apic_x2apic_uv_x = {
.read = native_apic_msr_read,
.write = native_apic_msr_write,
+ .eoi_write = native_apic_msr_eoi_write,
.icr_read = native_x2apic_icr_read,
.icr_write = native_x2apic_icr_write,
.wait_icr_idle = native_x2apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 2245eb5bd06..a899ba2add0 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -2401,7 +2401,7 @@ static void __exit apm_exit(void)
* (pm_idle), Wait for all processors to update cached/local
* copies of pm_idle before proceeding.
*/
- cpu_idle_wait();
+ kick_all_cpus_sync();
}
if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0)
&& (apm_info.connection_version > 0x0100)) {
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 5da1269e8dd..e2dbcb7dabd 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -27,21 +27,29 @@ static int num_scan_areas;
static __init int set_corruption_check(char *arg)
{
- char *end;
+ ssize_t ret;
+ unsigned long val;
- memory_corruption_check = simple_strtol(arg, &end, 10);
+ ret = kstrtoul(arg, 10, &val);
+ if (ret)
+ return ret;
- return (*end == 0) ? 0 : -EINVAL;
+ memory_corruption_check = val;
+ return 0;
}
early_param("memory_corruption_check", set_corruption_check);
static __init int set_corruption_check_period(char *arg)
{
- char *end;
+ ssize_t ret;
+ unsigned long val;
- corruption_check_period = simple_strtoul(arg, &end, 10);
+ ret = kstrtoul(arg, 10, &val);
+ if (ret)
+ return ret;
- return (*end == 0) ? 0 : -EINVAL;
+ corruption_check_period = val;
+ return 0;
}
early_param("memory_corruption_check_period", set_corruption_check_period);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cf79302198a..6b9333b429b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1101,14 +1101,20 @@ int is_debug_stack(unsigned long addr)
addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
}
+static DEFINE_PER_CPU(u32, debug_stack_use_ctr);
+
void debug_stack_set_zero(void)
{
+ this_cpu_inc(debug_stack_use_ctr);
load_idt((const struct desc_ptr *)&nmi_idt_descr);
}
void debug_stack_reset(void)
{
- load_idt((const struct desc_ptr *)&idt_descr);
+ if (WARN_ON(!this_cpu_read(debug_stack_use_ctr)))
+ return;
+ if (this_cpu_dec_return(debug_stack_use_ctr) == 0)
+ load_idt((const struct desc_ptr *)&idt_descr);
}
#else /* CONFIG_X86_64 */
@@ -1185,7 +1191,7 @@ void __cpuinit cpu_init(void)
oist = &per_cpu(orig_ist, cpu);
#ifdef CONFIG_NUMA
- if (cpu != 0 && percpu_read(numa_node) == 0 &&
+ if (cpu != 0 && this_cpu_read(numa_node) == 0 &&
early_cpu_to_node(cpu) != NUMA_NO_NODE)
set_numa_node(early_cpu_to_node(cpu));
#endif
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index b8f3653dddb..9a7c90d80bc 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -615,14 +615,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
new_l2 = this_leaf.size/1024;
num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
index_msb = get_count_order(num_threads_sharing);
- l2_id = c->apicid >> index_msb;
+ l2_id = c->apicid & ~((1 << index_msb) - 1);
break;
case 3:
new_l3 = this_leaf.size/1024;
num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
index_msb = get_count_order(
num_threads_sharing);
- l3_id = c->apicid >> index_msb;
+ l3_id = c->apicid & ~((1 << index_msb) - 1);
break;
default:
break;
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index 5502b289341..36565373af8 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -23,7 +23,7 @@
* %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor)
*
* Arrays used to match for this should also be declared using
- * MODULE_DEVICE_TABLE(x86_cpu, ...)
+ * MODULE_DEVICE_TABLE(x86cpu, ...)
*
* This always matches against the boot cpu, assuming models and features are
* consistent over all CPUs.
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index 507ea58688e..cd8b166a173 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -42,7 +42,8 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
struct mce m;
/* Only corrected MC is reported */
- if (!corrected)
+ if (!corrected || !(mem_err->validation_bits &
+ CPER_MEM_VALID_PHYSICAL_ADDRESS))
return;
mce_setup(&m);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 1ccd453903d..413c2ced887 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -126,6 +126,16 @@ static struct severity {
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
USER
),
+ MCESEV(
+ KEEP, "HT thread notices Action required: instruction fetch error",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
+ MCGMASK(MCG_STATUS_EIPV, 0)
+ ),
+ MCESEV(
+ AR, "Action required: instruction fetch error",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
+ USER
+ ),
#endif
MCESEV(
PANIC, "Action required: unknown MCACOD",
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 61604aefc40..c46ed494f00 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -591,7 +591,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
struct mce m;
int i;
- percpu_inc(mce_poll_count);
+ this_cpu_inc(mce_poll_count);
mce_gather_info(&m, NULL);
@@ -649,16 +649,18 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
* Do a quick check if any of the events requires a panic.
* This decides if we keep the events around or clear them.
*/
-static int mce_no_way_out(struct mce *m, char **msg)
+static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp)
{
- int i;
+ int i, ret = 0;
for (i = 0; i < banks; i++) {
m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+ if (m->status & MCI_STATUS_VAL)
+ __set_bit(i, validp);
if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
- return 1;
+ ret = 1;
}
- return 0;
+ return ret;
}
/*
@@ -1021,11 +1023,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
*/
int kill_it = 0;
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
+ DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
char *msg = "Unknown";
atomic_inc(&mce_entry);
- percpu_inc(mce_exception_count);
+ this_cpu_inc(mce_exception_count);
if (!banks)
goto out;
@@ -1035,7 +1038,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
final = &__get_cpu_var(mces_seen);
*final = m;
- no_way_out = mce_no_way_out(&m, &msg);
+ memset(valid_banks, 0, sizeof(valid_banks));
+ no_way_out = mce_no_way_out(&m, &msg, valid_banks);
barrier();
@@ -1055,6 +1059,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
order = mce_start(&no_way_out);
for (i = 0; i < banks; i++) {
__clear_bit(i, toclear);
+ if (!test_bit(i, valid_banks))
+ continue;
if (!mce_banks[i].ctl)
continue;
@@ -1180,6 +1186,7 @@ void mce_notify_process(void)
{
unsigned long pfn;
struct mce_info *mi = mce_find_info();
+ int flags = MF_ACTION_REQUIRED;
if (!mi)
mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL);
@@ -1194,8 +1201,9 @@ void mce_notify_process(void)
* doomed. We still need to mark the page as poisoned and alert any
* other users of the page.
*/
- if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 ||
- mi->restartable == 0) {
+ if (!mi->restartable)
+ flags |= MF_MUST_KILL;
+ if (memory_failure(pfn, MCE_VECTOR, flags) < 0) {
pr_err("Memory error not recovered");
force_sig(SIGBUS, current);
}
@@ -1245,15 +1253,15 @@ void mce_log_therm_throt_event(__u64 status)
* poller finds an MCE, poll 2x faster. When the poller finds no more
* errors, poll 2x slower (up to check_interval seconds).
*/
-static int check_interval = 5 * 60; /* 5 minutes */
+static unsigned long check_interval = 5 * 60; /* 5 minutes */
-static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
+static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);
-static void mce_start_timer(unsigned long data)
+static void mce_timer_fn(unsigned long data)
{
- struct timer_list *t = &per_cpu(mce_timer, data);
- int *n;
+ struct timer_list *t = &__get_cpu_var(mce_timer);
+ unsigned long iv;
WARN_ON(smp_processor_id() != data);
@@ -1266,13 +1274,14 @@ static void mce_start_timer(unsigned long data)
* Alert userspace if needed. If we logged an MCE, reduce the
* polling interval, otherwise increase the polling interval.
*/
- n = &__get_cpu_var(mce_next_interval);
+ iv = __this_cpu_read(mce_next_interval);
if (mce_notify_irq())
- *n = max(*n/2, HZ/100);
+ iv = max(iv / 2, (unsigned long) HZ/100);
else
- *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
+ iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+ __this_cpu_write(mce_next_interval, iv);
- t->expires = jiffies + *n;
+ t->expires = jiffies + iv;
add_timer_on(t, smp_processor_id());
}
@@ -1439,6 +1448,43 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
*/
if (c->x86 == 6 && banks > 0)
mce_banks[0].ctl = 0;
+
+ /*
+ * Turn off MC4_MISC thresholding banks on those models since
+ * they're not supported there.
+ */
+ if (c->x86 == 0x15 &&
+ (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
+ int i;
+ u64 val, hwcr;
+ bool need_toggle;
+ u32 msrs[] = {
+ 0x00000413, /* MC4_MISC0 */
+ 0xc0000408, /* MC4_MISC1 */
+ };
+
+ rdmsrl(MSR_K7_HWCR, hwcr);
+
+ /* McStatusWrEn has to be set */
+ need_toggle = !(hwcr & BIT(18));
+
+ if (need_toggle)
+ wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
+
+ for (i = 0; i < ARRAY_SIZE(msrs); i++) {
+ rdmsrl(msrs[i], val);
+
+ /* CntP bit set? */
+ if (val & BIT_64(62)) {
+ val &= ~BIT_64(62);
+ wrmsrl(msrs[i], val);
+ }
+ }
+
+ /* restore old settings */
+ if (need_toggle)
+ wrmsrl(MSR_K7_HWCR, hwcr);
+ }
}
if (c->x86_vendor == X86_VENDOR_INTEL) {
@@ -1513,17 +1559,17 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
static void __mcheck_cpu_init_timer(void)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
- int *n = &__get_cpu_var(mce_next_interval);
+ unsigned long iv = check_interval * HZ;
- setup_timer(t, mce_start_timer, smp_processor_id());
+ setup_timer(t, mce_timer_fn, smp_processor_id());
if (mce_ignore_ce)
return;
- *n = check_interval * HZ;
- if (!*n)
+ __this_cpu_write(mce_next_interval, iv);
+ if (!iv)
return;
- t->expires = round_jiffies(jiffies + *n);
+ t->expires = round_jiffies(jiffies + iv);
add_timer_on(t, smp_processor_id());
}
@@ -2233,7 +2279,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_DOWN_FAILED_FROZEN:
if (!mce_ignore_ce && check_interval) {
t->expires = round_jiffies(jiffies +
- __get_cpu_var(mce_next_interval));
+ per_cpu(mce_next_interval, cpu));
add_timer_on(t, cpu);
}
smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 2c1d178be46..f4873a64f46 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -421,10 +421,10 @@ RW_ATTR(threshold_limit);
RW_ATTR(error_count);
static struct attribute *default_attrs[] = {
- &interrupt_enable.attr,
&threshold_limit.attr,
&error_count.attr,
- NULL
+ NULL, /* possibly interrupt_enable if supported, see below */
+ NULL,
};
#define to_block(k) container_of(k, struct threshold_block, kobj)
@@ -501,6 +501,11 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
b->interrupt_capable = lvt_interrupt_supported(bank, high);
b->threshold_limit = THRESHOLD_MAX;
+ if (b->interrupt_capable)
+ threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
+ else
+ threshold_ktype.default_attrs[2] = NULL;
+
INIT_LIST_HEAD(&b->miscj);
if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl
index dfea390e160..c7b3fe2d72e 100644
--- a/arch/x86/kernel/cpu/mkcapflags.pl
+++ b/arch/x86/kernel/cpu/mkcapflags.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/perl -w
#
# Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h
#
@@ -11,22 +11,35 @@ open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n";
print OUT "#include <asm/cpufeature.h>\n\n";
print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n";
+%features = ();
+$err = 0;
+
while (defined($line = <IN>)) {
if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) {
$macro = $1;
- $feature = $2;
+ $feature = "\L$2";
$tail = $3;
if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) {
- $feature = $1;
+ $feature = "\L$1";
}
- if ($feature ne '') {
- printf OUT "\t%-32s = \"%s\",\n",
- "[$macro]", "\L$feature";
+ next if ($feature eq '');
+
+ if ($features{$feature}++) {
+ print STDERR "$in: duplicate feature name: $feature\n";
+ $err++;
}
+ printf OUT "\t%-32s = \"%s\",\n", "[$macro]", $feature;
}
}
print OUT "};\n";
close(IN);
close(OUT);
+
+if ($err) {
+ unlink($out);
+ exit(1);
+}
+
+exit(0);
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index ac140c7be39..bdda2e6c673 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -266,7 +266,7 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
if (align > max_align)
align = max_align;
- sizek = 1 << align;
+ sizek = 1UL << align;
if (debug_print) {
char start_factor = 'K', size_factor = 'K';
unsigned long start_base, size_base;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index bb8e03407e1..c4706cf9c01 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -484,9 +484,6 @@ static int __x86_pmu_event_init(struct perf_event *event)
/* mark unused */
event->hw.extra_reg.idx = EXTRA_REG_NONE;
-
- /* mark not used */
- event->hw.extra_reg.idx = EXTRA_REG_NONE;
event->hw.branch_reg.idx = EXTRA_REG_NONE;
return x86_pmu.hw_config(event);
@@ -1186,8 +1183,6 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
int idx, handled = 0;
u64 val;
- perf_sample_data_init(&data, 0);
-
cpuc = &__get_cpu_var(cpu_hw_events);
/*
@@ -1222,7 +1217,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
* event overflow
*/
handled++;
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
if (!x86_perf_event_set_period(event))
continue;
@@ -1501,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
if (!cpuc->shared_regs)
goto error;
}
+ cpuc->is_fake = 1;
return cpuc;
error:
free_fake_cpuc(cpuc);
@@ -1761,6 +1757,12 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
}
+static inline int
+valid_user_frame(const void __user *fp, unsigned long size)
+{
+ return (__range_not_ok(fp, size, TASK_SIZE) == 0);
+}
+
#ifdef CONFIG_COMPAT
#include <asm/compat.h>
@@ -1785,7 +1787,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
if (bytes != sizeof(frame))
break;
- if (fp < compat_ptr(regs->sp))
+ if (!valid_user_frame(fp, sizeof(frame)))
break;
perf_callchain_store(entry, frame.return_address);
@@ -1831,7 +1833,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
if (bytes != sizeof(frame))
break;
- if ((unsigned long)fp < regs->sp)
+ if (!valid_user_frame(fp, sizeof(frame)))
break;
perf_callchain_store(entry, frame.return_address);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf5449..7241e2fc3c1 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
unsigned int group_flag;
+ int is_fake;
/*
* Intel DebugStore bits
@@ -364,6 +365,7 @@ struct x86_pmu {
int pebs_record_size;
void (*drain_pebs)(struct pt_regs *regs);
struct event_constraint *pebs_constraints;
+ void (*pebs_aliases)(struct perf_event *event);
/*
* Intel LBR
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 9edc786aef8..11a4eb9131d 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -134,8 +134,13 @@ static u64 amd_pmu_event_map(int hw_event)
static int amd_pmu_hw_config(struct perf_event *event)
{
- int ret = x86_pmu_hw_config(event);
+ int ret;
+ /* pass precise event sampling to ibs: */
+ if (event->attr.precise_ip && get_ibs_caps())
+ return -ENOENT;
+
+ ret = x86_pmu_hw_config(event);
if (ret)
return ret;
@@ -205,10 +210,8 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
* when we come here
*/
for (i = 0; i < x86_pmu.num_counters; i++) {
- if (nb->owners[i] == event) {
- cmpxchg(nb->owners+i, event, NULL);
+ if (cmpxchg(nb->owners + i, event, NULL) == event)
break;
- }
}
}
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 3b8a2d30d14..da9bcdcd985 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -9,6 +9,7 @@
#include <linux/perf_event.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/ptrace.h>
#include <asm/apic.h>
@@ -16,36 +17,591 @@ static u32 ibs_caps;
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
-static struct pmu perf_ibs;
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+
+#include <asm/nmi.h>
+
+#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
+#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
+
+enum ibs_states {
+ IBS_ENABLED = 0,
+ IBS_STARTED = 1,
+ IBS_STOPPING = 2,
+
+ IBS_MAX_STATES,
+};
+
+struct cpu_perf_ibs {
+ struct perf_event *event;
+ unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)];
+};
+
+struct perf_ibs {
+ struct pmu pmu;
+ unsigned int msr;
+ u64 config_mask;
+ u64 cnt_mask;
+ u64 enable_mask;
+ u64 valid_mask;
+ u64 max_period;
+ unsigned long offset_mask[1];
+ int offset_max;
+ struct cpu_perf_ibs __percpu *pcpu;
+ u64 (*get_count)(u64 config);
+};
+
+struct perf_ibs_data {
+ u32 size;
+ union {
+ u32 data[0]; /* data buffer starts here */
+ u32 caps;
+ };
+ u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
+};
+
+static int
+perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
+{
+ s64 left = local64_read(&hwc->period_left);
+ s64 period = hwc->sample_period;
+ int overflow = 0;
+
+ /*
+ * If we are way outside a reasonable range then just skip forward:
+ */
+ if (unlikely(left <= -period)) {
+ left = period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ overflow = 1;
+ }
+
+ if (unlikely(left < (s64)min)) {
+ left += period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ overflow = 1;
+ }
+
+ /*
+ * If the hw period that triggers the sw overflow is too short
+ * we might hit the irq handler. This biases the results.
+ * Thus we shorten the next-to-last period and set the last
+ * period to the max period.
+ */
+ if (left > max) {
+ left -= max;
+ if (left > max)
+ left = max;
+ else if (left < min)
+ left = min;
+ }
+
+ *hw_period = (u64)left;
+
+ return overflow;
+}
+
+static int
+perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int shift = 64 - width;
+ u64 prev_raw_count;
+ u64 delta;
+
+ /*
+ * Careful: an NMI might modify the previous event value.
+ *
+ * Our tactic to handle this is to first atomically read and
+ * exchange a new raw count - then add that new-prev delta
+ * count to the generic event atomically:
+ */
+ prev_raw_count = local64_read(&hwc->prev_count);
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count)
+ return 0;
+
+ /*
+ * Now we have the new raw value and have updated the prev
+ * timestamp already. We can now calculate the elapsed delta
+ * (event-)time and add that to the generic event.
+ *
+ * Careful, not all hw sign-extends above the physical width
+ * of the count.
+ */
+ delta = (new_raw_count << shift) - (prev_raw_count << shift);
+ delta >>= shift;
+
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
+
+ return 1;
+}
+
+static struct perf_ibs perf_ibs_fetch;
+static struct perf_ibs perf_ibs_op;
+
+static struct perf_ibs *get_ibs_pmu(int type)
+{
+ if (perf_ibs_fetch.pmu.type == type)
+ return &perf_ibs_fetch;
+ if (perf_ibs_op.pmu.type == type)
+ return &perf_ibs_op;
+ return NULL;
+}
+
+/*
+ * Use IBS for precise event sampling:
+ *
+ * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
+ * perf record -a -e r076:p ... # same as -e cpu-cycles:p
+ * perf record -a -e r0C1:p ... # use ibs op counting micro-ops
+ *
+ * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
+ * MSRC001_1033) is used to select either cycle or micro-ops counting
+ * mode.
+ *
+ * The rip of IBS samples has skid 0. Thus, IBS supports precise
+ * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
+ * rip is invalid when IBS was not able to record the rip correctly.
+ * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
+ *
+ */
+static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
+{
+ switch (event->attr.precise_ip) {
+ case 0:
+ return -ENOENT;
+ case 1:
+ case 2:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ switch (event->attr.type) {
+ case PERF_TYPE_HARDWARE:
+ switch (event->attr.config) {
+ case PERF_COUNT_HW_CPU_CYCLES:
+ *config = 0;
+ return 0;
+ }
+ break;
+ case PERF_TYPE_RAW:
+ switch (event->attr.config) {
+ case 0x0076:
+ *config = 0;
+ return 0;
+ case 0x00C1:
+ *config = IBS_OP_CNT_CTL;
+ return 0;
+ }
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ return -EOPNOTSUPP;
+}
static int perf_ibs_init(struct perf_event *event)
{
- if (perf_ibs.type != event->attr.type)
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs;
+ u64 max_cnt, config;
+ int ret;
+
+ perf_ibs = get_ibs_pmu(event->attr.type);
+ if (perf_ibs) {
+ config = event->attr.config;
+ } else {
+ perf_ibs = &perf_ibs_op;
+ ret = perf_ibs_precise_event(event, &config);
+ if (ret)
+ return ret;
+ }
+
+ if (event->pmu != &perf_ibs->pmu)
return -ENOENT;
+
+ if (config & ~perf_ibs->config_mask)
+ return -EINVAL;
+
+ if (hwc->sample_period) {
+ if (config & perf_ibs->cnt_mask)
+ /* raw max_cnt may not be set */
+ return -EINVAL;
+ if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
+ /*
+ * lower 4 bits can not be set in ibs max cnt,
+ * but allowing it in case we adjust the
+ * sample period to set a frequency.
+ */
+ return -EINVAL;
+ hwc->sample_period &= ~0x0FULL;
+ if (!hwc->sample_period)
+ hwc->sample_period = 0x10;
+ } else {
+ max_cnt = config & perf_ibs->cnt_mask;
+ config &= ~perf_ibs->cnt_mask;
+ event->attr.sample_period = max_cnt << 4;
+ hwc->sample_period = event->attr.sample_period;
+ }
+
+ if (!hwc->sample_period)
+ return -EINVAL;
+
+ /*
+ * If we modify hwc->sample_period, we also need to update
+ * hwc->last_period and hwc->period_left.
+ */
+ hwc->last_period = hwc->sample_period;
+ local64_set(&hwc->period_left, hwc->sample_period);
+
+ hwc->config_base = perf_ibs->msr;
+ hwc->config = config;
+
return 0;
}
+static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
+ struct hw_perf_event *hwc, u64 *period)
+{
+ int overflow;
+
+ /* ignore lower 4 bits in min count: */
+ overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
+ local64_set(&hwc->prev_count, 0);
+
+ return overflow;
+}
+
+static u64 get_ibs_fetch_count(u64 config)
+{
+ return (config & IBS_FETCH_CNT) >> 12;
+}
+
+static u64 get_ibs_op_count(u64 config)
+{
+ u64 count = 0;
+
+ if (config & IBS_OP_VAL)
+ count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
+
+ if (ibs_caps & IBS_CAPS_RDWROPCNT)
+ count += (config & IBS_OP_CUR_CNT) >> 32;
+
+ return count;
+}
+
+static void
+perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
+ u64 *config)
+{
+ u64 count = perf_ibs->get_count(*config);
+
+ /*
+ * Set width to 64 since we do not overflow on max width but
+ * instead on max count. In perf_ibs_set_period() we clear
+ * prev count manually on overflow.
+ */
+ while (!perf_event_try_update(event, count, 64)) {
+ rdmsrl(event->hw.config_base, *config);
+ count = perf_ibs->get_count(*config);
+ }
+}
+
+static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
+ struct hw_perf_event *hwc, u64 config)
+{
+ wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
+}
+
+/*
+ * Erratum #420 Instruction-Based Sampling Engine May Generate
+ * Interrupt that Cannot Be Cleared:
+ *
+ * Must clear counter mask first, then clear the enable bit. See
+ * Revision Guide for AMD Family 10h Processors, Publication #41322.
+ */
+static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
+ struct hw_perf_event *hwc, u64 config)
+{
+ config &= ~perf_ibs->cnt_mask;
+ wrmsrl(hwc->config_base, config);
+ config &= ~perf_ibs->enable_mask;
+ wrmsrl(hwc->config_base, config);
+}
+
+/*
+ * We cannot restore the ibs pmu state, so we always needs to update
+ * the event while stopping it and then reset the state when starting
+ * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in
+ * perf_ibs_start()/perf_ibs_stop() and instead always do it.
+ */
+static void perf_ibs_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+ u64 period;
+
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+ hwc->state = 0;
+
+ perf_ibs_set_period(perf_ibs, hwc, &period);
+ set_bit(IBS_STARTED, pcpu->state);
+ perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+ perf_event_update_userpage(event);
+}
+
+static void perf_ibs_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+ u64 config;
+ int stopping;
+
+ stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
+
+ if (!stopping && (hwc->state & PERF_HES_UPTODATE))
+ return;
+
+ rdmsrl(hwc->config_base, config);
+
+ if (stopping) {
+ set_bit(IBS_STOPPING, pcpu->state);
+ perf_ibs_disable_event(perf_ibs, hwc, config);
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+ }
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ /*
+ * Clear valid bit to not count rollovers on update, rollovers
+ * are only updated in the irq handler.
+ */
+ config &= ~perf_ibs->valid_mask;
+
+ perf_ibs_event_update(perf_ibs, event, &config);
+ hwc->state |= PERF_HES_UPTODATE;
+}
+
static int perf_ibs_add(struct perf_event *event, int flags)
{
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+ if (test_and_set_bit(IBS_ENABLED, pcpu->state))
+ return -ENOSPC;
+
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ pcpu->event = event;
+
+ if (flags & PERF_EF_START)
+ perf_ibs_start(event, PERF_EF_RELOAD);
+
return 0;
}
static void perf_ibs_del(struct perf_event *event, int flags)
{
+ struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+ if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
+ return;
+
+ perf_ibs_stop(event, PERF_EF_UPDATE);
+
+ pcpu->event = NULL;
+
+ perf_event_update_userpage(event);
}
-static struct pmu perf_ibs = {
- .event_init= perf_ibs_init,
- .add= perf_ibs_add,
- .del= perf_ibs_del,
+static void perf_ibs_read(struct perf_event *event) { }
+
+static struct perf_ibs perf_ibs_fetch = {
+ .pmu = {
+ .task_ctx_nr = perf_invalid_context,
+
+ .event_init = perf_ibs_init,
+ .add = perf_ibs_add,
+ .del = perf_ibs_del,
+ .start = perf_ibs_start,
+ .stop = perf_ibs_stop,
+ .read = perf_ibs_read,
+ },
+ .msr = MSR_AMD64_IBSFETCHCTL,
+ .config_mask = IBS_FETCH_CONFIG_MASK,
+ .cnt_mask = IBS_FETCH_MAX_CNT,
+ .enable_mask = IBS_FETCH_ENABLE,
+ .valid_mask = IBS_FETCH_VAL,
+ .max_period = IBS_FETCH_MAX_CNT << 4,
+ .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
+ .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
+
+ .get_count = get_ibs_fetch_count,
};
+static struct perf_ibs perf_ibs_op = {
+ .pmu = {
+ .task_ctx_nr = perf_invalid_context,
+
+ .event_init = perf_ibs_init,
+ .add = perf_ibs_add,
+ .del = perf_ibs_del,
+ .start = perf_ibs_start,
+ .stop = perf_ibs_stop,
+ .read = perf_ibs_read,
+ },
+ .msr = MSR_AMD64_IBSOPCTL,
+ .config_mask = IBS_OP_CONFIG_MASK,
+ .cnt_mask = IBS_OP_MAX_CNT,
+ .enable_mask = IBS_OP_ENABLE,
+ .valid_mask = IBS_OP_VAL,
+ .max_period = IBS_OP_MAX_CNT << 4,
+ .offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
+ .offset_max = MSR_AMD64_IBSOP_REG_COUNT,
+
+ .get_count = get_ibs_op_count,
+};
+
+static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
+{
+ struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+ struct perf_event *event = pcpu->event;
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+ struct pt_regs regs;
+ struct perf_ibs_data ibs_data;
+ int offset, size, check_rip, offset_max, throttle = 0;
+ unsigned int msr;
+ u64 *buf, *config, period;
+
+ if (!test_bit(IBS_STARTED, pcpu->state)) {
+ /*
+ * Catch spurious interrupts after stopping IBS: After
+ * disabling IBS there could be still incomming NMIs
+ * with samples that even have the valid bit cleared.
+ * Mark all this NMIs as handled.
+ */
+ return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
+ }
+
+ msr = hwc->config_base;
+ buf = ibs_data.regs;
+ rdmsrl(msr, *buf);
+ if (!(*buf++ & perf_ibs->valid_mask))
+ return 0;
+
+ config = &ibs_data.regs[0];
+ perf_ibs_event_update(perf_ibs, event, config);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!perf_ibs_set_period(perf_ibs, hwc, &period))
+ goto out; /* no sw counter overflow */
+
+ ibs_data.caps = ibs_caps;
+ size = 1;
+ offset = 1;
+ check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
+ if (event->attr.sample_type & PERF_SAMPLE_RAW)
+ offset_max = perf_ibs->offset_max;
+ else if (check_rip)
+ offset_max = 2;
+ else
+ offset_max = 1;
+ do {
+ rdmsrl(msr + offset, *buf++);
+ size++;
+ offset = find_next_bit(perf_ibs->offset_mask,
+ perf_ibs->offset_max,
+ offset + 1);
+ } while (offset < offset_max);
+ ibs_data.size = sizeof(u64) * size;
+
+ regs = *iregs;
+ if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
+ regs.flags &= ~PERF_EFLAGS_EXACT;
+ } else {
+ instruction_pointer_set(&regs, ibs_data.regs[1]);
+ regs.flags |= PERF_EFLAGS_EXACT;
+ }
+
+ if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+ raw.size = sizeof(u32) + ibs_data.size;
+ raw.data = ibs_data.data;
+ data.raw = &raw;
+ }
+
+ throttle = perf_event_overflow(event, &data, &regs);
+out:
+ if (throttle)
+ perf_ibs_disable_event(perf_ibs, hwc, *config);
+ else
+ perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+ perf_event_update_userpage(event);
+
+ return 1;
+}
+
+static int __kprobes
+perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
+{
+ int handled = 0;
+
+ handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
+ handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
+
+ if (handled)
+ inc_irq_stat(apic_perf_irqs);
+
+ return handled;
+}
+
+static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
+{
+ struct cpu_perf_ibs __percpu *pcpu;
+ int ret;
+
+ pcpu = alloc_percpu(struct cpu_perf_ibs);
+ if (!pcpu)
+ return -ENOMEM;
+
+ perf_ibs->pcpu = pcpu;
+
+ ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
+ if (ret) {
+ perf_ibs->pcpu = NULL;
+ free_percpu(pcpu);
+ }
+
+ return ret;
+}
+
static __init int perf_event_ibs_init(void)
{
if (!ibs_caps)
return -ENODEV; /* ibs not supported by the cpu */
- perf_pmu_register(&perf_ibs, "ibs", -1);
+ perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
+ if (ibs_caps & IBS_CAPS_OPCNT)
+ perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
+ perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+ register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
return 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 26b3e2fef10..187c294bc65 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1027,8 +1027,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
u64 status;
int handled;
- perf_sample_data_init(&data, 0);
-
cpuc = &__get_cpu_var(cpu_hw_events);
/*
@@ -1082,7 +1080,7 @@ again:
if (!intel_pmu_save_and_restart(event))
continue;
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;
@@ -1121,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
return NULL;
}
-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
{
if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
- return false;
+ return idx;
- if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
- event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
- event->hw.config |= 0x01bb;
- event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
- event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
- } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+ if (idx == EXTRA_REG_RSP_0)
+ return EXTRA_REG_RSP_1;
+
+ if (idx == EXTRA_REG_RSP_1)
+ return EXTRA_REG_RSP_0;
+
+ return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+ event->hw.extra_reg.idx = idx;
+
+ if (idx == EXTRA_REG_RSP_0) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
event->hw.config |= 0x01b7;
- event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+ } else if (idx == EXTRA_REG_RSP_1) {
+ event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+ event->hw.config |= 0x01bb;
+ event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
}
-
- if (event->hw.extra_reg.idx == orig_idx)
- return false;
-
- return true;
}
/*
@@ -1159,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
struct event_constraint *c = &emptyconstraint;
struct er_account *era;
unsigned long flags;
- int orig_idx = reg->idx;
+ int idx = reg->idx;
- /* already allocated shared msr */
- if (reg->alloc)
+ /*
+ * reg->alloc can be set due to existing state, so for fake cpuc we
+ * need to ignore this, otherwise we might fail to allocate proper fake
+ * state for this extra reg constraint. Also see the comment below.
+ */
+ if (reg->alloc && !cpuc->is_fake)
return NULL; /* call x86_get_event_constraint() */
again:
- era = &cpuc->shared_regs->regs[reg->idx];
+ era = &cpuc->shared_regs->regs[idx];
/*
* we use spin_lock_irqsave() to avoid lockdep issues when
* passing a fake cpuc
@@ -1175,6 +1183,29 @@ again:
if (!atomic_read(&era->ref) || era->config == reg->config) {
+ /*
+ * If its a fake cpuc -- as per validate_{group,event}() we
+ * shouldn't touch event state and we can avoid doing so
+ * since both will only call get_event_constraints() once
+ * on each event, this avoids the need for reg->alloc.
+ *
+ * Not doing the ER fixup will only result in era->reg being
+ * wrong, but since we won't actually try and program hardware
+ * this isn't a problem either.
+ */
+ if (!cpuc->is_fake) {
+ if (idx != reg->idx)
+ intel_fixup_er(event, idx);
+
+ /*
+ * x86_schedule_events() can call get_event_constraints()
+ * multiple times on events in the case of incremental
+ * scheduling(). reg->alloc ensures we only do the ER
+ * allocation once.
+ */
+ reg->alloc = 1;
+ }
+
/* lock in msr value */
era->config = reg->config;
era->reg = reg->reg;
@@ -1182,17 +1213,17 @@ again:
/* one more user */
atomic_inc(&era->ref);
- /* no need to reallocate during incremental event scheduling */
- reg->alloc = 1;
-
/*
* need to call x86_get_event_constraint()
* to check if associated event has constraints
*/
c = NULL;
- } else if (intel_try_alt_er(event, orig_idx)) {
- raw_spin_unlock_irqrestore(&era->lock, flags);
- goto again;
+ } else {
+ idx = intel_alt_er(idx);
+ if (idx != reg->idx) {
+ raw_spin_unlock_irqrestore(&era->lock, flags);
+ goto again;
+ }
}
raw_spin_unlock_irqrestore(&era->lock, flags);
@@ -1206,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
struct er_account *era;
/*
- * only put constraint if extra reg was actually
- * allocated. Also takes care of event which do
- * not use an extra shared reg
+ * Only put constraint if extra reg was actually allocated. Also takes
+ * care of event which do not use an extra shared reg.
+ *
+ * Also, if this is a fake cpuc we shouldn't touch any event state
+ * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+ * either since it'll be thrown out.
*/
- if (!reg->alloc)
+ if (!reg->alloc || cpuc->is_fake)
return;
era = &cpuc->shared_regs->regs[reg->idx];
@@ -1302,15 +1336,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
intel_put_shared_regs_event_constraints(cpuc, event);
}
-static int intel_pmu_hw_config(struct perf_event *event)
+static void intel_pebs_aliases_core2(struct perf_event *event)
{
- int ret = x86_pmu_hw_config(event);
-
- if (ret)
- return ret;
-
- if (event->attr.precise_ip &&
- (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+ if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
/*
* Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
* (0x003c) so that we can use it with PEBS.
@@ -1331,10 +1359,48 @@ static int intel_pmu_hw_config(struct perf_event *event)
*/
u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
+ alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+ event->hw.config = alt_config;
+ }
+}
+
+static void intel_pebs_aliases_snb(struct perf_event *event)
+{
+ if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+ /*
+ * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+ * (0x003c) so that we can use it with PEBS.
+ *
+ * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+ * PEBS capable. However we can use UOPS_RETIRED.ALL
+ * (0x01c2), which is a PEBS capable event, to get the same
+ * count.
+ *
+ * UOPS_RETIRED.ALL counts the number of cycles that retires
+ * CNTMASK micro-ops. By setting CNTMASK to a value (16)
+ * larger than the maximum number of micro-ops that can be
+ * retired per cycle (4) and then inverting the condition, we
+ * count all cycles that retire 16 or less micro-ops, which
+ * is every cycle.
+ *
+ * Thereby we gain a PEBS capable cycle counter.
+ */
+ u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
event->hw.config = alt_config;
}
+}
+
+static int intel_pmu_hw_config(struct perf_event *event)
+{
+ int ret = x86_pmu_hw_config(event);
+
+ if (ret)
+ return ret;
+
+ if (event->attr.precise_ip && x86_pmu.pebs_aliases)
+ x86_pmu.pebs_aliases(event);
if (intel_pmu_needs_lbr_smpl(event)) {
ret = intel_pmu_setup_lbr_filter(event);
@@ -1609,6 +1675,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.max_period = (1ULL << 31) - 1,
.get_event_constraints = intel_get_event_constraints,
.put_event_constraints = intel_put_event_constraints,
+ .pebs_aliases = intel_pebs_aliases_core2,
.format_attrs = intel_arch3_formats_attr,
@@ -1842,8 +1909,9 @@ __init int intel_pmu_init(void)
break;
case 42: /* SandyBridge */
- x86_add_quirk(intel_sandybridge_quirk);
case 45: /* SandyBridge, "Romely-EP" */
+ x86_add_quirk(intel_sandybridge_quirk);
+ case 58: /* IvyBridge */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -1851,6 +1919,7 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
x86_pmu.extra_regs = intel_snb_extra_regs;
/* all extra regs are per-cpu when HT is on */
x86_pmu.er_flags |= ERF_HAS_RSP_1;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 7f64df19e7d..35e2192df9f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -316,8 +316,7 @@ int intel_pmu_drain_bts_buffer(void)
ds->bts_index = ds->bts_buffer_base;
- perf_sample_data_init(&data, 0);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
regs.ip = 0;
/*
@@ -401,14 +400,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */
- INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */
- INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
- INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
- INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
- INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
- INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
- INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
- INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
+ INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
@@ -564,8 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
if (!intel_pmu_save_and_restart(event))
return;
- perf_sample_data_init(&data, 0);
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, event->hw.last_period);
/*
* We use the interrupt regs as a base because the PEBS record
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index a2dfacfd710..47124a73dd7 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1005,8 +1005,6 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
int idx, handled = 0;
u64 val;
- perf_sample_data_init(&data, 0);
-
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
@@ -1034,10 +1032,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
handled += overflow;
/* event overflow for sure */
- data.period = event->hw.last_period;
+ perf_sample_data_init(&data, 0, hwc->last_period);
if (!x86_perf_event_set_period(event))
continue;
+
+
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index addf9e82a7f..ee8e9abc859 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -31,7 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
const struct cpuid_bit *cb;
static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
- { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 },
+ { X86_FEATURE_DTHERM, CR_EAX, 0, 0x00000006, 0 },
{ X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 },
{ X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 },
{ X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 },
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 1b81839b6c8..571246d81ed 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -271,7 +271,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
return 1;
- show_registers(regs);
+ show_regs(regs);
#ifdef CONFIG_X86_32
if (user_mode_vm(regs)) {
sp = regs->sp;
@@ -311,16 +311,33 @@ void die(const char *str, struct pt_regs *regs, long err)
static int __init kstack_setup(char *s)
{
+ ssize_t ret;
+ unsigned long val;
+
if (!s)
return -EINVAL;
- kstack_depth_to_print = simple_strtoul(s, NULL, 0);
+
+ ret = kstrtoul(s, 0, &val);
+ if (ret)
+ return ret;
+ kstack_depth_to_print = val;
return 0;
}
early_param("kstack", kstack_setup);
static int __init code_bytes_setup(char *s)
{
- code_bytes = simple_strtoul(s, NULL, 0);
+ ssize_t ret;
+ unsigned long val;
+
+ if (!s)
+ return -EINVAL;
+
+ ret = kstrtoul(s, 0, &val);
+ if (ret)
+ return ret;
+
+ code_bytes = val;
if (code_bytes > 8192)
code_bytes = 8192;
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 88ec9129271..e0b1d783daa 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -82,7 +82,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
}
-void show_registers(struct pt_regs *regs)
+void show_regs(struct pt_regs *regs)
{
int i;
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 17107bd6e1f..791b76122aa 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -245,7 +245,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
-void show_registers(struct pt_regs *regs)
+void show_regs(struct pt_regs *regs)
{
int i;
unsigned long sp;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 62d61e9976e..41857970517 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -113,7 +113,9 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
int x = e820x->nr_map;
if (x >= ARRAY_SIZE(e820x->map)) {
- printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
+ printk(KERN_ERR "e820: too many entries; ignoring [mem %#010llx-%#010llx]\n",
+ (unsigned long long) start,
+ (unsigned long long) (start + size - 1));
return;
}
@@ -133,19 +135,19 @@ static void __init e820_print_type(u32 type)
switch (type) {
case E820_RAM:
case E820_RESERVED_KERN:
- printk(KERN_CONT "(usable)");
+ printk(KERN_CONT "usable");
break;
case E820_RESERVED:
- printk(KERN_CONT "(reserved)");
+ printk(KERN_CONT "reserved");
break;
case E820_ACPI:
- printk(KERN_CONT "(ACPI data)");
+ printk(KERN_CONT "ACPI data");
break;
case E820_NVS:
- printk(KERN_CONT "(ACPI NVS)");
+ printk(KERN_CONT "ACPI NVS");
break;
case E820_UNUSABLE:
- printk(KERN_CONT "(unusable)");
+ printk(KERN_CONT "unusable");
break;
default:
printk(KERN_CONT "type %u", type);
@@ -158,10 +160,10 @@ void __init e820_print_map(char *who)
int i;
for (i = 0; i < e820.nr_map; i++) {
- printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
+ printk(KERN_INFO "%s: [mem %#018Lx-%#018Lx] ", who,
(unsigned long long) e820.map[i].addr,
(unsigned long long)
- (e820.map[i].addr + e820.map[i].size));
+ (e820.map[i].addr + e820.map[i].size - 1));
e820_print_type(e820.map[i].type);
printk(KERN_CONT "\n");
}
@@ -428,9 +430,8 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
size = ULLONG_MAX - start;
end = start + size;
- printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ",
- (unsigned long long) start,
- (unsigned long long) end);
+ printk(KERN_DEBUG "e820: update [mem %#010Lx-%#010Lx] ",
+ (unsigned long long) start, (unsigned long long) (end - 1));
e820_print_type(old_type);
printk(KERN_CONT " ==> ");
e820_print_type(new_type);
@@ -509,9 +510,8 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
size = ULLONG_MAX - start;
end = start + size;
- printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
- (unsigned long long) start,
- (unsigned long long) end);
+ printk(KERN_DEBUG "e820: remove [mem %#010Lx-%#010Lx] ",
+ (unsigned long long) start, (unsigned long long) (end - 1));
if (checktype)
e820_print_type(old_type);
printk(KERN_CONT "\n");
@@ -567,7 +567,7 @@ void __init update_e820(void)
if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
return;
e820.nr_map = nr_map;
- printk(KERN_INFO "modified physical RAM map:\n");
+ printk(KERN_INFO "e820: modified physical RAM map:\n");
e820_print_map("modified");
}
static void __init update_e820_saved(void)
@@ -637,8 +637,8 @@ __init void e820_setup_gap(void)
if (!found) {
gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
printk(KERN_ERR
- "PCI: Warning: Cannot find a gap in the 32bit address range\n"
- "PCI: Unassigned devices with 32bit resource registers may break!\n");
+ "e820: cannot find a gap in the 32bit address range\n"
+ "e820: PCI devices with unassigned 32bit BARs may break!\n");
}
#endif
@@ -648,8 +648,8 @@ __init void e820_setup_gap(void)
pci_mem_start = gapstart;
printk(KERN_INFO
- "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
- pci_mem_start, gapstart, gapsize);
+ "e820: [mem %#010lx-%#010lx] available for PCI devices\n",
+ gapstart, gapstart + gapsize - 1);
}
/**
@@ -667,7 +667,7 @@ void __init parse_e820_ext(struct setup_data *sdata)
extmap = (struct e820entry *)(sdata->data);
__append_e820_map(extmap, entries);
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
- printk(KERN_INFO "extended physical RAM map:\n");
+ printk(KERN_INFO "e820: extended physical RAM map:\n");
e820_print_map("extended");
}
@@ -734,7 +734,7 @@ u64 __init early_reserve_e820(u64 size, u64 align)
addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
if (addr) {
e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED);
- printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
+ printk(KERN_INFO "e820: update e820_saved for early_reserve_e820\n");
update_e820_saved();
}
@@ -784,7 +784,7 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
if (last_pfn > max_arch_pfn)
last_pfn = max_arch_pfn;
- printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
+ printk(KERN_INFO "e820: last_pfn = %#lx max_arch_pfn = %#lx\n",
last_pfn, max_arch_pfn);
return last_pfn;
}
@@ -888,7 +888,7 @@ void __init finish_e820_parsing(void)
early_panic("Invalid user supplied memory map");
e820.nr_map = nr;
- printk(KERN_INFO "user-defined physical RAM map:\n");
+ printk(KERN_INFO "e820: user-defined physical RAM map:\n");
e820_print_map("user");
}
}
@@ -996,8 +996,9 @@ void __init e820_reserve_resources_late(void)
end = MAX_RESOURCE_SIZE;
if (start >= end)
continue;
- printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ",
- start, end);
+ printk(KERN_DEBUG
+ "e820: reserve RAM buffer [mem %#010llx-%#010llx]\n",
+ start, end);
reserve_region_with_split(&iomem_resource, start, end,
"RAM buffer");
}
@@ -1047,7 +1048,7 @@ void __init setup_memory_map(void)
who = x86_init.resources.memory_setup();
memcpy(&e820_saved, &e820, sizeof(struct e820map));
- printk(KERN_INFO "BIOS-provided physical RAM map:\n");
+ printk(KERN_INFO "e820: BIOS-provided physical RAM map:\n");
e820_print_map(who);
}
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 7b784f4ef1e..623f2883747 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -56,6 +56,7 @@
#include <asm/irq_vectors.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
+#include <asm/asm.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
@@ -151,10 +152,8 @@
.pushsection .fixup, "ax"
99: movl $0, (%esp)
jmp 98b
-.section __ex_table, "a"
- .align 4
- .long 98b, 99b
.popsection
+ _ASM_EXTABLE(98b,99b)
.endm
.macro PTGS_TO_GS
@@ -164,10 +163,8 @@
.pushsection .fixup, "ax"
99: movl $0, PT_GS(%esp)
jmp 98b
-.section __ex_table, "a"
- .align 4
- .long 98b, 99b
.popsection
+ _ASM_EXTABLE(98b,99b)
.endm
.macro GS_TO_REG reg
@@ -249,12 +246,10 @@
jmp 2b
6: movl $0, (%esp)
jmp 3b
-.section __ex_table, "a"
- .align 4
- .long 1b, 4b
- .long 2b, 5b
- .long 3b, 6b
.popsection
+ _ASM_EXTABLE(1b,4b)
+ _ASM_EXTABLE(2b,5b)
+ _ASM_EXTABLE(3b,6b)
POP_GS_EX
.endm
@@ -321,7 +316,6 @@ ret_from_exception:
preempt_stop(CLBR_ANY)
ret_from_intr:
GET_THREAD_INFO(%ebp)
-resume_userspace_sig:
#ifdef CONFIG_VM86
movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
movb PT_CS(%esp), %al
@@ -415,10 +409,7 @@ sysenter_past_esp:
jae syscall_fault
1: movl (%ebp),%ebp
movl %ebp,PT_EBP(%esp)
-.section __ex_table,"a"
- .align 4
- .long 1b,syscall_fault
-.previous
+ _ASM_EXTABLE(1b,syscall_fault)
GET_THREAD_INFO(%ebp)
@@ -485,10 +476,8 @@ sysexit_audit:
.pushsection .fixup,"ax"
2: movl $0,PT_FS(%esp)
jmp 1b
-.section __ex_table,"a"
- .align 4
- .long 1b,2b
.popsection
+ _ASM_EXTABLE(1b,2b)
PTGS_TO_GS_EX
ENDPROC(ia32_sysenter_target)
@@ -543,10 +532,7 @@ ENTRY(iret_exc)
pushl $do_iret_error
jmp error_code
.previous
-.section __ex_table,"a"
- .align 4
- .long irq_return,iret_exc
-.previous
+ _ASM_EXTABLE(irq_return,iret_exc)
CFI_RESTORE_STATE
ldt_ss:
@@ -628,9 +614,13 @@ work_notifysig: # deal with pending signals and
# vm86-space
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
+ movb PT_CS(%esp), %bl
+ andb $SEGMENT_RPL_MASK, %bl
+ cmpb $USER_RPL, %bl
+ jb resume_kernel
xorl %edx, %edx
call do_notify_resume
- jmp resume_userspace_sig
+ jmp resume_userspace
ALIGN
work_notifysig_v86:
@@ -643,9 +633,13 @@ work_notifysig_v86:
#endif
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
+ movb PT_CS(%esp), %bl
+ andb $SEGMENT_RPL_MASK, %bl
+ cmpb $USER_RPL, %bl
+ jb resume_kernel
xorl %edx, %edx
call do_notify_resume
- jmp resume_userspace_sig
+ jmp resume_userspace
END(work_pending)
# perform syscall exit tracing
@@ -901,10 +895,7 @@ END(device_not_available)
#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
iret
-.section __ex_table,"a"
- .align 4
- .long native_iret, iret_exc
-.previous
+ _ASM_EXTABLE(native_iret, iret_exc)
END(native_iret)
ENTRY(native_irq_enable_sysexit)
@@ -1093,13 +1084,10 @@ ENTRY(xen_failsafe_callback)
movl %eax,16(%esp)
jmp 4b
.previous
-.section __ex_table,"a"
- .align 4
- .long 1b,6b
- .long 2b,7b
- .long 3b,8b
- .long 4b,9b
-.previous
+ _ASM_EXTABLE(1b,6b)
+ _ASM_EXTABLE(2b,7b)
+ _ASM_EXTABLE(3b,8b)
+ _ASM_EXTABLE(4b,9b)
ENDPROC(xen_failsafe_callback)
BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK,
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index cdc79b5cfcd..7d65133b51b 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -55,6 +55,7 @@
#include <asm/paravirt.h>
#include <asm/ftrace.h>
#include <asm/percpu.h>
+#include <asm/asm.h>
#include <linux/err.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
@@ -190,6 +191,44 @@ ENDPROC(native_usergs_sysret64)
.endm
/*
+ * When dynamic function tracer is enabled it will add a breakpoint
+ * to all locations that it is about to modify, sync CPUs, update
+ * all the code, sync CPUs, then remove the breakpoints. In this time
+ * if lockdep is enabled, it might jump back into the debug handler
+ * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF).
+ *
+ * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to
+ * make sure the stack pointer does not get reset back to the top
+ * of the debug stack, and instead just reuses the current stack.
+ */
+#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
+
+.macro TRACE_IRQS_OFF_DEBUG
+ call debug_stack_set_zero
+ TRACE_IRQS_OFF
+ call debug_stack_reset
+.endm
+
+.macro TRACE_IRQS_ON_DEBUG
+ call debug_stack_set_zero
+ TRACE_IRQS_ON
+ call debug_stack_reset
+.endm
+
+.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
+ bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
+ jnc 1f
+ TRACE_IRQS_ON_DEBUG
+1:
+.endm
+
+#else
+# define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF
+# define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON
+# define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ
+#endif
+
+/*
* C code is not supposed to know about undefined top of stack. Every time
* a C function with an pt_regs argument is called from the SYSCALL based
* fast path FIXUP_TOP_OF_STACK is needed.
@@ -900,18 +939,12 @@ restore_args:
irq_return:
INTERRUPT_RETURN
-
- .section __ex_table, "a"
- .quad irq_return, bad_iret
- .previous
+ _ASM_EXTABLE(irq_return, bad_iret)
#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
iretq
-
- .section __ex_table,"a"
- .quad native_iret, bad_iret
- .previous
+ _ASM_EXTABLE(native_iret, bad_iret)
#endif
.section .fixup,"ax"
@@ -1103,7 +1136,7 @@ ENTRY(\sym)
subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
call save_paranoid
- TRACE_IRQS_OFF
+ TRACE_IRQS_OFF_DEBUG
movq %rsp,%rdi /* pt_regs pointer */
xorl %esi,%esi /* no error code */
subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
@@ -1181,10 +1214,7 @@ gs_change:
CFI_ENDPROC
END(native_load_gs_index)
- .section __ex_table,"a"
- .align 8
- .quad gs_change,bad_gs
- .previous
+ _ASM_EXTABLE(gs_change,bad_gs)
.section .fixup,"ax"
/* running with kernelgs */
bad_gs:
@@ -1401,7 +1431,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip)
ENTRY(paranoid_exit)
DEFAULT_FRAME
DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
+ TRACE_IRQS_OFF_DEBUG
testl %ebx,%ebx /* swapgs needed? */
jnz paranoid_restore
testl $3,CS(%rsp)
@@ -1412,7 +1442,7 @@ paranoid_swapgs:
RESTORE_ALL 8
jmp irq_return
paranoid_restore:
- TRACE_IRQS_IRETQ 0
+ TRACE_IRQS_IRETQ_DEBUG 0
RESTORE_ALL 8
jmp irq_return
paranoid_userspace:
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c9a281f272f..c3a7cb4bf6e 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -24,40 +24,21 @@
#include <trace/syscall.h>
#include <asm/cacheflush.h>
+#include <asm/kprobes.h>
#include <asm/ftrace.h>
#include <asm/nops.h>
-#include <asm/nmi.h>
-
#ifdef CONFIG_DYNAMIC_FTRACE
-/*
- * modifying_code is set to notify NMIs that they need to use
- * memory barriers when entering or exiting. But we don't want
- * to burden NMIs with unnecessary memory barriers when code
- * modification is not being done (which is most of the time).
- *
- * A mutex is already held when ftrace_arch_code_modify_prepare
- * and post_process are called. No locks need to be taken here.
- *
- * Stop machine will make sure currently running NMIs are done
- * and new NMIs will see the updated variable before we need
- * to worry about NMIs doing memory barriers.
- */
-static int modifying_code __read_mostly;
-static DEFINE_PER_CPU(int, save_modifying_code);
-
int ftrace_arch_code_modify_prepare(void)
{
set_kernel_text_rw();
set_all_modules_text_rw();
- modifying_code = 1;
return 0;
}
int ftrace_arch_code_modify_post_process(void)
{
- modifying_code = 0;
set_all_modules_text_ro();
set_kernel_text_ro();
return 0;
@@ -90,134 +71,6 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
return calc.code;
}
-/*
- * Modifying code must take extra care. On an SMP machine, if
- * the code being modified is also being executed on another CPU
- * that CPU will have undefined results and possibly take a GPF.
- * We use kstop_machine to stop other CPUS from exectuing code.
- * But this does not stop NMIs from happening. We still need
- * to protect against that. We separate out the modification of
- * the code to take care of this.
- *
- * Two buffers are added: An IP buffer and a "code" buffer.
- *
- * 1) Put the instruction pointer into the IP buffer
- * and the new code into the "code" buffer.
- * 2) Wait for any running NMIs to finish and set a flag that says
- * we are modifying code, it is done in an atomic operation.
- * 3) Write the code
- * 4) clear the flag.
- * 5) Wait for any running NMIs to finish.
- *
- * If an NMI is executed, the first thing it does is to call
- * "ftrace_nmi_enter". This will check if the flag is set to write
- * and if it is, it will write what is in the IP and "code" buffers.
- *
- * The trick is, it does not matter if everyone is writing the same
- * content to the code location. Also, if a CPU is executing code
- * it is OK to write to that code location if the contents being written
- * are the same as what exists.
- */
-
-#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */
-static atomic_t nmi_running = ATOMIC_INIT(0);
-static int mod_code_status; /* holds return value of text write */
-static void *mod_code_ip; /* holds the IP to write to */
-static const void *mod_code_newcode; /* holds the text to write to the IP */
-
-static unsigned nmi_wait_count;
-static atomic_t nmi_update_count = ATOMIC_INIT(0);
-
-int ftrace_arch_read_dyn_info(char *buf, int size)
-{
- int r;
-
- r = snprintf(buf, size, "%u %u",
- nmi_wait_count,
- atomic_read(&nmi_update_count));
- return r;
-}
-
-static void clear_mod_flag(void)
-{
- int old = atomic_read(&nmi_running);
-
- for (;;) {
- int new = old & ~MOD_CODE_WRITE_FLAG;
-
- if (old == new)
- break;
-
- old = atomic_cmpxchg(&nmi_running, old, new);
- }
-}
-
-static void ftrace_mod_code(void)
-{
- /*
- * Yes, more than one CPU process can be writing to mod_code_status.
- * (and the code itself)
- * But if one were to fail, then they all should, and if one were
- * to succeed, then they all should.
- */
- mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
- MCOUNT_INSN_SIZE);
-
- /* if we fail, then kill any new writers */
- if (mod_code_status)
- clear_mod_flag();
-}
-
-void ftrace_nmi_enter(void)
-{
- __this_cpu_write(save_modifying_code, modifying_code);
-
- if (!__this_cpu_read(save_modifying_code))
- return;
-
- if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
- smp_rmb();
- ftrace_mod_code();
- atomic_inc(&nmi_update_count);
- }
- /* Must have previous changes seen before executions */
- smp_mb();
-}
-
-void ftrace_nmi_exit(void)
-{
- if (!__this_cpu_read(save_modifying_code))
- return;
-
- /* Finish all executions before clearing nmi_running */
- smp_mb();
- atomic_dec(&nmi_running);
-}
-
-static void wait_for_nmi_and_set_mod_flag(void)
-{
- if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
- return;
-
- do {
- cpu_relax();
- } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
-
- nmi_wait_count++;
-}
-
-static void wait_for_nmi(void)
-{
- if (!atomic_read(&nmi_running))
- return;
-
- do {
- cpu_relax();
- } while (atomic_read(&nmi_running));
-
- nmi_wait_count++;
-}
-
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
@@ -238,26 +91,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code)
if (within(ip, (unsigned long)_text, (unsigned long)_etext))
ip = (unsigned long)__va(__pa(ip));
- mod_code_ip = (void *)ip;
- mod_code_newcode = new_code;
-
- /* The buffers need to be visible before we let NMIs write them */
- smp_mb();
-
- wait_for_nmi_and_set_mod_flag();
-
- /* Make sure all running NMIs have finished before we write the code */
- smp_mb();
-
- ftrace_mod_code();
-
- /* Make sure the write happens before clearing the bit */
- smp_mb();
-
- clear_mod_flag();
- wait_for_nmi();
-
- return mod_code_status;
+ return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
}
static const unsigned char *ftrace_nop_replace(void)
@@ -266,7 +100,7 @@ static const unsigned char *ftrace_nop_replace(void)
}
static int
-ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
unsigned const char *new_code)
{
unsigned char replaced[MCOUNT_INSN_SIZE];
@@ -307,7 +141,20 @@ int ftrace_make_nop(struct module *mod,
old = ftrace_call_replace(ip, addr);
new = ftrace_nop_replace();
- return ftrace_modify_code(rec->ip, old, new);
+ /*
+ * On boot up, and when modules are loaded, the MCOUNT_ADDR
+ * is converted to a nop, and will never become MCOUNT_ADDR
+ * again. This code is either running before SMP (on boot up)
+ * or before the code will ever be executed (module load).
+ * We do not want to use the breakpoint version in this case,
+ * just modify the code directly.
+ */
+ if (addr == MCOUNT_ADDR)
+ return ftrace_modify_code_direct(rec->ip, old, new);
+
+ /* Normal cases use add_brk_on_nop */
+ WARN_ONCE(1, "invalid use of ftrace_make_nop");
+ return -EINVAL;
}
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
@@ -318,9 +165,47 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
old = ftrace_nop_replace();
new = ftrace_call_replace(ip, addr);
- return ftrace_modify_code(rec->ip, old, new);
+ /* Should only be called when module is loaded */
+ return ftrace_modify_code_direct(rec->ip, old, new);
}
+/*
+ * The modifying_ftrace_code is used to tell the breakpoint
+ * handler to call ftrace_int3_handler(). If it fails to
+ * call this handler for a breakpoint added by ftrace, then
+ * the kernel may crash.
+ *
+ * As atomic_writes on x86 do not need a barrier, we do not
+ * need to add smp_mb()s for this to work. It is also considered
+ * that we can not read the modifying_ftrace_code before
+ * executing the breakpoint. That would be quite remarkable if
+ * it could do that. Here's the flow that is required:
+ *
+ * CPU-0 CPU-1
+ *
+ * atomic_inc(mfc);
+ * write int3s
+ * <trap-int3> // implicit (r)mb
+ * if (atomic_read(mfc))
+ * call ftrace_int3_handler()
+ *
+ * Then when we are finished:
+ *
+ * atomic_dec(mfc);
+ *
+ * If we hit a breakpoint that was not set by ftrace, it does not
+ * matter if ftrace_int3_handler() is called or not. It will
+ * simply be ignored. But it is crucial that a ftrace nop/caller
+ * breakpoint is handled. No other user should ever place a
+ * breakpoint on an ftrace nop/caller location. It must only
+ * be done by this code.
+ */
+atomic_t modifying_ftrace_code __read_mostly;
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+ unsigned const char *new_code);
+
int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
@@ -329,9 +214,376 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
new = ftrace_call_replace(ip, (unsigned long)func);
+
+ /* See comment above by declaration of modifying_ftrace_code */
+ atomic_inc(&modifying_ftrace_code);
+
ret = ftrace_modify_code(ip, old, new);
+ atomic_dec(&modifying_ftrace_code);
+
+ return ret;
+}
+
+/*
+ * A breakpoint was added to the code address we are about to
+ * modify, and this is the handle that will just skip over it.
+ * We are either changing a nop into a trace call, or a trace
+ * call to a nop. While the change is taking place, we treat
+ * it just like it was a nop.
+ */
+int ftrace_int3_handler(struct pt_regs *regs)
+{
+ if (WARN_ON_ONCE(!regs))
+ return 0;
+
+ if (!ftrace_location(regs->ip - 1))
+ return 0;
+
+ regs->ip += MCOUNT_INSN_SIZE - 1;
+
+ return 1;
+}
+
+static int ftrace_write(unsigned long ip, const char *val, int size)
+{
+ /*
+ * On x86_64, kernel text mappings are mapped read-only with
+ * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
+ * of the kernel text mapping to modify the kernel text.
+ *
+ * For 32bit kernels, these mappings are same and we can use
+ * kernel identity mapping to modify code.
+ */
+ if (within(ip, (unsigned long)_text, (unsigned long)_etext))
+ ip = (unsigned long)__va(__pa(ip));
+
+ return probe_kernel_write((void *)ip, val, size);
+}
+
+static int add_break(unsigned long ip, const char *old)
+{
+ unsigned char replaced[MCOUNT_INSN_SIZE];
+ unsigned char brk = BREAKPOINT_INSTRUCTION;
+
+ if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ /* Make sure it is what we expect it to be */
+ if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
+ return -EINVAL;
+
+ if (ftrace_write(ip, &brk, 1))
+ return -EPERM;
+
+ return 0;
+}
+
+static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned const char *old;
+ unsigned long ip = rec->ip;
+
+ old = ftrace_call_replace(ip, addr);
+
+ return add_break(rec->ip, old);
+}
+
+
+static int add_brk_on_nop(struct dyn_ftrace *rec)
+{
+ unsigned const char *old;
+
+ old = ftrace_nop_replace();
+
+ return add_break(rec->ip, old);
+}
+
+static int add_breakpoints(struct dyn_ftrace *rec, int enable)
+{
+ unsigned long ftrace_addr;
+ int ret;
+
+ ret = ftrace_test_record(rec, enable);
+
+ ftrace_addr = (unsigned long)FTRACE_ADDR;
+
+ switch (ret) {
+ case FTRACE_UPDATE_IGNORE:
+ return 0;
+
+ case FTRACE_UPDATE_MAKE_CALL:
+ /* converting nop to call */
+ return add_brk_on_nop(rec);
+
+ case FTRACE_UPDATE_MAKE_NOP:
+ /* converting a call to a nop */
+ return add_brk_on_call(rec, ftrace_addr);
+ }
+ return 0;
+}
+
+/*
+ * On error, we need to remove breakpoints. This needs to
+ * be done caefully. If the address does not currently have a
+ * breakpoint, we know we are done. Otherwise, we look at the
+ * remaining 4 bytes of the instruction. If it matches a nop
+ * we replace the breakpoint with the nop. Otherwise we replace
+ * it with the call instruction.
+ */
+static int remove_breakpoint(struct dyn_ftrace *rec)
+{
+ unsigned char ins[MCOUNT_INSN_SIZE];
+ unsigned char brk = BREAKPOINT_INSTRUCTION;
+ const unsigned char *nop;
+ unsigned long ftrace_addr;
+ unsigned long ip = rec->ip;
+
+ /* If we fail the read, just give up */
+ if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ /* If this does not have a breakpoint, we are done */
+ if (ins[0] != brk)
+ return -1;
+
+ nop = ftrace_nop_replace();
+
+ /*
+ * If the last 4 bytes of the instruction do not match
+ * a nop, then we assume that this is a call to ftrace_addr.
+ */
+ if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
+ /*
+ * For extra paranoidism, we check if the breakpoint is on
+ * a call that would actually jump to the ftrace_addr.
+ * If not, don't touch the breakpoint, we make just create
+ * a disaster.
+ */
+ ftrace_addr = (unsigned long)FTRACE_ADDR;
+ nop = ftrace_call_replace(ip, ftrace_addr);
+
+ if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
+ return -EINVAL;
+ }
+
+ return probe_kernel_write((void *)ip, &nop[0], 1);
+}
+
+static int add_update_code(unsigned long ip, unsigned const char *new)
+{
+ /* skip breakpoint */
+ ip++;
+ new++;
+ if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1))
+ return -EPERM;
+ return 0;
+}
+
+static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long ip = rec->ip;
+ unsigned const char *new;
+
+ new = ftrace_call_replace(ip, addr);
+ return add_update_code(ip, new);
+}
+
+static int add_update_nop(struct dyn_ftrace *rec)
+{
+ unsigned long ip = rec->ip;
+ unsigned const char *new;
+
+ new = ftrace_nop_replace();
+ return add_update_code(ip, new);
+}
+
+static int add_update(struct dyn_ftrace *rec, int enable)
+{
+ unsigned long ftrace_addr;
+ int ret;
+
+ ret = ftrace_test_record(rec, enable);
+
+ ftrace_addr = (unsigned long)FTRACE_ADDR;
+
+ switch (ret) {
+ case FTRACE_UPDATE_IGNORE:
+ return 0;
+
+ case FTRACE_UPDATE_MAKE_CALL:
+ /* converting nop to call */
+ return add_update_call(rec, ftrace_addr);
+
+ case FTRACE_UPDATE_MAKE_NOP:
+ /* converting a call to a nop */
+ return add_update_nop(rec);
+ }
+
+ return 0;
+}
+
+static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long ip = rec->ip;
+ unsigned const char *new;
+
+ new = ftrace_call_replace(ip, addr);
+
+ if (ftrace_write(ip, new, 1))
+ return -EPERM;
+
+ return 0;
+}
+
+static int finish_update_nop(struct dyn_ftrace *rec)
+{
+ unsigned long ip = rec->ip;
+ unsigned const char *new;
+
+ new = ftrace_nop_replace();
+
+ if (ftrace_write(ip, new, 1))
+ return -EPERM;
+ return 0;
+}
+
+static int finish_update(struct dyn_ftrace *rec, int enable)
+{
+ unsigned long ftrace_addr;
+ int ret;
+
+ ret = ftrace_update_record(rec, enable);
+
+ ftrace_addr = (unsigned long)FTRACE_ADDR;
+
+ switch (ret) {
+ case FTRACE_UPDATE_IGNORE:
+ return 0;
+
+ case FTRACE_UPDATE_MAKE_CALL:
+ /* converting nop to call */
+ return finish_update_call(rec, ftrace_addr);
+
+ case FTRACE_UPDATE_MAKE_NOP:
+ /* converting a call to a nop */
+ return finish_update_nop(rec);
+ }
+
+ return 0;
+}
+
+static void do_sync_core(void *data)
+{
+ sync_core();
+}
+
+static void run_sync(void)
+{
+ int enable_irqs = irqs_disabled();
+
+ /* We may be called with interrupts disbled (on bootup). */
+ if (enable_irqs)
+ local_irq_enable();
+ on_each_cpu(do_sync_core, NULL, 1);
+ if (enable_irqs)
+ local_irq_disable();
+}
+
+void ftrace_replace_code(int enable)
+{
+ struct ftrace_rec_iter *iter;
+ struct dyn_ftrace *rec;
+ const char *report = "adding breakpoints";
+ int count = 0;
+ int ret;
+
+ for_ftrace_rec_iter(iter) {
+ rec = ftrace_rec_iter_record(iter);
+
+ ret = add_breakpoints(rec, enable);
+ if (ret)
+ goto remove_breakpoints;
+ count++;
+ }
+
+ run_sync();
+
+ report = "updating code";
+
+ for_ftrace_rec_iter(iter) {
+ rec = ftrace_rec_iter_record(iter);
+
+ ret = add_update(rec, enable);
+ if (ret)
+ goto remove_breakpoints;
+ }
+
+ run_sync();
+
+ report = "removing breakpoints";
+
+ for_ftrace_rec_iter(iter) {
+ rec = ftrace_rec_iter_record(iter);
+
+ ret = finish_update(rec, enable);
+ if (ret)
+ goto remove_breakpoints;
+ }
+
+ run_sync();
+
+ return;
+
+ remove_breakpoints:
+ ftrace_bug(ret, rec ? rec->ip : 0);
+ printk(KERN_WARNING "Failed on %s (%d):\n", report, count);
+ for_ftrace_rec_iter(iter) {
+ rec = ftrace_rec_iter_record(iter);
+ remove_breakpoint(rec);
+ }
+}
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+ unsigned const char *new_code)
+{
+ int ret;
+
+ ret = add_break(ip, old_code);
+ if (ret)
+ goto out;
+
+ run_sync();
+
+ ret = add_update_code(ip, new_code);
+ if (ret)
+ goto fail_update;
+
+ run_sync();
+
+ ret = ftrace_write(ip, new_code, 1);
+ if (ret) {
+ ret = -EPERM;
+ goto out;
+ }
+ run_sync();
+ out:
return ret;
+
+ fail_update:
+ probe_kernel_write((void *)ip, &old_code[0], 1);
+ goto out;
+}
+
+void arch_ftrace_update_code(int command)
+{
+ /* See comment above by declaration of modifying_ftrace_code */
+ atomic_inc(&modifying_ftrace_code);
+
+ ftrace_modify_all_code(command);
+
+ atomic_dec(&modifying_ftrace_code);
}
int __init ftrace_dyn_arch_init(void *data)
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 51ff18616d5..c18f59d1010 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -14,7 +14,6 @@
#include <asm/sections.h>
#include <asm/e820.h>
#include <asm/page.h>
-#include <asm/trampoline.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/bios_ebda.h>
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 3a3b779f41d..037df57a99a 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -24,7 +24,6 @@
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/e820.h>
-#include <asm/trampoline.h>
#include <asm/bios_ebda.h>
static void __init zap_identity_mappings(void)
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index ce0be7cd085..d42ab17b739 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -21,6 +21,7 @@
#include <asm/msr-index.h>
#include <asm/cpufeature.h>
#include <asm/percpu.h>
+#include <asm/nops.h>
/* Physical address */
#define pa(X) ((X) - __PAGE_OFFSET)
@@ -273,10 +274,7 @@ num_subarch_entries = (. - subarch_entries) / 4
* If cpu hotplug is not supported then this code can go in init section
* which will be freed later
*/
-
__CPUINIT
-
-#ifdef CONFIG_SMP
ENTRY(startup_32_smp)
cld
movl $(__BOOT_DS),%eax
@@ -287,7 +285,7 @@ ENTRY(startup_32_smp)
movl pa(stack_start),%ecx
movl %eax,%ss
leal -__PAGE_OFFSET(%ecx),%esp
-#endif /* CONFIG_SMP */
+
default_entry:
/*
@@ -363,28 +361,23 @@ default_entry:
pushl $0
popfl
-#ifdef CONFIG_SMP
- cmpb $0, ready
- jnz checkCPUtype
-#endif /* CONFIG_SMP */
-
/*
* start system 32-bit setup. We need to re-do some of the things done
* in 16-bit mode for the "real" operations.
*/
- call setup_idt
-
-checkCPUtype:
-
- movl $-1,X86_CPUID # -1 for no CPUID initially
-
+ movl setup_once_ref,%eax
+ andl %eax,%eax
+ jz 1f # Did we do this already?
+ call *%eax
+1:
+
/* check if it is 486 or 386. */
/*
* XXX - this does a lot of unnecessary setup. Alignment checks don't
* apply at our cpl of 0 and the stack ought to be aligned already, and
* we don't need to preserve eflags.
*/
-
+ movl $-1,X86_CPUID # -1 for no CPUID initially
movb $3,X86 # at least 386
pushfl # push EFLAGS
popl %eax # get EFLAGS
@@ -450,21 +443,6 @@ is386: movl $2,%ecx # set MP
movl $(__KERNEL_PERCPU), %eax
movl %eax,%fs # set this cpu's percpu
-#ifdef CONFIG_CC_STACKPROTECTOR
- /*
- * The linker can't handle this by relocation. Manually set
- * base address in stack canary segment descriptor.
- */
- cmpb $0,ready
- jne 1f
- movl $gdt_page,%eax
- movl $stack_canary,%ecx
- movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
- shrl $16, %ecx
- movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
- movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
-1:
-#endif
movl $(__KERNEL_STACK_CANARY),%eax
movl %eax,%gs
@@ -473,7 +451,6 @@ is386: movl $2,%ecx # set MP
cld # gcc2 wants the direction flag cleared at all times
pushl $0 # fake return address for unwinder
- movb $1, ready
jmp *(initial_code)
/*
@@ -495,81 +472,122 @@ check_x87:
.byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */
ret
+
+#include "verify_cpu.S"
+
/*
- * setup_idt
+ * setup_once
*
- * sets up a idt with 256 entries pointing to
- * ignore_int, interrupt gates. It doesn't actually load
- * idt - that can be done only after paging has been enabled
- * and the kernel moved to PAGE_OFFSET. Interrupts
- * are enabled elsewhere, when we can be relatively
- * sure everything is ok.
+ * The setup work we only want to run on the BSP.
*
* Warning: %esi is live across this function.
*/
-setup_idt:
- lea ignore_int,%edx
- movl $(__KERNEL_CS << 16),%eax
- movw %dx,%ax /* selector = 0x0010 = cs */
- movw $0x8E00,%dx /* interrupt gate - dpl=0, present */
+__INIT
+setup_once:
+ /*
+ * Set up a idt with 256 entries pointing to ignore_int,
+ * interrupt gates. It doesn't actually load idt - that needs
+ * to be done on each CPU. Interrupts are enabled elsewhere,
+ * when we can be relatively sure everything is ok.
+ */
- lea idt_table,%edi
- mov $256,%ecx
-rp_sidt:
+ movl $idt_table,%edi
+ movl $early_idt_handlers,%eax
+ movl $NUM_EXCEPTION_VECTORS,%ecx
+1:
movl %eax,(%edi)
- movl %edx,4(%edi)
+ movl %eax,4(%edi)
+ /* interrupt gate, dpl=0, present */
+ movl $(0x8E000000 + __KERNEL_CS),2(%edi)
+ addl $9,%eax
addl $8,%edi
- dec %ecx
- jne rp_sidt
+ loop 1b
-.macro set_early_handler handler,trapno
- lea \handler,%edx
+ movl $256 - NUM_EXCEPTION_VECTORS,%ecx
+ movl $ignore_int,%edx
movl $(__KERNEL_CS << 16),%eax
- movw %dx,%ax
+ movw %dx,%ax /* selector = 0x0010 = cs */
movw $0x8E00,%dx /* interrupt gate - dpl=0, present */
- lea idt_table,%edi
- movl %eax,8*\trapno(%edi)
- movl %edx,8*\trapno+4(%edi)
-.endm
+2:
+ movl %eax,(%edi)
+ movl %edx,4(%edi)
+ addl $8,%edi
+ loop 2b
- set_early_handler handler=early_divide_err,trapno=0
- set_early_handler handler=early_illegal_opcode,trapno=6
- set_early_handler handler=early_protection_fault,trapno=13
- set_early_handler handler=early_page_fault,trapno=14
+#ifdef CONFIG_CC_STACKPROTECTOR
+ /*
+ * Configure the stack canary. The linker can't handle this by
+ * relocation. Manually set base address in stack canary
+ * segment descriptor.
+ */
+ movl $gdt_page,%eax
+ movl $stack_canary,%ecx
+ movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
+ shrl $16, %ecx
+ movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
+ movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
+#endif
+ andl $0,setup_once_ref /* Once is enough, thanks */
ret
-early_divide_err:
- xor %edx,%edx
- pushl $0 /* fake errcode */
- jmp early_fault
+ENTRY(early_idt_handlers)
+ # 36(%esp) %eflags
+ # 32(%esp) %cs
+ # 28(%esp) %eip
+ # 24(%rsp) error code
+ i = 0
+ .rept NUM_EXCEPTION_VECTORS
+ .if (EXCEPTION_ERRCODE_MASK >> i) & 1
+ ASM_NOP2
+ .else
+ pushl $0 # Dummy error code, to make stack frame uniform
+ .endif
+ pushl $i # 20(%esp) Vector number
+ jmp early_idt_handler
+ i = i + 1
+ .endr
+ENDPROC(early_idt_handlers)
+
+ /* This is global to keep gas from relaxing the jumps */
+ENTRY(early_idt_handler)
+ cld
+ cmpl $2,%ss:early_recursion_flag
+ je hlt_loop
+ incl %ss:early_recursion_flag
-early_illegal_opcode:
- movl $6,%edx
- pushl $0 /* fake errcode */
- jmp early_fault
+ push %eax # 16(%esp)
+ push %ecx # 12(%esp)
+ push %edx # 8(%esp)
+ push %ds # 4(%esp)
+ push %es # 0(%esp)
+ movl $(__KERNEL_DS),%eax
+ movl %eax,%ds
+ movl %eax,%es
-early_protection_fault:
- movl $13,%edx
- jmp early_fault
+ cmpl $(__KERNEL_CS),32(%esp)
+ jne 10f
-early_page_fault:
- movl $14,%edx
- jmp early_fault
+ leal 28(%esp),%eax # Pointer to %eip
+ call early_fixup_exception
+ andl %eax,%eax
+ jnz ex_entry /* found an exception entry */
-early_fault:
- cld
+10:
#ifdef CONFIG_PRINTK
- pusha
- movl $(__KERNEL_DS),%eax
- movl %eax,%ds
- movl %eax,%es
- cmpl $2,early_recursion_flag
- je hlt_loop
- incl early_recursion_flag
+ xorl %eax,%eax
+ movw %ax,2(%esp) /* clean up the segment values on some cpus */
+ movw %ax,6(%esp)
+ movw %ax,34(%esp)
+ leal 40(%esp),%eax
+ pushl %eax /* %esp before the exception */
+ pushl %ebx
+ pushl %ebp
+ pushl %esi
+ pushl %edi
movl %cr2,%eax
pushl %eax
- pushl %edx /* trapno */
+ pushl (20+6*4)(%esp) /* trapno */
pushl $fault_msg
call printk
#endif
@@ -578,6 +596,17 @@ hlt_loop:
hlt
jmp hlt_loop
+ex_entry:
+ pop %es
+ pop %ds
+ pop %edx
+ pop %ecx
+ pop %eax
+ addl $8,%esp /* drop vector number and error code */
+ decl %ss:early_recursion_flag
+ iret
+ENDPROC(early_idt_handler)
+
/* This is the default interrupt "handler" :-) */
ALIGN
ignore_int:
@@ -611,13 +640,18 @@ ignore_int:
popl %eax
#endif
iret
+ENDPROC(ignore_int)
+__INITDATA
+ .align 4
+early_recursion_flag:
+ .long 0
-#include "verify_cpu.S"
-
- __REFDATA
-.align 4
+__REFDATA
+ .align 4
ENTRY(initial_code)
.long i386_start_kernel
+ENTRY(setup_once_ref)
+ .long setup_once
/*
* BSS section
@@ -670,22 +704,19 @@ ENTRY(initial_page_table)
ENTRY(stack_start)
.long init_thread_union+THREAD_SIZE
-early_recursion_flag:
- .long 0
-
-ready: .byte 0
-
+__INITRODATA
int_msg:
.asciz "Unknown interrupt or fault at: %p %p %p\n"
fault_msg:
/* fault info: */
.ascii "BUG: Int %d: CR2 %p\n"
-/* pusha regs: */
- .ascii " EDI %p ESI %p EBP %p ESP %p\n"
- .ascii " EBX %p EDX %p ECX %p EAX %p\n"
+/* regs pushed in early_idt_handler: */
+ .ascii " EDI %p ESI %p EBP %p EBX %p\n"
+ .ascii " ESP %p ES %p DS %p\n"
+ .ascii " EDX %p ECX %p EAX %p\n"
/* fault frame: */
- .ascii " err %p EIP %p CS %p flg %p\n"
+ .ascii " vec %p err %p EIP %p CS %p flg %p\n"
.ascii "Stack: %p %p %p %p %p %p %p %p\n"
.ascii " %p %p %p %p %p %p %p %p\n"
.asciz " %p %p %p %p %p %p %p %p\n"
@@ -699,6 +730,7 @@ fault_msg:
* segment size, and 32-bit linear address value:
*/
+ .data
.globl boot_gdt_descr
.globl idt_descr
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 40f4eb3766d..94bf9cc2c7e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -19,12 +19,15 @@
#include <asm/cache.h>
#include <asm/processor-flags.h>
#include <asm/percpu.h>
+#include <asm/nops.h>
#ifdef CONFIG_PARAVIRT
#include <asm/asm-offsets.h>
#include <asm/paravirt.h>
+#define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg
#else
-#define GET_CR2_INTO_RCX movq %cr2, %rcx
+#define GET_CR2_INTO(reg) movq %cr2, reg
+#define INTERRUPT_RETURN iretq
#endif
/* we are not able to switch in one step to the final KERNEL ADDRESS SPACE
@@ -136,10 +139,6 @@ ident_complete:
/* Fixup phys_base */
addq %rbp, phys_base(%rip)
- /* Fixup trampoline */
- addq %rbp, trampoline_level4_pgt + 0(%rip)
- addq %rbp, trampoline_level4_pgt + (511*8)(%rip)
-
/* Due to ENTRY(), sometimes the empty space gets filled with
* zeros. Better take a jmp than relying on empty space being
* filled with 0x90 (nop)
@@ -270,36 +269,56 @@ bad_address:
jmp bad_address
.section ".init.text","ax"
-#ifdef CONFIG_EARLY_PRINTK
.globl early_idt_handlers
early_idt_handlers:
+ # 104(%rsp) %rflags
+ # 96(%rsp) %cs
+ # 88(%rsp) %rip
+ # 80(%rsp) error code
i = 0
.rept NUM_EXCEPTION_VECTORS
- movl $i, %esi
+ .if (EXCEPTION_ERRCODE_MASK >> i) & 1
+ ASM_NOP2
+ .else
+ pushq $0 # Dummy error code, to make stack frame uniform
+ .endif
+ pushq $i # 72(%rsp) Vector number
jmp early_idt_handler
i = i + 1
.endr
-#endif
ENTRY(early_idt_handler)
-#ifdef CONFIG_EARLY_PRINTK
+ cld
+
cmpl $2,early_recursion_flag(%rip)
jz 1f
incl early_recursion_flag(%rip)
- GET_CR2_INTO_RCX
- movq %rcx,%r9
- xorl %r8d,%r8d # zero for error code
- movl %esi,%ecx # get vector number
- # Test %ecx against mask of vectors that push error code.
- cmpl $31,%ecx
- ja 0f
- movl $1,%eax
- salq %cl,%rax
- testl $0x27d00,%eax
- je 0f
- popq %r8 # get error code
-0: movq 0(%rsp),%rcx # get ip
- movq 8(%rsp),%rdx # get cs
+
+ pushq %rax # 64(%rsp)
+ pushq %rcx # 56(%rsp)
+ pushq %rdx # 48(%rsp)
+ pushq %rsi # 40(%rsp)
+ pushq %rdi # 32(%rsp)
+ pushq %r8 # 24(%rsp)
+ pushq %r9 # 16(%rsp)
+ pushq %r10 # 8(%rsp)
+ pushq %r11 # 0(%rsp)
+
+ cmpl $__KERNEL_CS,96(%rsp)
+ jne 10f
+
+ leaq 88(%rsp),%rdi # Pointer to %rip
+ call early_fixup_exception
+ andl %eax,%eax
+ jnz 20f # Found an exception entry
+
+10:
+#ifdef CONFIG_EARLY_PRINTK
+ GET_CR2_INTO(%r9) # can clobber any volatile register if pv
+ movl 80(%rsp),%r8d # error code
+ movl 72(%rsp),%esi # vector number
+ movl 96(%rsp),%edx # %cs
+ movq 88(%rsp),%rcx # %rip
xorl %eax,%eax
leaq early_idt_msg(%rip),%rdi
call early_printk
@@ -308,17 +327,32 @@ ENTRY(early_idt_handler)
call dump_stack
#ifdef CONFIG_KALLSYMS
leaq early_idt_ripmsg(%rip),%rdi
- movq 0(%rsp),%rsi # get rip again
+ movq 40(%rsp),%rsi # %rip again
call __print_symbol
#endif
#endif /* EARLY_PRINTK */
1: hlt
jmp 1b
-#ifdef CONFIG_EARLY_PRINTK
+20: # Exception table entry found
+ popq %r11
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rdi
+ popq %rsi
+ popq %rdx
+ popq %rcx
+ popq %rax
+ addq $16,%rsp # drop vector number and error code
+ decl early_recursion_flag(%rip)
+ INTERRUPT_RETURN
+
+ .balign 4
early_recursion_flag:
.long 0
+#ifdef CONFIG_EARLY_PRINTK
early_idt_msg:
.asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
early_idt_ripmsg:
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ad0de0c2714..1460a5df92f 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -94,13 +94,18 @@ static int hpet_verbose;
static int __init hpet_setup(char *str)
{
- if (str) {
+ while (str) {
+ char *next = strchr(str, ',');
+
+ if (next)
+ *next++ = 0;
if (!strncmp("disable", str, 7))
boot_hpet_disable = 1;
if (!strncmp("force", str, 5))
hpet_force_user = 1;
if (!strncmp("verbose", str, 7))
hpet_verbose = 1;
+ str = next;
}
return 1;
}
@@ -319,8 +324,6 @@ static void hpet_set_mode(enum clock_event_mode mode,
now = hpet_readl(HPET_COUNTER);
cmp = now + (unsigned int) delta;
cfg = hpet_readl(HPET_Tn_CFG(timer));
- /* Make sure we use edge triggered interrupts */
- cfg &= ~HPET_TN_LEVEL;
cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
HPET_TN_SETVAL | HPET_TN_32BIT;
hpet_writel(cfg, HPET_Tn_CFG(timer));
@@ -787,15 +790,16 @@ static int hpet_clocksource_register(void)
return 0;
}
+static u32 *hpet_boot_cfg;
+
/**
* hpet_enable - Try to setup the HPET timer. Returns 1 on success.
*/
int __init hpet_enable(void)
{
- unsigned long hpet_period;
- unsigned int id;
+ u32 hpet_period, cfg, id;
u64 freq;
- int i;
+ unsigned int i, last;
if (!is_hpet_capable())
return 0;
@@ -847,15 +851,45 @@ int __init hpet_enable(void)
id = hpet_readl(HPET_ID);
hpet_print_config();
+ last = (id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
+
#ifdef CONFIG_HPET_EMULATE_RTC
/*
* The legacy routing mode needs at least two channels, tick timer
* and the rtc emulation channel.
*/
- if (!(id & HPET_ID_NUMBER))
+ if (!last)
goto out_nohpet;
#endif
+ cfg = hpet_readl(HPET_CFG);
+ hpet_boot_cfg = kmalloc((last + 2) * sizeof(*hpet_boot_cfg),
+ GFP_KERNEL);
+ if (hpet_boot_cfg)
+ *hpet_boot_cfg = cfg;
+ else
+ pr_warn("HPET initial state will not be saved\n");
+ cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
+ hpet_writel(cfg, HPET_CFG);
+ if (cfg)
+ pr_warn("HPET: Unrecognized bits %#x set in global cfg\n",
+ cfg);
+
+ for (i = 0; i <= last; ++i) {
+ cfg = hpet_readl(HPET_Tn_CFG(i));
+ if (hpet_boot_cfg)
+ hpet_boot_cfg[i + 1] = cfg;
+ cfg &= ~(HPET_TN_ENABLE | HPET_TN_LEVEL | HPET_TN_FSB);
+ hpet_writel(cfg, HPET_Tn_CFG(i));
+ cfg &= ~(HPET_TN_PERIODIC | HPET_TN_PERIODIC_CAP
+ | HPET_TN_64BIT_CAP | HPET_TN_32BIT | HPET_TN_ROUTE
+ | HPET_TN_FSB | HPET_TN_FSB_CAP);
+ if (cfg)
+ pr_warn("HPET: Unrecognized bits %#x set in cfg#%u\n",
+ cfg, i);
+ }
+ hpet_print_config();
+
if (hpet_clocksource_register())
goto out_nohpet;
@@ -923,14 +957,28 @@ fs_initcall(hpet_late_init);
void hpet_disable(void)
{
if (is_hpet_capable() && hpet_virt_address) {
- unsigned int cfg = hpet_readl(HPET_CFG);
+ unsigned int cfg = hpet_readl(HPET_CFG), id, last;
- if (hpet_legacy_int_enabled) {
+ if (hpet_boot_cfg)
+ cfg = *hpet_boot_cfg;
+ else if (hpet_legacy_int_enabled) {
cfg &= ~HPET_CFG_LEGACY;
hpet_legacy_int_enabled = 0;
}
cfg &= ~HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
+
+ if (!hpet_boot_cfg)
+ return;
+
+ id = hpet_readl(HPET_ID);
+ last = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
+
+ for (id = 0; id <= last; ++id)
+ hpet_writel(hpet_boot_cfg[id + 1], HPET_Tn_CFG(id));
+
+ if (*hpet_boot_cfg & HPET_CFG_ENABLE)
+ hpet_writel(*hpet_boot_cfg, HPET_CFG);
}
}
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 2d6e6498c17..f250431fb50 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -88,7 +88,7 @@ void kernel_fpu_begin(void)
__thread_clear_has_fpu(me);
/* We do 'stts()' in kernel_fpu_end() */
} else {
- percpu_write(fpu_owner_task, NULL);
+ this_cpu_write(fpu_owner_task, NULL);
clts();
}
}
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
deleted file mode 100644
index 43e9ccf4494..00000000000
--- a/arch/x86/kernel/init_task.c
+++ /dev/null
@@ -1,42 +0,0 @@
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/init_task.h>
-#include <linux/fs.h>
-#include <linux/mqueue.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-
-static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
-static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-
-/*
- * Initial thread structure.
- *
- * We need to make sure that this is THREAD_SIZE aligned due to the
- * way process stacks are handled. This is done by having a special
- * "init_task" linker map entry..
- */
-union thread_union init_thread_union __init_task_data =
- { INIT_THREAD_INFO(init_task) };
-
-/*
- * Initial task structure.
- *
- * All other task structs will be allocated on slabs in fork.c
- */
-struct task_struct init_task = INIT_TASK(init_task);
-EXPORT_SYMBOL(init_task);
-
-/*
- * per-CPU TSS segments. Threads are completely 'soft' on Linux,
- * no more per-task TSS's. The TSS size is kept cacheline-aligned
- * so they are allowed to end up in the .data..cacheline_aligned
- * section. Since TSS's are completely CPU-local, we want them
- * on exact cacheline boundaries, to eliminate cacheline ping-pong.
- */
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
-
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 58b7f27cb3e..344faf8d0d6 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -127,8 +127,8 @@ void __cpuinit irq_ctx_init(int cpu)
return;
irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
- THREAD_FLAGS,
- THREAD_ORDER));
+ THREADINFO_GFP,
+ THREAD_SIZE_ORDER));
memset(&irqctx->tinfo, 0, sizeof(struct thread_info));
irqctx->tinfo.cpu = cpu;
irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
@@ -137,8 +137,8 @@ void __cpuinit irq_ctx_init(int cpu)
per_cpu(hardirq_ctx, cpu) = irqctx;
irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
- THREAD_FLAGS,
- THREAD_ORDER));
+ THREADINFO_GFP,
+ THREAD_SIZE_ORDER));
memset(&irqctx->tinfo, 0, sizeof(struct thread_info));
irqctx->tinfo.cpu = cpu;
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 8bfb6146f75..3f61904365c 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -444,12 +444,12 @@ void kgdb_roundup_cpus(unsigned long flags)
/**
* kgdb_arch_handle_exception - Handle architecture specific GDB packets.
- * @vector: The error vector of the exception that happened.
+ * @e_vector: The error vector of the exception that happened.
* @signo: The signal number of the exception that happened.
* @err_code: The error code of the exception that happened.
- * @remcom_in_buffer: The buffer of the packet we have read.
- * @remcom_out_buffer: The buffer of %BUFMAX bytes to write a packet into.
- * @regs: The &struct pt_regs of the current process.
+ * @remcomInBuffer: The buffer of the packet we have read.
+ * @remcomOutBuffer: The buffer of %BUFMAX bytes to write a packet into.
+ * @linux_regs: The &struct pt_regs of the current process.
*
* This function MUST handle the 'c' and 's' command packets,
* as well packets to set / remove a hardware breakpoint, if used.
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index e213fc8408d..e2f751efb7b 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1037,9 +1037,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
"current sp %p does not match saved sp %p\n",
stack_addr(regs), kcb->jprobe_saved_sp);
printk(KERN_ERR "Saved registers for jprobe %p\n", jp);
- show_registers(saved_regs);
+ show_regs(saved_regs);
printk(KERN_ERR "Current registers\n");
- show_registers(regs);
+ show_regs(regs);
BUG();
}
*regs = kcb->jprobe_saved_regs;
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index f8492da65bf..f1b42b3a186 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -22,6 +22,7 @@
#include <asm/msr.h>
#include <asm/apic.h>
#include <linux/percpu.h>
+#include <linux/hardirq.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
@@ -114,6 +115,20 @@ static void kvm_get_preset_lpj(void)
preset_lpj = lpj;
}
+bool kvm_check_and_clear_guest_paused(void)
+{
+ bool ret = false;
+ struct pvclock_vcpu_time_info *src;
+
+ src = &__get_cpu_var(hv_clock);
+ if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
+ __this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED);
+ ret = true;
+ }
+
+ return ret;
+}
+
static struct clocksource kvm_clock = {
.name = "kvm-clock",
.read = kvm_clock_get_cycles,
diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c
deleted file mode 100644
index 7eb1e2b9782..00000000000
--- a/arch/x86/kernel/mca_32.c
+++ /dev/null
@@ -1,476 +0,0 @@
-/*
- * Written by Martin Kolinek, February 1996
- *
- * Changes:
- *
- * Chris Beauregard July 28th, 1996
- * - Fixed up integrated SCSI detection
- *
- * Chris Beauregard August 3rd, 1996
- * - Made mca_info local
- * - Made integrated registers accessible through standard function calls
- * - Added name field
- * - More sanity checking
- *
- * Chris Beauregard August 9th, 1996
- * - Rewrote /proc/mca
- *
- * Chris Beauregard January 7th, 1997
- * - Added basic NMI-processing
- * - Added more information to mca_info structure
- *
- * David Weinehall October 12th, 1998
- * - Made a lot of cleaning up in the source
- * - Added use of save_flags / restore_flags
- * - Added the 'driver_loaded' flag in MCA_adapter
- * - Added an alternative implemention of ZP Gu's mca_find_unused_adapter
- *
- * David Weinehall March 24th, 1999
- * - Fixed the output of 'Driver Installed' in /proc/mca/pos
- * - Made the Integrated Video & SCSI show up even if they have id 0000
- *
- * Alexander Viro November 9th, 1999
- * - Switched to regular procfs methods
- *
- * Alfred Arnold & David Weinehall August 23rd, 2000
- * - Added support for Planar POS-registers
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/mca.h>
-#include <linux/kprobes.h>
-#include <linux/slab.h>
-#include <asm/io.h>
-#include <linux/proc_fs.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/ioport.h>
-#include <asm/uaccess.h>
-#include <linux/init.h>
-
-static unsigned char which_scsi;
-
-int MCA_bus;
-EXPORT_SYMBOL(MCA_bus);
-
-/*
- * Motherboard register spinlock. Untested on SMP at the moment, but
- * are there any MCA SMP boxes?
- *
- * Yes - Alan
- */
-static DEFINE_SPINLOCK(mca_lock);
-
-/* Build the status info for the adapter */
-
-static void mca_configure_adapter_status(struct mca_device *mca_dev)
-{
- mca_dev->status = MCA_ADAPTER_NONE;
-
- mca_dev->pos_id = mca_dev->pos[0]
- + (mca_dev->pos[1] << 8);
-
- if (!mca_dev->pos_id && mca_dev->slot < MCA_MAX_SLOT_NR) {
-
- /*
- * id = 0x0000 usually indicates hardware failure,
- * however, ZP Gu (zpg@castle.net> reports that his 9556
- * has 0x0000 as id and everything still works. There
- * also seem to be an adapter with id = 0x0000; the
- * NCR Parallel Bus Memory Card. Until this is confirmed,
- * however, this code will stay.
- */
-
- mca_dev->status = MCA_ADAPTER_ERROR;
-
- return;
- } else if (mca_dev->pos_id != 0xffff) {
-
- /*
- * 0xffff usually indicates that there's no adapter,
- * however, some integrated adapters may have 0xffff as
- * their id and still be valid. Examples are on-board
- * VGA of the 55sx, the integrated SCSI of the 56 & 57,
- * and possibly also the 95 ULTIMEDIA.
- */
-
- mca_dev->status = MCA_ADAPTER_NORMAL;
- }
-
- if ((mca_dev->pos_id == 0xffff ||
- mca_dev->pos_id == 0x0000) && mca_dev->slot >= MCA_MAX_SLOT_NR) {
- int j;
-
- for (j = 2; j < 8; j++) {
- if (mca_dev->pos[j] != 0xff) {
- mca_dev->status = MCA_ADAPTER_NORMAL;
- break;
- }
- }
- }
-
- if (!(mca_dev->pos[2] & MCA_ENABLED)) {
-
- /* enabled bit is in POS 2 */
-
- mca_dev->status = MCA_ADAPTER_DISABLED;
- }
-} /* mca_configure_adapter_status */
-
-/*--------------------------------------------------------------------*/
-
-static struct resource mca_standard_resources[] = {
- { .start = 0x60, .end = 0x60, .name = "system control port B (MCA)" },
- { .start = 0x90, .end = 0x90, .name = "arbitration (MCA)" },
- { .start = 0x91, .end = 0x91, .name = "card Select Feedback (MCA)" },
- { .start = 0x92, .end = 0x92, .name = "system Control port A (MCA)" },
- { .start = 0x94, .end = 0x94, .name = "system board setup (MCA)" },
- { .start = 0x96, .end = 0x97, .name = "POS (MCA)" },
- { .start = 0x100, .end = 0x107, .name = "POS (MCA)" }
-};
-
-#define MCA_STANDARD_RESOURCES ARRAY_SIZE(mca_standard_resources)
-
-/*
- * mca_read_and_store_pos - read the POS registers into a memory buffer
- * @pos: a char pointer to 8 bytes, contains the POS register value on
- * successful return
- *
- * Returns 1 if a card actually exists (i.e. the pos isn't
- * all 0xff) or 0 otherwise
- */
-static int mca_read_and_store_pos(unsigned char *pos)
-{
- int j;
- int found = 0;
-
- for (j = 0; j < 8; j++) {
- pos[j] = inb_p(MCA_POS_REG(j));
- if (pos[j] != 0xff) {
- /* 0xff all across means no device. 0x00 means
- * something's broken, but a device is
- * probably there. However, if you get 0x00
- * from a motherboard register it won't matter
- * what we find. For the record, on the
- * 57SLC, the integrated SCSI adapter has
- * 0xffff for the adapter ID, but nonzero for
- * other registers. */
-
- found = 1;
- }
- }
- return found;
-}
-
-static unsigned char mca_pc_read_pos(struct mca_device *mca_dev, int reg)
-{
- unsigned char byte;
- unsigned long flags;
-
- if (reg < 0 || reg >= 8)
- return 0;
-
- spin_lock_irqsave(&mca_lock, flags);
- if (mca_dev->pos_register) {
- /* Disable adapter setup, enable motherboard setup */
-
- outb_p(0, MCA_ADAPTER_SETUP_REG);
- outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG);
-
- byte = inb_p(MCA_POS_REG(reg));
- outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
- } else {
-
- /* Make sure motherboard setup is off */
-
- outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
-
- /* Read the appropriate register */
-
- outb_p(0x8|(mca_dev->slot & 0xf), MCA_ADAPTER_SETUP_REG);
- byte = inb_p(MCA_POS_REG(reg));
- outb_p(0, MCA_ADAPTER_SETUP_REG);
- }
- spin_unlock_irqrestore(&mca_lock, flags);
-
- mca_dev->pos[reg] = byte;
-
- return byte;
-}
-
-static void mca_pc_write_pos(struct mca_device *mca_dev, int reg,
- unsigned char byte)
-{
- unsigned long flags;
-
- if (reg < 0 || reg >= 8)
- return;
-
- spin_lock_irqsave(&mca_lock, flags);
-
- /* Make sure motherboard setup is off */
-
- outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
-
- /* Read in the appropriate register */
-
- outb_p(0x8|(mca_dev->slot&0xf), MCA_ADAPTER_SETUP_REG);
- outb_p(byte, MCA_POS_REG(reg));
- outb_p(0, MCA_ADAPTER_SETUP_REG);
-
- spin_unlock_irqrestore(&mca_lock, flags);
-
- /* Update the global register list, while we have the byte */
-
- mca_dev->pos[reg] = byte;
-
-}
-
-/* for the primary MCA bus, we have identity transforms */
-static int mca_dummy_transform_irq(struct mca_device *mca_dev, int irq)
-{
- return irq;
-}
-
-static int mca_dummy_transform_ioport(struct mca_device *mca_dev, int port)
-{
- return port;
-}
-
-static void *mca_dummy_transform_memory(struct mca_device *mca_dev, void *mem)
-{
- return mem;
-}
-
-
-static int __init mca_init(void)
-{
- unsigned int i, j;
- struct mca_device *mca_dev;
- unsigned char pos[8];
- short mca_builtin_scsi_ports[] = {0xf7, 0xfd, 0x00};
- struct mca_bus *bus;
-
- /*
- * WARNING: Be careful when making changes here. Putting an adapter
- * and the motherboard simultaneously into setup mode may result in
- * damage to chips (according to The Indispensable PC Hardware Book
- * by Hans-Peter Messmer). Also, we disable system interrupts (so
- * that we are not disturbed in the middle of this).
- */
-
- /* Make sure the MCA bus is present */
-
- if (mca_system_init()) {
- printk(KERN_ERR "MCA bus system initialisation failed\n");
- return -ENODEV;
- }
-
- if (!MCA_bus)
- return -ENODEV;
-
- printk(KERN_INFO "Micro Channel bus detected.\n");
-
- /* All MCA systems have at least a primary bus */
- bus = mca_attach_bus(MCA_PRIMARY_BUS);
- if (!bus)
- goto out_nomem;
- bus->default_dma_mask = 0xffffffffLL;
- bus->f.mca_write_pos = mca_pc_write_pos;
- bus->f.mca_read_pos = mca_pc_read_pos;
- bus->f.mca_transform_irq = mca_dummy_transform_irq;
- bus->f.mca_transform_ioport = mca_dummy_transform_ioport;
- bus->f.mca_transform_memory = mca_dummy_transform_memory;
-
- /* get the motherboard device */
- mca_dev = kzalloc(sizeof(struct mca_device), GFP_KERNEL);
- if (unlikely(!mca_dev))
- goto out_nomem;
-
- /*
- * We do not expect many MCA interrupts during initialization,
- * but let us be safe:
- */
- spin_lock_irq(&mca_lock);
-
- /* Make sure adapter setup is off */
-
- outb_p(0, MCA_ADAPTER_SETUP_REG);
-
- /* Read motherboard POS registers */
-
- mca_dev->pos_register = 0x7f;
- outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG);
- mca_dev->name[0] = 0;
- mca_read_and_store_pos(mca_dev->pos);
- mca_configure_adapter_status(mca_dev);
- /* fake POS and slot for a motherboard */
- mca_dev->pos_id = MCA_MOTHERBOARD_POS;
- mca_dev->slot = MCA_MOTHERBOARD;
- mca_register_device(MCA_PRIMARY_BUS, mca_dev);
-
- mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC);
- if (unlikely(!mca_dev))
- goto out_unlock_nomem;
-
- /* Put motherboard into video setup mode, read integrated video
- * POS registers, and turn motherboard setup off.
- */
-
- mca_dev->pos_register = 0xdf;
- outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG);
- mca_dev->name[0] = 0;
- mca_read_and_store_pos(mca_dev->pos);
- mca_configure_adapter_status(mca_dev);
- /* fake POS and slot for the integrated video */
- mca_dev->pos_id = MCA_INTEGVIDEO_POS;
- mca_dev->slot = MCA_INTEGVIDEO;
- mca_register_device(MCA_PRIMARY_BUS, mca_dev);
-
- /*
- * Put motherboard into scsi setup mode, read integrated scsi
- * POS registers, and turn motherboard setup off.
- *
- * It seems there are two possible SCSI registers. Martin says that
- * for the 56,57, 0xf7 is the one, but fails on the 76.
- * Alfredo (apena@vnet.ibm.com) says
- * 0xfd works on his machine. We'll try both of them. I figure it's
- * a good bet that only one could be valid at a time. This could
- * screw up though if one is used for something else on the other
- * machine.
- */
-
- for (i = 0; (which_scsi = mca_builtin_scsi_ports[i]) != 0; i++) {
- outb_p(which_scsi, MCA_MOTHERBOARD_SETUP_REG);
- if (mca_read_and_store_pos(pos))
- break;
- }
- if (which_scsi) {
- /* found a scsi card */
- mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC);
- if (unlikely(!mca_dev))
- goto out_unlock_nomem;
-
- for (j = 0; j < 8; j++)
- mca_dev->pos[j] = pos[j];
-
- mca_configure_adapter_status(mca_dev);
- /* fake POS and slot for integrated SCSI controller */
- mca_dev->pos_id = MCA_INTEGSCSI_POS;
- mca_dev->slot = MCA_INTEGSCSI;
- mca_dev->pos_register = which_scsi;
- mca_register_device(MCA_PRIMARY_BUS, mca_dev);
- }
-
- /* Turn off motherboard setup */
-
- outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
-
- /*
- * Now loop over MCA slots: put each adapter into setup mode, and
- * read its POS registers. Then put adapter setup off.
- */
-
- for (i = 0; i < MCA_MAX_SLOT_NR; i++) {
- outb_p(0x8|(i&0xf), MCA_ADAPTER_SETUP_REG);
- if (!mca_read_and_store_pos(pos))
- continue;
-
- mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC);
- if (unlikely(!mca_dev))
- goto out_unlock_nomem;
-
- for (j = 0; j < 8; j++)
- mca_dev->pos[j] = pos[j];
-
- mca_dev->driver_loaded = 0;
- mca_dev->slot = i;
- mca_dev->pos_register = 0;
- mca_configure_adapter_status(mca_dev);
- mca_register_device(MCA_PRIMARY_BUS, mca_dev);
- }
- outb_p(0, MCA_ADAPTER_SETUP_REG);
-
- /* Enable interrupts and return memory start */
- spin_unlock_irq(&mca_lock);
-
- for (i = 0; i < MCA_STANDARD_RESOURCES; i++)
- request_resource(&ioport_resource, mca_standard_resources + i);
-
- mca_do_proc_init();
-
- return 0;
-
- out_unlock_nomem:
- spin_unlock_irq(&mca_lock);
- out_nomem:
- printk(KERN_EMERG "Failed memory allocation in MCA setup!\n");
- return -ENOMEM;
-}
-
-subsys_initcall(mca_init);
-
-/*--------------------------------------------------------------------*/
-
-static __kprobes void
-mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)
-{
- int slot = mca_dev->slot;
-
- if (slot == MCA_INTEGSCSI) {
- printk(KERN_CRIT "NMI: caused by MCA integrated SCSI adapter (%s)\n",
- mca_dev->name);
- } else if (slot == MCA_INTEGVIDEO) {
- printk(KERN_CRIT "NMI: caused by MCA integrated video adapter (%s)\n",
- mca_dev->name);
- } else if (slot == MCA_MOTHERBOARD) {
- printk(KERN_CRIT "NMI: caused by motherboard (%s)\n",
- mca_dev->name);
- }
-
- /* More info available in POS 6 and 7? */
-
- if (check_flag) {
- unsigned char pos6, pos7;
-
- pos6 = mca_device_read_pos(mca_dev, 6);
- pos7 = mca_device_read_pos(mca_dev, 7);
-
- printk(KERN_CRIT "NMI: POS 6 = 0x%x, POS 7 = 0x%x\n", pos6, pos7);
- }
-
-} /* mca_handle_nmi_slot */
-
-/*--------------------------------------------------------------------*/
-
-static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data)
-{
- struct mca_device *mca_dev = to_mca_device(dev);
- unsigned char pos5;
-
- pos5 = mca_device_read_pos(mca_dev, 5);
-
- if (!(pos5 & 0x80)) {
- /*
- * Bit 7 of POS 5 is reset when this adapter has a hardware
- * error. Bit 7 it reset if there's error information
- * available in POS 6 and 7.
- */
- mca_handle_nmi_device(mca_dev, !(pos5 & 0x40));
- return 1;
- }
- return 0;
-}
-
-void __kprobes mca_handle_nmi(void)
-{
- /*
- * First try - scan the various adapters and see if a specific
- * adapter was responsible for the error.
- */
- bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback);
-}
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index c9bda6d6035..24b852b61be 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -298,20 +298,31 @@ static ssize_t reload_store(struct device *dev,
const char *buf, size_t size)
{
unsigned long val;
- int cpu = dev->id;
- int ret = 0;
- char *end;
+ int cpu;
+ ssize_t ret = 0, tmp_ret;
- val = simple_strtoul(buf, &end, 0);
- if (end == buf)
+ /* allow reload only from the BSP */
+ if (boot_cpu_data.cpu_index != dev->id)
return -EINVAL;
- if (val == 1) {
- get_online_cpus();
- if (cpu_online(cpu))
- ret = reload_for_cpu(cpu);
- put_online_cpus();
+ ret = kstrtoul(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ if (val != 1)
+ return size;
+
+ get_online_cpus();
+ for_each_online_cpu(cpu) {
+ tmp_ret = reload_for_cpu(cpu);
+ if (tmp_ret != 0)
+ pr_warn("Error reloading microcode on CPU %d\n", cpu);
+
+ /* save retval of the first encountered reload error */
+ if (!ret)
+ ret = tmp_ret;
}
+ put_online_cpus();
if (!ret)
ret = size;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index ca470e4c92d..d2b56489d70 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -27,7 +27,6 @@
#include <asm/proto.h>
#include <asm/bios_ebda.h>
#include <asm/e820.h>
-#include <asm/trampoline.h>
#include <asm/setup.h>
#include <asm/smp.h>
@@ -97,7 +96,7 @@ static void __init MP_bus_info(struct mpc_bus *m)
set_bit(m->busid, mp_bus_not_pci);
if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+#ifdef CONFIG_EISA
mp_bus_id_to_type[m->busid] = MP_BUS_ISA;
#endif
} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
@@ -105,12 +104,10 @@ static void __init MP_bus_info(struct mpc_bus *m)
x86_init.mpparse.mpc_oem_pci_bus(m);
clear_bit(m->busid, mp_bus_not_pci);
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+#ifdef CONFIG_EISA
mp_bus_id_to_type[m->busid] = MP_BUS_PCI;
} else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
mp_bus_id_to_type[m->busid] = MP_BUS_EISA;
- } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) {
- mp_bus_id_to_type[m->busid] = MP_BUS_MCA;
#endif
} else
printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
@@ -368,9 +365,6 @@ static void __init construct_ioapic_table(int mpc_default_type)
case 3:
memcpy(bus.bustype, "EISA ", 6);
break;
- case 4:
- case 7:
- memcpy(bus.bustype, "MCA ", 6);
}
MP_bus_info(&bus);
if (mpc_default_type > 4) {
@@ -573,8 +567,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
struct mpf_intel *mpf;
unsigned long mem;
- apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
- bp, length);
+ apic_printk(APIC_VERBOSE, "Scan for SMP in [mem %#010lx-%#010lx]\n",
+ base, base + length - 1);
BUILD_BUG_ON(sizeof(*mpf) != 16);
while (length > 0) {
@@ -589,8 +583,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
#endif
mpf_found = mpf;
- printk(KERN_INFO "found SMP MP-table at [%p] %llx\n",
- mpf, (u64)virt_to_phys(mpf));
+ printk(KERN_INFO "found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n",
+ (unsigned long long) virt_to_phys(mpf),
+ (unsigned long long) virt_to_phys(mpf) +
+ sizeof(*mpf) - 1, mpf);
mem = virt_to_phys(mpf);
memblock_reserve(mem, sizeof(*mpf));
@@ -623,7 +619,7 @@ void __init default_find_smp_config(void)
return;
/*
* If it is an SMP machine we should know now, unless the
- * configuration is in an EISA/MCA bus machine with an
+ * configuration is in an EISA bus machine with an
* extended bios data area.
*
* there is a real-mode segmented pointer pointing to the
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 32856fa4384..a0b2f84457b 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -19,8 +19,6 @@
#include <linux/slab.h>
#include <linux/export.h>
-#include <linux/mca.h>
-
#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif
@@ -31,14 +29,6 @@
#include <asm/nmi.h>
#include <asm/x86_init.h>
-#define NMI_MAX_NAMELEN 16
-struct nmiaction {
- struct list_head list;
- nmi_handler_t handler;
- unsigned int flags;
- char *name;
-};
-
struct nmi_desc {
spinlock_t lock;
struct list_head head;
@@ -54,6 +44,14 @@ static struct nmi_desc nmi_desc[NMI_MAX] =
.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
.head = LIST_HEAD_INIT(nmi_desc[1].head),
},
+ {
+ .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
+ .head = LIST_HEAD_INIT(nmi_desc[2].head),
+ },
+ {
+ .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
+ .head = LIST_HEAD_INIT(nmi_desc[3].head),
+ },
};
@@ -84,7 +82,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
#define nmi_to_desc(type) (&nmi_desc[type])
-static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
+static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
{
struct nmi_desc *desc = nmi_to_desc(type);
struct nmiaction *a;
@@ -107,11 +105,14 @@ static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs,
return handled;
}
-static int __setup_nmi(unsigned int type, struct nmiaction *action)
+int __register_nmi_handler(unsigned int type, struct nmiaction *action)
{
struct nmi_desc *desc = nmi_to_desc(type);
unsigned long flags;
+ if (!action->handler)
+ return -EINVAL;
+
spin_lock_irqsave(&desc->lock, flags);
/*
@@ -120,6 +121,8 @@ static int __setup_nmi(unsigned int type, struct nmiaction *action)
* to manage expectations
*/
WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
+ WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
+ WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));
/*
* some handlers need to be executed first otherwise a fake
@@ -133,8 +136,9 @@ static int __setup_nmi(unsigned int type, struct nmiaction *action)
spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
+EXPORT_SYMBOL(__register_nmi_handler);
-static struct nmiaction *__free_nmi(unsigned int type, const char *name)
+void unregister_nmi_handler(unsigned int type, const char *name)
{
struct nmi_desc *desc = nmi_to_desc(type);
struct nmiaction *n;
@@ -157,61 +161,16 @@ static struct nmiaction *__free_nmi(unsigned int type, const char *name)
spin_unlock_irqrestore(&desc->lock, flags);
synchronize_rcu();
- return (n);
}
-
-int register_nmi_handler(unsigned int type, nmi_handler_t handler,
- unsigned long nmiflags, const char *devname)
-{
- struct nmiaction *action;
- int retval = -ENOMEM;
-
- if (!handler)
- return -EINVAL;
-
- action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL);
- if (!action)
- goto fail_action;
-
- action->handler = handler;
- action->flags = nmiflags;
- action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL);
- if (!action->name)
- goto fail_action_name;
-
- retval = __setup_nmi(type, action);
-
- if (retval)
- goto fail_setup_nmi;
-
- return retval;
-
-fail_setup_nmi:
- kfree(action->name);
-fail_action_name:
- kfree(action);
-fail_action:
-
- return retval;
-}
-EXPORT_SYMBOL_GPL(register_nmi_handler);
-
-void unregister_nmi_handler(unsigned int type, const char *name)
-{
- struct nmiaction *a;
-
- a = __free_nmi(type, name);
- if (a) {
- kfree(a->name);
- kfree(a);
- }
-}
-
EXPORT_SYMBOL_GPL(unregister_nmi_handler);
-static notrace __kprobes void
+static __kprobes void
pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
+ /* check to see if anyone registered against these types of errors */
+ if (nmi_handle(NMI_SERR, regs, false))
+ return;
+
pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
reason, smp_processor_id());
@@ -236,15 +195,19 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
outb(reason, NMI_REASON_PORT);
}
-static notrace __kprobes void
+static __kprobes void
io_check_error(unsigned char reason, struct pt_regs *regs)
{
unsigned long i;
+ /* check to see if anyone registered against these types of errors */
+ if (nmi_handle(NMI_IO_CHECK, regs, false))
+ return;
+
pr_emerg(
"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
reason, smp_processor_id());
- show_registers(regs);
+ show_regs(regs);
if (panic_on_io_nmi)
panic("NMI IOCK error: Not continuing");
@@ -263,7 +226,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
outb(reason, NMI_REASON_PORT);
}
-static notrace __kprobes void
+static __kprobes void
unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
{
int handled;
@@ -282,16 +245,6 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
__this_cpu_add(nmi_stats.unknown, 1);
-#ifdef CONFIG_MCA
- /*
- * Might actually be able to figure out what the guilty party
- * is:
- */
- if (MCA_bus) {
- mca_handle_nmi();
- return;
- }
-#endif
pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
reason, smp_processor_id());
@@ -305,7 +258,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
static DEFINE_PER_CPU(bool, swallow_nmi);
static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
-static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
+static __kprobes void default_do_nmi(struct pt_regs *regs)
{
unsigned char reason = 0;
int handled;
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index 2c39dcd510f..149b8d9c6ad 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -13,6 +13,7 @@
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <linux/init.h>
+#include <linux/percpu.h>
#include <asm/apic.h>
#include <asm/nmi.h>
@@ -41,7 +42,7 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs)
static void __init init_nmi_testsuite(void)
{
/* trap all the unknown NMIs we may generate */
- register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
+ register_nmi_handler_initonly(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
}
static void __init cleanup_nmi_testsuite(void)
@@ -63,7 +64,7 @@ static void __init test_nmi_ipi(struct cpumask *mask)
{
unsigned long timeout;
- if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback,
+ if (register_nmi_handler_initonly(NMI_LOCAL, test_nmi_ipi_callback,
NMI_FLAG_FIRST, "nmi_selftest")) {
nmi_fail = FAILURE;
return;
@@ -117,15 +118,15 @@ static void __init dotest(void (*testcase_fn)(void), int expected)
unexpected_testcase_failures++;
if (nmi_fail == FAILURE)
- printk("FAILED |");
+ printk(KERN_CONT "FAILED |");
else if (nmi_fail == TIMEOUT)
- printk("TIMEOUT|");
+ printk(KERN_CONT "TIMEOUT|");
else
- printk("ERROR |");
+ printk(KERN_CONT "ERROR |");
dump_stack();
} else {
testcase_successes++;
- printk(" ok |");
+ printk(KERN_CONT " ok |");
}
testcase_total++;
@@ -150,10 +151,10 @@ void __init nmi_selftest(void)
print_testname("remote IPI");
dotest(remote_ipi, SUCCESS);
- printk("\n");
+ printk(KERN_CONT "\n");
print_testname("local IPI");
dotest(local_ipi, SUCCESS);
- printk("\n");
+ printk(KERN_CONT "\n");
cleanup_nmi_testsuite();
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index ab137605e69..9ce885996fd 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -241,16 +241,16 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA
static inline void enter_lazy(enum paravirt_lazy_mode mode)
{
- BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
+ BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
- percpu_write(paravirt_lazy_mode, mode);
+ this_cpu_write(paravirt_lazy_mode, mode);
}
static void leave_lazy(enum paravirt_lazy_mode mode)
{
- BUG_ON(percpu_read(paravirt_lazy_mode) != mode);
+ BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode);
- percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
+ this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
}
void paravirt_enter_lazy_mmu(void)
@@ -267,7 +267,7 @@ void paravirt_start_context_switch(struct task_struct *prev)
{
BUG_ON(preemptible());
- if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
+ if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
arch_leave_lazy_mmu_mode();
set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
}
@@ -289,7 +289,7 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
if (in_interrupt())
return PARAVIRT_LAZY_NONE;
- return percpu_read(paravirt_lazy_mode);
+ return this_cpu_read(paravirt_lazy_mode);
}
void arch_flush_lazy_mmu_mode(void)
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index d0b2fb9ccbb..b72838bae64 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1480,8 +1480,9 @@ cleanup:
static int __init calgary_parse_options(char *p)
{
unsigned int bridge;
+ unsigned long val;
size_t len;
- char* endp;
+ ssize_t ret;
while (*p) {
if (!strncmp(p, "64k", 3))
@@ -1512,10 +1513,11 @@ static int __init calgary_parse_options(char *p)
++p;
if (*p == '\0')
break;
- bridge = simple_strtoul(p, &endp, 0);
- if (p == endp)
+ ret = kstrtoul(p, 0, &val);
+ if (ret)
break;
+ bridge = val;
if (bridge < MAX_PHB_BUS_NUM) {
printk(KERN_INFO "Calgary: disabling "
"translation for PHB %#x\n", bridge);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 3003250ac51..c0f420f76cd 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -101,13 +101,18 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
{
unsigned long dma_mask;
struct page *page;
+ unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
dma_addr_t addr;
dma_mask = dma_alloc_coherent_mask(dev, flag);
flag |= __GFP_ZERO;
again:
- page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
+ page = NULL;
+ if (!(flag & GFP_ATOMIC))
+ page = dma_alloc_from_contiguous(dev, count, get_order(size));
+ if (!page)
+ page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
if (!page)
return NULL;
@@ -127,6 +132,16 @@ again:
return page_address(page);
}
+void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_addr, struct dma_attrs *attrs)
+{
+ unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ struct page *page = virt_to_page(vaddr);
+
+ if (!dma_release_from_contiguous(dev, page, count))
+ free_pages((unsigned long)vaddr, get_order(size));
+}
+
/*
* See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel
* parameter documentation.
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index f96050685b4..871be4a84c7 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -74,12 +74,6 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
return nents;
}
-static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_addr, struct dma_attrs *attrs)
-{
- free_pages((unsigned long)vaddr, get_order(size));
-}
-
static void nommu_sync_single_for_device(struct device *dev,
dma_addr_t addr, size_t size,
enum dma_data_direction dir)
@@ -97,7 +91,7 @@ static void nommu_sync_sg_for_device(struct device *dev,
struct dma_map_ops nommu_dma_ops = {
.alloc = dma_generic_alloc_coherent,
- .free = nommu_free_coherent,
+ .free = dma_generic_free_coherent,
.map_sg = nommu_map_sg,
.map_page = nommu_map_page,
.sync_single_for_device = nommu_sync_single_for_device,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 1d92a5ab6e8..735279e54e5 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -27,6 +27,15 @@
#include <asm/debugreg.h>
#include <asm/nmi.h>
+/*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's. The TSS size is kept cacheline-aligned
+ * so they are allowed to end up in the .data..cacheline_aligned
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */
+DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
+
#ifdef CONFIG_X86_64
static DEFINE_PER_CPU(unsigned char, is_idle);
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -47,10 +56,16 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
struct kmem_cache *task_xstate_cachep;
EXPORT_SYMBOL_GPL(task_xstate_cachep);
+/*
+ * this gets called so that we can store lazy state into memory and copy the
+ * current task into the new thread.
+ */
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
int ret;
+ unlazy_fpu(src);
+
*dst = *src;
if (fpu_allocated(&src->thread.fpu)) {
memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
@@ -67,10 +82,9 @@ void free_thread_xstate(struct task_struct *tsk)
fpu_free(&tsk->thread.fpu);
}
-void free_thread_info(struct thread_info *ti)
+void arch_release_task_struct(struct task_struct *tsk)
{
- free_thread_xstate(ti->task);
- free_pages((unsigned long)ti, THREAD_ORDER);
+ free_thread_xstate(tsk);
}
void arch_task_cache_init(void)
@@ -81,6 +95,16 @@ void arch_task_cache_init(void)
SLAB_PANIC | SLAB_NOTRACK, NULL);
}
+static inline void drop_fpu(struct task_struct *tsk)
+{
+ /*
+ * Forget coprocessor state..
+ */
+ tsk->fpu_counter = 0;
+ clear_fpu(tsk);
+ clear_used_math();
+}
+
/*
* Free current thread data structures etc..
*/
@@ -103,12 +127,8 @@ void exit_thread(void)
put_cpu();
kfree(bp);
}
-}
-void show_regs(struct pt_regs *regs)
-{
- show_registers(regs);
- show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), 0);
+ drop_fpu(me);
}
void show_regs_common(void)
@@ -143,12 +163,7 @@ void flush_thread(void)
flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
- /*
- * Forget coprocessor state..
- */
- tsk->fpu_counter = 0;
- clear_fpu(tsk);
- clear_used_math();
+ drop_fpu(tsk);
}
static void hard_disable_TSC(void)
@@ -377,7 +392,7 @@ static inline void play_dead(void)
#ifdef CONFIG_X86_64
void enter_idle(void)
{
- percpu_write(is_idle, 1);
+ this_cpu_write(is_idle, 1);
atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}
@@ -516,26 +531,6 @@ void stop_this_cpu(void *dummy)
}
}
-static void do_nothing(void *unused)
-{
-}
-
-/*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
- *
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
- */
-void cpu_idle_wait(void)
-{
- smp_mb();
- /* kick all the CPUs so that they exit out of pm_idle */
- smp_call_function(do_nothing, NULL, 1);
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
@@ -594,9 +589,17 @@ int mwait_usable(const struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
+ /* Use mwait if idle=mwait boot option is given */
if (boot_option_idle_override == IDLE_FORCE_MWAIT)
return 1;
+ /*
+ * Any idle= boot option other than idle=mwait means that we must not
+ * use mwait. Eg: idle=halt or idle=poll or idle=nomwait
+ */
+ if (boot_option_idle_override != IDLE_NO_OVERRIDE)
+ return 0;
+
if (c->cpuid_level < MWAIT_INFO)
return 0;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index ae6847303e2..516fa186121 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -126,15 +126,6 @@ void release_thread(struct task_struct *dead_task)
release_vm86_irqs(dead_task);
}
-/*
- * This gets called before we allocate a new thread and copy
- * the current task into it.
- */
-void prepare_to_copy(struct task_struct *tsk)
-{
- unlazy_fpu(tsk);
-}
-
int copy_thread(unsigned long clone_flags, unsigned long sp,
unsigned long unused,
struct task_struct *p, struct pt_regs *regs)
@@ -302,7 +293,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
switch_fpu_finish(next_p, fpu);
- percpu_write(current_task, next_p);
+ this_cpu_write(current_task, next_p);
return prev_p;
}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 43d8b48b23e..61cdf7fdf09 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -145,15 +145,6 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls)
return get_desc_base(&t->thread.tls_array[tls]);
}
-/*
- * This gets called before we allocate a new thread and copy
- * the current task into it.
- */
-void prepare_to_copy(struct task_struct *tsk)
-{
- unlazy_fpu(tsk);
-}
-
int copy_thread(unsigned long clone_flags, unsigned long sp,
unsigned long unused,
struct task_struct *p, struct pt_regs *regs)
@@ -237,7 +228,7 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
current->thread.usersp = new_sp;
regs->ip = new_ip;
regs->sp = new_sp;
- percpu_write(old_rsp, new_sp);
+ this_cpu_write(old_rsp, new_sp);
regs->cs = _cs;
regs->ss = _ss;
regs->flags = X86_EFLAGS_IF;
@@ -359,11 +350,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Switch the PDA and FPU contexts.
*/
- prev->usersp = percpu_read(old_rsp);
- percpu_write(old_rsp, next->usersp);
- percpu_write(current_task, next_p);
+ prev->usersp = this_cpu_read(old_rsp);
+ this_cpu_write(old_rsp, next->usersp);
+ this_cpu_write(current_task, next_p);
- percpu_write(kernel_stack,
+ this_cpu_write(kernel_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE - KERNEL_STACK_OFFSET);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 4609190683f..c4c6a5c2bf0 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1415,12 +1415,6 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
#endif
}
-/*
- * This is declared in linux/regset.h and defined in machine-dependent
- * code. We put the export here to ensure no machine forgets it.
- */
-EXPORT_SYMBOL_GPL(task_user_regset_view);
-
static void fill_sigtrap_info(struct task_struct *tsk,
struct pt_regs *regs,
int error_code, int si_code,
@@ -1480,7 +1474,11 @@ long syscall_trace_enter(struct pt_regs *regs)
regs->flags |= X86_EFLAGS_TF;
/* do the secure computing check first */
- secure_computing(regs->orig_ax);
+ if (secure_computing(regs->orig_ax)) {
+ /* seccomp failures shouldn't expose any additional code. */
+ ret = -1L;
+ goto out;
+ }
if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
ret = -1L;
@@ -1505,6 +1503,7 @@ long syscall_trace_enter(struct pt_regs *regs)
regs->dx, regs->r10);
#endif
+out:
return ret ?: regs->orig_ax;
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index d840e69a853..5de92f1abd7 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -24,6 +24,7 @@
#ifdef CONFIG_X86_32
# include <linux/ctype.h>
# include <linux/mc146818rtc.h>
+# include <asm/realmode.h>
#else
# include <asm/x86_init.h>
#endif
@@ -39,7 +40,8 @@ static int reboot_mode;
enum reboot_type reboot_type = BOOT_ACPI;
int reboot_force;
-/* This variable is used privately to keep track of whether or not
+/*
+ * This variable is used privately to keep track of whether or not
* reboot_type is still set to its default value (i.e., reboot= hasn't
* been set on the command line). This is needed so that we can
* suppress DMI scanning for reboot quirks. Without it, it's
@@ -51,7 +53,8 @@ static int reboot_default = 1;
static int reboot_cpu = -1;
#endif
-/* This is set if we need to go through the 'emergency' path.
+/*
+ * This is set if we need to go through the 'emergency' path.
* When machine_emergency_restart() is called, we may be on
* an inconsistent state and won't be able to do a clean cleanup
*/
@@ -60,22 +63,24 @@ static int reboot_emergency;
/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
bool port_cf9_safe = false;
-/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]
- warm Don't set the cold reboot flag
- cold Set the cold reboot flag
- bios Reboot by jumping through the BIOS (only for X86_32)
- smp Reboot by executing reset on BSP or other CPU (only for X86_32)
- triple Force a triple fault (init)
- kbd Use the keyboard controller. cold reset (default)
- acpi Use the RESET_REG in the FADT
- efi Use efi reset_system runtime service
- pci Use the so-called "PCI reset register", CF9
- force Avoid anything that could hang.
+/*
+ * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]
+ * warm Don't set the cold reboot flag
+ * cold Set the cold reboot flag
+ * bios Reboot by jumping through the BIOS (only for X86_32)
+ * smp Reboot by executing reset on BSP or other CPU (only for X86_32)
+ * triple Force a triple fault (init)
+ * kbd Use the keyboard controller. cold reset (default)
+ * acpi Use the RESET_REG in the FADT
+ * efi Use efi reset_system runtime service
+ * pci Use the so-called "PCI reset register", CF9
+ * force Avoid anything that could hang.
*/
static int __init reboot_setup(char *str)
{
for (;;) {
- /* Having anything passed on the command line via
+ /*
+ * Having anything passed on the command line via
* reboot= will cause us to disable DMI checking
* below.
*/
@@ -98,9 +103,11 @@ static int __init reboot_setup(char *str)
if (isdigit(*(str+2)))
reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0');
}
- /* we will leave sorting out the final value
- when we are ready to reboot, since we might not
- have detected BSP APIC ID or smp_num_cpu */
+ /*
+ * We will leave sorting out the final value
+ * when we are ready to reboot, since we might not
+ * have detected BSP APIC ID or smp_num_cpu
+ */
break;
#endif /* CONFIG_SMP */
@@ -150,6 +157,62 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)
return 0;
}
+void machine_real_restart(unsigned int type)
+{
+ void (*restart_lowmem)(unsigned int) = (void (*)(unsigned int))
+ real_mode_header->machine_real_restart_asm;
+
+ local_irq_disable();
+
+ /*
+ * Write zero to CMOS register number 0x0f, which the BIOS POST
+ * routine will recognize as telling it to do a proper reboot. (Well
+ * that's what this book in front of me says -- it may only apply to
+ * the Phoenix BIOS though, it's not clear). At the same time,
+ * disable NMIs by setting the top bit in the CMOS address register,
+ * as we're about to do peculiar things to the CPU. I'm not sure if
+ * `outb_p' is needed instead of just `outb'. Use it to be on the
+ * safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.)
+ */
+ spin_lock(&rtc_lock);
+ CMOS_WRITE(0x00, 0x8f);
+ spin_unlock(&rtc_lock);
+
+ /*
+ * Switch back to the initial page table.
+ */
+ load_cr3(initial_page_table);
+
+ /*
+ * Write 0x1234 to absolute memory location 0x472. The BIOS reads
+ * this on booting to tell it to "Bypass memory test (also warm
+ * boot)". This seems like a fairly standard thing that gets set by
+ * REBOOT.COM programs, and the previous reset routine did this
+ * too. */
+ *((unsigned short *)0x472) = reboot_mode;
+
+ /* Jump to the identity-mapped low memory code */
+ restart_lowmem(type);
+}
+#ifdef CONFIG_APM_MODULE
+EXPORT_SYMBOL(machine_real_restart);
+#endif
+
+#endif /* CONFIG_X86_32 */
+
+/*
+ * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
+ */
+static int __init set_pci_reboot(const struct dmi_system_id *d)
+{
+ if (reboot_type != BOOT_CF9) {
+ reboot_type = BOOT_CF9;
+ printk(KERN_INFO "%s series board detected. "
+ "Selecting PCI-method for reboots.\n", d->ident);
+ }
+ return 0;
+}
+
static int __init set_kbd_reboot(const struct dmi_system_id *d)
{
if (reboot_type != BOOT_KBD) {
@@ -159,7 +222,12 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d)
return 0;
}
+/*
+ * This is a single dmi_table handling all reboot quirks. Note that
+ * REBOOT_BIOS is only available for 32bit
+ */
static struct dmi_system_id __initdata reboot_dmi_table[] = {
+#ifdef CONFIG_X86_32
{ /* Handle problems with rebooting on Dell E520's */
.callback = set_bios_reboot,
.ident = "Dell E520",
@@ -184,7 +252,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"),
},
},
- { /* Handle problems with rebooting on Dell Optiplex 745's SFF*/
+ { /* Handle problems with rebooting on Dell Optiplex 745's SFF */
.callback = set_bios_reboot,
.ident = "Dell OptiPlex 745",
.matches = {
@@ -192,7 +260,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
},
},
- { /* Handle problems with rebooting on Dell Optiplex 745's DFF*/
+ { /* Handle problems with rebooting on Dell Optiplex 745's DFF */
.callback = set_bios_reboot,
.ident = "Dell OptiPlex 745",
.matches = {
@@ -201,7 +269,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "0MM599"),
},
},
- { /* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */
+ { /* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */
.callback = set_bios_reboot,
.ident = "Dell OptiPlex 745",
.matches = {
@@ -210,7 +278,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "0KW626"),
},
},
- { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */
+ { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */
.callback = set_bios_reboot,
.ident = "Dell OptiPlex 330",
.matches = {
@@ -219,7 +287,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "0KP561"),
},
},
- { /* Handle problems with rebooting on Dell Optiplex 360 with 0T656F */
+ { /* Handle problems with rebooting on Dell Optiplex 360 with 0T656F */
.callback = set_bios_reboot,
.ident = "Dell OptiPlex 360",
.matches = {
@@ -228,7 +296,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "0T656F"),
},
},
- { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G*/
+ { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G */
.callback = set_bios_reboot,
.ident = "Dell OptiPlex 760",
.matches = {
@@ -301,7 +369,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"),
},
},
- { /* Handle problems with rebooting on ASUS P4S800 */
+ { /* Handle problems with rebooting on ASUS P4S800 */
.callback = set_bios_reboot,
.ident = "ASUS P4S800",
.matches = {
@@ -309,7 +377,9 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
},
},
- { /* Handle reboot issue on Acer Aspire one */
+#endif /* CONFIG_X86_32 */
+
+ { /* Handle reboot issue on Acer Aspire one */
.callback = set_kbd_reboot,
.ident = "Acer Aspire One A110",
.matches = {
@@ -317,96 +387,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"),
},
},
- { }
-};
-
-static int __init reboot_init(void)
-{
- /* Only do the DMI check if reboot_type hasn't been overridden
- * on the command line
- */
- if (reboot_default) {
- dmi_check_system(reboot_dmi_table);
- }
- return 0;
-}
-core_initcall(reboot_init);
-
-extern const unsigned char machine_real_restart_asm[];
-extern const u64 machine_real_restart_gdt[3];
-
-void machine_real_restart(unsigned int type)
-{
- void *restart_va;
- unsigned long restart_pa;
- void (*restart_lowmem)(unsigned int);
- u64 *lowmem_gdt;
-
- local_irq_disable();
-
- /* Write zero to CMOS register number 0x0f, which the BIOS POST
- routine will recognize as telling it to do a proper reboot. (Well
- that's what this book in front of me says -- it may only apply to
- the Phoenix BIOS though, it's not clear). At the same time,
- disable NMIs by setting the top bit in the CMOS address register,
- as we're about to do peculiar things to the CPU. I'm not sure if
- `outb_p' is needed instead of just `outb'. Use it to be on the
- safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.)
- */
- spin_lock(&rtc_lock);
- CMOS_WRITE(0x00, 0x8f);
- spin_unlock(&rtc_lock);
-
- /*
- * Switch back to the initial page table.
- */
- load_cr3(initial_page_table);
-
- /* Write 0x1234 to absolute memory location 0x472. The BIOS reads
- this on booting to tell it to "Bypass memory test (also warm
- boot)". This seems like a fairly standard thing that gets set by
- REBOOT.COM programs, and the previous reset routine did this
- too. */
- *((unsigned short *)0x472) = reboot_mode;
-
- /* Patch the GDT in the low memory trampoline */
- lowmem_gdt = TRAMPOLINE_SYM(machine_real_restart_gdt);
-
- restart_va = TRAMPOLINE_SYM(machine_real_restart_asm);
- restart_pa = virt_to_phys(restart_va);
- restart_lowmem = (void (*)(unsigned int))restart_pa;
-
- /* GDT[0]: GDT self-pointer */
- lowmem_gdt[0] =
- (u64)(sizeof(machine_real_restart_gdt) - 1) +
- ((u64)virt_to_phys(lowmem_gdt) << 16);
- /* GDT[1]: 64K real mode code segment */
- lowmem_gdt[1] =
- GDT_ENTRY(0x009b, restart_pa, 0xffff);
-
- /* Jump to the identity-mapped low memory code */
- restart_lowmem(type);
-}
-#ifdef CONFIG_APM_MODULE
-EXPORT_SYMBOL(machine_real_restart);
-#endif
-
-#endif /* CONFIG_X86_32 */
-
-/*
- * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
- */
-static int __init set_pci_reboot(const struct dmi_system_id *d)
-{
- if (reboot_type != BOOT_CF9) {
- reboot_type = BOOT_CF9;
- printk(KERN_INFO "%s series board detected. "
- "Selecting PCI-method for reboots.\n", d->ident);
- }
- return 0;
-}
-
-static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
{ /* Handle problems with rebooting on Apple MacBook5 */
.callback = set_pci_reboot,
.ident = "Apple MacBook5",
@@ -471,20 +451,28 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"),
},
},
+ { /* Handle problems with rebooting on the Precision M6600. */
+ .callback = set_pci_reboot,
+ .ident = "Dell OptiPlex 990",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"),
+ },
+ },
{ }
};
-static int __init pci_reboot_init(void)
+static int __init reboot_init(void)
{
- /* Only do the DMI check if reboot_type hasn't been overridden
+ /*
+ * Only do the DMI check if reboot_type hasn't been overridden
* on the command line
*/
- if (reboot_default) {
- dmi_check_system(pci_reboot_dmi_table);
- }
+ if (reboot_default)
+ dmi_check_system(reboot_dmi_table);
return 0;
}
-core_initcall(pci_reboot_init);
+core_initcall(reboot_init);
static inline void kb_wait(void)
{
@@ -502,14 +490,14 @@ static void vmxoff_nmi(int cpu, struct pt_regs *regs)
cpu_emergency_vmxoff();
}
-/* Use NMIs as IPIs to tell all CPUs to disable virtualization
- */
+/* Use NMIs as IPIs to tell all CPUs to disable virtualization */
static void emergency_vmx_disable_all(void)
{
/* Just make sure we won't change CPUs while doing this */
local_irq_disable();
- /* We need to disable VMX on all CPUs before rebooting, otherwise
+ /*
+ * We need to disable VMX on all CPUs before rebooting, otherwise
* we risk hanging up the machine, because the CPU ignore INIT
* signals when VMX is enabled.
*
@@ -528,8 +516,7 @@ static void emergency_vmx_disable_all(void)
* is still enabling VMX.
*/
if (cpu_has_vmx() && cpu_vmx_enabled()) {
- /* Disable VMX on this CPU.
- */
+ /* Disable VMX on this CPU. */
cpu_vmxoff();
/* Halt and disable VMX on the other CPUs */
@@ -574,12 +561,12 @@ static void native_machine_emergency_restart(void)
/* Could also try the reset bit in the Hammer NB */
switch (reboot_type) {
case BOOT_KBD:
- mach_reboot_fixups(); /* for board specific fixups */
+ mach_reboot_fixups(); /* For board specific fixups */
for (i = 0; i < 10; i++) {
kb_wait();
udelay(50);
- outb(0xfe, 0x64); /* pulse reset low */
+ outb(0xfe, 0x64); /* Pulse reset low */
udelay(50);
}
if (attempt == 0 && orig_reboot_type == BOOT_ACPI) {
@@ -621,7 +608,7 @@ static void native_machine_emergency_restart(void)
case BOOT_CF9:
port_cf9_safe = true;
- /* fall through */
+ /* Fall through */
case BOOT_CF9_COND:
if (port_cf9_safe) {
@@ -659,9 +646,12 @@ void native_machine_shutdown(void)
/* Make certain I only run on the appropriate processor */
set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id));
- /* O.K Now that I'm on the appropriate processor,
- * stop all of the others.
+ /*
+ * O.K Now that I'm on the appropriate processor, stop all of the
+ * others. Also disable the local irq to not receive the per-cpu
+ * timer interrupt which may trigger scheduler's load balance.
*/
+ local_irq_disable();
stop_other_cpus();
#endif
@@ -697,12 +687,11 @@ static void native_machine_restart(char *__unused)
static void native_machine_halt(void)
{
- /* stop other cpus and apics */
+ /* Stop other cpus and apics */
machine_shutdown();
tboot_shutdown(TB_SHUTDOWN_HALT);
- /* stop this cpu */
stop_this_cpu(NULL);
}
@@ -713,7 +702,7 @@ static void native_machine_power_off(void)
machine_shutdown();
pm_power_off();
}
- /* a fallback in case there is no PM info available */
+ /* A fallback in case there is no PM info available */
tboot_shutdown(TB_SHUTDOWN_HALT);
}
@@ -775,7 +764,8 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
cpu = raw_smp_processor_id();
- /* Don't do anything if this handler is invoked on crashing cpu.
+ /*
+ * Don't do anything if this handler is invoked on crashing cpu.
* Otherwise, system will completely hang. Crashing cpu can get
* an NMI if system was initially booted with nmi_watchdog parameter.
*/
@@ -799,7 +789,8 @@ static void smp_send_nmi_allbutself(void)
apic->send_IPI_allbutself(NMI_VECTOR);
}
-/* Halt all other CPUs, calling the specified function on each of them
+/*
+ * Halt all other CPUs, calling the specified function on each of them
*
* This function can be used to halt all other CPUs on crash
* or emergency reboot time. The function passed as parameter
@@ -810,7 +801,7 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
unsigned long msecs;
local_irq_disable();
- /* Make a note of crashing cpu. Will be used in NMI callback.*/
+ /* Make a note of crashing cpu. Will be used in NMI callback. */
crashing_cpu = safe_smp_processor_id();
shootdown_callback = callback;
@@ -819,8 +810,9 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
/* Would it be better to replace the trap vector here? */
if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback,
NMI_FLAG_FIRST, "crash"))
- return; /* return what? */
- /* Ensure the new callback function is set before sending
+ return; /* Return what? */
+ /*
+ * Ensure the new callback function is set before sending
* out the NMI
*/
wmb();
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 1a290156205..16be6dc14db 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -34,7 +34,6 @@
#include <linux/memblock.h>
#include <linux/seq_file.h>
#include <linux/console.h>
-#include <linux/mca.h>
#include <linux/root_dev.h>
#include <linux/highmem.h>
#include <linux/module.h>
@@ -50,6 +49,7 @@
#include <asm/pci-direct.h>
#include <linux/init_ohci1394_dma.h>
#include <linux/kvm_para.h>
+#include <linux/dma-contiguous.h>
#include <linux/errno.h>
#include <linux/kernel.h>
@@ -73,7 +73,7 @@
#include <asm/mtrr.h>
#include <asm/apic.h>
-#include <asm/trampoline.h>
+#include <asm/realmode.h>
#include <asm/e820.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
@@ -179,12 +179,6 @@ struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1};
/* common cpu data for all cpus */
struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1};
EXPORT_SYMBOL(boot_cpu_data);
-static void set_mca_bus(int x)
-{
-#ifdef CONFIG_MCA
- MCA_bus = x;
-#endif
-}
unsigned int def_to_bigsmp;
@@ -340,8 +334,8 @@ static void __init relocate_initrd(void)
memblock_reserve(ramdisk_here, area_size);
initrd_start = ramdisk_here + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size;
- printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
- ramdisk_here, ramdisk_here + ramdisk_size);
+ printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n",
+ ramdisk_here, ramdisk_here + ramdisk_size - 1);
q = (char *)initrd_start;
@@ -372,8 +366,8 @@ static void __init relocate_initrd(void)
/* high pages is not converted by early_res_to_bootmem */
ramdisk_image = boot_params.hdr.ramdisk_image;
ramdisk_size = boot_params.hdr.ramdisk_size;
- printk(KERN_INFO "Move RAMDISK from %016llx - %016llx to"
- " %08llx - %08llx\n",
+ printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to"
+ " [mem %#010llx-%#010llx]\n",
ramdisk_image, ramdisk_image + ramdisk_size - 1,
ramdisk_here, ramdisk_here + ramdisk_size - 1);
}
@@ -393,14 +387,13 @@ static void __init reserve_initrd(void)
initrd_start = 0;
if (ramdisk_size >= (end_of_lowmem>>1)) {
- memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
- printk(KERN_ERR "initrd too large to handle, "
- "disabling initrd\n");
- return;
+ panic("initrd too large to handle, "
+ "disabling initrd (%lld needed, %lld available)\n",
+ ramdisk_size, end_of_lowmem>>1);
}
- printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image,
- ramdisk_end);
+ printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image,
+ ramdisk_end - 1);
if (ramdisk_end <= end_of_lowmem) {
@@ -717,7 +710,6 @@ void __init setup_arch(char **cmdline_p)
apm_info.bios = boot_params.apm_bios_info;
ist_info = boot_params.ist_info;
if (boot_params.sys_desc_table.length != 0) {
- set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
machine_id = boot_params.sys_desc_table.table[0];
machine_submodel_id = boot_params.sys_desc_table.table[1];
BIOS_revision = boot_params.sys_desc_table.table[2];
@@ -914,10 +906,10 @@ void __init setup_arch(char **cmdline_p)
setup_bios_corruption_check();
#endif
- printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
- max_pfn_mapped<<PAGE_SHIFT);
+ printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n",
+ (max_pfn_mapped<<PAGE_SHIFT) - 1);
- setup_trampolines();
+ setup_real_mode();
init_gbpages();
@@ -934,6 +926,7 @@ void __init setup_arch(char **cmdline_p)
}
#endif
memblock.current_limit = get_max_mapped();
+ dma_contiguous_reserve(0);
/*
* NOTE: On x86-32, only from this point on, fixmaps are ready for use.
@@ -975,6 +968,8 @@ void __init setup_arch(char **cmdline_p)
if (boot_cpu_data.cpuid_level >= 0) {
/* A CPU has %cr4 if and only if it has CPUID */
mmu_cr4_features = read_cr4();
+ if (trampoline_cr4_features)
+ *trampoline_cr4_features = mmu_cr4_features;
}
#ifdef CONFIG_X86_32
@@ -1012,7 +1007,8 @@ void __init setup_arch(char **cmdline_p)
init_cpu_to_node();
init_apic_mappings();
- ioapic_and_gsi_init();
+ if (x86_io_apic_ops.init)
+ x86_io_apic_ops.init();
kvm_guest_init();
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 041af2fd088..21af737053a 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -479,18 +479,8 @@ asmlinkage int
sys_sigsuspend(int history0, int history1, old_sigset_t mask)
{
sigset_t blocked;
-
- current->saved_sigmask = current->blocked;
-
- mask &= _BLOCKABLE;
siginitset(&blocked, mask);
- set_current_blocked(&blocked);
-
- current->state = TASK_INTERRUPTIBLE;
- schedule();
-
- set_restore_sigmask();
- return -ERESTARTNOHAND;
+ return sigsuspend(&blocked);
}
asmlinkage int
@@ -565,7 +555,6 @@ unsigned long sys_sigreturn(struct pt_regs *regs)
sizeof(frame->extramask))))
goto badframe;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->sc, &ax))
@@ -591,7 +580,6 @@ long sys_rt_sigreturn(struct pt_regs *regs)
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
@@ -657,42 +645,28 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
struct pt_regs *regs)
{
int usig = signr_convert(sig);
- sigset_t *set = &current->blocked;
- int ret;
-
- if (current_thread_info()->status & TS_RESTORE_SIGMASK)
- set = &current->saved_sigmask;
+ sigset_t *set = sigmask_to_save();
/* Set up the stack frame */
if (is_ia32) {
if (ka->sa.sa_flags & SA_SIGINFO)
- ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
+ return ia32_setup_rt_frame(usig, ka, info, set, regs);
else
- ret = ia32_setup_frame(usig, ka, set, regs);
+ return ia32_setup_frame(usig, ka, set, regs);
#ifdef CONFIG_X86_X32_ABI
} else if (is_x32) {
- ret = x32_setup_rt_frame(usig, ka, info,
+ return x32_setup_rt_frame(usig, ka, info,
(compat_sigset_t *)set, regs);
#endif
} else {
- ret = __setup_rt_frame(sig, ka, info, set, regs);
+ return __setup_rt_frame(sig, ka, info, set, regs);
}
-
- if (ret) {
- force_sigsegv(sig, current);
- return -EFAULT;
- }
-
- current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
- return ret;
}
-static int
+static void
handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
struct pt_regs *regs)
{
- int ret;
-
/* Are we from a system call? */
if (syscall_get_nr(current, regs) >= 0) {
/* If so, check system call restarting.. */
@@ -723,10 +697,10 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
regs->flags &= ~X86_EFLAGS_TF;
- ret = setup_rt_frame(sig, ka, info, regs);
-
- if (ret)
- return ret;
+ if (setup_rt_frame(sig, ka, info, regs) < 0) {
+ force_sigsegv(sig, current);
+ return;
+ }
/*
* Clear the direction flag as per the ABI for function entry.
@@ -741,12 +715,8 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
*/
regs->flags &= ~X86_EFLAGS_TF;
- block_sigmask(ka, sig);
-
- tracehook_signal_handler(sig, info, ka, regs,
- test_thread_flag(TIF_SINGLESTEP));
-
- return 0;
+ signal_delivered(sig, info, ka, regs,
+ test_thread_flag(TIF_SINGLESTEP));
}
#ifdef CONFIG_X86_32
@@ -767,16 +737,6 @@ static void do_signal(struct pt_regs *regs)
siginfo_t info;
int signr;
- /*
- * We want the common case to go fast, which is why we may in certain
- * cases get here from kernel mode. Just return without doing anything
- * if so.
- * X86_32: vm86 regs switched out by assembly code before reaching
- * here, so testing against kernel CS suffices.
- */
- if (!user_mode(regs))
- return;
-
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
/* Whee! Actually deliver the signal. */
@@ -806,10 +766,7 @@ static void do_signal(struct pt_regs *regs)
* If there's no signal to deliver, we just put the saved sigmask
* back.
*/
- if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
- current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
- set_current_blocked(&current->saved_sigmask);
- }
+ restore_saved_sigmask();
}
/*
@@ -837,8 +794,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
- if (current->replacement_session_keyring)
- key_replace_session_keyring();
}
if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();
@@ -946,7 +901,6 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 66c74f481ca..48d2b7ded42 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -109,6 +109,9 @@
* about nothing of note with C stepping upwards.
*/
+static atomic_t stopping_cpu = ATOMIC_INIT(-1);
+static bool smp_no_nmi_ipi = false;
+
/*
* this function sends a 'reschedule' IPI to another CPU.
* it goes straight through and wastes no time serializing
@@ -149,8 +152,6 @@ void native_send_call_func_ipi(const struct cpumask *mask)
free_cpumask_var(allbutself);
}
-static atomic_t stopping_cpu = ATOMIC_INIT(-1);
-
static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
{
/* We are registered on stopping cpu too, avoid spurious NMI */
@@ -162,7 +163,19 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
return NMI_HANDLED;
}
-static void native_nmi_stop_other_cpus(int wait)
+/*
+ * this function calls the 'stop' function on all other CPUs in the system.
+ */
+
+asmlinkage void smp_reboot_interrupt(void)
+{
+ ack_APIC_irq();
+ irq_enter();
+ stop_this_cpu(NULL);
+ irq_exit();
+}
+
+static void native_stop_other_cpus(int wait)
{
unsigned long flags;
unsigned long timeout;
@@ -174,20 +187,25 @@ static void native_nmi_stop_other_cpus(int wait)
* Use an own vector here because smp_call_function
* does lots of things not suitable in a panic situation.
*/
+
+ /*
+ * We start by using the REBOOT_VECTOR irq.
+ * The irq is treated as a sync point to allow critical
+ * regions of code on other cpus to release their spin locks
+ * and re-enable irqs. Jumping straight to an NMI might
+ * accidentally cause deadlocks with further shutdown/panic
+ * code. By syncing, we give the cpus up to one second to
+ * finish their work before we force them off with the NMI.
+ */
if (num_online_cpus() > 1) {
/* did someone beat us here? */
if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1)
return;
- if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback,
- NMI_FLAG_FIRST, "smp_stop"))
- /* Note: we ignore failures here */
- return;
-
- /* sync above data before sending NMI */
+ /* sync above data before sending IRQ */
wmb();
- apic->send_IPI_allbutself(NMI_VECTOR);
+ apic->send_IPI_allbutself(REBOOT_VECTOR);
/*
* Don't wait longer than a second if the caller
@@ -197,63 +215,37 @@ static void native_nmi_stop_other_cpus(int wait)
while (num_online_cpus() > 1 && (wait || timeout--))
udelay(1);
}
+
+ /* if the REBOOT_VECTOR didn't work, try with the NMI */
+ if ((num_online_cpus() > 1) && (!smp_no_nmi_ipi)) {
+ if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback,
+ NMI_FLAG_FIRST, "smp_stop"))
+ /* Note: we ignore failures here */
+ /* Hope the REBOOT_IRQ is good enough */
+ goto finish;
- local_irq_save(flags);
- disable_local_APIC();
- local_irq_restore(flags);
-}
-
-/*
- * this function calls the 'stop' function on all other CPUs in the system.
- */
-
-asmlinkage void smp_reboot_interrupt(void)
-{
- ack_APIC_irq();
- irq_enter();
- stop_this_cpu(NULL);
- irq_exit();
-}
-
-static void native_irq_stop_other_cpus(int wait)
-{
- unsigned long flags;
- unsigned long timeout;
+ /* sync above data before sending IRQ */
+ wmb();
- if (reboot_force)
- return;
+ pr_emerg("Shutting down cpus with NMI\n");
- /*
- * Use an own vector here because smp_call_function
- * does lots of things not suitable in a panic situation.
- * On most systems we could also use an NMI here,
- * but there are a few systems around where NMI
- * is problematic so stay with an non NMI for now
- * (this implies we cannot stop CPUs spinning with irq off
- * currently)
- */
- if (num_online_cpus() > 1) {
- apic->send_IPI_allbutself(REBOOT_VECTOR);
+ apic->send_IPI_allbutself(NMI_VECTOR);
/*
- * Don't wait longer than a second if the caller
+ * Don't wait longer than a 10 ms if the caller
* didn't ask us to wait.
*/
- timeout = USEC_PER_SEC;
+ timeout = USEC_PER_MSEC * 10;
while (num_online_cpus() > 1 && (wait || timeout--))
udelay(1);
}
+finish:
local_irq_save(flags);
disable_local_APIC();
local_irq_restore(flags);
}
-static void native_smp_disable_nmi_ipi(void)
-{
- smp_ops.stop_other_cpus = native_irq_stop_other_cpus;
-}
-
/*
* Reschedule call back.
*/
@@ -287,8 +279,8 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
static int __init nonmi_ipi_setup(char *str)
{
- native_smp_disable_nmi_ipi();
- return 1;
+ smp_no_nmi_ipi = true;
+ return 1;
}
__setup("nonmi_ipi", nonmi_ipi_setup);
@@ -298,7 +290,7 @@ struct smp_ops smp_ops = {
.smp_prepare_cpus = native_smp_prepare_cpus,
.smp_cpus_done = native_smp_cpus_done,
- .stop_other_cpus = native_nmi_stop_other_cpus,
+ .stop_other_cpus = native_stop_other_cpus,
.smp_send_reschedule = native_smp_send_reschedule,
.cpu_up = native_cpu_up,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6e1e406038c..7bd8a082365 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -57,7 +57,7 @@
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/idle.h>
-#include <asm/trampoline.h>
+#include <asm/realmode.h>
#include <asm/cpu.h>
#include <asm/numa.h>
#include <asm/pgtable.h>
@@ -73,23 +73,13 @@
#include <asm/smpboot_hooks.h>
#include <asm/i8259.h>
+#include <asm/realmode.h>
+
/* State of each CPU */
DEFINE_PER_CPU(int, cpu_state) = { 0 };
-/* Store all idle threads, this can be reused instead of creating
-* a new thread. Also avoids complicated thread destroy functionality
-* for idle threads.
-*/
#ifdef CONFIG_HOTPLUG_CPU
/*
- * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is
- * removed after init for !CONFIG_HOTPLUG_CPU.
- */
-static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
-#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
-#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
-
-/*
* We need this for trampoline_base protection from concurrent accesses when
* off- and onlining cores wildly.
*/
@@ -97,20 +87,16 @@ static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
void cpu_hotplug_driver_lock(void)
{
- mutex_lock(&x86_cpu_hotplug_driver_mutex);
+ mutex_lock(&x86_cpu_hotplug_driver_mutex);
}
void cpu_hotplug_driver_unlock(void)
{
- mutex_unlock(&x86_cpu_hotplug_driver_mutex);
+ mutex_unlock(&x86_cpu_hotplug_driver_mutex);
}
ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; }
ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; }
-#else
-static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
-#define get_idle_for_cpu(x) (idle_thread_array[(x)])
-#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p))
#endif
/* Number of siblings per CPU package */
@@ -315,59 +301,102 @@ void __cpuinit smp_store_cpu_info(int id)
identify_secondary_cpu(c);
}
-static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
+static bool __cpuinit
+topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
+{
+ int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+ return !WARN_ONCE(cpu_to_node(cpu1) != cpu_to_node(cpu2),
+ "sched: CPU #%d's %s-sibling CPU #%d is not on the same node! "
+ "[node: %d != %d]. Ignoring dependency.\n",
+ cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
+}
+
+#define link_mask(_m, c1, c2) \
+do { \
+ cpumask_set_cpu((c1), cpu_##_m##_mask(c2)); \
+ cpumask_set_cpu((c2), cpu_##_m##_mask(c1)); \
+} while (0)
+
+static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+ if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
+ int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+ if (c->phys_proc_id == o->phys_proc_id &&
+ per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) &&
+ c->compute_unit_id == o->compute_unit_id)
+ return topology_sane(c, o, "smt");
+
+ } else if (c->phys_proc_id == o->phys_proc_id &&
+ c->cpu_core_id == o->cpu_core_id) {
+ return topology_sane(c, o, "smt");
+ }
+
+ return false;
+}
+
+static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
- cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
- cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
- cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
- cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
- cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2));
- cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1));
+ int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+ if (per_cpu(cpu_llc_id, cpu1) != BAD_APICID &&
+ per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2))
+ return topology_sane(c, o, "llc");
+
+ return false;
}
+static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+ if (c->phys_proc_id == o->phys_proc_id) {
+ if (cpu_has(c, X86_FEATURE_AMD_DCM))
+ return true;
+
+ return topology_sane(c, o, "mc");
+ }
+ return false;
+}
void __cpuinit set_cpu_sibling_map(int cpu)
{
- int i;
+ bool has_mc = boot_cpu_data.x86_max_cores > 1;
+ bool has_smt = smp_num_siblings > 1;
struct cpuinfo_x86 *c = &cpu_data(cpu);
+ struct cpuinfo_x86 *o;
+ int i;
cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
- if (smp_num_siblings > 1) {
- for_each_cpu(i, cpu_sibling_setup_mask) {
- struct cpuinfo_x86 *o = &cpu_data(i);
-
- if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
- if (c->phys_proc_id == o->phys_proc_id &&
- per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) &&
- c->compute_unit_id == o->compute_unit_id)
- link_thread_siblings(cpu, i);
- } else if (c->phys_proc_id == o->phys_proc_id &&
- c->cpu_core_id == o->cpu_core_id) {
- link_thread_siblings(cpu, i);
- }
- }
- } else {
+ if (!has_smt && !has_mc) {
cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
+ cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
+ cpumask_set_cpu(cpu, cpu_core_mask(cpu));
+ c->booted_cores = 1;
+ return;
}
- cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
+ for_each_cpu(i, cpu_sibling_setup_mask) {
+ o = &cpu_data(i);
+
+ if ((i == cpu) || (has_smt && match_smt(c, o)))
+ link_mask(sibling, cpu, i);
+
+ if ((i == cpu) || (has_mc && match_llc(c, o)))
+ link_mask(llc_shared, cpu, i);
- if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
- cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
- c->booted_cores = 1;
- return;
}
+ /*
+ * This needs a separate iteration over the cpus because we rely on all
+ * cpu_sibling_mask links to be set-up.
+ */
for_each_cpu(i, cpu_sibling_setup_mask) {
- if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
- per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
- cpumask_set_cpu(i, cpu_llc_shared_mask(cpu));
- cpumask_set_cpu(cpu, cpu_llc_shared_mask(i));
- }
- if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
- cpumask_set_cpu(i, cpu_core_mask(cpu));
- cpumask_set_cpu(cpu, cpu_core_mask(i));
+ o = &cpu_data(i);
+
+ if ((i == cpu) || (has_mc && match_mc(c, o))) {
+ link_mask(core, cpu, i);
+
/*
* Does this new cpu bringup a new core?
*/
@@ -393,16 +422,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
/* maps the cpu to the sched domain representing multi-core */
const struct cpumask *cpu_coregroup_mask(int cpu)
{
- struct cpuinfo_x86 *c = &cpu_data(cpu);
- /*
- * For perf, we return last level cache shared map.
- * And for power savings, we return cpu_core_map
- */
- if ((sched_mc_power_savings || sched_smt_power_savings) &&
- !(cpu_has(c, X86_FEATURE_AMD_DCM)))
- return cpu_core_mask(cpu);
- else
- return cpu_llc_shared_mask(cpu);
+ return cpu_llc_shared_mask(cpu);
}
static void impress_friends(void)
@@ -618,22 +638,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
return (send_status | accept_status);
}
-struct create_idle {
- struct work_struct work;
- struct task_struct *idle;
- struct completion done;
- int cpu;
-};
-
-static void __cpuinit do_fork_idle(struct work_struct *work)
-{
- struct create_idle *c_idle =
- container_of(work, struct create_idle, work);
-
- c_idle->idle = fork_idle(c_idle->cpu);
- complete(&c_idle->done);
-}
-
/* reduce the number of lines printed when booting a large cpu count system */
static void __cpuinit announce_cpu(int cpu, int apicid)
{
@@ -660,61 +664,35 @@ static void __cpuinit announce_cpu(int cpu, int apicid)
* Returns zero if CPU booted OK, else error code from
* ->wakeup_secondary_cpu.
*/
-static int __cpuinit do_boot_cpu(int apicid, int cpu)
+static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
{
+ volatile u32 *trampoline_status =
+ (volatile u32 *) __va(real_mode_header->trampoline_status);
+ /* start_ip had better be page-aligned! */
+ unsigned long start_ip = real_mode_header->trampoline_start;
+
unsigned long boot_error = 0;
- unsigned long start_ip;
int timeout;
- struct create_idle c_idle = {
- .cpu = cpu,
- .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
- };
-
- INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
alternatives_smp_switch(1);
- c_idle.idle = get_idle_for_cpu(cpu);
+ idle->thread.sp = (unsigned long) (((struct pt_regs *)
+ (THREAD_SIZE + task_stack_page(idle))) - 1);
+ per_cpu(current_task, cpu) = idle;
- /*
- * We can't use kernel_thread since we must avoid to
- * reschedule the child.
- */
- if (c_idle.idle) {
- c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *)
- (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1);
- init_idle(c_idle.idle, cpu);
- goto do_rest;
- }
-
- schedule_work(&c_idle.work);
- wait_for_completion(&c_idle.done);
-
- if (IS_ERR(c_idle.idle)) {
- printk("failed fork for CPU %d\n", cpu);
- destroy_work_on_stack(&c_idle.work);
- return PTR_ERR(c_idle.idle);
- }
-
- set_idle_for_cpu(cpu, c_idle.idle);
-do_rest:
- per_cpu(current_task, cpu) = c_idle.idle;
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
#else
- clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
+ clear_tsk_thread_flag(idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
per_cpu(kernel_stack, cpu) =
- (unsigned long)task_stack_page(c_idle.idle) -
+ (unsigned long)task_stack_page(idle) -
KERNEL_STACK_OFFSET + THREAD_SIZE;
#endif
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
initial_code = (unsigned long)start_secondary;
- stack_start = c_idle.idle->thread.sp;
-
- /* start_ip had better be page-aligned! */
- start_ip = trampoline_address();
+ stack_start = idle->thread.sp;
/* So we see what's up */
announce_cpu(cpu, apicid);
@@ -778,8 +756,7 @@ do_rest:
pr_debug("CPU%d: has booted.\n", cpu);
} else {
boot_error = 1;
- if (*(volatile u32 *)TRAMPOLINE_SYM(trampoline_status)
- == 0xA5A5A5A5)
+ if (*trampoline_status == 0xA5A5A5A5)
/* trampoline started but...? */
pr_err("CPU%d: Stuck ??\n", cpu);
else
@@ -805,7 +782,7 @@ do_rest:
}
/* mark "stuck" area as not stuck */
- *(volatile u32 *)TRAMPOLINE_SYM(trampoline_status) = 0;
+ *trampoline_status = 0;
if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
/*
@@ -813,12 +790,10 @@ do_rest:
*/
smpboot_restore_warm_reset_vector();
}
-
- destroy_work_on_stack(&c_idle.work);
return boot_error;
}
-int __cpuinit native_cpu_up(unsigned int cpu)
+int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int apicid = apic->cpu_present_to_apicid(cpu);
unsigned long flags;
@@ -851,7 +826,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
- err = do_boot_cpu(apicid, cpu);
+ err = do_boot_cpu(apicid, cpu, tidle);
if (err) {
pr_debug("do_boot_cpu failed %d\n", err);
return -EIO;
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 6410744ac5c..f84fe00fad4 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -32,7 +32,7 @@
#include <linux/mm.h>
#include <linux/tboot.h>
-#include <asm/trampoline.h>
+#include <asm/realmode.h>
#include <asm/processor.h>
#include <asm/bootparam.h>
#include <asm/pgtable.h>
@@ -44,7 +44,7 @@
#include <asm/e820.h>
#include <asm/io.h>
-#include "acpi/realmode/wakeup.h"
+#include "../realmode/rm/wakeup.h"
/* Global pointer to shared data; NULL means no measured launch. */
struct tboot *tboot __read_mostly;
@@ -201,7 +201,8 @@ static int tboot_setup_sleep(void)
add_mac_region(e820.map[i].addr, e820.map[i].size);
}
- tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address;
+ tboot->acpi_sinfo.kernel_s3_resume_vector =
+ real_mode_header->wakeup_start;
return 0;
}
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
index c29e235792a..b79133abda4 100644
--- a/arch/x86/kernel/test_rodata.c
+++ b/arch/x86/kernel/test_rodata.c
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>
+#include <asm/asm.h>
int rodata_test(void)
{
@@ -42,14 +43,7 @@ int rodata_test(void)
".section .fixup,\"ax\"\n"
"2: jmp 1b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 16\n"
-#ifdef CONFIG_X86_32
- " .long 0b,2b\n"
-#else
- " .quad 0b,2b\n"
-#endif
- ".previous"
+ _ASM_EXTABLE(0b,2b)
: [rslt] "=r" (result)
: [rodata_test] "r" (&rodata_test_data), [zero] "r" (0UL)
);
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index c6eba2b4267..24d3c91e981 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -14,7 +14,6 @@
#include <linux/i8253.h>
#include <linux/time.h>
#include <linux/export.h>
-#include <linux/mca.h>
#include <asm/vsyscall.h>
#include <asm/x86_init.h>
@@ -58,11 +57,6 @@ EXPORT_SYMBOL(profile_pc);
static irqreturn_t timer_interrupt(int irq, void *dev_id)
{
global_clock_event->event_handler(global_clock_event);
-
- /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
- if (MCA_bus)
- outb_p(inb_p(0x61)| 0x80, 0x61);
-
return IRQ_HANDLED;
}
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
deleted file mode 100644
index a73b61055ad..00000000000
--- a/arch/x86/kernel/trampoline.c
+++ /dev/null
@@ -1,42 +0,0 @@
-#include <linux/io.h>
-#include <linux/memblock.h>
-
-#include <asm/trampoline.h>
-#include <asm/cacheflush.h>
-#include <asm/pgtable.h>
-
-unsigned char *x86_trampoline_base;
-
-void __init setup_trampolines(void)
-{
- phys_addr_t mem;
- size_t size = PAGE_ALIGN(x86_trampoline_end - x86_trampoline_start);
-
- /* Has to be in very low memory so we can execute real-mode AP code. */
- mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
- if (!mem)
- panic("Cannot allocate trampoline\n");
-
- x86_trampoline_base = __va(mem);
- memblock_reserve(mem, size);
-
- printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
- x86_trampoline_base, (unsigned long long)mem, size);
-
- memcpy(x86_trampoline_base, x86_trampoline_start, size);
-}
-
-/*
- * setup_trampolines() gets called very early, to guarantee the
- * availability of low memory. This is before the proper kernel page
- * tables are set up, so we cannot set page permissions in that
- * function. Thus, we use an arch_initcall instead.
- */
-static int __init configure_trampolines(void)
-{
- size_t size = PAGE_ALIGN(x86_trampoline_end - x86_trampoline_start);
-
- set_memory_x((unsigned long)x86_trampoline_base, size >> PAGE_SHIFT);
- return 0;
-}
-arch_initcall(configure_trampolines);
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
deleted file mode 100644
index 451c0a7ef7f..00000000000
--- a/arch/x86/kernel/trampoline_32.S
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- *
- * Trampoline.S Derived from Setup.S by Linus Torvalds
- *
- * 4 Jan 1997 Michael Chastain: changed to gnu as.
- *
- * This is only used for booting secondary CPUs in SMP machine
- *
- * Entry: CS:IP point to the start of our code, we are
- * in real mode with no stack, but the rest of the
- * trampoline page to make our stack and everything else
- * is a mystery.
- *
- * We jump into arch/x86/kernel/head_32.S.
- *
- * On entry to trampoline_data, the processor is in real mode
- * with 16-bit addressing and 16-bit data. CS has some value
- * and IP is zero. Thus, data addresses need to be absolute
- * (no relocation) and are taken with regard to r_base.
- *
- * If you work on this file, check the object module with
- * objdump --reloc to make sure there are no relocation
- * entries except for:
- *
- * TYPE VALUE
- * R_386_32 startup_32_smp
- * R_386_32 boot_gdt
- */
-
-#include <linux/linkage.h>
-#include <linux/init.h>
-#include <asm/segment.h>
-#include <asm/page_types.h>
-
-#ifdef CONFIG_SMP
-
- .section ".x86_trampoline","a"
- .balign PAGE_SIZE
- .code16
-
-ENTRY(trampoline_data)
-r_base = .
- wbinvd # Needed for NUMA-Q should be harmless for others
- mov %cs, %ax # Code and data in the same place
- mov %ax, %ds
-
- cli # We should be safe anyway
-
- movl $0xA5A5A5A5, trampoline_status - r_base
- # write marker for master knows we're running
-
- /* GDT tables in non default location kernel can be beyond 16MB and
- * lgdt will not be able to load the address as in real mode default
- * operand size is 16bit. Use lgdtl instead to force operand size
- * to 32 bit.
- */
-
- lidtl boot_idt_descr - r_base # load idt with 0, 0
- lgdtl boot_gdt_descr - r_base # load gdt with whatever is appropriate
-
- xor %ax, %ax
- inc %ax # protected mode (PE) bit
- lmsw %ax # into protected mode
- # flush prefetch and jump to startup_32_smp in arch/i386/kernel/head.S
- ljmpl $__BOOT_CS, $(startup_32_smp-__PAGE_OFFSET)
-
- # These need to be in the same 64K segment as the above;
- # hence we don't use the boot_gdt_descr defined in head.S
-boot_gdt_descr:
- .word __BOOT_DS + 7 # gdt limit
- .long boot_gdt - __PAGE_OFFSET # gdt base
-
-boot_idt_descr:
- .word 0 # idt limit = 0
- .long 0 # idt base = 0L
-
-ENTRY(trampoline_status)
- .long 0
-
-.globl trampoline_end
-trampoline_end:
-
-#endif /* CONFIG_SMP */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ff9281f1602..05b31d92f69 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -37,10 +37,6 @@
#include <linux/eisa.h>
#endif
-#ifdef CONFIG_MCA
-#include <linux/mca.h>
-#endif
-
#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif
@@ -50,6 +46,7 @@
#include <asm/processor.h>
#include <asm/debugreg.h>
#include <linux/atomic.h>
+#include <asm/ftrace.h>
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/i387.h>
@@ -303,8 +300,17 @@ gp_in_kernel:
}
/* May run on IST stack. */
-dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
+dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
{
+#ifdef CONFIG_DYNAMIC_FTRACE
+ /*
+ * ftrace must be first, everything else may cause a recursive crash.
+ * See note by declaration of modifying_ftrace_code in ftrace.c
+ */
+ if (unlikely(atomic_read(&modifying_ftrace_code)) &&
+ ftrace_int3_handler(regs))
+ return;
+#endif
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
SIGTRAP) == NOTIFY_STOP)
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 36fd42091fa..dc4e910a7d9 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -409,10 +409,9 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,
* arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
* @mm: the probed address space.
* @arch_uprobe: the probepoint information.
- * @addr: virtual address at which to install the probepoint
* Return 0 on success or a -ve number on error.
*/
-int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm)
{
int ret;
struct insn insn;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 0f703f10901..22a1530146a 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -197,18 +197,6 @@ SECTIONS
INIT_DATA_SECTION(16)
- /*
- * Code and data for a variety of lowlevel trampolines, to be
- * copied into base memory (< 1 MiB) during initialization.
- * Since it is copied early, the main copy can be discarded
- * afterwards.
- */
- .x86_trampoline : AT(ADDR(.x86_trampoline) - LOAD_OFFSET) {
- x86_trampoline_start = .;
- *(.x86_trampoline)
- x86_trampoline_end = .;
- }
-
.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
__x86_cpu_dev_start = .;
*(.x86_cpu_dev.init)
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index a1d804bcd48..8eeb55a551b 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -15,6 +15,7 @@
#include <linux/init.h>
#include <linux/pci_ids.h>
#include <linux/pci_regs.h>
+#include <linux/smp.h>
#include <asm/apic.h>
#include <asm/pci-direct.h>
@@ -22,6 +23,8 @@
#include <asm/paravirt.h>
#include <asm/setup.h>
+#define TOPOLOGY_REGISTER_OFFSET 0x10
+
#if defined CONFIG_PCI && defined CONFIG_PARAVIRT
/*
* Interrupt control on vSMPowered systems:
@@ -149,12 +152,49 @@ int is_vsmp_box(void)
return 0;
}
#endif
+
+static void __init vsmp_cap_cpus(void)
+{
+#if !defined(CONFIG_X86_VSMP) && defined(CONFIG_SMP)
+ void __iomem *address;
+ unsigned int cfg, topology, node_shift, maxcpus;
+
+ /*
+ * CONFIG_X86_VSMP is not configured, so limit the number CPUs to the
+ * ones present in the first board, unless explicitly overridden by
+ * setup_max_cpus
+ */
+ if (setup_max_cpus != NR_CPUS)
+ return;
+
+ /* Read the vSMP Foundation topology register */
+ cfg = read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0);
+ address = early_ioremap(cfg + TOPOLOGY_REGISTER_OFFSET, 4);
+ if (WARN_ON(!address))
+ return;
+
+ topology = readl(address);
+ node_shift = (topology >> 16) & 0x7;
+ if (!node_shift)
+ /* The value 0 should be decoded as 8 */
+ node_shift = 8;
+ maxcpus = (topology & ((1 << node_shift) - 1)) + 1;
+
+ pr_info("vSMP CTL: Capping CPUs to %d (CONFIG_X86_VSMP is unset)\n",
+ maxcpus);
+ setup_max_cpus = maxcpus;
+ early_iounmap(address, 4);
+#endif
+}
+
void __init vsmp_init(void)
{
detect_vsmp_box();
if (!is_vsmp_box())
return;
+ vsmp_cap_cpus();
+
set_vsmp_pv_ops();
return;
}
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 7515cf0e180..5db36caf428 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -139,6 +139,19 @@ static int addr_to_vsyscall_nr(unsigned long addr)
return nr;
}
+#ifdef CONFIG_SECCOMP
+static int vsyscall_seccomp(struct task_struct *tsk, int syscall_nr)
+{
+ if (!seccomp_mode(&tsk->seccomp))
+ return 0;
+ task_pt_regs(tsk)->orig_ax = syscall_nr;
+ task_pt_regs(tsk)->ax = syscall_nr;
+ return __secure_computing(syscall_nr);
+}
+#else
+#define vsyscall_seccomp(_tsk, _nr) 0
+#endif
+
static bool write_ok_or_segv(unsigned long ptr, size_t size)
{
/*
@@ -174,6 +187,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
int vsyscall_nr;
int prev_sig_on_uaccess_error;
long ret;
+ int skip;
/*
* No point in checking CS -- the only way to get here is a user mode
@@ -205,9 +219,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
}
tsk = current;
- if (seccomp_mode(&tsk->seccomp))
- do_exit(SIGKILL);
-
/*
* With a real vsyscall, page faults cause SIGSEGV. We want to
* preserve that behavior to make writing exploits harder.
@@ -222,8 +233,13 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
* address 0".
*/
ret = -EFAULT;
+ skip = 0;
switch (vsyscall_nr) {
case 0:
+ skip = vsyscall_seccomp(tsk, __NR_gettimeofday);
+ if (skip)
+ break;
+
if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
!write_ok_or_segv(regs->si, sizeof(struct timezone)))
break;
@@ -234,6 +250,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
break;
case 1:
+ skip = vsyscall_seccomp(tsk, __NR_time);
+ if (skip)
+ break;
+
if (!write_ok_or_segv(regs->di, sizeof(time_t)))
break;
@@ -241,6 +261,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
break;
case 2:
+ skip = vsyscall_seccomp(tsk, __NR_getcpu);
+ if (skip)
+ break;
+
if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
!write_ok_or_segv(regs->si, sizeof(unsigned)))
break;
@@ -253,6 +277,12 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
+ if (skip) {
+ if ((long)regs->ax <= 0L) /* seccomp errno emulation */
+ goto do_ret;
+ goto done; /* seccomp trace/trap */
+ }
+
if (ret == -EFAULT) {
/* Bad news -- userspace fed a bad pointer to a vsyscall. */
warn_bad_vsyscall(KERN_INFO, regs,
@@ -271,10 +301,11 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
regs->ax = ret;
+do_ret:
/* Emulate a ret instruction. */
regs->ip = caller;
regs->sp += 8;
-
+done:
return true;
sigsegv:
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 9cf71d0b2d3..35c5e543f55 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -18,6 +18,7 @@
#include <asm/e820.h>
#include <asm/time.h>
#include <asm/irq.h>
+#include <asm/io_apic.h>
#include <asm/pat.h>
#include <asm/tsc.h>
#include <asm/iommu.h>
@@ -119,3 +120,10 @@ struct x86_msi_ops x86_msi = {
.teardown_msi_irqs = default_teardown_msi_irqs,
.restore_msi_irqs = default_restore_msi_irqs,
};
+
+struct x86_io_apic_ops x86_io_apic_ops = {
+ .init = native_io_apic_init_mappings,
+ .read = native_io_apic_read,
+ .write = native_io_apic_write,
+ .modify = native_io_apic_modify,
+};
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index e62728e30b0..bd18149b2b0 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -48,8 +48,6 @@ void __sanitize_i387_state(struct task_struct *tsk)
if (!fx)
return;
- BUG_ON(__thread_has_fpu(tsk));
-
xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;
/*
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 1a7fe868f37..a28f338843e 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -36,6 +36,7 @@ config KVM
select TASKSTATS
select TASK_DELAY_ACCT
select PERF_EVENTS
+ select HAVE_KVM_MSI
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 9fed5bedaad..7df1c6d839f 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -247,7 +247,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.ebx */
const u32 kvm_supported_word9_x86_features =
- F(FSGSBASE) | F(BMI1) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS);
+ F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
+ F(BMI2) | F(ERMS) | F(RTM);
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
@@ -397,7 +398,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
case KVM_CPUID_SIGNATURE: {
char signature[12] = "KVMKVMKVM\0\0";
u32 *sigptr = (u32 *)signature;
- entry->eax = 0;
+ entry->eax = KVM_CPUID_FEATURES;
entry->ebx = sigptr[0];
entry->ecx = sigptr[1];
entry->edx = sigptr[2];
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 83756223f8a..f95d242ee9f 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -142,6 +142,10 @@
#define Src2FS (OpFS << Src2Shift)
#define Src2GS (OpGS << Src2Shift)
#define Src2Mask (OpMask << Src2Shift)
+#define Mmx ((u64)1 << 40) /* MMX Vector instruction */
+#define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
+#define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */
+#define Avx ((u64)1 << 43) /* Advanced Vector Extensions */
#define X2(x...) x, x
#define X3(x...) X2(x), x
@@ -557,6 +561,29 @@ static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
}
+/*
+ * x86 defines three classes of vector instructions: explicitly
+ * aligned, explicitly unaligned, and the rest, which change behaviour
+ * depending on whether they're AVX encoded or not.
+ *
+ * Also included is CMPXCHG16B which is not a vector instruction, yet it is
+ * subject to the same check.
+ */
+static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size)
+{
+ if (likely(size < 16))
+ return false;
+
+ if (ctxt->d & Aligned)
+ return true;
+ else if (ctxt->d & Unaligned)
+ return false;
+ else if (ctxt->d & Avx)
+ return false;
+ else
+ return true;
+}
+
static int __linearize(struct x86_emulate_ctxt *ctxt,
struct segmented_address addr,
unsigned size, bool write, bool fetch,
@@ -621,6 +648,8 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
}
if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8)
la &= (u32)-1;
+ if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
+ return emulate_gp(ctxt, 0);
*linear = la;
return X86EMUL_CONTINUE;
bad:
@@ -859,6 +888,40 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
ctxt->ops->put_fpu(ctxt);
}
+static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
+{
+ ctxt->ops->get_fpu(ctxt);
+ switch (reg) {
+ case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
+ case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
+ case 2: asm("movq %%mm2, %0" : "=m"(*data)); break;
+ case 3: asm("movq %%mm3, %0" : "=m"(*data)); break;
+ case 4: asm("movq %%mm4, %0" : "=m"(*data)); break;
+ case 5: asm("movq %%mm5, %0" : "=m"(*data)); break;
+ case 6: asm("movq %%mm6, %0" : "=m"(*data)); break;
+ case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
+ default: BUG();
+ }
+ ctxt->ops->put_fpu(ctxt);
+}
+
+static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
+{
+ ctxt->ops->get_fpu(ctxt);
+ switch (reg) {
+ case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
+ case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
+ case 2: asm("movq %0, %%mm2" : : "m"(*data)); break;
+ case 3: asm("movq %0, %%mm3" : : "m"(*data)); break;
+ case 4: asm("movq %0, %%mm4" : : "m"(*data)); break;
+ case 5: asm("movq %0, %%mm5" : : "m"(*data)); break;
+ case 6: asm("movq %0, %%mm6" : : "m"(*data)); break;
+ case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
+ default: BUG();
+ }
+ ctxt->ops->put_fpu(ctxt);
+}
+
static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
struct operand *op)
{
@@ -875,6 +938,13 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
read_sse_reg(ctxt, &op->vec_val, reg);
return;
}
+ if (ctxt->d & Mmx) {
+ reg &= 7;
+ op->type = OP_MM;
+ op->bytes = 8;
+ op->addr.mm = reg;
+ return;
+ }
op->type = OP_REG;
if (ctxt->d & ByteOp) {
@@ -902,7 +972,6 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */
}
- ctxt->modrm = insn_fetch(u8, ctxt);
ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6;
ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
ctxt->modrm_rm |= (ctxt->modrm & 0x07);
@@ -920,6 +989,12 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm);
return rc;
}
+ if (ctxt->d & Mmx) {
+ op->type = OP_MM;
+ op->bytes = 8;
+ op->addr.xmm = ctxt->modrm_rm & 7;
+ return rc;
+ }
fetch_register_operand(op);
return rc;
}
@@ -1387,6 +1462,9 @@ static int writeback(struct x86_emulate_ctxt *ctxt)
case OP_XMM:
write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm);
break;
+ case OP_MM:
+ write_mmx_reg(ctxt, &ctxt->dst.mm_val, ctxt->dst.addr.mm);
+ break;
case OP_NONE:
/* no writeback */
break;
@@ -2790,7 +2868,7 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
static int em_mov(struct x86_emulate_ctxt *ctxt)
{
- ctxt->dst.val = ctxt->src.val;
+ memcpy(ctxt->dst.valptr, ctxt->src.valptr, ctxt->op_bytes);
return X86EMUL_CONTINUE;
}
@@ -2870,12 +2948,6 @@ static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
}
-static int em_movdqu(struct x86_emulate_ctxt *ctxt)
-{
- memcpy(&ctxt->dst.vec_val, &ctxt->src.vec_val, ctxt->op_bytes);
- return X86EMUL_CONTINUE;
-}
-
static int em_invlpg(struct x86_emulate_ctxt *ctxt)
{
int rc;
@@ -3061,35 +3133,13 @@ static int em_btc(struct x86_emulate_ctxt *ctxt)
static int em_bsf(struct x86_emulate_ctxt *ctxt)
{
- u8 zf;
-
- __asm__ ("bsf %2, %0; setz %1"
- : "=r"(ctxt->dst.val), "=q"(zf)
- : "r"(ctxt->src.val));
-
- ctxt->eflags &= ~X86_EFLAGS_ZF;
- if (zf) {
- ctxt->eflags |= X86_EFLAGS_ZF;
- /* Disable writeback. */
- ctxt->dst.type = OP_NONE;
- }
+ emulate_2op_SrcV_nobyte(ctxt, "bsf");
return X86EMUL_CONTINUE;
}
static int em_bsr(struct x86_emulate_ctxt *ctxt)
{
- u8 zf;
-
- __asm__ ("bsr %2, %0; setz %1"
- : "=r"(ctxt->dst.val), "=q"(zf)
- : "r"(ctxt->src.val));
-
- ctxt->eflags &= ~X86_EFLAGS_ZF;
- if (zf) {
- ctxt->eflags |= X86_EFLAGS_ZF;
- /* Disable writeback. */
- ctxt->dst.type = OP_NONE;
- }
+ emulate_2op_SrcV_nobyte(ctxt, "bsr");
return X86EMUL_CONTINUE;
}
@@ -3286,8 +3336,8 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
.check_perm = (_p) }
#define N D(0)
#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
-#define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) }
-#define GD(_f, _g) { .flags = ((_f) | GroupDual), .u.gdual = (_g) }
+#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
+#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
#define II(_f, _e, _i) \
{ .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }
@@ -3307,25 +3357,25 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
static struct opcode group7_rm1[] = {
- DI(SrcNone | ModRM | Priv, monitor),
- DI(SrcNone | ModRM | Priv, mwait),
+ DI(SrcNone | Priv, monitor),
+ DI(SrcNone | Priv, mwait),
N, N, N, N, N, N,
};
static struct opcode group7_rm3[] = {
- DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa),
- II(SrcNone | ModRM | Prot | VendorSpecific, em_vmmcall, vmmcall),
- DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa),
- DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa),
- DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme),
- DIP(SrcNone | ModRM | Prot | Priv, clgi, check_svme),
- DIP(SrcNone | ModRM | Prot | Priv, skinit, check_svme),
- DIP(SrcNone | ModRM | Prot | Priv, invlpga, check_svme),
+ DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
+ II(SrcNone | Prot | VendorSpecific, em_vmmcall, vmmcall),
+ DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
+ DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
+ DIP(SrcNone | Prot | Priv, stgi, check_svme),
+ DIP(SrcNone | Prot | Priv, clgi, check_svme),
+ DIP(SrcNone | Prot | Priv, skinit, check_svme),
+ DIP(SrcNone | Prot | Priv, invlpga, check_svme),
};
static struct opcode group7_rm7[] = {
N,
- DIP(SrcNone | ModRM, rdtscp, check_rdtsc),
+ DIP(SrcNone, rdtscp, check_rdtsc),
N, N, N, N, N, N,
};
@@ -3341,81 +3391,86 @@ static struct opcode group1[] = {
};
static struct opcode group1A[] = {
- I(DstMem | SrcNone | ModRM | Mov | Stack, em_pop), N, N, N, N, N, N, N,
+ I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N,
};
static struct opcode group3[] = {
- I(DstMem | SrcImm | ModRM, em_test),
- I(DstMem | SrcImm | ModRM, em_test),
- I(DstMem | SrcNone | ModRM | Lock, em_not),
- I(DstMem | SrcNone | ModRM | Lock, em_neg),
- I(SrcMem | ModRM, em_mul_ex),
- I(SrcMem | ModRM, em_imul_ex),
- I(SrcMem | ModRM, em_div_ex),
- I(SrcMem | ModRM, em_idiv_ex),
+ I(DstMem | SrcImm, em_test),
+ I(DstMem | SrcImm, em_test),
+ I(DstMem | SrcNone | Lock, em_not),
+ I(DstMem | SrcNone | Lock, em_neg),
+ I(SrcMem, em_mul_ex),
+ I(SrcMem, em_imul_ex),
+ I(SrcMem, em_div_ex),
+ I(SrcMem, em_idiv_ex),
};
static struct opcode group4[] = {
- I(ByteOp | DstMem | SrcNone | ModRM | Lock, em_grp45),
- I(ByteOp | DstMem | SrcNone | ModRM | Lock, em_grp45),
+ I(ByteOp | DstMem | SrcNone | Lock, em_grp45),
+ I(ByteOp | DstMem | SrcNone | Lock, em_grp45),
N, N, N, N, N, N,
};
static struct opcode group5[] = {
- I(DstMem | SrcNone | ModRM | Lock, em_grp45),
- I(DstMem | SrcNone | ModRM | Lock, em_grp45),
- I(SrcMem | ModRM | Stack, em_grp45),
- I(SrcMemFAddr | ModRM | ImplicitOps | Stack, em_call_far),
- I(SrcMem | ModRM | Stack, em_grp45),
- I(SrcMemFAddr | ModRM | ImplicitOps, em_grp45),
- I(SrcMem | ModRM | Stack, em_grp45), N,
+ I(DstMem | SrcNone | Lock, em_grp45),
+ I(DstMem | SrcNone | Lock, em_grp45),
+ I(SrcMem | Stack, em_grp45),
+ I(SrcMemFAddr | ImplicitOps | Stack, em_call_far),
+ I(SrcMem | Stack, em_grp45),
+ I(SrcMemFAddr | ImplicitOps, em_grp45),
+ I(SrcMem | Stack, em_grp45), N,
};
static struct opcode group6[] = {
- DI(ModRM | Prot, sldt),
- DI(ModRM | Prot, str),
- DI(ModRM | Prot | Priv, lldt),
- DI(ModRM | Prot | Priv, ltr),
+ DI(Prot, sldt),
+ DI(Prot, str),
+ DI(Prot | Priv, lldt),
+ DI(Prot | Priv, ltr),
N, N, N, N,
};
static struct group_dual group7 = { {
- DI(ModRM | Mov | DstMem | Priv, sgdt),
- DI(ModRM | Mov | DstMem | Priv, sidt),
- II(ModRM | SrcMem | Priv, em_lgdt, lgdt),
- II(ModRM | SrcMem | Priv, em_lidt, lidt),
- II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N,
- II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw),
- II(SrcMem | ModRM | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
+ DI(Mov | DstMem | Priv, sgdt),
+ DI(Mov | DstMem | Priv, sidt),
+ II(SrcMem | Priv, em_lgdt, lgdt),
+ II(SrcMem | Priv, em_lidt, lidt),
+ II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
+ II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
+ II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
}, {
- I(SrcNone | ModRM | Priv | VendorSpecific, em_vmcall),
+ I(SrcNone | Priv | VendorSpecific, em_vmcall),
EXT(0, group7_rm1),
N, EXT(0, group7_rm3),
- II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N,
- II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), EXT(0, group7_rm7),
+ II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
+ II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
+ EXT(0, group7_rm7),
} };
static struct opcode group8[] = {
N, N, N, N,
- I(DstMem | SrcImmByte | ModRM, em_bt),
- I(DstMem | SrcImmByte | ModRM | Lock | PageTable, em_bts),
- I(DstMem | SrcImmByte | ModRM | Lock, em_btr),
- I(DstMem | SrcImmByte | ModRM | Lock | PageTable, em_btc),
+ I(DstMem | SrcImmByte, em_bt),
+ I(DstMem | SrcImmByte | Lock | PageTable, em_bts),
+ I(DstMem | SrcImmByte | Lock, em_btr),
+ I(DstMem | SrcImmByte | Lock | PageTable, em_btc),
};
static struct group_dual group9 = { {
- N, I(DstMem64 | ModRM | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
+ N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
}, {
N, N, N, N, N, N, N, N,
} };
static struct opcode group11[] = {
- I(DstMem | SrcImm | ModRM | Mov | PageTable, em_mov),
+ I(DstMem | SrcImm | Mov | PageTable, em_mov),
X7(D(Undefined)),
};
static struct gprefix pfx_0f_6f_0f_7f = {
- N, N, N, I(Sse, em_movdqu),
+ I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
+};
+
+static struct gprefix pfx_vmovntpx = {
+ I(0, em_mov), N, N, N,
};
static struct opcode opcode_table[256] = {
@@ -3464,10 +3519,10 @@ static struct opcode opcode_table[256] = {
/* 0x70 - 0x7F */
X16(D(SrcImmByte)),
/* 0x80 - 0x87 */
- G(ByteOp | DstMem | SrcImm | ModRM | Group, group1),
- G(DstMem | SrcImm | ModRM | Group, group1),
- G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1),
- G(DstMem | SrcImmByte | ModRM | Group, group1),
+ G(ByteOp | DstMem | SrcImm, group1),
+ G(DstMem | SrcImm, group1),
+ G(ByteOp | DstMem | SrcImm | No64, group1),
+ G(DstMem | SrcImmByte, group1),
I2bv(DstMem | SrcReg | ModRM, em_test),
I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
/* 0x88 - 0x8F */
@@ -3549,7 +3604,8 @@ static struct opcode twobyte_table[256] = {
IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write),
IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write),
N, N, N, N,
- N, N, N, N, N, N, N, N,
+ N, N, N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx),
+ N, N, N, N,
/* 0x30 - 0x3F */
II(ImplicitOps | Priv, em_wrmsr, wrmsr),
IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
@@ -3897,17 +3953,16 @@ done_prefixes:
}
ctxt->d = opcode.flags;
+ if (ctxt->d & ModRM)
+ ctxt->modrm = insn_fetch(u8, ctxt);
+
while (ctxt->d & GroupMask) {
switch (ctxt->d & GroupMask) {
case Group:
- ctxt->modrm = insn_fetch(u8, ctxt);
- --ctxt->_eip;
goffset = (ctxt->modrm >> 3) & 7;
opcode = opcode.u.group[goffset];
break;
case GroupDual:
- ctxt->modrm = insn_fetch(u8, ctxt);
- --ctxt->_eip;
goffset = (ctxt->modrm >> 3) & 7;
if ((ctxt->modrm >> 6) == 3)
opcode = opcode.u.gdual->mod3[goffset];
@@ -3960,6 +4015,8 @@ done_prefixes:
if (ctxt->d & Sse)
ctxt->op_bytes = 16;
+ else if (ctxt->d & Mmx)
+ ctxt->op_bytes = 8;
/* ModRM and SIB bytes. */
if (ctxt->d & ModRM) {
@@ -4030,6 +4087,35 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
return false;
}
+static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
+{
+ bool fault = false;
+
+ ctxt->ops->get_fpu(ctxt);
+ asm volatile("1: fwait \n\t"
+ "2: \n\t"
+ ".pushsection .fixup,\"ax\" \n\t"
+ "3: \n\t"
+ "movb $1, %[fault] \n\t"
+ "jmp 2b \n\t"
+ ".popsection \n\t"
+ _ASM_EXTABLE(1b, 3b)
+ : [fault]"+qm"(fault));
+ ctxt->ops->put_fpu(ctxt);
+
+ if (unlikely(fault))
+ return emulate_exception(ctxt, MF_VECTOR, 0, false);
+
+ return X86EMUL_CONTINUE;
+}
+
+static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
+ struct operand *op)
+{
+ if (op->type == OP_MM)
+ read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
+}
+
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
struct x86_emulate_ops *ops = ctxt->ops;
@@ -4054,18 +4140,31 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
goto done;
}
- if ((ctxt->d & Sse)
- && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)
- || !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
+ if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
+ || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
rc = emulate_ud(ctxt);
goto done;
}
- if ((ctxt->d & Sse) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
+ if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
rc = emulate_nm(ctxt);
goto done;
}
+ if (ctxt->d & Mmx) {
+ rc = flush_pending_x87_faults(ctxt);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+ /*
+ * Now that we know the fpu is exception safe, we can fetch
+ * operands from it.
+ */
+ fetch_possible_mmx_operand(ctxt, &ctxt->src);
+ fetch_possible_mmx_operand(ctxt, &ctxt->src2);
+ if (!(ctxt->d & Mov))
+ fetch_possible_mmx_operand(ctxt, &ctxt->dst);
+ }
+
if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_PRE_EXCEPT);
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index d68f99df690..adba28f88d1 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -34,7 +34,6 @@
#include <linux/kvm_host.h>
#include <linux/slab.h>
-#include <linux/workqueue.h>
#include "irq.h"
#include "i8254.h"
@@ -249,7 +248,7 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
/* in this case, we had multiple outstanding pit interrupts
* that we needed to inject. Reinject
*/
- queue_work(ps->pit->wq, &ps->pit->expired);
+ queue_kthread_work(&ps->pit->worker, &ps->pit->expired);
ps->irq_ack = 1;
spin_unlock(&ps->inject_lock);
}
@@ -270,7 +269,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
static void destroy_pit_timer(struct kvm_pit *pit)
{
hrtimer_cancel(&pit->pit_state.pit_timer.timer);
- cancel_work_sync(&pit->expired);
+ flush_kthread_work(&pit->expired);
}
static bool kpit_is_periodic(struct kvm_timer *ktimer)
@@ -284,7 +283,7 @@ static struct kvm_timer_ops kpit_ops = {
.is_periodic = kpit_is_periodic,
};
-static void pit_do_work(struct work_struct *work)
+static void pit_do_work(struct kthread_work *work)
{
struct kvm_pit *pit = container_of(work, struct kvm_pit, expired);
struct kvm *kvm = pit->kvm;
@@ -328,7 +327,7 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
atomic_inc(&ktimer->pending);
- queue_work(pt->wq, &pt->expired);
+ queue_kthread_work(&pt->worker, &pt->expired);
}
if (ktimer->t_ops->is_periodic(ktimer)) {
@@ -353,7 +352,7 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
/* TODO The new value only affected after the retriggered */
hrtimer_cancel(&pt->timer);
- cancel_work_sync(&ps->pit->expired);
+ flush_kthread_work(&ps->pit->expired);
pt->period = interval;
ps->is_periodic = is_period;
@@ -669,6 +668,8 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
{
struct kvm_pit *pit;
struct kvm_kpit_state *pit_state;
+ struct pid *pid;
+ pid_t pid_nr;
int ret;
pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
@@ -685,14 +686,20 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
mutex_lock(&pit->pit_state.lock);
spin_lock_init(&pit->pit_state.inject_lock);
- pit->wq = create_singlethread_workqueue("kvm-pit-wq");
- if (!pit->wq) {
+ pid = get_pid(task_tgid(current));
+ pid_nr = pid_vnr(pid);
+ put_pid(pid);
+
+ init_kthread_worker(&pit->worker);
+ pit->worker_task = kthread_run(kthread_worker_fn, &pit->worker,
+ "kvm-pit/%d", pid_nr);
+ if (IS_ERR(pit->worker_task)) {
mutex_unlock(&pit->pit_state.lock);
kvm_free_irq_source_id(kvm, pit->irq_source_id);
kfree(pit);
return NULL;
}
- INIT_WORK(&pit->expired, pit_do_work);
+ init_kthread_work(&pit->expired, pit_do_work);
kvm->arch.vpit = pit;
pit->kvm = kvm;
@@ -736,7 +743,7 @@ fail:
kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
kvm_free_irq_source_id(kvm, pit->irq_source_id);
- destroy_workqueue(pit->wq);
+ kthread_stop(pit->worker_task);
kfree(pit);
return NULL;
}
@@ -756,10 +763,10 @@ void kvm_free_pit(struct kvm *kvm)
mutex_lock(&kvm->arch.vpit->pit_state.lock);
timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
hrtimer_cancel(timer);
- cancel_work_sync(&kvm->arch.vpit->expired);
+ flush_kthread_work(&kvm->arch.vpit->expired);
+ kthread_stop(kvm->arch.vpit->worker_task);
kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id);
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
- destroy_workqueue(kvm->arch.vpit->wq);
kfree(kvm->arch.vpit);
}
}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 51a97426e79..fdf40425ea1 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -1,6 +1,8 @@
#ifndef __I8254_H
#define __I8254_H
+#include <linux/kthread.h>
+
#include "iodev.h"
struct kvm_kpit_channel_state {
@@ -39,8 +41,9 @@ struct kvm_pit {
struct kvm_kpit_state pit_state;
int irq_source_id;
struct kvm_irq_mask_notifier mask_notifier;
- struct workqueue_struct *wq;
- struct work_struct expired;
+ struct kthread_worker worker;
+ struct task_struct *worker_task;
+ struct kthread_work expired;
};
#define KVM_PIT_BASE_ADDRESS 0x40
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 858432287ab..93c15743f1e 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -92,6 +92,11 @@ static inline int apic_test_and_clear_vector(int vec, void *bitmap)
return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}
+static inline int apic_test_vector(int vec, void *bitmap)
+{
+ return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
static inline void apic_set_vector(int vec, void *bitmap)
{
set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -480,7 +485,6 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
static void apic_set_eoi(struct kvm_lapic *apic)
{
int vector = apic_find_highest_isr(apic);
- int trigger_mode;
/*
* Not every write EOI will has corresponding ISR,
* one example is when Kernel check timer on setup_IO_APIC
@@ -491,12 +495,15 @@ static void apic_set_eoi(struct kvm_lapic *apic)
apic_clear_vector(vector, apic->regs + APIC_ISR);
apic_update_ppr(apic);
- if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR))
- trigger_mode = IOAPIC_LEVEL_TRIG;
- else
- trigger_mode = IOAPIC_EDGE_TRIG;
- if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
+ if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
+ kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
+ int trigger_mode;
+ if (apic_test_vector(vector, apic->regs + APIC_TMR))
+ trigger_mode = IOAPIC_LEVEL_TRIG;
+ else
+ trigger_mode = IOAPIC_EDGE_TRIG;
kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
+ }
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}
@@ -1081,6 +1088,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
apic_update_ppr(apic);
vcpu->arch.apic_arb_prio = 0;
+ vcpu->arch.apic_attention = 0;
apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
"0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
@@ -1280,7 +1288,7 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
u32 data;
void *vapic;
- if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
+ if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
return;
vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
@@ -1297,7 +1305,7 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic;
void *vapic;
- if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
+ if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
return;
apic = vcpu->arch.apic;
@@ -1317,10 +1325,11 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
{
- if (!irqchip_in_kernel(vcpu->kvm))
- return;
-
vcpu->arch.apic->vapic_addr = vapic_addr;
+ if (vapic_addr)
+ __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
+ else
+ __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
}
int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4cb16426884..57e168e27b5 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -135,8 +135,6 @@ module_param(dbg, bool, 0644);
#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
| PT64_NX_MASK)
-#define PTE_LIST_EXT 4
-
#define ACC_EXEC_MASK 1
#define ACC_WRITE_MASK PT_WRITABLE_MASK
#define ACC_USER_MASK PT_USER_MASK
@@ -151,6 +149,9 @@ module_param(dbg, bool, 0644);
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
+/* make pte_list_desc fit well in cache line */
+#define PTE_LIST_EXT 3
+
struct pte_list_desc {
u64 *sptes[PTE_LIST_EXT];
struct pte_list_desc *more;
@@ -550,19 +551,29 @@ static u64 mmu_spte_get_lockless(u64 *sptep)
static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
{
- rcu_read_lock();
- atomic_inc(&vcpu->kvm->arch.reader_counter);
-
- /* Increase the counter before walking shadow page table */
- smp_mb__after_atomic_inc();
+ /*
+ * Prevent page table teardown by making any free-er wait during
+ * kvm_flush_remote_tlbs() IPI to all active vcpus.
+ */
+ local_irq_disable();
+ vcpu->mode = READING_SHADOW_PAGE_TABLES;
+ /*
+ * Make sure a following spte read is not reordered ahead of the write
+ * to vcpu->mode.
+ */
+ smp_mb();
}
static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
{
- /* Decrease the counter after walking shadow page table finished */
- smp_mb__before_atomic_dec();
- atomic_dec(&vcpu->kvm->arch.reader_counter);
- rcu_read_unlock();
+ /*
+ * Make sure the write to vcpu->mode is not reordered in front of
+ * reads to sptes. If it does, kvm_commit_zap_page() can see us
+ * OUTSIDE_GUEST_MODE and proceed to free the shadow page table.
+ */
+ smp_mb();
+ vcpu->mode = OUTSIDE_GUEST_MODE;
+ local_irq_enable();
}
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
@@ -841,32 +852,6 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
return count;
}
-static u64 *pte_list_next(unsigned long *pte_list, u64 *spte)
-{
- struct pte_list_desc *desc;
- u64 *prev_spte;
- int i;
-
- if (!*pte_list)
- return NULL;
- else if (!(*pte_list & 1)) {
- if (!spte)
- return (u64 *)*pte_list;
- return NULL;
- }
- desc = (struct pte_list_desc *)(*pte_list & ~1ul);
- prev_spte = NULL;
- while (desc) {
- for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) {
- if (prev_spte == spte)
- return desc->sptes[i];
- prev_spte = desc->sptes[i];
- }
- desc = desc->more;
- }
- return NULL;
-}
-
static void
pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc,
int i, struct pte_list_desc *prev_desc)
@@ -987,11 +972,6 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
return pte_list_add(vcpu, spte, rmapp);
}
-static u64 *rmap_next(unsigned long *rmapp, u64 *spte)
-{
- return pte_list_next(rmapp, spte);
-}
-
static void rmap_remove(struct kvm *kvm, u64 *spte)
{
struct kvm_mmu_page *sp;
@@ -1004,106 +984,201 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
pte_list_remove(spte, rmapp);
}
+/*
+ * Used by the following functions to iterate through the sptes linked by a
+ * rmap. All fields are private and not assumed to be used outside.
+ */
+struct rmap_iterator {
+ /* private fields */
+ struct pte_list_desc *desc; /* holds the sptep if not NULL */
+ int pos; /* index of the sptep */
+};
+
+/*
+ * Iteration must be started by this function. This should also be used after
+ * removing/dropping sptes from the rmap link because in such cases the
+ * information in the itererator may not be valid.
+ *
+ * Returns sptep if found, NULL otherwise.
+ */
+static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter)
+{
+ if (!rmap)
+ return NULL;
+
+ if (!(rmap & 1)) {
+ iter->desc = NULL;
+ return (u64 *)rmap;
+ }
+
+ iter->desc = (struct pte_list_desc *)(rmap & ~1ul);
+ iter->pos = 0;
+ return iter->desc->sptes[iter->pos];
+}
+
+/*
+ * Must be used with a valid iterator: e.g. after rmap_get_first().
+ *
+ * Returns sptep if found, NULL otherwise.
+ */
+static u64 *rmap_get_next(struct rmap_iterator *iter)
+{
+ if (iter->desc) {
+ if (iter->pos < PTE_LIST_EXT - 1) {
+ u64 *sptep;
+
+ ++iter->pos;
+ sptep = iter->desc->sptes[iter->pos];
+ if (sptep)
+ return sptep;
+ }
+
+ iter->desc = iter->desc->more;
+
+ if (iter->desc) {
+ iter->pos = 0;
+ /* desc->sptes[0] cannot be NULL */
+ return iter->desc->sptes[iter->pos];
+ }
+ }
+
+ return NULL;
+}
+
static void drop_spte(struct kvm *kvm, u64 *sptep)
{
if (mmu_spte_clear_track_bits(sptep))
rmap_remove(kvm, sptep);
}
-int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
- struct kvm_memory_slot *slot)
+static int __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, int level)
{
- unsigned long *rmapp;
- u64 *spte;
- int i, write_protected = 0;
-
- rmapp = __gfn_to_rmap(gfn, PT_PAGE_TABLE_LEVEL, slot);
- spte = rmap_next(rmapp, NULL);
- while (spte) {
- BUG_ON(!(*spte & PT_PRESENT_MASK));
- rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
- if (is_writable_pte(*spte)) {
- mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK);
- write_protected = 1;
+ u64 *sptep;
+ struct rmap_iterator iter;
+ int write_protected = 0;
+
+ for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+ BUG_ON(!(*sptep & PT_PRESENT_MASK));
+ rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep);
+
+ if (!is_writable_pte(*sptep)) {
+ sptep = rmap_get_next(&iter);
+ continue;
}
- spte = rmap_next(rmapp, spte);
- }
- /* check for huge page mappings */
- for (i = PT_DIRECTORY_LEVEL;
- i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
- rmapp = __gfn_to_rmap(gfn, i, slot);
- spte = rmap_next(rmapp, NULL);
- while (spte) {
- BUG_ON(!(*spte & PT_PRESENT_MASK));
- BUG_ON(!is_large_pte(*spte));
- pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
- if (is_writable_pte(*spte)) {
- drop_spte(kvm, spte);
- --kvm->stat.lpages;
- spte = NULL;
- write_protected = 1;
- }
- spte = rmap_next(rmapp, spte);
+ if (level == PT_PAGE_TABLE_LEVEL) {
+ mmu_spte_update(sptep, *sptep & ~PT_WRITABLE_MASK);
+ sptep = rmap_get_next(&iter);
+ } else {
+ BUG_ON(!is_large_pte(*sptep));
+ drop_spte(kvm, sptep);
+ --kvm->stat.lpages;
+ sptep = rmap_get_first(*rmapp, &iter);
}
+
+ write_protected = 1;
}
return write_protected;
}
+/**
+ * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages
+ * @kvm: kvm instance
+ * @slot: slot to protect
+ * @gfn_offset: start of the BITS_PER_LONG pages we care about
+ * @mask: indicates which pages we should protect
+ *
+ * Used when we do not need to care about huge page mappings: e.g. during dirty
+ * logging we do not have any such mappings.
+ */
+void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ gfn_t gfn_offset, unsigned long mask)
+{
+ unsigned long *rmapp;
+
+ while (mask) {
+ rmapp = &slot->rmap[gfn_offset + __ffs(mask)];
+ __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL);
+
+ /* clear the first set bit */
+ mask &= mask - 1;
+ }
+}
+
static int rmap_write_protect(struct kvm *kvm, u64 gfn)
{
struct kvm_memory_slot *slot;
+ unsigned long *rmapp;
+ int i;
+ int write_protected = 0;
slot = gfn_to_memslot(kvm, gfn);
- return kvm_mmu_rmap_write_protect(kvm, gfn, slot);
+
+ for (i = PT_PAGE_TABLE_LEVEL;
+ i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
+ rmapp = __gfn_to_rmap(gfn, i, slot);
+ write_protected |= __rmap_write_protect(kvm, rmapp, i);
+ }
+
+ return write_protected;
}
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
unsigned long data)
{
- u64 *spte;
+ u64 *sptep;
+ struct rmap_iterator iter;
int need_tlb_flush = 0;
- while ((spte = rmap_next(rmapp, NULL))) {
- BUG_ON(!(*spte & PT_PRESENT_MASK));
- rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
- drop_spte(kvm, spte);
+ while ((sptep = rmap_get_first(*rmapp, &iter))) {
+ BUG_ON(!(*sptep & PT_PRESENT_MASK));
+ rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep);
+
+ drop_spte(kvm, sptep);
need_tlb_flush = 1;
}
+
return need_tlb_flush;
}
static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
unsigned long data)
{
+ u64 *sptep;
+ struct rmap_iterator iter;
int need_flush = 0;
- u64 *spte, new_spte;
+ u64 new_spte;
pte_t *ptep = (pte_t *)data;
pfn_t new_pfn;
WARN_ON(pte_huge(*ptep));
new_pfn = pte_pfn(*ptep);
- spte = rmap_next(rmapp, NULL);
- while (spte) {
- BUG_ON(!is_shadow_present_pte(*spte));
- rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
+
+ for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+ BUG_ON(!is_shadow_present_pte(*sptep));
+ rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep);
+
need_flush = 1;
+
if (pte_write(*ptep)) {
- drop_spte(kvm, spte);
- spte = rmap_next(rmapp, NULL);
+ drop_spte(kvm, sptep);
+ sptep = rmap_get_first(*rmapp, &iter);
} else {
- new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
+ new_spte = *sptep & ~PT64_BASE_ADDR_MASK;
new_spte |= (u64)new_pfn << PAGE_SHIFT;
new_spte &= ~PT_WRITABLE_MASK;
new_spte &= ~SPTE_HOST_WRITEABLE;
new_spte &= ~shadow_accessed_mask;
- mmu_spte_clear_track_bits(spte);
- mmu_spte_set(spte, new_spte);
- spte = rmap_next(rmapp, spte);
+
+ mmu_spte_clear_track_bits(sptep);
+ mmu_spte_set(sptep, new_spte);
+ sptep = rmap_get_next(&iter);
}
}
+
if (need_flush)
kvm_flush_remote_tlbs(kvm);
@@ -1162,7 +1237,8 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
unsigned long data)
{
- u64 *spte;
+ u64 *sptep;
+ struct rmap_iterator iter;
int young = 0;
/*
@@ -1175,25 +1251,24 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
if (!shadow_accessed_mask)
return kvm_unmap_rmapp(kvm, rmapp, data);
- spte = rmap_next(rmapp, NULL);
- while (spte) {
- int _young;
- u64 _spte = *spte;
- BUG_ON(!(_spte & PT_PRESENT_MASK));
- _young = _spte & PT_ACCESSED_MASK;
- if (_young) {
+ for (sptep = rmap_get_first(*rmapp, &iter); sptep;
+ sptep = rmap_get_next(&iter)) {
+ BUG_ON(!(*sptep & PT_PRESENT_MASK));
+
+ if (*sptep & PT_ACCESSED_MASK) {
young = 1;
- clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte);
+ clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)sptep);
}
- spte = rmap_next(rmapp, spte);
}
+
return young;
}
static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
unsigned long data)
{
- u64 *spte;
+ u64 *sptep;
+ struct rmap_iterator iter;
int young = 0;
/*
@@ -1204,16 +1279,14 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
if (!shadow_accessed_mask)
goto out;
- spte = rmap_next(rmapp, NULL);
- while (spte) {
- u64 _spte = *spte;
- BUG_ON(!(_spte & PT_PRESENT_MASK));
- young = _spte & PT_ACCESSED_MASK;
- if (young) {
+ for (sptep = rmap_get_first(*rmapp, &iter); sptep;
+ sptep = rmap_get_next(&iter)) {
+ BUG_ON(!(*sptep & PT_PRESENT_MASK));
+
+ if (*sptep & PT_ACCESSED_MASK) {
young = 1;
break;
}
- spte = rmap_next(rmapp, spte);
}
out:
return young;
@@ -1865,10 +1938,11 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
{
- u64 *parent_pte;
+ u64 *sptep;
+ struct rmap_iterator iter;
- while ((parent_pte = pte_list_next(&sp->parent_ptes, NULL)))
- drop_parent_pte(sp, parent_pte);
+ while ((sptep = rmap_get_first(sp->parent_ptes, &iter)))
+ drop_parent_pte(sp, sptep);
}
static int mmu_zap_unsync_children(struct kvm *kvm,
@@ -1925,30 +1999,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
return ret;
}
-static void kvm_mmu_isolate_pages(struct list_head *invalid_list)
-{
- struct kvm_mmu_page *sp;
-
- list_for_each_entry(sp, invalid_list, link)
- kvm_mmu_isolate_page(sp);
-}
-
-static void free_pages_rcu(struct rcu_head *head)
-{
- struct kvm_mmu_page *next, *sp;
-
- sp = container_of(head, struct kvm_mmu_page, rcu);
- while (sp) {
- if (!list_empty(&sp->link))
- next = list_first_entry(&sp->link,
- struct kvm_mmu_page, link);
- else
- next = NULL;
- kvm_mmu_free_page(sp);
- sp = next;
- }
-}
-
static void kvm_mmu_commit_zap_page(struct kvm *kvm,
struct list_head *invalid_list)
{
@@ -1957,17 +2007,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
if (list_empty(invalid_list))
return;
- kvm_flush_remote_tlbs(kvm);
-
- if (atomic_read(&kvm->arch.reader_counter)) {
- kvm_mmu_isolate_pages(invalid_list);
- sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
- list_del_init(invalid_list);
+ /*
+ * wmb: make sure everyone sees our modifications to the page tables
+ * rmb: make sure we see changes to vcpu->mode
+ */
+ smp_mb();
- trace_kvm_mmu_delay_free_pages(sp);
- call_rcu(&sp->rcu, free_pages_rcu);
- return;
- }
+ /*
+ * Wait for all vcpus to exit guest mode and/or lockless shadow
+ * page table walks.
+ */
+ kvm_flush_remote_tlbs(kvm);
do {
sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
@@ -1975,7 +2025,6 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
kvm_mmu_isolate_page(sp);
kvm_mmu_free_page(sp);
} while (!list_empty(invalid_list));
-
}
/*
@@ -2546,8 +2595,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
*gfnp = gfn;
kvm_release_pfn_clean(pfn);
pfn &= ~mask;
- if (!get_page_unless_zero(pfn_to_page(pfn)))
- BUG();
+ kvm_get_pfn(pfn);
*pfnp = pfn;
}
}
@@ -3554,7 +3602,7 @@ static bool detect_write_flooding(struct kvm_mmu_page *sp)
* Skip write-flooding detected for the sp whose level is 1, because
* it can become unsync, then the guest page is not write-protected.
*/
- if (sp->role.level == 1)
+ if (sp->role.level == PT_PAGE_TABLE_LEVEL)
return false;
return ++sp->write_flooding_count >= 3;
@@ -3886,6 +3934,9 @@ static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm,
{
struct kvm_mmu_page *page;
+ if (list_empty(&kvm->arch.active_mmu_pages))
+ return;
+
page = container_of(kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
kvm_mmu_prepare_zap_page(kvm, page, invalid_list);
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index 715da5a19a5..7d7d0b9e23e 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -192,7 +192,8 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
{
struct kvm_memory_slot *slot;
unsigned long *rmapp;
- u64 *spte;
+ u64 *sptep;
+ struct rmap_iterator iter;
if (sp->role.direct || sp->unsync || sp->role.invalid)
return;
@@ -200,13 +201,12 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
slot = gfn_to_memslot(kvm, sp->gfn);
rmapp = &slot->rmap[sp->gfn - slot->base_gfn];
- spte = rmap_next(rmapp, NULL);
- while (spte) {
- if (is_writable_pte(*spte))
+ for (sptep = rmap_get_first(*rmapp, &iter); sptep;
+ sptep = rmap_get_next(&iter)) {
+ if (is_writable_pte(*sptep))
audit_printk(kvm, "shadow page has writable "
"mappings: gfn %llx role %x\n",
sp->gfn, sp->role.word);
- spte = rmap_next(rmapp, spte);
}
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index df5a70311be..34f970937ef 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -658,7 +658,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
{
int offset = 0;
- WARN_ON(sp->role.level != 1);
+ WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL);
if (PTTYPE == 32)
offset = sp->role.quadrant << PT64_LEVEL_BITS;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e334389e1c7..f75af406b26 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -22,6 +22,7 @@
#include "x86.h"
#include <linux/module.h>
+#include <linux/mod_devicetable.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
@@ -42,6 +43,12 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
+static const struct x86_cpu_id svm_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_SVM),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
+
#define IOPM_ALLOC_ORDER 2
#define MSRPM_ALLOC_ORDER 1
@@ -3240,6 +3247,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
svm_clear_vintr(svm);
svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
mark_dirty(svm->vmcb, VMCB_INTR);
+ ++svm->vcpu.stat.irq_window_exits;
/*
* If the user space waits to inject interrupts, exit as soon as
* possible
@@ -3247,7 +3255,6 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
if (!irqchip_in_kernel(svm->vcpu.kvm) &&
kvm_run->request_interrupt_window &&
!kvm_cpu_has_interrupt(&svm->vcpu)) {
- ++svm->vcpu.stat.irq_window_exits;
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
return 0;
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4ff0ab9bc3c..32eb5886629 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -27,6 +27,7 @@
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/moduleparam.h>
+#include <linux/mod_devicetable.h>
#include <linux/ftrace_event.h>
#include <linux/slab.h>
#include <linux/tboot.h>
@@ -51,6 +52,12 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
+static const struct x86_cpu_id vmx_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_VMX),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
+
static bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);
@@ -386,6 +393,9 @@ struct vcpu_vmx {
struct {
int loaded;
u16 fs_sel, gs_sel, ldt_sel;
+#ifdef CONFIG_X86_64
+ u16 ds_sel, es_sel;
+#endif
int gs_ldt_reload_needed;
int fs_reload_needed;
} host_state;
@@ -1411,6 +1421,11 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
}
#ifdef CONFIG_X86_64
+ savesegment(ds, vmx->host_state.ds_sel);
+ savesegment(es, vmx->host_state.es_sel);
+#endif
+
+#ifdef CONFIG_X86_64
vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
#else
@@ -1450,6 +1465,19 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
}
if (vmx->host_state.fs_reload_needed)
loadsegment(fs, vmx->host_state.fs_sel);
+#ifdef CONFIG_X86_64
+ if (unlikely(vmx->host_state.ds_sel | vmx->host_state.es_sel)) {
+ loadsegment(ds, vmx->host_state.ds_sel);
+ loadsegment(es, vmx->host_state.es_sel);
+ }
+#else
+ /*
+ * The sysexit path does not restore ds/es, so we must set them to
+ * a reasonable value ourselves.
+ */
+ loadsegment(ds, __USER_DS);
+ loadsegment(es, __USER_DS);
+#endif
reload_tss();
#ifdef CONFIG_X86_64
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
@@ -3633,8 +3661,18 @@ static void vmx_set_constant_host_state(void)
vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */
vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
+#ifdef CONFIG_X86_64
+ /*
+ * Load null selectors, so we can avoid reloading them in
+ * __vmx_load_host_state(), in case userspace uses the null selectors
+ * too (the expected case).
+ */
+ vmcs_write16(HOST_DS_SELECTOR, 0);
+ vmcs_write16(HOST_ES_SELECTOR, 0);
+#else
vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
+#endif
vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
@@ -6256,7 +6294,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
}
}
- asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
vmx->loaded_vmcs->launched = 1;
vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
@@ -6343,7 +6380,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
return &vmx->vcpu;
free_vmcs:
- free_vmcs(vmx->loaded_vmcs->vmcs);
+ free_loaded_vmcs(vmx->loaded_vmcs);
free_msrs:
kfree(vmx->guest_msrs);
uninit_vcpu:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 185a2b823a2..be6d54929fa 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2147,6 +2147,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_ASYNC_PF:
case KVM_CAP_GET_TSC_KHZ:
case KVM_CAP_PCI_2_3:
+ case KVM_CAP_KVMCLOCK_CTRL:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -2597,6 +2598,23 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
return r;
}
+/*
+ * kvm_set_guest_paused() indicates to the guest kernel that it has been
+ * stopped by the hypervisor. This function will be called from the host only.
+ * EINVAL is returned when the host attempts to set the flag for a guest that
+ * does not support pv clocks.
+ */
+static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
+{
+ struct pvclock_vcpu_time_info *src = &vcpu->arch.hv_clock;
+ if (!vcpu->arch.time_page)
+ return -EINVAL;
+ src->flags |= PVCLOCK_GUEST_STOPPED;
+ mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT);
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+ return 0;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2873,6 +2891,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = vcpu->arch.virtual_tsc_khz;
goto out;
}
+ case KVM_KVMCLOCK_CTRL: {
+ r = kvm_set_guest_paused(vcpu);
+ goto out;
+ }
default:
r = -EINVAL;
}
@@ -3045,57 +3067,32 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
}
/**
- * write_protect_slot - write protect a slot for dirty logging
- * @kvm: the kvm instance
- * @memslot: the slot we protect
- * @dirty_bitmap: the bitmap indicating which pages are dirty
- * @nr_dirty_pages: the number of dirty pages
+ * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
+ * @kvm: kvm instance
+ * @log: slot id and address to which we copy the log
*
- * We have two ways to find all sptes to protect:
- * 1. Use kvm_mmu_slot_remove_write_access() which walks all shadow pages and
- * checks ones that have a spte mapping a page in the slot.
- * 2. Use kvm_mmu_rmap_write_protect() for each gfn found in the bitmap.
+ * We need to keep it in mind that VCPU threads can write to the bitmap
+ * concurrently. So, to avoid losing data, we keep the following order for
+ * each bit:
*
- * Generally speaking, if there are not so many dirty pages compared to the
- * number of shadow pages, we should use the latter.
+ * 1. Take a snapshot of the bit and clear it if needed.
+ * 2. Write protect the corresponding page.
+ * 3. Flush TLB's if needed.
+ * 4. Copy the snapshot to the userspace.
*
- * Note that letting others write into a page marked dirty in the old bitmap
- * by using the remaining tlb entry is not a problem. That page will become
- * write protected again when we flush the tlb and then be reported dirty to
- * the user space by copying the old bitmap.
- */
-static void write_protect_slot(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- unsigned long *dirty_bitmap,
- unsigned long nr_dirty_pages)
-{
- spin_lock(&kvm->mmu_lock);
-
- /* Not many dirty pages compared to # of shadow pages. */
- if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
- unsigned long gfn_offset;
-
- for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) {
- unsigned long gfn = memslot->base_gfn + gfn_offset;
-
- kvm_mmu_rmap_write_protect(kvm, gfn, memslot);
- }
- kvm_flush_remote_tlbs(kvm);
- } else
- kvm_mmu_slot_remove_write_access(kvm, memslot->id);
-
- spin_unlock(&kvm->mmu_lock);
-}
-
-/*
- * Get (and clear) the dirty memory log for a memory slot.
+ * Between 2 and 3, the guest may write to the page using the remaining TLB
+ * entry. This is not a problem because the page will be reported dirty at
+ * step 4 using the snapshot taken before and step 3 ensures that successive
+ * writes will be logged for the next call.
*/
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
- struct kvm_dirty_log *log)
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
int r;
struct kvm_memory_slot *memslot;
- unsigned long n, nr_dirty_pages;
+ unsigned long n, i;
+ unsigned long *dirty_bitmap;
+ unsigned long *dirty_bitmap_buffer;
+ bool is_dirty = false;
mutex_lock(&kvm->slots_lock);
@@ -3104,49 +3101,42 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
goto out;
memslot = id_to_memslot(kvm->memslots, log->slot);
+
+ dirty_bitmap = memslot->dirty_bitmap;
r = -ENOENT;
- if (!memslot->dirty_bitmap)
+ if (!dirty_bitmap)
goto out;
n = kvm_dirty_bitmap_bytes(memslot);
- nr_dirty_pages = memslot->nr_dirty_pages;
- /* If nothing is dirty, don't bother messing with page tables. */
- if (nr_dirty_pages) {
- struct kvm_memslots *slots, *old_slots;
- unsigned long *dirty_bitmap, *dirty_bitmap_head;
+ dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+ memset(dirty_bitmap_buffer, 0, n);
- dirty_bitmap = memslot->dirty_bitmap;
- dirty_bitmap_head = memslot->dirty_bitmap_head;
- if (dirty_bitmap == dirty_bitmap_head)
- dirty_bitmap_head += n / sizeof(long);
- memset(dirty_bitmap_head, 0, n);
+ spin_lock(&kvm->mmu_lock);
- r = -ENOMEM;
- slots = kmemdup(kvm->memslots, sizeof(*kvm->memslots), GFP_KERNEL);
- if (!slots)
- goto out;
+ for (i = 0; i < n / sizeof(long); i++) {
+ unsigned long mask;
+ gfn_t offset;
- memslot = id_to_memslot(slots, log->slot);
- memslot->nr_dirty_pages = 0;
- memslot->dirty_bitmap = dirty_bitmap_head;
- update_memslots(slots, NULL);
+ if (!dirty_bitmap[i])
+ continue;
- old_slots = kvm->memslots;
- rcu_assign_pointer(kvm->memslots, slots);
- synchronize_srcu_expedited(&kvm->srcu);
- kfree(old_slots);
+ is_dirty = true;
- write_protect_slot(kvm, memslot, dirty_bitmap, nr_dirty_pages);
+ mask = xchg(&dirty_bitmap[i], 0);
+ dirty_bitmap_buffer[i] = mask;
- r = -EFAULT;
- if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
- goto out;
- } else {
- r = -EFAULT;
- if (clear_user(log->dirty_bitmap, n))
- goto out;
+ offset = i * BITS_PER_LONG;
+ kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
}
+ if (is_dirty)
+ kvm_flush_remote_tlbs(kvm);
+
+ spin_unlock(&kvm->mmu_lock);
+
+ r = -EFAULT;
+ if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
+ goto out;
r = 0;
out:
@@ -3728,9 +3718,8 @@ struct read_write_emulator_ops {
static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
{
if (vcpu->mmio_read_completed) {
- memcpy(val, vcpu->mmio_data, bytes);
trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
- vcpu->mmio_phys_addr, *(u64 *)val);
+ vcpu->mmio_fragments[0].gpa, *(u64 *)val);
vcpu->mmio_read_completed = 0;
return 1;
}
@@ -3766,8 +3755,9 @@ static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes)
{
- memcpy(vcpu->mmio_data, val, bytes);
- memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
+ struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
+
+ memcpy(vcpu->run->mmio.data, frag->data, frag->len);
return X86EMUL_CONTINUE;
}
@@ -3794,10 +3784,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
gpa_t gpa;
int handled, ret;
bool write = ops->write;
-
- if (ops->read_write_prepare &&
- ops->read_write_prepare(vcpu, val, bytes))
- return X86EMUL_CONTINUE;
+ struct kvm_mmio_fragment *frag;
ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
@@ -3823,15 +3810,19 @@ mmio:
bytes -= handled;
val += handled;
- vcpu->mmio_needed = 1;
- vcpu->run->exit_reason = KVM_EXIT_MMIO;
- vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
- vcpu->mmio_size = bytes;
- vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
- vcpu->run->mmio.is_write = vcpu->mmio_is_write = write;
- vcpu->mmio_index = 0;
+ while (bytes) {
+ unsigned now = min(bytes, 8U);
- return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
+ frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
+ frag->gpa = gpa;
+ frag->data = val;
+ frag->len = now;
+
+ gpa += now;
+ val += now;
+ bytes -= now;
+ }
+ return X86EMUL_CONTINUE;
}
int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
@@ -3840,10 +3831,18 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
struct read_write_emulator_ops *ops)
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+ gpa_t gpa;
+ int rc;
+
+ if (ops->read_write_prepare &&
+ ops->read_write_prepare(vcpu, val, bytes))
+ return X86EMUL_CONTINUE;
+
+ vcpu->mmio_nr_fragments = 0;
/* Crossing a page boundary? */
if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
- int rc, now;
+ int now;
now = -addr & ~PAGE_MASK;
rc = emulator_read_write_onepage(addr, val, now, exception,
@@ -3856,8 +3855,25 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
bytes -= now;
}
- return emulator_read_write_onepage(addr, val, bytes, exception,
- vcpu, ops);
+ rc = emulator_read_write_onepage(addr, val, bytes, exception,
+ vcpu, ops);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ if (!vcpu->mmio_nr_fragments)
+ return rc;
+
+ gpa = vcpu->mmio_fragments[0].gpa;
+
+ vcpu->mmio_needed = 1;
+ vcpu->mmio_cur_fragment = 0;
+
+ vcpu->run->mmio.len = vcpu->mmio_fragments[0].len;
+ vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
+ vcpu->run->exit_reason = KVM_EXIT_MMIO;
+ vcpu->run->mmio.phys_addr = gpa;
+
+ return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
}
static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
@@ -5263,10 +5279,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_deliver_pmi(vcpu);
}
- r = kvm_mmu_reload(vcpu);
- if (unlikely(r))
- goto out;
-
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
inject_pending_event(vcpu);
@@ -5282,6 +5294,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
}
+ r = kvm_mmu_reload(vcpu);
+ if (unlikely(r)) {
+ kvm_x86_ops->cancel_injection(vcpu);
+ goto out;
+ }
+
preempt_disable();
kvm_x86_ops->prepare_guest_switch(vcpu);
@@ -5456,33 +5474,55 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
return r;
}
+/*
+ * Implements the following, as a state machine:
+ *
+ * read:
+ * for each fragment
+ * write gpa, len
+ * exit
+ * copy data
+ * execute insn
+ *
+ * write:
+ * for each fragment
+ * write gpa, len
+ * copy data
+ * exit
+ */
static int complete_mmio(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
+ struct kvm_mmio_fragment *frag;
int r;
if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
return 1;
if (vcpu->mmio_needed) {
- vcpu->mmio_needed = 0;
+ /* Complete previous fragment */
+ frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
if (!vcpu->mmio_is_write)
- memcpy(vcpu->mmio_data + vcpu->mmio_index,
- run->mmio.data, 8);
- vcpu->mmio_index += 8;
- if (vcpu->mmio_index < vcpu->mmio_size) {
- run->exit_reason = KVM_EXIT_MMIO;
- run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index;
- memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8);
- run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8);
- run->mmio.is_write = vcpu->mmio_is_write;
- vcpu->mmio_needed = 1;
- return 0;
+ memcpy(frag->data, run->mmio.data, frag->len);
+ if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
+ vcpu->mmio_needed = 0;
+ if (vcpu->mmio_is_write)
+ return 1;
+ vcpu->mmio_read_completed = 1;
+ goto done;
}
+ /* Initiate next fragment */
+ ++frag;
+ run->exit_reason = KVM_EXIT_MMIO;
+ run->mmio.phys_addr = frag->gpa;
if (vcpu->mmio_is_write)
- return 1;
- vcpu->mmio_read_completed = 1;
+ memcpy(run->mmio.data, frag->data, frag->len);
+ run->mmio.len = frag->len;
+ run->mmio.is_write = vcpu->mmio_is_write;
+ return 0;
+
}
+done:
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
@@ -6399,21 +6439,9 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
kvm_cpu_has_interrupt(vcpu));
}
-void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
- int me;
- int cpu = vcpu->cpu;
-
- if (waitqueue_active(&vcpu->wq)) {
- wake_up_interruptible(&vcpu->wq);
- ++vcpu->stat.halt_wakeup;
- }
-
- me = get_cpu();
- if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
- if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE)
- smp_send_reschedule(cpu);
- put_cpu();
+ return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index cb80c293cdd..3d1134ddb88 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -64,7 +64,7 @@ static inline int is_pse(struct kvm_vcpu *vcpu)
static inline int is_paging(struct kvm_vcpu *vcpu)
{
- return kvm_read_cr0_bits(vcpu, X86_CR0_PG);
+ return likely(kvm_read_cr0_bits(vcpu, X86_CR0_PG));
}
static inline u32 bit(int bitno)
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 78d16a554db..2af5df3ade7 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -28,6 +28,7 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>
+#include <asm/asm.h>
/*
* computes a partial checksum, e.g. for TCP/UDP fragments
@@ -282,15 +283,11 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
#define SRC(y...) \
9999: y; \
- .section __ex_table, "a"; \
- .long 9999b, 6001f ; \
- .previous
+ _ASM_EXTABLE(9999b, 6001f)
#define DST(y...) \
9999: y; \
- .section __ex_table, "a"; \
- .long 9999b, 6002f ; \
- .previous
+ _ASM_EXTABLE(9999b, 6002f)
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 024840266ba..5b2995f4557 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -16,6 +16,7 @@
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
+#include <asm/asm.h>
/*
* By placing feature2 after feature1 in altinstructions section, we logically
@@ -63,11 +64,8 @@
jmp copy_user_handle_tail
.previous
- .section __ex_table,"a"
- .align 8
- .quad 100b,103b
- .quad 101b,103b
- .previous
+ _ASM_EXTABLE(100b,103b)
+ _ASM_EXTABLE(101b,103b)
#endif
.endm
@@ -191,29 +189,26 @@ ENTRY(copy_user_generic_unrolled)
60: jmp copy_user_handle_tail /* ecx is zerorest also */
.previous
- .section __ex_table,"a"
- .align 8
- .quad 1b,30b
- .quad 2b,30b
- .quad 3b,30b
- .quad 4b,30b
- .quad 5b,30b
- .quad 6b,30b
- .quad 7b,30b
- .quad 8b,30b
- .quad 9b,30b
- .quad 10b,30b
- .quad 11b,30b
- .quad 12b,30b
- .quad 13b,30b
- .quad 14b,30b
- .quad 15b,30b
- .quad 16b,30b
- .quad 18b,40b
- .quad 19b,40b
- .quad 21b,50b
- .quad 22b,50b
- .previous
+ _ASM_EXTABLE(1b,30b)
+ _ASM_EXTABLE(2b,30b)
+ _ASM_EXTABLE(3b,30b)
+ _ASM_EXTABLE(4b,30b)
+ _ASM_EXTABLE(5b,30b)
+ _ASM_EXTABLE(6b,30b)
+ _ASM_EXTABLE(7b,30b)
+ _ASM_EXTABLE(8b,30b)
+ _ASM_EXTABLE(9b,30b)
+ _ASM_EXTABLE(10b,30b)
+ _ASM_EXTABLE(11b,30b)
+ _ASM_EXTABLE(12b,30b)
+ _ASM_EXTABLE(13b,30b)
+ _ASM_EXTABLE(14b,30b)
+ _ASM_EXTABLE(15b,30b)
+ _ASM_EXTABLE(16b,30b)
+ _ASM_EXTABLE(18b,40b)
+ _ASM_EXTABLE(19b,40b)
+ _ASM_EXTABLE(21b,50b)
+ _ASM_EXTABLE(22b,50b)
CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)
@@ -259,11 +254,8 @@ ENTRY(copy_user_generic_string)
jmp copy_user_handle_tail
.previous
- .section __ex_table,"a"
- .align 8
- .quad 1b,11b
- .quad 3b,12b
- .previous
+ _ASM_EXTABLE(1b,11b)
+ _ASM_EXTABLE(3b,12b)
CFI_ENDPROC
ENDPROC(copy_user_generic_string)
@@ -294,9 +286,6 @@ ENTRY(copy_user_enhanced_fast_string)
jmp copy_user_handle_tail
.previous
- .section __ex_table,"a"
- .align 8
- .quad 1b,12b
- .previous
+ _ASM_EXTABLE(1b,12b)
CFI_ENDPROC
ENDPROC(copy_user_enhanced_fast_string)
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S
index cb0c112386f..cacddc7163e 100644
--- a/arch/x86/lib/copy_user_nocache_64.S
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -14,6 +14,7 @@
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
+#include <asm/asm.h>
.macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
@@ -36,11 +37,8 @@
jmp copy_user_handle_tail
.previous
- .section __ex_table,"a"
- .align 8
- .quad 100b,103b
- .quad 101b,103b
- .previous
+ _ASM_EXTABLE(100b,103b)
+ _ASM_EXTABLE(101b,103b)
#endif
.endm
@@ -111,27 +109,25 @@ ENTRY(__copy_user_nocache)
jmp copy_user_handle_tail
.previous
- .section __ex_table,"a"
- .quad 1b,30b
- .quad 2b,30b
- .quad 3b,30b
- .quad 4b,30b
- .quad 5b,30b
- .quad 6b,30b
- .quad 7b,30b
- .quad 8b,30b
- .quad 9b,30b
- .quad 10b,30b
- .quad 11b,30b
- .quad 12b,30b
- .quad 13b,30b
- .quad 14b,30b
- .quad 15b,30b
- .quad 16b,30b
- .quad 18b,40b
- .quad 19b,40b
- .quad 21b,50b
- .quad 22b,50b
- .previous
+ _ASM_EXTABLE(1b,30b)
+ _ASM_EXTABLE(2b,30b)
+ _ASM_EXTABLE(3b,30b)
+ _ASM_EXTABLE(4b,30b)
+ _ASM_EXTABLE(5b,30b)
+ _ASM_EXTABLE(6b,30b)
+ _ASM_EXTABLE(7b,30b)
+ _ASM_EXTABLE(8b,30b)
+ _ASM_EXTABLE(9b,30b)
+ _ASM_EXTABLE(10b,30b)
+ _ASM_EXTABLE(11b,30b)
+ _ASM_EXTABLE(12b,30b)
+ _ASM_EXTABLE(13b,30b)
+ _ASM_EXTABLE(14b,30b)
+ _ASM_EXTABLE(15b,30b)
+ _ASM_EXTABLE(16b,30b)
+ _ASM_EXTABLE(18b,40b)
+ _ASM_EXTABLE(19b,40b)
+ _ASM_EXTABLE(21b,50b)
+ _ASM_EXTABLE(22b,50b)
CFI_ENDPROC
ENDPROC(__copy_user_nocache)
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index fb903b758da..2419d5fefae 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -8,6 +8,7 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>
+#include <asm/asm.h>
/*
* Checksum copy with exception handling.
@@ -31,26 +32,17 @@
.macro source
10:
- .section __ex_table, "a"
- .align 8
- .quad 10b, .Lbad_source
- .previous
+ _ASM_EXTABLE(10b, .Lbad_source)
.endm
.macro dest
20:
- .section __ex_table, "a"
- .align 8
- .quad 20b, .Lbad_dest
- .previous
+ _ASM_EXTABLE(20b, .Lbad_dest)
.endm
.macro ignore L=.Lignore
30:
- .section __ex_table, "a"
- .align 8
- .quad 30b, \L
- .previous
+ _ASM_EXTABLE(30b, \L)
.endm
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index 459b58a8a15..25b7ae8d058 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -115,7 +115,7 @@ EXPORT_SYMBOL(csum_partial_copy_to_user);
* @src: source address
* @dst: destination address
* @len: number of bytes to be copied.
- * @isum: initial sum that is added into the result (32bit unfolded)
+ * @sum: initial sum that is added into the result (32bit unfolded)
*
* Returns an 32bit unfolded checksum of the buffer.
*/
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 51f1504cddd..b33b1fb1e6d 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -95,10 +95,9 @@ bad_get_user:
CFI_ENDPROC
END(bad_get_user)
-.section __ex_table,"a"
- _ASM_PTR 1b,bad_get_user
- _ASM_PTR 2b,bad_get_user
- _ASM_PTR 3b,bad_get_user
+ _ASM_EXTABLE(1b,bad_get_user)
+ _ASM_EXTABLE(2b,bad_get_user)
+ _ASM_EXTABLE(3b,bad_get_user)
#ifdef CONFIG_X86_64
- _ASM_PTR 4b,bad_get_user
+ _ASM_EXTABLE(4b,bad_get_user)
#endif
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 36b0d15ae6e..7f951c8f76c 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -86,12 +86,10 @@ bad_put_user:
EXIT
END(bad_put_user)
-.section __ex_table,"a"
- _ASM_PTR 1b,bad_put_user
- _ASM_PTR 2b,bad_put_user
- _ASM_PTR 3b,bad_put_user
- _ASM_PTR 4b,bad_put_user
+ _ASM_EXTABLE(1b,bad_put_user)
+ _ASM_EXTABLE(2b,bad_put_user)
+ _ASM_EXTABLE(3b,bad_put_user)
+ _ASM_EXTABLE(4b,bad_put_user)
#ifdef CONFIG_X86_32
- _ASM_PTR 5b,bad_put_user
+ _ASM_EXTABLE(5b,bad_put_user)
#endif
-.previous
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index d6ae30bbd7b..4f74d94c8d9 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <asm/word-at-a-time.h>
+#include <linux/sched.h>
/*
* best effort, GUP based copy_from_user() that is NMI-safe
@@ -21,6 +22,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
void *map;
int ret;
+ if (__range_not_ok(from, n, TASK_SIZE))
+ return len;
+
do {
ret = __get_user_pages_fast(addr, 1, 0, &page);
if (!ret)
@@ -43,104 +47,3 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
return len;
}
EXPORT_SYMBOL_GPL(copy_from_user_nmi);
-
-static inline unsigned long count_bytes(unsigned long mask)
-{
- mask = (mask - 1) & ~mask;
- mask >>= 7;
- return count_masked_bytes(mask);
-}
-
-/*
- * Do a strncpy, return length of string without final '\0'.
- * 'count' is the user-supplied count (return 'count' if we
- * hit it), 'max' is the address space maximum (and we return
- * -EFAULT if we hit it).
- */
-static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max)
-{
- long res = 0;
-
- /*
- * Truncate 'max' to the user-specified limit, so that
- * we only have one limit we need to check in the loop
- */
- if (max > count)
- max = count;
-
- while (max >= sizeof(unsigned long)) {
- unsigned long c;
-
- /* Fall back to byte-at-a-time if we get a page fault */
- if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
- break;
- /* This can write a few bytes past the NUL character, but that's ok */
- *(unsigned long *)(dst+res) = c;
- c = has_zero(c);
- if (c)
- return res + count_bytes(c);
- res += sizeof(unsigned long);
- max -= sizeof(unsigned long);
- }
-
- while (max) {
- char c;
-
- if (unlikely(__get_user(c,src+res)))
- return -EFAULT;
- dst[res] = c;
- if (!c)
- return res;
- res++;
- max--;
- }
-
- /*
- * Uhhuh. We hit 'max'. But was that the user-specified maximum
- * too? If so, that's ok - we got as much as the user asked for.
- */
- if (res >= count)
- return res;
-
- /*
- * Nope: we hit the address space limit, and we still had more
- * characters the caller would have wanted. That's an EFAULT.
- */
- return -EFAULT;
-}
-
-/**
- * strncpy_from_user: - Copy a NUL terminated string from userspace.
- * @dst: Destination address, in kernel space. This buffer must be at
- * least @count bytes long.
- * @src: Source address, in user space.
- * @count: Maximum number of bytes to copy, including the trailing NUL.
- *
- * Copies a NUL-terminated string from userspace to kernel space.
- *
- * On success, returns the length of the string (not including the trailing
- * NUL).
- *
- * If access to userspace fails, returns -EFAULT (some data may have been
- * copied).
- *
- * If @count is smaller than the length of the string, copies @count bytes
- * and returns @count.
- */
-long
-strncpy_from_user(char *dst, const char __user *src, long count)
-{
- unsigned long max_addr, src_addr;
-
- if (unlikely(count <= 0))
- return 0;
-
- max_addr = current_thread_info()->addr_limit.seg;
- src_addr = (unsigned long)src;
- if (likely(src_addr < max_addr)) {
- unsigned long max = max_addr - src_addr;
- return do_strncpy_from_user(dst, src, count, max);
- }
- return -EFAULT;
-}
-EXPORT_SYMBOL(strncpy_from_user);
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index ef2a6a5d78e..1781b2f950e 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -13,6 +13,7 @@
#include <linux/interrupt.h>
#include <asm/uaccess.h>
#include <asm/mmx.h>
+#include <asm/asm.h>
#ifdef CONFIG_X86_INTEL_USERCOPY
/*
@@ -94,50 +95,6 @@ __clear_user(void __user *to, unsigned long n)
}
EXPORT_SYMBOL(__clear_user);
-/**
- * strnlen_user: - Get the size of a string in user space.
- * @s: The string to measure.
- * @n: The maximum valid length
- *
- * Get the size of a NUL-terminated string in user space.
- *
- * Returns the size of the string INCLUDING the terminating NUL.
- * On exception, returns 0.
- * If the string is too long, returns a value greater than @n.
- */
-long strnlen_user(const char __user *s, long n)
-{
- unsigned long mask = -__addr_ok(s);
- unsigned long res, tmp;
-
- might_fault();
-
- __asm__ __volatile__(
- " testl %0, %0\n"
- " jz 3f\n"
- " andl %0,%%ecx\n"
- "0: repne; scasb\n"
- " setne %%al\n"
- " subl %%ecx,%0\n"
- " addl %0,%%eax\n"
- "1:\n"
- ".section .fixup,\"ax\"\n"
- "2: xorl %%eax,%%eax\n"
- " jmp 1b\n"
- "3: movb $1,%%al\n"
- " jmp 1b\n"
- ".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 0b,2b\n"
- ".previous"
- :"=&r" (n), "=&D" (s), "=&a" (res), "=&c" (tmp)
- :"0" (n), "1" (s), "2" (0), "3" (mask)
- :"cc");
- return res & mask;
-}
-EXPORT_SYMBOL(strnlen_user);
-
#ifdef CONFIG_X86_INTEL_USERCOPY
static unsigned long
__copy_user_intel(void __user *to, const void *from, unsigned long size)
@@ -199,47 +156,44 @@ __copy_user_intel(void __user *to, const void *from, unsigned long size)
"101: lea 0(%%eax,%0,4),%0\n"
" jmp 100b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b,100b\n"
- " .long 2b,100b\n"
- " .long 3b,100b\n"
- " .long 4b,100b\n"
- " .long 5b,100b\n"
- " .long 6b,100b\n"
- " .long 7b,100b\n"
- " .long 8b,100b\n"
- " .long 9b,100b\n"
- " .long 10b,100b\n"
- " .long 11b,100b\n"
- " .long 12b,100b\n"
- " .long 13b,100b\n"
- " .long 14b,100b\n"
- " .long 15b,100b\n"
- " .long 16b,100b\n"
- " .long 17b,100b\n"
- " .long 18b,100b\n"
- " .long 19b,100b\n"
- " .long 20b,100b\n"
- " .long 21b,100b\n"
- " .long 22b,100b\n"
- " .long 23b,100b\n"
- " .long 24b,100b\n"
- " .long 25b,100b\n"
- " .long 26b,100b\n"
- " .long 27b,100b\n"
- " .long 28b,100b\n"
- " .long 29b,100b\n"
- " .long 30b,100b\n"
- " .long 31b,100b\n"
- " .long 32b,100b\n"
- " .long 33b,100b\n"
- " .long 34b,100b\n"
- " .long 35b,100b\n"
- " .long 36b,100b\n"
- " .long 37b,100b\n"
- " .long 99b,101b\n"
- ".previous"
+ _ASM_EXTABLE(1b,100b)
+ _ASM_EXTABLE(2b,100b)
+ _ASM_EXTABLE(3b,100b)
+ _ASM_EXTABLE(4b,100b)
+ _ASM_EXTABLE(5b,100b)
+ _ASM_EXTABLE(6b,100b)
+ _ASM_EXTABLE(7b,100b)
+ _ASM_EXTABLE(8b,100b)
+ _ASM_EXTABLE(9b,100b)
+ _ASM_EXTABLE(10b,100b)
+ _ASM_EXTABLE(11b,100b)
+ _ASM_EXTABLE(12b,100b)
+ _ASM_EXTABLE(13b,100b)
+ _ASM_EXTABLE(14b,100b)
+ _ASM_EXTABLE(15b,100b)
+ _ASM_EXTABLE(16b,100b)
+ _ASM_EXTABLE(17b,100b)
+ _ASM_EXTABLE(18b,100b)
+ _ASM_EXTABLE(19b,100b)
+ _ASM_EXTABLE(20b,100b)
+ _ASM_EXTABLE(21b,100b)
+ _ASM_EXTABLE(22b,100b)
+ _ASM_EXTABLE(23b,100b)
+ _ASM_EXTABLE(24b,100b)
+ _ASM_EXTABLE(25b,100b)
+ _ASM_EXTABLE(26b,100b)
+ _ASM_EXTABLE(27b,100b)
+ _ASM_EXTABLE(28b,100b)
+ _ASM_EXTABLE(29b,100b)
+ _ASM_EXTABLE(30b,100b)
+ _ASM_EXTABLE(31b,100b)
+ _ASM_EXTABLE(32b,100b)
+ _ASM_EXTABLE(33b,100b)
+ _ASM_EXTABLE(34b,100b)
+ _ASM_EXTABLE(35b,100b)
+ _ASM_EXTABLE(36b,100b)
+ _ASM_EXTABLE(37b,100b)
+ _ASM_EXTABLE(99b,101b)
: "=&c"(size), "=&D" (d0), "=&S" (d1)
: "1"(to), "2"(from), "0"(size)
: "eax", "edx", "memory");
@@ -312,29 +266,26 @@ __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size)
" popl %0\n"
" jmp 8b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 0b,16b\n"
- " .long 1b,16b\n"
- " .long 2b,16b\n"
- " .long 21b,16b\n"
- " .long 3b,16b\n"
- " .long 31b,16b\n"
- " .long 4b,16b\n"
- " .long 41b,16b\n"
- " .long 10b,16b\n"
- " .long 51b,16b\n"
- " .long 11b,16b\n"
- " .long 61b,16b\n"
- " .long 12b,16b\n"
- " .long 71b,16b\n"
- " .long 13b,16b\n"
- " .long 81b,16b\n"
- " .long 14b,16b\n"
- " .long 91b,16b\n"
- " .long 6b,9b\n"
- " .long 7b,16b\n"
- ".previous"
+ _ASM_EXTABLE(0b,16b)
+ _ASM_EXTABLE(1b,16b)
+ _ASM_EXTABLE(2b,16b)
+ _ASM_EXTABLE(21b,16b)
+ _ASM_EXTABLE(3b,16b)
+ _ASM_EXTABLE(31b,16b)
+ _ASM_EXTABLE(4b,16b)
+ _ASM_EXTABLE(41b,16b)
+ _ASM_EXTABLE(10b,16b)
+ _ASM_EXTABLE(51b,16b)
+ _ASM_EXTABLE(11b,16b)
+ _ASM_EXTABLE(61b,16b)
+ _ASM_EXTABLE(12b,16b)
+ _ASM_EXTABLE(71b,16b)
+ _ASM_EXTABLE(13b,16b)
+ _ASM_EXTABLE(81b,16b)
+ _ASM_EXTABLE(14b,16b)
+ _ASM_EXTABLE(91b,16b)
+ _ASM_EXTABLE(6b,9b)
+ _ASM_EXTABLE(7b,16b)
: "=&c"(size), "=&D" (d0), "=&S" (d1)
: "1"(to), "2"(from), "0"(size)
: "eax", "edx", "memory");
@@ -414,29 +365,26 @@ static unsigned long __copy_user_zeroing_intel_nocache(void *to,
" popl %0\n"
" jmp 8b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 0b,16b\n"
- " .long 1b,16b\n"
- " .long 2b,16b\n"
- " .long 21b,16b\n"
- " .long 3b,16b\n"
- " .long 31b,16b\n"
- " .long 4b,16b\n"
- " .long 41b,16b\n"
- " .long 10b,16b\n"
- " .long 51b,16b\n"
- " .long 11b,16b\n"
- " .long 61b,16b\n"
- " .long 12b,16b\n"
- " .long 71b,16b\n"
- " .long 13b,16b\n"
- " .long 81b,16b\n"
- " .long 14b,16b\n"
- " .long 91b,16b\n"
- " .long 6b,9b\n"
- " .long 7b,16b\n"
- ".previous"
+ _ASM_EXTABLE(0b,16b)
+ _ASM_EXTABLE(1b,16b)
+ _ASM_EXTABLE(2b,16b)
+ _ASM_EXTABLE(21b,16b)
+ _ASM_EXTABLE(3b,16b)
+ _ASM_EXTABLE(31b,16b)
+ _ASM_EXTABLE(4b,16b)
+ _ASM_EXTABLE(41b,16b)
+ _ASM_EXTABLE(10b,16b)
+ _ASM_EXTABLE(51b,16b)
+ _ASM_EXTABLE(11b,16b)
+ _ASM_EXTABLE(61b,16b)
+ _ASM_EXTABLE(12b,16b)
+ _ASM_EXTABLE(71b,16b)
+ _ASM_EXTABLE(13b,16b)
+ _ASM_EXTABLE(81b,16b)
+ _ASM_EXTABLE(14b,16b)
+ _ASM_EXTABLE(91b,16b)
+ _ASM_EXTABLE(6b,9b)
+ _ASM_EXTABLE(7b,16b)
: "=&c"(size), "=&D" (d0), "=&S" (d1)
: "1"(to), "2"(from), "0"(size)
: "eax", "edx", "memory");
@@ -505,29 +453,26 @@ static unsigned long __copy_user_intel_nocache(void *to,
"9: lea 0(%%eax,%0,4),%0\n"
"16: jmp 8b\n"
".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 0b,16b\n"
- " .long 1b,16b\n"
- " .long 2b,16b\n"
- " .long 21b,16b\n"
- " .long 3b,16b\n"
- " .long 31b,16b\n"
- " .long 4b,16b\n"
- " .long 41b,16b\n"
- " .long 10b,16b\n"
- " .long 51b,16b\n"
- " .long 11b,16b\n"
- " .long 61b,16b\n"
- " .long 12b,16b\n"
- " .long 71b,16b\n"
- " .long 13b,16b\n"
- " .long 81b,16b\n"
- " .long 14b,16b\n"
- " .long 91b,16b\n"
- " .long 6b,9b\n"
- " .long 7b,16b\n"
- ".previous"
+ _ASM_EXTABLE(0b,16b)
+ _ASM_EXTABLE(1b,16b)
+ _ASM_EXTABLE(2b,16b)
+ _ASM_EXTABLE(21b,16b)
+ _ASM_EXTABLE(3b,16b)
+ _ASM_EXTABLE(31b,16b)
+ _ASM_EXTABLE(4b,16b)
+ _ASM_EXTABLE(41b,16b)
+ _ASM_EXTABLE(10b,16b)
+ _ASM_EXTABLE(51b,16b)
+ _ASM_EXTABLE(11b,16b)
+ _ASM_EXTABLE(61b,16b)
+ _ASM_EXTABLE(12b,16b)
+ _ASM_EXTABLE(71b,16b)
+ _ASM_EXTABLE(13b,16b)
+ _ASM_EXTABLE(81b,16b)
+ _ASM_EXTABLE(14b,16b)
+ _ASM_EXTABLE(91b,16b)
+ _ASM_EXTABLE(6b,9b)
+ _ASM_EXTABLE(7b,16b)
: "=&c"(size), "=&D" (d0), "=&S" (d1)
: "1"(to), "2"(from), "0"(size)
: "eax", "edx", "memory");
@@ -574,12 +519,9 @@ do { \
"3: lea 0(%3,%0,4),%0\n" \
" jmp 2b\n" \
".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 4b,5b\n" \
- " .long 0b,3b\n" \
- " .long 1b,2b\n" \
- ".previous" \
+ _ASM_EXTABLE(4b,5b) \
+ _ASM_EXTABLE(0b,3b) \
+ _ASM_EXTABLE(1b,2b) \
: "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
: "3"(size), "0"(size), "1"(to), "2"(from) \
: "memory"); \
@@ -616,12 +558,9 @@ do { \
" popl %0\n" \
" jmp 2b\n" \
".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 4b,5b\n" \
- " .long 0b,3b\n" \
- " .long 1b,6b\n" \
- ".previous" \
+ _ASM_EXTABLE(4b,5b) \
+ _ASM_EXTABLE(0b,3b) \
+ _ASM_EXTABLE(1b,6b) \
: "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
: "3"(size), "0"(size), "1"(to), "2"(from) \
: "memory"); \
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 0d0326f388c..e5b130bc2d0 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -52,54 +52,6 @@ unsigned long clear_user(void __user *to, unsigned long n)
}
EXPORT_SYMBOL(clear_user);
-/*
- * Return the size of a string (including the ending 0)
- *
- * Return 0 on exception, a value greater than N if too long
- */
-
-long __strnlen_user(const char __user *s, long n)
-{
- long res = 0;
- char c;
-
- while (1) {
- if (res>n)
- return n+1;
- if (__get_user(c, s))
- return 0;
- if (!c)
- return res+1;
- res++;
- s++;
- }
-}
-EXPORT_SYMBOL(__strnlen_user);
-
-long strnlen_user(const char __user *s, long n)
-{
- if (!access_ok(VERIFY_READ, s, 1))
- return 0;
- return __strnlen_user(s, n);
-}
-EXPORT_SYMBOL(strnlen_user);
-
-long strlen_user(const char __user *s)
-{
- long res = 0;
- char c;
-
- for (;;) {
- if (get_user(c, s))
- return 0;
- if (!c)
- return res+1;
- res++;
- s++;
- }
-}
-EXPORT_SYMBOL(strlen_user);
-
unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len)
{
if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) {
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 81913790442..5d7e51f3fd2 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -28,7 +28,7 @@
# - (66): the last prefix is 0x66
# - (F3): the last prefix is 0xF3
# - (F2): the last prefix is 0xF2
-#
+# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
Table: one byte opcode
Referrer:
@@ -515,12 +515,12 @@ b4: LFS Gv,Mp
b5: LGS Gv,Mp
b6: MOVZX Gv,Eb
b7: MOVZX Gv,Ew
-b8: JMPE | POPCNT Gv,Ev (F3)
+b8: JMPE (!F3) | POPCNT Gv,Ev (F3)
b9: Grp10 (1A)
ba: Grp8 Ev,Ib (1A)
bb: BTC Ev,Gv
-bc: BSF Gv,Ev | TZCNT Gv,Ev (F3)
-bd: BSR Gv,Ev | LZCNT Gv,Ev (F3)
+bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3)
+bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3)
be: MOVSX Gv,Eb
bf: MOVSX Gv,Ew
# 0x0f 0xc0-0xcf
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 1fb85dbe390..903ec1e9c32 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,11 +1,23 @@
#include <linux/module.h>
#include <linux/spinlock.h>
+#include <linux/sort.h>
#include <asm/uaccess.h>
+static inline unsigned long
+ex_insn_addr(const struct exception_table_entry *x)
+{
+ return (unsigned long)&x->insn + x->insn;
+}
+static inline unsigned long
+ex_fixup_addr(const struct exception_table_entry *x)
+{
+ return (unsigned long)&x->fixup + x->fixup;
+}
int fixup_exception(struct pt_regs *regs)
{
const struct exception_table_entry *fixup;
+ unsigned long new_ip;
#ifdef CONFIG_PNPBIOS
if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
@@ -23,15 +35,135 @@ int fixup_exception(struct pt_regs *regs)
fixup = search_exception_tables(regs->ip);
if (fixup) {
- /* If fixup is less than 16, it means uaccess error */
- if (fixup->fixup < 16) {
+ new_ip = ex_fixup_addr(fixup);
+
+ if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
+ /* Special hack for uaccess_err */
current_thread_info()->uaccess_err = 1;
- regs->ip += fixup->fixup;
- return 1;
+ new_ip -= 0x7ffffff0;
}
- regs->ip = fixup->fixup;
+ regs->ip = new_ip;
return 1;
}
return 0;
}
+
+/* Restricted version used during very early boot */
+int __init early_fixup_exception(unsigned long *ip)
+{
+ const struct exception_table_entry *fixup;
+ unsigned long new_ip;
+
+ fixup = search_exception_tables(*ip);
+ if (fixup) {
+ new_ip = ex_fixup_addr(fixup);
+
+ if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) {
+ /* uaccess handling not supported during early boot */
+ return 0;
+ }
+
+ *ip = new_ip;
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Search one exception table for an entry corresponding to the
+ * given instruction address, and return the address of the entry,
+ * or NULL if none is found.
+ * We use a binary search, and thus we assume that the table is
+ * already sorted.
+ */
+const struct exception_table_entry *
+search_extable(const struct exception_table_entry *first,
+ const struct exception_table_entry *last,
+ unsigned long value)
+{
+ while (first <= last) {
+ const struct exception_table_entry *mid;
+ unsigned long addr;
+
+ mid = ((last - first) >> 1) + first;
+ addr = ex_insn_addr(mid);
+ if (addr < value)
+ first = mid + 1;
+ else if (addr > value)
+ last = mid - 1;
+ else
+ return mid;
+ }
+ return NULL;
+}
+
+/*
+ * The exception table needs to be sorted so that the binary
+ * search that we use to find entries in it works properly.
+ * This is used both for the kernel exception table and for
+ * the exception tables of modules that get loaded.
+ *
+ */
+static int cmp_ex(const void *a, const void *b)
+{
+ const struct exception_table_entry *x = a, *y = b;
+
+ /*
+ * This value will always end up fittin in an int, because on
+ * both i386 and x86-64 the kernel symbol-reachable address
+ * space is < 2 GiB.
+ *
+ * This compare is only valid after normalization.
+ */
+ return x->insn - y->insn;
+}
+
+void sort_extable(struct exception_table_entry *start,
+ struct exception_table_entry *finish)
+{
+ struct exception_table_entry *p;
+ int i;
+
+ /* Convert all entries to being relative to the start of the section */
+ i = 0;
+ for (p = start; p < finish; p++) {
+ p->insn += i;
+ i += 4;
+ p->fixup += i;
+ i += 4;
+ }
+
+ sort(start, finish - start, sizeof(struct exception_table_entry),
+ cmp_ex, NULL);
+
+ /* Denormalize all entries */
+ i = 0;
+ for (p = start; p < finish; p++) {
+ p->insn -= i;
+ i += 4;
+ p->fixup -= i;
+ i += 4;
+ }
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * If the exception table is sorted, any referring to the module init
+ * will be at the beginning or the end.
+ */
+void trim_init_extable(struct module *m)
+{
+ /*trim the beginning*/
+ while (m->num_exentries &&
+ within_module_init(ex_insn_addr(&m->extable[0]), m)) {
+ m->extable++;
+ m->num_exentries--;
+ }
+ /*trim the end*/
+ while (m->num_exentries &&
+ within_module_init(ex_insn_addr(&m->extable[m->num_exentries-1]), m))
+ m->num_exentries--;
+}
+#endif /* CONFIG_MODULES */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 3ecfd1aaf21..76dcd9d8e0b 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -582,7 +582,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
pte_t *pte = lookup_address(address, &level);
if (pte && pte_present(*pte) && !pte_exec(*pte))
- printk(nx_warning, current_uid());
+ printk(nx_warning, from_kuid(&init_user_ns, current_uid()));
}
printk(KERN_ALERT "BUG: unable to handle kernel ");
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 4f0cec7e4ff..bc4e9d84157 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -29,8 +29,14 @@ int direct_gbpages
#endif
;
-static void __init find_early_table_space(unsigned long end, int use_pse,
- int use_gbpages)
+struct map_range {
+ unsigned long start;
+ unsigned long end;
+ unsigned page_size_mask;
+};
+
+static void __init find_early_table_space(struct map_range *mr, unsigned long end,
+ int use_pse, int use_gbpages)
{
unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
phys_addr_t base;
@@ -55,6 +61,10 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
#ifdef CONFIG_X86_32
extra += PMD_SIZE;
#endif
+ /* The first 2/4M doesn't use large pages. */
+ if (mr->start < PMD_SIZE)
+ extra += mr->end - mr->start;
+
ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
} else
ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -75,8 +85,9 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
pgt_buf_end = pgt_buf_start;
pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
- printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
- end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
+ printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n",
+ end - 1, pgt_buf_start << PAGE_SHIFT,
+ (pgt_buf_top << PAGE_SHIFT) - 1);
}
void __init native_pagetable_reserve(u64 start, u64 end)
@@ -84,12 +95,6 @@ void __init native_pagetable_reserve(u64 start, u64 end)
memblock_reserve(start, end - start);
}
-struct map_range {
- unsigned long start;
- unsigned long end;
- unsigned page_size_mask;
-};
-
#ifdef CONFIG_X86_32
#define NR_RANGE_MR 3
#else /* CONFIG_X86_64 */
@@ -129,7 +134,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
int nr_range, i;
int use_pse, use_gbpages;
- printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
+ printk(KERN_INFO "init_memory_mapping: [mem %#010lx-%#010lx]\n",
+ start, end - 1);
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
/*
@@ -248,8 +254,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
}
for (i = 0; i < nr_range; i++)
- printk(KERN_DEBUG " %010lx - %010lx page %s\n",
- mr[i].start, mr[i].end,
+ printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n",
+ mr[i].start, mr[i].end - 1,
(mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
(mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
@@ -261,7 +267,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
* nodes are discovered.
*/
if (!after_bootmem)
- find_early_table_space(end, use_pse, use_gbpages);
+ find_early_table_space(&mr[0], end, use_pse, use_gbpages);
for (i = 0; i < nr_range; i++)
ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
@@ -347,8 +353,8 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
* create a kernel page fault:
*/
#ifdef CONFIG_DEBUG_PAGEALLOC
- printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
- begin, end);
+ printk(KERN_INFO "debug: unmapping init [mem %#010lx-%#010lx]\n",
+ begin, end - 1);
set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
#else
/*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index fc18be0f6f2..2b6b4a3c8be 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -407,12 +407,12 @@ static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
unsigned long page_size_mask, pgprot_t prot)
{
- unsigned long pages = 0;
+ unsigned long pages = 0, next;
unsigned long last_map_addr = end;
int i = pmd_index(address);
- for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
+ for (; i < PTRS_PER_PMD; i++, address = next) {
unsigned long pte_phys;
pmd_t *pmd = pmd_page + pmd_index(address);
pte_t *pte;
@@ -426,6 +426,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
break;
}
+ next = (address & PMD_MASK) + PMD_SIZE;
+
if (pmd_val(*pmd)) {
if (!pmd_large(*pmd)) {
spin_lock(&init_mm.page_table_lock);
@@ -449,7 +451,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
* attributes.
*/
if (page_size_mask & (1 << PG_LEVEL_2M)) {
- pages++;
+ last_map_addr = next;
continue;
}
new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
@@ -462,7 +464,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
pfn_pte(address >> PAGE_SHIFT,
__pgprot(pgprot_val(prot) | _PAGE_PSE)));
spin_unlock(&init_mm.page_table_lock);
- last_map_addr = (address & PMD_MASK) + PMD_SIZE;
+ last_map_addr = next;
continue;
}
@@ -482,11 +484,11 @@ static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
unsigned long page_size_mask)
{
- unsigned long pages = 0;
+ unsigned long pages = 0, next;
unsigned long last_map_addr = end;
int i = pud_index(addr);
- for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
+ for (; i < PTRS_PER_PUD; i++, addr = next) {
unsigned long pmd_phys;
pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
@@ -495,8 +497,9 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
if (addr >= end)
break;
- if (!after_bootmem &&
- !e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
+ next = (addr & PUD_MASK) + PUD_SIZE;
+
+ if (!after_bootmem && !e820_any_mapped(addr, next, 0)) {
set_pud(pud, __pud(0));
continue;
}
@@ -523,7 +526,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
* attributes.
*/
if (page_size_mask & (1 << PG_LEVEL_1G)) {
- pages++;
+ last_map_addr = next;
continue;
}
prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
@@ -535,7 +538,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
set_pte((pte_t *)pud,
pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
spin_unlock(&init_mm.page_table_lock);
- last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
+ last_map_addr = next;
continue;
}
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index ac659f737c8..ee05d3a49c9 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -182,7 +182,7 @@ err_free_memtype:
/**
* ioremap_nocache - map bus memory into CPU space
- * @offset: bus address of the memory
+ * @phys_addr: bus address of the memory
* @size: size of the resource to map
*
* ioremap_nocache performs a platform specific sequence of operations to
@@ -219,7 +219,7 @@ EXPORT_SYMBOL(ioremap_nocache);
/**
* ioremap_wc - map memory into CPU space write combined
- * @offset: bus address of the memory
+ * @phys_addr: bus address of the memory
* @size: size of the resource to map
*
* This version of ioremap ensures that the memory is marked write combining.
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 19d3fa08b11..2d125be1bae 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -141,8 +141,8 @@ static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
/* whine about and ignore invalid blks */
if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
- pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n",
- nid, start, end);
+ pr_warning("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
+ nid, start, end - 1);
return 0;
}
@@ -210,8 +210,8 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
start = roundup(start, ZONE_ALIGN);
- printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n",
- nid, start, end);
+ printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
+ nid, start, end - 1);
/*
* Allocate node data. Try remap allocator first, node-local
@@ -232,7 +232,7 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
}
/* report and initialize */
- printk(KERN_INFO " NODE_DATA [%016Lx - %016Lx]%s\n",
+ printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]%s\n",
nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : "");
tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
if (!remapped && tnid != nid)
@@ -291,14 +291,14 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
*/
if (bi->end > bj->start && bi->start < bj->end) {
if (bi->nid != bj->nid) {
- pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n",
- bi->nid, bi->start, bi->end,
- bj->nid, bj->start, bj->end);
+ pr_err("NUMA: node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n",
+ bi->nid, bi->start, bi->end - 1,
+ bj->nid, bj->start, bj->end - 1);
return -EINVAL;
}
- pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n",
- bi->nid, bi->start, bi->end,
- bj->start, bj->end);
+ pr_warning("NUMA: Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n",
+ bi->nid, bi->start, bi->end - 1,
+ bj->start, bj->end - 1);
}
/*
@@ -320,9 +320,9 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
}
if (k < mi->nr_blks)
continue;
- printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%Lx,%Lx)\n",
- bi->nid, bi->start, bi->end, bj->start, bj->end,
- start, end);
+ printk(KERN_INFO "NUMA: Node %d [mem %#010Lx-%#010Lx] + [mem %#010Lx-%#010Lx] -> [mem %#010Lx-%#010Lx]\n",
+ bi->nid, bi->start, bi->end - 1, bj->start,
+ bj->end - 1, start, end - 1);
bi->start = start;
bi->end = end;
numa_remove_memblk_from(j--, mi);
@@ -616,8 +616,8 @@ static int __init dummy_numa_init(void)
{
printk(KERN_INFO "%s\n",
numa_off ? "NUMA turned off" : "No NUMA configuration found");
- printk(KERN_INFO "Faking a node at %016Lx-%016Lx\n",
- 0LLU, PFN_PHYS(max_pfn));
+ printk(KERN_INFO "Faking a node at [mem %#018Lx-%#018Lx]\n",
+ 0LLU, PFN_PHYS(max_pfn) - 1);
node_set(0, numa_nodes_parsed);
numa_add_memblk(0, 0, PFN_PHYS(max_pfn));
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index 53489ff6bf8..dbbbb47260c 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -68,8 +68,8 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
numa_remove_memblk_from(phys_blk, pi);
}
- printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
- eb->start, eb->end, (eb->end - eb->start) >> 20);
+ printk(KERN_INFO "Faking node %d at [mem %#018Lx-%#018Lx] (%LuMB)\n",
+ nid, eb->start, eb->end - 1, (eb->end - eb->start) >> 20);
return 0;
}
@@ -339,9 +339,11 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
} else {
unsigned long n;
- n = simple_strtoul(emu_cmdline, NULL, 0);
+ n = simple_strtoul(emu_cmdline, &emu_cmdline, 0);
ret = split_nodes_interleave(&ei, &pi, 0, max_addr, n);
}
+ if (*emu_cmdline == ':')
+ emu_cmdline++;
if (ret < 0)
goto no_emu;
@@ -418,7 +420,9 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
int physj = emu_nid_to_phys[j];
int dist;
- if (physi >= numa_dist_cnt || physj >= numa_dist_cnt)
+ if (get_option(&emu_cmdline, &dist) == 2)
+ ;
+ else if (physi >= numa_dist_cnt || physj >= numa_dist_cnt)
dist = physi == physj ?
LOCAL_DISTANCE : REMOTE_DISTANCE;
else
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e1ebde31521..a718e0d2350 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -122,7 +122,7 @@ within(unsigned long addr, unsigned long start, unsigned long end)
/**
* clflush_cache_range - flush a cache range with clflush
- * @addr: virtual start address
+ * @vaddr: virtual start address
* @size: number of bytes to flush
*
* clflush is an unordered instruction which needs fencing with mfence
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index f6ff57b7efa..3d68ef6d226 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -158,31 +158,47 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)
return req_type;
}
+struct pagerange_state {
+ unsigned long cur_pfn;
+ int ram;
+ int not_ram;
+};
+
+static int
+pagerange_is_ram_callback(unsigned long initial_pfn, unsigned long total_nr_pages, void *arg)
+{
+ struct pagerange_state *state = arg;
+
+ state->not_ram |= initial_pfn > state->cur_pfn;
+ state->ram |= total_nr_pages > 0;
+ state->cur_pfn = initial_pfn + total_nr_pages;
+
+ return state->ram && state->not_ram;
+}
+
static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end)
{
- int ram_page = 0, not_rampage = 0;
- unsigned long page_nr;
+ int ret = 0;
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long end_pfn = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ struct pagerange_state state = {start_pfn, 0, 0};
- for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
- ++page_nr) {
- /*
- * For legacy reasons, physical address range in the legacy ISA
- * region is tracked as non-RAM. This will allow users of
- * /dev/mem to map portions of legacy ISA region, even when
- * some of those portions are listed(or not even listed) with
- * different e820 types(RAM/reserved/..)
- */
- if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) &&
- page_is_ram(page_nr))
- ram_page = 1;
- else
- not_rampage = 1;
-
- if (ram_page == not_rampage)
- return -1;
+ /*
+ * For legacy reasons, physical address range in the legacy ISA
+ * region is tracked as non-RAM. This will allow users of
+ * /dev/mem to map portions of legacy ISA region, even when
+ * some of those portions are listed(or not even listed) with
+ * different e820 types(RAM/reserved/..)
+ */
+ if (start_pfn < ISA_END_ADDRESS >> PAGE_SHIFT)
+ start_pfn = ISA_END_ADDRESS >> PAGE_SHIFT;
+
+ if (start_pfn < end_pfn) {
+ ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn,
+ &state, pagerange_is_ram_callback);
}
- return ram_page;
+ return (ret > 0) ? -1 : (state.ram ? 1 : 0);
}
/*
@@ -209,9 +225,8 @@ static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
page = pfn_to_page(pfn);
type = get_page_memtype(page);
if (type != -1) {
- printk(KERN_INFO "reserve_ram_pages_type failed "
- "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n",
- start, end, type, req_type);
+ printk(KERN_INFO "reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%lx, req 0x%lx\n",
+ start, end - 1, type, req_type);
if (new_type)
*new_type = type;
@@ -314,9 +329,9 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
err = rbt_memtype_check_insert(new, new_type);
if (err) {
- printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, "
- "track %s, req %s\n",
- start, end, cattr_name(new->type), cattr_name(req_type));
+ printk(KERN_INFO "reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
+ start, end - 1,
+ cattr_name(new->type), cattr_name(req_type));
kfree(new);
spin_unlock(&memtype_lock);
@@ -325,8 +340,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
spin_unlock(&memtype_lock);
- dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
- start, end, cattr_name(new->type), cattr_name(req_type),
+ dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
+ start, end - 1, cattr_name(new->type), cattr_name(req_type),
new_type ? cattr_name(*new_type) : "-");
return err;
@@ -360,14 +375,14 @@ int free_memtype(u64 start, u64 end)
spin_unlock(&memtype_lock);
if (!entry) {
- printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n",
- current->comm, current->pid, start, end);
+ printk(KERN_INFO "%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
+ current->comm, current->pid, start, end - 1);
return -EINVAL;
}
kfree(entry);
- dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end);
+ dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1);
return 0;
}
@@ -491,9 +506,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
while (cursor < to) {
if (!devmem_is_allowed(pfn)) {
- printk(KERN_INFO
- "Program %s tried to access /dev/mem between %Lx->%Lx.\n",
- current->comm, from, to);
+ printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n",
+ current->comm, from, to - 1);
return 0;
}
cursor += PAGE_SIZE;
@@ -554,12 +568,11 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
size;
if (ioremap_change_attr((unsigned long)__va(base), id_sz, flags) < 0) {
- printk(KERN_INFO
- "%s:%d ioremap_change_attr failed %s "
- "for %Lx-%Lx\n",
+ printk(KERN_INFO "%s:%d ioremap_change_attr failed %s "
+ "for [mem %#010Lx-%#010Lx]\n",
current->comm, current->pid,
cattr_name(flags),
- base, (unsigned long long)(base + size));
+ base, (unsigned long long)(base + size-1));
return -EINVAL;
}
return 0;
@@ -591,12 +604,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
flags = lookup_memtype(paddr);
if (want_flags != flags) {
- printk(KERN_WARNING
- "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n",
+ printk(KERN_WARNING "%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
current->comm, current->pid,
cattr_name(want_flags),
(unsigned long long)paddr,
- (unsigned long long)(paddr + size),
+ (unsigned long long)(paddr + size - 1),
cattr_name(flags));
*vma_prot = __pgprot((pgprot_val(*vma_prot) &
(~_PAGE_CACHE_MASK)) |
@@ -614,11 +626,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
!is_new_memtype_allowed(paddr, size, want_flags, flags)) {
free_memtype(paddr, paddr + size);
printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
- " for %Lx-%Lx, got %s\n",
+ " for [mem %#010Lx-%#010Lx], got %s\n",
current->comm, current->pid,
cattr_name(want_flags),
(unsigned long long)paddr,
- (unsigned long long)(paddr + size),
+ (unsigned long long)(paddr + size - 1),
cattr_name(flags));
return -EINVAL;
}
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index efb5b4b9371..4599c3e8bcb 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -176,8 +176,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
return;
}
- printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
- start, end);
+ node_set(node, numa_nodes_parsed);
+
+ printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n",
+ node, pxm,
+ (unsigned long long) start, (unsigned long long) end - 1);
}
void __init acpi_numa_arch_fixup(void) {}
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index d6c0418c3e4..5e57e113b72 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -61,11 +61,13 @@ static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset);
*/
void leave_mm(int cpu)
{
- if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+ struct mm_struct *active_mm = this_cpu_read(cpu_tlbstate.active_mm);
+ if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
BUG();
- cpumask_clear_cpu(cpu,
- mm_cpumask(percpu_read(cpu_tlbstate.active_mm)));
- load_cr3(swapper_pg_dir);
+ if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
+ cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
+ load_cr3(swapper_pg_dir);
+ }
}
EXPORT_SYMBOL_GPL(leave_mm);
@@ -152,8 +154,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
* BUG();
*/
- if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
- if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
+ if (f->flush_mm == this_cpu_read(cpu_tlbstate.active_mm)) {
+ if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
if (f->flush_va == TLB_FLUSH_ALL)
local_flush_tlb();
else
@@ -322,7 +324,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
static void do_flush_tlb_all(void *info)
{
__flush_tlb_all();
- if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
+ if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
leave_mm(smp_processor_id());
}
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index e76e18c94a3..3af5a1e79c9 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -11,6 +11,8 @@ obj-$(CONFIG_X86_INTEL_CE) += ce4100.o
obj-$(CONFIG_ACPI) += acpi.o
obj-y += legacy.o irq.o
+obj-$(CONFIG_STA2X11) += sta2x11-fixup.o
+
obj-$(CONFIG_X86_VISWS) += visws.o
obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index ed2835e148b..fc09c2754e0 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -9,11 +9,11 @@
struct pci_root_info {
struct acpi_device *bridge;
- char *name;
+ char name[16];
unsigned int res_num;
struct resource *res;
- struct list_head *resources;
int busnum;
+ struct pci_sysdata sd;
};
static bool pci_use_crs = true;
@@ -245,13 +245,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
return AE_OK;
}
-static bool resource_contains(struct resource *res, resource_size_t point)
-{
- if (res->start <= point && point <= res->end)
- return true;
- return false;
-}
-
static void coalesce_windows(struct pci_root_info *info, unsigned long type)
{
int i, j;
@@ -272,10 +265,7 @@ static void coalesce_windows(struct pci_root_info *info, unsigned long type)
* our resources no longer match the ACPI _CRS, but
* the kernel resource tree doesn't allow overlaps.
*/
- if (resource_contains(res1, res2->start) ||
- resource_contains(res1, res2->end) ||
- resource_contains(res2, res1->start) ||
- resource_contains(res2, res1->end)) {
+ if (resource_overlaps(res1, res2)) {
res1->start = min(res1->start, res2->start);
res1->end = max(res1->end, res2->end);
dev_info(&info->bridge->dev,
@@ -287,7 +277,8 @@ static void coalesce_windows(struct pci_root_info *info, unsigned long type)
}
}
-static void add_resources(struct pci_root_info *info)
+static void add_resources(struct pci_root_info *info,
+ struct list_head *resources)
{
int i;
struct resource *res, *root, *conflict;
@@ -311,53 +302,74 @@ static void add_resources(struct pci_root_info *info)
"ignoring host bridge window %pR (conflicts with %s %pR)\n",
res, conflict->name, conflict);
else
- pci_add_resource(info->resources, res);
+ pci_add_resource(resources, res);
}
}
+static void free_pci_root_info_res(struct pci_root_info *info)
+{
+ kfree(info->res);
+ info->res = NULL;
+ info->res_num = 0;
+}
+
+static void __release_pci_root_info(struct pci_root_info *info)
+{
+ int i;
+ struct resource *res;
+
+ for (i = 0; i < info->res_num; i++) {
+ res = &info->res[i];
+
+ if (!res->parent)
+ continue;
+
+ if (!(res->flags & (IORESOURCE_MEM | IORESOURCE_IO)))
+ continue;
+
+ release_resource(res);
+ }
+
+ free_pci_root_info_res(info);
+
+ kfree(info);
+}
+static void release_pci_root_info(struct pci_host_bridge *bridge)
+{
+ struct pci_root_info *info = bridge->release_data;
+
+ __release_pci_root_info(info);
+}
+
static void
-get_current_resources(struct acpi_device *device, int busnum,
- int domain, struct list_head *resources)
+probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device,
+ int busnum, int domain)
{
- struct pci_root_info info;
size_t size;
- info.bridge = device;
- info.res_num = 0;
- info.resources = resources;
+ info->bridge = device;
+ info->res_num = 0;
acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource,
- &info);
- if (!info.res_num)
+ info);
+ if (!info->res_num)
return;
- size = sizeof(*info.res) * info.res_num;
- info.res = kmalloc(size, GFP_KERNEL);
- if (!info.res)
+ size = sizeof(*info->res) * info->res_num;
+ info->res_num = 0;
+ info->res = kmalloc(size, GFP_KERNEL);
+ if (!info->res)
return;
- info.name = kasprintf(GFP_KERNEL, "PCI Bus %04x:%02x", domain, busnum);
- if (!info.name)
- goto name_alloc_fail;
+ sprintf(info->name, "PCI Bus %04x:%02x", domain, busnum);
- info.res_num = 0;
acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource,
- &info);
-
- if (pci_use_crs) {
- add_resources(&info);
-
- return;
- }