author     Anton Arapov <anton@redhat.com>  2012-06-08 12:58:00 +0200
committer  Anton Arapov <anton@redhat.com>  2012-06-08 12:58:00 +0200
commit     6792a3f47a2e42d7164292bf7f1a55cfc4c91652
tree       b90c002bfbbeaec92f5d8a2383dcabf6524016f7 /arch/x86
parent     fe2895d3d55146cac65b273c0f83e2c7e543cd0e
fedora kernel: b920e9b748c595f970bf80ede7832d39f8d567da (tag: v3.4.1-2)
Signed-off-by: Anton Arapov <anton@redhat.com>
Diffstat (limited to 'arch/x86')
294 files changed, 10794 insertions, 6494 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5bed94e189f..c9866b0b77d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -69,7 +69,6 @@ config X86 select HAVE_ARCH_JUMP_LABEL select HAVE_TEXT_POKE_SMP select HAVE_GENERIC_HARDIRQS - select HAVE_SPARSE_IRQ select SPARSE_IRQ select GENERIC_FIND_FIRST_BIT select GENERIC_IRQ_PROBE @@ -82,6 +81,7 @@ config X86 select CLKEVT_I8253 select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_IOMAP + select DCACHE_WORD_ACCESS config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS) @@ -179,6 +179,9 @@ config ARCH_HAS_DEFAULT_IDLE config ARCH_HAS_CACHE_LINE_SIZE def_bool y +config ARCH_HAS_CPU_AUTOPROBE + def_bool y + config HAVE_SETUP_PER_CPU_AREA def_bool y @@ -398,6 +401,7 @@ config X86_INTEL_CE select X86_REBOOTFIXUPS select OF select OF_EARLY_FLATTREE + select IRQ_DOMAIN ---help--- Select for the Intel CE media processor (CE4100) SOC. This option compiles in support for the CE4100 SOC for settop @@ -417,27 +421,6 @@ if X86_WANT_INTEL_MID config X86_INTEL_MID bool -config X86_MRST - bool "Moorestown MID platform" - depends on PCI - depends on PCI_GOANY - depends on X86_IO_APIC - select X86_INTEL_MID - select SFI - select DW_APB_TIMER - select APB_TIMER - select I2C - select SPI - select INTEL_SCU_IPC - select X86_PLATFORM_DEVICES - ---help--- - Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin - Internet Device(MID) platform. Moorestown consists of two chips: - Lincroft (CPU core, graphics, and memory controller) and Langwell IOH. - Unlike standard x86 PCs, Moorestown does not have many legacy devices - nor standard legacy replacement devices/features. e.g. Moorestown does - not contain i8259, i8254, HPET, legacy BIOS, most of the io ports. - config X86_MDFLD bool "Medfield MID platform" depends on PCI @@ -451,6 +434,7 @@ config X86_MDFLD select SPI select INTEL_SCU_IPC select X86_PLATFORM_DEVICES + select MFD_INTEL_MSIC ---help--- Medfield is Intel's Low Power Intel Architecture (LPIA) based Moblin Internet Device(MID) platform. @@ -2076,6 +2060,7 @@ config OLPC select GPIOLIB select OF select OF_PROMTREE + select IRQ_DOMAIN ---help--- Add support for detecting the unique features of the OLPC XO hardware. @@ -2133,6 +2118,19 @@ config ALIX Note: You have to set alix.force=1 for boards with Award BIOS. +config NET5501 + bool "Soekris Engineering net5501 System Support (LEDS, GPIO, etc)" + select GPIOLIB + ---help--- + This option enables system support for the Soekris Engineering net5501. + +config GEOS + bool "Traverse Technologies GEOS System Support (LEDS, GPIO, etc)" + select GPIOLIB + depends on DMI + ---help--- + This option enables system support for the Traverse Technologies GEOS. + endif # X86_32 config AMD_NB @@ -2165,9 +2163,9 @@ config IA32_EMULATION depends on X86_64 select COMPAT_BINFMT_ELF ---help--- - Include code to run 32-bit programs under a 64-bit kernel. You should - likely turn this on, unless you're 100% sure that you don't have any - 32-bit programs left. + Include code to run legacy 32-bit programs under a + 64-bit kernel. You should likely turn this on, unless you're + 100% sure that you don't have any 32-bit programs left. config IA32_AOUT tristate "IA32 a.out support" @@ -2175,9 +2173,23 @@ config IA32_AOUT ---help--- Support old a.out binaries in the 32bit emulation. +config X86_X32 + bool "x32 ABI for 64-bit mode (EXPERIMENTAL)" + depends on X86_64 && IA32_EMULATION && EXPERIMENTAL + ---help--- + Include code to run binaries for the x32 native 32-bit ABI + for 64-bit processors. 
An x32 process gets access to the + full 64-bit register file and wide data path while leaving + pointers at 32 bits for smaller memory footprint. + + You will need a recent binutils (2.22 or later) with + elf32_x86_64 support enabled to compile a kernel with this + option set. + config COMPAT def_bool y - depends on IA32_EMULATION + depends on IA32_EMULATION || X86_X32 + select ARCH_WANT_OLD_COMPAT_IPC config COMPAT_FOR_U64_ALIGNMENT def_bool COMPAT diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 3c57033e221..706e12e9984 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -303,7 +303,6 @@ config X86_GENERIC config X86_INTERNODE_CACHE_SHIFT int default "12" if X86_VSMP - default "7" if NUMA default X86_L1_CACHE_SHIFT config X86_CMPXCHG @@ -441,7 +440,7 @@ config CPU_SUP_INTEL config CPU_SUP_CYRIX_32 default y bool "Support Cyrix processors" if PROCESSOR_SELECT - depends on !64BIT + depends on M386 || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT) ---help--- This enables detection, tunings and quirks for Cyrix processors @@ -495,7 +494,7 @@ config CPU_SUP_TRANSMETA_32 config CPU_SUP_UMC_32 default y bool "Support UMC processors" if PROCESSOR_SELECT - depends on !64BIT + depends on M386 || M486 || (EXPERT && !64BIT) ---help--- This enables detection, tunings and quirks for UMC processors diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 209ba129459..b1c611e6da6 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -82,6 +82,22 @@ ifdef CONFIG_CC_STACKPROTECTOR endif endif +ifdef CONFIG_X86_X32 + x32_ld_ok := $(call try-run,\ + /bin/echo -e '1: .quad 1b' | \ + $(CC) $(KBUILD_AFLAGS) -c -xassembler -o "$$TMP" - && \ + $(OBJCOPY) -O elf32-x86-64 "$$TMP" "$$TMPO" && \ + $(LD) -m elf32_x86_64 "$$TMPO" -o "$$TMP",y,n) + ifeq ($(x32_ld_ok),y) + CONFIG_X86_X32_ABI := y + KBUILD_AFLAGS += -DCONFIG_X86_X32_ABI + KBUILD_CFLAGS += -DCONFIG_X86_X32_ABI + else + $(warning CONFIG_X86_X32 enabled but no binutils support) + endif +endif +export CONFIG_X86_X32_ABI + # Don't unroll struct assignments with kmemcheck enabled ifeq ($(CONFIG_KMEMCHECK),y) KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy) @@ -113,10 +129,14 @@ KBUILD_CFLAGS += -Wno-sign-compare KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # prevent gcc from generating any FP code by mistake KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) +KBUILD_CFLAGS += $(call cc-option,-mno-avx,) KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) +archscripts: + $(Q)$(MAKE) $(build)=arch/x86/tools relocs + ### # Syscall table generation @@ -186,6 +206,7 @@ archclean: $(Q)rm -rf $(objtree)/arch/i386 $(Q)rm -rf $(objtree)/arch/x86_64 $(Q)$(MAKE) $(clean)=$(boot) + $(Q)$(MAKE) $(clean)=arch/x86/tools define archhelp echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)' diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um index 36ddec6a41c..36b62bc5263 100644 --- a/arch/x86/Makefile.um +++ b/arch/x86/Makefile.um @@ -8,15 +8,14 @@ ELF_ARCH := i386 ELF_FORMAT := elf32-i386 CHECKFLAGS += -D__i386__ -ifeq ("$(origin SUBARCH)", "command line") -ifneq ("$(shell uname -m | sed -e s/i.86/i386/)", "$(SUBARCH)") KBUILD_CFLAGS += $(call cc-option,-m32) KBUILD_AFLAGS += $(call cc-option,-m32) LINK-y += $(call cc-option,-m32) export LDFLAGS -endif -endif + +LDS_EXTRA := -Ui386 +export LDS_EXTRA # First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y. 
include $(srctree)/arch/x86/Makefile_32.cpu diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index c7093bd9f2d..18997e5a105 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -67,7 +67,7 @@ static inline void outl(u32 v, u16 port) { asm volatile("outl %0,%1" : : "a" (v), "dN" (port)); } -static inline u32 inl(u32 port) +static inline u32 inl(u16 port) { u32 v; asm volatile("inl %1,%0" : "=a" (v) : "dN" (port)); diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index fd55a2ff3ad..e398bb5d63b 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -40,13 +40,12 @@ OBJCOPYFLAGS_vmlinux.bin := -R .comment -S $(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) +targets += vmlinux.bin.all vmlinux.relocs -targets += vmlinux.bin.all vmlinux.relocs relocs -hostprogs-$(CONFIG_X86_NEED_RELOCS) += relocs - +CMD_RELOCS = arch/x86/tools/relocs quiet_cmd_relocs = RELOCS $@ - cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $< -$(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE + cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $< +$(obj)/vmlinux.relocs: vmlinux FORCE $(call if_changed,relocs) vmlinux.bin.all-y := $(obj)/vmlinux.bin diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 09ce870c0ca..24443a33208 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -29,16 +29,14 @@ #include <stdarg.h> #include <sys/types.h> #include <sys/stat.h> -#include <sys/sysmacros.h> #include <unistd.h> #include <fcntl.h> #include <sys/mman.h> -#include <asm/boot.h> #include <tools/le_byteshift.h> typedef unsigned char u8; typedef unsigned short u16; -typedef unsigned long u32; +typedef unsigned int u32; #define DEFAULT_MAJOR_ROOT 0 #define DEFAULT_MINOR_ROOT 0 @@ -256,8 +254,9 @@ int main(int argc, char ** argv) } /* Write the CRC */ - fprintf(stderr, "CRC %lx\n", crc); - if (fwrite(&crc, 1, 4, stdout) != 4) + fprintf(stderr, "CRC %x\n", crc); + put_unaligned_le32(crc, buf); + if (fwrite(buf, 1, 4, stdout) != 4) die("Writing CRC failed"); close(fd); diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 2bf18059fbe..119db67dcb0 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -15,23 +15,28 @@ CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y CONFIG_CGROUP_SCHED=y -CONFIG_UTS_NS=y -CONFIG_IPC_NS=y -CONFIG_USER_NS=y -CONFIG_PID_NS=y -CONFIG_NET_NS=y CONFIG_BLK_DEV_INITRD=y -CONFIG_KALLSYMS_EXTRA_PASS=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_KPROBES=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_EFI_PARTITION=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y -CONFIG_SPARSE_IRQ=y CONFIG_X86_GENERIC=y CONFIG_HPET_TIMER=y CONFIG_SCHED_SMT=y @@ -51,14 +56,12 @@ CONFIG_HZ_1000=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y # CONFIG_COMPAT_VDSO is not set -CONFIG_PM=y +CONFIG_HIBERNATION=y CONFIG_PM_DEBUG=y CONFIG_PM_TRACE_RTC=y -CONFIG_HIBERNATION=y CONFIG_ACPI_PROCFS=y CONFIG_ACPI_DOCK=y CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_DEBUG=y # CONFIG_CPU_FREQ_STAT is not set CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_PERFORMANCE=y @@ -69,7 
+72,6 @@ CONFIG_PCI_MSI=y CONFIG_PCCARD=y CONFIG_YENTA=y CONFIG_HOTPLUG_PCI=y -CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_BINFMT_MISC=y CONFIG_NET=y CONFIG_PACKET=y @@ -120,7 +122,6 @@ CONFIG_NF_CONNTRACK_IPV4=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_TARGET_REJECT=y -CONFIG_IP_NF_TARGET_LOG=y CONFIG_IP_NF_TARGET_ULOG=y CONFIG_NF_NAT=y CONFIG_IP_NF_TARGET_MASQUERADE=y @@ -128,7 +129,6 @@ CONFIG_IP_NF_MANGLE=y CONFIG_NF_CONNTRACK_IPV6=y CONFIG_IP6_NF_IPTABLES=y CONFIG_IP6_NF_MATCH_IPV6HEADER=y -CONFIG_IP6_NF_TARGET_LOG=y CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_TARGET_REJECT=y CONFIG_IP6_NF_MANGLE=y @@ -169,25 +169,20 @@ CONFIG_DM_ZERO=y CONFIG_MACINTOSH_DRIVERS=y CONFIG_MAC_EMUMOUSEBTN=y CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_NET_VENDOR_3COM=y +CONFIG_NETCONSOLE=y +CONFIG_BNX2=y +CONFIG_TIGON3=y CONFIG_NET_TULIP=y -CONFIG_NET_PCI=y -CONFIG_FORCEDETH=y CONFIG_E100=y +CONFIG_E1000=y +CONFIG_E1000E=y +CONFIG_SKY2=y CONFIG_NE2K_PCI=y +CONFIG_FORCEDETH=y CONFIG_8139TOO=y # CONFIG_8139TOO_PIO is not set -CONFIG_E1000=y -CONFIG_E1000E=y CONFIG_R8169=y -CONFIG_SKY2=y -CONFIG_TIGON3=y -CONFIG_BNX2=y -CONFIG_TR=y -CONFIG_NET_PCMCIA=y CONFIG_FDDI=y -CONFIG_NETCONSOLE=y CONFIG_INPUT_POLLDEV=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_EVDEV=y @@ -196,6 +191,7 @@ CONFIG_INPUT_TABLET=y CONFIG_INPUT_TOUCHSCREEN=y CONFIG_INPUT_MISC=y CONFIG_VT_HW_CONSOLE_BINDING=y +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_NONSTANDARD=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y @@ -205,7 +201,6 @@ CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_8250_DETECT_IRQ=y CONFIG_SERIAL_8250_RSA=y -# CONFIG_LEGACY_PTYS is not set CONFIG_HW_RANDOM=y CONFIG_NVRAM=y CONFIG_HPET=y @@ -220,7 +215,6 @@ CONFIG_DRM_I915=y CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y CONFIG_FB_EFI=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_LCD_CLASS_DEVICE is not set CONFIG_VGACON_SOFT_SCROLLBACK=y CONFIG_LOGO=y @@ -283,7 +277,6 @@ CONFIG_ZISOFS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y @@ -291,18 +284,6 @@ CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_OSF_PARTITION=y -CONFIG_AMIGA_PARTITION=y -CONFIG_MAC_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -CONFIG_SGI_PARTITION=y -CONFIG_SUN_PARTITION=y -CONFIG_KARMA_PARTITION=y -CONFIG_EFI_PARTITION=y CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y @@ -317,13 +298,12 @@ CONFIG_DEBUG_KERNEL=y # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set +CONFIG_DEBUG_STACK_USAGE=y CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_DEBUG_STACK_USAGE=y # CONFIG_DEBUG_RODATA_TEST is not set CONFIG_DEBUG_NX_TEST=m CONFIG_DEBUG_BOOT_PARAMS=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 058a35b8286..76eb2903809 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1,4 +1,3 @@ -CONFIG_64BIT=y CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y @@ -16,26 +15,29 @@ CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y CONFIG_CGROUP_SCHED=y -CONFIG_UTS_NS=y -CONFIG_IPC_NS=y -CONFIG_USER_NS=y -CONFIG_PID_NS=y 
-CONFIG_NET_NS=y CONFIG_BLK_DEV_INITRD=y -CONFIG_KALLSYMS_EXTRA_PASS=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_KPROBES=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_EFI_PARTITION=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y -CONFIG_SPARSE_IRQ=y CONFIG_CALGARY_IOMMU=y -CONFIG_AMD_IOMMU=y -CONFIG_AMD_IOMMU_STATS=y CONFIG_NR_CPUS=64 CONFIG_SCHED_SMT=y CONFIG_PREEMPT_VOLUNTARY=y @@ -53,27 +55,22 @@ CONFIG_HZ_1000=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y # CONFIG_COMPAT_VDSO is not set -CONFIG_PM=y +CONFIG_HIBERNATION=y CONFIG_PM_DEBUG=y CONFIG_PM_TRACE_RTC=y -CONFIG_HIBERNATION=y CONFIG_ACPI_PROCFS=y CONFIG_ACPI_DOCK=y CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_DEBUG=y # CONFIG_CPU_FREQ_STAT is not set CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_PERFORMANCE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y CONFIG_X86_ACPI_CPUFREQ=y CONFIG_PCI_MMCONFIG=y -CONFIG_INTEL_IOMMU=y -# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set CONFIG_PCIEPORTBUS=y CONFIG_PCCARD=y CONFIG_YENTA=y CONFIG_HOTPLUG_PCI=y -CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_BINFMT_MISC=y CONFIG_IA32_EMULATION=y CONFIG_NET=y @@ -125,7 +122,6 @@ CONFIG_NF_CONNTRACK_IPV4=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_TARGET_REJECT=y -CONFIG_IP_NF_TARGET_LOG=y CONFIG_IP_NF_TARGET_ULOG=y CONFIG_NF_NAT=y CONFIG_IP_NF_TARGET_MASQUERADE=y @@ -133,7 +129,6 @@ CONFIG_IP_NF_MANGLE=y CONFIG_NF_CONNTRACK_IPV6=y CONFIG_IP6_NF_IPTABLES=y CONFIG_IP6_NF_MATCH_IPV6HEADER=y -CONFIG_IP6_NF_TARGET_LOG=y CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_TARGET_REJECT=y CONFIG_IP6_NF_MANGLE=y @@ -172,20 +167,15 @@ CONFIG_DM_ZERO=y CONFIG_MACINTOSH_DRIVERS=y CONFIG_MAC_EMUMOUSEBTN=y CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_NET_VENDOR_3COM=y +CONFIG_NETCONSOLE=y +CONFIG_TIGON3=y CONFIG_NET_TULIP=y -CONFIG_NET_PCI=y -CONFIG_FORCEDETH=y CONFIG_E100=y -CONFIG_8139TOO=y CONFIG_E1000=y CONFIG_SKY2=y -CONFIG_TIGON3=y -CONFIG_TR=y -CONFIG_NET_PCMCIA=y +CONFIG_FORCEDETH=y +CONFIG_8139TOO=y CONFIG_FDDI=y -CONFIG_NETCONSOLE=y CONFIG_INPUT_POLLDEV=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_EVDEV=y @@ -194,6 +184,7 @@ CONFIG_INPUT_TABLET=y CONFIG_INPUT_TOUCHSCREEN=y CONFIG_INPUT_MISC=y CONFIG_VT_HW_CONSOLE_BINDING=y +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_NONSTANDARD=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y @@ -203,7 +194,6 @@ CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_8250_DETECT_IRQ=y CONFIG_SERIAL_8250_RSA=y -# CONFIG_LEGACY_PTYS is not set CONFIG_HW_RANDOM=y # CONFIG_HW_RANDOM_INTEL is not set # CONFIG_HW_RANDOM_AMD is not set @@ -221,7 +211,6 @@ CONFIG_DRM_I915_KMS=y CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y CONFIG_FB_EFI=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_LCD_CLASS_DEVICE is not set CONFIG_VGACON_SOFT_SCROLLBACK=y CONFIG_LOGO=y @@ -268,6 +257,10 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_HCTOSYS is not set CONFIG_DMADEVICES=y CONFIG_EEEPC_LAPTOP=y +CONFIG_AMD_IOMMU=y +CONFIG_AMD_IOMMU_STATS=y +CONFIG_INTEL_IOMMU=y +# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set CONFIG_EFI_VARS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set @@ -284,7 +277,6 @@ CONFIG_ZISOFS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y 
CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y @@ -292,18 +284,6 @@ CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_OSF_PARTITION=y -CONFIG_AMIGA_PARTITION=y -CONFIG_MAC_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -CONFIG_SGI_PARTITION=y -CONFIG_SUN_PARTITION=y -CONFIG_KARMA_PARTITION=y -CONFIG_EFI_PARTITION=y CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y @@ -317,13 +297,12 @@ CONFIG_DEBUG_KERNEL=y # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set +CONFIG_DEBUG_STACK_USAGE=y CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_DEBUG_STACK_USAGE=y # CONFIG_DEBUG_RODATA_TEST is not set CONFIG_DEBUG_NX_TEST=m CONFIG_DEBUG_BOOT_PARAMS=y diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 2b0b9631474..e191ac048b5 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o +obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o @@ -25,6 +26,7 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o +camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 545d0ce5981..c799352e24f 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -28,6 +28,7 @@ #include <crypto/aes.h> #include <crypto/cryptd.h> #include <crypto/ctr.h> +#include <asm/cpu_device_id.h> #include <asm/i387.h> #include <asm/aes.h> #include <crypto/scatterwalk.h> @@ -1107,12 +1108,12 @@ static int __driver_rfc4106_encrypt(struct aead_request *req) one_entry_in_sg = 1; scatterwalk_start(&src_sg_walk, req->src); scatterwalk_start(&assoc_sg_walk, req->assoc); - src = scatterwalk_map(&src_sg_walk, 0); - assoc = scatterwalk_map(&assoc_sg_walk, 0); + src = scatterwalk_map(&src_sg_walk); + assoc = scatterwalk_map(&assoc_sg_walk); dst = src; if (unlikely(req->src != req->dst)) { scatterwalk_start(&dst_sg_walk, req->dst); - dst = scatterwalk_map(&dst_sg_walk, 0); + dst = scatterwalk_map(&dst_sg_walk); } } else { @@ -1136,11 +1137,11 @@ static int __driver_rfc4106_encrypt(struct aead_request *req) * back to the packet. 
*/ if (one_entry_in_sg) { if (unlikely(req->src != req->dst)) { - scatterwalk_unmap(dst, 0); + scatterwalk_unmap(dst); scatterwalk_done(&dst_sg_walk, 0, 0); } - scatterwalk_unmap(src, 0); - scatterwalk_unmap(assoc, 0); + scatterwalk_unmap(src); + scatterwalk_unmap(assoc); scatterwalk_done(&src_sg_walk, 0, 0); scatterwalk_done(&assoc_sg_walk, 0, 0); } else { @@ -1189,12 +1190,12 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) one_entry_in_sg = 1; scatterwalk_start(&src_sg_walk, req->src); scatterwalk_start(&assoc_sg_walk, req->assoc); - src = scatterwalk_map(&src_sg_walk, 0); - assoc = scatterwalk_map(&assoc_sg_walk, 0); + src = scatterwalk_map(&src_sg_walk); + assoc = scatterwalk_map(&assoc_sg_walk); dst = src; if (unlikely(req->src != req->dst)) { scatterwalk_start(&dst_sg_walk, req->dst); - dst = scatterwalk_map(&dst_sg_walk, 0); + dst = scatterwalk_map(&dst_sg_walk); } } else { @@ -1219,11 +1220,11 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) if (one_entry_in_sg) { if (unlikely(req->src != req->dst)) { - scatterwalk_unmap(dst, 0); + scatterwalk_unmap(dst); scatterwalk_done(&dst_sg_walk, 0, 0); } - scatterwalk_unmap(src, 0); - scatterwalk_unmap(assoc, 0); + scatterwalk_unmap(src); + scatterwalk_unmap(assoc); scatterwalk_done(&src_sg_walk, 0, 0); scatterwalk_done(&assoc_sg_walk, 0, 0); } else { @@ -1253,14 +1254,19 @@ static struct crypto_alg __rfc4106_alg = { }; #endif + +static const struct x86_cpu_id aesni_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_AES), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); + static int __init aesni_init(void) { int err; - if (!cpu_has_aes) { - printk(KERN_INFO "Intel AES-NI instructions are not detected.\n"); + if (!x86_match_cpu(aesni_cpu_id)) return -ENODEV; - } if ((err = crypto_fpu_init())) goto fpu_err; diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index b05aa163d55..7967474de8f 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -25,6 +25,7 @@ * */ +#include <asm/processor.h> #include <crypto/blowfish.h> #include <linux/crypto.h> #include <linux/init.h> @@ -76,27 +77,6 @@ static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src); } -static struct crypto_alg bf_alg = { - .cra_name = "blowfish", - .cra_driver_name = "blowfish-asm", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 3, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(bf_alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = BF_MIN_KEY_SIZE, - .cia_max_keysize = BF_MAX_KEY_SIZE, - .cia_setkey = blowfish_setkey, - .cia_encrypt = blowfish_encrypt, - .cia_decrypt = blowfish_decrypt, - } - } -}; - static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, void (*fn)(struct bf_ctx *, u8 *, const u8 *), void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *)) @@ -160,28 +140,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way); } -static struct crypto_alg blk_ecb_alg = { - .cra_name = "ecb(blowfish)", - .cra_driver_name = "ecb-blowfish-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = 
LIST_HEAD_INIT(blk_ecb_alg.cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .setkey = blowfish_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}; - static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk) { @@ -307,29 +265,6 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, return err; } -static struct crypto_alg blk_cbc_alg = { - .cra_name = "cbc(blowfish)", - .cra_driver_name = "cbc-blowfish-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .ivsize = BF_BLOCK_SIZE, - .setkey = blowfish_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}; - static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk) { u8 *ctrblk = walk->iv; @@ -423,7 +358,67 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, return err; } -static struct crypto_alg blk_ctr_alg = { +static struct crypto_alg bf_algs[4] = { { + .cra_name = "blowfish", + .cra_driver_name = "blowfish-asm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = BF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct bf_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(bf_algs[0].cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = BF_MIN_KEY_SIZE, + .cia_max_keysize = BF_MAX_KEY_SIZE, + .cia_setkey = blowfish_setkey, + .cia_encrypt = blowfish_encrypt, + .cia_decrypt = blowfish_decrypt, + } + } +}, { + .cra_name = "ecb(blowfish)", + .cra_driver_name = "ecb-blowfish-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = BF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct bf_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(bf_algs[1].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .setkey = blowfish_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "cbc(blowfish)", + .cra_driver_name = "cbc-blowfish-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = BF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct bf_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(bf_algs[2].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .ivsize = BF_BLOCK_SIZE, + .setkey = blowfish_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { .cra_name = "ctr(blowfish)", .cra_driver_name = "ctr-blowfish-asm", .cra_priority = 300, @@ -433,7 +428,7 @@ static struct crypto_alg blk_ctr_alg = { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list), + .cra_list = LIST_HEAD_INIT(bf_algs[3].cra_list), .cra_u = { .blkcipher = { .min_keysize = BF_MIN_KEY_SIZE, @@ -444,43 +439,45 @@ static struct crypto_alg blk_ctr_alg = { .decrypt = ctr_crypt, }, }, -}; +} }; + +static bool is_blacklisted_cpu(void) +{ + if 
(boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + if (boot_cpu_data.x86 == 0x0f) { + /* + * On Pentium 4, blowfish-x86_64 is slower than generic C + * implementation because use of 64bit rotates (which are really + * slow on P4). Therefore blacklist P4s. + */ + return true; + } + + return false; +} + +static int force; +module_param(force, int, 0); +MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); static int __init init(void) { - int err; + if (!force && is_blacklisted_cpu()) { + printk(KERN_INFO + "blowfish-x86_64: performance on this CPU " + "would be suboptimal: disabling " + "blowfish-x86_64.\n"); + return -ENODEV; + } - err = crypto_register_alg(&bf_alg); - if (err) - goto bf_err; - err = crypto_register_alg(&blk_ecb_alg); - if (err) - goto ecb_err; - err = crypto_register_alg(&blk_cbc_alg); - if (err) - goto cbc_err; - err = crypto_register_alg(&blk_ctr_alg); - if (err) - goto ctr_err; - - return 0; - -ctr_err: - crypto_unregister_alg(&blk_cbc_alg); -cbc_err: - crypto_unregister_alg(&blk_ecb_alg); -ecb_err: - crypto_unregister_alg(&bf_alg); -bf_err: - return err; + return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs)); } static void __exit fini(void) { - crypto_unregister_alg(&blk_ctr_alg); - crypto_unregister_alg(&blk_cbc_alg); - crypto_unregister_alg(&blk_ecb_alg); - crypto_unregister_alg(&bf_alg); + crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs)); } module_init(init); diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S new file mode 100644 index 00000000000..0b3374335fd --- /dev/null +++ b/arch/x86/crypto/camellia-x86_64-asm_64.S @@ -0,0 +1,520 @@ +/* + * Camellia Cipher Algorithm (x86_64) + * + * Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "camellia-x86_64-asm_64.S" +.text + +.extern camellia_sp10011110; +.extern camellia_sp22000222; +.extern camellia_sp03303033; +.extern camellia_sp00444404; +.extern camellia_sp02220222; +.extern camellia_sp30333033; +.extern camellia_sp44044404; +.extern camellia_sp11101110; + +#define sp10011110 camellia_sp10011110 +#define sp22000222 camellia_sp22000222 +#define sp03303033 camellia_sp03303033 +#define sp00444404 camellia_sp00444404 +#define sp02220222 camellia_sp02220222 +#define sp30333033 camellia_sp30333033 +#define sp44044404 camellia_sp44044404 +#define sp11101110 camellia_sp11101110 + +#define CAMELLIA_TABLE_BYTE_LEN 272 + +/* struct camellia_ctx: */ +#define key_table 0 +#define key_length CAMELLIA_TABLE_BYTE_LEN + +/* register macros */ +#define CTX %rdi +#define RIO %rsi +#define RIOd %esi + +#define RAB0 %rax +#define RCD0 %rcx +#define RAB1 %rbx +#define RCD1 %rdx + +#define RAB0d %eax +#define RCD0d %ecx +#define RAB1d %ebx +#define RCD1d %edx + +#define RAB0bl %al +#define RCD0bl %cl +#define RAB1bl %bl +#define RCD1bl %dl + +#define RAB0bh %ah +#define RCD0bh %ch +#define RAB1bh %bh +#define RCD1bh %dh + +#define RT0 %rsi +#define RT1 %rbp +#define RT2 %r8 + +#define RT0d %esi +#define RT1d %ebp +#define RT2d %r8d + +#define RT2bl %r8b + +#define RXOR %r9 +#define RRBP %r10 +#define RDST %r11 + +#define RXORd %r9d +#define RXORbl %r9b + +#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \ + movzbl ab ## bl, tmp2 ## d; \ + movzbl ab ## bh, tmp1 ## d; \ + rorq $16, ab; \ + xorq T0(, tmp2, 8), dst; \ + xorq T1(, tmp1, 8), dst; + +/********************************************************************** + 1-way camellia + **********************************************************************/ +#define roundsm(ab, subkey, cd) \ + movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \ + \ + xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \ + xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \ + xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \ + xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \ + \ + xorq RT2, cd ## 0; + +#define fls(l, r, kl, kr) \ + movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \ + andl l ## 0d, RT0d; \ + roll $1, RT0d; \ + shlq $32, RT0; \ + xorq RT0, l ## 0; \ + movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \ + orq r ## 0, RT1; \ + shrq $32, RT1; \ + xorq RT1, r ## 0; \ + \ + movq (key_table + ((kl) * 2) * 4)(CTX), RT2; \ + orq l ## 0, RT2; \ + shrq $32, RT2; \ + xorq RT2, l ## 0; \ + movl (key_table + ((kr) * 2) * 4)(CTX), RT0d; \ + andl r ## 0d, RT0d; \ + roll $1, RT0d; \ + shlq $32, RT0; \ + xorq RT0, r ## 0; + +#define enc_rounds(i) \ + roundsm(RAB, i + 2, RCD); \ + roundsm(RCD, i + 3, RAB); \ + roundsm(RAB, i + 4, RCD); \ + roundsm(RCD, i + 5, RAB); \ + roundsm(RAB, i + 6, RCD); \ + roundsm(RCD, i + 7, RAB); + +#define enc_fls(i) \ + fls(RAB, RCD, i + 0, i + 1); + +#define enc_inpack() \ + movq (RIO), RAB0; \ + bswapq RAB0; \ + rolq $32, RAB0; \ + movq 4*2(RIO), RCD0; \ + bswapq RCD0; \ + rorq $32, RCD0; \ + xorq key_table(CTX), RAB0; + +#define enc_outunpack(op, max) \ + xorq key_table(CTX, max, 8), RCD0; \ + rorq $32, RCD0; \ + bswapq RCD0; \ + op ## q RCD0, (RIO); \ + rolq $32, RAB0; \ + bswapq RAB0; \ + op ## q RAB0, 4*2(RIO); + +#define dec_rounds(i) \ + roundsm(RAB, i + 7, 
RCD); \ + roundsm(RCD, i + 6, RAB); \ + roundsm(RAB, i + 5, RCD); \ + roundsm(RCD, i + 4, RAB); \ + roundsm(RAB, i + 3, RCD); \ + roundsm(RCD, i + 2, RAB); + +#define dec_fls(i) \ + fls(RAB, RCD, i + 1, i + 0); + +#define dec_inpack(max) \ + movq (RIO), RAB0; \ + bswapq RAB0; \ + rolq $32, RAB0; \ + movq 4*2(RIO), RCD0; \ + bswapq RCD0; \ + rorq $32, RCD0; \ + xorq key_table(CTX, max, 8), RAB0; + +#define dec_outunpack() \ + xorq key_table(CTX), RCD0; \ + rorq $32, RCD0; \ + bswapq RCD0; \ + movq RCD0, (RIO); \ + rolq $32, RAB0; \ + bswapq RAB0; \ + movq RAB0, 4*2(RIO); + +.global __camellia_enc_blk; +.type __camellia_enc_blk,@function; + +__camellia_enc_blk: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool xor + */ + movq %rbp, RRBP; + + movq %rcx, RXOR; + movq %rsi, RDST; + movq %rdx, RIO; + + enc_inpack(); + + enc_rounds(0); + enc_fls(8); + enc_rounds(8); + enc_fls(16); + enc_rounds(16); + movl $24, RT1d; /* max */ + + cmpb $16, key_length(CTX); + je __enc_done; + + enc_fls(24); + enc_rounds(24); + movl $32, RT1d; /* max */ + +__enc_done: + testb RXORbl, RXORbl; + movq RDST, RIO; + + jnz __enc_xor; + + enc_outunpack(mov, RT1); + + movq RRBP, %rbp; + ret; + +__enc_xor: + enc_outunpack(xor, RT1); + + movq RRBP, %rbp; + ret; + +.global camellia_dec_blk; +.type camellia_dec_blk,@function; + +camellia_dec_blk: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + cmpl $16, key_length(CTX); + movl $32, RT2d; + movl $24, RXORd; + cmovel RXORd, RT2d; /* max */ + + movq %rbp, RRBP; + movq %rsi, RDST; + movq %rdx, RIO; + + dec_inpack(RT2); + + cmpb $24, RT2bl; + je __dec_rounds16; + + dec_rounds(24); + dec_fls(24); + +__dec_rounds16: + dec_rounds(16); + dec_fls(16); + dec_rounds(8); + dec_fls(8); + dec_rounds(0); + + movq RDST, RIO; + + dec_outunpack(); + + movq RRBP, %rbp; + ret; + +/********************************************************************** + 2-way camellia + **********************************************************************/ +#define roundsm2(ab, subkey, cd) \ + movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \ + xorq RT2, cd ## 1; \ + \ + xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \ + xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \ + xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \ + xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \ + \ + xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \ + xorq RT2, cd ## 0; \ + xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \ + xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \ + xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1); + +#define fls2(l, r, kl, kr) \ + movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \ + andl l ## 0d, RT0d; \ + roll $1, RT0d; \ + shlq $32, RT0; \ + xorq RT0, l ## 0; \ + movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \ + orq r ## 0, RT1; \ + shrq $32, RT1; \ + xorq RT1, r ## 0; \ + \ + movl (key_table + ((kl) * 2) * 4)(CTX), RT2d; \ + andl l ## 1d, RT2d; \ + roll $1, RT2d; \ + shlq $32, RT2; \ + xorq RT2, l ## 1; \ + movq (key_table + ((kr) * 2) * 4)(CTX), RT0; \ + orq r ## 1, RT0; \ + shrq $32, RT0; \ + xorq RT0, r ## 1; \ + \ + movq (key_table + ((kl) * 2) * 4)(CTX), RT1; \ + orq l ## 0, RT1; \ + shrq $32, RT1; \ + xorq RT1, l ## 0; \ + movl (key_table + ((kr) * 2) * 4)(CTX), RT2d; \ + andl r ## 0d, RT2d; \ + roll $1, RT2d; \ + shlq $32, RT2; \ + xorq RT2, r ## 0; \ + \ + movq (key_table + ((kl) * 2) * 4)(CTX), RT0; \ + orq l ## 1, RT0; \ + shrq $32, RT0; \ + xorq 
RT0, l ## 1; \ + movl (key_table + ((kr) * 2) * 4)(CTX), RT1d; \ + andl r ## 1d, RT1d; \ + roll $1, RT1d; \ + shlq $32, RT1; \ + xorq RT1, r ## 1; + +#define enc_rounds2(i) \ + roundsm2(RAB, i + 2, RCD); \ + roundsm2(RCD, i + 3, RAB); \ + roundsm2(RAB, i + 4, RCD); \ + roundsm2(RCD, i + 5, RAB); \ + roundsm2(RAB, i + 6, RCD); \ + roundsm2(RCD, i + 7, RAB); + +#define enc_fls2(i) \ + fls2(RAB, RCD, i + 0, i + 1); + +#define enc_inpack2() \ + movq (RIO), RAB0; \ + bswapq RAB0; \ + rorq $32, RAB0; \ + movq 4*2(RIO), RCD0; \ + bswapq RCD0; \ + rolq $32, RCD0; \ + xorq key_table(CTX), RAB0; \ + \ + movq 8*2(RIO), RAB1; \ + bswapq RAB1; \ + rorq $32, RAB1; \ + movq 12*2(RIO), RCD1; \ + bswapq RCD1; \ + rolq $32, RCD1; \ + xorq key_table(CTX), RAB1; + +#define enc_outunpack2(op, max) \ + xorq key_table(CTX, max, 8), RCD0; \ + rolq $32, RCD0; \ + bswapq RCD0; \ + op ## q RCD0, (RIO); \ + rorq $32, RAB0; \ + bswapq RAB0; \ + op ## q RAB0, 4*2(RIO); \ + \ + xorq key_table(CTX, max, 8), RCD1; \ + rolq $32, RCD1; \ + bswapq RCD1; \ + op ## q RCD1, 8*2(RIO); \ + rorq $32, RAB1; \ + bswapq RAB1; \ + op ## q RAB1, 12*2(RIO); + +#define dec_rounds2(i) \ + roundsm2(RAB, i + 7, RCD); \ + roundsm2(RCD, i + 6, RAB); \ + roundsm2(RAB, i + 5, RCD); \ + roundsm2(RCD, i + 4, RAB); \ + roundsm2(RAB, i + 3, RCD); \ + roundsm2(RCD, i + 2, RAB); + +#define dec_fls2(i) \ + fls2(RAB, RCD, i + 1, i + 0); + +#define dec_inpack2(max) \ + movq (RIO), RAB0; \ + bswapq RAB0; \ + rorq $32, RAB0; \ + movq 4*2(RIO), RCD0; \ + bswapq RCD0; \ + rolq $32, RCD0; \ + xorq key_table(CTX, max, 8), RAB0; \ + \ + movq 8*2(RIO), RAB1; \ + bswapq RAB1; \ + rorq $32, RAB1; \ + movq 12*2(RIO), RCD1; \ + bswapq RCD1; \ + rolq $32, RCD1; \ + xorq key_table(CTX, max, 8), RAB1; + +#define dec_outunpack2() \ + xorq key_table(CTX), RCD0; \ + rolq $32, RCD0; \ + bswapq RCD0; \ + movq RCD0, (RIO); \ + rorq $32, RAB0; \ + bswapq RAB0; \ + movq RAB0, 4*2(RIO); \ + \ + xorq key_table(CTX), RCD1; \ + rolq $32, RCD1; \ + bswapq RCD1; \ + movq RCD1, 8*2(RIO); \ + rorq $32, RAB1; \ + bswapq RAB1; \ + movq RAB1, 12*2(RIO); + +.global __camellia_enc_blk_2way; +.type __camellia_enc_blk_2way,@function; + +__camellia_enc_blk_2way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool xor + */ + pushq %rbx; + + movq %rbp, RRBP; + movq %rcx, RXOR; + movq %rsi, RDST; + movq %rdx, RIO; + + enc_inpack2(); + + enc_rounds2(0); + enc_fls2(8); + enc_rounds2(8); + enc_fls2(16); + enc_rounds2(16); + movl $24, RT2d; /* max */ + + cmpb $16, key_length(CTX); + je __enc2_done; + + enc_fls2(24); + enc_rounds2(24); + movl $32, RT2d; /* max */ + +__enc2_done: + test RXORbl, RXORbl; + movq RDST, RIO; + jnz __enc2_xor; + + enc_outunpack2(mov, RT2); + + movq RRBP, %rbp; + popq %rbx; + ret; + +__enc2_xor: + enc_outunpack2(xor, RT2); + + movq RRBP, %rbp; + popq %rbx; + ret; + +.global camellia_dec_blk_2way; +.type camellia_dec_blk_2way,@function; + +camellia_dec_blk_2way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + cmpl $16, key_length(CTX); + movl $32, RT2d; + movl $24, RXORd; + cmovel RXORd, RT2d; /* max */ + + movq %rbx, RXOR; + movq %rbp, RRBP; + movq %rsi, RDST; + movq %rdx, RIO; + + dec_inpack2(RT2); + + cmpb $24, RT2bl; + je __dec2_rounds16; + + dec_rounds2(24); + dec_fls2(24); + +__dec2_rounds16: + dec_rounds2(16); + dec_fls2(16); + dec_rounds2(8); + dec_fls2(8); + dec_rounds2(0); + + movq RDST, RIO; + + dec_outunpack2(); + + movq RRBP, %rbp; + movq RXOR, %rbx; + ret; diff --git a/arch/x86/crypto/camellia_glue.c 
b/arch/x86/crypto/camellia_glue.c new file mode 100644 index 00000000000..3306dc0b139 --- /dev/null +++ b/arch/x86/crypto/camellia_glue.c @@ -0,0 +1,1952 @@ +/* + * Glue Code for assembler optimized version of Camellia + * + * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> + * + * Camellia parts based on code by: + * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation) + * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: + * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> + * CTR part based on code (crypto/ctr.c) by: + * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include <asm/processor.h> +#include <asm/unaligned.h> +#include <linux/crypto.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/types.h> +#include <crypto/algapi.h> +#include <crypto/b128ops.h> +#include <crypto/lrw.h> +#include <crypto/xts.h> + +#define CAMELLIA_MIN_KEY_SIZE 16 +#define CAMELLIA_MAX_KEY_SIZE 32 +#define CAMELLIA_BLOCK_SIZE 16 +#define CAMELLIA_TABLE_BYTE_LEN 272 + +struct camellia_ctx { + u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)]; + u32 key_length; +}; + +/* regular block cipher functions */ +asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst, + const u8 *src); + +/* 2-way parallel cipher functions */ +asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, + const u8 *src) +{ + __camellia_enc_blk(ctx, dst, src, false); +} + +static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst, + const u8 *src) +{ + __camellia_enc_blk(ctx, dst, src, true); +} + +static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, + const u8 *src) +{ + __camellia_enc_blk_2way(ctx, dst, src, false); +} + +static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst, + const u8 *src) +{ + __camellia_enc_blk_2way(ctx, dst, src, true); +} + +static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + camellia_enc_blk(crypto_tfm_ctx(tfm), dst, src); +} + +static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + camellia_dec_blk(crypto_tfm_ctx(tfm), dst, src); +} + +/* camellia sboxes */ +const u64 camellia_sp10011110[256] = { + 0x7000007070707000, 0x8200008282828200, 0x2c00002c2c2c2c00, + 0xec0000ecececec00, 0xb30000b3b3b3b300, 0x2700002727272700, + 0xc00000c0c0c0c000, 0xe50000e5e5e5e500, 0xe40000e4e4e4e400, + 0x8500008585858500, 0x5700005757575700, 0x3500003535353500, + 0xea0000eaeaeaea00, 
0x0c00000c0c0c0c00, 0xae0000aeaeaeae00, + 0x4100004141414100, 0x2300002323232300, 0xef0000efefefef00, + 0x6b00006b6b6b6b00, 0x9300009393939300, 0x4500004545454500, + 0x1900001919191900, 0xa50000a5a5a5a500, 0x2100002121212100, + 0xed0000edededed00, 0x0e00000e0e0e0e00, 0x4f00004f4f4f4f00, + 0x4e00004e4e4e4e00, 0x1d00001d1d1d1d00, 0x6500006565656500, + 0x9200009292929200, 0xbd0000bdbdbdbd00, 0x8600008686868600, + 0xb80000b8b8b8b800, 0xaf0000afafafaf00, 0x8f00008f8f8f8f00, + 0x7c00007c7c7c7c00, 0xeb0000ebebebeb00, 0x1f00001f1f1f1f00, + 0xce0000cececece00, 0x3e00003e3e3e3e00, 0x3000003030303000, + 0xdc0000dcdcdcdc00, 0x5f00005f5f5f5f00, 0x5e00005e5e5e5e00, + 0xc50000c5c5c5c500, 0x0b00000b0b0b0b00, 0x1a00001a1a1a1a00, + 0xa60000a6a6a6a600, 0xe10000e1e1e1e100, 0x3900003939393900, + 0xca0000cacacaca00, 0xd50000d5d5d5d500, 0x4700004747474700, + 0x5d00005d5d5d5d00, 0x3d00003d3d3d3d00, 0xd90000d9d9d9d900, + 0x0100000101010100, 0x5a00005a5a5a5a00, 0xd60000d6d6d6d600, + 0x5100005151515100, 0x5600005656565600, 0x6c00006c6c6c6c00, + 0x4d00004d4d4d4d00, 0x8b00008b8b8b8b00, 0x0d00000d0d0d0d00, + 0x9a00009a9a9a9a00, 0x6600006666666600, 0xfb0000fbfbfbfb00, + 0xcc0000cccccccc00, 0xb00000b0b0b0b000, 0x2d00002d2d2d2d00, + 0x7400007474747400, 0x1200001212121200, 0x2b00002b2b2b2b00, + 0x2000002020202000, 0xf00000f0f0f0f000, 0xb10000b1b1b1b100, + 0x8400008484848400, 0x9900009999999900, 0xdf0000dfdfdfdf00, + 0x4c00004c4c4c4c00, 0xcb0000cbcbcbcb00, 0xc20000c2c2c2c200, + 0x3400003434343400, 0x7e00007e7e7e7e00, 0x7600007676767600, + 0x0500000505050500, 0x6d00006d6d6d6d00, 0xb70000b7b7b7b700, + 0xa90000a9a9a9a900, 0x3100003131313100, 0xd10000d1d1d1d100, + 0x1700001717171700, 0x0400000404040400, 0xd70000d7d7d7d700, + 0x1400001414141400, 0x5800005858585800, 0x3a00003a3a3a3a00, + 0x6100006161616100, 0xde0000dededede00, 0x1b00001b1b1b1b00, + 0x1100001111111100, 0x1c00001c1c1c1c00, 0x3200003232323200, + 0x0f00000f0f0f0f00, 0x9c00009c9c9c9c00, 0x1600001616161600, + 0x5300005353535300, 0x1800001818181800, 0xf20000f2f2f2f200, + 0x2200002222222200, 0xfe0000fefefefe00, 0x4400004444444400, + 0xcf0000cfcfcfcf00, 0xb20000b2b2b2b200, 0xc30000c3c3c3c300, + 0xb50000b5b5b5b500, 0x7a00007a7a7a7a00, 0x9100009191919100, + 0x2400002424242400, 0x0800000808080800, 0xe80000e8e8e8e800, + 0xa80000a8a8a8a800, 0x6000006060606000, 0xfc0000fcfcfcfc00, + 0x6900006969696900, 0x5000005050505000, 0xaa0000aaaaaaaa00, + 0xd00000d0d0d0d000, 0xa00000a0a0a0a000, 0x7d00007d7d7d7d00, + 0xa10000a1a1a1a100, 0x8900008989898900, 0x6200006262626200, + 0x9700009797979700, 0x5400005454545400, 0x5b00005b5b5b5b00, + 0x1e00001e1e1e1e00, 0x9500009595959500, 0xe00000e0e0e0e000, + 0xff0000ffffffff00, 0x6400006464646400, 0xd20000d2d2d2d200, + 0x1000001010101000, 0xc40000c4c4c4c400, 0x0000000000000000, + 0x4800004848484800, 0xa30000a3a3a3a300, 0xf70000f7f7f7f700, + 0x7500007575757500, 0xdb0000dbdbdbdb00, 0x8a00008a8a8a8a00, + 0x0300000303030300, 0xe60000e6e6e6e600, 0xda0000dadadada00, + 0x0900000909090900, 0x3f00003f3f3f3f00, 0xdd0000dddddddd00, + 0x9400009494949400, 0x8700008787878700, 0x5c00005c5c5c5c00, + 0x8300008383838300, 0x0200000202020200, 0xcd0000cdcdcdcd00, + 0x4a00004a4a4a4a00, 0x9000009090909000, 0x3300003333333300, + 0x7300007373737300, 0x6700006767676700, 0xf60000f6f6f6f600, + 0xf30000f3f3f3f300, 0x9d00009d9d9d9d00, 0x7f00007f7f7f7f00, + 0xbf0000bfbfbfbf00, 0xe20000e2e2e2e200, 0x5200005252525200, + 0x9b00009b9b9b9b00, 0xd80000d8d8d8d800, 0x2600002626262600, + 0xc80000c8c8c8c800, 0x3700003737373700, 0xc60000c6c6c6c600, + 0x3b00003b3b3b3b00, 0x8100008181818100, 
0x9600009696969600, + 0x6f00006f6f6f6f00, 0x4b00004b4b4b4b00, 0x1300001313131300, + 0xbe0000bebebebe00, 0x6300006363636300, 0x2e00002e2e2e2e00, + 0xe90000e9e9e9e900, 0x7900007979797900, 0xa70000a7a7a7a700, + 0x8c00008c8c8c8c00, 0x9f00009f9f9f9f00, 0x6e00006e6e6e6e00, + 0xbc0000bcbcbcbc00, 0x8e00008e8e8e8e00, 0x2900002929292900, + 0xf50000f5f5f5f500, 0xf90000f9f9f9f900, 0xb60000b6b6b6b600, + 0x2f00002f2f2f2f00, 0xfd0000fdfdfdfd00, 0xb40000b4b4b4b400, + 0x5900005959595900, 0x7800007878787800, 0x9800009898989800, + 0x0600000606060600, 0x6a00006a6a6a6a00, 0xe70000e7e7e7e700, + 0x4600004646464600, 0x7100007171717100, 0xba0000babababa00, + 0xd40000d4d4d4d400, 0x2500002525252500, 0xab0000abababab00, + 0x4200004242424200, 0x8800008888888800, 0xa20000a2a2a2a200, + 0x8d00008d8d8d8d00, 0xfa0000fafafafa00, 0x7200007272727200, + 0x0700000707070700, 0xb90000b9b9b9b900, 0x5500005555555500, + 0xf80000f8f8f8f800, 0xee0000eeeeeeee00, 0xac0000acacacac00, + 0x0a00000a0a0a0a00, 0x3600003636363600, 0x4900004949494900, + 0x2a00002a2a2a2a00, 0x6800006868686800, 0x3c00003c3c3c3c00, + 0x3800003838383800, 0xf10000f1f1f1f100, 0xa40000a4a4a4a400, + 0x4000004040404000, 0x2800002828282800, 0xd30000d3d3d3d300, + 0x7b00007b7b7b7b00, 0xbb0000bbbbbbbb00, 0xc90000c9c9c9c900, + 0x4300004343434300, 0xc10000c1c1c1c100, 0x1500001515151500, + 0xe30000e3e3e3e300, 0xad0000adadadad00, 0xf40000f4f4f4f400, + 0x7700007777777700, 0xc70000c7c7c7c700, 0x8000008080808000, + 0x9e00009e9e9e9e00, +}; + +const u64 camellia_sp22000222[256] = { + 0xe0e0000000e0e0e0, 0x0505000000050505, 0x5858000000585858, + 0xd9d9000000d9d9d9, 0x6767000000676767, 0x4e4e0000004e4e4e, + 0x8181000000818181, 0xcbcb000000cbcbcb, 0xc9c9000000c9c9c9, + 0x0b0b0000000b0b0b, 0xaeae000000aeaeae, 0x6a6a0000006a6a6a, + 0xd5d5000000d5d5d5, 0x1818000000181818, 0x5d5d0000005d5d5d, + 0x8282000000828282, 0x4646000000464646, 0xdfdf000000dfdfdf, + 0xd6d6000000d6d6d6, 0x2727000000272727, 0x8a8a0000008a8a8a, + 0x3232000000323232, 0x4b4b0000004b4b4b, 0x4242000000424242, + 0xdbdb000000dbdbdb, 0x1c1c0000001c1c1c, 0x9e9e0000009e9e9e, + 0x9c9c0000009c9c9c, 0x3a3a0000003a3a3a, 0xcaca000000cacaca, + 0x2525000000252525, 0x7b7b0000007b7b7b, 0x0d0d0000000d0d0d, + 0x7171000000717171, 0x5f5f0000005f5f5f, 0x1f1f0000001f1f1f, + 0xf8f8000000f8f8f8, 0xd7d7000000d7d7d7, 0x3e3e0000003e3e3e, + 0x9d9d0000009d9d9d, 0x7c7c0000007c7c7c, 0x6060000000606060, + 0xb9b9000000b9b9b9, 0xbebe000000bebebe, 0xbcbc000000bcbcbc, + 0x8b8b0000008b8b8b, 0x1616000000161616, 0x3434000000343434, + 0x4d4d0000004d4d4d, 0xc3c3000000c3c3c3, 0x7272000000727272, + 0x9595000000959595, 0xabab000000ababab, 0x8e8e0000008e8e8e, + 0xbaba000000bababa, 0x7a7a0000007a7a7a, 0xb3b3000000b3b3b3, + 0x0202000000020202, 0xb4b4000000b4b4b4, 0xadad000000adadad, + 0xa2a2000000a2a2a2, 0xacac000000acacac, 0xd8d8000000d8d8d8, + 0x9a9a0000009a9a9a, 0x1717000000171717, 0x1a1a0000001a1a1a, + 0x3535000000353535, 0xcccc000000cccccc, 0xf7f7000000f7f7f7, + 0x9999000000999999, 0x6161000000616161, 0x5a5a0000005a5a5a, + 0xe8e8000000e8e8e8, 0x2424000000242424, 0x5656000000565656, + 0x4040000000404040, 0xe1e1000000e1e1e1, 0x6363000000636363, + 0x0909000000090909, 0x3333000000333333, 0xbfbf000000bfbfbf, + 0x9898000000989898, 0x9797000000979797, 0x8585000000858585, + 0x6868000000686868, 0xfcfc000000fcfcfc, 0xecec000000ececec, + 0x0a0a0000000a0a0a, 0xdada000000dadada, 0x6f6f0000006f6f6f, + 0x5353000000535353, 0x6262000000626262, 0xa3a3000000a3a3a3, + 0x2e2e0000002e2e2e, 0x0808000000080808, 0xafaf000000afafaf, + 0x2828000000282828, 0xb0b0000000b0b0b0, 
0x7474000000747474, + 0xc2c2000000c2c2c2, 0xbdbd000000bdbdbd, 0x3636000000363636, + 0x2222000000222222, 0x3838000000383838, 0x6464000000646464, + 0x1e1e0000001e1e1e, 0x3939000000393939, 0x2c2c0000002c2c2c, + 0xa6a6000000a6a6a6, 0x3030000000303030, 0xe5e5000000e5e5e5, + 0x4444000000444444, 0xfdfd000000fdfdfd, 0x8888000000888888, + 0x9f9f0000009f9f9f, 0x6565000000656565, 0x8787000000878787, + 0x6b6b0000006b6b6b, 0xf4f4000000f4f4f4, 0x2323000000232323, + 0x4848000000484848, 0x1010000000101010, 0xd1d1000000d1d1d1, + 0x5151000000515151, 0xc0c0000000c0c0c0, 0xf9f9000000f9f9f9, + 0xd2d2000000d2d2d2, 0xa0a0000000a0a0a0, 0x5555000000555555, + 0xa1a1000000a1a1a1, 0x4141000000414141, 0xfafa000000fafafa, + 0x4343000000434343, 0x1313000000131313, 0xc4c4000000c4c4c4, + 0x2f2f0000002f2f2f, 0xa8a8000000a8a8a8, 0xb6b6000000b6b6b6, + 0x3c3c0000003c3c3c, 0x2b2b0000002b2b2b, 0xc1c1000000c1c1c1, + 0xffff000000ffffff, 0xc8c8000000c8c8c8, 0xa5a5000000a5a5a5, + 0x2020000000202020, 0x8989000000898989, 0x0000000000000000, + 0x9090000000909090, 0x4747000000474747, 0xefef000000efefef, + 0xeaea000000eaeaea, 0xb7b7000000b7b7b7, 0x1515000000151515, + 0x0606000000060606, 0xcdcd000000cdcdcd, 0xb5b5000000b5b5b5, + 0x1212000000121212, 0x7e7e0000007e7e7e, 0xbbbb000000bbbbbb, + 0x2929000000292929, 0x0f0f0000000f0f0f, 0xb8b8000000b8b8b8, + 0x0707000000070707, 0x0404000000040404, 0x9b9b0000009b9b9b, + 0x9494000000949494, 0x2121000000212121, 0x6666000000666666, + 0xe6e6000000e6e6e6, 0xcece000000cecece, 0xeded000000ededed, + 0xe7e7000000e7e7e7, 0x3b3b0000003b3b3b, 0xfefe000000fefefe, + 0x7f7f0000007f7f7f, 0xc5c5000000c5c5c5, 0xa4a4000000a4a4a4, + 0x3737000000373737, 0xb1b1000000b1b1b1, 0x4c4c0000004c4c4c, + 0x9191000000919191, 0x6e6e0000006e6e6e, 0x8d8d0000008d8d8d, + 0x7676000000767676, 0x0303000000030303, 0x2d2d0000002d2d2d, + 0xdede000000dedede, 0x9696000000969696, 0x2626000000262626, + 0x7d7d0000007d7d7d, 0xc6c6000000c6c6c6, 0x5c5c0000005c5c5c, + 0xd3d3000000d3d3d3, 0xf2f2000000f2f2f2, 0x4f4f0000004f4f4f, + 0x1919000000191919, 0x3f3f0000003f3f3f, 0xdcdc000000dcdcdc, + 0x7979000000797979, 0x1d1d0000001d1d1d, 0x5252000000525252, + 0xebeb000000ebebeb, 0xf3f3000000f3f3f3, 0x6d6d0000006d6d6d, + 0x5e5e0000005e5e5e, 0xfbfb000000fbfbfb, 0x6969000000696969, + 0xb2b2000000b2b2b2, 0xf0f0000000f0f0f0, 0x3131000000313131, + 0x0c0c0000000c0c0c, 0xd4d4000000d4d4d4, 0xcfcf000000cfcfcf, + 0x8c8c0000008c8c8c, 0xe2e2000000e2e2e2, 0x7575000000757575, + 0xa9a9000000a9a9a9, 0x4a4a0000004a4a4a, 0x5757000000575757, + 0x8484000000848484, 0x1111000000111111, 0x4545000000454545, + 0x1b1b0000001b1b1b, 0xf5f5000000f5f5f5, 0xe4e4000000e4e4e4, + 0x0e0e0000000e0e0e, 0x7373000000737373, 0xaaaa000000aaaaaa, + 0xf1f1000000f1f1f1, 0xdddd000000dddddd, 0x5959000000595959, + 0x1414000000141414, 0x6c6c0000006c6c6c, 0x9292000000929292, + 0x5454000000545454, 0xd0d0000000d0d0d0, 0x7878000000787878, + 0x7070000000707070, 0xe3e3000000e3e3e3, 0x4949000000494949, + 0x8080000000808080, 0x5050000000505050, 0xa7a7000000a7a7a7, + 0xf6f6000000f6f6f6, 0x7777000000777777, 0x9393000000939393, + 0x8686000000868686, 0x8383000000838383, 0x2a2a0000002a2a2a, + 0xc7c7000000c7c7c7, 0x5b5b0000005b5b5b, 0xe9e9000000e9e9e9, + 0xeeee000000eeeeee, 0x8f8f0000008f8f8f, 0x0101000000010101, + 0x3d3d0000003d3d3d, +}; + +const u64 camellia_sp03303033[256] = { + 0x0038380038003838, 0x0041410041004141, 0x0016160016001616, + 0x0076760076007676, 0x00d9d900d900d9d9, 0x0093930093009393, + 0x0060600060006060, 0x00f2f200f200f2f2, 0x0072720072007272, + 0x00c2c200c200c2c2, 0x00abab00ab00abab, 
0x009a9a009a009a9a, + 0x0075750075007575, 0x0006060006000606, 0x0057570057005757, + 0x00a0a000a000a0a0, 0x0091910091009191, 0x00f7f700f700f7f7, + 0x00b5b500b500b5b5, 0x00c9c900c900c9c9, 0x00a2a200a200a2a2, + 0x008c8c008c008c8c, 0x00d2d200d200d2d2, 0x0090900090009090, + 0x00f6f600f600f6f6, 0x0007070007000707, 0x00a7a700a700a7a7, + 0x0027270027002727, 0x008e8e008e008e8e, 0x00b2b200b200b2b2, + 0x0049490049004949, 0x00dede00de00dede, 0x0043430043004343, + 0x005c5c005c005c5c, 0x00d7d700d700d7d7, 0x00c7c700c700c7c7, + 0x003e3e003e003e3e, 0x00f5f500f500f5f5, 0x008f8f008f008f8f, + 0x0067670067006767, 0x001f1f001f001f1f, 0x0018180018001818, + 0x006e6e006e006e6e, 0x00afaf00af00afaf, 0x002f2f002f002f2f, + 0x00e2e200e200e2e2, 0x0085850085008585, 0x000d0d000d000d0d, + 0x0053530053005353, 0x00f0f000f000f0f0, 0x009c9c009c009c9c, + 0x0065650065006565, 0x00eaea00ea00eaea, 0x00a3a300a300a3a3, + 0x00aeae00ae00aeae, 0x009e9e009e009e9e, 0x00ecec00ec00ecec, + 0x0080800080008080, 0x002d2d002d002d2d, 0x006b6b006b006b6b, + 0x00a8a800a800a8a8, 0x002b2b002b002b2b, 0x0036360036003636, + 0x00a6a600a600a6a6, 0x00c5c500c500c5c5, 0x0086860086008686, + 0x004d4d004d004d4d, 0x0033330033003333, 0x00fdfd00fd00fdfd, + 0x0066660066006666, 0x0058580058005858, 0x0096960096009696, + 0x003a3a003a003a3a, 0x0009090009000909, 0x0095950095009595, + 0x0010100010001010, 0x0078780078007878, 0x00d8d800d800d8d8, + 0x0042420042004242, 0x00cccc00cc00cccc, 0x00efef00ef00efef, + 0x0026260026002626, 0x00e5e500e500e5e5, 0x0061610061006161, + 0x001a1a001a001a1a, 0x003f3f003f003f3f, 0x003b3b003b003b3b, + 0x0082820082008282, 0x00b6b600b600b6b6, 0x00dbdb00db00dbdb, + 0x00d4d400d400d4d4, 0x0098980098009898, 0x00e8e800e800e8e8, + 0x008b8b008b008b8b, 0x0002020002000202, 0x00ebeb00eb00ebeb, + 0x000a0a000a000a0a, 0x002c2c002c002c2c, 0x001d1d001d001d1d, + 0x00b0b000b000b0b0, 0x006f6f006f006f6f, 0x008d8d008d008d8d, + 0x0088880088008888, 0x000e0e000e000e0e, 0x0019190019001919, + 0x0087870087008787, 0x004e4e004e004e4e, 0x000b0b000b000b0b, + 0x00a9a900a900a9a9, 0x000c0c000c000c0c, 0x0079790079007979, + 0x0011110011001111, 0x007f7f007f007f7f, 0x0022220022002222, + 0x00e7e700e700e7e7, 0x0059590059005959, 0x00e1e100e100e1e1, + 0x00dada00da00dada, 0x003d3d003d003d3d, 0x00c8c800c800c8c8, + 0x0012120012001212, 0x0004040004000404, 0x0074740074007474, + 0x0054540054005454, 0x0030300030003030, 0x007e7e007e007e7e, + 0x00b4b400b400b4b4, 0x0028280028002828, 0x0055550055005555, + 0x0068680068006868, 0x0050500050005050, 0x00bebe00be00bebe, + 0x00d0d000d000d0d0, 0x00c4c400c400c4c4, 0x0031310031003131, + 0x00cbcb00cb00cbcb, 0x002a2a002a002a2a, 0x00adad00ad00adad, + 0x000f0f000f000f0f, 0x00caca00ca00caca, 0x0070700070007070, + 0x00ffff00ff00ffff, 0x0032320032003232, 0x0069690069006969, + 0x0008080008000808, 0x0062620062006262, 0x0000000000000000, + 0x0024240024002424, 0x00d1d100d100d1d1, 0x00fbfb00fb00fbfb, + 0x00baba00ba00baba, 0x00eded00ed00eded, 0x0045450045004545, + 0x0081810081008181, 0x0073730073007373, 0x006d6d006d006d6d, + 0x0084840084008484, 0x009f9f009f009f9f, 0x00eeee00ee00eeee, + 0x004a4a004a004a4a, 0x00c3c300c300c3c3, 0x002e2e002e002e2e, + 0x00c1c100c100c1c1, 0x0001010001000101, 0x00e6e600e600e6e6, + 0x0025250025002525, 0x0048480048004848, 0x0099990099009999, + 0x00b9b900b900b9b9, 0x00b3b300b300b3b3, 0x007b7b007b007b7b, + 0x00f9f900f900f9f9, 0x00cece00ce00cece, 0x00bfbf00bf00bfbf, + 0x00dfdf00df00dfdf, 0x0071710071007171, 0x0029290029002929, + 0x00cdcd00cd00cdcd, 0x006c6c006c006c6c, 0x0013130013001313, + 0x0064640064006464, 0x009b9b009b009b9b, 0x0063630063006363, 
+ 0x009d9d009d009d9d, 0x00c0c000c000c0c0, 0x004b4b004b004b4b, + 0x00b7b700b700b7b7, 0x00a5a500a500a5a5, 0x0089890089008989, + 0x005f5f005f005f5f, 0x00b1b100b100b1b1, 0x0017170017001717, + 0x00f4f400f400f4f4, 0x00bcbc00bc00bcbc, 0x00d3d300d300d3d3, + 0x0046460046004646, 0x00cfcf00cf00cfcf, 0x0037370037003737, + 0x005e5e005e005e5e, 0x0047470047004747, 0x0094940094009494, + 0x00fafa00fa00fafa, 0x00fcfc00fc00fcfc, 0x005b5b005b005b5b, + 0x0097970097009797, 0x00fefe00fe00fefe, 0x005a5a005a005a5a, + 0x00acac00ac00acac, 0x003c3c003c003c3c, 0x004c4c004c004c4c, + 0x0003030003000303, 0x0035350035003535, 0x00f3f300f300f3f3, + 0x0023230023002323, 0x00b8b800b800b8b8, 0x005d5d005d005d5d, + 0x006a6a006a006a6a, 0x0092920092009292, 0x00d5d500d500d5d5, + 0x0021210021002121, 0x0044440044004444, 0x0051510051005151, + 0x00c6c600c600c6c6, 0x007d7d007d007d7d, 0x0039390039003939, + 0x0083830083008383, 0x00dcdc00dc00dcdc, 0x00aaaa00aa00aaaa, + 0x007c7c007c007c7c, 0x0077770077007777, 0x0056560056005656, + 0x0005050005000505, 0x001b1b001b001b1b, 0x00a4a400a400a4a4, + 0x0015150015001515, 0x0034340034003434, 0x001e1e001e001e1e, + 0x001c1c001c001c1c, 0x00f8f800f800f8f8, 0x0052520052005252, + 0x0020200020002020, 0x0014140014001414, 0x00e9e900e900e9e9, + 0x00bdbd00bd00bdbd, 0x00dddd00dd00dddd, 0x00e4e400e400e4e4, + 0x00a1a100a100a1a1, 0x00e0e000e000e0e0, 0x008a8a008a008a8a, + 0x00f1f100f100f1f1, 0x00d6d600d600d6d6, 0x007a7a007a007a7a, + 0x00bbbb00bb00bbbb, 0x00e3e300e300e3e3, 0x0040400040004040, + 0x004f4f004f004f4f, +}; + +const u64 camellia_sp00444404[256] = { + 0x0000707070700070, 0x00002c2c2c2c002c, 0x0000b3b3b3b300b3, + 0x0000c0c0c0c000c0, 0x0000e4e4e4e400e4, 0x0000575757570057, + 0x0000eaeaeaea00ea, 0x0000aeaeaeae00ae, 0x0000232323230023, + 0x00006b6b6b6b006b, 0x0000454545450045, 0x0000a5a5a5a500a5, + 0x0000edededed00ed, 0x00004f4f4f4f004f, 0x00001d1d1d1d001d, + 0x0000929292920092, 0x0000868686860086, 0x0000afafafaf00af, + 0x00007c7c7c7c007c, 0x00001f1f1f1f001f, 0x00003e3e3e3e003e, + 0x0000dcdcdcdc00dc, 0x00005e5e5e5e005e, 0x00000b0b0b0b000b, + 0x0000a6a6a6a600a6, 0x0000393939390039, 0x0000d5d5d5d500d5, + 0x00005d5d5d5d005d, 0x0000d9d9d9d900d9, 0x00005a5a5a5a005a, + 0x0000515151510051, 0x00006c6c6c6c006c, 0x00008b8b8b8b008b, + 0x00009a9a9a9a009a, 0x0000fbfbfbfb00fb, 0x0000b0b0b0b000b0, + 0x0000747474740074, 0x00002b2b2b2b002b, 0x0000f0f0f0f000f0, + 0x0000848484840084, 0x0000dfdfdfdf00df, 0x0000cbcbcbcb00cb, + 0x0000343434340034, 0x0000767676760076, 0x00006d6d6d6d006d, + 0x0000a9a9a9a900a9, 0x0000d1d1d1d100d1, 0x0000040404040004, + 0x0000141414140014, 0x00003a3a3a3a003a, 0x0000dededede00de, + 0x0000111111110011, 0x0000323232320032, 0x00009c9c9c9c009c, + 0x0000535353530053, 0x0000f2f2f2f200f2, 0x0000fefefefe00fe, + 0x0000cfcfcfcf00cf, 0x0000c3c3c3c300c3, 0x00007a7a7a7a007a, + 0x0000242424240024, 0x0000e8e8e8e800e8, 0x0000606060600060, + 0x0000696969690069, 0x0000aaaaaaaa00aa, 0x0000a0a0a0a000a0, + 0x0000a1a1a1a100a1, 0x0000626262620062, 0x0000545454540054, + 0x00001e1e1e1e001e, 0x0000e0e0e0e000e0, 0x0000646464640064, + 0x0000101010100010, 0x0000000000000000, 0x0000a3a3a3a300a3, + 0x0000757575750075, 0x00008a8a8a8a008a, 0x0000e6e6e6e600e6, + 0x0000090909090009, 0x0000dddddddd00dd, 0x0000878787870087, + 0x0000838383830083, 0x0000cdcdcdcd00cd, 0x0000909090900090, + 0x0000737373730073, 0x0000f6f6f6f600f6, 0x00009d9d9d9d009d, + 0x0000bfbfbfbf00bf, 0x0000525252520052, 0x0000d8d8d8d800d8, + 0x0000c8c8c8c800c8, 0x0000c6c6c6c600c6, 0x0000818181810081, + 0x00006f6f6f6f006f, 0x0000131313130013, 0x0000636363630063, + 
0x0000e9e9e9e900e9, 0x0000a7a7a7a700a7, 0x00009f9f9f9f009f, + 0x0000bcbcbcbc00bc, 0x0000292929290029, 0x0000f9f9f9f900f9, + 0x00002f2f2f2f002f, 0x0000b4b4b4b400b4, 0x0000787878780078, + 0x0000060606060006, 0x0000e7e7e7e700e7, 0x0000717171710071, + 0x0000d4d4d4d400d4, 0x0000abababab00ab, 0x0000888888880088, + 0x00008d8d8d8d008d, 0x0000727272720072, 0x0000b9b9b9b900b9, + 0x0000f8f8f8f800f8, 0x0000acacacac00ac, 0x0000363636360036, + 0x00002a2a2a2a002a, 0x00003c3c3c3c003c, 0x0000f1f1f1f100f1, + 0x0000404040400040, 0x0000d3d3d3d300d3, 0x0000bbbbbbbb00bb, + 0x0000434343430043, 0x0000151515150015, 0x0000adadadad00ad, + 0x0000777777770077, 0x0000808080800080, 0x0000828282820082, + 0x0000ecececec00ec, 0x0000272727270027, 0x0000e5e5e5e500e5, + 0x0000858585850085, 0x0000353535350035, 0x00000c0c0c0c000c, + 0x0000414141410041, 0x0000efefefef00ef, 0x0000939393930093, + 0x0000191919190019, 0x0000212121210021, 0x00000e0e0e0e000e, + 0x00004e4e4e4e004e, 0x0000656565650065, 0x0000bdbdbdbd00bd, + 0x0000b8b8b8b800b8, 0x00008f8f8f8f008f, 0x0000ebebebeb00eb, + 0x0000cececece00ce, 0x0000303030300030, 0x00005f5f5f5f005f, + 0x0000c5c5c5c500c5, 0x00001a1a1a1a001a, 0x0000e1e1e1e100e1, + 0x0000cacacaca00ca, 0x0000474747470047, 0x00003d3d3d3d003d, + 0x0000010101010001, 0x0000d6d6d6d600d6, 0x0000565656560056, + 0x00004d4d4d4d004d, 0x00000d0d0d0d000d, 0x0000666666660066, + 0x0000cccccccc00cc, 0x00002d2d2d2d002d, 0x0000121212120012, + 0x0000202020200020, 0x0000b1b1b1b100b1, 0x0000999999990099, + 0x00004c4c4c4c004c, 0x0000c2c2c2c200c2, 0x00007e7e7e7e007e, + 0x0000050505050005, 0x0000b7b7b7b700b7, 0x0000313131310031, + 0x0000171717170017, 0x0000d7d7d7d700d7, 0x0000585858580058, + 0x0000616161610061, 0x00001b1b1b1b001b, 0x00001c1c1c1c001c, + 0x00000f0f0f0f000f, 0x0000161616160016, 0x0000181818180018, + 0x0000222222220022, 0x0000444444440044, 0x0000b2b2b2b200b2, + 0x0000b5b5b5b500b5, 0x0000919191910091, 0x0000080808080008, + 0x0000a8a8a8a800a8, 0x0000fcfcfcfc00fc, 0x0000505050500050, + 0x0000d0d0d0d000d0, 0x00007d7d7d7d007d, 0x0000898989890089, + 0x0000979797970097, 0x00005b5b5b5b005b, 0x0000959595950095, + 0x0000ffffffff00ff, 0x0000d2d2d2d200d2, 0x0000c4c4c4c400c4, + 0x0000484848480048, 0x0000f7f7f7f700f7, 0x0000dbdbdbdb00db, + 0x0000030303030003, 0x0000dadadada00da, 0x00003f3f3f3f003f, + 0x0000949494940094, 0x00005c5c5c5c005c, 0x0000020202020002, + 0x00004a4a4a4a004a, 0x0000333333330033, 0x0000676767670067, + 0x0000f3f3f3f300f3, 0x00007f7f7f7f007f, 0x0000e2e2e2e200e2, + 0x00009b9b9b9b009b, 0x0000262626260026, 0x0000373737370037, + 0x00003b3b3b3b003b, 0x0000969696960096, 0x00004b4b4b4b004b, + 0x0000bebebebe00be, 0x00002e2e2e2e002e, 0x0000797979790079, + 0x00008c8c8c8c008c, 0x00006e6e6e6e006e, 0x00008e8e8e8e008e, + 0x0000f5f5f5f500f5, 0x0000b6b6b6b600b6, 0x0000fdfdfdfd00fd, + 0x0000595959590059, 0x0000989898980098, 0x00006a6a6a6a006a, + 0x0000464646460046, 0x0000babababa00ba, 0x0000252525250025, + 0x0000424242420042, 0x0000a2a2a2a200a2, 0x0000fafafafa00fa, + 0x0000070707070007, 0x0000555555550055, 0x0000eeeeeeee00ee, + 0x00000a0a0a0a000a, 0x0000494949490049, 0x0000686868680068, + 0x0000383838380038, 0x0000a4a4a4a400a4, 0x0000282828280028, + 0x00007b7b7b7b007b, 0x0000c9c9c9c900c9, 0x0000c1c1c1c100c1, + 0x0000e3e3e3e300e3, 0x0000f4f4f4f400f4, 0x0000c7c7c7c700c7, + 0x00009e9e9e9e009e, +}; + +const u64 camellia_sp02220222[256] = { + 0x00e0e0e000e0e0e0, 0x0005050500050505, 0x0058585800585858, + 0x00d9d9d900d9d9d9, 0x0067676700676767, 0x004e4e4e004e4e4e, + 0x0081818100818181, 0x00cbcbcb00cbcbcb, 0x00c9c9c900c9c9c9, + 
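+	/*
+	 * The four s-boxes are simple transforms of one another:
+	 * s2(x) = rol8(s1(x), 1), s3(x) = ror8(s1(x), 1), s4(x) = s1(rol8(x, 1)).
+	 * Compare index 1 across the tables in this file: sp11101110[1] = 0x82
+	 * (s1), sp02220222[1] = 0x05 (s2), sp30333033[1] = 0x41 (s3),
+	 * sp44044404[1] = 0x2c (s4 = s1(2)).
+	 */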
0x000b0b0b000b0b0b, 0x00aeaeae00aeaeae, 0x006a6a6a006a6a6a, + 0x00d5d5d500d5d5d5, 0x0018181800181818, 0x005d5d5d005d5d5d, + 0x0082828200828282, 0x0046464600464646, 0x00dfdfdf00dfdfdf, + 0x00d6d6d600d6d6d6, 0x0027272700272727, 0x008a8a8a008a8a8a, + 0x0032323200323232, 0x004b4b4b004b4b4b, 0x0042424200424242, + 0x00dbdbdb00dbdbdb, 0x001c1c1c001c1c1c, 0x009e9e9e009e9e9e, + 0x009c9c9c009c9c9c, 0x003a3a3a003a3a3a, 0x00cacaca00cacaca, + 0x0025252500252525, 0x007b7b7b007b7b7b, 0x000d0d0d000d0d0d, + 0x0071717100717171, 0x005f5f5f005f5f5f, 0x001f1f1f001f1f1f, + 0x00f8f8f800f8f8f8, 0x00d7d7d700d7d7d7, 0x003e3e3e003e3e3e, + 0x009d9d9d009d9d9d, 0x007c7c7c007c7c7c, 0x0060606000606060, + 0x00b9b9b900b9b9b9, 0x00bebebe00bebebe, 0x00bcbcbc00bcbcbc, + 0x008b8b8b008b8b8b, 0x0016161600161616, 0x0034343400343434, + 0x004d4d4d004d4d4d, 0x00c3c3c300c3c3c3, 0x0072727200727272, + 0x0095959500959595, 0x00ababab00ababab, 0x008e8e8e008e8e8e, + 0x00bababa00bababa, 0x007a7a7a007a7a7a, 0x00b3b3b300b3b3b3, + 0x0002020200020202, 0x00b4b4b400b4b4b4, 0x00adadad00adadad, + 0x00a2a2a200a2a2a2, 0x00acacac00acacac, 0x00d8d8d800d8d8d8, + 0x009a9a9a009a9a9a, 0x0017171700171717, 0x001a1a1a001a1a1a, + 0x0035353500353535, 0x00cccccc00cccccc, 0x00f7f7f700f7f7f7, + 0x0099999900999999, 0x0061616100616161, 0x005a5a5a005a5a5a, + 0x00e8e8e800e8e8e8, 0x0024242400242424, 0x0056565600565656, + 0x0040404000404040, 0x00e1e1e100e1e1e1, 0x0063636300636363, + 0x0009090900090909, 0x0033333300333333, 0x00bfbfbf00bfbfbf, + 0x0098989800989898, 0x0097979700979797, 0x0085858500858585, + 0x0068686800686868, 0x00fcfcfc00fcfcfc, 0x00ececec00ececec, + 0x000a0a0a000a0a0a, 0x00dadada00dadada, 0x006f6f6f006f6f6f, + 0x0053535300535353, 0x0062626200626262, 0x00a3a3a300a3a3a3, + 0x002e2e2e002e2e2e, 0x0008080800080808, 0x00afafaf00afafaf, + 0x0028282800282828, 0x00b0b0b000b0b0b0, 0x0074747400747474, + 0x00c2c2c200c2c2c2, 0x00bdbdbd00bdbdbd, 0x0036363600363636, + 0x0022222200222222, 0x0038383800383838, 0x0064646400646464, + 0x001e1e1e001e1e1e, 0x0039393900393939, 0x002c2c2c002c2c2c, + 0x00a6a6a600a6a6a6, 0x0030303000303030, 0x00e5e5e500e5e5e5, + 0x0044444400444444, 0x00fdfdfd00fdfdfd, 0x0088888800888888, + 0x009f9f9f009f9f9f, 0x0065656500656565, 0x0087878700878787, + 0x006b6b6b006b6b6b, 0x00f4f4f400f4f4f4, 0x0023232300232323, + 0x0048484800484848, 0x0010101000101010, 0x00d1d1d100d1d1d1, + 0x0051515100515151, 0x00c0c0c000c0c0c0, 0x00f9f9f900f9f9f9, + 0x00d2d2d200d2d2d2, 0x00a0a0a000a0a0a0, 0x0055555500555555, + 0x00a1a1a100a1a1a1, 0x0041414100414141, 0x00fafafa00fafafa, + 0x0043434300434343, 0x0013131300131313, 0x00c4c4c400c4c4c4, + 0x002f2f2f002f2f2f, 0x00a8a8a800a8a8a8, 0x00b6b6b600b6b6b6, + 0x003c3c3c003c3c3c, 0x002b2b2b002b2b2b, 0x00c1c1c100c1c1c1, + 0x00ffffff00ffffff, 0x00c8c8c800c8c8c8, 0x00a5a5a500a5a5a5, + 0x0020202000202020, 0x0089898900898989, 0x0000000000000000, + 0x0090909000909090, 0x0047474700474747, 0x00efefef00efefef, + 0x00eaeaea00eaeaea, 0x00b7b7b700b7b7b7, 0x0015151500151515, + 0x0006060600060606, 0x00cdcdcd00cdcdcd, 0x00b5b5b500b5b5b5, + 0x0012121200121212, 0x007e7e7e007e7e7e, 0x00bbbbbb00bbbbbb, + 0x0029292900292929, 0x000f0f0f000f0f0f, 0x00b8b8b800b8b8b8, + 0x0007070700070707, 0x0004040400040404, 0x009b9b9b009b9b9b, + 0x0094949400949494, 0x0021212100212121, 0x0066666600666666, + 0x00e6e6e600e6e6e6, 0x00cecece00cecece, 0x00ededed00ededed, + 0x00e7e7e700e7e7e7, 0x003b3b3b003b3b3b, 0x00fefefe00fefefe, + 0x007f7f7f007f7f7f, 0x00c5c5c500c5c5c5, 0x00a4a4a400a4a4a4, + 0x0037373700373737, 0x00b1b1b100b1b1b1, 0x004c4c4c004c4c4c, + 0x0091919100919191, 
0x006e6e6e006e6e6e, 0x008d8d8d008d8d8d, + 0x0076767600767676, 0x0003030300030303, 0x002d2d2d002d2d2d, + 0x00dedede00dedede, 0x0096969600969696, 0x0026262600262626, + 0x007d7d7d007d7d7d, 0x00c6c6c600c6c6c6, 0x005c5c5c005c5c5c, + 0x00d3d3d300d3d3d3, 0x00f2f2f200f2f2f2, 0x004f4f4f004f4f4f, + 0x0019191900191919, 0x003f3f3f003f3f3f, 0x00dcdcdc00dcdcdc, + 0x0079797900797979, 0x001d1d1d001d1d1d, 0x0052525200525252, + 0x00ebebeb00ebebeb, 0x00f3f3f300f3f3f3, 0x006d6d6d006d6d6d, + 0x005e5e5e005e5e5e, 0x00fbfbfb00fbfbfb, 0x0069696900696969, + 0x00b2b2b200b2b2b2, 0x00f0f0f000f0f0f0, 0x0031313100313131, + 0x000c0c0c000c0c0c, 0x00d4d4d400d4d4d4, 0x00cfcfcf00cfcfcf, + 0x008c8c8c008c8c8c, 0x00e2e2e200e2e2e2, 0x0075757500757575, + 0x00a9a9a900a9a9a9, 0x004a4a4a004a4a4a, 0x0057575700575757, + 0x0084848400848484, 0x0011111100111111, 0x0045454500454545, + 0x001b1b1b001b1b1b, 0x00f5f5f500f5f5f5, 0x00e4e4e400e4e4e4, + 0x000e0e0e000e0e0e, 0x0073737300737373, 0x00aaaaaa00aaaaaa, + 0x00f1f1f100f1f1f1, 0x00dddddd00dddddd, 0x0059595900595959, + 0x0014141400141414, 0x006c6c6c006c6c6c, 0x0092929200929292, + 0x0054545400545454, 0x00d0d0d000d0d0d0, 0x0078787800787878, + 0x0070707000707070, 0x00e3e3e300e3e3e3, 0x0049494900494949, + 0x0080808000808080, 0x0050505000505050, 0x00a7a7a700a7a7a7, + 0x00f6f6f600f6f6f6, 0x0077777700777777, 0x0093939300939393, + 0x0086868600868686, 0x0083838300838383, 0x002a2a2a002a2a2a, + 0x00c7c7c700c7c7c7, 0x005b5b5b005b5b5b, 0x00e9e9e900e9e9e9, + 0x00eeeeee00eeeeee, 0x008f8f8f008f8f8f, 0x0001010100010101, + 0x003d3d3d003d3d3d, +}; + +const u64 camellia_sp30333033[256] = { + 0x3800383838003838, 0x4100414141004141, 0x1600161616001616, + 0x7600767676007676, 0xd900d9d9d900d9d9, 0x9300939393009393, + 0x6000606060006060, 0xf200f2f2f200f2f2, 0x7200727272007272, + 0xc200c2c2c200c2c2, 0xab00ababab00abab, 0x9a009a9a9a009a9a, + 0x7500757575007575, 0x0600060606000606, 0x5700575757005757, + 0xa000a0a0a000a0a0, 0x9100919191009191, 0xf700f7f7f700f7f7, + 0xb500b5b5b500b5b5, 0xc900c9c9c900c9c9, 0xa200a2a2a200a2a2, + 0x8c008c8c8c008c8c, 0xd200d2d2d200d2d2, 0x9000909090009090, + 0xf600f6f6f600f6f6, 0x0700070707000707, 0xa700a7a7a700a7a7, + 0x2700272727002727, 0x8e008e8e8e008e8e, 0xb200b2b2b200b2b2, + 0x4900494949004949, 0xde00dedede00dede, 0x4300434343004343, + 0x5c005c5c5c005c5c, 0xd700d7d7d700d7d7, 0xc700c7c7c700c7c7, + 0x3e003e3e3e003e3e, 0xf500f5f5f500f5f5, 0x8f008f8f8f008f8f, + 0x6700676767006767, 0x1f001f1f1f001f1f, 0x1800181818001818, + 0x6e006e6e6e006e6e, 0xaf00afafaf00afaf, 0x2f002f2f2f002f2f, + 0xe200e2e2e200e2e2, 0x8500858585008585, 0x0d000d0d0d000d0d, + 0x5300535353005353, 0xf000f0f0f000f0f0, 0x9c009c9c9c009c9c, + 0x6500656565006565, 0xea00eaeaea00eaea, 0xa300a3a3a300a3a3, + 0xae00aeaeae00aeae, 0x9e009e9e9e009e9e, 0xec00ececec00ecec, + 0x8000808080008080, 0x2d002d2d2d002d2d, 0x6b006b6b6b006b6b, + 0xa800a8a8a800a8a8, 0x2b002b2b2b002b2b, 0x3600363636003636, + 0xa600a6a6a600a6a6, 0xc500c5c5c500c5c5, 0x8600868686008686, + 0x4d004d4d4d004d4d, 0x3300333333003333, 0xfd00fdfdfd00fdfd, + 0x6600666666006666, 0x5800585858005858, 0x9600969696009696, + 0x3a003a3a3a003a3a, 0x0900090909000909, 0x9500959595009595, + 0x1000101010001010, 0x7800787878007878, 0xd800d8d8d800d8d8, + 0x4200424242004242, 0xcc00cccccc00cccc, 0xef00efefef00efef, + 0x2600262626002626, 0xe500e5e5e500e5e5, 0x6100616161006161, + 0x1a001a1a1a001a1a, 0x3f003f3f3f003f3f, 0x3b003b3b3b003b3b, + 0x8200828282008282, 0xb600b6b6b600b6b6, 0xdb00dbdbdb00dbdb, + 0xd400d4d4d400d4d4, 0x9800989898009898, 0xe800e8e8e800e8e8, + 0x8b008b8b8b008b8b, 
0x0200020202000202, 0xeb00ebebeb00ebeb, + 0x0a000a0a0a000a0a, 0x2c002c2c2c002c2c, 0x1d001d1d1d001d1d, + 0xb000b0b0b000b0b0, 0x6f006f6f6f006f6f, 0x8d008d8d8d008d8d, + 0x8800888888008888, 0x0e000e0e0e000e0e, 0x1900191919001919, + 0x8700878787008787, 0x4e004e4e4e004e4e, 0x0b000b0b0b000b0b, + 0xa900a9a9a900a9a9, 0x0c000c0c0c000c0c, 0x7900797979007979, + 0x1100111111001111, 0x7f007f7f7f007f7f, 0x2200222222002222, + 0xe700e7e7e700e7e7, 0x5900595959005959, 0xe100e1e1e100e1e1, + 0xda00dadada00dada, 0x3d003d3d3d003d3d, 0xc800c8c8c800c8c8, + 0x1200121212001212, 0x0400040404000404, 0x7400747474007474, + 0x5400545454005454, 0x3000303030003030, 0x7e007e7e7e007e7e, + 0xb400b4b4b400b4b4, 0x2800282828002828, 0x5500555555005555, + 0x6800686868006868, 0x5000505050005050, 0xbe00bebebe00bebe, + 0xd000d0d0d000d0d0, 0xc400c4c4c400c4c4, 0x3100313131003131, + 0xcb00cbcbcb00cbcb, 0x2a002a2a2a002a2a, 0xad00adadad00adad, + 0x0f000f0f0f000f0f, 0xca00cacaca00caca, 0x7000707070007070, + 0xff00ffffff00ffff, 0x3200323232003232, 0x6900696969006969, + 0x0800080808000808, 0x6200626262006262, 0x0000000000000000, + 0x2400242424002424, 0xd100d1d1d100d1d1, 0xfb00fbfbfb00fbfb, + 0xba00bababa00baba, 0xed00ededed00eded, 0x4500454545004545, + 0x8100818181008181, 0x7300737373007373, 0x6d006d6d6d006d6d, + 0x8400848484008484, 0x9f009f9f9f009f9f, 0xee00eeeeee00eeee, + 0x4a004a4a4a004a4a, 0xc300c3c3c300c3c3, 0x2e002e2e2e002e2e, + 0xc100c1c1c100c1c1, 0x0100010101000101, 0xe600e6e6e600e6e6, + 0x2500252525002525, 0x4800484848004848, 0x9900999999009999, + 0xb900b9b9b900b9b9, 0xb300b3b3b300b3b3, 0x7b007b7b7b007b7b, + 0xf900f9f9f900f9f9, 0xce00cecece00cece, 0xbf00bfbfbf00bfbf, + 0xdf00dfdfdf00dfdf, 0x7100717171007171, 0x2900292929002929, + 0xcd00cdcdcd00cdcd, 0x6c006c6c6c006c6c, 0x1300131313001313, + 0x6400646464006464, 0x9b009b9b9b009b9b, 0x6300636363006363, + 0x9d009d9d9d009d9d, 0xc000c0c0c000c0c0, 0x4b004b4b4b004b4b, + 0xb700b7b7b700b7b7, 0xa500a5a5a500a5a5, 0x8900898989008989, + 0x5f005f5f5f005f5f, 0xb100b1b1b100b1b1, 0x1700171717001717, + 0xf400f4f4f400f4f4, 0xbc00bcbcbc00bcbc, 0xd300d3d3d300d3d3, + 0x4600464646004646, 0xcf00cfcfcf00cfcf, 0x3700373737003737, + 0x5e005e5e5e005e5e, 0x4700474747004747, 0x9400949494009494, + 0xfa00fafafa00fafa, 0xfc00fcfcfc00fcfc, 0x5b005b5b5b005b5b, + 0x9700979797009797, 0xfe00fefefe00fefe, 0x5a005a5a5a005a5a, + 0xac00acacac00acac, 0x3c003c3c3c003c3c, 0x4c004c4c4c004c4c, + 0x0300030303000303, 0x3500353535003535, 0xf300f3f3f300f3f3, + 0x2300232323002323, 0xb800b8b8b800b8b8, 0x5d005d5d5d005d5d, + 0x6a006a6a6a006a6a, 0x9200929292009292, 0xd500d5d5d500d5d5, + 0x2100212121002121, 0x4400444444004444, 0x5100515151005151, + 0xc600c6c6c600c6c6, 0x7d007d7d7d007d7d, 0x3900393939003939, + 0x8300838383008383, 0xdc00dcdcdc00dcdc, 0xaa00aaaaaa00aaaa, + 0x7c007c7c7c007c7c, 0x7700777777007777, 0x5600565656005656, + 0x0500050505000505, 0x1b001b1b1b001b1b, 0xa400a4a4a400a4a4, + 0x1500151515001515, 0x3400343434003434, 0x1e001e1e1e001e1e, + 0x1c001c1c1c001c1c, 0xf800f8f8f800f8f8, 0x5200525252005252, + 0x2000202020002020, 0x1400141414001414, 0xe900e9e9e900e9e9, + 0xbd00bdbdbd00bdbd, 0xdd00dddddd00dddd, 0xe400e4e4e400e4e4, + 0xa100a1a1a100a1a1, 0xe000e0e0e000e0e0, 0x8a008a8a8a008a8a, + 0xf100f1f1f100f1f1, 0xd600d6d6d600d6d6, 0x7a007a7a7a007a7a, + 0xbb00bbbbbb00bbbb, 0xe300e3e3e300e3e3, 0x4000404040004040, + 0x4f004f4f4f004f4f, +}; + +const u64 camellia_sp44044404[256] = { + 0x7070007070700070, 0x2c2c002c2c2c002c, 0xb3b300b3b3b300b3, + 0xc0c000c0c0c000c0, 0xe4e400e4e4e400e4, 0x5757005757570057, + 0xeaea00eaeaea00ea, 
0xaeae00aeaeae00ae, 0x2323002323230023, + 0x6b6b006b6b6b006b, 0x4545004545450045, 0xa5a500a5a5a500a5, + 0xeded00ededed00ed, 0x4f4f004f4f4f004f, 0x1d1d001d1d1d001d, + 0x9292009292920092, 0x8686008686860086, 0xafaf00afafaf00af, + 0x7c7c007c7c7c007c, 0x1f1f001f1f1f001f, 0x3e3e003e3e3e003e, + 0xdcdc00dcdcdc00dc, 0x5e5e005e5e5e005e, 0x0b0b000b0b0b000b, + 0xa6a600a6a6a600a6, 0x3939003939390039, 0xd5d500d5d5d500d5, + 0x5d5d005d5d5d005d, 0xd9d900d9d9d900d9, 0x5a5a005a5a5a005a, + 0x5151005151510051, 0x6c6c006c6c6c006c, 0x8b8b008b8b8b008b, + 0x9a9a009a9a9a009a, 0xfbfb00fbfbfb00fb, 0xb0b000b0b0b000b0, + 0x7474007474740074, 0x2b2b002b2b2b002b, 0xf0f000f0f0f000f0, + 0x8484008484840084, 0xdfdf00dfdfdf00df, 0xcbcb00cbcbcb00cb, + 0x3434003434340034, 0x7676007676760076, 0x6d6d006d6d6d006d, + 0xa9a900a9a9a900a9, 0xd1d100d1d1d100d1, 0x0404000404040004, + 0x1414001414140014, 0x3a3a003a3a3a003a, 0xdede00dedede00de, + 0x1111001111110011, 0x3232003232320032, 0x9c9c009c9c9c009c, + 0x5353005353530053, 0xf2f200f2f2f200f2, 0xfefe00fefefe00fe, + 0xcfcf00cfcfcf00cf, 0xc3c300c3c3c300c3, 0x7a7a007a7a7a007a, + 0x2424002424240024, 0xe8e800e8e8e800e8, 0x6060006060600060, + 0x6969006969690069, 0xaaaa00aaaaaa00aa, 0xa0a000a0a0a000a0, + 0xa1a100a1a1a100a1, 0x6262006262620062, 0x5454005454540054, + 0x1e1e001e1e1e001e, 0xe0e000e0e0e000e0, 0x6464006464640064, + 0x1010001010100010, 0x0000000000000000, 0xa3a300a3a3a300a3, + 0x7575007575750075, 0x8a8a008a8a8a008a, 0xe6e600e6e6e600e6, + 0x0909000909090009, 0xdddd00dddddd00dd, 0x8787008787870087, + 0x8383008383830083, 0xcdcd00cdcdcd00cd, 0x9090009090900090, + 0x7373007373730073, 0xf6f600f6f6f600f6, 0x9d9d009d9d9d009d, + 0xbfbf00bfbfbf00bf, 0x5252005252520052, 0xd8d800d8d8d800d8, + 0xc8c800c8c8c800c8, 0xc6c600c6c6c600c6, 0x8181008181810081, + 0x6f6f006f6f6f006f, 0x1313001313130013, 0x6363006363630063, + 0xe9e900e9e9e900e9, 0xa7a700a7a7a700a7, 0x9f9f009f9f9f009f, + 0xbcbc00bcbcbc00bc, 0x2929002929290029, 0xf9f900f9f9f900f9, + 0x2f2f002f2f2f002f, 0xb4b400b4b4b400b4, 0x7878007878780078, + 0x0606000606060006, 0xe7e700e7e7e700e7, 0x7171007171710071, + 0xd4d400d4d4d400d4, 0xabab00ababab00ab, 0x8888008888880088, + 0x8d8d008d8d8d008d, 0x7272007272720072, 0xb9b900b9b9b900b9, + 0xf8f800f8f8f800f8, 0xacac00acacac00ac, 0x3636003636360036, + 0x2a2a002a2a2a002a, 0x3c3c003c3c3c003c, 0xf1f100f1f1f100f1, + 0x4040004040400040, 0xd3d300d3d3d300d3, 0xbbbb00bbbbbb00bb, + 0x4343004343430043, 0x1515001515150015, 0xadad00adadad00ad, + 0x7777007777770077, 0x8080008080800080, 0x8282008282820082, + 0xecec00ececec00ec, 0x2727002727270027, 0xe5e500e5e5e500e5, + 0x8585008585850085, 0x3535003535350035, 0x0c0c000c0c0c000c, + 0x4141004141410041, 0xefef00efefef00ef, 0x9393009393930093, + 0x1919001919190019, 0x2121002121210021, 0x0e0e000e0e0e000e, + 0x4e4e004e4e4e004e, 0x6565006565650065, 0xbdbd00bdbdbd00bd, + 0xb8b800b8b8b800b8, 0x8f8f008f8f8f008f, 0xebeb00ebebeb00eb, + 0xcece00cecece00ce, 0x3030003030300030, 0x5f5f005f5f5f005f, + 0xc5c500c5c5c500c5, 0x1a1a001a1a1a001a, 0xe1e100e1e1e100e1, + 0xcaca00cacaca00ca, 0x4747004747470047, 0x3d3d003d3d3d003d, + 0x0101000101010001, 0xd6d600d6d6d600d6, 0x5656005656560056, + 0x4d4d004d4d4d004d, 0x0d0d000d0d0d000d, 0x6666006666660066, + 0xcccc00cccccc00cc, 0x2d2d002d2d2d002d, 0x1212001212120012, + 0x2020002020200020, 0xb1b100b1b1b100b1, 0x9999009999990099, + 0x4c4c004c4c4c004c, 0xc2c200c2c2c200c2, 0x7e7e007e7e7e007e, + 0x0505000505050005, 0xb7b700b7b7b700b7, 0x3131003131310031, + 0x1717001717170017, 0xd7d700d7d7d700d7, 0x5858005858580058, + 0x6161006161610061, 0x1b1b001b1b1b001b, 
0x1c1c001c1c1c001c, + 0x0f0f000f0f0f000f, 0x1616001616160016, 0x1818001818180018, + 0x2222002222220022, 0x4444004444440044, 0xb2b200b2b2b200b2, + 0xb5b500b5b5b500b5, 0x9191009191910091, 0x0808000808080008, + 0xa8a800a8a8a800a8, 0xfcfc00fcfcfc00fc, 0x5050005050500050, + 0xd0d000d0d0d000d0, 0x7d7d007d7d7d007d, 0x8989008989890089, + 0x9797009797970097, 0x5b5b005b5b5b005b, 0x9595009595950095, + 0xffff00ffffff00ff, 0xd2d200d2d2d200d2, 0xc4c400c4c4c400c4, + 0x4848004848480048, 0xf7f700f7f7f700f7, 0xdbdb00dbdbdb00db, + 0x0303000303030003, 0xdada00dadada00da, 0x3f3f003f3f3f003f, + 0x9494009494940094, 0x5c5c005c5c5c005c, 0x0202000202020002, + 0x4a4a004a4a4a004a, 0x3333003333330033, 0x6767006767670067, + 0xf3f300f3f3f300f3, 0x7f7f007f7f7f007f, 0xe2e200e2e2e200e2, + 0x9b9b009b9b9b009b, 0x2626002626260026, 0x3737003737370037, + 0x3b3b003b3b3b003b, 0x9696009696960096, 0x4b4b004b4b4b004b, + 0xbebe00bebebe00be, 0x2e2e002e2e2e002e, 0x7979007979790079, + 0x8c8c008c8c8c008c, 0x6e6e006e6e6e006e, 0x8e8e008e8e8e008e, + 0xf5f500f5f5f500f5, 0xb6b600b6b6b600b6, 0xfdfd00fdfdfd00fd, + 0x5959005959590059, 0x9898009898980098, 0x6a6a006a6a6a006a, + 0x4646004646460046, 0xbaba00bababa00ba, 0x2525002525250025, + 0x4242004242420042, 0xa2a200a2a2a200a2, 0xfafa00fafafa00fa, + 0x0707000707070007, 0x5555005555550055, 0xeeee00eeeeee00ee, + 0x0a0a000a0a0a000a, 0x4949004949490049, 0x6868006868680068, + 0x3838003838380038, 0xa4a400a4a4a400a4, 0x2828002828280028, + 0x7b7b007b7b7b007b, 0xc9c900c9c9c900c9, 0xc1c100c1c1c100c1, + 0xe3e300e3e3e300e3, 0xf4f400f4f4f400f4, 0xc7c700c7c7c700c7, + 0x9e9e009e9e9e009e, +}; + +const u64 camellia_sp11101110[256] = { + 0x7070700070707000, 0x8282820082828200, 0x2c2c2c002c2c2c00, + 0xececec00ececec00, 0xb3b3b300b3b3b300, 0x2727270027272700, + 0xc0c0c000c0c0c000, 0xe5e5e500e5e5e500, 0xe4e4e400e4e4e400, + 0x8585850085858500, 0x5757570057575700, 0x3535350035353500, + 0xeaeaea00eaeaea00, 0x0c0c0c000c0c0c00, 0xaeaeae00aeaeae00, + 0x4141410041414100, 0x2323230023232300, 0xefefef00efefef00, + 0x6b6b6b006b6b6b00, 0x9393930093939300, 0x4545450045454500, + 0x1919190019191900, 0xa5a5a500a5a5a500, 0x2121210021212100, + 0xededed00ededed00, 0x0e0e0e000e0e0e00, 0x4f4f4f004f4f4f00, + 0x4e4e4e004e4e4e00, 0x1d1d1d001d1d1d00, 0x6565650065656500, + 0x9292920092929200, 0xbdbdbd00bdbdbd00, 0x8686860086868600, + 0xb8b8b800b8b8b800, 0xafafaf00afafaf00, 0x8f8f8f008f8f8f00, + 0x7c7c7c007c7c7c00, 0xebebeb00ebebeb00, 0x1f1f1f001f1f1f00, + 0xcecece00cecece00, 0x3e3e3e003e3e3e00, 0x3030300030303000, + 0xdcdcdc00dcdcdc00, 0x5f5f5f005f5f5f00, 0x5e5e5e005e5e5e00, + 0xc5c5c500c5c5c500, 0x0b0b0b000b0b0b00, 0x1a1a1a001a1a1a00, + 0xa6a6a600a6a6a600, 0xe1e1e100e1e1e100, 0x3939390039393900, + 0xcacaca00cacaca00, 0xd5d5d500d5d5d500, 0x4747470047474700, + 0x5d5d5d005d5d5d00, 0x3d3d3d003d3d3d00, 0xd9d9d900d9d9d900, + 0x0101010001010100, 0x5a5a5a005a5a5a00, 0xd6d6d600d6d6d600, + 0x5151510051515100, 0x5656560056565600, 0x6c6c6c006c6c6c00, + 0x4d4d4d004d4d4d00, 0x8b8b8b008b8b8b00, 0x0d0d0d000d0d0d00, + 0x9a9a9a009a9a9a00, 0x6666660066666600, 0xfbfbfb00fbfbfb00, + 0xcccccc00cccccc00, 0xb0b0b000b0b0b000, 0x2d2d2d002d2d2d00, + 0x7474740074747400, 0x1212120012121200, 0x2b2b2b002b2b2b00, + 0x2020200020202000, 0xf0f0f000f0f0f000, 0xb1b1b100b1b1b100, + 0x8484840084848400, 0x9999990099999900, 0xdfdfdf00dfdfdf00, + 0x4c4c4c004c4c4c00, 0xcbcbcb00cbcbcb00, 0xc2c2c200c2c2c200, + 0x3434340034343400, 0x7e7e7e007e7e7e00, 0x7676760076767600, + 0x0505050005050500, 0x6d6d6d006d6d6d00, 0xb7b7b700b7b7b700, + 0xa9a9a900a9a9a900, 0x3131310031313100, 
0xd1d1d100d1d1d100, + 0x1717170017171700, 0x0404040004040400, 0xd7d7d700d7d7d700, + 0x1414140014141400, 0x5858580058585800, 0x3a3a3a003a3a3a00, + 0x6161610061616100, 0xdedede00dedede00, 0x1b1b1b001b1b1b00, + 0x1111110011111100, 0x1c1c1c001c1c1c00, 0x3232320032323200, + 0x0f0f0f000f0f0f00, 0x9c9c9c009c9c9c00, 0x1616160016161600, + 0x5353530053535300, 0x1818180018181800, 0xf2f2f200f2f2f200, + 0x2222220022222200, 0xfefefe00fefefe00, 0x4444440044444400, + 0xcfcfcf00cfcfcf00, 0xb2b2b200b2b2b200, 0xc3c3c300c3c3c300, + 0xb5b5b500b5b5b500, 0x7a7a7a007a7a7a00, 0x9191910091919100, + 0x2424240024242400, 0x0808080008080800, 0xe8e8e800e8e8e800, + 0xa8a8a800a8a8a800, 0x6060600060606000, 0xfcfcfc00fcfcfc00, + 0x6969690069696900, 0x5050500050505000, 0xaaaaaa00aaaaaa00, + 0xd0d0d000d0d0d000, 0xa0a0a000a0a0a000, 0x7d7d7d007d7d7d00, + 0xa1a1a100a1a1a100, 0x8989890089898900, 0x6262620062626200, + 0x9797970097979700, 0x5454540054545400, 0x5b5b5b005b5b5b00, + 0x1e1e1e001e1e1e00, 0x9595950095959500, 0xe0e0e000e0e0e000, + 0xffffff00ffffff00, 0x6464640064646400, 0xd2d2d200d2d2d200, + 0x1010100010101000, 0xc4c4c400c4c4c400, 0x0000000000000000, + 0x4848480048484800, 0xa3a3a300a3a3a300, 0xf7f7f700f7f7f700, + 0x7575750075757500, 0xdbdbdb00dbdbdb00, 0x8a8a8a008a8a8a00, + 0x0303030003030300, 0xe6e6e600e6e6e600, 0xdadada00dadada00, + 0x0909090009090900, 0x3f3f3f003f3f3f00, 0xdddddd00dddddd00, + 0x9494940094949400, 0x8787870087878700, 0x5c5c5c005c5c5c00, + 0x8383830083838300, 0x0202020002020200, 0xcdcdcd00cdcdcd00, + 0x4a4a4a004a4a4a00, 0x9090900090909000, 0x3333330033333300, + 0x7373730073737300, 0x6767670067676700, 0xf6f6f600f6f6f600, + 0xf3f3f300f3f3f300, 0x9d9d9d009d9d9d00, 0x7f7f7f007f7f7f00, + 0xbfbfbf00bfbfbf00, 0xe2e2e200e2e2e200, 0x5252520052525200, + 0x9b9b9b009b9b9b00, 0xd8d8d800d8d8d800, 0x2626260026262600, + 0xc8c8c800c8c8c800, 0x3737370037373700, 0xc6c6c600c6c6c600, + 0x3b3b3b003b3b3b00, 0x8181810081818100, 0x9696960096969600, + 0x6f6f6f006f6f6f00, 0x4b4b4b004b4b4b00, 0x1313130013131300, + 0xbebebe00bebebe00, 0x6363630063636300, 0x2e2e2e002e2e2e00, + 0xe9e9e900e9e9e900, 0x7979790079797900, 0xa7a7a700a7a7a700, + 0x8c8c8c008c8c8c00, 0x9f9f9f009f9f9f00, 0x6e6e6e006e6e6e00, + 0xbcbcbc00bcbcbc00, 0x8e8e8e008e8e8e00, 0x2929290029292900, + 0xf5f5f500f5f5f500, 0xf9f9f900f9f9f900, 0xb6b6b600b6b6b600, + 0x2f2f2f002f2f2f00, 0xfdfdfd00fdfdfd00, 0xb4b4b400b4b4b400, + 0x5959590059595900, 0x7878780078787800, 0x9898980098989800, + 0x0606060006060600, 0x6a6a6a006a6a6a00, 0xe7e7e700e7e7e700, + 0x4646460046464600, 0x7171710071717100, 0xbababa00bababa00, + 0xd4d4d400d4d4d400, 0x2525250025252500, 0xababab00ababab00, + 0x4242420042424200, 0x8888880088888800, 0xa2a2a200a2a2a200, + 0x8d8d8d008d8d8d00, 0xfafafa00fafafa00, 0x7272720072727200, + 0x0707070007070700, 0xb9b9b900b9b9b900, 0x5555550055555500, + 0xf8f8f800f8f8f800, 0xeeeeee00eeeeee00, 0xacacac00acacac00, + 0x0a0a0a000a0a0a00, 0x3636360036363600, 0x4949490049494900, + 0x2a2a2a002a2a2a00, 0x6868680068686800, 0x3c3c3c003c3c3c00, + 0x3838380038383800, 0xf1f1f100f1f1f100, 0xa4a4a400a4a4a400, + 0x4040400040404000, 0x2828280028282800, 0xd3d3d300d3d3d300, + 0x7b7b7b007b7b7b00, 0xbbbbbb00bbbbbb00, 0xc9c9c900c9c9c900, + 0x4343430043434300, 0xc1c1c100c1c1c100, 0x1515150015151500, + 0xe3e3e300e3e3e300, 0xadadad00adadad00, 0xf4f4f400f4f4f400, + 0x7777770077777700, 0xc7c7c700c7c7c700, 0x8080800080808000, + 0x9e9e9e009e9e9e00, +}; + +/* key constants */ +#define CAMELLIA_SIGMA1L (0xA09E667FL) +#define CAMELLIA_SIGMA1R (0x3BCC908BL) +#define CAMELLIA_SIGMA2L (0xB67AE858L) +#define 
CAMELLIA_SIGMA2R (0x4CAA73B2L) +#define CAMELLIA_SIGMA3L (0xC6EF372FL) +#define CAMELLIA_SIGMA3R (0xE94F82BEL) +#define CAMELLIA_SIGMA4L (0x54FF53A5L) +#define CAMELLIA_SIGMA4R (0xF1D36F1CL) +#define CAMELLIA_SIGMA5L (0x10E527FAL) +#define CAMELLIA_SIGMA5R (0xDE682D1DL) +#define CAMELLIA_SIGMA6L (0xB05688C2L) +#define CAMELLIA_SIGMA6R (0xB3E6C1FDL) + +/* macros */ +#define ROLDQ(l, r, bits) ({ \ + u64 t = l; \ + l = (l << bits) | (r >> (64 - bits)); \ + r = (r << bits) | (t >> (64 - bits)); \ +}) + +#define CAMELLIA_F(x, kl, kr, y) ({ \ + u64 ii = x ^ (((u64)kl << 32) | kr); \ + y = camellia_sp11101110[(uint8_t)ii]; \ + y ^= camellia_sp44044404[(uint8_t)(ii >> 8)]; \ + ii >>= 16; \ + y ^= camellia_sp30333033[(uint8_t)ii]; \ + y ^= camellia_sp02220222[(uint8_t)(ii >> 8)]; \ + ii >>= 16; \ + y ^= camellia_sp00444404[(uint8_t)ii]; \ + y ^= camellia_sp03303033[(uint8_t)(ii >> 8)]; \ + ii >>= 16; \ + y ^= camellia_sp22000222[(uint8_t)ii]; \ + y ^= camellia_sp10011110[(uint8_t)(ii >> 8)]; \ + y = ror64(y, 32); \ +}) + +#define SET_SUBKEY_LR(INDEX, sRL) (subkey[(INDEX)] = ror64((sRL), 32)) + +static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max) +{ + u64 kw4, tt; + u32 dw, tl, tr; + + /* absorb kw2 to other subkeys */ + /* round 2 */ + subRL[3] ^= subRL[1]; + /* round 4 */ + subRL[5] ^= subRL[1]; + /* round 6 */ + subRL[7] ^= subRL[1]; + + subRL[1] ^= (subRL[1] & ~subRL[9]) << 32; + /* modified for FLinv(kl2) */ + dw = (subRL[1] & subRL[9]) >> 32, + subRL[1] ^= rol32(dw, 1); + + /* round 8 */ + subRL[11] ^= subRL[1]; + /* round 10 */ + subRL[13] ^= subRL[1]; + /* round 12 */ + subRL[15] ^= subRL[1]; + + subRL[1] ^= (subRL[1] & ~subRL[17]) << 32; + /* modified for FLinv(kl4) */ + dw = (subRL[1] & subRL[17]) >> 32, + subRL[1] ^= rol32(dw, 1); + + /* round 14 */ + subRL[19] ^= subRL[1]; + /* round 16 */ + subRL[21] ^= subRL[1]; + /* round 18 */ + subRL[23] ^= subRL[1]; + + if (max == 24) { + /* kw3 */ + subRL[24] ^= subRL[1]; + + /* absorb kw4 to other subkeys */ + kw4 = subRL[25]; + } else { + subRL[1] ^= (subRL[1] & ~subRL[25]) << 32; + /* modified for FLinv(kl6) */ + dw = (subRL[1] & subRL[25]) >> 32, + subRL[1] ^= rol32(dw, 1); + + /* round 20 */ + subRL[27] ^= subRL[1]; + /* round 22 */ + subRL[29] ^= subRL[1]; + /* round 24 */ + subRL[31] ^= subRL[1]; + /* kw3 */ + subRL[32] ^= subRL[1]; + + /* absorb kw4 to other subkeys */ + kw4 = subRL[33]; + /* round 23 */ + subRL[30] ^= kw4; + /* round 21 */ + subRL[28] ^= kw4; + /* round 19 */ + subRL[26] ^= kw4; + + kw4 ^= (kw4 & ~subRL[24]) << 32; + /* modified for FL(kl5) */ + dw = (kw4 & subRL[24]) >> 32, + kw4 ^= rol32(dw, 1); + } + + /* round 17 */ + subRL[22] ^= kw4; + /* round 15 */ + subRL[20] ^= kw4; + /* round 13 */ + subRL[18] ^= kw4; + + kw4 ^= (kw4 & ~subRL[16]) << 32; + /* modified for FL(kl3) */ + dw = (kw4 & subRL[16]) >> 32, + kw4 ^= rol32(dw, 1); + + /* round 11 */ + subRL[14] ^= kw4; + /* round 9 */ + subRL[12] ^= kw4; + /* round 7 */ + subRL[10] ^= kw4; + + kw4 ^= (kw4 & ~subRL[8]) << 32; + /* modified for FL(kl1) */ + dw = (kw4 & subRL[8]) >> 32, + kw4 ^= rol32(dw, 1); + + /* round 5 */ + subRL[6] ^= kw4; + /* round 3 */ + subRL[4] ^= kw4; + /* round 1 */ + subRL[2] ^= kw4; + /* kw1 */ + subRL[0] ^= kw4; + + /* key XOR is end of F-function */ + SET_SUBKEY_LR(0, subRL[0] ^ subRL[2]); /* kw1 */ + SET_SUBKEY_LR(2, subRL[3]); /* round 1 */ + SET_SUBKEY_LR(3, subRL[2] ^ subRL[4]); /* round 2 */ + SET_SUBKEY_LR(4, subRL[3] ^ subRL[5]); /* round 3 */ + SET_SUBKEY_LR(5, subRL[4] ^ subRL[6]); /* round 4 */ + SET_SUBKEY_LR(6, 
subRL[5] ^ subRL[7]); /* round 5 */ + + tl = (subRL[10] >> 32) ^ (subRL[10] & ~subRL[8]); + dw = tl & (subRL[8] >> 32), /* FL(kl1) */ + tr = subRL[10] ^ rol32(dw, 1); + tt = (tr | ((u64)tl << 32)); + + SET_SUBKEY_LR(7, subRL[6] ^ tt); /* round 6 */ + SET_SUBKEY_LR(8, subRL[8]); /* FL(kl1) */ + SET_SUBKEY_LR(9, subRL[9]); /* FLinv(kl2) */ + + tl = (subRL[7] >> 32) ^ (subRL[7] & ~subRL[9]); + dw = tl & (subRL[9] >> 32), /* FLinv(kl2) */ + tr = subRL[7] ^ rol32(dw, 1); + tt = (tr | ((u64)tl << 32)); + + SET_SUBKEY_LR(10, subRL[11] ^ tt); /* round 7 */ + SET_SUBKEY_LR(11, subRL[10] ^ subRL[12]); /* round 8 */ + SET_SUBKEY_LR(12, subRL[11] ^ subRL[13]); /* round 9 */ + SET_SUBKEY_LR(13, subRL[12] ^ subRL[14]); /* round 10 */ + SET_SUBKEY_LR(14, subRL[13] ^ subRL[15]); /* round 11 */ + + tl = (subRL[18] >> 32) ^ (subRL[18] & ~subRL[16]); + dw = tl & (subRL[16] >> 32), /* FL(kl3) */ + tr = subRL[18] ^ rol32(dw, 1); + tt = (tr | ((u64)tl << 32)); + + SET_SUBKEY_LR(15, subRL[14] ^ tt); /* round 12 */ + SET_SUBKEY_LR(16, subRL[16]); /* FL(kl3) */ + SET_SUBKEY_LR(17, subRL[17]); /* FLinv(kl4) */ + + tl = (subRL[15] >> 32) ^ (subRL[15] & ~subRL[17]); + dw = tl & (subRL[17] >> 32), /* FLinv(kl4) */ + tr = subRL[15] ^ rol32(dw, 1); + tt = (tr | ((u64)tl << 32)); + + SET_SUBKEY_LR(18, subRL[19] ^ tt); /* round 13 */ + SET_SUBKEY_LR(19, subRL[18] ^ subRL[20]); /* round 14 */ + SET_SUBKEY_LR(20, subRL[19] ^ subRL[21]); /* round 15 */ + SET_SUBKEY_LR(21, subRL[20] ^ subRL[22]); /* round 16 */ + SET_SUBKEY_LR(22, subRL[21] ^ subRL[23]); /* round 17 */ + + if (max == 24) { + SET_SUBKEY_LR(23, subRL[22]); /* round 18 */ + SET_SUBKEY_LR(24, subRL[24] ^ subRL[23]); /* kw3 */ + } else { + tl = (subRL[26] >> 32) ^ (subRL[26] & ~subRL[24]); + dw = tl & (subRL[24] >> 32), /* FL(kl5) */ + tr = subRL[26] ^ rol32(dw, 1); + tt = (tr | ((u64)tl << 32)); + + SET_SUBKEY_LR(23, subRL[22] ^ tt); /* round 18 */ + SET_SUBKEY_LR(24, subRL[24]); /* FL(kl5) */ + SET_SUBKEY_LR(25, subRL[25]); /* FLinv(kl6) */ + + tl = (subRL[23] >> 32) ^ (subRL[23] & ~subRL[25]); + dw = tl & (subRL[25] >> 32), /* FLinv(kl6) */ + tr = subRL[23] ^ rol32(dw, 1); + tt = (tr | ((u64)tl << 32)); + + SET_SUBKEY_LR(26, subRL[27] ^ tt); /* round 19 */ + SET_SUBKEY_LR(27, subRL[26] ^ subRL[28]); /* round 20 */ + SET_SUBKEY_LR(28, subRL[27] ^ subRL[29]); /* round 21 */ + SET_SUBKEY_LR(29, subRL[28] ^ subRL[30]); /* round 22 */ + SET_SUBKEY_LR(30, subRL[29] ^ subRL[31]); /* round 23 */ + SET_SUBKEY_LR(31, subRL[30]); /* round 24 */ + SET_SUBKEY_LR(32, subRL[32] ^ subRL[31]); /* kw3 */ + } +} + +static void camellia_setup128(const unsigned char *key, u64 *subkey) +{ + u64 kl, kr, ww; + u64 subRL[26]; + + /** + * k == kl || kr (|| is concatenation) + */ + kl = get_unaligned_be64(key); + kr = get_unaligned_be64(key + 8); + + /* generate KL dependent subkeys */ + /* kw1 */ + subRL[0] = kl; + /* kw2 */ + subRL[1] = kr; + + /* rotation left shift 15bit */ + ROLDQ(kl, kr, 15); + + /* k3 */ + subRL[4] = kl; + /* k4 */ + subRL[5] = kr; + + /* rotation left shift 15+30bit */ + ROLDQ(kl, kr, 30); + + /* k7 */ + subRL[10] = kl; + /* k8 */ + subRL[11] = kr; + + /* rotation left shift 15+30+15bit */ + ROLDQ(kl, kr, 15); + + /* k10 */ + subRL[13] = kr; + /* rotation left shift 15+30+15+17 bit */ + ROLDQ(kl, kr, 17); + + /* kl3 */ + subRL[16] = kl; + /* kl4 */ + subRL[17] = kr; + + /* rotation left shift 15+30+15+17+17 bit */ + ROLDQ(kl, kr, 17); + + /* k13 */ + subRL[18] = kl; + /* k14 */ + subRL[19] = kr; + + /* rotation left shift 15+30+15+17+17+17 bit */ + ROLDQ(kl, kr, 
17); + + /* k17 */ + subRL[22] = kl; + /* k18 */ + subRL[23] = kr; + + /* generate KA */ + kl = subRL[0]; + kr = subRL[1]; + CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww); + kr ^= ww; + CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl); + + /* current status == (kll, klr, w0, w1) */ + CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr); + kr ^= ww; + CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww); + kl ^= ww; + + /* generate KA dependent subkeys */ + /* k1, k2 */ + subRL[2] = kl; + subRL[3] = kr; + ROLDQ(kl, kr, 15); + /* k5,k6 */ + subRL[6] = kl; + subRL[7] = kr; + ROLDQ(kl, kr, 15); + /* kl1, kl2 */ + subRL[8] = kl; + subRL[9] = kr; + ROLDQ(kl, kr, 15); + /* k9 */ + subRL[12] = kl; + ROLDQ(kl, kr, 15); + /* k11, k12 */ + subRL[14] = kl; + subRL[15] = kr; + ROLDQ(kl, kr, 34); + /* k15, k16 */ + subRL[20] = kl; + subRL[21] = kr; + ROLDQ(kl, kr, 17); + /* kw3, kw4 */ + subRL[24] = kl; + subRL[25] = kr; + + camellia_setup_tail(subkey, subRL, 24); +} + +static void camellia_setup256(const unsigned char *key, u64 *subkey) +{ + u64 kl, kr; /* left half of key */ + u64 krl, krr; /* right half of key */ + u64 ww; /* temporary variables */ + u64 subRL[34]; + + /** + * key = (kl || kr || krl || krr) (|| is concatenation) + */ + kl = get_unaligned_be64(key); + kr = get_unaligned_be64(key + 8); + krl = get_unaligned_be64(key + 16); + krr = get_unaligned_be64(key + 24); + + /* generate KL dependent subkeys */ + /* kw1 */ + subRL[0] = kl; + /* kw2 */ + subRL[1] = kr; + ROLDQ(kl, kr, 45); + /* k9 */ + subRL[12] = kl; + /* k10 */ + subRL[13] = kr; + ROLDQ(kl, kr, 15); + /* kl3 */ + subRL[16] = kl; + /* kl4 */ + subRL[17] = kr; + ROLDQ(kl, kr, 17); + /* k17 */ + subRL[22] = kl; + /* k18 */ + subRL[23] = kr; + ROLDQ(kl, kr, 34); + /* k23 */ + subRL[30] = kl; + /* k24 */ + subRL[31] = kr; + + /* generate KR dependent subkeys */ + ROLDQ(krl, krr, 15); + /* k3 */ + subRL[4] = krl; + /* k4 */ + subRL[5] = krr; + ROLDQ(krl, krr, 15); + /* kl1 */ + subRL[8] = krl; + /* kl2 */ + subRL[9] = krr; + ROLDQ(krl, krr, 30); + /* k13 */ + subRL[18] = krl; + /* k14 */ + subRL[19] = krr; + ROLDQ(krl, krr, 34); + /* k19 */ + subRL[26] = krl; + /* k20 */ + subRL[27] = krr; + ROLDQ(krl, krr, 34); + + /* generate KA */ + kl = subRL[0] ^ krl; + kr = subRL[1] ^ krr; + + CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww); + kr ^= ww; + CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl); + kl ^= krl; + CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr); + kr ^= ww ^ krr; + CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww); + kl ^= ww; + + /* generate KB */ + krl ^= kl; + krr ^= kr; + CAMELLIA_F(krl, CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R, ww); + krr ^= ww; + CAMELLIA_F(krr, CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R, ww); + krl ^= ww; + + /* generate KA dependent subkeys */ + ROLDQ(kl, kr, 15); + /* k5 */ + subRL[6] = kl; + /* k6 */ + subRL[7] = kr; + ROLDQ(kl, kr, 30); + /* k11 */ + subRL[14] = kl; + /* k12 */ + subRL[15] = kr; + /* rotation left shift 32bit */ + ROLDQ(kl, kr, 32); + /* kl5 */ + subRL[24] = kl; + /* kl6 */ + subRL[25] = kr; + /* rotation left shift 17 from k11,k12 -> k21,k22 */ + ROLDQ(kl, kr, 17); + /* k21 */ + subRL[28] = kl; + /* k22 */ + subRL[29] = kr; + + /* generate KB dependent subkeys */ + /* k1 */ + subRL[2] = krl; + /* k2 */ + subRL[3] = krr; + ROLDQ(krl, krr, 30); + /* k7 */ + subRL[10] = krl; + /* k8 */ + subRL[11] = krr; + ROLDQ(krl, krr, 30); + /* k15 */ + subRL[20] = krl; + /* k16 */ + subRL[21] = krr; + ROLDQ(krl, krr, 51); + /* kw3 */ + subRL[32] = krl; + /* kw4 */ 
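+	/*
+	 * kw4 below completes the 34-word subRL[] schedule;
+	 * camellia_setup_tail() then absorbs kw2/kw4 into the remaining
+	 * subkeys and emits the final subkey array (max == 32).
+	 */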
+ subRL[33] = krr; + + camellia_setup_tail(subkey, subRL, 32); +} + +static void camellia_setup192(const unsigned char *key, u64 *subkey) +{ + unsigned char kk[32]; + u64 krl, krr; + + memcpy(kk, key, 24); + memcpy((unsigned char *)&krl, key+16, 8); + krr = ~krl; + memcpy(kk+24, (unsigned char *)&krr, 8); + camellia_setup256(kk, subkey); +} + +static int __camellia_setkey(struct camellia_ctx *cctx, + const unsigned char *key, + unsigned int key_len, u32 *flags) +{ + if (key_len != 16 && key_len != 24 && key_len != 32) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + cctx->key_length = key_len; + + switch (key_len) { + case 16: + camellia_setup128(key, cctx->key_table); + break; + case 24: + camellia_setup192(key, cctx->key_table); + break; + case 32: + camellia_setup256(key, cctx->key_table); + break; + } + + return 0; +} + +static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len, + &tfm->crt_flags); +} + +static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, + void (*fn)(struct camellia_ctx *, u8 *, const u8 *), + void (*fn_2way)(struct camellia_ctx *, u8 *, const u8 *)) +{ + struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = CAMELLIA_BLOCK_SIZE; + unsigned int nbytes; + int err; + + err = blkcipher_walk_virt(desc, walk); + + while ((nbytes = walk->nbytes)) { + u8 *wsrc = walk->src.virt.addr; + u8 *wdst = walk->dst.virt.addr; + + /* Process two block batch */ + if (nbytes >= bsize * 2) { + do { + fn_2way(ctx, wdst, wsrc); + + wsrc += bsize * 2; + wdst += bsize * 2; + nbytes -= bsize * 2; + } while (nbytes >= bsize * 2); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + fn(ctx, wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + err = blkcipher_walk_done(desc, walk, nbytes); + } + + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, camellia_enc_blk, camellia_enc_blk_2way); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, camellia_dec_blk, camellia_dec_blk_2way); +} + +static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = CAMELLIA_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 *iv = (u128 *)walk->iv; + + do { + u128_xor(dst, src, iv); + camellia_enc_blk(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); + return nbytes; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_encrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, 
nbytes); + } + + return err; +} + +static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = CAMELLIA_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ivs[2 - 1]; + u128 last_iv; + + /* Start of the last block. */ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process two block batch */ + if (nbytes >= bsize * 2) { + do { + nbytes -= bsize * (2 - 1); + src -= 2 - 1; + dst -= 2 - 1; + + ivs[0] = src[0]; + + camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src); + + u128_xor(dst + 1, dst + 1, ivs + 0); + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * 2); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + for (;;) { + camellia_dec_blk(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } + +done: + u128_xor(dst, dst, (u128 *)walk->iv); + *(u128 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_decrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static inline void u128_to_be128(be128 *dst, const u128 *src) +{ + dst->a = cpu_to_be64(src->a); + dst->b = cpu_to_be64(src->b); +} + +static inline void be128_to_u128(u128 *dst, const be128 *src) +{ + dst->a = be64_to_cpu(src->a); + dst->b = be64_to_cpu(src->b); +} + +static inline void u128_inc(u128 *i) +{ + i->b++; + if (!i->b) + i->a++; +} + +static void ctr_crypt_final(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + u8 keystream[CAMELLIA_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + u128 ctrblk; + + memcpy(keystream, src, nbytes); + camellia_enc_blk_xor(ctx, keystream, walk->iv); + memcpy(dst, keystream, nbytes); + + be128_to_u128(&ctrblk, (be128 *)walk->iv); + u128_inc(&ctrblk); + u128_to_be128((be128 *)walk->iv, &ctrblk); +} + +static unsigned int __ctr_crypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = CAMELLIA_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ctrblk; + be128 ctrblocks[2]; + + be128_to_u128(&ctrblk, (be128 *)walk->iv); + + /* Process two block batch */ + if (nbytes >= bsize * 2) { + do { + if (dst != src) { + dst[0] = src[0]; + dst[1] = src[1]; + } + + /* create ctrblks for parallel encrypt */ + u128_to_be128(&ctrblocks[0], &ctrblk); + u128_inc(&ctrblk); + u128_to_be128(&ctrblocks[1], &ctrblk); + u128_inc(&ctrblk); + + camellia_enc_blk_xor_2way(ctx, (u8 *)dst, + (u8 *)ctrblocks); + + src += 2; + dst += 2; + nbytes -= bsize * 2; + } while (nbytes >= bsize * 2); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (dst != src) + *dst = *src; + + 
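+		/*
+		 * Single-block tail: convert the running counter to
+		 * big-endian and bump it; camellia_enc_blk_xor() encrypts
+		 * the counter block and XORs the keystream into dst, which
+		 * was primed with the plaintext by *dst = *src above.
+		 */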
u128_to_be128(&ctrblocks[0], &ctrblk); + u128_inc(&ctrblk); + + camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks); + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + u128_to_be128((be128 *)walk->iv, &ctrblk); + return nbytes; +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, CAMELLIA_BLOCK_SIZE); + + while ((nbytes = walk.nbytes) >= CAMELLIA_BLOCK_SIZE) { + nbytes = __ctr_crypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + if (walk.nbytes) { + ctr_crypt_final(desc, &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + +static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = CAMELLIA_BLOCK_SIZE; + struct camellia_ctx *ctx = priv; + int i; + + while (nbytes >= 2 * bsize) { + camellia_enc_blk_2way(ctx, srcdst, srcdst); + srcdst += bsize * 2; + nbytes -= bsize * 2; + } + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + camellia_enc_blk(ctx, srcdst, srcdst); +} + +static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = CAMELLIA_BLOCK_SIZE; + struct camellia_ctx *ctx = priv; + int i; + + while (nbytes >= 2 * bsize) { + camellia_dec_blk_2way(ctx, srcdst, srcdst); + srcdst += bsize * 2; + nbytes -= bsize * 2; + } + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + camellia_dec_blk(ctx, srcdst, srcdst); +} + +struct camellia_lrw_ctx { + struct lrw_table_ctx lrw_table; + struct camellia_ctx camellia_ctx; +}; + +static int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + + err = __camellia_setkey(&ctx->camellia_ctx, key, + keylen - CAMELLIA_BLOCK_SIZE, + &tfm->crt_flags); + if (err) + return err; + + return lrw_init_table(&ctx->lrw_table, + key + keylen - CAMELLIA_BLOCK_SIZE); +} + +static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[2 * 4]; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &ctx->camellia_ctx, + .crypt_fn = encrypt_callback, + }; + + return lrw_crypt(desc, dst, src, nbytes, &req); +} + +static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[2 * 4]; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &ctx->camellia_ctx, + .crypt_fn = decrypt_callback, + }; + + return lrw_crypt(desc, dst, src, nbytes, &req); +} + +static void lrw_exit_tfm(struct crypto_tfm *tfm) +{ + struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + + lrw_free_table(&ctx->lrw_table); +} + +struct camellia_xts_ctx { + struct camellia_ctx tweak_ctx; + struct camellia_ctx crypt_ctx; +}; + +static int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + int err; + + /* key consists of keys of equal size concatenated, therefore + * the length must 
be even + */ + if (keylen % 2) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + /* first half of xts-key is for crypt */ + err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, + flags); +} + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[2 * 4]; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk), + .crypt_ctx = &ctx->crypt_ctx, + .crypt_fn = encrypt_callback, + }; + + return xts_crypt(desc, dst, src, nbytes, &req); +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[2 * 4]; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk), + .crypt_ctx = &ctx->crypt_ctx, + .crypt_fn = decrypt_callback, + }; + + return xts_crypt(desc, dst, src, nbytes, &req); +} + +static struct crypto_alg camellia_algs[6] = { { + .cra_name = "camellia", + .cra_driver_name = "camellia-asm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = CAMELLIA_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct camellia_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(camellia_algs[0].cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = CAMELLIA_MIN_KEY_SIZE, + .cia_max_keysize = CAMELLIA_MAX_KEY_SIZE, + .cia_setkey = camellia_setkey, + .cia_encrypt = camellia_encrypt, + .cia_decrypt = camellia_decrypt + } + } +}, { + .cra_name = "ecb(camellia)", + .cra_driver_name = "ecb-camellia-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAMELLIA_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct camellia_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(camellia_algs[1].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "cbc(camellia)", + .cra_driver_name = "cbc-camellia-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAMELLIA_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct camellia_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(camellia_algs[2].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "ctr(camellia)", + .cra_driver_name = "ctr-camellia-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct camellia_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(camellia_algs[3].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = 
CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}, { + .cra_name = "lrw(camellia)", + .cra_driver_name = "lrw-camellia-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAMELLIA_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct camellia_lrw_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(camellia_algs[4].cra_list), + .cra_exit = lrw_exit_tfm, + .cra_u = { + .blkcipher = { + .min_keysize = CAMELLIA_MIN_KEY_SIZE + + CAMELLIA_BLOCK_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE + + CAMELLIA_BLOCK_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = lrw_camellia_setkey, + .encrypt = lrw_encrypt, + .decrypt = lrw_decrypt, + }, + }, +}, { + .cra_name = "xts(camellia)", + .cra_driver_name = "xts-camellia-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAMELLIA_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct camellia_xts_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(camellia_algs[5].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, + .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = xts_camellia_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, + }, +} }; + +static bool is_blacklisted_cpu(void) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + if (boot_cpu_data.x86 == 0x0f) { + /* + * On Pentium 4, camellia-asm is slower than original assembler + * implementation because excessive uses of 64bit rotate and + * left-shifts (which are really slow on P4) needed to store and + * handle 128bit block in two 64bit registers. 
+ */ + return true; + } + + return false; +} + +static int force; +module_param(force, int, 0); +MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); + +static int __init init(void) +{ + if (!force && is_blacklisted_cpu()) { + printk(KERN_INFO + "camellia-x86_64: performance on this CPU " + "would be suboptimal: disabling " + "camellia-x86_64.\n"); + return -ENODEV; + } + + return crypto_register_algs(camellia_algs, ARRAY_SIZE(camellia_algs)); +} + +static void __exit fini(void) +{ + crypto_unregister_algs(camellia_algs, ARRAY_SIZE(camellia_algs)); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized"); +MODULE_ALIAS("camellia"); +MODULE_ALIAS("camellia-asm"); diff --git a/arch/x86/crypto/crc32c-intel.c b/arch/x86/crypto/crc32c-intel.c index b9d00261703..493f959261f 100644 --- a/arch/x86/crypto/crc32c-intel.c +++ b/arch/x86/crypto/crc32c-intel.c @@ -31,6 +31,7 @@ #include <crypto/internal/hash.h> #include <asm/cpufeature.h> +#include <asm/cpu_device_id.h> #define CHKSUM_BLOCK_SIZE 1 #define CHKSUM_DIGEST_SIZE 4 @@ -173,13 +174,17 @@ static struct shash_alg alg = { } }; +static const struct x86_cpu_id crc32c_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_XMM4_2), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id); static int __init crc32c_intel_mod_init(void) { - if (cpu_has_xmm4_2) - return crypto_register_shash(&alg); - else + if (!x86_match_cpu(crc32c_cpu_id)) return -ENODEV; + return crypto_register_shash(&alg); } static void __exit crc32c_intel_mod_fini(void) diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 976aa64d9a2..b4bf0a63b52 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -20,6 +20,7 @@ #include <crypto/gf128mul.h> #include <crypto/internal/hash.h> #include <asm/i387.h> +#include <asm/cpu_device_id.h> #define GHASH_BLOCK_SIZE 16 #define GHASH_DIGEST_SIZE 16 @@ -294,15 +295,18 @@ static struct ahash_alg ghash_async_alg = { }, }; +static const struct x86_cpu_id pcmul_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), /* Pickle-Mickle-Duck */ + {} +}; +MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id); + static int __init ghash_pclmulqdqni_mod_init(void) { int err; - if (!cpu_has_pclmulqdq) { - printk(KERN_INFO "Intel PCLMULQDQ-NI instructions are not" - " detected.\n"); + if (!x86_match_cpu(pcmul_cpu_id)) return -ENODEV; - } err = crypto_register_shash(&ghash_alg); if (err) diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S index 4e37677ca85..c00053d42f9 100644 --- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S +++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S @@ -463,23 +463,20 @@ pand x0, x4; \ pxor x2, x4; -#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \ - movdqa x2, t3; \ - movdqa x0, t1; \ - unpcklps x3, t3; \ +#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ movdqa x0, t2; \ - unpcklps x1, t1; \ - unpckhps x1, t2; \ - movdqa t3, x1; \ - unpckhps x3, x2; \ - movdqa t1, x0; \ - movhlps t1, x1; \ - movdqa t2, t1; \ - movlhps t3, x0; \ - movlhps x2, t1; \ - movhlps t2, x2; \ - movdqa x2, x3; \ - movdqa t1, x2; + punpckldq x1, x0; \ + punpckhdq x1, t2; \ + movdqa x2, t1; \ + punpckhdq x3, x2; \ + punpckldq x3, t1; \ + movdqa x0, x1; \ + punpcklqdq t1, x0; \ + punpckhqdq t1, x1; \ + movdqa t2, x3; \ + punpcklqdq x2, t2; \ + punpckhqdq x2, x3; \ + movdqa t2, x2; #define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ movdqu 
(0*4*4)(in), x0; \ diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S index 7f24a154082..3ee1ff04d3e 100644 --- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S @@ -585,23 +585,20 @@ get_key(i, 1, RK1); \ SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ -#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \ - movdqa x2, t3; \ - movdqa x0, t1; \ - unpcklps x3, t3; \ +#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ movdqa x0, t2; \ - unpcklps x1, t1; \ - unpckhps x1, t2; \ - movdqa t3, x1; \ - unpckhps x3, x2; \ - movdqa t1, x0; \ - movhlps t1, x1; \ - movdqa t2, t1; \ - movlhps t3, x0; \ - movlhps x2, t1; \ - movhlps t2, x2; \ - movdqa x2, x3; \ - movdqa t1, x2; + punpckldq x1, x0; \ + punpckhdq x1, t2; \ + movdqa x2, t1; \ + punpckhdq x3, x2; \ + punpckldq x3, t1; \ + movdqa x0, x1; \ + punpcklqdq t1, x0; \ + punpckhqdq t1, x1; \ + movdqa t2, x3; \ + punpcklqdq x2, t2; \ + punpckhqdq x2, x3; \ + movdqa t2, x2; #define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ movdqu (0*4*4)(in), x0; \ diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 7955a9b76b9..4b21be85e0a 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -145,28 +145,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, return ecb_crypt(desc, &walk, false); } -static struct crypto_alg blk_ecb_alg = { - .cra_name = "__ecb-serpent-sse2", - .cra_driver_name = "__driver-ecb-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}; - static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk) { @@ -295,28 +273,6 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, return err; } -static struct crypto_alg blk_cbc_alg = { - .cra_name = "__cbc-serpent-sse2", - .cra_driver_name = "__driver-cbc-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}; - static inline void u128_to_be128(be128 *dst, const u128 *src) { dst->a = cpu_to_be64(src->a); @@ -439,29 +395,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, return err; } -static struct crypto_alg blk_ctr_alg = { - .cra_name = "__ctr-serpent-sse2", - .cra_driver_name = "__driver-ctr-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list), - .cra_u = { - .blkcipher = { - 
.min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = serpent_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}; - struct crypt_priv { struct serpent_ctx *ctx; bool fpu_enabled; @@ -580,32 +513,6 @@ static void lrw_exit_tfm(struct crypto_tfm *tfm) lrw_free_table(&ctx->lrw_table); } -static struct crypto_alg blk_lrw_alg = { - .cra_name = "__lrw-serpent-sse2", - .cra_driver_name = "__driver-lrw-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(blk_lrw_alg.cra_list), - .cra_exit = lrw_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = lrw_serpent_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}; - struct serpent_xts_ctx { struct serpent_ctx tweak_ctx; struct serpent_ctx crypt_ctx; @@ -689,29 +596,6 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, return ret; } -static struct crypto_alg blk_xts_alg = { - .cra_name = "__xts-serpent-sse2", - .cra_driver_name = "__driver-xts-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(blk_xts_alg.cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = xts_serpent_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}; - static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, unsigned int key_len) { @@ -792,28 +676,133 @@ static void ablk_exit(struct crypto_tfm *tfm) cryptd_free_ablkcipher(ctx->cryptd_tfm); } -static void ablk_init_common(struct crypto_tfm *tfm, - struct cryptd_ablkcipher *cryptd_tfm) +static int ablk_init(struct crypto_tfm *tfm) { struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); + struct cryptd_ablkcipher *cryptd_tfm; + char drv_name[CRYPTO_MAX_ALG_NAME]; + + snprintf(drv_name, sizeof(drv_name), "__driver-%s", + crypto_tfm_alg_driver_name(tfm)); + + cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); ctx->cryptd_tfm = cryptd_tfm; tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + crypto_ablkcipher_reqsize(&cryptd_tfm->base); -} - -static int ablk_ecb_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-serpent-sse2", 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); return 0; } -static struct crypto_alg ablk_ecb_alg = { +static struct crypto_alg serpent_algs[10] = { { + .cra_name = "__ecb-serpent-sse2", + .cra_driver_name = "__driver-ecb-serpent-sse2", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list), + .cra_u = { 
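+ /* cra_priority 0: internal helper algorithm, intended to be reached only via the cryptd-backed wrappers registered later in this array */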
+ .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "__cbc-serpent-sse2", + .cra_driver_name = "__driver-cbc-serpent-sse2", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "__ctr-serpent-sse2", + .cra_driver_name = "__driver-ctr-serpent-sse2", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}, { + .cra_name = "__lrw-serpent-sse2", + .cra_driver_name = "__driver-lrw-serpent-sse2", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct serpent_lrw_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list), + .cra_exit = lrw_exit_tfm, + .cra_u = { + .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE + + SERPENT_BLOCK_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE + + SERPENT_BLOCK_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = lrw_serpent_setkey, + .encrypt = lrw_encrypt, + .decrypt = lrw_decrypt, + }, + }, +}, { + .cra_name = "__xts-serpent-sse2", + .cra_driver_name = "__driver-xts-serpent-sse2", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct serpent_xts_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE * 2, + .max_keysize = SERPENT_MAX_KEY_SIZE * 2, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = xts_serpent_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, + }, +}, { .cra_name = "ecb(serpent)", .cra_driver_name = "ecb-serpent-sse2", .cra_priority = 400, @@ -823,8 +812,8 @@ static struct crypto_alg ablk_ecb_alg = { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_ecb_alg.cra_list), - .cra_init = ablk_ecb_init, + .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list), + .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { .ablkcipher = { @@ -835,20 +824,7 @@ static struct crypto_alg ablk_ecb_alg = { .decrypt = ablk_decrypt, }, }, -}; - -static int ablk_cbc_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-serpent-sse2", 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); - return 0; -} - -static struct crypto_alg 
ablk_cbc_alg = { +}, { .cra_name = "cbc(serpent)", .cra_driver_name = "cbc-serpent-sse2", .cra_priority = 400, @@ -858,8 +834,8 @@ static struct crypto_alg ablk_cbc_alg = { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_cbc_alg.cra_list), - .cra_init = ablk_cbc_init, + .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list), + .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { .ablkcipher = { @@ -871,20 +847,7 @@ static struct crypto_alg ablk_cbc_alg = { .decrypt = ablk_decrypt, }, }, -}; - -static int ablk_ctr_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-serpent-sse2", 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); - return 0; -} - -static struct crypto_alg ablk_ctr_alg = { +}, { .cra_name = "ctr(serpent)", .cra_driver_name = "ctr-serpent-sse2", .cra_priority = 400, @@ -894,8 +857,8 @@ static struct crypto_alg ablk_ctr_alg = { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_ctr_alg.cra_list), - .cra_init = ablk_ctr_init, + .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list), + .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { .ablkcipher = { @@ -908,20 +871,7 @@ static struct crypto_alg ablk_ctr_alg = { .geniv = "chainiv", }, }, -}; - -static int ablk_lrw_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher("__driver-lrw-serpent-sse2", 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); - return 0; -} - -static struct crypto_alg ablk_lrw_alg = { +}, { .cra_name = "lrw(serpent)", .cra_driver_name = "lrw-serpent-sse2", .cra_priority = 400, @@ -931,8 +881,8 @@ static struct crypto_alg ablk_lrw_alg = { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_lrw_alg.cra_list), - .cra_init = ablk_lrw_init, + .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list), + .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { .ablkcipher = { @@ -946,20 +896,7 @@ static struct crypto_alg ablk_lrw_alg = { .decrypt = ablk_decrypt, }, }, -}; - -static int ablk_xts_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher("__driver-xts-serpent-sse2", 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); - return 0; -} - -static struct crypto_alg ablk_xts_alg = { +}, { .cra_name = "xts(serpent)", .cra_driver_name = "xts-serpent-sse2", .cra_priority = 400, @@ -969,8 +906,8 @@ static struct crypto_alg ablk_xts_alg = { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_xts_alg.cra_list), - .cra_init = ablk_xts_init, + .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list), + .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { .ablkcipher = { @@ -982,84 +919,21 @@ static struct crypto_alg ablk_xts_alg = { .decrypt = ablk_decrypt, }, }, -}; +} }; static int __init serpent_sse2_init(void) { - int err; - if (!cpu_has_xmm2) { printk(KERN_INFO "SSE2 instructions are not detected.\n"); return -ENODEV; } - err = crypto_register_alg(&blk_ecb_alg); - if (err) - goto blk_ecb_err; - err = crypto_register_alg(&blk_cbc_alg); - if (err) - goto blk_cbc_err; - err = crypto_register_alg(&blk_ctr_alg); - if (err) 
- goto blk_ctr_err; - err = crypto_register_alg(&ablk_ecb_alg); - if (err) - goto ablk_ecb_err; - err = crypto_register_alg(&ablk_cbc_alg); - if (err) - goto ablk_cbc_err; - err = crypto_register_alg(&ablk_ctr_alg); - if (err) - goto ablk_ctr_err; - err = crypto_register_alg(&blk_lrw_alg); - if (err) - goto blk_lrw_err; - err = crypto_register_alg(&ablk_lrw_alg); - if (err) - goto ablk_lrw_err; - err = crypto_register_alg(&blk_xts_alg); - if (err) - goto blk_xts_err; - err = crypto_register_alg(&ablk_xts_alg); - if (err) - goto ablk_xts_err; - return err; - - crypto_unregister_alg(&ablk_xts_alg); -ablk_xts_err: - crypto_unregister_alg(&blk_xts_alg); -blk_xts_err: - crypto_unregister_alg(&ablk_lrw_alg); -ablk_lrw_err: - crypto_unregister_alg(&blk_lrw_alg); -blk_lrw_err: - crypto_unregister_alg(&ablk_ctr_alg); -ablk_ctr_err: - crypto_unregister_alg(&ablk_cbc_alg); -ablk_cbc_err: - crypto_unregister_alg(&ablk_ecb_alg); -ablk_ecb_err: - crypto_unregister_alg(&blk_ctr_alg); -blk_ctr_err: - crypto_unregister_alg(&blk_cbc_alg); -blk_cbc_err: - crypto_unregister_alg(&blk_ecb_alg); -blk_ecb_err: - return err; + return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); } static void __exit serpent_sse2_exit(void) { - crypto_unregister_alg(&ablk_xts_alg); - crypto_unregister_alg(&blk_xts_alg); - crypto_unregister_alg(&ablk_lrw_alg); - crypto_unregister_alg(&blk_lrw_alg); - crypto_unregister_alg(&ablk_ctr_alg); - crypto_unregister_alg(&ablk_cbc_alg); - crypto_unregister_alg(&ablk_ecb_alg); - crypto_unregister_alg(&blk_ctr_alg); - crypto_unregister_alg(&blk_cbc_alg); - crypto_unregister_alg(&blk_ecb_alg); + crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); } module_init(serpent_sse2_init); diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c index dc6b3fb817f..359ae084275 100644 --- a/arch/x86/crypto/twofish_glue.c +++ b/arch/x86/crypto/twofish_glue.c @@ -68,7 +68,7 @@ static struct crypto_alg alg = { .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = TF_BLOCK_SIZE, .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 3, + .cra_alignmask = 0, .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(alg.cra_list), .cra_u = { diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 7fee8c152f9..922ab24cce3 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -25,6 +25,7 @@ * */ +#include <asm/processor.h> #include <linux/crypto.h> #include <linux/init.h> #include <linux/module.h> @@ -122,28 +123,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way); } -static struct crypto_alg blk_ecb_alg = { - .cra_name = "ecb(twofish)", - .cra_driver_name = "ecb-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = twofish_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}; - static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk) { @@ -267,29 +246,6 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, return err; } -static struct crypto_alg blk_cbc_alg = { - 
.cra_name = "cbc(twofish)", - .cra_driver_name = "cbc-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = twofish_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}; - static inline void u128_to_be128(be128 *dst, const u128 *src) { dst->a = cpu_to_be64(src->a); @@ -411,29 +367,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, return err; } -static struct crypto_alg blk_ctr_alg = { - .cra_name = "ctr(twofish)", - .cra_driver_name = "ctr-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = twofish_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}; - static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) { const unsigned int bsize = TF_BLOCK_SIZE; @@ -524,30 +457,6 @@ static void lrw_exit_tfm(struct crypto_tfm *tfm) lrw_free_table(&ctx->lrw_table); } -static struct crypto_alg blk_lrw_alg = { - .cra_name = "lrw(twofish)", - .cra_driver_name = "lrw-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(blk_lrw_alg.cra_list), - .cra_exit = lrw_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, - .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = lrw_twofish_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}; - struct twofish_xts_ctx { struct twofish_ctx tweak_ctx; struct twofish_ctx crypt_ctx; @@ -614,7 +523,91 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, return xts_crypt(desc, dst, src, nbytes, &req); } -static struct crypto_alg blk_xts_alg = { +static struct crypto_alg tf_algs[5] = { { + .cra_name = "ecb(twofish)", + .cra_driver_name = "ecb-twofish-3way", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(tf_algs[0].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .setkey = twofish_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "cbc(twofish)", + .cra_driver_name = "cbc-twofish-3way", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(tf_algs[1].cra_list), + .cra_u = { + .blkcipher = { + 
.min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = twofish_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "ctr(twofish)", + .cra_driver_name = "ctr-twofish-3way", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(tf_algs[2].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = twofish_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}, { + .cra_name = "lrw(twofish)", + .cra_driver_name = "lrw-twofish-3way", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_lrw_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list), + .cra_exit = lrw_exit_tfm, + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, + .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = lrw_twofish_setkey, + .encrypt = lrw_encrypt, + .decrypt = lrw_decrypt, + }, + }, +}, { .cra_name = "xts(twofish)", .cra_driver_name = "xts-twofish-3way", .cra_priority = 300, @@ -624,7 +617,7 @@ static struct crypto_alg blk_xts_alg = { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(blk_xts_alg.cra_list), + .cra_list = LIST_HEAD_INIT(tf_algs[4].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE * 2, @@ -635,50 +628,62 @@ static struct crypto_alg blk_xts_alg = { .decrypt = xts_decrypt, }, }, -}; +} }; -int __init init(void) +static bool is_blacklisted_cpu(void) { - int err; + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + if (boot_cpu_data.x86 == 0x06 && + (boot_cpu_data.x86_model == 0x1c || + boot_cpu_data.x86_model == 0x26 || + boot_cpu_data.x86_model == 0x36)) { + /* + * On Atom, twofish-3way is slower than the original assembler + * implementation. Twofish-3way trades off some performance by + * storing blocks in 64bit registers to allow three blocks to + * be processed in parallel. Parallel operation then allows gaining + * more performance than was traded off, on out-of-order CPUs. + * However, Atom does not benefit from this parallelism and + * should be blacklisted. + */ + return true; + } - err = crypto_register_alg(&blk_ecb_alg); - if (err) - goto ecb_err; - err = crypto_register_alg(&blk_cbc_alg); - if (err) - goto cbc_err; - err = crypto_register_alg(&blk_ctr_alg); - if (err) - goto ctr_err; - err = crypto_register_alg(&blk_lrw_alg); - if (err) - goto blk_lrw_err; - err = crypto_register_alg(&blk_xts_alg); - if (err) - goto blk_xts_err; - - return 0; - - crypto_unregister_alg(&blk_xts_alg); -blk_xts_err: - crypto_unregister_alg(&blk_lrw_alg); -blk_lrw_err: - crypto_unregister_alg(&blk_ctr_alg); -ctr_err: - crypto_unregister_alg(&blk_cbc_alg); -cbc_err: - crypto_unregister_alg(&blk_ecb_alg); -ecb_err: - return err; + if (boot_cpu_data.x86 == 0x0f) { + /* + * On Pentium 4, twofish-3way is slower than the original assembler + * implementation because of the excessive 64bit rotates and + * left-shifts (which are really slow on P4) needed to store and + * handle a 128bit block in two 64bit registers.
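+ * Note that loading the module with the force=1 parameter (declared below) overrides this blacklist.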
+ */ + return true; + } + + return false; +} + +static int force; +module_param(force, int, 0); +MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); + +static int __init init(void) +{ + if (!force && is_blacklisted_cpu()) { + printk(KERN_INFO + "twofish-x86_64-3way: performance on this CPU " + "would be suboptimal: disabling " + "twofish-x86_64-3way.\n"); + return -ENODEV; + } + + return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs)); } -void __exit fini(void) +static void __exit fini(void) { - crypto_unregister_alg(&blk_xts_alg); - crypto_unregister_alg(&blk_lrw_alg); - crypto_unregister_alg(&blk_ctr_alg); - crypto_unregister_alg(&blk_cbc_alg); - crypto_unregister_alg(&blk_ecb_alg); + crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs)); } module_init(init); diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 39e49091f64..07b3a68d2d2 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -26,7 +26,6 @@ #include <linux/init.h> #include <linux/jiffies.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/pgalloc.h> #include <asm/cacheflush.h> @@ -120,9 +119,7 @@ static void set_brk(unsigned long start, unsigned long end) end = PAGE_ALIGN(end); if (end <= start) return; - down_write(¤t->mm->mmap_sem); - do_brk(start, end - start); - up_write(¤t->mm->mmap_sem); + vm_brk(start, end - start); } #ifdef CORE_DUMP @@ -297,8 +294,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* OK, This is the point of no return */ set_personality(PER_LINUX); - set_thread_flag(TIF_IA32); - current->mm->context.ia32_compat = 1; + set_personality_ia32(false); setup_new_exec(bprm); @@ -323,7 +319,6 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) } install_exec_creds(bprm); - current->flags &= ~PF_FORKNOEXEC; if (N_MAGIC(ex) == OMAGIC) { unsigned long text_addr, map_size; @@ -334,9 +329,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) pos = 32; map_size = ex.a_text+ex.a_data; - down_write(¤t->mm->mmap_sem); - error = do_brk(text_addr & PAGE_MASK, map_size); - up_write(¤t->mm->mmap_sem); + error = vm_brk(text_addr & PAGE_MASK, map_size); if (error != (text_addr & PAGE_MASK)) { send_sig(SIGKILL, current, 0); @@ -375,9 +368,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) { loff_t pos = fd_offset; - down_write(¤t->mm->mmap_sem); - do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); - up_write(¤t->mm->mmap_sem); + vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); bprm->file->f_op->read(bprm->file, (char __user *)N_TXTADDR(ex), ex.a_text+ex.a_data, &pos); @@ -387,26 +378,22 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) goto beyond_if; } - down_write(¤t->mm->mmap_sem); - error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, + error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT, fd_offset); - up_write(¤t->mm->mmap_sem); if (error != N_TXTADDR(ex)) { send_sig(SIGKILL, current, 0); return error; } - down_write(¤t->mm->mmap_sem); - error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, + error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT, fd_offset + ex.a_text); - up_write(¤t->mm->mmap_sem); if (error != N_DATADDR(ex)) { send_sig(SIGKILL, current, 
0); return error; @@ -478,9 +465,7 @@ static int load_aout_library(struct file *file) error_time = jiffies; } #endif - down_write(¤t->mm->mmap_sem); - do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); - up_write(¤t->mm->mmap_sem); + vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); file->f_op->read(file, (char __user *)start_addr, ex.a_text + ex.a_data, &pos); @@ -492,12 +477,10 @@ static int load_aout_library(struct file *file) goto out; } /* Now use mmap to map the library into memory. */ - down_write(¤t->mm->mmap_sem); - error = do_mmap(file, start_addr, ex.a_text + ex.a_data, + error = vm_mmap(file, start_addr, ex.a_text + ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT, N_TXTOFF(ex)); - up_write(¤t->mm->mmap_sem); retval = error; if (error != start_addr) goto out; @@ -505,9 +488,7 @@ static int load_aout_library(struct file *file) len = PAGE_ALIGN(ex.a_text + ex.a_data); bss = ex.a_text + ex.a_data + ex.a_bss; if (bss > len) { - down_write(¤t->mm->mmap_sem); - error = do_brk(start_addr + len, bss - len); - up_write(¤t->mm->mmap_sem); + error = vm_brk(start_addr + len, bss - len); retval = error; if (error != start_addr + len) goto out; @@ -519,7 +500,8 @@ out: static int __init init_aout_binfmt(void) { - return register_binfmt(&aout_format); + register_binfmt(&aout_format); + return 0; } static void __exit exit_aout_binfmt(void) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 65577698cab..a69245ba27e 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -12,10 +12,8 @@ #include <linux/mm.h> #include <linux/smp.h> #include <linux/kernel.h> -#include <linux/signal.h> #include <linux/errno.h> #include <linux/wait.h> -#include <linux/ptrace.h> #include <linux/unistd.h> #include <linux/stddef.h> #include <linux/personality.h> @@ -24,6 +22,7 @@ #include <asm/ucontext.h> #include <asm/uaccess.h> #include <asm/i387.h> +#include <asm/fpu-internal.h> #include <asm/ptrace.h> #include <asm/ia32_unistd.h> #include <asm/user32.h> @@ -31,20 +30,15 @@ #include <asm/proto.h> #include <asm/vdso.h> #include <asm/sigframe.h> +#include <asm/sighandling.h> #include <asm/sys_ia32.h> -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) - -#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ - X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ - X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ - X86_EFLAGS_CF) - -void signal_fault(struct pt_regs *regs, void __user *frame, char *where); +#define FIX_EFLAGS __FIX_EFLAGS int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) { int err = 0; + bool ia32 = is_ia32_task(); if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) return -EFAULT; @@ -74,8 +68,13 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) case __SI_FAULT >> 16: break; case __SI_CHLD >> 16: - put_user_ex(from->si_utime, &to->si_utime); - put_user_ex(from->si_stime, &to->si_stime); + if (ia32) { + put_user_ex(from->si_utime, &to->si_utime); + put_user_ex(from->si_stime, &to->si_stime); + } else { + put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime); + put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime); + } put_user_ex(from->si_status, &to->si_status); /* FALL THROUGH */ default: @@ -347,7 +346,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, put_user_ex(regs->dx, &sc->dx); put_user_ex(regs->cx, &sc->cx); put_user_ex(regs->ax, &sc->ax); - put_user_ex(current->thread.trap_no, 
&sc->trapno); + put_user_ex(current->thread.trap_nr, &sc->trapno); put_user_ex(current->thread.error_code, &sc->err); put_user_ex(regs->ip, &sc->ip); put_user_ex(regs->cs, (unsigned int __user *)&sc->cs); diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index f6f5c53dc90..aec2202a596 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -287,46 +287,6 @@ asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act, return ret; } -asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, - compat_sigset_t __user *oset, - unsigned int sigsetsize) -{ - sigset_t s; - compat_sigset_t s32; - int ret; - mm_segment_t old_fs = get_fs(); - - if (set) { - if (copy_from_user(&s32, set, sizeof(compat_sigset_t))) - return -EFAULT; - switch (_NSIG_WORDS) { - case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); - case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); - case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); - case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); - } - } - set_fs(KERNEL_DS); - ret = sys_rt_sigprocmask(how, - set ? (sigset_t __user *)&s : NULL, - oset ? (sigset_t __user *)&s : NULL, - sigsetsize); - set_fs(old_fs); - if (ret) - return ret; - if (oset) { - switch (_NSIG_WORDS) { - case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; - case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; - case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; - case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; - } - if (copy_to_user(oset, &s32, sizeof(compat_sigset_t))) - return -EFAULT; - } - return 0; -} - asmlinkage long sys32_alarm(unsigned int seconds) { return alarm_setitimer(seconds); diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index b57e6a43a37..f9c0d3ba9e8 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -14,6 +14,7 @@ header-y += msr.h header-y += mtrr.h header-y += posix_types_32.h header-y += posix_types_64.h +header-y += posix_types_x32.h header-y += prctl.h header-y += processor-flags.h header-y += ptrace-abi.h @@ -24,3 +25,4 @@ header-y += vsyscall.h genhdr-y += unistd_32.h genhdr-y += unistd_64.h +genhdr-y += unistd_x32.h diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 37ad100a221..49331bedc15 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -145,6 +145,12 @@ static inline int alternatives_text_reserved(void *start, void *end) */ #define ASM_OUTPUT2(a...) a +/* + * use this macro if you need clobbers but no inputs in + * alternative_{input,io,call}() + */ +#define ASM_NO_INPUT_CLOBBER(clbr...) 
"i" (0) : clbr + struct paravirt_patch_site; #ifdef CONFIG_PARAVIRT void apply_paravirt(struct paravirt_patch_site *start, diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3ab9bdd87e7..d8541017126 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -11,7 +11,6 @@ #include <linux/atomic.h> #include <asm/fixmap.h> #include <asm/mpspec.h> -#include <asm/system.h> #include <asm/msr.h> #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -288,6 +287,7 @@ struct apic { int (*probe)(void); int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); + int (*apic_id_valid)(int apicid); int (*apic_id_registered)(void); u32 irq_delivery_mode; @@ -532,6 +532,11 @@ static inline unsigned int read_apic_id(void) return apic->get_apic_id(reg); } +static inline int default_apic_id_valid(int apicid) +{ + return (apicid < 255); +} + extern void default_setup_apic_routing(void); extern struct apic apic_noop; diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index fa13f0ec287..198119910da 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -14,13 +14,52 @@ typedef struct { #define ATOMIC64_INIT(val) { (val) } +#define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...) +#ifndef ATOMIC64_EXPORT +#define ATOMIC64_DECL_ONE __ATOMIC64_DECL +#else +#define ATOMIC64_DECL_ONE(sym) __ATOMIC64_DECL(sym); \ + ATOMIC64_EXPORT(atomic64_##sym) +#endif + #ifdef CONFIG_X86_CMPXCHG64 -#define ATOMIC64_ALTERNATIVE_(f, g) "call atomic64_" #g "_cx8" +#define __alternative_atomic64(f, g, out, in...) \ + asm volatile("call %P[func]" \ + : out : [func] "i" (atomic64_##g##_cx8), ## in) + +#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8) #else -#define ATOMIC64_ALTERNATIVE_(f, g) ALTERNATIVE("call atomic64_" #f "_386", "call atomic64_" #g "_cx8", X86_FEATURE_CX8) +#define __alternative_atomic64(f, g, out, in...) \ + alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \ + X86_FEATURE_CX8, ASM_OUTPUT2(out), ## in) + +#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8); \ + ATOMIC64_DECL_ONE(sym##_386) + +ATOMIC64_DECL_ONE(add_386); +ATOMIC64_DECL_ONE(sub_386); +ATOMIC64_DECL_ONE(inc_386); +ATOMIC64_DECL_ONE(dec_386); #endif -#define ATOMIC64_ALTERNATIVE(f) ATOMIC64_ALTERNATIVE_(f, f) +#define alternative_atomic64(f, out, in...) 
\ + __alternative_atomic64(f, f, ASM_OUTPUT2(out), ## in) + +ATOMIC64_DECL(read); +ATOMIC64_DECL(set); +ATOMIC64_DECL(xchg); +ATOMIC64_DECL(add_return); +ATOMIC64_DECL(sub_return); +ATOMIC64_DECL(inc_return); +ATOMIC64_DECL(dec_return); +ATOMIC64_DECL(dec_if_positive); +ATOMIC64_DECL(inc_not_zero); +ATOMIC64_DECL(add_unless); + +#undef ATOMIC64_DECL +#undef ATOMIC64_DECL_ONE +#undef __ATOMIC64_DECL +#undef ATOMIC64_EXPORT /** * atomic64_cmpxchg - cmpxchg atomic64 variable @@ -50,11 +89,9 @@ static inline long long atomic64_xchg(atomic64_t *v, long long n) long long o; unsigned high = (unsigned)(n >> 32); unsigned low = (unsigned)n; - asm volatile(ATOMIC64_ALTERNATIVE(xchg) - : "=A" (o), "+b" (low), "+c" (high) - : "S" (v) - : "memory" - ); + alternative_atomic64(xchg, "=&A" (o), + "S" (v), "b" (low), "c" (high) + : "memory"); return o; } @@ -69,11 +106,9 @@ static inline void atomic64_set(atomic64_t *v, long long i) { unsigned high = (unsigned)(i >> 32); unsigned low = (unsigned)i; - asm volatile(ATOMIC64_ALTERNATIVE(set) - : "+b" (low), "+c" (high) - : "S" (v) - : "eax", "edx", "memory" - ); + alternative_atomic64(set, /* no output */, + "S" (v), "b" (low), "c" (high) + : "eax", "edx", "memory"); } /** @@ -85,10 +120,7 @@ static inline void atomic64_set(atomic64_t *v, long long i) static inline long long atomic64_read(const atomic64_t *v) { long long r; - asm volatile(ATOMIC64_ALTERNATIVE(read) - : "=A" (r), "+c" (v) - : : "memory" - ); + alternative_atomic64(read, "=&A" (r), "c" (v) : "memory"); return r; } @@ -101,10 +133,9 @@ static inline long long atomic64_read(const atomic64_t *v) */ static inline long long atomic64_add_return(long long i, atomic64_t *v) { - asm volatile(ATOMIC64_ALTERNATIVE(add_return) - : "+A" (i), "+c" (v) - : : "memory" - ); + alternative_atomic64(add_return, + ASM_OUTPUT2("+A" (i), "+c" (v)), + ASM_NO_INPUT_CLOBBER("memory")); return i; } @@ -113,32 +144,25 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v) */ static inline long long atomic64_sub_return(long long i, atomic64_t *v) { - asm volatile(ATOMIC64_ALTERNATIVE(sub_return) - : "+A" (i), "+c" (v) - : : "memory" - ); + alternative_atomic64(sub_return, + ASM_OUTPUT2("+A" (i), "+c" (v)), + ASM_NO_INPUT_CLOBBER("memory")); return i; } static inline long long atomic64_inc_return(atomic64_t *v) { long long a; - asm volatile(ATOMIC64_ALTERNATIVE(inc_return) - : "=A" (a) - : "S" (v) - : "memory", "ecx" - ); + alternative_atomic64(inc_return, "=&A" (a), + "S" (v) : "memory", "ecx"); return a; } static inline long long atomic64_dec_return(atomic64_t *v) { long long a; - asm volatile(ATOMIC64_ALTERNATIVE(dec_return) - : "=A" (a) - : "S" (v) - : "memory", "ecx" - ); + alternative_atomic64(dec_return, "=&A" (a), + "S" (v) : "memory", "ecx"); return a; } @@ -151,10 +175,9 @@ static inline long long atomic64_dec_return(atomic64_t *v) */ static inline long long atomic64_add(long long i, atomic64_t *v) { - asm volatile(ATOMIC64_ALTERNATIVE_(add, add_return) - : "+A" (i), "+c" (v) - : : "memory" - ); + __alternative_atomic64(add, add_return, + ASM_OUTPUT2("+A" (i), "+c" (v)), + ASM_NO_INPUT_CLOBBER("memory")); return i; } @@ -167,10 +190,9 @@ static inline long long atomic64_add(long long i, atomic64_t *v) */ static inline long long atomic64_sub(long long i, atomic64_t *v) { - asm volatile(ATOMIC64_ALTERNATIVE_(sub, sub_return) - : "+A" (i), "+c" (v) - : : "memory" - ); + __alternative_atomic64(sub, sub_return, + ASM_OUTPUT2("+A" (i), "+c" (v)), + ASM_NO_INPUT_CLOBBER("memory")); return i; } @@ 
-196,10 +218,8 @@ static inline int atomic64_sub_and_test(long long i, atomic64_t *v) */ static inline void atomic64_inc(atomic64_t *v) { - asm volatile(ATOMIC64_ALTERNATIVE_(inc, inc_return) - : : "S" (v) - : "memory", "eax", "ecx", "edx" - ); + __alternative_atomic64(inc, inc_return, /* no output */, + "S" (v) : "memory", "eax", "ecx", "edx"); } /** @@ -210,10 +230,8 @@ static inline void atomic64_inc(atomic64_t *v) */ static inline void atomic64_dec(atomic64_t *v) { - asm volatile(ATOMIC64_ALTERNATIVE_(dec, dec_return) - : : "S" (v) - : "memory", "eax", "ecx", "edx" - ); + __alternative_atomic64(dec, dec_return, /* no output */, + "S" (v) : "memory", "eax", "ecx", "edx"); } /** @@ -263,15 +281,15 @@ static inline int atomic64_add_negative(long long i, atomic64_t *v) * @u: ...unless v is equal to u. * * Atomically adds @a to @v, so long as it was not @u. - * Returns the old value of @v. + * Returns non-zero if the add was done, zero otherwise. */ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) { unsigned low = (unsigned)u; unsigned high = (unsigned)(u >> 32); - asm volatile(ATOMIC64_ALTERNATIVE(add_unless) "\n\t" - : "+A" (a), "+c" (v), "+S" (low), "+D" (high) - : : "memory"); + alternative_atomic64(add_unless, + ASM_OUTPUT2("+A" (a), "+c" (low), "+D" (high)), + "S" (v) : "memory"); return (int)a; } @@ -279,26 +297,20 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) static inline int atomic64_inc_not_zero(atomic64_t *v) { int r; - asm volatile(ATOMIC64_ALTERNATIVE(inc_not_zero) - : "=a" (r) - : "S" (v) - : "ecx", "edx", "memory" - ); + alternative_atomic64(inc_not_zero, "=&a" (r), + "S" (v) : "ecx", "edx", "memory"); return r; } static inline long long atomic64_dec_if_positive(atomic64_t *v) { long long r; - asm volatile(ATOMIC64_ALTERNATIVE(dec_if_positive) - : "=A" (r) - : "S" (v) - : "ecx", "memory" - ); + alternative_atomic64(dec_if_positive, "=&A" (r), + "S" (v) : "ecx", "memory"); return r; } -#undef ATOMIC64_ALTERNATIVE -#undef ATOMIC64_ALTERNATIVE_ +#undef alternative_atomic64 +#undef __alternative_atomic64 #endif /* _ASM_X86_ATOMIC64_32_H */ diff --git a/arch/x86/include/asm/auxvec.h b/arch/x86/include/asm/auxvec.h index 1316b4c3542..77203ac352d 100644 --- a/arch/x86/include/asm/auxvec.h +++ b/arch/x86/include/asm/auxvec.h @@ -9,4 +9,11 @@ #endif #define AT_SYSINFO_EHDR 33 +/* entries in ARCH_DLINFO: */ +#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64) +# define AT_VECTOR_SIZE_ARCH 2 +#else /* else it's non-compat x86-64 */ +# define AT_VECTOR_SIZE_ARCH 1 +#endif + #endif /* _ASM_X86_AUXVEC_H */ diff --git a/arch/x86/um/asm/system.h b/arch/x86/include/asm/barrier.h index a459fd9b759..c6cd358a1ee 100644 --- a/arch/x86/um/asm/system.h +++ b/arch/x86/include/asm/barrier.h @@ -1,31 +1,15 @@ -#ifndef _ASM_X86_SYSTEM_H_ -#define _ASM_X86_SYSTEM_H_ +#ifndef _ASM_X86_BARRIER_H +#define _ASM_X86_BARRIER_H -#include <asm/asm.h> -#include <asm/segment.h> -#include <asm/cpufeature.h> -#include <asm/cmpxchg.h> +#include <asm/alternative.h> #include <asm/nops.h> -#include <linux/kernel.h> -#include <linux/irqflags.h> - -/* entries in ARCH_DLINFO: */ -#ifdef CONFIG_IA32_EMULATION -# define AT_VECTOR_SIZE_ARCH 2 -#else -# define AT_VECTOR_SIZE_ARCH 1 -#endif - -extern unsigned long arch_align_stack(unsigned long sp); - -void default_idle(void); - /* * Force strict CPU ordering. * And yes, this is required on UP too when we're talking * to devices. 
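* See Documentation/memory-barriers.txt for the precise semantics these macros must provide.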
*/ + #ifdef CONFIG_X86_32 /* * Some non-Intel clones support out of order store. wmb() ceases to be a @@ -123,13 +107,10 @@ void default_idle(void); * * (Could use an alternative three way for this if there was one.) */ -static inline void rdtsc_barrier(void) +static __always_inline void rdtsc_barrier(void) { alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); } -extern void *_switch_to(void *prev, void *next, void *last); -#define switch_to(prev, next, last) prev = _switch_to(prev, next, last) - -#endif +#endif /* _ASM_X86_BARRIER_H */ diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index f654d1bb17f..11e1152222d 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -36,4 +36,8 @@ do { \ #endif /* !CONFIG_BUG */ #include <asm-generic/bug.h> + + +extern void show_regs_common(void); + #endif /* _ASM_X86_BUG_H */ diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 4e12668711e..9863ee3747d 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -3,6 +3,7 @@ /* Caches aren't brain-dead on the intel. */ #include <asm-generic/cacheflush.h> +#include <asm/special_insns.h> #ifdef CONFIG_X86_PAT /* diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 30d737ef2a4..d6805798d6f 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -6,7 +6,9 @@ */ #include <linux/types.h> #include <linux/sched.h> +#include <asm/processor.h> #include <asm/user32.h> +#include <asm/unistd.h> #define COMPAT_USER_HZ 100 #define COMPAT_UTS_MACHINE "i686\0\0" @@ -186,7 +188,20 @@ struct compat_shmid64_ds { /* * The type of struct elf_prstatus.pr_reg in compatible core dumps. */ +#ifdef CONFIG_X86_X32_ABI +typedef struct user_regs_struct compat_elf_gregset_t; + +#define PR_REG_SIZE(S) (test_thread_flag(TIF_IA32) ? 68 : 216) +#define PRSTATUS_SIZE(S) (test_thread_flag(TIF_IA32) ? 144 : 296) +#define SET_PR_FPVALID(S,V) \ + do { *(int *) (((void *) &((S)->pr_reg)) + PR_REG_SIZE(0)) = (V); } \ + while (0) + +#define COMPAT_USE_64BIT_TIME \ + (!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)) +#else typedef struct user_regs_struct32 compat_elf_gregset_t; +#endif /* * A pointer passed in from user mode. 
This should not @@ -208,13 +223,30 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) static inline void __user *arch_compat_alloc_user_space(long len) { - struct pt_regs *regs = task_pt_regs(current); - return (void __user *)regs->sp - len; + compat_uptr_t sp; + + if (test_thread_flag(TIF_IA32)) { + sp = task_pt_regs(current)->sp; + } else { + /* -128 for the x32 ABI redzone */ + sp = percpu_read(old_rsp) - 128; + } + + return (void __user *)round_down(sp - len, 16); +} + +static inline bool is_x32_task(void) +{ +#ifdef CONFIG_X86_X32_ABI + if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT) + return true; +#endif + return false; } -static inline int is_compat_task(void) +static inline bool is_compat_task(void) { - return current_thread_info()->status & TS_COMPAT; + return is_ia32_task() || is_x32_task(); } #endif /* _ASM_X86_COMPAT_H */ diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h new file mode 100644 index 00000000000..ff501e511d9 --- /dev/null +++ b/arch/x86/include/asm/cpu_device_id.h @@ -0,0 +1,13 @@ +#ifndef _CPU_DEVICE_ID +#define _CPU_DEVICE_ID 1 + +/* + * Declare drivers belonging to specific x86 CPUs + * Similar in spirit to pci_device_id and related PCI functions + */ + +#include <linux/mod_devicetable.h> + +extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match); + +#endif diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 8d67d428b0f..340ee49961a 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -177,6 +177,7 @@ #define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ #define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ #define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */ +#define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ @@ -199,10 +200,13 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ #define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ #define X86_FEATURE_BMI1 (9*32+ 3) /* 1st group bit manipulation extensions */ +#define X86_FEATURE_HLE (9*32+ 4) /* Hardware Lock Elision */ #define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ #define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */ #define X86_FEATURE_BMI2 (9*32+ 8) /* 2nd group bit manipulation extensions */ #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ +#define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */ +#define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index b903d5ea394..2d91580bf22 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -78,8 +78,75 @@ */ #ifdef __KERNEL__ +#include <linux/bug.h> + DECLARE_PER_CPU(unsigned long, cpu_dr7); +#ifndef CONFIG_PARAVIRT +/* + * These special macros can be used to get or set a debugging register + */ +#define get_debugreg(var, register) \ + (var) = native_get_debugreg(register) +#define set_debugreg(value, register) \ + native_set_debugreg(register, value) +#endif + +static inline unsigned long native_get_debugreg(int regno) +{ + unsigned long val = 0; /* Damn you, gcc! 
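(the explicit zero initialization merely quiets a spurious "may be used uninitialized" warning)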
*/ + + switch (regno) { + case 0: + asm("mov %%db0, %0" :"=r" (val)); + break; + case 1: + asm("mov %%db1, %0" :"=r" (val)); + break; + case 2: + asm("mov %%db2, %0" :"=r" (val)); + break; + case 3: + asm("mov %%db3, %0" :"=r" (val)); + break; + case 6: + asm("mov %%db6, %0" :"=r" (val)); + break; + case 7: + asm("mov %%db7, %0" :"=r" (val)); + break; + default: + BUG(); + } + return val; +} + +static inline void native_set_debugreg(int regno, unsigned long value) +{ + switch (regno) { + case 0: + asm("mov %0, %%db0" ::"r" (value)); + break; + case 1: + asm("mov %0, %%db1" ::"r" (value)); + break; + case 2: + asm("mov %0, %%db2" ::"r" (value)); + break; + case 3: + asm("mov %0, %%db3" ::"r" (value)); + break; + case 6: + asm("mov %0, %%db6" ::"r" (value)); + break; + case 7: + asm("mov %0, %%db7" ::"r" (value)); + break; + default: + BUG(); + } +} + static inline void hw_breakpoint_disable(void) { /* Zero the control register for HW Breakpoint */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 00b222fa06b..e95822d683f 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -5,7 +5,6 @@ #include <asm/ldt.h> #include <asm/mmu.h> -#include <linux/mm_types.h> #include <linux/smp.h> static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info) @@ -100,9 +99,6 @@ static inline int desc_empty(const void *ptr) #define load_TLS(t, cpu) native_load_tls(t, cpu) #define set_ldt native_set_ldt -#ifdef CONFIG_X86_32 -#define load_user_cs_desc native_load_user_cs_desc -#endif /*CONFIG_X86_32*/ #define write_ldt_entry(dt, entry, desc) native_write_ldt_entry(dt, entry, desc) #define write_gdt_entry(dt, entry, desc, type) native_write_gdt_entry(dt, entry, desc, type) @@ -408,25 +404,4 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist) _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); } -#ifdef CONFIG_X86_32 -static inline void set_user_cs(struct desc_struct *desc, unsigned long limit) -{ - limit = (limit - 1) / PAGE_SIZE; - desc->a = limit & 0xffff; - desc->b = (limit & 0xf0000) | 0x00c0fb00; -} - -static inline void native_load_user_cs_desc(int cpu, struct mm_struct *mm) -{ - get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS] = (mm)->context.user_cs; -} - -#define arch_add_exec_range arch_add_exec_range -#define arch_remove_exec_range arch_remove_exec_range -#define arch_flush_exec_range arch_flush_exec_range -extern void arch_add_exec_range(struct mm_struct *mm, unsigned long limit); -extern void arch_remove_exec_range(struct mm_struct *mm, unsigned long limit); -extern void arch_flush_exec_range(struct mm_struct *mm); -#endif /* CONFIG_X86_32 */ - #endif /* _ASM_X86_DESC_H */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index ed3065fd631..4b4331d7193 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -59,7 +59,8 @@ extern int dma_supported(struct device *hwdev, u64 mask); extern int dma_set_mask(struct device *dev, u64 mask); extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag); + dma_addr_t *dma_addr, gfp_t flag, + struct dma_attrs *attrs); static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) { @@ -111,9 +112,11 @@ static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) return gfp; } +#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) + static inline void * -dma_alloc_coherent(struct device 
*dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp) +dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, struct dma_attrs *attrs) { struct dma_map_ops *ops = get_dma_ops(dev); void *memory; @@ -129,18 +132,21 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, if (!is_device_dma_capable(dev)) return NULL; - if (!ops->alloc_coherent) + if (!ops->alloc) return NULL; - memory = ops->alloc_coherent(dev, size, dma_handle, - dma_alloc_coherent_gfp_flags(dev, gfp)); + memory = ops->alloc(dev, size, dma_handle, + dma_alloc_coherent_gfp_flags(dev, gfp), attrs); debug_dma_alloc_coherent(dev, size, *dma_handle, memory); return memory; } -static inline void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t bus) +#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL) + +static inline void dma_free_attrs(struct device *dev, size_t size, + void *vaddr, dma_addr_t bus, + struct dma_attrs *attrs) { struct dma_map_ops *ops = get_dma_ops(dev); @@ -150,8 +156,8 @@ static inline void dma_free_coherent(struct device *dev, size_t size, return; debug_dma_free_coherent(dev, size, vaddr, bus); - if (ops->free_coherent) - ops->free_coherent(dev, size, vaddr, bus); + if (ops->free) + ops->free(dev, size, vaddr, bus, attrs); } #endif diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 844f735fd63..c9dcc181d4d 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -95,7 +95,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, extern int add_efi_memmap; extern void efi_set_executable(efi_memory_desc_t *md, bool executable); -extern void efi_memblock_x86_reserve_range(void); +extern int efi_memblock_x86_reserve_range(void); extern void efi_call_phys_prelog(void); extern void efi_call_phys_epilog(void); diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 5f962df30d0..5939f44fe0c 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -84,7 +84,6 @@ extern unsigned int vdso_enabled; (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486)) #include <asm/processor.h> -#include <asm/system.h> #ifdef CONFIG_X86_32 #include <asm/desc.h> @@ -156,7 +155,12 @@ do { \ #define elf_check_arch(x) \ ((x)->e_machine == EM_X86_64) -#define compat_elf_check_arch(x) elf_check_arch_ia32(x) +#define compat_elf_check_arch(x) \ + (elf_check_arch_ia32(x) || (x)->e_machine == EM_X86_64) + +#if __USER32_DS != __USER_DS +# error "The following code assumes __USER32_DS == __USER_DS" +#endif static inline void elf_common_init(struct thread_struct *t, struct pt_regs *regs, const u16 ds) @@ -179,8 +183,9 @@ static inline void elf_common_init(struct thread_struct *t, void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp); #define compat_start_thread start_thread_ia32 -void set_personality_ia32(void); -#define COMPAT_SET_PERSONALITY(ex) set_personality_ia32() +void set_personality_ia32(bool); +#define COMPAT_SET_PERSONALITY(ex) \ + set_personality_ia32((ex).e_machine == EM_X86_64) #define COMPAT_ELF_PLATFORM ("i686") @@ -287,7 +292,7 @@ do { \ #define VDSO_HIGH_BASE 0xffffe000U /* CONFIG_COMPAT_VDSO address */ /* 1GB for 64bit, 8MB for 32bit */ -#define STACK_RND_MASK (test_thread_flag(TIF_IA32) ? 0x7ff : 0x3fffff) +#define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 
0x7ff : 0x3fffff) #define ARCH_DLINFO \ do { \ @@ -296,9 +301,20 @@ do { \ (unsigned long)current->mm->context.vdso); \ } while (0) +#define ARCH_DLINFO_X32 \ +do { \ + if (vdso_enabled) \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (unsigned long)current->mm->context.vdso); \ +} while (0) + #define AT_SYSINFO 32 -#define COMPAT_ARCH_DLINFO ARCH_DLINFO_IA32(sysctl_vsyscall32) +#define COMPAT_ARCH_DLINFO \ +if (test_thread_flag(TIF_X32)) \ + ARCH_DLINFO_X32; \ +else \ + ARCH_DLINFO_IA32(sysctl_vsyscall32) #define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) @@ -314,6 +330,8 @@ struct linux_binprm; #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 extern int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp); +extern int x32_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp); extern int syscall32_setup_pages(struct linux_binprm *, int exstack); #define compat_arch_setup_additional_pages syscall32_setup_pages @@ -330,7 +348,7 @@ static inline int mmap_is_ia32(void) return 1; #endif #ifdef CONFIG_IA32_EMULATION - if (test_thread_flag(TIF_IA32)) + if (test_thread_flag(TIF_ADDR32)) return 1; #endif return 0; diff --git a/arch/x86/include/asm/exec.h b/arch/x86/include/asm/exec.h new file mode 100644 index 00000000000..54c2e1db274 --- /dev/null +++ b/arch/x86/include/asm/exec.h @@ -0,0 +1 @@ +/* define arch_align_stack() here */ diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h new file mode 100644 index 00000000000..4fa88154e4d --- /dev/null +++ b/arch/x86/include/asm/fpu-internal.h @@ -0,0 +1,520 @@ +/* + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes <gareth@valinux.com>, May 2000 + * x86-64 work by Andi Kleen 2002 + */ + +#ifndef _FPU_INTERNAL_H +#define _FPU_INTERNAL_H + +#include <linux/kernel_stat.h> +#include <linux/regset.h> +#include <linux/slab.h> +#include <asm/asm.h> +#include <asm/cpufeature.h> +#include <asm/processor.h> +#include <asm/sigcontext.h> +#include <asm/user.h> +#include <asm/uaccess.h> +#include <asm/xsave.h> + +extern unsigned int sig_xstate_size; +extern void fpu_init(void); + +DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); + +extern user_regset_active_fn fpregs_active, xfpregs_active; +extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, + xstateregs_get; +extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, + xstateregs_set; + + +/* + * xstateregs_active == fpregs_active. Please refer to the comment + * at the definition of fpregs_active. 
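+ * The alias is defined immediately below.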
+ */ +#define xstateregs_active fpregs_active + +extern struct _fpx_sw_bytes fx_sw_reserved; +#ifdef CONFIG_IA32_EMULATION +extern unsigned int sig_xstate_ia32_size; +extern struct _fpx_sw_bytes fx_sw_reserved_ia32; +struct _fpstate_ia32; +struct _xstate_ia32; +extern int save_i387_xstate_ia32(void __user *buf); +extern int restore_i387_xstate_ia32(void __user *buf); +#endif + +#ifdef CONFIG_MATH_EMULATION +extern void finit_soft_fpu(struct i387_soft_struct *soft); +#else +static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} +#endif + +#define X87_FSW_ES (1 << 7) /* Exception Summary */ + +static __always_inline __pure bool use_xsaveopt(void) +{ + return static_cpu_has(X86_FEATURE_XSAVEOPT); +} + +static __always_inline __pure bool use_xsave(void) +{ + return static_cpu_has(X86_FEATURE_XSAVE); +} + +static __always_inline __pure bool use_fxsr(void) +{ + return static_cpu_has(X86_FEATURE_FXSR); +} + +extern void __sanitize_i387_state(struct task_struct *); + +static inline void sanitize_i387_state(struct task_struct *tsk) +{ + if (!use_xsaveopt()) + return; + __sanitize_i387_state(tsk); +} + +#ifdef CONFIG_X86_64 +static inline int fxrstor_checking(struct i387_fxsave_struct *fx) +{ + int err; + + /* See comment in fxsave() below. */ +#ifdef CONFIG_AS_FXSAVEQ + asm volatile("1: fxrstorq %[fx]\n\t" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [err] "=r" (err) + : [fx] "m" (*fx), "0" (0)); +#else + asm volatile("1: rex64/fxrstor (%[fx])\n\t" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [err] "=r" (err) + : [fx] "R" (fx), "m" (*fx), "0" (0)); +#endif + return err; +} + +static inline int fxsave_user(struct i387_fxsave_struct __user *fx) +{ + int err; + + /* + * Clear the bytes not touched by the fxsave and reserved + * for the SW usage. + */ + err = __clear_user(&fx->sw_reserved, + sizeof(struct _fpx_sw_bytes)); + if (unlikely(err)) + return -EFAULT; + + /* See comment in fxsave() below. */ +#ifdef CONFIG_AS_FXSAVEQ + asm volatile("1: fxsaveq %[fx]\n\t" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [err] "=r" (err), [fx] "=m" (*fx) + : "0" (0)); +#else + asm volatile("1: rex64/fxsave (%[fx])\n\t" + "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl $-1,%[err]\n" + " jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + : [err] "=r" (err), "=m" (*fx) + : [fx] "R" (fx), "0" (0)); +#endif + if (unlikely(err) && + __clear_user(fx, sizeof(struct i387_fxsave_struct))) + err = -EFAULT; + /* No need to clear here because the caller clears USED_MATH */ + return err; +} + +static inline void fpu_fxsave(struct fpu *fpu) +{ + /* Using "rex64; fxsave %0" is broken because, if the memory operand + uses any extended registers for addressing, a second REX prefix + will be generated (to the assembler, rex64 followed by semicolon + is a separate instruction), and hence the 64-bitness is lost. */ + +#ifdef CONFIG_AS_FXSAVEQ + /* Using "fxsaveq %0" would be the ideal choice, but is only supported + starting with gas 2.16. */ + __asm__ __volatile__("fxsaveq %0" + : "=m" (fpu->state->fxsave)); +#else + /* Using, as a workaround, the properly prefixed form below isn't + accepted by any binutils version so far released, complaining that + the same type of prefix is used twice if an extended register is + needed for addressing (fix submitted to mainline 2005-11-21). 
+ asm volatile("rex64/fxsave %0" + : "=m" (fpu->state->fxsave)); + This, however, we can work around by forcing the compiler to select + an addressing mode that doesn't require extended registers. */ + asm volatile("rex64/fxsave (%[fx])" + : "=m" (fpu->state->fxsave) + : [fx] "R" (&fpu->state->fxsave)); +#endif +} + +#else /* CONFIG_X86_32 */ + +/* perform fxrstor iff the processor has extended states, otherwise frstor */ +static inline int fxrstor_checking(struct i387_fxsave_struct *fx) +{ + /* + * The "nop" is needed to make the instructions the same + * length. + */ + alternative_input( + "nop ; frstor %1", + "fxrstor %1", + X86_FEATURE_FXSR, + "m" (*fx)); + + return 0; +} + +static inline void fpu_fxsave(struct fpu *fpu) +{ + asm volatile("fxsave %[fx]" + : [fx] "=m" (fpu->state->fxsave)); +} + +#endif /* CONFIG_X86_64 */ + +/* + * These must be called with preempt disabled. Returns + * 'true' if the FPU state is still intact. + */ +static inline int fpu_save_init(struct fpu *fpu) +{ + if (use_xsave()) { + fpu_xsave(fpu); + + /* + * xsave header may indicate the init state of the FP. + */ + if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) + return 1; + } else if (use_fxsr()) { + fpu_fxsave(fpu); + } else { + asm volatile("fnsave %[fx]; fwait" + : [fx] "=m" (fpu->state->fsave)); + return 0; + } + + /* + * If exceptions are pending, we need to clear them so + * that we don't randomly get exceptions later. + * + * FIXME! Is this perhaps only true for the old-style + * irq13 case? Maybe we could leave the x87 state + * intact otherwise? + */ + if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) { + asm volatile("fnclex"); + return 0; + } + return 1; +} + +static inline int __save_init_fpu(struct task_struct *tsk) +{ + return fpu_save_init(&tsk->thread.fpu); +} + +static inline int fpu_fxrstor_checking(struct fpu *fpu) +{ + return fxrstor_checking(&fpu->state->fxsave); +} + +static inline int fpu_restore_checking(struct fpu *fpu) +{ + if (use_xsave()) + return fpu_xrstor_checking(fpu); + else + return fpu_fxrstor_checking(fpu); +} + +static inline int restore_fpu_checking(struct task_struct *tsk) +{ + /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception + is pending. Clear the x87 state here by setting it to fixed + values. "m" is a random variable that should be in L1 */ + alternative_input( + ASM_NOP8 ASM_NOP2, + "emms\n\t" /* clear stack tags */ + "fildl %P[addr]", /* set F?P to defined value */ + X86_FEATURE_FXSAVE_LEAK, + [addr] "m" (tsk->thread.fpu.has_fpu)); + + return fpu_restore_checking(&tsk->thread.fpu); +} + +/* + * Software FPU state helpers. Careful: these need to + * be preemption protection *and* they need to be + * properly paired with the CR0.TS changes! + */ +static inline int __thread_has_fpu(struct task_struct *tsk) +{ + return tsk->thread.fpu.has_fpu; +} + +/* Must be paired with an 'stts' after! */ +static inline void __thread_clear_has_fpu(struct task_struct *tsk) +{ + tsk->thread.fpu.has_fpu = 0; + percpu_write(fpu_owner_task, NULL); +} + +/* Must be paired with a 'clts' before! */ +static inline void __thread_set_has_fpu(struct task_struct *tsk) +{ + tsk->thread.fpu.has_fpu = 1; + percpu_write(fpu_owner_task, tsk); +} + +/* + * Encapsulate the CR0.TS handling together with the + * software flag. + * + * These generally need preemption protection to work, + * do try to avoid using these on their own. 
+ */ +static inline void __thread_fpu_end(struct task_struct *tsk) +{ + __thread_clear_has_fpu(tsk); + stts(); +} + +static inline void __thread_fpu_begin(struct task_struct *tsk) +{ + clts(); + __thread_set_has_fpu(tsk); +} + +/* + * FPU state switching for scheduling. + * + * This is a two-stage process: + * + * - switch_fpu_prepare() saves the old state and + * sets the new state of the CR0.TS bit. This is + * done within the context of the old process. + * + * - switch_fpu_finish() restores the new state as + * necessary. + */ +typedef struct { int preload; } fpu_switch_t; + +/* + * FIXME! We could do a totally lazy restore, but we need to + * add a per-cpu "this was the task that last touched the FPU + * on this CPU" variable, and the task needs to have a "I last + * touched the FPU on this CPU" and check them. + * + * We don't do that yet, so "fpu_lazy_restore()" always returns + * false, but some day.. + */ +static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu) +{ + return new == percpu_read_stable(fpu_owner_task) && + cpu == new->thread.fpu.last_cpu; +} + +static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu) +{ + fpu_switch_t fpu; + + fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; + if (__thread_has_fpu(old)) { + if (!__save_init_fpu(old)) + cpu = ~0; + old->thread.fpu.last_cpu = cpu; + old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */ + + /* Don't change CR0.TS if we just switch! */ + if (fpu.preload) { + new->fpu_counter++; + __thread_set_has_fpu(new); + prefetch(new->thread.fpu.state); + } else + stts(); + } else { + old->fpu_counter = 0; + old->thread.fpu.last_cpu = ~0; + if (fpu.preload) { + new->fpu_counter++; + if (fpu_lazy_restore(new, cpu)) + fpu.preload = 0; + else + prefetch(new->thread.fpu.state); + __thread_fpu_begin(new); + } + } + return fpu; +} + +/* + * By the time this gets called, we've already cleared CR0.TS and + * given the process the FPU if we are going to preload the FPU + * state - all we need to do is to conditionally restore the register + * state itself. + */ +static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) +{ + if (fpu.preload) { + if (unlikely(restore_fpu_checking(new))) + __thread_fpu_end(new); + } +} + +/* + * Signal frame handlers... + */ +extern int save_i387_xstate(void __user *buf); +extern int restore_i387_xstate(void __user *buf); + +static inline void __clear_fpu(struct task_struct *tsk) +{ + if (__thread_has_fpu(tsk)) { + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" + "2:\n" + _ASM_EXTABLE(1b, 2b)); + __thread_fpu_end(tsk); + } +} + +/* + * The actual user_fpu_begin/end() functions + * need to be preemption-safe. + * + * NOTE! user_fpu_end() must be used only after you + * have saved the FP state, and user_fpu_begin() must + * be used only immediately before restoring it. + * These functions do not do any save/restore on + * their own. 
+ */ +static inline void user_fpu_end(void) +{ + preempt_disable(); + __thread_fpu_end(current); + preempt_enable(); +} + +static inline void user_fpu_begin(void) +{ + preempt_disable(); + if (!user_has_fpu()) + __thread_fpu_begin(current); + preempt_enable(); +} + +/* + * These disable preemption on their own and are safe + */ +static inline void save_init_fpu(struct task_struct *tsk) +{ + WARN_ON_ONCE(!__thread_has_fpu(tsk)); + preempt_disable(); + __save_init_fpu(tsk); + __thread_fpu_end(tsk); + preempt_enable(); +} + +static inline void clear_fpu(struct task_struct *tsk) +{ + preempt_disable(); + __clear_fpu(tsk); + preempt_enable(); +} + +/* + * i387 state interaction + */ +static inline unsigned short get_fpu_cwd(struct task_struct *tsk) +{ + if (cpu_has_fxsr) { + return tsk->thread.fpu.state->fxsave.cwd; + } else { + return (unsigned short)tsk->thread.fpu.state->fsave.cwd; + } +} + +static inline unsigned short get_fpu_swd(struct task_struct *tsk) +{ + if (cpu_has_fxsr) { + return tsk->thread.fpu.state->fxsave.swd; + } else { + return (unsigned short)tsk->thread.fpu.state->fsave.swd; + } +} + +static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) +{ + if (cpu_has_xmm) { + return tsk->thread.fpu.state->fxsave.mxcsr; + } else { + return MXCSR_DEFAULT; + } +} + +static bool fpu_allocated(struct fpu *fpu) +{ + return fpu->state != NULL; +} + +static inline int fpu_alloc(struct fpu *fpu) +{ + if (fpu_allocated(fpu)) + return 0; + fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); + if (!fpu->state) + return -ENOMEM; + WARN_ON((unsigned long)fpu->state & 15); + return 0; +} + +static inline void fpu_free(struct fpu *fpu) +{ + if (fpu->state) { + kmem_cache_free(task_xstate_cachep, fpu->state); + fpu->state = NULL; + } +} + +static inline void fpu_copy(struct fpu *dst, struct fpu *src) +{ + memcpy(dst->state, src->state, xstate_size); +} + +extern void fpu_finit(struct fpu *fpu); + +#endif diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index d09bb03653f..71ecbcba1a4 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -9,7 +9,6 @@ #include <asm/asm.h> #include <asm/errno.h> #include <asm/processor.h> -#include <asm/system.h> #define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ asm volatile("1:\t" insn "\n" \ diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index da0b3ca815b..382f75d735f 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -7,7 +7,6 @@ typedef struct { unsigned int __softirq_pending; unsigned int __nmi_count; /* arch dependent */ - unsigned int irq0_irqs; #ifdef CONFIG_X86_LOCAL_APIC unsigned int apic_timer_irqs; /* arch dependent */ unsigned int irq_spurious_count; diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index 3bd04022fd0..302a323b3f6 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -61,7 +61,7 @@ void *kmap(struct page *page); void kunmap(struct page *page); void *kmap_atomic_prot(struct page *page, pgprot_t prot); -void *__kmap_atomic(struct page *page); +void *kmap_atomic(struct page *page); void __kunmap_atomic(void *kvaddr); void *kmap_atomic_pfn(unsigned long pfn); void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 247904945d3..257d9cca214 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -13,476 +13,18 @@ #ifndef __ASSEMBLY__ #include 
<linux/sched.h> -#include <linux/kernel_stat.h> -#include <linux/regset.h> #include <linux/hardirq.h> -#include <linux/slab.h> -#include <asm/asm.h> -#include <asm/cpufeature.h> -#include <asm/processor.h> -#include <asm/sigcontext.h> -#include <asm/user.h> -#include <asm/uaccess.h> -#include <asm/xsave.h> -extern unsigned int sig_xstate_size; -extern void fpu_init(void); -extern void mxcsr_feature_mask_init(void); +struct pt_regs; +struct user_i387_struct; + extern int init_fpu(struct task_struct *child); -extern void math_state_restore(void); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); +extern void math_state_restore(void); -DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); - -extern user_regset_active_fn fpregs_active, xfpregs_active; -extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, - xstateregs_get; -extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, - xstateregs_set; - -/* - * xstateregs_active == fpregs_active. Please refer to the comment - * at the definition of fpregs_active. - */ -#define xstateregs_active fpregs_active - -extern struct _fpx_sw_bytes fx_sw_reserved; -#ifdef CONFIG_IA32_EMULATION -extern unsigned int sig_xstate_ia32_size; -extern struct _fpx_sw_bytes fx_sw_reserved_ia32; -struct _fpstate_ia32; -struct _xstate_ia32; -extern int save_i387_xstate_ia32(void __user *buf); -extern int restore_i387_xstate_ia32(void __user *buf); -#endif - -#ifdef CONFIG_MATH_EMULATION -extern void finit_soft_fpu(struct i387_soft_struct *soft); -#else -static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} -#endif - -#define X87_FSW_ES (1 << 7) /* Exception Summary */ - -static __always_inline __pure bool use_xsaveopt(void) -{ - return static_cpu_has(X86_FEATURE_XSAVEOPT); -} - -static __always_inline __pure bool use_xsave(void) -{ - return static_cpu_has(X86_FEATURE_XSAVE); -} - -static __always_inline __pure bool use_fxsr(void) -{ - return static_cpu_has(X86_FEATURE_FXSR); -} - -extern void __sanitize_i387_state(struct task_struct *); - -static inline void sanitize_i387_state(struct task_struct *tsk) -{ - if (!use_xsaveopt()) - return; - __sanitize_i387_state(tsk); -} - -#ifdef CONFIG_X86_64 -static inline int fxrstor_checking(struct i387_fxsave_struct *fx) -{ - int err; - - /* See comment in fxsave() below. */ -#ifdef CONFIG_AS_FXSAVEQ - asm volatile("1: fxrstorq %[fx]\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err) - : [fx] "m" (*fx), "0" (0)); -#else - asm volatile("1: rex64/fxrstor (%[fx])\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err) - : [fx] "R" (fx), "m" (*fx), "0" (0)); -#endif - return err; -} - -static inline int fxsave_user(struct i387_fxsave_struct __user *fx) -{ - int err; - - /* - * Clear the bytes not touched by the fxsave and reserved - * for the SW usage. - */ - err = __clear_user(&fx->sw_reserved, - sizeof(struct _fpx_sw_bytes)); - if (unlikely(err)) - return -EFAULT; - - /* See comment in fxsave() below. 
*/ -#ifdef CONFIG_AS_FXSAVEQ - asm volatile("1: fxsaveq %[fx]\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err), [fx] "=m" (*fx) - : "0" (0)); -#else - asm volatile("1: rex64/fxsave (%[fx])\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err), "=m" (*fx) - : [fx] "R" (fx), "0" (0)); -#endif - if (unlikely(err) && - __clear_user(fx, sizeof(struct i387_fxsave_struct))) - err = -EFAULT; - /* No need to clear here because the caller clears USED_MATH */ - return err; -} - -static inline void fpu_fxsave(struct fpu *fpu) -{ - /* Using "rex64; fxsave %0" is broken because, if the memory operand - uses any extended registers for addressing, a second REX prefix - will be generated (to the assembler, rex64 followed by semicolon - is a separate instruction), and hence the 64-bitness is lost. */ - -#ifdef CONFIG_AS_FXSAVEQ - /* Using "fxsaveq %0" would be the ideal choice, but is only supported - starting with gas 2.16. */ - __asm__ __volatile__("fxsaveq %0" - : "=m" (fpu->state->fxsave)); -#else - /* Using, as a workaround, the properly prefixed form below isn't - accepted by any binutils version so far released, complaining that - the same type of prefix is used twice if an extended register is - needed for addressing (fix submitted to mainline 2005-11-21). - asm volatile("rex64/fxsave %0" - : "=m" (fpu->state->fxsave)); - This, however, we can work around by forcing the compiler to select - an addressing mode that doesn't require extended registers. */ - asm volatile("rex64/fxsave (%[fx])" - : "=m" (fpu->state->fxsave) - : [fx] "R" (&fpu->state->fxsave)); -#endif -} - -#else /* CONFIG_X86_32 */ - -/* perform fxrstor iff the processor has extended states, otherwise frstor */ -static inline int fxrstor_checking(struct i387_fxsave_struct *fx) -{ - /* - * The "nop" is needed to make the instructions the same - * length. - */ - alternative_input( - "nop ; frstor %1", - "fxrstor %1", - X86_FEATURE_FXSR, - "m" (*fx)); - - return 0; -} - -static inline void fpu_fxsave(struct fpu *fpu) -{ - asm volatile("fxsave %[fx]" - : [fx] "=m" (fpu->state->fxsave)); -} - -#endif /* CONFIG_X86_64 */ - -/* - * These must be called with preempt disabled. Returns - * 'true' if the FPU state is still intact. - */ -static inline int fpu_save_init(struct fpu *fpu) -{ - if (use_xsave()) { - fpu_xsave(fpu); - - /* - * xsave header may indicate the init state of the FP. - */ - if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) - return 1; - } else if (use_fxsr()) { - fpu_fxsave(fpu); - } else { - asm volatile("fnsave %[fx]; fwait" - : [fx] "=m" (fpu->state->fsave)); - return 0; - } - - /* - * If exceptions are pending, we need to clear them so - * that we don't randomly get exceptions later. - * - * FIXME! Is this perhaps only true for the old-style - * irq13 case? Maybe we could leave the x87 state - * intact otherwise? 
- */ - if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) { - asm volatile("fnclex"); - return 0; - } - return 1; -} - -static inline int __save_init_fpu(struct task_struct *tsk) -{ - return fpu_save_init(&tsk->thread.fpu); -} - -static inline int fpu_fxrstor_checking(struct fpu *fpu) -{ - return fxrstor_checking(&fpu->state->fxsave); -} - -static inline int fpu_restore_checking(struct fpu *fpu) -{ - if (use_xsave()) - return fpu_xrstor_checking(fpu); - else - return fpu_fxrstor_checking(fpu); -} - -static inline int restore_fpu_checking(struct task_struct *tsk) -{ - /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception - is pending. Clear the x87 state here by setting it to fixed - values. "m" is a random variable that should be in L1 */ - alternative_input( - ASM_NOP8 ASM_NOP2, - "emms\n\t" /* clear stack tags */ - "fildl %P[addr]", /* set F?P to defined value */ - X86_FEATURE_FXSAVE_LEAK, - [addr] "m" (tsk->thread.fpu.has_fpu)); - - return fpu_restore_checking(&tsk->thread.fpu); -} - -/* - * Software FPU state helpers. Careful: these need to - * be preemption protection *and* they need to be - * properly paired with the CR0.TS changes! - */ -static inline int __thread_has_fpu(struct task_struct *tsk) -{ - return tsk->thread.fpu.has_fpu; -} - -/* Must be paired with an 'stts' after! */ -static inline void __thread_clear_has_fpu(struct task_struct *tsk) -{ - tsk->thread.fpu.has_fpu = 0; - percpu_write(fpu_owner_task, NULL); -} - -/* Must be paired with a 'clts' before! */ -static inline void __thread_set_has_fpu(struct task_struct *tsk) -{ - tsk->thread.fpu.has_fpu = 1; - percpu_write(fpu_owner_task, tsk); -} - -/* - * Encapsulate the CR0.TS handling together with the - * software flag. - * - * These generally need preemption protection to work, - * do try to avoid using these on their own. - */ -static inline void __thread_fpu_end(struct task_struct *tsk) -{ - __thread_clear_has_fpu(tsk); - stts(); -} - -static inline void __thread_fpu_begin(struct task_struct *tsk) -{ - clts(); - __thread_set_has_fpu(tsk); -} - -/* - * FPU state switching for scheduling. - * - * This is a two-stage process: - * - * - switch_fpu_prepare() saves the old state and - * sets the new state of the CR0.TS bit. This is - * done within the context of the old process. - * - * - switch_fpu_finish() restores the new state as - * necessary. - */ -typedef struct { int preload; } fpu_switch_t; - -/* - * FIXME! We could do a totally lazy restore, but we need to - * add a per-cpu "this was the task that last touched the FPU - * on this CPU" variable, and the task needs to have a "I last - * touched the FPU on this CPU" and check them. - * - * We don't do that yet, so "fpu_lazy_restore()" always returns - * false, but some day.. - */ -static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu) -{ - return new == percpu_read_stable(fpu_owner_task) && - cpu == new->thread.fpu.last_cpu; -} - -static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu) -{ - fpu_switch_t fpu; - - fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; - if (__thread_has_fpu(old)) { - if (!__save_init_fpu(old)) - cpu = ~0; - old->thread.fpu.last_cpu = cpu; - old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */ - - /* Don't change CR0.TS if we just switch! 
*/ - if (fpu.preload) { - new->fpu_counter++; - __thread_set_has_fpu(new); - prefetch(new->thread.fpu.state); - } else - stts(); - } else { - old->fpu_counter = 0; - old->thread.fpu.last_cpu = ~0; - if (fpu.preload) { - new->fpu_counter++; - if (fpu_lazy_restore(new, cpu)) - fpu.preload = 0; - else - prefetch(new->thread.fpu.state); - __thread_fpu_begin(new); - } - } - return fpu; -} - -/* - * By the time this gets called, we've already cleared CR0.TS and - * given the process the FPU if we are going to preload the FPU - * state - all we need to do is to conditionally restore the register - * state itself. - */ -static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) -{ - if (fpu.preload) { - if (unlikely(restore_fpu_checking(new))) - __thread_fpu_end(new); - } -} - -/* - * Signal frame handlers... - */ -extern int save_i387_xstate(void __user *buf); -extern int restore_i387_xstate(void __user *buf); - -static inline void __clear_fpu(struct task_struct *tsk) -{ - if (__thread_has_fpu(tsk)) { - /* Ignore delayed exceptions from user space */ - asm volatile("1: fwait\n" - "2:\n" - _ASM_EXTABLE(1b, 2b)); - __thread_fpu_end(tsk); - } -} - -/* - * Were we in an interrupt that interrupted kernel mode? - * - * We can do a kernel_fpu_begin/end() pair *ONLY* if that - * pair does nothing at all: the thread must not have fpu (so - * that we don't try to save the FPU state), and TS must - * be set (so that the clts/stts pair does nothing that is - * visible in the interrupted kernel thread). - */ -static inline bool interrupted_kernel_fpu_idle(void) -{ - return !__thread_has_fpu(current) && - (read_cr0() & X86_CR0_TS); -} - -/* - * Were we in user mode (or vm86 mode) when we were - * interrupted? - * - * Doing kernel_fpu_begin/end() is ok if we are running - * in an interrupt context from user mode - we'll just - * save the FPU state as required. - */ -static inline bool interrupted_user_mode(void) -{ - struct pt_regs *regs = get_irq_regs(); - return regs && user_mode_vm(regs); -} - -/* - * Can we use the FPU in kernel mode with the - * whole "kernel_fpu_begin/end()" sequence? - * - * It's always ok in process context (ie "not interrupt") - * but it is sometimes ok even from an irq. - */ -static inline bool irq_fpu_usable(void) -{ - return !in_interrupt() || - interrupted_user_mode() || - interrupted_kernel_fpu_idle(); -} - -static inline void kernel_fpu_begin(void) -{ - struct task_struct *me = current; - - WARN_ON_ONCE(!irq_fpu_usable()); - preempt_disable(); - if (__thread_has_fpu(me)) { - __save_init_fpu(me); - __thread_clear_has_fpu(me); - /* We do 'stts()' in kernel_fpu_end() */ - } else { - percpu_write(fpu_owner_task, NULL); - clts(); - } -} - -static inline void kernel_fpu_end(void) -{ - stts(); - preempt_enable(); -} +extern bool irq_fpu_usable(void); +extern void kernel_fpu_begin(void); +extern void kernel_fpu_end(void); /* * Some instructions like VIA's padlock instructions generate a spurious @@ -524,126 +66,13 @@ static inline void irq_ts_restore(int TS_state) * we can just assume we have FPU access - typically * to save the FP state - we'll just take a #NM * fault and get the FPU access back. - * - * The actual user_fpu_begin/end() functions - * need to be preemption-safe, though. - * - * NOTE! user_fpu_end() must be used only after you - * have saved the FP state, and user_fpu_begin() must - * be used only immediately before restoring it. - * These functions do not do any save/restore on - * their own. 
*/ static inline int user_has_fpu(void) { - return __thread_has_fpu(current); -} - -static inline void user_fpu_end(void) -{ - preempt_disable(); - __thread_fpu_end(current); - preempt_enable(); -} - -static inline void user_fpu_begin(void) -{ - preempt_disable(); - if (!user_has_fpu()) - __thread_fpu_begin(current); - preempt_enable(); -} - -/* - * These disable preemption on their own and are safe - */ -static inline void save_init_fpu(struct task_struct *tsk) -{ - WARN_ON_ONCE(!__thread_has_fpu(tsk)); - preempt_disable(); - __save_init_fpu(tsk); - __thread_fpu_end(tsk); - preempt_enable(); -} - -static inline void unlazy_fpu(struct task_struct *tsk) -{ - preempt_disable(); - if (__thread_has_fpu(tsk)) { - __save_init_fpu(tsk); - __thread_fpu_end(tsk); - } else - tsk->fpu_counter = 0; - preempt_enable(); -} - -static inline void clear_fpu(struct task_struct *tsk) -{ - preempt_disable(); - __clear_fpu(tsk); - preempt_enable(); -} - -/* - * i387 state interaction - */ -static inline unsigned short get_fpu_cwd(struct task_struct *tsk) -{ - if (cpu_has_fxsr) { - return tsk->thread.fpu.state->fxsave.cwd; - } else { - return (unsigned short)tsk->thread.fpu.state->fsave.cwd; - } -} - -static inline unsigned short get_fpu_swd(struct task_struct *tsk) -{ - if (cpu_has_fxsr) { - return tsk->thread.fpu.state->fxsave.swd; - } else { - return (unsigned short)tsk->thread.fpu.state->fsave.swd; - } -} - -static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) -{ - if (cpu_has_xmm) { - return tsk->thread.fpu.state->fxsave.mxcsr; - } else { - return MXCSR_DEFAULT; - } -} - -static bool fpu_allocated(struct fpu *fpu) -{ - return fpu->state != NULL; -} - -static inline int fpu_alloc(struct fpu *fpu) -{ - if (fpu_allocated(fpu)) - return 0; - fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); - if (!fpu->state) - return -ENOMEM; - WARN_ON((unsigned long)fpu->state & 15); - return 0; -} - -static inline void fpu_free(struct fpu *fpu) -{ - if (fpu->state) { - kmem_cache_free(task_xstate_cachep, fpu->state); - fpu->state = NULL; - } -} - -static inline void fpu_copy(struct fpu *dst, struct fpu *src) -{ - memcpy(dst->state, src->state, xstate_size); + return current->thread.fpu.has_fpu; } -extern void fpu_finit(struct fpu *fpu); +extern void unlazy_fpu(struct task_struct *tsk); #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 1f7e6251728..ee52760549f 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -43,6 +43,15 @@ struct ucontext_ia32 { compat_sigset_t uc_sigmask; /* mask last for extensibility */ }; +struct ucontext_x32 { + unsigned int uc_flags; + unsigned int uc_link; + stack_ia32_t uc_stack; + unsigned int uc__pad0; /* needed for alignment */ + struct sigcontext uc_mcontext; /* the 64-bit sigcontext type */ + compat_sigset_t uc_sigmask; /* mask last for extensibility */ +}; + /* This matches struct stat64 in glibc2.2, hence the absolutely * insane amounts of padding around dev_t's. */ @@ -116,6 +125,15 @@ typedef struct compat_siginfo { compat_clock_t _stime; } _sigchld; + /* SIGCHLD (x32 version) */ + struct { + unsigned int _pid; /* which child */ + unsigned int _uid; /* sender's uid */ + int _status; /* exit code */ + compat_s64 _utime; + compat_s64 _stime; + } _sigchld_x32; + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ struct { unsigned int _addr; /* faulting insn/memory ref. 
*/ diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h index f49253d7571..c5d1785373e 100644 --- a/arch/x86/include/asm/idle.h +++ b/arch/x86/include/asm/idle.h @@ -14,6 +14,7 @@ void exit_idle(void); #else /* !CONFIG_X86_64 */ static inline void enter_idle(void) { } static inline void exit_idle(void) { } +static inline void __exit_idle(void) { } #endif /* CONFIG_X86_64 */ void amd_e400_remove_cpu(int cpu); diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h index 205b063e3e3..74a2e312e8a 100644 --- a/arch/x86/include/asm/inat.h +++ b/arch/x86/include/asm/inat.h @@ -97,11 +97,12 @@ /* Attribute search APIs */ extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); +extern int inat_get_last_prefix_id(insn_byte_t last_pfx); extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, - insn_byte_t last_pfx, + int lpfx_id, insn_attr_t esc_attr); extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, - insn_byte_t last_pfx, + int lpfx_id, insn_attr_t esc_attr); extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 74df3f1eddf..48eb30a8606 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -96,12 +96,6 @@ struct insn { #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ -/* The last prefix is needed for two-byte and three-byte opcodes */ -static inline insn_byte_t insn_last_prefix(struct insn *insn) -{ - return insn->prefixes.bytes[3]; -} - extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); extern void insn_get_prefixes(struct insn *insn); extern void insn_get_opcode(struct insn *insn); @@ -160,6 +154,18 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn) return X86_VEX_P(insn->vex_prefix.bytes[2]); } +/* Get the last prefix id from last prefix or VEX prefix */ +static inline int insn_last_prefix_id(struct insn *insn) +{ + if (insn_is_avx(insn)) + return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ + + if (insn->prefixes.bytes[3]) + return inat_get_last_prefix_id(insn->prefixes.bytes[3]); + + return 0; +} + /* Offset of each field from kaddr */ static inline int insn_offset_rex_prefix(struct insn *insn) { diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index ba1b11ac1cf..2c4943de515 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -5,7 +5,6 @@ #include <asm/mpspec.h> #include <asm/apicdef.h> #include <asm/irq_vectors.h> -#include <asm/x86_init.h> /* * Intel IO-APIC support for SMP and UP systems. 
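The inat/insn hunks just above stop passing the raw last-prefix byte into the attribute tables and instead pass a compact prefix id: insn_last_prefix_id() returns 0, a table-derived id, or VEX.pp (which doubles as a SIMD prefix id), and inat_get_escape_attribute()/inat_get_group_attribute() now take that int. A sketch of a decoder-side caller, using only the signatures shown in those hunks; the wrapper function is hypothetical:

	/* Sketch: attribute lookup for a two/three-byte opcode after the
	 * prefix-id conversion. Returns the escape-table attribute. */
	static insn_attr_t example_escape_lookup(struct insn *insn,
						 insn_byte_t opcode,
						 insn_attr_t esc_attr)
	{
		int lpfx_id = insn_last_prefix_id(insn);	/* 0 if none */

		return inat_get_escape_attribute(opcode, lpfx_id, esc_attr);
	}
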
@@ -22,6 +21,15 @@ #define IO_APIC_REDIR_LEVEL_TRIGGER (1 << 15) #define IO_APIC_REDIR_MASKED (1 << 16) +struct io_apic_ops { + void (*init) (void); + unsigned int (*read) (unsigned int apic, unsigned int reg); + void (*write) (unsigned int apic, unsigned int reg, unsigned int value); + void (*modify)(unsigned int apic, unsigned int reg, unsigned int value); +}; + +void __init set_io_apic_ops(const struct io_apic_ops *); + /* * The structure of the IO-APIC: */ @@ -148,6 +156,7 @@ struct io_apic_irq_attr; extern int io_apic_set_pci_routing(struct device *dev, int irq, struct io_apic_irq_attr *irq_attr); void setup_IO_APIC_irq_extra(u32 gsi); +extern void ioapic_and_gsi_init(void); extern void ioapic_insert_resources(void); int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr); @@ -176,35 +185,12 @@ extern void mp_save_irq(struct mpc_intsrc *m); extern void disable_ioapic_support(void); - -void __init native_ioapic_init_mappings(void); -unsigned int native_ioapic_read(unsigned int apic, unsigned int reg); -void native_ioapic_write(unsigned int apic, unsigned int reg, - unsigned int val); -void native_ioapic_modify(unsigned int apic, unsigned int reg, - unsigned int val); - -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) -{ - return x86_ioapic.read(apic, reg); -} - -static inline void io_apic_write(unsigned int apic, unsigned int reg, - unsigned int value) -{ - x86_ioapic.write(apic, reg, value); -} - -static inline void io_apic_modify(unsigned int apic, unsigned int reg, - unsigned int value) -{ - x86_ioapic.modify(apic, reg, value); -} #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 #define setup_ioapic_ids_from_mpc x86_init_noop static const int timer_through_8259 = 0; +static inline void ioapic_and_gsi_init(void) { } static inline void ioapic_insert_resources(void) { } #define gsi_top (NR_IRQS_LEGACY) static inline int mp_find_ioapic(u32 gsi) { return 0; } @@ -226,10 +212,6 @@ static inline int restore_ioapic_entries(void) static inline void mp_save_irq(struct mpc_intsrc *m) { }; static inline void disable_ioapic_support(void) { } -#define native_ioapic_init_mappings NULL -#define native_ioapic_read NULL -#define native_ioapic_write NULL -#define native_ioapic_modify NULL #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/irq_controller.h b/arch/x86/include/asm/irq_controller.h deleted file mode 100644 index 423bbbddf36..00000000000 --- a/arch/x86/include/asm/irq_controller.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef __IRQ_CONTROLLER__ -#define __IRQ_CONTROLLER__ - -struct irq_domain { - int (*xlate)(struct irq_domain *h, const u32 *intspec, u32 intsize, - u32 *out_hwirq, u32 *out_type); - void *priv; - struct device_node *controller; - struct list_head l; -}; - -#endif diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index a32b18ce6ea..3a16c1483b4 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -9,12 +9,12 @@ #define JUMP_LABEL_NOP_SIZE 5 -#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" +#define STATIC_KEY_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("1:" - JUMP_LABEL_INITIAL_NOP + STATIC_KEY_INITIAL_NOP ".pushsection __jump_table, \"aw\" \n\t" _ASM_ALIGN "\n\t" _ASM_PTR "1b, %l[l_yes], %c0 \n\t" diff --git a/arch/x86/include/asm/kgdb.h 
b/arch/x86/include/asm/kgdb.h index 77e95f54570..332f98c9111 100644 --- a/arch/x86/include/asm/kgdb.h +++ b/arch/x86/include/asm/kgdb.h @@ -64,11 +64,15 @@ enum regnames { GDB_PS, /* 17 */ GDB_CS, /* 18 */ GDB_SS, /* 19 */ + GDB_DS, /* 20 */ + GDB_ES, /* 21 */ + GDB_FS, /* 22 */ + GDB_GS, /* 23 */ }; #define GDB_ORIG_AX 57 -#define DBG_MAX_REG_NUM 20 -/* 17 64 bit regs and 3 32 bit regs */ -#define NUMREGBYTES ((17 * 8) + (3 * 4)) +#define DBG_MAX_REG_NUM 24 +/* 17 64 bit regs and 5 32 bit regs */ +#define NUMREGBYTES ((17 * 8) + (5 * 4)) #endif /* ! CONFIG_X86_32 */ static inline void arch_kgdb_breakpoint(void) diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 4d8dcbdfc12..e7d1c194d27 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -321,4 +321,8 @@ struct kvm_xcrs { __u64 padding[16]; }; +/* definition of registers in kvm_run */ +struct kvm_sync_regs { +}; + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 7b9cfc4878a..c222e1a1b12 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -176,6 +176,7 @@ struct x86_emulate_ops { void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); + void (*set_rflags)(struct x86_emulate_ctxt *ctxt, ulong val); int (*cpl)(struct x86_emulate_ctxt *ctxt); int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); @@ -388,7 +389,7 @@ bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); #define EMULATION_INTERCEPTED 2 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); int emulator_task_switch(struct x86_emulate_ctxt *ctxt, - u16 tss_selector, int reason, + u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); #endif /* _ASM_X86_KVM_X86_EMULATE_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 52d6640a5ca..e216ba066e7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -29,7 +29,7 @@ #include <asm/msr-index.h> #define KVM_MAX_VCPUS 254 -#define KVM_SOFT_MAX_VCPUS 64 +#define KVM_SOFT_MAX_VCPUS 160 #define KVM_MEMORY_SLOTS 32 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 @@ -181,13 +181,6 @@ struct kvm_mmu_memory_cache { void *objects[KVM_NR_MEM_OBJS]; }; -#define NR_PTE_CHAIN_ENTRIES 5 - -struct kvm_pte_chain { - u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES]; - struct hlist_node link; -}; - /* * kvm_mmu_page_role, below, is defined as: * @@ -427,12 +420,16 @@ struct kvm_vcpu_arch { u64 last_guest_tsc; u64 last_kernel_ns; - u64 last_tsc_nsec; - u64 last_tsc_write; - u32 virtual_tsc_khz; + u64 last_host_tsc; + u64 tsc_offset_adjustment; + u64 this_tsc_nsec; + u64 this_tsc_write; + u8 this_tsc_generation; bool tsc_catchup; - u32 tsc_catchup_mult; - s8 tsc_catchup_shift; + bool tsc_always_catchup; + s8 virtual_tsc_shift; + u32 virtual_tsc_mult; + u32 virtual_tsc_khz; atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ unsigned nmi_pending; /* NMI queued after currently running handler */ @@ -478,6 +475,21 @@ struct kvm_vcpu_arch { u32 id; bool send_user_only; } apf; + + /* OSVW MSRs (AMD only) */ + struct { + u64 length; + u64 status; + } osvw; +}; + +struct kvm_lpage_info { + unsigned long 
rmap_pde; + int write_count; +}; + +struct kvm_arch_memory_slot { + struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; }; struct kvm_arch { @@ -511,8 +523,12 @@ struct kvm_arch { s64 kvmclock_offset; raw_spinlock_t tsc_write_lock; u64 last_tsc_nsec; - u64 last_tsc_offset; u64 last_tsc_write; + u32 last_tsc_khz; + u64 cur_tsc_nsec; + u64 cur_tsc_write; + u64 cur_tsc_offset; + u8 cur_tsc_generation; struct kvm_xen_hvm_config xen_hvm_config; @@ -644,7 +660,7 @@ struct kvm_x86_ops { u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); int (*get_lpage_level)(void); bool (*rdtscp_supported)(void); - void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment); + void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host); void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); @@ -652,7 +668,7 @@ struct kvm_x86_ops { bool (*has_wbinvd_exit)(void); - void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz); + void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale); void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); @@ -674,6 +690,17 @@ struct kvm_arch_async_pf { extern struct kvm_x86_ops *kvm_x86_ops; +static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, + s64 adjustment) +{ + kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, false); +} + +static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) +{ + kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, true); +} + int kvm_mmu_module_init(void); void kvm_mmu_module_exit(void); @@ -741,8 +768,8 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); -int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, - bool has_error_code, u32 error_code); +int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, + int reason, bool has_error_code, u32 error_code); int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 734c3767cfa..183922e13de 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -170,6 +170,9 @@ static inline int kvm_para_available(void) unsigned int eax, ebx, ecx, edx; char signature[13]; + if (boot_cpu_data.cpuid_level < 0) + return 0; /* So we don't blow up on old processors */ + cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); memcpy(signature + 0, &ebx, 4); memcpy(signature + 4, &ecx, 4); diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 9cdae5d47e8..c8bed0da434 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -3,7 +3,6 @@ #include <linux/percpu.h> -#include <asm/system.h> #include <linux/atomic.h> #include <asm/asm.h> diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h index 0e8e85bb7c5..d354fb781c5 100644 --- a/arch/x86/include/asm/mc146818rtc.h +++ b/arch/x86/include/asm/mc146818rtc.h @@ -5,7 +5,6 @@ #define _ASM_X86_MC146818RTC_H #include <asm/io.h> -#include <asm/system.h> #include <asm/processor.h> #include <linux/mc146818rtc.h> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 6aefb14cbbc..441520e4174 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ 
-151,7 +151,7 @@ static inline void enable_p5_mce(void) {} void mce_setup(struct mce *m); void mce_log(struct mce *m); -extern struct device *mce_device[CONFIG_NR_CPUS]; +DECLARE_PER_CPU(struct device *, mce_device); /* * Maximum banks number. diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index aba94f004b7..5f55e696276 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -7,9 +7,6 @@ /* * The x86 doesn't have a mmu context, but * we put the segment information here. - * - * exec_limit is used to track the range PROT_EXEC - * mappings span. */ typedef struct { void *ldt; @@ -22,11 +19,6 @@ typedef struct { struct mutex lock; void *vdso; - -#ifdef CONFIG_X86_32 - struct desc_struct user_cs; - unsigned long exec_limit; -#endif } mm_context_t; #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h index 0a0a9546043..fc18bf3ce7c 100644 --- a/arch/x86/include/asm/mrst.h +++ b/arch/x86/include/asm/mrst.h @@ -26,8 +26,8 @@ extern struct sfi_rtc_table_entry sfi_mrtc_array[]; * identified via MSRs. */ enum mrst_cpu_type { - MRST_CPU_CHIP_LINCROFT = 1, - MRST_CPU_CHIP_PENWELL, + /* 1 was Moorestown */ + MRST_CPU_CHIP_PENWELL = 2, }; extern enum mrst_cpu_type __mrst_cpu_chip; diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index a6962d9161a..ccb805966f6 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -56,6 +56,13 @@ #define MSR_OFFCORE_RSP_0 0x000001a6 #define MSR_OFFCORE_RSP_1 0x000001a7 +#define MSR_LBR_SELECT 0x000001c8 +#define MSR_LBR_TOS 0x000001c9 +#define MSR_LBR_NHM_FROM 0x00000680 +#define MSR_LBR_NHM_TO 0x000006c0 +#define MSR_LBR_CORE_FROM 0x00000040 +#define MSR_LBR_CORE_TO 0x00000060 + #define MSR_IA32_PEBS_ENABLE 0x000003f1 #define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_PERF_CAPABILITIES 0x00000345 diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index 4365ffdb461..7e3f17f92c6 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -29,18 +29,18 @@ #define MTRR_IOCTL_BASE 'M' -struct mtrr_sentry { - unsigned long base; /* Base address */ - unsigned int size; /* Size of region */ - unsigned int type; /* Type of region */ -}; - /* Warning: this structure has a different order from i386 on x86-64. The 32bit emulation code takes care of that. But you need to use this for 64bit, otherwise your X server will break. 
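The mce.h hunk above converts mce_device from a CONFIG_NR_CPUS-sized array of pointers into a per-CPU variable, so lookups go through the standard per-CPU accessors rather than array indexing. A sketch of the resulting access pattern; per_cpu() is the stock accessor and the helper around it is hypothetical:

	/* Sketch: fetching the MCE device for a given CPU after the
	 * DECLARE_PER_CPU conversion. Previously: mce_device[cpu]. */
	static struct device *example_mce_dev(unsigned int cpu)
	{
		return per_cpu(mce_device, cpu);
	}
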
*/ #ifdef __i386__ +struct mtrr_sentry { + unsigned long base; /* Base address */ + unsigned int size; /* Size of region */ + unsigned int type; /* Type of region */ +}; + struct mtrr_gentry { unsigned int regnum; /* Register number */ unsigned long base; /* Base address */ @@ -50,12 +50,20 @@ struct mtrr_gentry { #else /* __i386__ */ +struct mtrr_sentry { + __u64 base; /* Base address */ + __u32 size; /* Size of region */ + __u32 type; /* Type of region */ +}; + struct mtrr_gentry { - unsigned long base; /* Base address */ - unsigned int size; /* Size of region */ - unsigned int regnum; /* Register number */ - unsigned int type; /* Type of region */ + __u64 base; /* Base address */ + __u32 size; /* Size of region */ + __u32 regnum; /* Register number */ + __u32 type; /* Type of region */ + __u32 _pad; /* Unused */ }; + #endif /* !__i386__ */ struct mtrr_var_range { diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index bce688d54c1..e21fdd10479 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -55,7 +55,6 @@ extern unsigned long init_memory_mapping(unsigned long start, unsigned long end); extern void initmem_init(void); -extern void free_initmem(void); #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index cc4ad605c6f..aa0f9130836 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -10,6 +10,7 @@ #include <asm/paravirt_types.h> #ifndef __ASSEMBLY__ +#include <linux/bug.h> #include <linux/types.h> #include <linux/cpumask.h> @@ -230,9 +231,9 @@ static inline unsigned long long paravirt_sched_clock(void) return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); } -struct jump_label_key; -extern struct jump_label_key paravirt_steal_enabled; -extern struct jump_label_key paravirt_steal_rq_enabled; +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; static inline u64 paravirt_steal_clock(int cpu) { @@ -298,12 +299,6 @@ static inline void set_ldt(const void *addr, unsigned entries) { PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries); } -#ifdef CONFIG_X86_32 -static inline void load_user_cs_desc(unsigned int cpu, struct mm_struct *mm) -{ - PVOP_VCALL2(pv_cpu_ops.load_user_cs_desc, cpu, mm); -} -#endif /*CONFIG_X86_32*/ static inline void store_gdt(struct desc_ptr *dtr) { PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr); diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index cca421e9355..8e8b9a4987e 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -125,9 +125,6 @@ struct pv_cpu_ops { void (*store_gdt)(struct desc_ptr *); void (*store_idt)(struct desc_ptr *); void (*set_ldt)(const void *desc, unsigned entries); -#ifdef CONFIG_X86_32 - void (*load_user_cs_desc)(int cpu, struct mm_struct *mm); -#endif unsigned long (*store_tr)(void); void (*load_tls)(struct thread_struct *t, unsigned int cpu); #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 461ce432b1c..2291895b183 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -23,6 +23,7 @@ #define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16) #define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) #define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18) +#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL (1ULL << 19) #define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20) #define 
ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21) #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) @@ -188,8 +189,6 @@ extern u32 get_ibs_caps(void); #ifdef CONFIG_PERF_EVENTS extern void perf_events_lapic_init(void); -#define PERF_EVENT_INDEX_OFFSET 0 - /* * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. * This flag is otherwise unused and ABI specified to be 0, so nobody should diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index effff47a3c8..43876f16caf 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -31,6 +31,56 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte) ptep->pte_low = pte.pte_low; } +#define pmd_read_atomic pmd_read_atomic +/* + * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with + * a "*pmdp" dereference done by gcc. Problem is, in certain places + * where pte_offset_map_lock is called, concurrent page faults are + * allowed, if the mmap_sem is hold for reading. An example is mincore + * vs page faults vs MADV_DONTNEED. On the page fault side + * pmd_populate rightfully does a set_64bit, but if we're reading the + * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen + * because gcc will not read the 64bit of the pmd atomically. To fix + * this all places running pmd_offset_map_lock() while holding the + * mmap_sem in read mode, shall read the pmdp pointer using this + * function to know if the pmd is null nor not, and in turn to know if + * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd + * operations. + * + * Without THP if the mmap_sem is hold for reading, the + * pmd can only transition from null to not null while pmd_read_atomic runs. + * So there's no need of literally reading it atomically. + * + * With THP if the mmap_sem is hold for reading, the pmd can become + * THP or null or point to a pte (and in turn become "stable") at any + * time under pmd_read_atomic, so it's mandatory to read it atomically + * with cmpxchg8b. + */ +#ifndef CONFIG_TRANSPARENT_HUGEPAGE +static inline pmd_t pmd_read_atomic(pmd_t *pmdp) +{ + pmdval_t ret; + u32 *tmp = (u32 *)pmdp; + + ret = (pmdval_t) (*tmp); + if (ret) { + /* + * If the low part is null, we must not read the high part + * or we can end up with a partial pmd. + */ + smp_rmb(); + ret |= ((pmdval_t)*(tmp + 1)) << 32; + } + + return (pmd_t) { ret }; +} +#else /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline pmd_t pmd_read_atomic(pmd_t *pmdp) +{ + return (pmd_t) { atomic64_read((atomic64_t *)pmdp) }; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) { set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); diff --git a/arch/x86/include/asm/posix_types.h b/arch/x86/include/asm/posix_types.h index bb7133dc155..7ef7c3020e5 100644 --- a/arch/x86/include/asm/posix_types.h +++ b/arch/x86/include/asm/posix_types.h @@ -7,6 +7,8 @@ #else # ifdef __i386__ # include "posix_types_32.h" +# elif defined(__ILP32__) +# include "posix_types_x32.h" # else # include "posix_types_64.h" # endif diff --git a/arch/x86/include/asm/posix_types_32.h b/arch/x86/include/asm/posix_types_32.h index f7d9adf82e5..99f262e04b9 100644 --- a/arch/x86/include/asm/posix_types_32.h +++ b/arch/x86/include/asm/posix_types_32.h @@ -7,79 +7,22 @@ * assume GCC is being used. 
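The pgtable-3level.h hunk above adds pmd_read_atomic() so that lockless readers holding mmap_sem for reading never see a half-written PAE pmd. The intended call pattern, sketched with the standard pmd_none()/pmd_trans_huge() predicates; the surrounding walker is hypothetical:

	/* Sketch: mincore-style reader under down_read(&mm->mmap_sem).
	 * Sample the pmd once, atomically, then test the sampled value. */
	static int example_pmd_walk(pmd_t *pmdp)
	{
		pmd_t pmdval = pmd_read_atomic(pmdp);

		if (pmd_none(pmdval))
			return 0;	/* may only go from null to set */
		if (pmd_trans_huge(pmdval))
			return 1;	/* treat as a huge page */
		/* stable: safe to pte_offset_map_lock() on pmdp now */
		return 2;
	}
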
*/ -typedef unsigned long __kernel_ino_t; typedef unsigned short __kernel_mode_t; +#define __kernel_mode_t __kernel_mode_t + typedef unsigned short __kernel_nlink_t; -typedef long __kernel_off_t; -typedef int __kernel_pid_t; +#define __kernel_nlink_t __kernel_nlink_t + typedef unsigned short __kernel_ipc_pid_t; +#define __kernel_ipc_pid_t __kernel_ipc_pid_t + typedef unsigned short __kernel_uid_t; typedef unsigned short __kernel_gid_t; -typedef unsigned int __kernel_size_t; -typedef int __kernel_ssize_t; -typedef int __kernel_ptrdiff_t; -typedef long __kernel_time_t; -typedef long __kernel_suseconds_t; -typedef long __kernel_clock_t; -typedef int __kernel_timer_t; -typedef int __kernel_clockid_t; -typedef int __kernel_daddr_t; -typedef char * __kernel_caddr_t; -typedef unsigned short __kernel_uid16_t; -typedef unsigned short __kernel_gid16_t; -typedef unsigned int __kernel_uid32_t; -typedef unsigned int __kernel_gid32_t; +#define __kernel_uid_t __kernel_uid_t -typedef unsigned short __kernel_old_uid_t; -typedef unsigned short __kernel_old_gid_t; typedef unsigned short __kernel_old_dev_t; +#define __kernel_old_dev_t __kernel_old_dev_t -#ifdef __GNUC__ -typedef long long __kernel_loff_t; -#endif - -typedef struct { - int val[2]; -} __kernel_fsid_t; - -#if defined(__KERNEL__) - -#undef __FD_SET -#define __FD_SET(fd,fdsetp) \ - asm volatile("btsl %1,%0": \ - "+m" (*(__kernel_fd_set *)(fdsetp)) \ - : "r" ((int)(fd))) - -#undef __FD_CLR -#define __FD_CLR(fd,fdsetp) \ - asm volatile("btrl %1,%0": \ - "+m" (*(__kernel_fd_set *)(fdsetp)) \ - : "r" ((int) (fd))) - -#undef __FD_ISSET -#define __FD_ISSET(fd,fdsetp) \ - (__extension__ \ - ({ \ - unsigned char __result; \ - asm volatile("btl %1,%2 ; setb %0" \ - : "=q" (__result) \ - : "r" ((int)(fd)), \ - "m" (*(__kernel_fd_set *)(fdsetp))); \ - __result; \ -})) - -#undef __FD_ZERO -#define __FD_ZERO(fdsetp) \ -do { \ - int __d0, __d1; \ - asm volatile("cld ; rep ; stosl" \ - : "=m" (*(__kernel_fd_set *)(fdsetp)), \ - "=&c" (__d0), "=&D" (__d1) \ - : "a" (0), "1" (__FDSET_LONGS), \ - "2" ((__kernel_fd_set *)(fdsetp)) \ - : "memory"); \ -} while (0) - -#endif /* defined(__KERNEL__) */ +#include <asm-generic/posix_types.h> #endif /* _ASM_X86_POSIX_TYPES_32_H */ diff --git a/arch/x86/include/asm/posix_types_64.h b/arch/x86/include/asm/posix_types_64.h index eb8d2d92b63..cba0c1ead16 100644 --- a/arch/x86/include/asm/posix_types_64.h +++ b/arch/x86/include/asm/posix_types_64.h @@ -7,113 +7,13 @@ * assume GCC is being used. 
*/ -typedef unsigned long __kernel_ino_t; -typedef unsigned int __kernel_mode_t; -typedef unsigned long __kernel_nlink_t; -typedef long __kernel_off_t; -typedef int __kernel_pid_t; -typedef int __kernel_ipc_pid_t; -typedef unsigned int __kernel_uid_t; -typedef unsigned int __kernel_gid_t; -typedef unsigned long __kernel_size_t; -typedef long __kernel_ssize_t; -typedef long __kernel_ptrdiff_t; -typedef long __kernel_time_t; -typedef long __kernel_suseconds_t; -typedef long __kernel_clock_t; -typedef int __kernel_timer_t; -typedef int __kernel_clockid_t; -typedef int __kernel_daddr_t; -typedef char * __kernel_caddr_t; -typedef unsigned short __kernel_uid16_t; -typedef unsigned short __kernel_gid16_t; - -#ifdef __GNUC__ -typedef long long __kernel_loff_t; -#endif - -typedef struct { - int val[2]; -} __kernel_fsid_t; - typedef unsigned short __kernel_old_uid_t; typedef unsigned short __kernel_old_gid_t; -typedef __kernel_uid_t __kernel_uid32_t; -typedef __kernel_gid_t __kernel_gid32_t; +#define __kernel_old_uid_t __kernel_old_uid_t typedef unsigned long __kernel_old_dev_t; +#define __kernel_old_dev_t __kernel_old_dev_t -#ifdef __KERNEL__ - -#undef __FD_SET -static inline void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp) -{ - unsigned long _tmp = fd / __NFDBITS; - unsigned long _rem = fd % __NFDBITS; - fdsetp->fds_bits[_tmp] |= (1UL<<_rem); -} - -#undef __FD_CLR -static inline void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp) -{ - unsigned long _tmp = fd / __NFDBITS; - unsigned long _rem = fd % __NFDBITS; - fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem); -} - -#undef __FD_ISSET -static inline int __FD_ISSET(unsigned long fd, __const__ __kernel_fd_set *p) -{ - unsigned long _tmp = fd / __NFDBITS; - unsigned long _rem = fd % __NFDBITS; - return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0; -} - -/* - * This will unroll the loop for the normal constant cases (8 or 32 longs, - * for 256 and 1024-bit fd_sets respectively) - */ -#undef __FD_ZERO -static inline void __FD_ZERO(__kernel_fd_set *p) -{ - unsigned long *tmp = p->fds_bits; - int i; - - if (__builtin_constant_p(__FDSET_LONGS)) { - switch (__FDSET_LONGS) { - case 32: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; - tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0; - tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0; - tmp[16] = 0; tmp[17] = 0; tmp[18] = 0; tmp[19] = 0; - tmp[20] = 0; tmp[21] = 0; tmp[22] = 0; tmp[23] = 0; - tmp[24] = 0; tmp[25] = 0; tmp[26] = 0; tmp[27] = 0; - tmp[28] = 0; tmp[29] = 0; tmp[30] = 0; tmp[31] = 0; - return; - case 16: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; - tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0; - tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0; - return; - case 8: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0; - return; - case 4: - tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0; - return; - } - } - i = __FDSET_LONGS; - while (i) { - i--; - *tmp = 0; - tmp++; - } -} - -#endif /* defined(__KERNEL__) */ +#include <asm-generic/posix_types.h> #endif /* _ASM_X86_POSIX_TYPES_64_H */ diff --git a/arch/x86/include/asm/posix_types_x32.h b/arch/x86/include/asm/posix_types_x32.h new file mode 100644 index 00000000000..85f9bdafa93 --- /dev/null +++ b/arch/x86/include/asm/posix_types_x32.h @@ -0,0 +1,19 @@ +#ifndef _ASM_X86_POSIX_TYPES_X32_H +#define _ASM_X86_POSIX_TYPES_X32_H + +/* + * This file is only used by 
user-level software, so you need to + * be a little careful about namespace pollution etc. Also, we cannot + * assume GCC is being used. + * + * These types should generally match the ones used by the 64-bit kernel, + * + */ + +typedef long long __kernel_long_t; +typedef unsigned long long __kernel_ulong_t; +#define __kernel_long_t __kernel_long_t + +#include <asm/posix_types_64.h> + +#endif /* _ASM_X86_POSIX_TYPES_X32_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 58545c97d07..4fa7dcceb6c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -14,13 +14,13 @@ struct mm_struct; #include <asm/sigcontext.h> #include <asm/current.h> #include <asm/cpufeature.h> -#include <asm/system.h> #include <asm/page.h> #include <asm/pgtable_types.h> #include <asm/percpu.h> #include <asm/msr.h> #include <asm/desc_defs.h> #include <asm/nops.h> +#include <asm/special_insns.h> #include <linux/personality.h> #include <linux/cpumask.h> @@ -29,6 +29,15 @@ struct mm_struct; #include <linux/math64.h> #include <linux/init.h> #include <linux/err.h> +#include <linux/irqflags.h> + +/* + * We handle most unaligned accesses in hardware. On the other hand + * unaligned DMA can be quite expensive on some Nehalem processors. + * + * Based on this we disable the IP header alignment in network drivers. + */ +#define NET_IP_ALIGN 0 #define HBP_NUM 4 /* @@ -162,6 +171,7 @@ extern void early_cpu_init(void); extern void identify_boot_cpu(void); extern void identify_secondary_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); +void print_cpu_msr(struct cpuinfo_x86 *); extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); extern unsigned short num_cache_leaves; @@ -453,7 +463,7 @@ struct thread_struct { unsigned long ptrace_dr7; /* Fault info: */ unsigned long cr2; - unsigned long trap_no; + unsigned long trap_nr; unsigned long error_code; /* floating point and extended processor state */ struct fpu fpu; @@ -474,61 +484,6 @@ struct thread_struct { unsigned io_bitmap_max; }; -static inline unsigned long native_get_debugreg(int regno) -{ - unsigned long val = 0; /* Damn you, gcc! 
*/ - - switch (regno) { - case 0: - asm("mov %%db0, %0" :"=r" (val)); - break; - case 1: - asm("mov %%db1, %0" :"=r" (val)); - break; - case 2: - asm("mov %%db2, %0" :"=r" (val)); - break; - case 3: - asm("mov %%db3, %0" :"=r" (val)); - break; - case 6: - asm("mov %%db6, %0" :"=r" (val)); - break; - case 7: - asm("mov %%db7, %0" :"=r" (val)); - break; - default: - BUG(); - } - return val; -} - -static inline void native_set_debugreg(int regno, unsigned long value) -{ - switch (regno) { - case 0: - asm("mov %0, %%db0" ::"r" (value)); - break; - case 1: - asm("mov %0, %%db1" ::"r" (value)); - break; - case 2: - asm("mov %0, %%db2" ::"r" (value)); - break; - case 3: - asm("mov %0, %%db3" ::"r" (value)); - break; - case 6: - asm("mov %0, %%db6" ::"r" (value)); - break; - case 7: - asm("mov %0, %%db7" ::"r" (value)); - break; - default: - BUG(); - } -} - /* * Set IOPL bits in EFLAGS from given mask */ @@ -574,14 +529,6 @@ static inline void native_swapgs(void) #define __cpuid native_cpuid #define paravirt_enabled() 0 -/* - * These special macros can be used to get or set a debugging register - */ -#define get_debugreg(var, register) \ - (var) = native_get_debugreg(register) -#define set_debugreg(value, register) \ - native_set_debugreg(register, value) - static inline void load_sp0(struct tss_struct *tss, struct thread_struct *thread) { @@ -926,9 +873,9 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ 0xc0000000 : 0xFFFFe000) -#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \ +#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \ IA32_PAGE_OFFSET : TASK_SIZE_MAX) -#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \ +#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \ IA32_PAGE_OFFSET : TASK_SIZE_MAX) #define STACK_TOP TASK_SIZE @@ -950,6 +897,12 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) extern unsigned long KSTK_ESP(struct task_struct *task); + +/* + * User space RSP while inside the SYSCALL fast path + */ +DECLARE_PER_CPU(unsigned long, old_rsp); + #endif /* CONFIG_X86_64 */ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, @@ -1021,4 +974,14 @@ extern bool cpu_has_amd_erratum(const int *); #define cpu_has_amd_erratum(x) (false) #endif /* CONFIG_CPU_SUP_AMD */ +void cpu_idle_wait(void); + +extern unsigned long arch_align_stack(unsigned long sp); +extern void free_init_pages(char *what, unsigned long begin, unsigned long end); + +void default_idle(void); +bool set_pm_idle_to_default(void); + +void stop_this_cpu(void *dummy); + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h index 644dd885f05..60bef663609 100644 --- a/arch/x86/include/asm/prom.h +++ b/arch/x86/include/asm/prom.h @@ -21,7 +21,6 @@ #include <asm/irq.h> #include <linux/atomic.h> #include <asm/setup.h> -#include <asm/irq_controller.h> #ifdef CONFIG_OF extern int of_ioapic; @@ -43,15 +42,6 @@ extern char cmd_line[COMMAND_LINE_SIZE]; #define pci_address_to_pio pci_address_to_pio unsigned long pci_address_to_pio(phys_addr_t addr); -/** - * irq_dispose_mapping - Unmap an interrupt - * @virq: linux virq number of the interrupt to unmap - * - * FIXME: We really should implement proper virq handling like power, - * but that's going to be major surgery. 
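A minimal usage sketch for the debug-register accessors being moved out of processor.h above (their destination header is not part of this hunk); kernel context assumed, and DR7 bit 0 is the L0 local-enable bit:

	unsigned long dr7;

	get_debugreg(dr7, 7);		/* dr7 = native_get_debugreg(7) */
	set_debugreg(dr7 | 0x1, 7);	/* set L0: activate breakpoint slot 0 */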
- */ -static inline void irq_dispose_mapping(unsigned int virq) { } - #define HAVE_ARCH_DEVTREE_FIXUPS #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 35664547125..dcfde52979c 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -145,7 +145,6 @@ extern unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code, int si_code); -void signal_fault(struct pt_regs *regs, void __user *frame, char *where); extern long syscall_trace_enter(struct pt_regs *); extern void syscall_trace_leave(struct pt_regs *); diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 5e641715c3f..165466233ab 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -212,7 +212,61 @@ #ifdef __KERNEL__ #ifndef __ASSEMBLY__ extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][10]; -#endif -#endif + +/* + * Load a segment. Fall back on loading the zero + * segment if something goes wrong.. + */ +#define loadsegment(seg, value) \ +do { \ + unsigned short __val = (value); \ + \ + asm volatile(" \n" \ + "1: movl %k0,%%" #seg " \n" \ + \ + ".section .fixup,\"ax\" \n" \ + "2: xorl %k0,%k0 \n" \ + " jmp 1b \n" \ + ".previous \n" \ + \ + _ASM_EXTABLE(1b, 2b) \ + \ + : "+r" (__val) : : "memory"); \ +} while (0) + +/* + * Save a segment register away + */ +#define savesegment(seg, value) \ + asm("mov %%" #seg ",%0":"=r" (value) : : "memory") + +/* + * x86_32 user gs accessors. + */ +#ifdef CONFIG_X86_32 +#ifdef CONFIG_X86_32_LAZY_GS +#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) +#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) +#define task_user_gs(tsk) ((tsk)->thread.gs) +#define lazy_save_gs(v) savesegment(gs, (v)) +#define lazy_load_gs(v) loadsegment(gs, (v)) +#else /* X86_32_LAZY_GS */ +#define get_user_gs(regs) (u16)((regs)->gs) +#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) +#define task_user_gs(tsk) (task_pt_regs(tsk)->gs) +#define lazy_save_gs(v) do { } while (0) +#define lazy_load_gs(v) do { } while (0) +#endif /* X86_32_LAZY_GS */ +#endif /* X86_32 */ + +static inline unsigned long get_limit(unsigned long segment) +{ + unsigned long __limit; + asm("lsll %1,%0" : "=r" (__limit) : "r" (segment)); + return __limit + 1; +} + +#endif /* !__ASSEMBLY__ */ +#endif /* __KERNEL__ */ #endif /* _ASM_X86_SEGMENT_H */ diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 04459d25e66..5ca71c065ee 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -230,34 +230,37 @@ struct sigcontext { * User-space might still rely on the old definition: */ struct sigcontext { - unsigned long r8; - unsigned long r9; - unsigned long r10; - unsigned long r11; - unsigned long r12; - unsigned long r13; - unsigned long r14; - unsigned long r15; - unsigned long rdi; - unsigned long rsi; - unsigned long rbp; - unsigned long rbx; - unsigned long rdx; - unsigned long rax; - unsigned long rcx; - unsigned long rsp; - unsigned long rip; - unsigned long eflags; /* RFLAGS */ - unsigned short cs; - unsigned short gs; - unsigned short fs; - unsigned short __pad0; - unsigned long err; - unsigned long trapno; - unsigned long oldmask; - unsigned long cr2; + __u64 r8; + __u64 r9; + __u64 r10; + __u64 r11; + __u64 r12; + __u64 r13; + __u64 r14; + __u64 r15; + __u64 rdi; + __u64 rsi; + 
__u64 rbp; + __u64 rbx; + __u64 rdx; + __u64 rax; + __u64 rcx; + __u64 rsp; + __u64 rip; + __u64 eflags; /* RFLAGS */ + __u16 cs; + __u16 gs; + __u16 fs; + __u16 __pad0; + __u64 err; + __u64 trapno; + __u64 oldmask; + __u64 cr2; struct _fpstate __user *fpstate; /* zero when no FPU context */ - unsigned long reserved1[8]; +#ifdef __ILP32__ + __u32 __fpstate_pad; +#endif + __u64 reserved1[8]; }; #endif /* !__KERNEL__ */ diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h index 4e0fe26d27d..7c7c27c97da 100644 --- a/arch/x86/include/asm/sigframe.h +++ b/arch/x86/include/asm/sigframe.h @@ -59,12 +59,25 @@ struct rt_sigframe_ia32 { #endif /* defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) */ #ifdef CONFIG_X86_64 + struct rt_sigframe { char __user *pretcode; struct ucontext uc; struct siginfo info; /* fp state follows here */ }; + +#ifdef CONFIG_X86_X32_ABI + +struct rt_sigframe_x32 { + u64 pretcode; + struct ucontext_x32 uc; + compat_siginfo_t info; + /* fp state follows here */ +}; + +#endif /* CONFIG_X86_X32_ABI */ + #endif /* CONFIG_X86_64 */ #endif /* _ASM_X86_SIGFRAME_H */ diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h new file mode 100644 index 00000000000..ada93b3b8c6 --- /dev/null +++ b/arch/x86/include/asm/sighandling.h @@ -0,0 +1,24 @@ +#ifndef _ASM_X86_SIGHANDLING_H +#define _ASM_X86_SIGHANDLING_H + +#include <linux/compiler.h> +#include <linux/ptrace.h> +#include <linux/signal.h> + +#include <asm/processor-flags.h> + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ + X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ + X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ + X86_EFLAGS_CF) + +void signal_fault(struct pt_regs *regs, void __user *frame, char *where); + +int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, + unsigned long *pax); +int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, + struct pt_regs *regs, unsigned long mask); + +#endif /* _ASM_X86_SIGHANDLING_H */ diff --git a/arch/x86/include/asm/siginfo.h b/arch/x86/include/asm/siginfo.h index fc1aa553564..34c47b3341c 100644 --- a/arch/x86/include/asm/siginfo.h +++ b/arch/x86/include/asm/siginfo.h @@ -2,7 +2,13 @@ #define _ASM_X86_SIGINFO_H #ifdef __x86_64__ -# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) +# ifdef __ILP32__ /* x32 */ +typedef long long __kernel_si_clock_t __attribute__((aligned(4))); +# define __ARCH_SI_CLOCK_T __kernel_si_clock_t +# define __ARCH_SI_ATTRIBUTES __attribute__((aligned(8))) +# else /* x86-64 */ +# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) +# endif #endif #include <asm-generic/siginfo.h> diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h new file mode 100644 index 00000000000..41fc93a2e22 --- /dev/null +++ b/arch/x86/include/asm/special_insns.h @@ -0,0 +1,199 @@ +#ifndef _ASM_X86_SPECIAL_INSNS_H +#define _ASM_X86_SPECIAL_INSNS_H + + +#ifdef __KERNEL__ + +static inline void native_clts(void) +{ + asm volatile("clts"); +} + +/* + * Volatile isn't enough to prevent the compiler from reordering the + * read/write functions for the control registers and messing everything up. + * A memory clobber would solve the problem, but would prevent reordering of + * all loads stores around it, which can hurt performance. 
Solution is to + * use a variable and mimic reads and writes to it to enforce serialization + */ +static unsigned long __force_order; + +static inline unsigned long native_read_cr0(void) +{ + unsigned long val; + asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order)); + return val; +} + +static inline void native_write_cr0(unsigned long val) +{ + asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order)); +} + +static inline unsigned long native_read_cr2(void) +{ + unsigned long val; + asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order)); + return val; +} + +static inline void native_write_cr2(unsigned long val) +{ + asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order)); +} + +static inline unsigned long native_read_cr3(void) +{ + unsigned long val; + asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order)); + return val; +} + +static inline void native_write_cr3(unsigned long val) +{ + asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order)); +} + +static inline unsigned long native_read_cr4(void) +{ + unsigned long val; + asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); + return val; +} + +static inline unsigned long native_read_cr4_safe(void) +{ + unsigned long val; + /* This could fault if %cr4 does not exist. In x86_64, a cr4 always + * exists, so it will never fail. */ +#ifdef CONFIG_X86_32 + asm volatile("1: mov %%cr4, %0\n" + "2:\n" + _ASM_EXTABLE(1b, 2b) + : "=r" (val), "=m" (__force_order) : "0" (0)); +#else + val = native_read_cr4(); +#endif + return val; +} + +static inline void native_write_cr4(unsigned long val) +{ + asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order)); +} + +#ifdef CONFIG_X86_64 +static inline unsigned long native_read_cr8(void) +{ + unsigned long cr8; + asm volatile("movq %%cr8,%0" : "=r" (cr8)); + return cr8; +} + +static inline void native_write_cr8(unsigned long val) +{ + asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); +} +#endif + +static inline void native_wbinvd(void) +{ + asm volatile("wbinvd": : :"memory"); +} + +extern void native_load_gs_index(unsigned); + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else + +static inline unsigned long read_cr0(void) +{ + return native_read_cr0(); +} + +static inline void write_cr0(unsigned long x) +{ + native_write_cr0(x); +} + +static inline unsigned long read_cr2(void) +{ + return native_read_cr2(); +} + +static inline void write_cr2(unsigned long x) +{ + native_write_cr2(x); +} + +static inline unsigned long read_cr3(void) +{ + return native_read_cr3(); +} + +static inline void write_cr3(unsigned long x) +{ + native_write_cr3(x); +} + +static inline unsigned long read_cr4(void) +{ + return native_read_cr4(); +} + +static inline unsigned long read_cr4_safe(void) +{ + return native_read_cr4_safe(); +} + +static inline void write_cr4(unsigned long x) +{ + native_write_cr4(x); +} + +static inline void wbinvd(void) +{ + native_wbinvd(); +} + +#ifdef CONFIG_X86_64 + +static inline unsigned long read_cr8(void) +{ + return native_read_cr8(); +} + +static inline void write_cr8(unsigned long x) +{ + native_write_cr8(x); +} + +static inline void load_gs_index(unsigned selector) +{ + native_load_gs_index(selector); +} + +#endif + +/* Clear the 'TS' bit */ +static inline void clts(void) +{ + native_clts(); +} + +#endif/* CONFIG_PARAVIRT */ + +#define stts() write_cr0(read_cr0() | X86_CR0_TS) + +static inline void clflush(volatile void *__p) +{ + asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); +} + 
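The __force_order idiom described above is easier to see in a stripped-down sketch (fake_read()/fake_write() are invented names, not kernel code): each reader pretends to store to the variable and each writer pretends to load from it, so the compiler must keep their relative order without a full memory clobber.

	static unsigned long order;

	static inline unsigned long fake_read(void)
	{
		unsigned long val;

		/* "=m" (order): this asm counts as a store to 'order' */
		asm volatile("mov $1, %0" : "=r" (val), "=m" (order));
		return val;
	}

	static inline void fake_write(unsigned long val)
	{
		/* "m" (order): counts as a load from 'order', so this asm
		 * cannot be hoisted above a preceding fake_read() */
		asm volatile("" : : "r" (val), "m" (order));
	}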
+#define nop() asm volatile ("nop") + + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_SPECIAL_INSNS_H */ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index a82c2bf504b..76bfa2cf301 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -88,14 +88,14 @@ static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) { struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); - return !!(tmp.tail ^ tmp.head); + return tmp.tail != tmp.head; } static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) { struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); - return ((tmp.tail - tmp.head) & TICKET_MASK) > 1; + return (__ticket_t)(tmp.tail - tmp.head) > 1; } #ifndef CONFIG_PARAVIRT_SPINLOCKS diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index 8ebd5df7451..ad0ad07fc00 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -16,7 +16,6 @@ typedef u32 __ticketpair_t; #endif #define TICKET_SHIFT (sizeof(__ticket_t) * 8) -#define TICKET_MASK ((__ticket_t)((1 << TICKET_SHIFT) - 1)) typedef struct arch_spinlock { union { diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 15751776356..b5d9533d2c3 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -38,7 +38,6 @@ #include <asm/tsc.h> #include <asm/processor.h> #include <asm/percpu.h> -#include <asm/system.h> #include <asm/desc.h> #include <linux/random.h> diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h new file mode 100644 index 00000000000..4ec45b3abba --- /dev/null +++ b/arch/x86/include/asm/switch_to.h @@ -0,0 +1,129 @@ +#ifndef _ASM_X86_SWITCH_TO_H +#define _ASM_X86_SWITCH_TO_H + +struct task_struct; /* one of the stranger aspects of C forward declarations */ +struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next); +struct tss_struct; +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + struct tss_struct *tss); + +#ifdef CONFIG_X86_32 + +#ifdef CONFIG_CC_STACKPROTECTOR +#define __switch_canary \ + "movl %P[task_canary](%[next]), %%ebx\n\t" \ + "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" +#define __switch_canary_oparam \ + , [stack_canary] "=m" (stack_canary.canary) +#define __switch_canary_iparam \ + , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) +#else /* CC_STACKPROTECTOR */ +#define __switch_canary +#define __switch_canary_oparam +#define __switch_canary_iparam +#endif /* CC_STACKPROTECTOR */ + +/* + * Saving eflags is important. It switches not only IOPL between tasks, + * it also protects other tasks from NT leaking through sysenter etc. + */ +#define switch_to(prev, next, last) \ +do { \ + /* \ + * Context-switching clobbers all registers, so we clobber \ + * them explicitly, via unused output variables. 
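On the spinlock hunk above: dropping TICKET_MASK works because truncating tail - head back to __ticket_t is the masking. A standalone sketch of the wraparound case, assuming the 8-bit ticket configuration (NR_CPUS < 256):

	typedef unsigned char __ticket_t;

	int main(void)
	{
		__ticket_t head = 0xfe, tail = 0x01;	/* tail has wrapped */

		/* tail - head promotes to int (0x01 - 0xfe == -253); the
		 * cast truncates it to 3, i.e. three tickets in flight */
		return (__ticket_t)(tail - head) > 1;	/* contended: 1 */
	}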
\ + * (EAX and EBP is not listed because EBP is saved/restored \ + * explicitly for wchan access and EAX is the return value of \ + * __switch_to()) \ + */ \ + unsigned long ebx, ecx, edx, esi, edi; \ + \ + asm volatile("pushfl\n\t" /* save flags */ \ + "pushl %%ebp\n\t" /* save EBP */ \ + "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ + "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ + "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ + "pushl %[next_ip]\n\t" /* restore EIP */ \ + __switch_canary \ + "jmp __switch_to\n" /* regparm call */ \ + "1:\t" \ + "popl %%ebp\n\t" /* restore EBP */ \ + "popfl\n" /* restore flags */ \ + \ + /* output parameters */ \ + : [prev_sp] "=m" (prev->thread.sp), \ + [prev_ip] "=m" (prev->thread.ip), \ + "=a" (last), \ + \ + /* clobbered output registers: */ \ + "=b" (ebx), "=c" (ecx), "=d" (edx), \ + "=S" (esi), "=D" (edi) \ + \ + __switch_canary_oparam \ + \ + /* input parameters: */ \ + : [next_sp] "m" (next->thread.sp), \ + [next_ip] "m" (next->thread.ip), \ + \ + /* regparm parameters for __switch_to(): */ \ + [prev] "a" (prev), \ + [next] "d" (next) \ + \ + __switch_canary_iparam \ + \ + : /* reloaded segment registers */ \ + "memory"); \ +} while (0) + +#else /* CONFIG_X86_32 */ + +/* frame pointer must be last for get_wchan */ +#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" +#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t" + +#define __EXTRA_CLOBBER \ + , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ + "r12", "r13", "r14", "r15" + +#ifdef CONFIG_CC_STACKPROTECTOR +#define __switch_canary \ + "movq %P[task_canary](%%rsi),%%r8\n\t" \ + "movq %%r8,"__percpu_arg([gs_canary])"\n\t" +#define __switch_canary_oparam \ + , [gs_canary] "=m" (irq_stack_union.stack_canary) +#define __switch_canary_iparam \ + , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) +#else /* CC_STACKPROTECTOR */ +#define __switch_canary +#define __switch_canary_oparam +#define __switch_canary_iparam +#endif /* CC_STACKPROTECTOR */ + +/* Save restore flags to clear handle leaking NT */ +#define switch_to(prev, next, last) \ + asm volatile(SAVE_CONTEXT \ + "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ + "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ + "call __switch_to\n\t" \ + "movq "__percpu_arg([current_task])",%%rsi\n\t" \ + __switch_canary \ + "movq %P[thread_info](%%rsi),%%r8\n\t" \ + "movq %%rax,%%rdi\n\t" \ + "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ + "jnz ret_from_fork\n\t" \ + RESTORE_CONTEXT \ + : "=a" (last) \ + __switch_canary_oparam \ + : [next] "S" (next), [prev] "D" (prev), \ + [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ + [ti_flags] "i" (offsetof(struct thread_info, flags)), \ + [_tif_fork] "i" (_TIF_FORK), \ + [thread_info] "i" (offsetof(struct task_struct, stack)), \ + [current_task] "m" (current_task) \ + __switch_canary_iparam \ + : "memory", "cc" __EXTRA_CLOBBER) + +#endif /* CONFIG_X86_32 */ + +#endif /* _ASM_X86_SWITCH_TO_H */ diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index cb238526a9f..3fda9db4881 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -10,6 +10,8 @@ #ifndef _ASM_X86_SYS_IA32_H #define _ASM_X86_SYS_IA32_H +#ifdef CONFIG_COMPAT + #include <linux/compiler.h> #include <linux/linkage.h> #include <linux/types.h> @@ -36,8 +38,6 @@ asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *, struct sigaction32 __user *, unsigned int); asmlinkage long sys32_sigaction(int, struct old_sigaction32 
__user *, struct old_sigaction32 __user *); -asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *, - compat_sigset_t __user *, unsigned int); asmlinkage long sys32_alarm(unsigned int); asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); @@ -83,4 +83,7 @@ asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32); asmlinkage long sys32_fanotify_mark(int, unsigned int, u32, u32, int, const char __user *); + +#endif /* CONFIG_COMPAT */ + #endif /* _ASM_X86_SYS_IA32_H */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index d962e5652a7..386b78686c4 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -16,6 +16,7 @@ #include <linux/sched.h> #include <linux/err.h> #include <asm/asm-offsets.h> /* For NR_syscalls */ +#include <asm/unistd.h> extern const unsigned long sys_call_table[]; @@ -26,13 +27,13 @@ extern const unsigned long sys_call_table[]; */ static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - return regs->orig_ax; + return regs->orig_ax & __SYSCALL_MASK; } static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { - regs->ax = regs->orig_ax; + regs->ax = regs->orig_ax & __SYSCALL_MASK; } static inline long syscall_get_error(struct task_struct *task, diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h deleted file mode 100644 index 2d2f01ce6dc..00000000000 --- a/arch/x86/include/asm/system.h +++ /dev/null @@ -1,523 +0,0 @@ -#ifndef _ASM_X86_SYSTEM_H -#define _ASM_X86_SYSTEM_H - -#include <asm/asm.h> -#include <asm/segment.h> -#include <asm/cpufeature.h> -#include <asm/cmpxchg.h> -#include <asm/nops.h> - -#include <linux/kernel.h> -#include <linux/irqflags.h> - -/* entries in ARCH_DLINFO: */ -#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64) -# define AT_VECTOR_SIZE_ARCH 2 -#else /* else it's non-compat x86-64 */ -# define AT_VECTOR_SIZE_ARCH 1 -#endif - -struct task_struct; /* one of the stranger aspects of C forward declarations */ -struct task_struct *__switch_to(struct task_struct *prev, - struct task_struct *next); -struct tss_struct; -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss); -extern void show_regs_common(void); - -#ifdef CONFIG_X86_32 - -#ifdef CONFIG_CC_STACKPROTECTOR -#define __switch_canary \ - "movl %P[task_canary](%[next]), %%ebx\n\t" \ - "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" -#define __switch_canary_oparam \ - , [stack_canary] "=m" (stack_canary.canary) -#define __switch_canary_iparam \ - , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) -#else /* CC_STACKPROTECTOR */ -#define __switch_canary -#define __switch_canary_oparam -#define __switch_canary_iparam -#endif /* CC_STACKPROTECTOR */ - -/* - * Saving eflags is important. It switches not only IOPL between tasks, - * it also protects other tasks from NT leaking through sysenter etc. - */ -#define switch_to(prev, next, last) \ -do { \ - /* \ - * Context-switching clobbers all registers, so we clobber \ - * them explicitly, via unused output variables. 
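For the __SYSCALL_MASK change in asm/syscall.h above: an x32 task invokes system calls with __X32_SYSCALL_BIT (0x40000000, added to asm/unistd.h later in this patch) ORed into the number, and syscall_get_nr() masks it back off before the number is used. A userspace-view sketch; the concrete syscall number is illustrative only:

	#include <stdio.h>

	#define __X32_SYSCALL_BIT 0x40000000

	int main(void)
	{
		unsigned int nr = 1 | __X32_SYSCALL_BIT;	/* e.g. write */

		/* kernel side: orig_ax & __SYSCALL_MASK, where the mask is
		 * ~__X32_SYSCALL_BIT when CONFIG_X86_X32_ABI is set */
		printf("raw=%#x table index=%#x\n", nr, nr & ~__X32_SYSCALL_BIT);
		return 0;
	}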
\ - * (EAX and EBP is not listed because EBP is saved/restored \ - * explicitly for wchan access and EAX is the return value of \ - * __switch_to()) \ - */ \ - unsigned long ebx, ecx, edx, esi, edi; \ - \ - asm volatile("pushfl\n\t" /* save flags */ \ - "pushl %%ebp\n\t" /* save EBP */ \ - "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ - "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ - "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ - "pushl %[next_ip]\n\t" /* restore EIP */ \ - __switch_canary \ - "jmp __switch_to\n" /* regparm call */ \ - "1:\t" \ - "popl %%ebp\n\t" /* restore EBP */ \ - "popfl\n" /* restore flags */ \ - \ - /* output parameters */ \ - : [prev_sp] "=m" (prev->thread.sp), \ - [prev_ip] "=m" (prev->thread.ip), \ - "=a" (last), \ - \ - /* clobbered output registers: */ \ - "=b" (ebx), "=c" (ecx), "=d" (edx), \ - "=S" (esi), "=D" (edi) \ - \ - __switch_canary_oparam \ - \ - /* input parameters: */ \ - : [next_sp] "m" (next->thread.sp), \ - [next_ip] "m" (next->thread.ip), \ - \ - /* regparm parameters for __switch_to(): */ \ - [prev] "a" (prev), \ - [next] "d" (next) \ - \ - __switch_canary_iparam \ - \ - : /* reloaded segment registers */ \ - "memory"); \ -} while (0) - -/* - * disable hlt during certain critical i/o operations - */ -#define HAVE_DISABLE_HLT -#else - -/* frame pointer must be last for get_wchan */ -#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" -#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t" - -#define __EXTRA_CLOBBER \ - , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ - "r12", "r13", "r14", "r15" - -#ifdef CONFIG_CC_STACKPROTECTOR -#define __switch_canary \ - "movq %P[task_canary](%%rsi),%%r8\n\t" \ - "movq %%r8,"__percpu_arg([gs_canary])"\n\t" -#define __switch_canary_oparam \ - , [gs_canary] "=m" (irq_stack_union.stack_canary) -#define __switch_canary_iparam \ - , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) -#else /* CC_STACKPROTECTOR */ -#define __switch_canary -#define __switch_canary_oparam -#define __switch_canary_iparam -#endif /* CC_STACKPROTECTOR */ - -/* Save restore flags to clear handle leaking NT */ -#define switch_to(prev, next, last) \ - asm volatile(SAVE_CONTEXT \ - "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ - "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ - "call __switch_to\n\t" \ - "movq "__percpu_arg([current_task])",%%rsi\n\t" \ - __switch_canary \ - "movq %P[thread_info](%%rsi),%%r8\n\t" \ - "movq %%rax,%%rdi\n\t" \ - "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ - "jnz ret_from_fork\n\t" \ - RESTORE_CONTEXT \ - : "=a" (last) \ - __switch_canary_oparam \ - : [next] "S" (next), [prev] "D" (prev), \ - [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ - [ti_flags] "i" (offsetof(struct thread_info, flags)), \ - [_tif_fork] "i" (_TIF_FORK), \ - [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [current_task] "m" (current_task) \ - __switch_canary_iparam \ - : "memory", "cc" __EXTRA_CLOBBER) -#endif - -#ifdef __KERNEL__ - -extern void native_load_gs_index(unsigned); - -/* - * Load a segment. Fall back on loading the zero - * segment if something goes wrong.. 
- */ -#define loadsegment(seg, value) \ -do { \ - unsigned short __val = (value); \ - \ - asm volatile(" \n" \ - "1: movl %k0,%%" #seg " \n" \ - \ - ".section .fixup,\"ax\" \n" \ - "2: xorl %k0,%k0 \n" \ - " jmp 1b \n" \ - ".previous \n" \ - \ - _ASM_EXTABLE(1b, 2b) \ - \ - : "+r" (__val) : : "memory"); \ -} while (0) - -/* - * Save a segment register away - */ -#define savesegment(seg, value) \ - asm("mov %%" #seg ",%0":"=r" (value) : : "memory") - -/* - * x86_32 user gs accessors. - */ -#ifdef CONFIG_X86_32 -#ifdef CONFIG_X86_32_LAZY_GS -#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) -#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) -#define task_user_gs(tsk) ((tsk)->thread.gs) -#define lazy_save_gs(v) savesegment(gs, (v)) -#define lazy_load_gs(v) loadsegment(gs, (v)) -#else /* X86_32_LAZY_GS */ -#define get_user_gs(regs) (u16)((regs)->gs) -#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) -#define task_user_gs(tsk) (task_pt_regs(tsk)->gs) -#define lazy_save_gs(v) do { } while (0) -#define lazy_load_gs(v) do { } while (0) -#endif /* X86_32_LAZY_GS */ -#endif /* X86_32 */ - -static inline unsigned long get_limit(unsigned long segment) -{ - unsigned long __limit; - asm("lsll %1,%0" : "=r" (__limit) : "r" (segment)); - return __limit + 1; -} - -static inline void native_clts(void) -{ - asm volatile("clts"); -} - -/* - * Volatile isn't enough to prevent the compiler from reordering the - * read/write functions for the control registers and messing everything up. - * A memory clobber would solve the problem, but would prevent reordering of - * all loads stores around it, which can hurt performance. Solution is to - * use a variable and mimic reads and writes to it to enforce serialization - */ -static unsigned long __force_order; - -static inline unsigned long native_read_cr0(void) -{ - unsigned long val; - asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order)); - return val; -} - -static inline void native_write_cr0(unsigned long val) -{ - asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order)); -} - -static inline unsigned long native_read_cr2(void) -{ - unsigned long val; - asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order)); - return val; -} - -static inline void native_write_cr2(unsigned long val) -{ - asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order)); -} - -static inline unsigned long native_read_cr3(void) -{ - unsigned long val; - asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order)); - return val; -} - -static inline void native_write_cr3(unsigned long val) -{ - asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order)); -} - -static inline unsigned long native_read_cr4(void) -{ - unsigned long val; - asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); - return val; -} - -static inline unsigned long native_read_cr4_safe(void) -{ - unsigned long val; - /* This could fault if %cr4 does not exist. In x86_64, a cr4 always - * exists, so it will never fail. 
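loadsegment() and native_read_cr4_safe(), both of which appear twice in this patch as they move between headers, share one idiom: execute the risky instruction at label 1, and let an exception-table entry divert a fault into a .fixup stub that supplies a fallback value. A stripped-down sketch of that shape (probe_word() is a hypothetical helper; kernel context assumed for _ASM_EXTABLE):

	static inline unsigned long probe_word(const unsigned long *p)
	{
		unsigned long val;

		asm volatile("1:	mov %1,%0\n"
			     "2:\n"
			     ".section .fixup,\"ax\"\n"
			     "3:	xor %0,%0\n"	/* faulted: yield 0 */
			     "	jmp 2b\n"
			     ".previous\n"
			     _ASM_EXTABLE(1b, 3b)
			     : "=r" (val) : "m" (*p));
		return val;
	}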
*/ -#ifdef CONFIG_X86_32 - asm volatile("1: mov %%cr4, %0\n" - "2:\n" - _ASM_EXTABLE(1b, 2b) - : "=r" (val), "=m" (__force_order) : "0" (0)); -#else - val = native_read_cr4(); -#endif - return val; -} - -static inline void native_write_cr4(unsigned long val) -{ - asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order)); -} - -#ifdef CONFIG_X86_64 -static inline unsigned long native_read_cr8(void) -{ - unsigned long cr8; - asm volatile("movq %%cr8,%0" : "=r" (cr8)); - return cr8; -} - -static inline void native_write_cr8(unsigned long val) -{ - asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); -} -#endif - -static inline void native_wbinvd(void) -{ - asm volatile("wbinvd": : :"memory"); -} - -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#else - -static inline unsigned long read_cr0(void) -{ - return native_read_cr0(); -} - -static inline void write_cr0(unsigned long x) -{ - native_write_cr0(x); -} - -static inline unsigned long read_cr2(void) -{ - return native_read_cr2(); -} - -static inline void write_cr2(unsigned long x) -{ - native_write_cr2(x); -} - -static inline unsigned long read_cr3(void) -{ - return native_read_cr3(); -} - -static inline void write_cr3(unsigned long x) -{ - native_write_cr3(x); -} - -static inline unsigned long read_cr4(void) -{ - return native_read_cr4(); -} - -static inline unsigned long read_cr4_safe(void) -{ - return native_read_cr4_safe(); -} - -static inline void write_cr4(unsigned long x) -{ - native_write_cr4(x); -} - -static inline void wbinvd(void) -{ - native_wbinvd(); -} - -#ifdef CONFIG_X86_64 - -static inline unsigned long read_cr8(void) -{ - return native_read_cr8(); -} - -static inline void write_cr8(unsigned long x) -{ - native_write_cr8(x); -} - -static inline void load_gs_index(unsigned selector) -{ - native_load_gs_index(selector); -} - -#endif - -/* Clear the 'TS' bit */ -static inline void clts(void) -{ - native_clts(); -} - -#endif/* CONFIG_PARAVIRT */ - -#define stts() write_cr0(read_cr0() | X86_CR0_TS) - -#endif /* __KERNEL__ */ - -static inline void clflush(volatile void *__p) -{ - asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); -} - -#define nop() asm volatile ("nop") - -void disable_hlt(void); -void enable_hlt(void); - -void cpu_idle_wait(void); - -extern unsigned long arch_align_stack(unsigned long sp); -extern void free_init_pages(char *what, unsigned long begin, unsigned long end); - -void default_idle(void); -bool set_pm_idle_to_default(void); - -void stop_this_cpu(void *dummy); - -/* - * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking - * to devices. - */ -#ifdef CONFIG_X86_32 -/* - * Some non-Intel clones support out of order store. wmb() ceases to be a - * nop for these. - */ -#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) -#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) -#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) -#else -#define mb() asm volatile("mfence":::"memory") -#define rmb() asm volatile("lfence":::"memory") -#define wmb() asm volatile("sfence" ::: "memory") -#endif - -/** - * read_barrier_depends - Flush all pending reads that subsequents reads - * depend on. - * - * No data-dependent reads from memory-like regions are ever reordered - * over this barrier. 
All reads preceding this primitive are guaranteed - * to access memory (but not necessarily other CPUs' caches) before any - * reads following this primitive that depend on the data return by - * any of the preceding reads. This primitive is much lighter weight than - * rmb() on most CPUs, and is never heavier weight than is - * rmb(). - * - * These ordering constraints are respected by both the local CPU - * and the compiler. - * - * Ordering is not guaranteed by anything other than these primitives, - * not even by data dependencies. See the documentation for - * memory_barrier() for examples and URLs to more information. - * - * For example, the following code would force ordering (the initial - * value of "a" is zero, "b" is one, and "p" is "&a"): - * - * <programlisting> - * CPU 0 CPU 1 - * - * b = 2; - * memory_barrier(); - * p = &b; q = p; - * read_barrier_depends(); - * d = *q; - * </programlisting> - * - * because the read of "*q" depends on the read of "p" and these - * two reads are separated by a read_barrier_depends(). However, - * the following code, with the same initial values for "a" and "b": - * - * <programlisting> - * CPU 0 CPU 1 - * - * a = 2; - * memory_barrier(); - * b = 3; y = b; - * read_barrier_depends(); - * x = a; - * </programlisting> - * - * does not enforce ordering, since there is no data dependency between - * the read of "a" and the read of "b". Therefore, on some CPUs, such - * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() - * in cases like this where there are no data dependencies. - **/ - -#define read_barrier_depends() do { } while (0) - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#ifdef CONFIG_X86_PPRO_FENCE -# define smp_rmb() rmb() -#else -# define smp_rmb() barrier() -#endif -#ifdef CONFIG_X86_OOSTORE -# define smp_wmb() wmb() -#else -# define smp_wmb() barrier() -#endif -#define smp_read_barrier_depends() read_barrier_depends() -#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while (0) -#define set_mb(var, value) do { var = value; barrier(); } while (0) -#endif - -/* - * Stop RDTSC speculation. This is needed when you need to use RDTSC - * (or get_cycles or vread that possibly accesses the TSC) in a defined - * code region. - * - * (Could use an alternative three way for this if there was one.) - */ -static __always_inline void rdtsc_barrier(void) -{ - alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); - alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); -} - -/* - * We handle most unaligned accesses in hardware. On the other hand - * unaligned DMA can be quite expensive on some Nehalem processors. - * - * Based on this we disable the IP header alignment in network drivers. 
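The smp_* barrier definitions deleted here move out with the rest of the asm/system.h split (their new home is not shown in this section). For reference, the store/load pairing they exist for, kernel context assumed:

	static int payload;
	static int ready;

	static void producer(void)
	{
		payload = 42;
		smp_wmb();		/* order the two stores */
		ready = 1;
	}

	static int consumer(void)
	{
		if (!ready)
			return -1;
		smp_rmb();		/* order the two loads */
		return payload;
	}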
- */ -#define NET_IP_ALIGN 0 -#endif /* _ASM_X86_SYSTEM_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index cfd8144d552..ad6df8ccd71 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -86,7 +86,7 @@ struct thread_info { #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ -#define TIF_IA32 17 /* 32bit process */ +#define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_FORK 18 /* ret_from_fork */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_DEBUG 21 /* uses debug registers */ @@ -95,6 +95,8 @@ struct thread_info { #define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ +#define TIF_ADDR32 29 /* 32-bit address space on 64 bits */ +#define TIF_X32 30 /* 32-bit native x86-64 binary */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -116,6 +118,8 @@ struct thread_info { #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) +#define _TIF_ADDR32 (1 << TIF_ADDR32) +#define _TIF_X32 (1 << TIF_X32) /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ @@ -262,6 +266,18 @@ static inline void set_restore_sigmask(void) ti->status |= TS_RESTORE_SIGMASK; set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags); } + +static inline bool is_ia32_task(void) +{ +#ifdef CONFIG_X86_32 + return true; +#endif +#ifdef CONFIG_IA32_EMULATION + if (current_thread_info()->status & TS_COMPAT) + return true; +#endif + return false; +} #endif /* !__ASSEMBLY__ */ #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 169be8938b9..c0e108e0807 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -5,7 +5,7 @@ #include <linux/sched.h> #include <asm/processor.h> -#include <asm/system.h> +#include <asm/special_insns.h> #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 0012d0902c5..88eae2aec61 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -89,4 +89,29 @@ asmlinkage void smp_thermal_interrupt(void); asmlinkage void mce_threshold_interrupt(void); #endif +/* Interrupts/Exceptions */ +enum { + X86_TRAP_DE = 0, /* 0, Divide-by-zero */ + X86_TRAP_DB, /* 1, Debug */ + X86_TRAP_NMI, /* 2, Non-maskable Interrupt */ + X86_TRAP_BP, /* 3, Breakpoint */ + X86_TRAP_OF, /* 4, Overflow */ + X86_TRAP_BR, /* 5, Bound Range Exceeded */ + X86_TRAP_UD, /* 6, Invalid Opcode */ + X86_TRAP_NM, /* 7, Device Not Available */ + X86_TRAP_DF, /* 8, Double Fault */ + X86_TRAP_OLD_MF, /* 9, Coprocessor Segment Overrun */ + X86_TRAP_TS, /* 10, Invalid TSS */ + X86_TRAP_NP, /* 11, Segment Not Present */ + X86_TRAP_SS, /* 12, Stack Segment Fault */ + X86_TRAP_GP, /* 13, General Protection Fault */ + X86_TRAP_PF, /* 14, Page Fault */ + X86_TRAP_SPURIOUS, /* 15, Spurious Interrupt */ + X86_TRAP_MF, /* 16, x87 Floating-Point Exception */ + X86_TRAP_AC, /* 17, Alignment Check */ + X86_TRAP_MC, /* 18, Machine Check */ + X86_TRAP_XF, /* 19, SIMD Floating-Point Exception */ + 
X86_TRAP_IRET = 32, /* 32, IRET Exception */ +}; + #endif /* _ASM_X86_TRAPS_H */ diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 15d99153a96..c91e8b9d588 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -61,7 +61,7 @@ extern void check_tsc_sync_source(int cpu); extern void check_tsc_sync_target(void); extern int notsc_setup(char *); -extern void save_sched_clock_state(void); -extern void restore_sched_clock_state(void); +extern void tsc_save_sched_clock_state(void); +extern void tsc_restore_sched_clock_state(void); #endif /* _ASM_X86_TSC_H */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 8be5f54d936..e0544597cfe 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -557,6 +557,8 @@ struct __large_struct { unsigned long buf[100]; }; extern unsigned long copy_from_user_nmi(void *to, const void __user *from, unsigned long n); +extern __must_check long +strncpy_from_user(char *dst, const char __user *src, long count); /* * movsl can be slow when source and dest are not both 8-byte aligned diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 566e803cc60..8084bc73b18 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -213,11 +213,6 @@ static inline unsigned long __must_check copy_from_user(void *to, return n; } -long __must_check strncpy_from_user(char *dst, const char __user *src, - long count); -long __must_check __strncpy_from_user(char *dst, - const char __user *src, long count); - /** * strlen_user: - Get the size of a string in user space. * @str: The string to measure. diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 1c66d30971a..fcd4b6f3ef0 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -208,10 +208,6 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) } } -__must_check long -strncpy_from_user(char *dst, const char __user *src, long count); -__must_check long -__strncpy_from_user(char *dst, const char __user *src, long count); __must_check long strnlen_user(const char __user *str, long n); __must_check long __strnlen_user(const char __user *str, long n); __must_check long strlen_user(const char __user *str); diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 21f77b89e47..4437001d8e3 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -1,7 +1,17 @@ #ifndef _ASM_X86_UNISTD_H #define _ASM_X86_UNISTD_H 1 +/* x32 syscall flag bit */ +#define __X32_SYSCALL_BIT 0x40000000 + #ifdef __KERNEL__ + +# ifdef CONFIG_X86_X32_ABI +# define __SYSCALL_MASK (~(__X32_SYSCALL_BIT)) +# else +# define __SYSCALL_MASK (~0) +# endif + # ifdef CONFIG_X86_32 # include <asm/unistd_32.h> @@ -14,6 +24,7 @@ # else # include <asm/unistd_64.h> +# include <asm/unistd_64_x32.h> # define __ARCH_WANT_COMPAT_SYS_TIME # endif @@ -52,6 +63,8 @@ #else # ifdef __i386__ # include <asm/unistd_32.h> +# elif defined(__ILP32__) +# include <asm/unistd_x32.h> # else # include <asm/unistd_64.h> # endif diff --git a/arch/x86/include/asm/vga.h b/arch/x86/include/asm/vga.h index 2723c07835d..44282fbf7bf 100644 --- a/arch/x86/include/asm/vga.h +++ b/arch/x86/include/asm/vga.h @@ -17,7 +17,7 @@ #define vga_readb(x) (*(x)) #define vga_writeb(x, y) (*(y) = (x)) -#if CONFIG_FB_EFI +#ifdef CONFIG_FB_EFI #define __ARCH_HAS_VGA_DEFAULT_DEVICE extern struct pci_dev 
*vga_default_device(void); extern void vga_set_default_device(struct pci_dev *pdev); diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 815285bcace..8b38be2de9e 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -5,13 +5,8 @@ #include <linux/clocksource.h> struct vsyscall_gtod_data { - seqlock_t lock; + seqcount_t seq; - /* open coded 'struct timespec' */ - time_t wall_time_sec; - u32 wall_time_nsec; - - struct timezone sys_tz; struct { /* extract of a clocksource struct */ int vclock_mode; cycle_t cycle_last; @@ -19,8 +14,16 @@ struct vsyscall_gtod_data { u32 mult; u32 shift; } clock; - struct timespec wall_to_monotonic; + + /* open coded 'struct timespec' */ + time_t wall_time_sec; + u32 wall_time_nsec; + u32 monotonic_time_nsec; + time_t monotonic_time_sec; + + struct timezone sys_tz; struct timespec wall_time_coarse; + struct timespec monotonic_time_coarse; }; extern struct vsyscall_gtod_data vsyscall_gtod_data; diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index e0f9aa16358..5da71c27cc5 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -16,7 +16,6 @@ #define _ASM_X86_VIRTEX_H #include <asm/processor.h> -#include <asm/system.h> #include <asm/vmx.h> #include <asm/svm.h> diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h new file mode 100644 index 00000000000..e58f03b206c --- /dev/null +++ b/arch/x86/include/asm/word-at-a-time.h @@ -0,0 +1,79 @@ +#ifndef _ASM_WORD_AT_A_TIME_H +#define _ASM_WORD_AT_A_TIME_H + +/* + * This is largely generic for little-endian machines, but the + * optimal byte mask counting is probably going to be something + * that is architecture-specific. If you have a reliably fast + * bit count instruction, that might be better than the multiply + * and shift, for example. + */ + +#ifdef CONFIG_64BIT + +/* + * Jan Achrenius on G+: microoptimized version of + * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56" + * that works for the bytemasks without having to + * mask them first. + */ +static inline long count_masked_bytes(unsigned long mask) +{ + return mask*0x0001020304050608ul >> 56; +} + +#else /* 32-bit case */ + +/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ +static inline long count_masked_bytes(long mask) +{ + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + long a = (0x0ff0001+mask) >> 23; + /* Fix the 1 for 00 case */ + return a & mask; +} + +#endif + +#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) + +/* Return the high bit set in the first byte that is a zero */ +static inline unsigned long has_zero(unsigned long a) +{ + return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80); +} + +/* + * Load an unaligned word from kernel space. + * + * In the (very unlikely) case of the word being a page-crosser + * and the next page not being mapped, take the exception and + * return zeroes in the non-existing part. 
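Tying the word-at-a-time helpers together: roughly the loop the rewritten common strncpy_from_user() builds from them, shown here as a strlen-style sketch for a word-aligned, NUL-terminated buffer (little-endian assumed, as the header itself is):

	static inline size_t wordwise_strlen(const char *s)
	{
		const unsigned long *p = (const unsigned long *)s;
		size_t len = 0;

		for (;;) {
			unsigned long v = *p++;
			unsigned long mask = has_zero(v);

			if (mask) {
				/* keep bits below the first zero byte, then
				 * widen them into a 0xff-per-byte mask */
				mask = ((mask - 1) & ~mask) >> 7;
				return len + count_masked_bytes(mask);
			}
			len += sizeof(v);
		}
	}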
+ */ +static inline unsigned long load_unaligned_zeropad(const void *addr) +{ + unsigned long ret, dummy; + + asm( + "1:\tmov %2,%0\n" + "2:\n" + ".section .fixup,\"ax\"\n" + "3:\t" + "lea %2,%1\n\t" + "and %3,%1\n\t" + "mov (%1),%0\n\t" + "leal %2,%%ecx\n\t" + "andl %4,%%ecx\n\t" + "shll $3,%%ecx\n\t" + "shr %%cl,%0\n\t" + "jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + :"=&r" (ret),"=&c" (dummy) + :"m" (*(unsigned long *)addr), + "i" (-sizeof(unsigned long)), + "i" (sizeof(unsigned long)-1)); + return ret; +} + +#endif /* _ASM_WORD_AT_A_TIME_H */ diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h index 6bf5b8e478c..92e54abf89e 100644 --- a/arch/x86/include/asm/x2apic.h +++ b/arch/x86/include/asm/x2apic.h @@ -18,6 +18,11 @@ static const struct cpumask *x2apic_target_cpus(void) return cpu_online_mask; } +static int x2apic_apic_id_valid(int apicid) +{ + return 1; +} + static int x2apic_apic_id_registered(void) { return 1; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 3084d634418..764b66a4cf8 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -145,9 +145,11 @@ struct x86_init_ops { /** * struct x86_cpuinit_ops - platform specific cpu hotplug setups * @setup_percpu_clockev: set up the per cpu clock event device + * @early_percpu_clock_init: early init of the per cpu clock event device */ struct x86_cpuinit_ops { void (*setup_percpu_clockev)(void); + void (*early_percpu_clock_init)(void); void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); }; @@ -160,6 +162,8 @@ struct x86_cpuinit_ops { * @is_untracked_pat_range exclude from PAT logic * @nmi_init enable NMI on cpus * @i8042_detect pre-detect if i8042 controller exists + * @save_sched_clock_state: save state for sched_clock() on suspend + * @restore_sched_clock_state: restore state for sched_clock() on resume */ struct x86_platform_ops { unsigned long (*calibrate_tsc)(void); @@ -171,6 +175,8 @@ struct x86_platform_ops { void (*nmi_init)(void); unsigned char (*get_nmi_reason)(void); int (*i8042_detect)(void); + void (*save_sched_clock_state)(void); + void (*restore_sched_clock_state)(void); }; struct pci_dev; @@ -182,18 +188,10 @@ struct x86_msi_ops { void (*restore_msi_irqs)(struct pci_dev *dev, int irq); }; -struct x86_ioapic_ops { - void (*init)(void); - unsigned int (*read)(unsigned int apic, unsigned int reg); - void (*write)(unsigned int apic, unsigned int reg, unsigned int value); - void (*modify)(unsigned int apic, unsigned int reg, unsigned int value); -}; - extern struct x86_init_ops x86_init; extern struct x86_cpuinit_ops x86_cpuinit; extern struct x86_platform_ops x86_platform; extern struct x86_msi_ops x86_msi; -extern struct x86_ioapic_ops x86_ioapic; extern void x86_init_noop(void); extern void x86_init_uint_noop(unsigned int unused); diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index a1f2db5f117..cbf0c9d50b9 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -56,6 +56,7 @@ DEFINE_GUEST_HANDLE(int); DEFINE_GUEST_HANDLE(long); DEFINE_GUEST_HANDLE(void); DEFINE_GUEST_HANDLE(uint64_t); +DEFINE_GUEST_HANDLE(uint32_t); #endif #ifndef HYPERVISOR_VIRT_START diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5369059c07a..532d2e090e6 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 
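Back in the asm/vgtod.h hunk above: replacing seqlock_t with seqcount_t drops the embedded spinlock, so writer-side serialization now lives with the single timekeeping updater, while readers keep the usual retry loop. A reader-side sketch using the renamed field:

	unsigned seq;
	time_t sec;
	u32 nsec;

	do {
		seq  = read_seqcount_begin(&vsyscall_gtod_data.seq);
		sec  = vsyscall_gtod_data.wall_time_sec;
		nsec = vsyscall_gtod_data.wall_time_nsec;
	} while (read_seqcount_retry(&vsyscall_gtod_data.seq, seq));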
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_OPTPROBES) += kprobes-opt.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o obj-$(CONFIG_KGDB) += kgdb.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ce664f33ea8..7c439fe4941 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -239,7 +239,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) * to not preallocating memory for all NR_CPUS * when we use CPU hotplug. */ - if (!cpu_has_x2apic && (apic_id >= 0xff) && enabled) + if (!apic->apic_id_valid(apic_id) && enabled) printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); else acpi_register_lapic(apic_id, enabled); @@ -593,7 +593,7 @@ void __init acpi_set_irq_model_ioapic(void) #ifdef CONFIG_ACPI_HOTPLUG_CPU #include <acpi/processor.h> -static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +static void __cpuinit acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) { #ifdef CONFIG_ACPI_NUMA int nid; @@ -642,6 +642,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) kfree(buffer.pointer); buffer.length = ACPI_ALLOCATE_BUFFER; buffer.pointer = NULL; + lapic = NULL; if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL)) goto out; @@ -650,7 +651,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) goto free_tmp_map; cpumask_copy(tmp_map, cpu_present_mask); - acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED); + acpi_register_lapic(physid, ACPI_MADT_ENABLED); /* * If mp_register_lapic successfully generates a new logical cpu diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index f50e7fb2a20..d2b7f27781b 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -14,6 +14,7 @@ #include <acpi/processor.h> #include <asm/acpi.h> #include <asm/mwait.h> +#include <asm/special_insns.h> /* * Initialize bm_flags based on the CPU cache properties diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 103b6ab368d..146a49c763a 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -24,6 +24,10 @@ unsigned long acpi_realmode_flags; static char temp_stack[4096]; #endif +asmlinkage void acpi_enter_s3(void) +{ + acpi_enter_sleep_state(3, wake_sleep_flags); +} /** * acpi_suspend_lowlevel - save kernel state * diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h index 416d4be13fe..d68677a2a01 100644 --- a/arch/x86/kernel/acpi/sleep.h +++ b/arch/x86/kernel/acpi/sleep.h @@ -3,12 +3,16 @@ */ #include <asm/trampoline.h> +#include <linux/linkage.h> extern unsigned long saved_video_mode; extern long saved_magic; extern int wakeup_pmode_return; +extern u8 wake_sleep_flags; +extern asmlinkage void acpi_enter_s3(void); + extern unsigned long acpi_copy_wakeup_routine(unsigned long); extern void wakeup_long64(void); diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index 13ab720573e..72610839f03 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -74,9 +74,7 @@ restore_registers: ENTRY(do_suspend_lowlevel) call save_processor_state call save_registers - pushl $3 - call acpi_enter_sleep_state - addl $4, %esp + call acpi_enter_s3 # In case of S3 failure, we'll emerge here. 
Jump # to ret_point to recover diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index 8ea5164cbd0..014d1d28c39 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -71,9 +71,7 @@ ENTRY(do_suspend_lowlevel) movq %rsi, saved_rsi addq $8, %rsp - movl $3, %edi - xorl %eax, %eax - call acpi_enter_sleep_state + call acpi_enter_s3 /* in case something went wrong, restore the machine status and go on */ jmp resume_point diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index b1e7c7f7a0a..e66311200cb 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -477,7 +477,7 @@ error: /* allocate and map a coherent mapping */ static void * gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, - gfp_t flag) + gfp_t flag, struct dma_attrs *attrs) { dma_addr_t paddr; unsigned long align_mask; @@ -500,7 +500,8 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, } __free_pages(page, get_order(size)); } else - return dma_generic_alloc_coherent(dev, size, dma_addr, flag); + return dma_generic_alloc_coherent(dev, size, dma_addr, flag, + attrs); return NULL; } @@ -508,7 +509,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, /* free a coherent mapping */ static void gart_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr) + dma_addr_t dma_addr, struct dma_attrs *attrs) { gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL); free_pages((unsigned long)vaddr, get_order(size)); @@ -700,8 +701,8 @@ static struct dma_map_ops gart_dma_ops = { .unmap_sg = gart_unmap_sg, .map_page = gart_map_page, .unmap_page = gart_unmap_page, - .alloc_coherent = gart_alloc_coherent, - .free_coherent = gart_free_coherent, + .alloc = gart_alloc_coherent, + .free = gart_free_coherent, .mapping_error = gart_mapping_error, }; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 5b3f88e607f..edc24480469 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -383,20 +383,25 @@ static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new) static unsigned int reserve_eilvt_offset(int offset, unsigned int new) { - unsigned int rsvd; /* 0: uninitialized */ + unsigned int rsvd, vector; if (offset >= APIC_EILVT_NR_MAX) return ~0; - rsvd = atomic_read(&eilvt_offsets[offset]) & ~APIC_EILVT_MASKED; + rsvd = atomic_read(&eilvt_offsets[offset]); do { - if (rsvd && - !eilvt_entry_is_changeable(rsvd, new)) + vector = rsvd & ~APIC_EILVT_MASKED; /* 0: unassigned */ + if (vector && !eilvt_entry_is_changeable(vector, new)) /* may not change if vectors are different */ return rsvd; rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new); } while (rsvd != new); + rsvd &= ~APIC_EILVT_MASKED; + if (rsvd && rsvd != vector) + pr_info("LVT offset %d assigned for vector 0x%02x\n", + offset, rsvd); + return new; } diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 8c3cdded6f2..359b6899a36 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -180,6 +180,7 @@ static struct apic apic_flat = { .name = "flat", .probe = flat_probe, .acpi_madt_oem_check = flat_acpi_madt_oem_check, + .apic_id_valid = default_apic_id_valid, .apic_id_registered = flat_apic_id_registered, .irq_delivery_mode = dest_LowestPrio, @@ -337,6 +338,7 @@ static struct apic apic_physflat = { .name = "physical flat", .probe = 
physflat_probe, .acpi_madt_oem_check = physflat_acpi_madt_oem_check, + .apic_id_valid = default_apic_id_valid, .apic_id_registered = flat_apic_id_registered, .irq_delivery_mode = dest_Fixed, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 775b82bc655..634ae6cdd5c 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -124,6 +124,7 @@ struct apic apic_noop = { .probe = noop_probe, .acpi_madt_oem_check = NULL, + .apic_id_valid = default_apic_id_valid, .apic_id_registered = noop_apic_id_registered, .irq_delivery_mode = dest_LowestPrio, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index ade0182a46e..23e75422e01 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -56,6 +56,12 @@ static unsigned int read_xapic_id(void) return get_apic_id(apic_read(APIC_ID)); } +static int numachip_apic_id_valid(int apicid) +{ + /* Trust what bootloader passes in MADT */ + return 1; +} + static int numachip_apic_id_registered(void) { return physid_isset(read_xapic_id(), phys_cpu_present_map); @@ -241,6 +247,7 @@ static struct apic apic_numachip __refconst = { .name = "NumaConnect system", .probe = numachip_probe, .acpi_madt_oem_check = numachip_acpi_madt_oem_check, + .apic_id_valid = numachip_apic_id_valid, .apic_id_registered = numachip_apic_id_registered, .irq_delivery_mode = dest_Fixed, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 521bead0113..0cdec7065af 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -198,6 +198,7 @@ static struct apic apic_bigsmp = { .name = "bigsmp", .probe = probe_bigsmp, .acpi_madt_oem_check = NULL, + .apic_id_valid = default_apic_id_valid, .apic_id_registered = bigsmp_apic_id_registered, .irq_delivery_mode = dest_Fixed, diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 5d513bc47b6..e42d1d3b913 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -625,6 +625,7 @@ static struct apic __refdata apic_es7000_cluster = { .name = "es7000", .probe = probe_es7000, .acpi_madt_oem_check = es7000_acpi_madt_oem_check_cluster, + .apic_id_valid = default_apic_id_valid, .apic_id_registered = es7000_apic_id_registered, .irq_delivery_mode = dest_LowestPrio, @@ -690,6 +691,7 @@ static struct apic __refdata apic_es7000 = { .name = "es7000", .probe = probe_es7000, .acpi_madt_oem_check = es7000_acpi_madt_oem_check, + .apic_id_valid = default_apic_id_valid, .apic_id_registered = es7000_apic_id_registered, .irq_delivery_mode = dest_Fixed, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9a15d4b9055..e88300d8e80 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -64,9 +64,28 @@ #include <asm/apic.h> #define __apicdebuginit(type) static type __init + #define for_each_irq_pin(entry, head) \ for (entry = head; entry; entry = entry->next) +static void __init __ioapic_init_mappings(void); + +static unsigned int __io_apic_read (unsigned int apic, unsigned int reg); +static void __io_apic_write (unsigned int apic, unsigned int reg, unsigned int val); +static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val); + +static struct io_apic_ops io_apic_ops = { + .init = __ioapic_init_mappings, + .read = __io_apic_read, + .write = __io_apic_write, + .modify = __io_apic_modify, +}; + +void __init set_io_apic_ops(const struct 
io_apic_ops *ops) +{ + io_apic_ops = *ops; +} + /* * Is the SiS APIC rmw bug present ? * -1 = don't know, 0 = no, 1 = yes @@ -294,6 +313,22 @@ static void free_irq_at(unsigned int at, struct irq_cfg *cfg) irq_free_desc(at); } +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) +{ + return io_apic_ops.read(apic, reg); +} + +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +{ + io_apic_ops.write(apic, reg, value); +} + +static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) +{ + io_apic_ops.modify(apic, reg, value); +} + + struct io_apic { unsigned int index; unsigned int unused[3]; @@ -314,17 +349,17 @@ static inline void io_apic_eoi(unsigned int apic, unsigned int vector) writel(vector, &io_apic->eoi); } -unsigned int native_ioapic_read(unsigned int apic, unsigned int reg) +static unsigned int __io_apic_read(unsigned int apic, unsigned int reg) { struct io_apic __iomem *io_apic = io_apic_base(apic); writel(reg, &io_apic->index); return readl(&io_apic->data); } -void native_ioapic_write(unsigned int apic, unsigned int reg, - unsigned int value) +static void __io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) { struct io_apic __iomem *io_apic = io_apic_base(apic); + writel(reg, &io_apic->index); writel(value, &io_apic->data); } @@ -335,8 +370,7 @@ void native_ioapic_write(unsigned int apic, unsigned int reg, * * Older SiS APIC requires we rewrite the index register */ -void native_ioapic_modify(unsigned int apic, unsigned int reg, - unsigned int value) +static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) { struct io_apic __iomem *io_apic = io_apic_base(apic); @@ -379,6 +413,7 @@ static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin) eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); + return eu.entry; } @@ -386,9 +421,11 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) { union entry_union eu; unsigned long flags; + raw_spin_lock_irqsave(&ioapic_lock, flags); eu.entry = __ioapic_read_entry(apic, pin); raw_spin_unlock_irqrestore(&ioapic_lock, flags); + return eu.entry; } @@ -398,8 +435,7 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) * the interrupt, and we need to make sure the entry is fully populated * before that happens. */ -static void -__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { union entry_union eu = {{0, 0}}; @@ -411,6 +447,7 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { unsigned long flags; + raw_spin_lock_irqsave(&ioapic_lock, flags); __ioapic_write_entry(apic, pin, e); raw_spin_unlock_irqrestore(&ioapic_lock, flags); @@ -437,8 +474,7 @@ static void ioapic_mask_entry(int apic, int pin) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. 
*/ -static int -__add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) +static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) { struct irq_pin_list **last, *entry; @@ -523,6 +559,7 @@ static void io_apic_sync(struct irq_pin_list *entry) * a dummy read from the IO-APIC */ struct io_apic __iomem *io_apic; + io_apic = io_apic_base(entry->apic); readl(&io_apic->data); } @@ -2514,21 +2551,73 @@ static void ack_apic_edge(struct irq_data *data) atomic_t irq_mis_count; -static void ack_apic_level(struct irq_data *data) -{ - struct irq_cfg *cfg = data->chip_data; - int i, do_unmask_irq = 0, irq = data->irq; - unsigned long v; - - irq_complete_move(cfg); #ifdef CONFIG_GENERIC_PENDING_IRQ +static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) +{ /* If we are moving the irq we need to mask it */ if (unlikely(irqd_is_setaffinity_pending(data))) { - do_unmask_irq = 1; mask_ioapic(cfg); + return true; } + return false; +} + +static inline void ioapic_irqd_unmask(struct irq_data *data, + struct irq_cfg *cfg, bool masked) +{ + if (unlikely(masked)) { + /* Only migrate the irq if the ack has been received. + * + * On rare occasions the broadcast level triggered ack gets + * delayed going to ioapics, and if we reprogram the + * vector while Remote IRR is still set the irq will never + * fire again. + * + * To prevent this scenario we read the Remote IRR bit + * of the ioapic. This has two effects. + * - On any sane system the read of the ioapic will + * flush writes (and acks) going to the ioapic from + * this cpu. + * - We get to see if the ACK has actually been delivered. + * + * Based on failed experiments of reprogramming the + * ioapic entry from outside of irq context starting + * with masking the ioapic entry and then polling until + * Remote IRR was clear before reprogramming the + * ioapic I don't trust the Remote IRR bit to be + * completey accurate. + * + * However there appears to be no other way to plug + * this race, so if the Remote IRR bit is not + * accurate and is causing problems then it is a hardware bug + * and you can go talk to the chipset vendor about it. + */ + if (!io_apic_level_ack_pending(cfg)) + irq_move_masked_irq(data); + unmask_ioapic(cfg); + } +} +#else +static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) +{ + return false; +} +static inline void ioapic_irqd_unmask(struct irq_data *data, + struct irq_cfg *cfg, bool masked) +{ +} #endif +static void ack_apic_level(struct irq_data *data) +{ + struct irq_cfg *cfg = data->chip_data; + int i, irq = data->irq; + unsigned long v; + bool masked; + + irq_complete_move(cfg); + masked = ioapic_irqd_mask(data, cfg); + /* * It appears there is an erratum which affects at least version 0x11 * of I/O APIC (that's the 82093AA and cores integrated into various @@ -2583,38 +2672,7 @@ static void ack_apic_level(struct irq_data *data) eoi_ioapic_irq(irq, cfg); } - /* Now we can move and renable the irq */ - if (unlikely(do_unmask_irq)) { - /* Only migrate the irq if the ack has been received. - * - * On rare occasions the broadcast level triggered ack gets - * delayed going to ioapics, and if we reprogram the - * vector while Remote IRR is still set the irq will never - * fire again. - * - * To prevent this scenario we read the Remote IRR bit - * of the ioapic. This has two effects. - * - On any sane system the read of the ioapic will - * flush writes (and acks) going to the ioapic from - * this cpu. 
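To summarize the ack_apic_level() refactor in progress in this hunk: the affinity-migration logic moves into an ioapic_irqd_mask()/ioapic_irqd_unmask() pair, so that builds without CONFIG_GENERIC_PENDING_IRQ get empty inline stubs instead of an #ifdef inside the handler. The resulting shape of the handler, condensed from the hunks above and below:

	static void ack_apic_level(struct irq_data *data)
	{
		struct irq_cfg *cfg = data->chip_data;
		bool masked;

		irq_complete_move(cfg);
		/* mask only if an affinity change is pending */
		masked = ioapic_irqd_mask(data, cfg);

		/* ... erratum handling and EOI, unchanged ... */

		/* wait for the ack (Remote IRR clear), migrate, unmask */
		ioapic_irqd_unmask(data, cfg, masked);
	}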
- * - We get to see if the ACK has actually been delivered. - * - * Based on failed experiments of reprogramming the - * ioapic entry from outside of irq context starting - * with masking the ioapic entry and then polling until - * Remote IRR was clear before reprogramming the - * ioapic I don't trust the Remote IRR bit to be - * completey accurate. - * - * However there appears to be no other way to plug - * this race, so if the Remote IRR bit is not - * accurate and is causing problems then it is a hardware bug - * and you can go talk to the chipset vendor about it. - */ - if (!io_apic_level_ack_pending(cfg)) - irq_move_masked_irq(data); - unmask_ioapic(cfg); - } + ioapic_irqd_unmask(data, cfg, masked); } #ifdef CONFIG_IRQ_REMAP @@ -3873,7 +3931,12 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics) return res; } -void __init native_ioapic_init_mappings(void) +void __init ioapic_and_gsi_init(void) +{ + io_apic_ops.init(); +} + +static void __init __ioapic_init_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; struct resource *ioapic_res; @@ -3969,18 +4032,36 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi) static __init int bad_ioapic(unsigned long address) { if (nr_ioapics >= MAX_IO_APICS) { - printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded " - "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics); + pr_warn("WARNING: Max # of I/O APICs (%d) exceeded (found %d), skipping\n", + MAX_IO_APICS, nr_ioapics); return 1; } if (!address) { - printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address" - " found in table, skipping!\n"); + pr_warn("WARNING: Bogus (zero) I/O APIC address found in table, skipping!\n"); return 1; } return 0; } +static __init int bad_ioapic_register(int idx) +{ + union IO_APIC_reg_00 reg_00; + union IO_APIC_reg_01 reg_01; + union IO_APIC_reg_02 reg_02; + + reg_00.raw = io_apic_read(idx, 0); + reg_01.raw = io_apic_read(idx, 1); + reg_02.raw = io_apic_read(idx, 2); + + if (reg_00.raw == -1 && reg_01.raw == -1 && reg_02.raw == -1) { + pr_warn("I/O APIC 0x%x registers return all ones, skipping!\n", + mpc_ioapic_addr(idx)); + return 1; + } + + return 0; +} + void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) { int idx = 0; @@ -3997,6 +4078,12 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) ioapics[idx].mp_config.apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); + + if (bad_ioapic_register(idx)) { + clear_fixmap(FIX_IO_APIC_BASE_0 + idx); + return; + } + ioapics[idx].mp_config.apicid = io_apic_unique_id(id); ioapics[idx].mp_config.apicver = io_apic_get_version(idx); @@ -4017,10 +4104,10 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) if (gsi_cfg->gsi_end >= gsi_top) gsi_top = gsi_cfg->gsi_end + 1; - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " - "GSI %d-%d\n", idx, mpc_ioapic_id(idx), - mpc_ioapic_ver(idx), mpc_ioapic_addr(idx), - gsi_cfg->gsi_base, gsi_cfg->gsi_end); + pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n", + idx, mpc_ioapic_id(idx), + mpc_ioapic_ver(idx), mpc_ioapic_addr(idx), + gsi_cfg->gsi_base, gsi_cfg->gsi_end); nr_ioapics++; } diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index c4a61ca1349..00d2422ca7c 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -478,6 +478,7 @@ static struct apic __refdata apic_numaq = { .name = "NUMAQ", .probe = probe_numaq, .acpi_madt_oem_check = NULL, + .apic_id_valid = 
default_apic_id_valid, .apic_id_registered = numaq_apic_id_registered, .irq_delivery_mode = dest_LowestPrio, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 0787bb3412f..ff2c1b9aac4 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -92,6 +92,7 @@ static struct apic apic_default = { .name = "default", .probe = probe_default, .acpi_madt_oem_check = NULL, + .apic_id_valid = default_apic_id_valid, .apic_id_registered = default_apic_id_registered, .irq_delivery_mode = dest_LowestPrio, diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 19114423c58..fea000b27f0 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -496,6 +496,7 @@ static struct apic apic_summit = { .name = "summit", .probe = probe_summit, .acpi_madt_oem_check = summit_acpi_madt_oem_check, + .apic_id_valid = default_apic_id_valid, .apic_id_registered = summit_apic_id_registered, .irq_delivery_mode = dest_LowestPrio, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 50079587582..48f3103b3c9 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -213,6 +213,7 @@ static struct apic apic_x2apic_cluster = { .name = "cluster x2apic", .probe = x2apic_cluster_probe, .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, + .apic_id_valid = x2apic_apic_id_valid, .apic_id_registered = x2apic_apic_id_registered, .irq_delivery_mode = dest_LowestPrio, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index f5373dfde21..991e315f422 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -24,6 +24,12 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { if (x2apic_phys) return x2apic_enabled(); + else if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) && + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) && + x2apic_enabled()) { + printk(KERN_DEBUG "System requires x2apic physical mode\n"); + return 1; + } else return 0; } @@ -119,6 +125,7 @@ static struct apic apic_x2apic_phys = { .name = "physical x2apic", .probe = x2apic_phys_probe, .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, + .apic_id_valid = x2apic_apic_id_valid, .apic_id_registered = x2apic_apic_id_registered, .irq_delivery_mode = dest_Fixed, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 79b05b88aa1..87bfa69e216 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -266,6 +266,11 @@ static void uv_send_IPI_all(int vector) uv_send_IPI_mask(cpu_online_mask, vector); } +static int uv_apic_id_valid(int apicid) +{ + return 1; +} + static int uv_apic_id_registered(void) { return 1; @@ -351,6 +356,7 @@ static struct apic __refdata apic_x2apic_uv_x = { .name = "UV large system", .probe = uv_probe, .acpi_madt_oem_check = uv_acpi_madt_oem_check, + .apic_id_valid = uv_apic_id_valid, .apic_id_registered = uv_apic_id_registered, .irq_delivery_mode = dest_Fixed, diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 219cf2f2078..2245eb5bd06 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -231,7 +231,6 @@ #include <linux/syscore_ops.h> #include <linux/i8253.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/desc.h> #include <asm/olpc.h> @@ -1234,8 +1233,7 @@ static int suspend(int vetoable) struct apm_user *as; 
dpm_suspend_start(PMSG_SUSPEND); - - dpm_suspend_noirq(PMSG_SUSPEND); + dpm_suspend_end(PMSG_SUSPEND); local_irq_disable(); syscore_suspend(); @@ -1259,9 +1257,9 @@ static int suspend(int vetoable) syscore_resume(); local_irq_enable(); - dpm_resume_noirq(PMSG_RESUME); - + dpm_resume_start(PMSG_RESUME); dpm_resume_end(PMSG_RESUME); + queue_event(APM_NORMAL_RESUME, NULL); spin_lock(&user_list_lock); for (as = user_list; as != NULL; as = as->next) { @@ -1277,7 +1275,7 @@ static void standby(void) { int err; - dpm_suspend_noirq(PMSG_SUSPEND); + dpm_suspend_end(PMSG_SUSPEND); local_irq_disable(); syscore_suspend(); @@ -1291,7 +1289,7 @@ static void standby(void) syscore_resume(); local_irq_enable(); - dpm_resume_noirq(PMSG_RESUME); + dpm_resume_start(PMSG_RESUME); } static apm_event_t get_event(void) diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 834e897b1e2..1b4754f82ba 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -1,6 +1,12 @@ #include <asm/ia32.h> #define __SYSCALL_64(nr, sym, compat) [nr] = 1, +#define __SYSCALL_COMMON(nr, sym, compat) [nr] = 1, +#ifdef CONFIG_X86_X32_ABI +# define __SYSCALL_X32(nr, sym, compat) [nr] = 1, +#else +# define __SYSCALL_X32(nr, sym, compat) /* nothing */ +#endif static char syscalls_64[] = { #include <asm/syscalls_64.h> }; diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 25f24dccdcf..6ab6aa2fdfd 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -16,6 +16,7 @@ obj-y := intel_cacheinfo.o scattered.o topology.o obj-y += proc.o capflags.o powerflags.o common.o obj-y += vmware.o hypervisor.o sched.o mshyperv.o obj-y += rdrand.o +obj-y += match.o obj-$(CONFIG_X86_32) += bugs.o obj-$(CONFIG_X86_64) += bugs_64.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 80ab83dc5f6..146bb6218ee 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -5,6 +5,7 @@ #include <linux/mm.h> #include <linux/io.h> +#include <linux/sched.h> #include <asm/processor.h> #include <asm/apic.h> #include <asm/cpu.h> @@ -25,7 +26,8 @@ * contact AMD for precise details and a CPU swap. * * See http://www.multimania.com/poulot/k6bug.html - * http://www.amd.com/K6/k6docs/revgd.html + * and section 2.6.2 of "AMD-K6 Processor Revision Guide - Model 6" + * (Publication # 21266 Issue Date: August 1998) * * The following test is erm.. interesting. 
AMD neglected to up * the chip setting when fixing the bug but they also tweaked some @@ -93,7 +95,6 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) "system stability may be impaired when more than 32 MB are used.\n"); else printk(KERN_CONT "probably OK (after B9730xxxx).\n"); - printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); } /* K6 with old style WHCR */ @@ -457,6 +458,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); + if (!check_tsc_unstable()) + sched_clock_stable = 1; } #ifdef CONFIG_X86_64 @@ -577,6 +580,24 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } } + /* re-enable TopologyExtensions if switched off by BIOS */ + if ((c->x86 == 0x15) && + (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && + !cpu_has(c, X86_FEATURE_TOPOEXT)) { + u64 val; + + if (!rdmsrl_amd_safe(0xc0011005, &val)) { + val |= 1ULL << 54; + wrmsrl_amd_safe(0xc0011005, val); + rdmsrl(0xc0011005, val); + if (val & (1ULL << 54)) { + set_cpu_cap(c, X86_FEATURE_TOPOEXT); + printk(KERN_INFO FW_INFO "CPU: Re-enabling " + "disabled Topology Extensions Support\n"); + } + } + } + cpu_detect_cache_sizes(c); /* Multi core CPU? */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 16bd46646c8..cf79302198a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -18,6 +18,7 @@ #include <asm/archrandom.h> #include <asm/hypervisor.h> #include <asm/processor.h> +#include <asm/debugreg.h> #include <asm/sections.h> #include <linux/topology.h> #include <linux/cpumask.h> @@ -28,6 +29,7 @@ #include <asm/apic.h> #include <asm/desc.h> #include <asm/i387.h> +#include <asm/fpu-internal.h> #include <asm/mtrr.h> #include <linux/numa.h> #include <asm/asm.h> @@ -839,22 +841,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) /* Filter out anything that depends on CPUID levels we don't have */ filter_cpuid_features(c, true); -#ifdef CONFIG_X86_32 - /* - * emulation of NX with segment limits unfortunately means - * we have to disable the fast system calls, due to the way that - * sysexit clears the segment limits on return. - * If we have either disabled exec-shield on the boot command line, - * or we have NX, then we don't need to do this. - */ - if (!disable_nx) { -#ifdef CONFIG_X86_PAE - if (!test_cpu_cap(c, X86_FEATURE_NX)) -#endif - clear_cpu_cap(c, X86_FEATURE_SEP); - } -#endif - /* If the model name is still unset, do table lookup. 
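The TopologyExtensions hunk above is a useful read-modify-write-verify pattern: the MSR write uses the _safe variant because the access may fault on some BIOS configurations, and the plain read-back confirms that bit 54 actually latched before the feature bit is set. Condensed from the hunk, with the verification step annotated:

	u64 val;

	if (!rdmsrl_amd_safe(0xc0011005, &val)) {
		val |= 1ULL << 54;		/* TopologyExtensions enable */
		wrmsrl_amd_safe(0xc0011005, val);
		rdmsrl(0xc0011005, val);
		if (val & (1ULL << 54))		/* only trust it if the write stuck */
			set_cpu_cap(c, X86_FEATURE_TOPOEXT);
	}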
*/ if (!c->x86_model_id[0]) { const char *p; @@ -949,7 +935,7 @@ static const struct msr_range msr_range_array[] __cpuinitconst = { { 0xc0011000, 0xc001103b}, }; -static void __cpuinit print_cpu_msr(void) +static void __cpuinit __print_cpu_msr(void) { unsigned index_min, index_max; unsigned index; @@ -1013,13 +999,13 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) else printk(KERN_CONT "\n"); -#ifdef CONFIG_SMP + print_cpu_msr(c); +} + +void __cpuinit print_cpu_msr(struct cpuinfo_x86 *c) +{ if (c->cpu_index < show_msr) - print_cpu_msr(); -#else - if (show_msr) - print_cpu_msr(); -#endif + __print_cpu_msr(); } static __init int setup_disablecpuid(char *arg) @@ -1061,7 +1047,6 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) = DEFINE_PER_CPU(unsigned int, irq_count) = -1; DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); -EXPORT_PER_CPU_SYMBOL(fpu_owner_task); /* * Special IST stacks which the CPU switches to when it calls @@ -1131,7 +1116,6 @@ void debug_stack_reset(void) DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); -EXPORT_PER_CPU_SYMBOL(fpu_owner_task); #ifdef CONFIG_CC_STACKPROTECTOR DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 73d08ed98a6..b8f3653dddb 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -433,14 +433,14 @@ int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot, /* check if @slot is already used or the index is already disabled */ ret = amd_get_l3_disable_slot(nb, slot); if (ret >= 0) - return -EINVAL; + return -EEXIST; if (index > nb->l3_cache.indices) return -EINVAL; /* check whether the other slot has disabled the same index already */ if (index == amd_get_l3_disable_slot(nb, !slot)) - return -EINVAL; + return -EEXIST; amd_l3_disable_index(nb, cpu, slot, index); @@ -468,8 +468,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val); if (err) { if (err == -EEXIST) - printk(KERN_WARNING "L3 disable slot %d in use!\n", - slot); + pr_warning("L3 slot %d in use/index already disabled!\n", + slot); return err; } return count; diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c new file mode 100644 index 00000000000..5502b289341 --- /dev/null +++ b/arch/x86/kernel/cpu/match.c @@ -0,0 +1,91 @@ +#include <asm/cpu_device_id.h> +#include <asm/processor.h> +#include <linux/cpu.h> +#include <linux/module.h> +#include <linux/slab.h> + +/** + * x86_match_cpu - match current CPU again an array of x86_cpu_ids + * @match: Pointer to array of x86_cpu_ids. Last entry terminated with + * {}. + * + * Return the entry if the current CPU matches the entries in the + * passed x86_cpu_id match table. Otherwise NULL. The match table + * contains vendor (X86_VENDOR_*), family, model and feature bits or + * respective wildcard entries. + * + * A typical table entry would be to match a specific CPU + * { X86_VENDOR_INTEL, 6, 0x12 } + * or to match a specific CPU feature + * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) } + * + * Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY, + * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor) + * + * Arrays used to match for this should also be declared using + * MODULE_DEVICE_TABLE(x86_cpu, ...) 
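As the new kerneldoc above suggests, a typical consumer of x86_match_cpu() declares a terminated table and bails out when nothing matches. An illustrative sketch (the driver name, feature choice, and table contents are hypothetical, not from this patch; the device-table alias follows the comment above):

	static const struct x86_cpu_id mydrv_cpu_ids[] = {
		{ X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_MWAIT },
		{}	/* terminating empty entry */
	};
	MODULE_DEVICE_TABLE(x86_cpu, mydrv_cpu_ids);

	static int __init mydrv_init(void)
	{
		if (!x86_match_cpu(mydrv_cpu_ids))
			return -ENODEV;
		/* ... probe hardware ... */
		return 0;
	}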
+ * + * This always matches against the boot cpu, assuming models and features are + * consistent over all CPUs. + */ +const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match) +{ + const struct x86_cpu_id *m; + struct cpuinfo_x86 *c = &boot_cpu_data; + + for (m = match; m->vendor | m->family | m->model | m->feature; m++) { + if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor) + continue; + if (m->family != X86_FAMILY_ANY && c->x86 != m->family) + continue; + if (m->model != X86_MODEL_ANY && c->x86_model != m->model) + continue; + if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature)) + continue; + return m; + } + return NULL; +} +EXPORT_SYMBOL(x86_match_cpu); + +ssize_t arch_print_cpu_modalias(struct device *dev, + struct device_attribute *attr, + char *bufptr) +{ + int size = PAGE_SIZE; + int i, n; + char *buf = bufptr; + + n = snprintf(buf, size, "x86cpu:vendor:%04X:family:%04X:" + "model:%04X:feature:", + boot_cpu_data.x86_vendor, + boot_cpu_data.x86, + boot_cpu_data.x86_model); + size -= n; + buf += n; + size -= 1; + for (i = 0; i < NCAPINTS*32; i++) { + if (boot_cpu_has(i)) { + n = snprintf(buf, size, ",%04X", i); + if (n >= size) { + WARN(1, "x86 features overflow page\n"); + break; + } + size -= n; + buf += n; + } + } + *buf++ = '\n'; + return buf - bufptr; +} + +int arch_cpu_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (buf) { + arch_print_cpu_modalias(NULL, NULL, buf); + add_uevent_var(env, "MODALIAS=%s", buf); + kfree(buf); + } + return 0; +} diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 7395d5f4272..1ccd453903d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -54,7 +54,14 @@ static struct severity { #define MASK(x, y) .mask = x, .result = y #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) +#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV) #define MCACOD 0xffff +/* Architecturally defined codes from SDM Vol. 
3B Chapter 15 */ +#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ +#define MCACOD_SCRUBMSK 0xfff0 +#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ +#define MCACOD_DATA 0x0134 /* Data Load */ +#define MCACOD_INSTR 0x0150 /* Instruction Fetch */ MCESEV( NO, "Invalid", @@ -102,11 +109,24 @@ static struct severity { SER, BITCLR(MCI_STATUS_S) ), - /* AR add known MCACODs here */ MCESEV( PANIC, "Action required with lost events", SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR) ), + + /* known AR MCACODs: */ +#ifdef CONFIG_MEMORY_FAILURE + MCESEV( + KEEP, "HT thread notices Action required: data load error", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), + MCGMASK(MCG_STATUS_EIPV, 0) + ), + MCESEV( + AR, "Action required: data load error", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), + USER + ), +#endif MCESEV( PANIC, "Action required: unknown MCACOD", SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR) @@ -115,11 +135,11 @@ static struct severity { /* known AO MCACODs: */ MCESEV( AO, "Action optional: memory scrubbing error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|0xfff0, MCI_UC_S|0x00c0) + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB) ), MCESEV( AO, "Action optional: last level cache writeback error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|0x017a) + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB) ), MCESEV( SOME, "Action optional: unknown MCACOD", @@ -145,15 +165,19 @@ static struct severity { }; /* - * If the EIPV bit is set, it means the saved IP is the - * instruction which caused the MCE. + * If mcgstatus indicated that ip/cs on the stack were + * no good, then "m->cs" will be zero and we will have + * to assume the worst case (IN_KERNEL) as we actually + * have no idea what we were executing when the machine + * check hit. + * If we do have a good "m->cs" (or a faked one in the + * case we were executing in VM86 mode) we can use it to + * distinguish an exception taken in user from from one + * taken in the kernel. */ static int error_context(struct mce *m) { - if (m->mcgstatus & MCG_STATUS_EIPV) - return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL; - /* Unknown, assume kernel */ - return IN_KERNEL; + return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL; } int mce_severity(struct mce *m, int tolerant, char **msg) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index db590aff874..61604aefc40 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -437,6 +437,14 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs) if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) { m->ip = regs->ip; m->cs = regs->cs; + + /* + * When in VM86 mode make the cs look like ring 3 + * always. This is a lie, but it's better than passing + * the additional vm86 bit around everywhere. + */ + if (v8086_mode(regs)) + m->cs |= 3; } /* Use accurate RIP reporting if available. */ if (rip_msr) @@ -540,6 +548,27 @@ static void mce_report_event(struct pt_regs *regs) irq_work_queue(&__get_cpu_var(mce_irq_work)); } +/* + * Read ADDR and MISC registers. + */ +static void mce_read_aux(struct mce *m, int i) +{ + if (m->status & MCI_STATUS_MISCV) + m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); + if (m->status & MCI_STATUS_ADDRV) { + m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); + + /* + * Mask the reported address by the reported granularity. 
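A worked example of that granularity masking (the implementation follows just below), assuming MCI_MISC_ADDR_LSB() yields the index of the least significant valid address bit, as the SDM defines:

	/* MISC says bits 63:12 of ADDR are valid, i.e. 4K granularity */
	u8  shift = 12;			/* MCI_MISC_ADDR_LSB(m->misc) */
	u64 addr  = 0x12345fffULL;

	addr >>= shift;
	addr <<= shift;			/* -> 0x12345000, the page address */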
+ */ + if (mce_ser && (m->status & MCI_STATUS_MISCV)) { + u8 shift = MCI_MISC_ADDR_LSB(m->misc); + m->addr >>= shift; + m->addr <<= shift; + } + } +} + DEFINE_PER_CPU(unsigned, mce_poll_count); /* @@ -590,10 +619,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC))) continue; - if (m.status & MCI_STATUS_MISCV) - m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); - if (m.status & MCI_STATUS_ADDRV) - m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); + mce_read_aux(&m, i); if (!(flags & MCP_TIMESTAMP)) m.tsc = 0; @@ -917,6 +943,51 @@ static void mce_clear_state(unsigned long *toclear) } /* + * Need to save faulting physical address associated with a process + * in the machine check handler some place where we can grab it back + * later in mce_notify_process() + */ +#define MCE_INFO_MAX 16 + +struct mce_info { + atomic_t inuse; + struct task_struct *t; + __u64 paddr; + int restartable; +} mce_info[MCE_INFO_MAX]; + +static void mce_save_info(__u64 addr, int c) +{ + struct mce_info *mi; + + for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) { + if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) { + mi->t = current; + mi->paddr = addr; + mi->restartable = c; + return; + } + } + + mce_panic("Too many concurrent recoverable errors", NULL, NULL); +} + +static struct mce_info *mce_find_info(void) +{ + struct mce_info *mi; + + for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) + if (atomic_read(&mi->inuse) && mi->t == current) + return mi; + return NULL; +} + +static void mce_clear_info(struct mce_info *mi) +{ + atomic_set(&mi->inuse, 0); +} + +/* * The actual machine check handler. This only handles real * exceptions when something got corrupted coming in through int 18. * @@ -969,7 +1040,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) barrier(); /* - * When no restart IP must always kill or panic. + * When no restart IP might need to kill or panic. + * Assume the worst for now, but if we find the + * severity is MCE_AR_SEVERITY we have other options. */ if (!(m.mcgstatus & MCG_STATUS_RIPV)) kill_it = 1; @@ -1023,16 +1096,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) continue; } - /* - * Kill on action required. - */ - if (severity == MCE_AR_SEVERITY) - kill_it = 1; - - if (m.status & MCI_STATUS_MISCV) - m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); - if (m.status & MCI_STATUS_ADDRV) - m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); + mce_read_aux(&m, i); /* * Action optional error. Queue address for later processing. @@ -1052,6 +1116,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) } } + /* mce_clear_state will clear *final, save locally for use later */ + m = *final; + if (!no_way_out) mce_clear_state(toclear); @@ -1063,27 +1130,22 @@ void do_machine_check(struct pt_regs *regs, long error_code) no_way_out = worst >= MCE_PANIC_SEVERITY; /* - * If we have decided that we just CAN'T continue, and the user - * has not set tolerant to an insane level, give up and die. - * - * This is mainly used in the case when the system doesn't - * support MCE broadcasting or it has been disabled. - */ - if (no_way_out && tolerant < 3) - mce_panic("Fatal machine check on current CPU", final, msg); - - /* - * If the error seems to be unrecoverable, something should be - * done. Try to kill as little as possible. If we can kill just - * one task, do that. If the user has set the tolerance very - * high, don't try to do anything at all. + * At insane "tolerant" levels we take no action. 
Otherwise + * we only die if we have no other choice. For less serious + * issues we try to recover, or limit damage to the current + * process. */ - - if (kill_it && tolerant < 3) - force_sig(SIGBUS, current); - - /* notify userspace ASAP */ - set_thread_flag(TIF_MCE_NOTIFY); + if (tolerant < 3) { + if (no_way_out) + mce_panic("Fatal machine check on current CPU", &m, msg); + if (worst == MCE_AR_SEVERITY) { + /* schedule action before return to userland */ + mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV); + set_thread_flag(TIF_MCE_NOTIFY); + } else if (kill_it) { + force_sig(SIGBUS, current); + } + } if (worst > 0) mce_report_event(regs); @@ -1094,34 +1156,63 @@ out: } EXPORT_SYMBOL_GPL(do_machine_check); -/* dummy to break dependency. actual code is in mm/memory-failure.c */ -void __attribute__((weak)) memory_failure(unsigned long pfn, int vector) +#ifndef CONFIG_MEMORY_FAILURE +int memory_failure(unsigned long pfn, int vector, int flags) { - printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn); + /* mce_severity() should not hand us an ACTION_REQUIRED error */ + BUG_ON(flags & MF_ACTION_REQUIRED); + printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n" + "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn); + + return 0; } +#endif /* - * Called after mce notification in process context. This code - * is allowed to sleep. Call the high level VM handler to process - * any corrupted pages. - * Assume that the work queue code only calls this one at a time - * per CPU. - * Note we don't disable preemption, so this code might run on the wrong - * CPU. In this case the event is picked up by the scheduled work queue. - * This is merely a fast path to expedite processing in some common - * cases. + * Called in process context that interrupted by MCE and marked with + * TIF_MCE_NOTIFY, just before returning to erroneous userland. + * This code is allowed to sleep. + * Attempt possible recovery such as calling the high level VM handler to + * process any corrupted pages, and kill/signal current process if required. + * Action required errors are handled here. */ void mce_notify_process(void) { unsigned long pfn; - mce_notify_irq(); - while (mce_ring_get(&pfn)) - memory_failure(pfn, MCE_VECTOR); + struct mce_info *mi = mce_find_info(); + + if (!mi) + mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL); + pfn = mi->paddr >> PAGE_SHIFT; + + clear_thread_flag(TIF_MCE_NOTIFY); + + pr_err("Uncorrected hardware memory error in user-access at %llx", + mi->paddr); + /* + * We must call memory_failure() here even if the current process is + * doomed. We still need to mark the page as poisoned and alert any + * other users of the page. + */ + if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 || + mi->restartable == 0) { + pr_err("Memory error not recovered"); + force_sig(SIGBUS, current); + } + mce_clear_info(mi); } +/* + * Action optional processing happens here (picking up + * from the list of faulting pages that do_machine_check() + * placed into the "ring"). 
+ */ static void mce_process_work(struct work_struct *dummy) { - mce_notify_process(); + unsigned long pfn; + + while (mce_ring_get(&pfn)) + memory_failure(pfn, MCE_VECTOR, 0); } #ifdef CONFIG_X86_MCE_INTEL @@ -1211,8 +1302,6 @@ int mce_notify_irq(void) /* Not more than two messages every minute */ static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); - clear_thread_flag(TIF_MCE_NOTIFY); - if (test_and_clear_bit(0, &mce_need_notify)) { /* wake processes polling /dev/mcelog */ wake_up_interruptible(&mce_chrdev_wait); @@ -1541,6 +1630,12 @@ static int __mce_read_apei(char __user **ubuf, size_t usize) /* Error or no more MCE record */ if (rc <= 0) { mce_apei_read_done = 1; + /* + * When ERST is disabled, mce_chrdev_read() should return + * "no record" instead of "no device." + */ + if (rc == -ENODEV) + return 0; return rc; } rc = -EFAULT; @@ -1859,7 +1954,7 @@ static struct bus_type mce_subsys = { .dev_name = "machinecheck", }; -struct device *mce_device[CONFIG_NR_CPUS]; +DEFINE_PER_CPU(struct device *, mce_device); __cpuinitdata void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); @@ -2038,7 +2133,7 @@ static __cpuinit int mce_device_create(unsigned int cpu) goto error2; } cpumask_set_cpu(cpu, mce_device_initialized); - mce_device[cpu] = dev; + per_cpu(mce_device, cpu) = dev; return 0; error2: @@ -2055,7 +2150,7 @@ error: static __cpuinit void mce_device_remove(unsigned int cpu) { - struct device *dev = mce_device[cpu]; + struct device *dev = per_cpu(mce_device, cpu); int i; if (!cpumask_test_cpu(cpu, mce_device_initialized)) @@ -2069,7 +2164,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu) device_unregister(dev); cpumask_clear_cpu(cpu, mce_device_initialized); - mce_device[cpu] = NULL; + per_cpu(mce_device, cpu) = NULL; } /* Make sure there are no machine checks on offlined CPUs. 
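One more storage change threaded through the hunks above and the mce_amd.c hunks that follow: the CONFIG_NR_CPUS-sized mce_device array becomes a per-CPU variable, so the pointer lives in each CPU's per-CPU area instead of a global table. The access pattern before and after, for reference:

	/* before */
	struct device *dev = mce_device[cpu];

	/* after */
	struct device *dev = per_cpu(mce_device, cpu);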
*/ diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index e4eeaaf58a4..99b57179f91 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -523,7 +523,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) { int i, err = 0; struct threshold_bank *b = NULL; - struct device *dev = mce_device[cpu]; + struct device *dev = per_cpu(mce_device, cpu); char name[32]; sprintf(name, "threshold_bank%i", bank); @@ -587,7 +587,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (i == cpu) continue; - dev = mce_device[i]; + dev = per_cpu(mce_device, i); if (dev) err = sysfs_create_link(&dev->kobj,b->kobj, name); if (err) @@ -667,7 +667,8 @@ static void threshold_remove_bank(unsigned int cpu, int bank) #ifdef CONFIG_SMP /* sibling symlink */ if (shared_bank[bank] && b->blocks->cpu != cpu) { - sysfs_remove_link(&mce_device[cpu]->kobj, name); + dev = per_cpu(mce_device, cpu); + sysfs_remove_link(&dev->kobj, name); per_cpu(threshold_banks, cpu)[bank] = NULL; return; @@ -679,7 +680,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) if (i == cpu) continue; - dev = mce_device[i]; + dev = per_cpu(mce_device, i); if (dev) sysfs_remove_link(&dev->kobj, name); per_cpu(threshold_banks, i)[bank] = NULL; diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 5c0e6533d9b..2d5454cd2c4 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -9,7 +9,6 @@ #include <linux/smp.h> #include <asm/processor.h> -#include <asm/system.h> #include <asm/mce.h> #include <asm/msr.h> diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 67bb17a37a0..47a1870279a 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -25,7 +25,6 @@ #include <linux/cpu.h> #include <asm/processor.h> -#include <asm/system.h> #include <asm/apic.h> #include <asm/idle.h> #include <asm/mce.h> diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 54060f56597..2d7998fb628 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -8,7 +8,6 @@ #include <linux/init.h> #include <asm/processor.h> -#include <asm/system.h> #include <asm/mce.h> #include <asm/msr.h> diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 97b26356e9e..75772ae6c65 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -12,7 +12,6 @@ #include <asm/processor-flags.h> #include <asm/cpufeature.h> #include <asm/tlbflush.h> -#include <asm/system.h> #include <asm/mtrr.h> #include <asm/msr.h> #include <asm/pat.h> diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 79289632cb2..a041e094b8b 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -167,6 +167,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) { int err = 0; mtrr_type type; + unsigned long base; unsigned long size; struct mtrr_sentry sentry; struct mtrr_gentry gentry; @@ -267,14 +268,14 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) #endif if (gentry.regnum >= num_var_ranges) return -EINVAL; - mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); + mtrr_if->get(gentry.regnum, &base, &size, &type); /* Hide entries that go above 4GB */ - if (gentry.base + size - 1 >= (1UL 
<< (8 * sizeof(gentry.size) - PAGE_SHIFT)) + if (base + size - 1 >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT)) || size >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT))) gentry.base = gentry.size = gentry.type = 0; else { - gentry.base <<= PAGE_SHIFT; + gentry.base = base << PAGE_SHIFT; gentry.size = size << PAGE_SHIFT; gentry.type = type; } @@ -321,11 +322,12 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) #endif if (gentry.regnum >= num_var_ranges) return -EINVAL; - mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); + mtrr_if->get(gentry.regnum, &base, &size, &type); /* Hide entries that would overflow */ if (size != (__typeof__(gentry.size))size) gentry.base = gentry.size = gentry.type = 0; else { + gentry.base = base; gentry.size = size; gentry.type = type; } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5adce1040b1..bb8e03407e1 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -24,13 +24,14 @@ #include <linux/slab.h> #include <linux/cpu.h> #include <linux/bitops.h> +#include <linux/device.h> #include <asm/apic.h> #include <asm/stacktrace.h> #include <asm/nmi.h> -#include <asm/compat.h> #include <asm/smp.h> #include <asm/alternative.h> +#include <asm/timer.h> #include "perf_event.h" @@ -351,6 +352,36 @@ int x86_setup_perfctr(struct perf_event *event) return 0; } +/* + * check that branch_sample_type is compatible with + * settings needed for precise_ip > 1 which implies + * using the LBR to capture ALL taken branches at the + * priv levels of the measurement + */ +static inline int precise_br_compat(struct perf_event *event) +{ + u64 m = event->attr.branch_sample_type; + u64 b = 0; + + /* must capture all branches */ + if (!(m & PERF_SAMPLE_BRANCH_ANY)) + return 0; + + m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER; + + if (!event->attr.exclude_user) + b |= PERF_SAMPLE_BRANCH_USER; + + if (!event->attr.exclude_kernel) + b |= PERF_SAMPLE_BRANCH_KERNEL; + + /* + * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86 + */ + + return m == b; +} + int x86_pmu_hw_config(struct perf_event *event) { if (event->attr.precise_ip) { @@ -367,6 +398,36 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.precise_ip > precise) return -EOPNOTSUPP; + /* + * check that PEBS LBR correction does not conflict with + * whatever the user is asking with attr->branch_sample_type + */ + if (event->attr.precise_ip > 1) { + u64 *br_type = &event->attr.branch_sample_type; + + if (has_branch_stack(event)) { + if (!precise_br_compat(event)) + return -EOPNOTSUPP; + + /* branch_sample_type is compatible */ + + } else { + /* + * user did not specify branch_sample_type + * + * For PEBS fixups, we capture all + * the branches at the priv level of the + * event. 
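Concretely, for the PEBS defaulting just described: an event with precise_ip > 1 and no explicit branch filter gets a branch_sample_type synthesized from its privilege settings. A sketch of the outcome for a user-only event (the attr values are an illustrative example, not from this patch):

	struct perf_event_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_CPU_CYCLES,
		.precise_ip	= 2,	/* wants LBR-assisted skid correction */
		.exclude_kernel	= 1,
	};

	/*
	 * x86_pmu_hw_config() then sets:
	 *   attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY
	 *                           | PERF_SAMPLE_BRANCH_USER;
	 */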
+ */ + *br_type = PERF_SAMPLE_BRANCH_ANY; + + if (!event->attr.exclude_user) + *br_type |= PERF_SAMPLE_BRANCH_USER; + + if (!event->attr.exclude_kernel) + *br_type |= PERF_SAMPLE_BRANCH_KERNEL; + } + } } /* @@ -424,6 +485,10 @@ static int __x86_pmu_event_init(struct perf_event *event) /* mark unused */ event->hw.extra_reg.idx = EXTRA_REG_NONE; + /* mark not used */ + event->hw.extra_reg.idx = EXTRA_REG_NONE; + event->hw.branch_reg.idx = EXTRA_REG_NONE; + return x86_pmu.hw_config(event); } @@ -577,14 +642,14 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) /* Prefer fixed purpose counters */ if (x86_pmu.num_counters_fixed) { idx = X86_PMC_IDX_FIXED; - for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) { + for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { if (!__test_and_set_bit(idx, sched->state.used)) goto done; } } /* Grab the first unused counter starting with idx */ idx = sched->state.counter; - for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) { + for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_FIXED) { if (!__test_and_set_bit(idx, sched->state.used)) goto done; } @@ -1210,6 +1275,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) break; case CPU_STARTING: + if (x86_pmu.attr_rdpmc) + set_in_cr4(X86_CR4_PCE); if (x86_pmu.cpu_starting) x86_pmu.cpu_starting(cpu); break; @@ -1246,6 +1313,11 @@ static void __init pmu_check_apic(void) pr_info("no hardware sampling interrupt available.\n"); } +static struct attribute_group x86_pmu_format_group = { + .name = "format", + .attrs = NULL, +}; + static int __init init_hw_perf_events(void) { struct x86_pmu_quirk *quirk; @@ -1319,6 +1391,9 @@ static int __init init_hw_perf_events(void) } } + x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ + x86_pmu_format_group.attrs = x86_pmu.format_attrs; + pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.cntval_bits); pr_info("... 
generic registers: %d\n", x86_pmu.num_counters); @@ -1542,23 +1617,115 @@ static int x86_pmu_event_init(struct perf_event *event) return err; } +static int x86_pmu_event_idx(struct perf_event *event) +{ + int idx = event->hw.idx; + + if (!x86_pmu.attr_rdpmc) + return 0; + + if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { + idx -= X86_PMC_IDX_FIXED; + idx |= 1 << 30; + } + + return idx + 1; +} + +static ssize_t get_attr_rdpmc(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc); +} + +static void change_rdpmc(void *info) +{ + bool enable = !!(unsigned long)info; + + if (enable) + set_in_cr4(X86_CR4_PCE); + else + clear_in_cr4(X86_CR4_PCE); +} + +static ssize_t set_attr_rdpmc(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned long val = simple_strtoul(buf, NULL, 0); + + if (!!val != !!x86_pmu.attr_rdpmc) { + x86_pmu.attr_rdpmc = !!val; + smp_call_function(change_rdpmc, (void *)val, 1); + } + + return count; +} + +static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc); + +static struct attribute *x86_pmu_attrs[] = { + &dev_attr_rdpmc.attr, + NULL, +}; + +static struct attribute_group x86_pmu_attr_group = { + .attrs = x86_pmu_attrs, +}; + +static const struct attribute_group *x86_pmu_attr_groups[] = { + &x86_pmu_attr_group, + &x86_pmu_format_group, + NULL, +}; + +static void x86_pmu_flush_branch_stack(void) +{ + if (x86_pmu.flush_branch_stack) + x86_pmu.flush_branch_stack(); +} + static struct pmu pmu = { - .pmu_enable = x86_pmu_enable, - .pmu_disable = x86_pmu_disable, + .pmu_enable = x86_pmu_enable, + .pmu_disable = x86_pmu_disable, + + .attr_groups = x86_pmu_attr_groups, .event_init = x86_pmu_event_init, - .add = x86_pmu_add, - .del = x86_pmu_del, - .start = x86_pmu_start, - .stop = x86_pmu_stop, - .read = x86_pmu_read, + .add = x86_pmu_add, + .del = x86_pmu_del, + .start = x86_pmu_start, + .stop = x86_pmu_stop, + .read = x86_pmu_read, .start_txn = x86_pmu_start_txn, .cancel_txn = x86_pmu_cancel_txn, .commit_txn = x86_pmu_commit_txn, + + .event_idx = x86_pmu_event_idx, + .flush_branch_stack = x86_pmu_flush_branch_stack, }; +void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) +{ + userpg->cap_usr_time = 0; + userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc; + userpg->pmc_width = x86_pmu.cntval_bits; + + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + return; + + if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + return; + + userpg->cap_usr_time = 1; + userpg->time_mult = this_cpu_read(cyc2ns); + userpg->time_shift = CYC2NS_SCALE_FACTOR; + userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; +} + /* * callchain support */ @@ -1595,6 +1762,9 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) } #ifdef CONFIG_COMPAT + +#include <asm/compat.h> + static inline int perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) { diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index c30c807ddc7..6638aaf5449 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -33,6 +33,7 @@ enum extra_reg_type { EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ + EXTRA_REG_LBR = 2, /* lbr_select */ EXTRA_REG_MAX /* number of entries needed */ }; @@ -130,6 +131,8 @@ struct cpu_hw_events { void *lbr_context; struct perf_branch_stack lbr_stack; struct perf_branch_entry 
lbr_entries[MAX_LBR_ENTRIES]; + struct er_account *lbr_sel; + u64 br_sel; /* * Intel host/guest exclude bits @@ -268,6 +271,29 @@ struct x86_pmu_quirk { void (*func)(void); }; +union x86_pmu_config { + struct { + u64 event:8, + umask:8, + usr:1, + os:1, + edge:1, + pc:1, + interrupt:1, + __reserved1:1, + en:1, + inv:1, + cmask:8, + event2:4, + __reserved2:4, + go:1, + ho:1; + } bits; + u64 value; +}; + +#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value + /* * struct x86_pmu - generic x86 pmu */ @@ -309,10 +335,20 @@ struct x86_pmu { struct x86_pmu_quirk *quirks; int perfctr_second_write; + /* + * sysfs attrs + */ + int attr_rdpmc; + struct attribute **format_attrs; + + /* + * CPU Hotplug hooks + */ int (*cpu_prepare)(int cpu); void (*cpu_starting)(int cpu); void (*cpu_dying)(int cpu); void (*cpu_dead)(int cpu); + void (*flush_branch_stack)(void); /* * Intel Arch Perfmon v2+ @@ -334,6 +370,8 @@ struct x86_pmu { */ unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ int lbr_nr; /* hardware stack size */ + u64 lbr_sel_mask; /* LBR_SELECT valid bits */ + const int *lbr_sel_map; /* lbr_select mappings */ /* * Extra registers for events @@ -447,6 +485,15 @@ extern struct event_constraint emptyconstraint; extern struct event_constraint unconstrained; +static inline bool kernel_ip(unsigned long ip) +{ +#ifdef CONFIG_X86_32 + return ip > PAGE_OFFSET; +#else + return (long)ip < 0; +#endif +} + #ifdef CONFIG_CPU_SUP_AMD int amd_pmu_init(void); @@ -527,6 +574,10 @@ void intel_pmu_lbr_init_nhm(void); void intel_pmu_lbr_init_atom(void); +void intel_pmu_lbr_init_snb(void); + +int intel_pmu_setup_lbr_filter(struct perf_event *event); + int p4_pmu_init(void); int p6_pmu_init(void); diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 67250a52430..9edc786aef8 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -139,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event) if (ret) return ret; + if (has_branch_stack(event)) + return -EOPNOTSUPP; + if (event->attr.exclude_host && event->attr.exclude_guest) /* * When HO == GO == 1 the hardware treats that as GO == HO == 0 @@ -401,6 +404,21 @@ static void amd_pmu_cpu_dead(int cpu) } } +PMU_FORMAT_ATTR(event, "config:0-7,32-35"); +PMU_FORMAT_ATTR(umask, "config:8-15" ); +PMU_FORMAT_ATTR(edge, "config:18" ); +PMU_FORMAT_ATTR(inv, "config:23" ); +PMU_FORMAT_ATTR(cmask, "config:24-31" ); + +static struct attribute *amd_format_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + NULL, +}; + static __initconst const struct x86_pmu amd_pmu = { .name = "AMD", .handle_irq = x86_pmu_handle_irq, @@ -423,6 +441,8 @@ static __initconst const struct x86_pmu amd_pmu = { .get_event_constraints = amd_get_event_constraints, .put_event_constraints = amd_put_event_constraints, + .format_attrs = amd_format_attr, + .cpu_prepare = amd_pmu_cpu_prepare, .cpu_starting = amd_pmu_cpu_starting, .cpu_dead = amd_pmu_cpu_dead, @@ -473,6 +493,7 @@ static __initconst const struct x86_pmu amd_pmu = { * 0x023 DE PERF_CTL[2:0] * 0x02D LS PERF_CTL[3] * 0x02E LS PERF_CTL[3,0] + * 0x031 LS PERF_CTL[2:0] (**) * 0x043 CU PERF_CTL[2:0] * 0x045 CU PERF_CTL[2:0] * 0x046 CU PERF_CTL[2:0] @@ -486,10 +507,12 @@ static __initconst const struct x86_pmu amd_pmu = { * 0x0DD LS PERF_CTL[5:0] * 0x0DE LS PERF_CTL[5:0] * 0x0DF LS PERF_CTL[5:0] + * 0x1C0 EX PERF_CTL[5:3] * 0x1D6 EX PERF_CTL[5:0] * 
0x1D8 EX PERF_CTL[5:0] * - * (*) depending on the umask all FPU counters may be used + * (*) depending on the umask all FPU counters may be used + * (**) only one unitmask enabled at a time */ static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); @@ -539,6 +562,12 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev return &amd_f15_PMC3; case 0x02E: return &amd_f15_PMC30; + case 0x031: + if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1) + return &amd_f15_PMC20; + return &emptyconstraint; + case 0x1C0: + return &amd_f15_PMC53; default: return &amd_f15_PMC50; } @@ -593,6 +622,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = { .cpu_dead = amd_pmu_cpu_dead, #endif .cpu_starting = amd_pmu_cpu_starting, + .format_attrs = amd_format_attr, }; __init int amd_pmu_init(void) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 61d4f79a550..26b3e2fef10 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -728,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids }, }; +static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event) +{ + /* user explicitly requested branch sampling */ + if (has_branch_stack(event)) + return true; + + /* implicit branch sampling to correct PEBS skid */ + if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) + return true; + + return false; +} + static void intel_pmu_disable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -882,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event) cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); + /* + * must disable before any actual event + * because any event may be combined with LBR + */ + if (intel_pmu_needs_lbr_smpl(event)) + intel_pmu_lbr_disable(event); + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_disable_fixed(hwc); return; @@ -936,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event) intel_pmu_enable_bts(hwc->config); return; } + /* + * must enabled before any actual event + * because any event may be combined with LBR + */ + if (intel_pmu_needs_lbr_smpl(event)) + intel_pmu_lbr_enable(event); if (event->attr.exclude_host) cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); @@ -1058,6 +1084,9 @@ again: data.period = event->hw.last_period; + if (has_branch_stack(event)) + data.br_stack = &cpuc->lbr_stack; + if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); } @@ -1124,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx) */ static struct event_constraint * __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, - struct perf_event *event) + struct perf_event *event, + struct hw_perf_event_extra *reg) { struct event_constraint *c = &emptyconstraint; - struct hw_perf_event_extra *reg = &event->hw.extra_reg; struct er_account *era; unsigned long flags; int orig_idx = reg->idx; /* already allocated shared msr */ if (reg->alloc) - return &unconstrained; + return NULL; /* call x86_get_event_constraint() */ again: era = &cpuc->shared_regs->regs[reg->idx]; @@ -1157,14 +1186,10 @@ again: reg->alloc = 1; /* - * All events using extra_reg are unconstrained. - * Avoids calling x86_get_event_constraints() - * - * Must revisit if extra_reg controlling events - * ever have constraints. 
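On the X86_CONFIG() helper that the Intel hunks below switch to (the union behind it was added in perf_event.h earlier in this patch): it assembles a raw event-select value from named bitfields, so magic constants become self-describing. For example, the INST_RETIRED.TOTAL_CYCLES alias that used to be written as 0x108000c0:

	u64 alt_config = X86_CONFIG(.event = 0xc0, .inv = 1, .cmask = 16);

	/*
	 * event  0xc0 -> bits  7:0
	 * inv    1    -> bit  23
	 * cmask  16   -> bits 31:24
	 * total: 0xc0 | (1 << 23) | (16 << 24) == 0x108000c0
	 */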
Worst case we go through - * the regular event constraint table. + * need to call x86_get_event_constraint() + * to check if associated event has constraints */ - c = &unconstrained; + c = NULL; } else if (intel_try_alt_er(event, orig_idx)) { raw_spin_unlock_irqrestore(&era->lock, flags); goto again; @@ -1201,11 +1226,23 @@ static struct event_constraint * intel_shared_regs_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { - struct event_constraint *c = NULL; - - if (event->hw.extra_reg.idx != EXTRA_REG_NONE) - c = __intel_shared_reg_get_constraints(cpuc, event); - + struct event_constraint *c = NULL, *d; + struct hw_perf_event_extra *xreg, *breg; + + xreg = &event->hw.extra_reg; + if (xreg->idx != EXTRA_REG_NONE) { + c = __intel_shared_reg_get_constraints(cpuc, event, xreg); + if (c == &emptyconstraint) + return c; + } + breg = &event->hw.branch_reg; + if (breg->idx != EXTRA_REG_NONE) { + d = __intel_shared_reg_get_constraints(cpuc, event, breg); + if (d == &emptyconstraint) { + __intel_shared_reg_put_constraints(cpuc, xreg); + c = d; + } + } return c; } @@ -1253,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, reg = &event->hw.extra_reg; if (reg->idx != EXTRA_REG_NONE) __intel_shared_reg_put_constraints(cpuc, reg); + + reg = &event->hw.branch_reg; + if (reg->idx != EXTRA_REG_NONE) + __intel_shared_reg_put_constraints(cpuc, reg); } static void intel_put_event_constraints(struct cpu_hw_events *cpuc, @@ -1288,12 +1329,19 @@ static int intel_pmu_hw_config(struct perf_event *event) * * Thereby we gain a PEBS capable cycle counter. */ - u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */ + u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); + alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); event->hw.config = alt_config; } + if (intel_pmu_needs_lbr_smpl(event)) { + ret = intel_pmu_setup_lbr_filter(event); + if (ret) + return ret; + } + if (event->attr.type != PERF_TYPE_RAW) return 0; @@ -1383,6 +1431,24 @@ static void core_pmu_enable_all(int added) } } +PMU_FORMAT_ATTR(event, "config:0-7" ); +PMU_FORMAT_ATTR(umask, "config:8-15" ); +PMU_FORMAT_ATTR(edge, "config:18" ); +PMU_FORMAT_ATTR(pc, "config:19" ); +PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */ +PMU_FORMAT_ATTR(inv, "config:23" ); +PMU_FORMAT_ATTR(cmask, "config:24-31" ); + +static struct attribute *intel_arch_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_pc.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + NULL, +}; + static __initconst const struct x86_pmu core_pmu = { .name = "core", .handle_irq = x86_pmu_handle_irq, @@ -1407,6 +1473,7 @@ static __initconst const struct x86_pmu core_pmu = { .put_event_constraints = intel_put_event_constraints, .event_constraints = intel_core_event_constraints, .guest_get_msrs = core_guest_get_msrs, + .format_attrs = intel_arch_formats_attr, }; struct intel_shared_regs *allocate_shared_regs(int cpu) @@ -1432,7 +1499,7 @@ static int intel_pmu_cpu_prepare(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - if (!x86_pmu.extra_regs) + if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map)) return NOTIFY_OK; cpuc->shared_regs = allocate_shared_regs(cpu); @@ -1454,22 +1521,28 @@ static void intel_pmu_cpu_starting(int cpu) */ intel_pmu_lbr_reset(); - if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING)) + cpuc->lbr_sel = NULL; + + if (!cpuc->shared_regs) return; - for_each_cpu(i, topology_thread_cpumask(cpu)) { - struct 
intel_shared_regs *pc; + if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) { + for_each_cpu(i, topology_thread_cpumask(cpu)) { + struct intel_shared_regs *pc; - pc = per_cpu(cpu_hw_events, i).shared_regs; - if (pc && pc->core_id == core_id) { - cpuc->kfree_on_online = cpuc->shared_regs; - cpuc->shared_regs = pc; - break; + pc = per_cpu(cpu_hw_events, i).shared_regs; + if (pc && pc->core_id == core_id) { + cpuc->kfree_on_online = cpuc->shared_regs; + cpuc->shared_regs = pc; + break; + } } + cpuc->shared_regs->core_id = core_id; + cpuc->shared_regs->refcnt++; } - cpuc->shared_regs->core_id = core_id; - cpuc->shared_regs->refcnt++; + if (x86_pmu.lbr_sel_map) + cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; } static void intel_pmu_cpu_dying(int cpu) @@ -1487,6 +1560,33 @@ static void intel_pmu_cpu_dying(int cpu) fini_debug_store_on_cpu(cpu); } +static void intel_pmu_flush_branch_stack(void) +{ + /* + * Intel LBR does not tag entries with the + * PID of the current task, then we need to + * flush it on ctxsw + * For now, we simply reset it + */ + if (x86_pmu.lbr_nr) + intel_pmu_lbr_reset(); +} + +PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); + +static struct attribute *intel_arch3_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_pc.attr, + &format_attr_any.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + + &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */ + NULL, +}; + static __initconst const struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, @@ -1510,10 +1610,13 @@ static __initconst const struct x86_pmu intel_pmu = { .get_event_constraints = intel_get_event_constraints, .put_event_constraints = intel_put_event_constraints, + .format_attrs = intel_arch3_formats_attr, + .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, .guest_get_msrs = intel_guest_get_msrs, + .flush_branch_stack = intel_pmu_flush_branch_stack, }; static __init void intel_clovertown_quirk(void) @@ -1690,9 +1793,11 @@ __init int intel_pmu_init(void) x86_pmu.extra_regs = intel_nehalem_extra_regs; /* UOPS_ISSUED.STALLED_CYCLES */ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = + X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = + X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); x86_add_quirk(intel_nehalem_quirk); @@ -1727,9 +1832,11 @@ __init int intel_pmu_init(void) x86_pmu.er_flags |= ERF_HAS_RSP_1; /* UOPS_ISSUED.STALLED_CYCLES */ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = + X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = + X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); pr_cont("Westmere events, "); break; @@ -1740,7 +1847,7 @@ __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - intel_pmu_lbr_init_nhm(); + intel_pmu_lbr_init_snb(); x86_pmu.event_constraints = 
intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; @@ -1750,9 +1857,11 @@ __init int intel_pmu_init(void) x86_pmu.er_flags |= ERF_NO_HT_SHARING; /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = + X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1; + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = + X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1); pr_cont("SandyBridge events, "); break; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index d6bd49faa40..7f64df19e7d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -3,6 +3,7 @@ #include <linux/slab.h> #include <asm/perf_event.h> +#include <asm/insn.h> #include "perf_event.h" @@ -439,9 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event) hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; cpuc->pebs_enabled |= 1ULL << hwc->idx; - - if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) - intel_pmu_lbr_enable(event); } void intel_pmu_pebs_disable(struct perf_event *event) @@ -454,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event) wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); hwc->config |= ARCH_PERFMON_EVENTSEL_INT; - - if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) - intel_pmu_lbr_disable(event); } void intel_pmu_pebs_enable_all(void) @@ -475,17 +470,6 @@ void intel_pmu_pebs_disable_all(void) wrmsrl(MSR_IA32_PEBS_ENABLE, 0); } -#include <asm/insn.h> - -static inline bool kernel_ip(unsigned long ip) -{ -#ifdef CONFIG_X86_32 - return ip > PAGE_OFFSET; -#else - return (long)ip < 0; -#endif -} - static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -572,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, * both formats and we don't use the other fields in this * routine. 
*/ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct pebs_record_core *pebs = __pebs; struct perf_sample_data data; struct pt_regs regs; @@ -602,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event, else regs.flags &= ~PERF_EFLAGS_EXACT; + if (has_branch_stack(event)) + data.br_stack = &cpuc->lbr_stack; + if (perf_event_overflow(event, &data, &regs)) x86_pmu_stop(event, 0); } diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 47a7e63bfe5..520b4265fcd 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -3,6 +3,7 @@ #include <asm/perf_event.h> #include <asm/msr.h> +#include <asm/insn.h> #include "perf_event.h" @@ -14,6 +15,100 @@ enum { }; /* + * Intel LBR_SELECT bits + * Intel Vol3a, April 2011, Section 16.7 Table 16-10 + * + * Hardware branch filter (not available on all CPUs) + */ +#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */ +#define LBR_USER_BIT 1 /* do not capture at ring > 0 */ +#define LBR_JCC_BIT 2 /* do not capture conditional branches */ +#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */ +#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */ +#define LBR_RETURN_BIT 5 /* do not capture near returns */ +#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */ +#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */ +#define LBR_FAR_BIT 8 /* do not capture far branches */ + +#define LBR_KERNEL (1 << LBR_KERNEL_BIT) +#define LBR_USER (1 << LBR_USER_BIT) +#define LBR_JCC (1 << LBR_JCC_BIT) +#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT) +#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT) +#define LBR_RETURN (1 << LBR_RETURN_BIT) +#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT) +#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT) +#define LBR_FAR (1 << LBR_FAR_BIT) + +#define LBR_PLM (LBR_KERNEL | LBR_USER) + +#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */ +#define LBR_NOT_SUPP -1 /* LBR filter not supported */ +#define LBR_IGN 0 /* ignored */ + +#define LBR_ANY \ + (LBR_JCC |\ + LBR_REL_CALL |\ + LBR_IND_CALL |\ + LBR_RETURN |\ + LBR_REL_JMP |\ + LBR_IND_JMP |\ + LBR_FAR) + +#define LBR_FROM_FLAG_MISPRED (1ULL << 63) + +#define for_each_branch_sample_type(x) \ + for ((x) = PERF_SAMPLE_BRANCH_USER; \ + (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1) + +/* + * x86 control flow change classification + * x86 control flow changes include branches, interrupts, traps, faults + */ +enum { + X86_BR_NONE = 0, /* unknown */ + + X86_BR_USER = 1 << 0, /* branch target is user */ + X86_BR_KERNEL = 1 << 1, /* branch target is kernel */ + + X86_BR_CALL = 1 << 2, /* call */ + X86_BR_RET = 1 << 3, /* return */ + X86_BR_SYSCALL = 1 << 4, /* syscall */ + X86_BR_SYSRET = 1 << 5, /* syscall return */ + X86_BR_INT = 1 << 6, /* sw interrupt */ + X86_BR_IRET = 1 << 7, /* return from interrupt */ + X86_BR_JCC = 1 << 8, /* conditional */ + X86_BR_JMP = 1 << 9, /* jump */ + X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ + X86_BR_IND_CALL = 1 << 11,/* indirect calls */ +}; + +#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) + +#define X86_BR_ANY \ + (X86_BR_CALL |\ + X86_BR_RET |\ + X86_BR_SYSCALL |\ + X86_BR_SYSRET |\ + X86_BR_INT |\ + X86_BR_IRET |\ + X86_BR_JCC |\ + X86_BR_JMP |\ + X86_BR_IRQ |\ + X86_BR_IND_CALL) + +#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) + +#define X86_BR_ANY_CALL \ + (X86_BR_CALL |\ + X86_BR_IND_CALL |\ + X86_BR_SYSCALL |\ + X86_BR_IRQ |\ + X86_BR_INT) + +static void intel_pmu_lbr_filter(struct cpu_hw_events 
*cpuc); + +/* * We only support LBR implementations that have FREEZE_LBRS_ON_PMI * otherwise it becomes near impossible to get a reliable stack. */ @@ -21,6 +116,10 @@ enum { static void __intel_pmu_lbr_enable(void) { u64 debugctl; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->lbr_sel) + wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config); rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); @@ -76,11 +175,11 @@ void intel_pmu_lbr_enable(struct perf_event *event) * Reset the LBR stack if we changed task context to * avoid data leaks. */ - if (event->ctx->task && cpuc->lbr_context != event->ctx) { intel_pmu_lbr_reset(); cpuc->lbr_context = event->ctx; } + cpuc->br_sel = event->hw.branch_reg.reg; cpuc->lbr_users++; } @@ -95,8 +194,11 @@ void intel_pmu_lbr_disable(struct perf_event *event) cpuc->lbr_users--; WARN_ON_ONCE(cpuc->lbr_users < 0); - if (cpuc->enabled && !cpuc->lbr_users) + if (cpuc->enabled && !cpuc->lbr_users) { __intel_pmu_lbr_disable(); + /* avoid stale pointer */ + cpuc->lbr_context = NULL; + } } void intel_pmu_lbr_enable_all(void) @@ -115,6 +217,9 @@ void intel_pmu_lbr_disable_all(void) __intel_pmu_lbr_disable(); } +/* + * TOS = most recently recorded branch + */ static inline u64 intel_pmu_lbr_tos(void) { u64 tos; @@ -142,15 +247,15 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); - cpuc->lbr_entries[i].from = msr_lastbranch.from; - cpuc->lbr_entries[i].to = msr_lastbranch.to; - cpuc->lbr_entries[i].flags = 0; + cpuc->lbr_entries[i].from = msr_lastbranch.from; + cpuc->lbr_entries[i].to = msr_lastbranch.to; + cpuc->lbr_entries[i].mispred = 0; + cpuc->lbr_entries[i].predicted = 0; + cpuc->lbr_entries[i].reserved = 0; } cpuc->lbr_stack.nr = i; } -#define LBR_FROM_FLAG_MISPRED (1ULL << 63) - /* * Due to lack of segmentation in Linux the effective address (offset) * is the same as the linear address, allowing us to merge the LIP and EIP @@ -165,19 +270,22 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) for (i = 0; i < x86_pmu.lbr_nr; i++) { unsigned long lbr_idx = (tos - i) & mask; - u64 from, to, flags = 0; + u64 from, to, mis = 0, pred = 0; rdmsrl(x86_pmu.lbr_from + lbr_idx, from); rdmsrl(x86_pmu.lbr_to + lbr_idx, to); if (lbr_format == LBR_FORMAT_EIP_FLAGS) { - flags = !!(from & LBR_FROM_FLAG_MISPRED); + mis = !!(from & LBR_FROM_FLAG_MISPRED); + pred = !mis; from = (u64)((((s64)from) << 1) >> 1); } - cpuc->lbr_entries[i].from = from; - cpuc->lbr_entries[i].to = to; - cpuc->lbr_entries[i].flags = flags; + cpuc->lbr_entries[i].from = from; + cpuc->lbr_entries[i].to = to; + cpuc->lbr_entries[i].mispred = mis; + cpuc->lbr_entries[i].predicted = pred; + cpuc->lbr_entries[i].reserved = 0; } cpuc->lbr_stack.nr = i; } @@ -193,28 +301,404 @@ void intel_pmu_lbr_read(void) intel_pmu_lbr_read_32(cpuc); else intel_pmu_lbr_read_64(cpuc); + + intel_pmu_lbr_filter(cpuc); +} + +/* + * SW filter is used: + * - in case there is no HW filter + * - in case the HW filter has errata or limitations + */ +static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) +{ + u64 br_type = event->attr.branch_sample_type; + int mask = 0; + + if (br_type & PERF_SAMPLE_BRANCH_USER) + mask |= X86_BR_USER; + + if (br_type & PERF_SAMPLE_BRANCH_KERNEL) + mask |= X86_BR_KERNEL; + + /* we ignore BRANCH_HV here */ + + if (br_type & PERF_SAMPLE_BRANCH_ANY) + mask |= X86_BR_ANY; + + if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL) + mask |= X86_BR_ANY_CALL; 
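+ /* + * worked example (illustrative, not from the original patch): + * branch_sample_type == (PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY_CALL) + * yields mask == (X86_BR_USER | X86_BR_ANY_CALL), i.e. calls, indirect + * calls, syscalls, interrupts and traps whose target is at the user level. + */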
+ + if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN) + mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET; + + if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) + mask |= X86_BR_IND_CALL; + /* + * stash actual user request into reg, it may + * be used by fixup code for some CPU + */ + event->hw.branch_reg.reg = mask; +} + +/* + * setup the HW LBR filter + * Used only when available, may not be enough to disambiguate + * all branches, may need the help of the SW filter + */ +static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) +{ + struct hw_perf_event_extra *reg; + u64 br_type = event->attr.branch_sample_type; + u64 mask = 0, m; + u64 v; + + for_each_branch_sample_type(m) { + if (!(br_type & m)) + continue; + + v = x86_pmu.lbr_sel_map[m]; + if (v == LBR_NOT_SUPP) + return -EOPNOTSUPP; + + if (v != LBR_IGN) + mask |= v; + } + reg = &event->hw.branch_reg; + reg->idx = EXTRA_REG_LBR; + + /* LBR_SELECT operates in suppress mode so invert mask */ + reg->config = ~mask & x86_pmu.lbr_sel_mask; + + return 0; +} + +int intel_pmu_setup_lbr_filter(struct perf_event *event) +{ + int ret = 0; + + /* + * no LBR on this PMU + */ + if (!x86_pmu.lbr_nr) + return -EOPNOTSUPP; + + /* + * setup SW LBR filter + */ + intel_pmu_setup_sw_lbr_filter(event); + + /* + * setup HW LBR filter, if any + */ + if (x86_pmu.lbr_sel_map) + ret = intel_pmu_setup_hw_lbr_filter(event); + + return ret; } +/* + * return the type of control flow change at address "from" + * the instruction is not necessarily a branch (in case of interrupt). + * + * The branch type returned also includes the priv level of the + * target of the control flow change (X86_BR_USER, X86_BR_KERNEL). + * + * If a branch type is unknown OR the instruction cannot be + * decoded (e.g., text page not present), then X86_BR_NONE is + * returned. + */ +static int branch_type(unsigned long from, unsigned long to) +{ + struct insn insn; + void *addr; + int bytes, size = MAX_INSN_SIZE; + int ret = X86_BR_NONE; + int ext, to_plm, from_plm; + u8 buf[MAX_INSN_SIZE]; + int is64 = 0; + + to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER; + from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER; + + /* + * may be zero if the lbr did not fill up after a reset by the time + * we get a PMU interrupt + */ + if (from == 0 || to == 0) + return X86_BR_NONE; + + if (from_plm == X86_BR_USER) { + /* + * can happen if measuring at the user level only + * and we interrupt in a kernel thread, e.g., idle. + */ + if (!current->mm) + return X86_BR_NONE; + + /* may fail if text not present */ + bytes = copy_from_user_nmi(buf, (void __user *)from, size); + if (bytes != size) + return X86_BR_NONE; + + addr = buf; + } else + addr = (void *)from; + + /* + * decoder needs to know the ABI especially + * on 64-bit systems running 32-bit apps + */ +#ifdef CONFIG_X86_64 + is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32); +#endif + insn_init(&insn, addr, is64); + insn_get_opcode(&insn); + + switch (insn.opcode.bytes[0]) { + case 0xf: + switch (insn.opcode.bytes[1]) { + case 0x05: /* syscall */ + case 0x34: /* sysenter */ + ret = X86_BR_SYSCALL; + break; + case 0x07: /* sysret */ + case 0x35: /* sysexit */ + ret = X86_BR_SYSRET; + break; + case 0x80 ... 0x8f: /* conditional */ + ret = X86_BR_JCC; + break; + default: + ret = X86_BR_NONE; + } + break; + case 0x70 ... 
0x7f: /* conditional */ + ret = X86_BR_JCC; + break; + case 0xc2: /* near ret */ + case 0xc3: /* near ret */ + case 0xca: /* far ret */ + case 0xcb: /* far ret */ + ret = X86_BR_RET; + break; + case 0xcf: /* iret */ + ret = X86_BR_IRET; + break; + case 0xcc ... 0xce: /* int */ + ret = X86_BR_INT; + break; + case 0xe8: /* call near rel */ + case 0x9a: /* call far absolute */ + ret = X86_BR_CALL; + break; + case 0xe0 ... 0xe3: /* loop jmp */ + ret = X86_BR_JCC; + break; + case 0xe9 ... 0xeb: /* jmp */ + ret = X86_BR_JMP; + break; + case 0xff: /* call near absolute, call far absolute ind */ + insn_get_modrm(&insn); + ext = (insn.modrm.bytes[0] >> 3) & 0x7; + switch (ext) { + case 2: /* near ind call */ + case 3: /* far ind call */ + ret = X86_BR_IND_CALL; + break; + case 4: + case 5: + ret = X86_BR_JMP; + break; + } + break; + default: + ret = X86_BR_NONE; + } + /* + * interrupts, traps, faults (and thus ring transitions) may + * occur on any instruction. Thus, to classify them correctly, + * we need to first look at the from and to priv levels. If they + * are different and to is in the kernel, then it indicates + * a ring transition. If the from instruction is not a ring + * transition instr (syscall, sysenter, int), then it means + * it was an irq, trap or fault. + * + * we have no way of detecting kernel to kernel faults. + */ + if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL + && ret != X86_BR_SYSCALL && ret != X86_BR_INT) + ret = X86_BR_IRQ; + + /* + * branch priv level determined by target as + * is done by HW when LBR_SELECT is implemented + */ + if (ret != X86_BR_NONE) + ret |= to_plm; + + return ret; +} + +/* + * implement the actual branch filter based on user demand. + * Hardware may not exactly satisfy that request, thus + * we need to inspect opcodes. Mismatched branches are + * discarded. Therefore, the number of branches returned + * in a PERF_SAMPLE_BRANCH_STACK sample may vary. 
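+ * (e.g. with the 16-deep LBR of Nehalem or Sandy Bridge, a sample filtered down to indirect calls may carry anywhere from 0 to 16 entries)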
+ */ +static void +intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) +{ + u64 from, to; + int br_sel = cpuc->br_sel; + int i, j, type; + bool compress = false; + + /* if sampling all branches, then nothing to filter */ + if ((br_sel & X86_BR_ALL) == X86_BR_ALL) + return; + + for (i = 0; i < cpuc->lbr_stack.nr; i++) { + + from = cpuc->lbr_entries[i].from; + to = cpuc->lbr_entries[i].to; + + type = branch_type(from, to); + + /* if type does not correspond, then discard */ + if (type == X86_BR_NONE || (br_sel & type) != type) { + cpuc->lbr_entries[i].from = 0; + compress = true; + } + } + + if (!compress) + return; + + /* remove all entries with from=0 */ + for (i = 0; i < cpuc->lbr_stack.nr; ) { + if (!cpuc->lbr_entries[i].from) { + j = i; + while (++j < cpuc->lbr_stack.nr) + cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j]; + cpuc->lbr_stack.nr--; + if (!cpuc->lbr_entries[i].from) + continue; + } + i++; + } +} + +/* + * Map interface branch filters onto LBR filters + */ +static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { + [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, + [PERF_SAMPLE_BRANCH_USER] = LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, + [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP + | LBR_IND_JMP | LBR_FAR, + /* + * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches + */ + [PERF_SAMPLE_BRANCH_ANY_CALL] = + LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR, + /* + * NHM/WSM erratum: must include IND_JMP to capture IND_CALL + */ + [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP, +}; + +static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { + [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, + [PERF_SAMPLE_BRANCH_USER] = LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, + [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR, + [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL + | LBR_FAR, + [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL, +}; + +/* core */ void intel_pmu_lbr_init_core(void) { x86_pmu.lbr_nr = 4; - x86_pmu.lbr_tos = 0x01c9; - x86_pmu.lbr_from = 0x40; - x86_pmu.lbr_to = 0x60; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_CORE_FROM; + x86_pmu.lbr_to = MSR_LBR_CORE_TO; + + /* + * SW branch filter usage: + * - compensate for lack of HW filter + */ + pr_cont("4-deep LBR, "); } +/* nehalem/westmere */ void intel_pmu_lbr_init_nhm(void) { x86_pmu.lbr_nr = 16; - x86_pmu.lbr_tos = 0x01c9; - x86_pmu.lbr_from = 0x680; - x86_pmu.lbr_to = 0x6c0; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_NHM_FROM; + x86_pmu.lbr_to = MSR_LBR_NHM_TO; + + x86_pmu.lbr_sel_mask = LBR_SEL_MASK; + x86_pmu.lbr_sel_map = nhm_lbr_sel_map; + + /* + * SW branch filter usage: + * - workaround LBR_SEL errata (see above) + * - support syscall, sysret capture. + * That requires LBR_FAR but that means far + * jmp need to be filtered out + */ + pr_cont("16-deep LBR, "); +} + +/* sandy bridge */ +void intel_pmu_lbr_init_snb(void) +{ + x86_pmu.lbr_nr = 16; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_NHM_FROM; + x86_pmu.lbr_to = MSR_LBR_NHM_TO; + + x86_pmu.lbr_sel_mask = LBR_SEL_MASK; + x86_pmu.lbr_sel_map = snb_lbr_sel_map; + + /* + * SW branch filter usage: + * - support syscall, sysret capture. 
+ * That requires LBR_FAR but that means far + * jmps need to be filtered out + */ + pr_cont("16-deep LBR, "); } +/* atom */ void intel_pmu_lbr_init_atom(void) { + /* + * only models starting at stepping 10 seem + * to have an operational LBR which can freeze + * on PMU interrupt + */ + if (boot_cpu_data.x86_mask < 10) { + pr_cont("LBR disabled due to erratum"); + return; + } + x86_pmu.lbr_nr = 8; - x86_pmu.lbr_tos = 0x01c9; - x86_pmu.lbr_from = 0x40; - x86_pmu.lbr_to = 0x60; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_CORE_FROM; + x86_pmu.lbr_to = MSR_LBR_CORE_TO; + + /* + * SW branch filter usage: + * - compensate for lack of HW filter + */ + pr_cont("8-deep LBR, "); } diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index ef484d9d0a2..a2dfacfd710 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -1271,6 +1271,17 @@ done: return num ? -EINVAL : 0; } +PMU_FORMAT_ATTR(cccr, "config:0-31" ); +PMU_FORMAT_ATTR(escr, "config:32-62"); +PMU_FORMAT_ATTR(ht, "config:63" ); + +static struct attribute *intel_p4_formats_attr[] = { + &format_attr_cccr.attr, + &format_attr_escr.attr, + &format_attr_ht.attr, + NULL, +}; + static __initconst const struct x86_pmu p4_pmu = { .name = "Netburst P4/Xeon", .handle_irq = p4_pmu_handle_irq, @@ -1305,6 +1316,8 @@ static __initconst const struct x86_pmu p4_pmu = { * the former idea is taken from OProfile code */ .perfctr_second_write = 1, + + .format_attrs = intel_p4_formats_attr, }; __init int p4_pmu_init(void) diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index c7181befecd..32bcfc7dd23 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -87,6 +87,23 @@ static void p6_pmu_enable_event(struct perf_event *event) (void)checking_wrmsrl(hwc->config_base, val); } +PMU_FORMAT_ATTR(event, "config:0-7" ); +PMU_FORMAT_ATTR(umask, "config:8-15" ); +PMU_FORMAT_ATTR(edge, "config:18" ); +PMU_FORMAT_ATTR(pc, "config:19" ); +PMU_FORMAT_ATTR(inv, "config:23" ); +PMU_FORMAT_ATTR(cmask, "config:24-31" ); + +static struct attribute *intel_p6_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_pc.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + NULL, +}; + static __initconst const struct x86_pmu p6_pmu = { .name = "p6", .handle_irq = x86_pmu_handle_irq, @@ -115,6 +132,8 @@ static __initconst const struct x86_pmu p6_pmu = { .cntval_mask = (1ULL << 32) - 1, .get_event_constraints = x86_get_event_constraints, .event_constraints = p6_event_constraints, + + .format_attrs = intel_p6_formats_attr, }; __init int p6_pmu_init(void) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index c7f64e6f537..addf9e82a7f 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -40,6 +40,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 }, { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 }, + { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 }, { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index a524353d93f..39472dd2323 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -43,7 +43,6 @@ #include 
<asm/processor.h> #include <asm/msr.h> -#include <asm/system.h> static struct class *cpuid_class; diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c index 642f75a68cd..11891ca7b71 100644 --- a/arch/x86/kernel/crash_dump_32.c +++ b/arch/x86/kernel/crash_dump_32.c @@ -62,16 +62,16 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, if (!userbuf) { memcpy(buf, (vaddr + offset), csize); - kunmap_atomic(vaddr, KM_PTE0); + kunmap_atomic(vaddr); } else { if (!kdump_buf_page) { printk(KERN_WARNING "Kdump: Kdump buffer page not" " allocated\n"); - kunmap_atomic(vaddr, KM_PTE0); + kunmap_atomic(vaddr); return -EFAULT; } copy_page(kdump_buf_page, vaddr); - kunmap_atomic(vaddr, KM_PTE0); + kunmap_atomic(vaddr); if (copy_to_user(buf, (kdump_buf_page + offset), csize)) return -EFAULT; } diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 52821799a70..3ae2ced4a87 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -4,6 +4,7 @@ #include <linux/bootmem.h> #include <linux/export.h> #include <linux/io.h> +#include <linux/irqdomain.h> #include <linux/interrupt.h> #include <linux/list.h> #include <linux/of.h> @@ -17,64 +18,14 @@ #include <linux/initrd.h> #include <asm/hpet.h> -#include <asm/irq_controller.h> #include <asm/apic.h> #include <asm/pci_x86.h> __initdata u64 initial_dtb; char __initdata cmd_line[COMMAND_LINE_SIZE]; -static LIST_HEAD(irq_domains); -static DEFINE_RAW_SPINLOCK(big_irq_lock); int __initdata of_ioapic; -#ifdef CONFIG_X86_IO_APIC -static void add_interrupt_host(struct irq_domain *ih) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&big_irq_lock, flags); - list_add(&ih->l, &irq_domains); - raw_spin_unlock_irqrestore(&big_irq_lock, flags); -} -#endif - -static struct irq_domain *get_ih_from_node(struct device_node *controller) -{ - struct irq_domain *ih, *found = NULL; - unsigned long flags; - - raw_spin_lock_irqsave(&big_irq_lock, flags); - list_for_each_entry(ih, &irq_domains, l) { - if (ih->controller == controller) { - found = ih; - break; - } - } - raw_spin_unlock_irqrestore(&big_irq_lock, flags); - return found; -} - -unsigned int irq_create_of_mapping(struct device_node *controller, - const u32 *intspec, unsigned int intsize) -{ - struct irq_domain *ih; - u32 virq, type; - int ret; - - ih = get_ih_from_node(controller); - if (!ih) - return 0; - ret = ih->xlate(ih, intspec, intsize, &virq, &type); - if (ret) - return 0; - if (type == IRQ_TYPE_NONE) - return virq; - irq_set_irq_type(virq, type); - return virq; -} -EXPORT_SYMBOL_GPL(irq_create_of_mapping); - unsigned long pci_address_to_pio(phys_addr_t address) { /* @@ -354,36 +305,43 @@ static struct of_ioapic_type of_ioapic_type[] = }, }; -static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize, - u32 *out_hwirq, u32 *out_type) +static int ioapic_xlate(struct irq_domain *domain, + struct device_node *controller, + const u32 *intspec, u32 intsize, + irq_hw_number_t *out_hwirq, u32 *out_type) { - struct mp_ioapic_gsi *gsi_cfg; struct io_apic_irq_attr attr; struct of_ioapic_type *it; - u32 line, idx, type; + u32 line, idx; + int rc; - if (intsize < 2) + if (WARN_ON(intsize < 2)) return -EINVAL; - line = *intspec; - idx = (u32) id->priv; - gsi_cfg = mp_ioapic_gsi_routing(idx); - *out_hwirq = line + gsi_cfg->gsi_base; - - intspec++; - type = *intspec; + line = intspec[0]; - if (type >= ARRAY_SIZE(of_ioapic_type)) + if (intspec[1] >= ARRAY_SIZE(of_ioapic_type)) return -EINVAL; - it = of_ioapic_type + type; - *out_type = 
it->out_type; + it = &of_ioapic_type[intspec[1]]; + idx = (u32) domain->host_data; set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity); - return io_apic_setup_irq_pin_once(*out_hwirq, cpu_to_node(0), &attr); + rc = io_apic_setup_irq_pin_once(irq_find_mapping(domain, line), + cpu_to_node(0), &attr); + if (rc) + return rc; + + *out_hwirq = line; + *out_type = it->out_type; + return 0; } +const struct irq_domain_ops ioapic_irq_domain_ops = { + .xlate = ioapic_xlate, +}; + static void __init ioapic_add_ofnode(struct device_node *np) { struct resource r; @@ -399,13 +357,14 @@ static void __init ioapic_add_ofnode(struct device_node *np) for (i = 0; i < nr_ioapics; i++) { if (r.start == mpc_ioapic_addr(i)) { struct irq_domain *id; + struct mp_ioapic_gsi *gsi_cfg; + + gsi_cfg = mp_ioapic_gsi_routing(i); - id = kzalloc(sizeof(*id), GFP_KERNEL); + id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0, + &ioapic_irq_domain_ops, + (void*)i); BUG_ON(!id); - id->controller = np; - id->xlate = ioapic_xlate; - id->priv = (void *)i; - add_interrupt_host(id); return; } } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 4025fe4f928..1b81839b6c8 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -37,13 +37,16 @@ print_ftrace_graph_addr(unsigned long addr, void *data, const struct stacktrace_ops *ops, struct thread_info *tinfo, int *graph) { - struct task_struct *task = tinfo->task; + struct task_struct *task; unsigned long ret_addr; - int index = task->curr_ret_stack; + int index; if (addr != (unsigned long)return_to_handler) return; + task = tinfo->task; + index = task->curr_ret_stack; + if (!task->ret_stack || index < *graph) return; @@ -265,7 +268,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) #endif printk("\n"); if (notify_die(DIE_OOPS, str, regs, err, - current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) + current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) return 1; show_registers(regs); diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index c99f9ed013d..88ec9129271 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -87,7 +87,7 @@ void show_registers(struct pt_regs *regs) int i; print_modules(); - __show_regs(regs, 0); + __show_regs(regs, !user_mode_vm(regs)); printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", TASK_COMM_LEN, current->comm, task_pid_nr(current), diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1333d985177..cdc79b5cfcd 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -320,7 +320,7 @@ ENDPROC(native_usergs_sysret64) movq %rsp, %rsi leaq -RBP(%rsp),%rdi /* arg1 for handler */ - testl $3, CS(%rdi) + testl $3, CS-RBP(%rsi) je 1f SWAPGS /* @@ -330,11 +330,10 @@ ENDPROC(native_usergs_sysret64) * moving irq_enter into assembly, which would be too much work) */ 1: incl PER_CPU_VAR(irq_count) - jne 2f - mov PER_CPU_VAR(irq_stack_ptr),%rsp + cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp CFI_DEF_CFA_REGISTER rsi -2: /* Store previous stack value */ + /* Store previous stack value */ pushq %rsi CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ 0x77 /* DW_OP_breg7 */, 0, \ @@ -482,7 +481,12 @@ GLOBAL(system_call_after_swapgs) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) jnz tracesys system_call_fastpath: +#if __SYSCALL_MASK == ~0 cmpq $__NR_syscall_max,%rax +#else + andl $__SYSCALL_MASK,%eax + cmpl $__NR_syscall_max,%eax +#endif ja badsys movq 
%r10,%rcx call *sys_call_table(,%rax,8) # XXX: rip relative @@ -596,7 +600,12 @@ tracesys: */ LOAD_ARGS ARGOFFSET, 1 RESTORE_REST +#if __SYSCALL_MASK == ~0 cmpq $__NR_syscall_max,%rax +#else + andl $__SYSCALL_MASK,%eax + cmpl $__NR_syscall_max,%eax +#endif ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ movq %r10,%rcx /* fixup for C */ call *sys_call_table(,%rax,8) @@ -736,6 +745,40 @@ ENTRY(stub_rt_sigreturn) CFI_ENDPROC END(stub_rt_sigreturn) +#ifdef CONFIG_X86_X32_ABI + PTREGSCALL stub_x32_sigaltstack, sys32_sigaltstack, %rdx + +ENTRY(stub_x32_rt_sigreturn) + CFI_STARTPROC + addq $8, %rsp + PARTIAL_FRAME 0 + SAVE_REST + movq %rsp,%rdi + FIXUP_TOP_OF_STACK %r11 + call sys32_x32_rt_sigreturn + movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer + RESTORE_REST + jmp int_ret_from_sys_call + CFI_ENDPROC +END(stub_x32_rt_sigreturn) + +ENTRY(stub_x32_execve) + CFI_STARTPROC + addq $8, %rsp + PARTIAL_FRAME 0 + SAVE_REST + FIXUP_TOP_OF_STACK %r11 + movq %rsp, %rcx + call sys32_execve + RESTORE_TOP_OF_STACK %r11 + movq %rax,RAX(%rsp) + RESTORE_REST + jmp int_ret_from_sys_call + CFI_ENDPROC +END(stub_x32_execve) + +#endif + /* * Build the entry stubs and pointer table with some assembler magic. * We pack 7 stubs into a single 32-byte chunk, which will fit in a @@ -813,7 +856,7 @@ ret_from_intr: /* Restore saved previous stack */ popq %rsi - CFI_DEF_CFA_REGISTER rsi + CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */ leaq ARGOFFSET-RBP(%rsi), %rsp CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET @@ -1530,6 +1573,7 @@ ENTRY(nmi) /* Use %rdx as out temp variable throughout */ pushq_cfi %rdx + CFI_REL_OFFSET rdx, 0 /* * If %cs was not the kernel segment, then the NMI triggered in user @@ -1554,6 +1598,7 @@ ENTRY(nmi) */ lea 6*8(%rsp), %rdx test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi + CFI_REMEMBER_STATE nested_nmi: /* @@ -1585,10 +1630,12 @@ nested_nmi: nested_nmi_out: popq_cfi %rdx + CFI_RESTORE rdx /* No need to check faults here */ INTERRUPT_RETURN + CFI_RESTORE_STATE first_nmi: /* * Because nested NMIs will use the pushed location that we @@ -1620,10 +1667,15 @@ first_nmi: * | pt_regs | * +-------------------------+ * - * The saved RIP is used to fix up the copied RIP that a nested - * NMI may zero out. The original stack frame and the temp storage + * The saved stack frame is used to fix up the copied stack frame + * that a nested NMI may change to make the interrupted NMI iret jump + * to the repeat_nmi. The original stack frame and the temp storage * is also used by nested NMIs and can not be trusted on exit. */ + /* Do not pop rdx, nested NMIs will corrupt that part of the stack */ + movq (%rsp), %rdx + CFI_RESTORE rdx + /* Set the NMI executing variable on the stack. */ pushq_cfi $1 @@ -1631,22 +1683,39 @@ first_nmi: .rept 5 pushq_cfi 6*8(%rsp) .endr + CFI_DEF_CFA_OFFSET SS+8-RIP + + /* Everything up to here is safe from nested NMIs */ + + /* + * If there was a nested NMI, the first NMI's iret will return + * here. But NMIs are still enabled and we can take another + * nested NMI. The nested NMI checks the interrupted RIP to see + * if it is between repeat_nmi and end_repeat_nmi, and if so + * it will just return, as we are about to repeat an NMI anyway. + * This makes it safe to copy to the stack frame that a nested + * NMI will update. + */ +repeat_nmi: + /* + * Update the stack variable to say we are still in NMI (the update + * is benign for the non-repeat case, where 1 was pushed just above + * to this very stack slot). 
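+ * (the slot in question is the "NMI executing" variable at 5*8(%rsp), which the movq below writes again)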
+ */ + movq $1, 5*8(%rsp) /* Make another copy, this one may be modified by nested NMIs */ .rept 5 pushq_cfi 4*8(%rsp) .endr - - /* Do not pop rdx, nested NMIs will corrupt it */ - movq 11*8(%rsp), %rdx + CFI_DEF_CFA_OFFSET SS+8-RIP +end_repeat_nmi: /* * Everything below this point can be preempted by a nested - * NMI if the first NMI took an exception. Repeated NMIs - * caused by an exception and nested NMI will start here, and - * can still be preempted by another NMI. + * NMI if the first NMI took an exception and reset our iret stack + * so that we repeat another NMI. */ -restart_nmi: pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ subq $ORIG_RAX-R15, %rsp CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 @@ -1675,26 +1744,6 @@ nmi_restore: CFI_ENDPROC END(nmi) - /* - * If an NMI hit an iret because of an exception or breakpoint, - * it can lose its NMI context, and a nested NMI may come in. - * In that case, the nested NMI will change the preempted NMI's - * stack to jump to here when it does the final iret. - */ -repeat_nmi: - INTR_FRAME - /* Update the stack variable to say we are still in NMI */ - movq $1, 5*8(%rsp) - - /* copy the saved stack back to copy stack */ - .rept 5 - pushq_cfi 4*8(%rsp) - .endr - - jmp restart_nmi - CFI_ENDPROC -end_repeat_nmi: - ENTRY(ignore_sysret) CFI_STARTPROC mov $-ENOSYS,%eax diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index f239f301db2..2d6e6498c17 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -16,6 +16,7 @@ #include <asm/uaccess.h> #include <asm/ptrace.h> #include <asm/i387.h> +#include <asm/fpu-internal.h> #include <asm/user.h> #ifdef CONFIG_X86_64 @@ -32,6 +33,86 @@ # define user32_fxsr_struct user_fxsr_struct #endif +/* + * Were we in an interrupt that interrupted kernel mode? + * + * We can do a kernel_fpu_begin/end() pair *ONLY* if that + * pair does nothing at all: the thread must not have fpu (so + * that we don't try to save the FPU state), and TS must + * be set (so that the clts/stts pair does nothing that is + * visible in the interrupted kernel thread). + */ +static inline bool interrupted_kernel_fpu_idle(void) +{ + return !__thread_has_fpu(current) && + (read_cr0() & X86_CR0_TS); +} + +/* + * Were we in user mode (or vm86 mode) when we were + * interrupted? + * + * Doing kernel_fpu_begin/end() is ok if we are running + * in an interrupt context from user mode - we'll just + * save the FPU state as required. + */ +static inline bool interrupted_user_mode(void) +{ + struct pt_regs *regs = get_irq_regs(); + return regs && user_mode_vm(regs); +} + +/* + * Can we use the FPU in kernel mode with the + * whole "kernel_fpu_begin/end()" sequence? + * + * It's always ok in process context (ie "not interrupt") + * but it is sometimes ok even from an irq. 
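+ * Concretely, per the two helpers above: usable when we are not in an interrupt at all, when the interrupt came in from user mode, or when the interrupted kernel code had no live FPU state and TS was set.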
+ */ +bool irq_fpu_usable(void) +{ + return !in_interrupt() || + interrupted_user_mode() || + interrupted_kernel_fpu_idle(); +} +EXPORT_SYMBOL(irq_fpu_usable); + +void kernel_fpu_begin(void) +{ + struct task_struct *me = current; + + WARN_ON_ONCE(!irq_fpu_usable()); + preempt_disable(); + if (__thread_has_fpu(me)) { + __save_init_fpu(me); + __thread_clear_has_fpu(me); + /* We do 'stts()' in kernel_fpu_end() */ + } else { + percpu_write(fpu_owner_task, NULL); + clts(); + } +} +EXPORT_SYMBOL(kernel_fpu_begin); + +void kernel_fpu_end(void) +{ + stts(); + preempt_enable(); +} +EXPORT_SYMBOL(kernel_fpu_end); + +void unlazy_fpu(struct task_struct *tsk) +{ + preempt_disable(); + if (__thread_has_fpu(tsk)) { + __save_init_fpu(tsk); + __thread_fpu_end(tsk); + } else + tsk->fpu_counter = 0; + preempt_enable(); +} +EXPORT_SYMBOL(unlazy_fpu); + #ifdef CONFIG_MATH_EMULATION # define HAVE_HWFP (boot_cpu_data.hard_math) #else @@ -44,7 +125,7 @@ EXPORT_SYMBOL_GPL(xstate_size); unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); static struct i387_fxsave_struct fx_scratch __cpuinitdata; -void __cpuinit mxcsr_feature_mask_init(void) +static void __cpuinit mxcsr_feature_mask_init(void) { unsigned long mask = 0; diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 610485223bd..36d1853e91a 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -15,7 +15,6 @@ #include <linux/delay.h> #include <linux/atomic.h> -#include <asm/system.h> #include <asm/timer.h> #include <asm/hw_irq.h> #include <asm/pgtable.h> diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 7943e0c21bd..3dafc6003b7 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -282,8 +282,13 @@ void fixup_irqs(void) else if (!(warned++)) set_affinity = 0; + /* + * We unmask if the irq was not marked masked by the + * core code. That respects the lazy irq disable + * behaviour. + */ if (!irqd_can_move_in_process_context(data) && - !irqd_irq_disabled(data) && chip->irq_unmask) + !irqd_irq_masked(data) && chip->irq_unmask) chip->irq_unmask(data); raw_spin_unlock(&desc->lock); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 40fc86161d9..58b7f27cb3e 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -100,13 +100,8 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) irqctx->tinfo.task = curctx->tinfo.task; irqctx->tinfo.previous_esp = current_stack_pointer; - /* - * Copy the softirq bits in preempt_count so that the - * softirq checks work in the hardirq context. - */ - irqctx->tinfo.preempt_count = - (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) | - (curctx->tinfo.preempt_count & SOFTIRQ_MASK); + /* Copy the preempt_count so that the [soft]irq checks work. 
*/ + irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count; if (unlikely(overflow)) call_on_stack(print_stack_overflow, isp); @@ -196,7 +191,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) if (unlikely(!desc)) return false; - if (!execute_on_irq_stack(overflow, desc, irq)) { + if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) { if (unlikely(overflow)) print_stack_overflow(); desc->handle_irq(irq, desc); diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 313fb5cddbc..252981afd6c 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -16,7 +16,6 @@ #include <linux/delay.h> #include <linux/atomic.h> -#include <asm/system.h> #include <asm/timer.h> #include <asm/hw_irq.h> #include <asm/pgtable.h> @@ -61,7 +60,7 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id) outb(0, 0xF0); if (ignore_fpu_irq || !boot_cpu_data.hard_math) return IRQ_NONE; - math_error(get_irq_regs(), 0, 16); + math_error(get_irq_regs(), 0, X86_TRAP_MF); return IRQ_HANDLED; } @@ -306,10 +305,10 @@ void __init native_init_IRQ(void) * us. (some of these will be overridden and become * 'special' SMP interrupts) */ - for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { + i = FIRST_EXTERNAL_VECTOR; + for_each_clear_bit_from(i, used_vectors, NR_VECTORS) { /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ - if (!test_bit(i, used_vectors)) - set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); + set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); } if (!acpi_ioapic && !of_ioapic) diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index 90fcf62854b..1d5d31ea686 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -68,16 +68,9 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf, return count; } -static int setup_data_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - - return 0; -} - static const struct file_operations fops_setup_data = { .read = setup_data_read, - .open = setup_data_open, + .open = simple_open, .llseek = default_llseek, }; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 2f45c4c0b9d..8bfb6146f75 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -48,7 +48,6 @@ #include <asm/debugreg.h> #include <asm/apicdef.h> -#include <asm/system.h> #include <asm/apic.h> #include <asm/nmi.h> @@ -69,8 +68,6 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { "ss", 4, offsetof(struct pt_regs, ss) }, { "ds", 4, offsetof(struct pt_regs, ds) }, { "es", 4, offsetof(struct pt_regs, es) }, - { "fs", 4, -1 }, - { "gs", 4, -1 }, #else { "ax", 8, offsetof(struct pt_regs, ax) }, { "bx", 8, offsetof(struct pt_regs, bx) }, @@ -92,7 +89,11 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { "flags", 4, offsetof(struct pt_regs, flags) }, { "cs", 4, offsetof(struct pt_regs, cs) }, { "ss", 4, offsetof(struct pt_regs, ss) }, + { "ds", 4, -1 }, + { "es", 4, -1 }, #endif + { "fs", 4, -1 }, + { "gs", 4, -1 }, }; int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes-common.h new file mode 100644 index 00000000000..3230b68ef29 --- /dev/null +++ b/arch/x86/kernel/kprobes-common.h @@ -0,0 +1,102 @@ +#ifndef __X86_KERNEL_KPROBES_COMMON_H +#define __X86_KERNEL_KPROBES_COMMON_H + +/* Kprobes and Optprobes common header */ + +#ifdef CONFIG_X86_64 +#define SAVE_REGS_STRING \ + /* Skip cs, ip, orig_ax. 
*/ \ + " subq $24, %rsp\n" \ + " pushq %rdi\n" \ + " pushq %rsi\n" \ + " pushq %rdx\n" \ + " pushq %rcx\n" \ + " pushq %rax\n" \ + " pushq %r8\n" \ + " pushq %r9\n" \ + " pushq %r10\n" \ + " pushq %r11\n" \ + " pushq %rbx\n" \ + " pushq %rbp\n" \ + " pushq %r12\n" \ + " pushq %r13\n" \ + " pushq %r14\n" \ + " pushq %r15\n" +#define RESTORE_REGS_STRING \ + " popq %r15\n" \ + " popq %r14\n" \ + " popq %r13\n" \ + " popq %r12\n" \ + " popq %rbp\n" \ + " popq %rbx\n" \ + " popq %r11\n" \ + " popq %r10\n" \ + " popq %r9\n" \ + " popq %r8\n" \ + " popq %rax\n" \ + " popq %rcx\n" \ + " popq %rdx\n" \ + " popq %rsi\n" \ + " popq %rdi\n" \ + /* Skip orig_ax, ip, cs */ \ + " addq $24, %rsp\n" +#else +#define SAVE_REGS_STRING \ + /* Skip cs, ip, orig_ax and gs. */ \ + " subl $16, %esp\n" \ + " pushl %fs\n" \ + " pushl %es\n" \ + " pushl %ds\n" \ + " pushl %eax\n" \ + " pushl %ebp\n" \ + " pushl %edi\n" \ + " pushl %esi\n" \ + " pushl %edx\n" \ + " pushl %ecx\n" \ + " pushl %ebx\n" +#define RESTORE_REGS_STRING \ + " popl %ebx\n" \ + " popl %ecx\n" \ + " popl %edx\n" \ + " popl %esi\n" \ + " popl %edi\n" \ + " popl %ebp\n" \ + " popl %eax\n" \ + /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\ + " addl $24, %esp\n" +#endif + +/* Ensure if the instruction can be boostable */ +extern int can_boost(kprobe_opcode_t *instruction); +/* Recover instruction if given address is probed */ +extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, + unsigned long addr); +/* + * Copy an instruction and adjust the displacement if the instruction + * uses the %rip-relative addressing mode. + */ +extern int __copy_instruction(u8 *dest, u8 *src); + +/* Generate a relative-jump/call instruction */ +extern void synthesize_reljump(void *from, void *to); +extern void synthesize_relcall(void *from, void *to); + +#ifdef CONFIG_OPTPROBES +extern int arch_init_optprobes(void); +extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter); +extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr); +#else /* !CONFIG_OPTPROBES */ +static inline int arch_init_optprobes(void) +{ + return 0; +} +static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) +{ + return 0; +} +static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) +{ + return addr; +} +#endif +#endif diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes-opt.c new file mode 100644 index 00000000000..c5e410eed40 --- /dev/null +++ b/arch/x86/kernel/kprobes-opt.c @@ -0,0 +1,512 @@ +/* + * Kernel Probes Jump Optimization (Optprobes) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2002, 2004 + * Copyright (C) Hitachi Ltd., 2012 + */ +#include <linux/kprobes.h> +#include <linux/ptrace.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/hardirq.h> +#include <linux/preempt.h> +#include <linux/module.h> +#include <linux/kdebug.h> +#include <linux/kallsyms.h> +#include <linux/ftrace.h> + +#include <asm/cacheflush.h> +#include <asm/desc.h> +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/alternative.h> +#include <asm/insn.h> +#include <asm/debugreg.h> + +#include "kprobes-common.h" + +unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) +{ + struct optimized_kprobe *op; + struct kprobe *kp; + long offs; + int i; + + for (i = 0; i < RELATIVEJUMP_SIZE; i++) { + kp = get_kprobe((void *)addr - i); + /* This function only handles jump-optimized kprobe */ + if (kp && kprobe_optimized(kp)) { + op = container_of(kp, struct optimized_kprobe, kp); + /* If op->list is not empty, op is under optimizing */ + if (list_empty(&op->list)) + goto found; + } + } + + return addr; +found: + /* + * If the kprobe can be optimized, original bytes which can be + * overwritten by jump destination address. In this case, original + * bytes must be recovered from op->optinsn.copied_insn buffer. + */ + memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + if (addr == (unsigned long)kp->addr) { + buf[0] = kp->opcode; + memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); + } else { + offs = addr - (unsigned long)kp->addr - 1; + memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs); + } + + return (unsigned long)buf; +} + +/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ +static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) +{ +#ifdef CONFIG_X86_64 + *addr++ = 0x48; + *addr++ = 0xbf; +#else + *addr++ = 0xb8; +#endif + *(unsigned long *)addr = val; +} + +static void __used __kprobes kprobes_optinsn_template_holder(void) +{ + asm volatile ( + ".global optprobe_template_entry\n" + "optprobe_template_entry:\n" +#ifdef CONFIG_X86_64 + /* We don't bother saving the ss register */ + " pushq %rsp\n" + " pushfq\n" + SAVE_REGS_STRING + " movq %rsp, %rsi\n" + ".global optprobe_template_val\n" + "optprobe_template_val:\n" + ASM_NOP5 + ASM_NOP5 + ".global optprobe_template_call\n" + "optprobe_template_call:\n" + ASM_NOP5 + /* Move flags to rsp */ + " movq 144(%rsp), %rdx\n" + " movq %rdx, 152(%rsp)\n" + RESTORE_REGS_STRING + /* Skip flags entry */ + " addq $8, %rsp\n" + " popfq\n" +#else /* CONFIG_X86_32 */ + " pushf\n" + SAVE_REGS_STRING + " movl %esp, %edx\n" + ".global optprobe_template_val\n" + "optprobe_template_val:\n" + ASM_NOP5 + ".global optprobe_template_call\n" + "optprobe_template_call:\n" + ASM_NOP5 + RESTORE_REGS_STRING + " addl $4, %esp\n" /* skip cs */ + " popf\n" +#endif + ".global optprobe_template_end\n" + "optprobe_template_end:\n"); +} + +#define TMPL_MOVE_IDX \ + ((long)&optprobe_template_val - (long)&optprobe_template_entry) +#define TMPL_CALL_IDX \ + ((long)&optprobe_template_call - (long)&optprobe_template_entry) +#define TMPL_END_IDX \ + ((long)&optprobe_template_end - (long)&optprobe_template_entry) + +#define INT3_SIZE sizeof(kprobe_opcode_t) + +/* Optimized kprobe call back function: called from optinsn */ +static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long flags; + + /* This is 
possible if op is under delayed unoptimizing */ + if (kprobe_disabled(&op->kp)) + return; + + local_irq_save(flags); + if (kprobe_running()) { + kprobes_inc_nmissed_count(&op->kp); + } else { + /* Save skipped registers */ +#ifdef CONFIG_X86_64 + regs->cs = __KERNEL_CS; +#else + regs->cs = __KERNEL_CS | get_kernel_rpl(); + regs->gs = 0; +#endif + regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; + regs->orig_ax = ~0UL; + + __this_cpu_write(current_kprobe, &op->kp); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + opt_pre_handler(&op->kp, regs); + __this_cpu_write(current_kprobe, NULL); + } + local_irq_restore(flags); +} + +static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) +{ + int len = 0, ret; + + while (len < RELATIVEJUMP_SIZE) { + ret = __copy_instruction(dest + len, src + len); + if (!ret || !can_boost(dest + len)) + return -EINVAL; + len += ret; + } + /* Check whether the address range is reserved */ + if (ftrace_text_reserved(src, src + len - 1) || + alternatives_text_reserved(src, src + len - 1) || + jump_label_text_reserved(src, src + len - 1)) + return -EBUSY; + + return len; +} + +/* Check whether insn is indirect jump */ +static int __kprobes insn_is_indirect_jump(struct insn *insn) +{ + return ((insn->opcode.bytes[0] == 0xff && + (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ + insn->opcode.bytes[0] == 0xea); /* Segment based jump */ +} + +/* Check whether insn jumps into specified address range */ +static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) +{ + unsigned long target = 0; + + switch (insn->opcode.bytes[0]) { + case 0xe0: /* loopne */ + case 0xe1: /* loope */ + case 0xe2: /* loop */ + case 0xe3: /* jcxz */ + case 0xe9: /* near relative jump */ + case 0xeb: /* short relative jump */ + break; + case 0x0f: + if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ + break; + return 0; + default: + if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ + break; + return 0; + } + target = (unsigned long)insn->next_byte + insn->immediate.value; + + return (start <= target && target <= start + len); +} + +/* Decode the whole function to ensure no instructions jump into the target */ +static int __kprobes can_optimize(unsigned long paddr) +{ + unsigned long addr, size = 0, offset = 0; + struct insn insn; + kprobe_opcode_t buf[MAX_INSN_SIZE]; + + /* Lookup symbol including addr */ + if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) + return 0; + + /* + * Do not optimize in the entry code due to the unstable + * stack handling. + */ + if ((paddr >= (unsigned long)__entry_text_start) && + (paddr < (unsigned long)__entry_text_end)) + return 0; + + /* Check there is enough space for a relative jump. */ + if (size - offset < RELATIVEJUMP_SIZE) + return 0; + + /* Decode instructions */ + addr = paddr - offset; + while (addr < paddr - offset + size) { /* Decode until function end */ + if (search_exception_tables(addr)) + /* + * Since some fixup code will jump into this function, + * we can't optimize a kprobe in this function. 
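+ * (a typical example of such fixup code is an exception-table fixup, which resumes execution in the middle of the function)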
+ */ + return 0; + kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr)); + insn_get_length(&insn); + /* Another subsystem puts a breakpoint */ + if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) + return 0; + /* Recover address */ + insn.kaddr = (void *)addr; + insn.next_byte = (void *)(addr + insn.length); + /* Check any instructions don't jump into target */ + if (insn_is_indirect_jump(&insn) || + insn_jump_into_range(&insn, paddr + INT3_SIZE, + RELATIVE_ADDR_SIZE)) + return 0; + addr += insn.length; + } + + return 1; +} + +/* Check optimized_kprobe can actually be optimized. */ +int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) +{ + int i; + struct kprobe *p; + + for (i = 1; i < op->optinsn.size; i++) { + p = get_kprobe(op->kp.addr + i); + if (p && !kprobe_disabled(p)) + return -EEXIST; + } + + return 0; +} + +/* Check the addr is within the optimized instructions. */ +int __kprobes +arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr) +{ + return ((unsigned long)op->kp.addr <= addr && + (unsigned long)op->kp.addr + op->optinsn.size > addr); +} + +/* Free optimized instruction slot */ +static __kprobes +void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) +{ + if (op->optinsn.insn) { + free_optinsn_slot(op->optinsn.insn, dirty); + op->optinsn.insn = NULL; + op->optinsn.size = 0; + } +} + +void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) +{ + __arch_remove_optimized_kprobe(op, 1); +} + +/* + * Copy replacing target instructions + * Target instructions MUST be relocatable (checked inside) + * This is called when new aggr(opt)probe is allocated or reused. + */ +int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) +{ + u8 *buf; + int ret; + long rel; + + if (!can_optimize((unsigned long)op->kp.addr)) + return -EILSEQ; + + op->optinsn.insn = get_optinsn_slot(); + if (!op->optinsn.insn) + return -ENOMEM; + + /* + * Verify if the address gap is in 2GB range, because this uses + * a relative jump. 
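arch_within_optimized_kprobe() above uses a half-open interval: the probe's first byte and everything up to (but not including) kp.addr + optinsn.size counts as "inside", which is why a second probe landing anywhere in the copied region is rejected. A tiny userspace sketch of the same test, with hypothetical numbers:

#include <stdio.h>

/* Half-open membership test: kp_addr <= addr < kp_addr + size,
 * mirroring the check in arch_within_optimized_kprobe() above. */
static int within_optimized(unsigned long kp_addr, unsigned long size,
                            unsigned long addr)
{
        return kp_addr <= addr && kp_addr + size > addr;
}

int main(void)
{
        unsigned long kp = 0x1000, size = 5;    /* hypothetical probe */

        printf("0x1000: %d\n", within_optimized(kp, size, 0x1000)); /* 1 */
        printf("0x1004: %d\n", within_optimized(kp, size, 0x1004)); /* 1 */
        printf("0x1005: %d\n", within_optimized(kp, size, 0x1005)); /* 0 */
        return 0;
}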
+ */ + rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; + if (abs(rel) > 0x7fffffff) + return -ERANGE; + + buf = (u8 *)op->optinsn.insn; + + /* Copy instructions into the out-of-line buffer */ + ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); + if (ret < 0) { + __arch_remove_optimized_kprobe(op, 0); + return ret; + } + op->optinsn.size = ret; + + /* Copy arch-dep-instance from template */ + memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); + + /* Set probe information */ + synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); + + /* Set probe function call */ + synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); + + /* Set returning jmp instruction at the tail of out-of-line buffer */ + synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, + (u8 *)op->kp.addr + op->optinsn.size); + + flush_icache_range((unsigned long) buf, + (unsigned long) buf + TMPL_END_IDX + + op->optinsn.size + RELATIVEJUMP_SIZE); + return 0; +} + +#define MAX_OPTIMIZE_PROBES 256 +static struct text_poke_param *jump_poke_params; +static struct jump_poke_buffer { + u8 buf[RELATIVEJUMP_SIZE]; +} *jump_poke_bufs; + +static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, + u8 *insn_buf, + struct optimized_kprobe *op) +{ + s32 rel = (s32)((long)op->optinsn.insn - + ((long)op->kp.addr + RELATIVEJUMP_SIZE)); + + /* Backup instructions which will be replaced by jump address */ + memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, + RELATIVE_ADDR_SIZE); + + insn_buf[0] = RELATIVEJUMP_OPCODE; + *(s32 *)(&insn_buf[1]) = rel; + + tprm->addr = op->kp.addr; + tprm->opcode = insn_buf; + tprm->len = RELATIVEJUMP_SIZE; +} + +/* + * Replace breakpoints (int3) with relative jumps. + * Caller must call with locking kprobe_mutex and text_mutex. + */ +void __kprobes arch_optimize_kprobes(struct list_head *oplist) +{ + struct optimized_kprobe *op, *tmp; + int c = 0; + + list_for_each_entry_safe(op, tmp, oplist, list) { + WARN_ON(kprobe_disabled(&op->kp)); + /* Setup param */ + setup_optimize_kprobe(&jump_poke_params[c], + jump_poke_bufs[c].buf, op); + list_del_init(&op->list); + if (++c >= MAX_OPTIMIZE_PROBES) + break; + } + + /* + * text_poke_smp doesn't support NMI/MCE code modifying. + * However, since kprobes itself also doesn't support NMI/MCE + * code probing, it's not a problem. + */ + text_poke_smp_batch(jump_poke_params, c); +} + +static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, + u8 *insn_buf, + struct optimized_kprobe *op) +{ + /* Set int3 to first byte for kprobes */ + insn_buf[0] = BREAKPOINT_INSTRUCTION; + memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); + + tprm->addr = op->kp.addr; + tprm->opcode = insn_buf; + tprm->len = RELATIVEJUMP_SIZE; +} + +/* + * Recover original instructions and breakpoints from relative jumps. + * Caller must call with locking kprobe_mutex. + */ +extern void arch_unoptimize_kprobes(struct list_head *oplist, + struct list_head *done_list) +{ + struct optimized_kprobe *op, *tmp; + int c = 0; + + list_for_each_entry_safe(op, tmp, oplist, list) { + /* Setup param */ + setup_unoptimize_kprobe(&jump_poke_params[c], + jump_poke_bufs[c].buf, op); + list_move(&op->list, done_list); + if (++c >= MAX_OPTIMIZE_PROBES) + break; + } + + /* + * text_poke_smp doesn't support NMI/MCE code modifying. + * However, since kprobes itself also doesn't support NMI/MCE + * code probing, it's not a problem. 
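The displacement math used in this range check and again in setup_optimize_kprobe() below is plain rel32 encoding: rel = destination - (source + 5), the result must fit in a signed 32 bits (hence the 2GB limit), and the emitted 5-byte sequence is the 0xe9 opcode followed by the little-endian displacement. A self-contained sketch with hypothetical addresses:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define RELATIVEJUMP_OPCODE 0xe9
#define RELATIVEJUMP_SIZE   5

/* Encode "jmp rel32" at 'from' targeting 'to'; 0 on success, -1 if the
 * gap cannot be expressed as a signed 32-bit displacement. */
static int synthesize_reljump_bytes(uint8_t buf[RELATIVEJUMP_SIZE],
                                    uint64_t from, uint64_t to)
{
        int64_t rel = (int64_t)to - (int64_t)(from + RELATIVEJUMP_SIZE);

        if (rel > INT32_MAX || rel < INT32_MIN)
                return -1;              /* farther than +/-2GB */

        buf[0] = RELATIVEJUMP_OPCODE;
        int32_t rel32 = (int32_t)rel;
        memcpy(buf + 1, &rel32, sizeof(rel32));   /* x86 is little-endian */
        return 0;
}

int main(void)
{
        uint8_t buf[RELATIVEJUMP_SIZE];
        /* hypothetical probe address and out-of-line buffer address */
        uint64_t kp_addr = 0xffffffff81000000ull;
        uint64_t optinsn = 0xffffffff81200000ull;

        if (synthesize_reljump_bytes(buf, kp_addr, optinsn) == 0) {
                for (int i = 0; i < RELATIVEJUMP_SIZE; i++)
                        printf("%02x ", buf[i]);
                printf("\n");           /* prints: e9 fb ff 1f 00 */
        }
        return 0;
}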
+ */ + text_poke_smp_batch(jump_poke_params, c); +} + +/* Replace a relative jump with a breakpoint (int3). */ +void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) +{ + u8 buf[RELATIVEJUMP_SIZE]; + + /* Set int3 to first byte for kprobes */ + buf[0] = BREAKPOINT_INSTRUCTION; + memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); + text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); +} + +int __kprobes +setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) +{ + struct optimized_kprobe *op; + + if (p->flags & KPROBE_FLAG_OPTIMIZED) { + /* This kprobe is really able to run optimized path. */ + op = container_of(p, struct optimized_kprobe, kp); + /* Detour through copied instructions */ + regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; + if (!reenter) + reset_current_kprobe(); + preempt_enable_no_resched(); + return 1; + } + return 0; +} + +int __kprobes arch_init_optprobes(void) +{ + /* Allocate code buffer and parameter array */ + jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * + MAX_OPTIMIZE_PROBES, GFP_KERNEL); + if (!jump_poke_bufs) + return -ENOMEM; + + jump_poke_params = kmalloc(sizeof(struct text_poke_param) * + MAX_OPTIMIZE_PROBES, GFP_KERNEL); + if (!jump_poke_params) { + kfree(jump_poke_bufs); + jump_poke_bufs = NULL; + return -ENOMEM; + } + + return 0; +} diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7da647d8b64..e213fc8408d 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -30,16 +30,15 @@ * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi * <prasanna@in.ibm.com> added function-return probes. * 2005-May Rusty Lynch <rusty.lynch@intel.com> - * Added function return probes functionality + * Added function return probes functionality * 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added - * kprobe-booster and kretprobe-booster for i386. + * kprobe-booster and kretprobe-booster for i386. * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster - * and kretprobe-booster for x86-64 + * and kretprobe-booster for x86-64 * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven - * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com> - * unified x86 kprobes code. + * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com> + * unified x86 kprobes code. */ - #include <linux/kprobes.h> #include <linux/ptrace.h> #include <linux/string.h> @@ -59,6 +58,8 @@ #include <asm/insn.h> #include <asm/debugreg.h> +#include "kprobes-common.h" + void jprobe_return_end(void); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; @@ -108,6 +109,7 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { doesn't switch kernel stack.*/ {NULL, NULL} /* Terminator */ }; + const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) @@ -123,11 +125,17 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) } /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ -static void __kprobes synthesize_reljump(void *from, void *to) +void __kprobes synthesize_reljump(void *from, void *to) { __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); } +/* Insert a call instruction at address 'from', which calls address 'to'.*/ +void __kprobes synthesize_relcall(void *from, void *to) +{ + __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); +} + /* * Skip the prefixes of the instruction. 
*/ @@ -151,7 +159,7 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) * Returns non-zero if opcode is boostable. * RIP relative instructions are adjusted at copying time in 64 bits mode */ -static int __kprobes can_boost(kprobe_opcode_t *opcodes) +int __kprobes can_boost(kprobe_opcode_t *opcodes) { kprobe_opcode_t opcode; kprobe_opcode_t *orig_opcodes = opcodes; @@ -207,13 +215,15 @@ retry: } } -/* Recover the probed instruction at addr for further analysis. */ -static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) +static unsigned long +__recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) { struct kprobe *kp; + kp = get_kprobe((void *)addr); + /* There is no probe, return original address */ if (!kp) - return -EINVAL; + return addr; /* * Basically, kp->ainsn.insn has an original instruction. @@ -230,14 +240,29 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) */ memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); buf[0] = kp->opcode; - return 0; + return (unsigned long)buf; +} + +/* + * Recover the probed instruction at addr for further analysis. + * Caller must lock kprobes by kprobe_mutex, or disable preemption + * for preventing to release referencing kprobes. + */ +unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) +{ + unsigned long __addr; + + __addr = __recover_optprobed_insn(buf, addr); + if (__addr != addr) + return __addr; + + return __recover_probed_insn(buf, addr); } /* Check if paddr is at an instruction boundary */ static int __kprobes can_probe(unsigned long paddr) { - int ret; - unsigned long addr, offset = 0; + unsigned long addr, __addr, offset = 0; struct insn insn; kprobe_opcode_t buf[MAX_INSN_SIZE]; @@ -247,26 +272,24 @@ static int __kprobes can_probe(unsigned long paddr) /* Decode instructions */ addr = paddr - offset; while (addr < paddr) { - kernel_insn_init(&insn, (void *)addr); - insn_get_opcode(&insn); - /* * Check if the instruction has been modified by another * kprobe, in which case we replace the breakpoint by the * original instruction in our buffer. + * Also, jump optimization will change the breakpoint to + * relative-jump. Since the relative-jump itself is + * normally used, we just go through if there is no kprobe. */ - if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { - ret = recover_probed_instruction(buf, addr); - if (ret) - /* - * Another debugging subsystem might insert - * this breakpoint. In that case, we can't - * recover it. - */ - return 0; - kernel_insn_init(&insn, buf); - } + __addr = recover_probed_instruction(buf, addr); + kernel_insn_init(&insn, (void *)__addr); insn_get_length(&insn); + + /* + * Another debugging subsystem might insert this breakpoint. + * In that case, we can't recover it. + */ + if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) + return 0; addr += insn.length; } @@ -299,24 +322,16 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) * If not, return null. * Only applicable to 64-bit x86. 
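recover_probed_instruction() above reconstructs what the text would look like with no probe armed: copy MAX_INSN_SIZE bytes from the probed address, then put the stashed original first byte back over the int3. A userspace model of that recovery step (fixed, made-up bytes; the real code pulls the saved byte out of kp->opcode):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define MAX_INSN_SIZE 16

/* A probed location: text[] holds 0xcc (int3) where the original first
 * byte used to be; that byte was stashed at arm time (like kp->opcode). */
static uint8_t text[MAX_INSN_SIZE] = { 0xcc, 0x89, 0xe5 };
static uint8_t saved_opcode = 0x55;     /* original "push %rbp" */

/* Rebuild the original instruction bytes into buf for the decoder. */
static uint8_t *recover(uint8_t *buf)
{
        memcpy(buf, text, MAX_INSN_SIZE);
        buf[0] = saved_opcode;          /* undo the int3 */
        return buf;
}

int main(void)
{
        uint8_t buf[MAX_INSN_SIZE];
        printf("first byte seen: %02x, recovered: %02x\n",
               text[0], recover(buf)[0]);
        return 0;
}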
*/ -static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) +int __kprobes __copy_instruction(u8 *dest, u8 *src) { struct insn insn; - int ret; kprobe_opcode_t buf[MAX_INSN_SIZE]; - kernel_insn_init(&insn, src); - if (recover) { - insn_get_opcode(&insn); - if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { - ret = recover_probed_instruction(buf, - (unsigned long)src); - if (ret) - return 0; - kernel_insn_init(&insn, buf); - } - } + kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src)); insn_get_length(&insn); + /* Another subsystem puts a breakpoint, failed to recover */ + if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) + return 0; memcpy(dest, insn.kaddr, insn.length); #ifdef CONFIG_X86_64 @@ -337,8 +352,7 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) * extension of the original signed 32-bit displacement would * have given. */ - newdisp = (u8 *) src + (s64) insn.displacement.value - - (u8 *) dest; + newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest; BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ disp = (u8 *) dest + insn_offset_displacement(&insn); *(s32 *) disp = (s32) newdisp; @@ -349,18 +363,20 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) static void __kprobes arch_copy_kprobe(struct kprobe *p) { + /* Copy an instruction with recovering if other optprobe modifies it.*/ + __copy_instruction(p->ainsn.insn, p->addr); + /* - * Copy an instruction without recovering int3, because it will be - * put by another subsystem. + * __copy_instruction can modify the displacement of the instruction, + * but it doesn't affect boostable check. */ - __copy_instruction(p->ainsn.insn, p->addr, 0); - - if (can_boost(p->addr)) + if (can_boost(p->ainsn.insn)) p->ainsn.boostable = 0; else p->ainsn.boostable = -1; - p->opcode = *p->addr; + /* Also, displacement change doesn't affect the first byte */ + p->opcode = p->ainsn.insn[0]; } int __kprobes arch_prepare_kprobe(struct kprobe *p) @@ -442,8 +458,8 @@ static void __kprobes restore_btf(void) } } -void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, - struct pt_regs *regs) +void __kprobes +arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { unsigned long *sara = stack_addr(regs); @@ -453,16 +469,8 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, *sara = (unsigned long) &kretprobe_trampoline; } -#ifdef CONFIG_OPTPROBES -static int __kprobes setup_detour_execution(struct kprobe *p, - struct pt_regs *regs, - int reenter); -#else -#define setup_detour_execution(p, regs, reenter) (0) -#endif - -static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb, int reenter) +static void __kprobes +setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter) { if (setup_detour_execution(p, regs, reenter)) return; @@ -504,8 +512,8 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, * within the handler. We save the original kprobes variables and just single * step on the instruction of the new probe without calling any user handlers. 
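The newdisp computation in __copy_instruction() above rebases a RIP-relative displacement when an instruction is copied: the absolute target must stay put, so from src_next + disp == dest_next + newdisp (the instruction length cancels on both sides) we get newdisp = src + disp - dest, which must still fit in 32 bits. A standalone check of that identity with hypothetical addresses:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t src  = 0xffffffff81000000ull;  /* hypothetical original */
        uint64_t dest = 0xffffffff81200000ull;  /* hypothetical copy */
        int32_t  disp = 0x1000;                 /* original rel32 */

        /* newdisp = src + disp - dest keeps the absolute target unchanged */
        int64_t newdisp = (int64_t)src + disp - (int64_t)dest;

        /* must round-trip through s32 to stay encodable (the BUG_ON above) */
        printf("newdisp = %lld, fits s32: %d\n",
               (long long)newdisp,
               newdisp == (int64_t)(int32_t)newdisp);
        return 0;
}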
*/ -static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) +static int __kprobes +reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { switch (kcb->kprobe_status) { case KPROBE_HIT_SSDONE: @@ -600,69 +608,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) return 0; } -#ifdef CONFIG_X86_64 -#define SAVE_REGS_STRING \ - /* Skip cs, ip, orig_ax. */ \ - " subq $24, %rsp\n" \ - " pushq %rdi\n" \ - " pushq %rsi\n" \ - " pushq %rdx\n" \ - " pushq %rcx\n" \ - " pushq %rax\n" \ - " pushq %r8\n" \ - " pushq %r9\n" \ - " pushq %r10\n" \ - " pushq %r11\n" \ - " pushq %rbx\n" \ - " pushq %rbp\n" \ - " pushq %r12\n" \ - " pushq %r13\n" \ - " pushq %r14\n" \ - " pushq %r15\n" -#define RESTORE_REGS_STRING \ - " popq %r15\n" \ - " popq %r14\n" \ - " popq %r13\n" \ - " popq %r12\n" \ - " popq %rbp\n" \ - " popq %rbx\n" \ - " popq %r11\n" \ - " popq %r10\n" \ - " popq %r9\n" \ - " popq %r8\n" \ - " popq %rax\n" \ - " popq %rcx\n" \ - " popq %rdx\n" \ - " popq %rsi\n" \ - " popq %rdi\n" \ - /* Skip orig_ax, ip, cs */ \ - " addq $24, %rsp\n" -#else -#define SAVE_REGS_STRING \ - /* Skip cs, ip, orig_ax and gs. */ \ - " subl $16, %esp\n" \ - " pushl %fs\n" \ - " pushl %es\n" \ - " pushl %ds\n" \ - " pushl %eax\n" \ - " pushl %ebp\n" \ - " pushl %edi\n" \ - " pushl %esi\n" \ - " pushl %edx\n" \ - " pushl %ecx\n" \ - " pushl %ebx\n" -#define RESTORE_REGS_STRING \ - " popl %ebx\n" \ - " popl %ecx\n" \ - " popl %edx\n" \ - " popl %esi\n" \ - " popl %edi\n" \ - " popl %ebp\n" \ - " popl %eax\n" \ - /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\ - " addl $24, %esp\n" -#endif - /* * When a retprobed function returns, this code saves registers and * calls trampoline_handler() runs, which calls the kretprobe's handler. @@ -816,8 +761,8 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) * jump instruction after the copied instruction, that jumps to the next * instruction after the probepoint. */ -static void __kprobes resume_execution(struct kprobe *p, - struct pt_regs *regs, struct kprobe_ctlblk *kcb) +static void __kprobes +resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { unsigned long *tos = stack_addr(regs); unsigned long copy_ip = (unsigned long)p->ainsn.insn; @@ -996,8 +941,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) /* * Wrapper routine for handling exceptions. */ -int __kprobes kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) +int __kprobes +kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { struct die_args *args = data; int ret = NOTIFY_DONE; @@ -1107,466 +1052,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } - -#ifdef CONFIG_OPTPROBES - -/* Insert a call instruction at address 'from', which calls address 'to'.*/ -static void __kprobes synthesize_relcall(void *from, void *to) -{ - __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); -} - -/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). 
*/ -static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, - unsigned long val) -{ -#ifdef CONFIG_X86_64 - *addr++ = 0x48; - *addr++ = 0xbf; -#else - *addr++ = 0xb8; -#endif - *(unsigned long *)addr = val; -} - -static void __used __kprobes kprobes_optinsn_template_holder(void) -{ - asm volatile ( - ".global optprobe_template_entry\n" - "optprobe_template_entry: \n" -#ifdef CONFIG_X86_64 - /* We don't bother saving the ss register */ - " pushq %rsp\n" - " pushfq\n" - SAVE_REGS_STRING - " movq %rsp, %rsi\n" - ".global optprobe_template_val\n" - "optprobe_template_val: \n" - ASM_NOP5 - ASM_NOP5 - ".global optprobe_template_call\n" - "optprobe_template_call: \n" - ASM_NOP5 - /* Move flags to rsp */ - " movq 144(%rsp), %rdx\n" - " movq %rdx, 152(%rsp)\n" - RESTORE_REGS_STRING - /* Skip flags entry */ - " addq $8, %rsp\n" - " popfq\n" -#else /* CONFIG_X86_32 */ - " pushf\n" - SAVE_REGS_STRING - " movl %esp, %edx\n" - ".global optprobe_template_val\n" - "optprobe_template_val: \n" - ASM_NOP5 - ".global optprobe_template_call\n" - "optprobe_template_call: \n" - ASM_NOP5 - RESTORE_REGS_STRING - " addl $4, %esp\n" /* skip cs */ - " popf\n" -#endif - ".global optprobe_template_end\n" - "optprobe_template_end: \n"); -} - -#define TMPL_MOVE_IDX \ - ((long)&optprobe_template_val - (long)&optprobe_template_entry) -#define TMPL_CALL_IDX \ - ((long)&optprobe_template_call - (long)&optprobe_template_entry) -#define TMPL_END_IDX \ - ((long)&optprobe_template_end - (long)&optprobe_template_entry) - -#define INT3_SIZE sizeof(kprobe_opcode_t) - -/* Optimized kprobe call back function: called from optinsn */ -static void __kprobes optimized_callback(struct optimized_kprobe *op, - struct pt_regs *regs) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - unsigned long flags; - - /* This is possible if op is under delayed unoptimizing */ - if (kprobe_disabled(&op->kp)) - return; - - local_irq_save(flags); - if (kprobe_running()) { - kprobes_inc_nmissed_count(&op->kp); - } else { - /* Save skipped registers */ -#ifdef CONFIG_X86_64 - regs->cs = __KERNEL_CS; -#else - regs->cs = __KERNEL_CS | get_kernel_rpl(); - regs->gs = 0; -#endif - regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; - regs->orig_ax = ~0UL; - - __this_cpu_write(current_kprobe, &op->kp); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - opt_pre_handler(&op->kp, regs); - __this_cpu_write(current_kprobe, NULL); - } - local_irq_restore(flags); -} - -static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) -{ - int len = 0, ret; - - while (len < RELATIVEJUMP_SIZE) { - ret = __copy_instruction(dest + len, src + len, 1); - if (!ret || !can_boost(dest + len)) - return -EINVAL; - len += ret; - } - /* Check whether the address range is reserved */ - if (ftrace_text_reserved(src, src + len - 1) || - alternatives_text_reserved(src, src + len - 1) || - jump_label_text_reserved(src, src + len - 1)) - return -EBUSY; - - return len; -} - -/* Check whether insn is indirect jump */ -static int __kprobes insn_is_indirect_jump(struct insn *insn) -{ - return ((insn->opcode.bytes[0] == 0xff && - (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ - insn->opcode.bytes[0] == 0xea); /* Segment based jump */ -} - -/* Check whether insn jumps into specified address range */ -static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) -{ - unsigned long target = 0; - - switch (insn->opcode.bytes[0]) { - case 0xe0: /* loopne */ - case 0xe1: /* loope */ - case 0xe2: /* loop */ - case 0xe3: /* jcxz */ - case 0xe9: /* near relative 
jump */ - case 0xeb: /* short relative jump */ - break; - case 0x0f: - if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ - break; - return 0; - default: - if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ - break; - return 0; - } - target = (unsigned long)insn->next_byte + insn->immediate.value; - - return (start <= target && target <= start + len); -} - -/* Decode whole function to ensure any instructions don't jump into target */ -static int __kprobes can_optimize(unsigned long paddr) -{ - int ret; - unsigned long addr, size = 0, offset = 0; - struct insn insn; - kprobe_opcode_t buf[MAX_INSN_SIZE]; - - /* Lookup symbol including addr */ - if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) - return 0; - - /* - * Do not optimize in the entry code due to the unstable - * stack handling. - */ - if ((paddr >= (unsigned long )__entry_text_start) && - (paddr < (unsigned long )__entry_text_end)) - return 0; - - /* Check there is enough space for a relative jump. */ - if (size - offset < RELATIVEJUMP_SIZE) - return 0; - - /* Decode instructions */ - addr = paddr - offset; - while (addr < paddr - offset + size) { /* Decode until function end */ - if (search_exception_tables(addr)) - /* - * Since some fixup code will jumps into this function, - * we can't optimize kprobe in this function. - */ - return 0; - kernel_insn_init(&insn, (void *)addr); - insn_get_opcode(&insn); - if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { - ret = recover_probed_instruction(buf, addr); - if (ret) - return 0; - kernel_insn_init(&insn, buf); - } - insn_get_length(&insn); - /* Recover address */ - insn.kaddr = (void *)addr; - insn.next_byte = (void *)(addr + insn.length); - /* Check any instructions don't jump into target */ - if (insn_is_indirect_jump(&insn) || - insn_jump_into_range(&insn, paddr + INT3_SIZE, - RELATIVE_ADDR_SIZE)) - return 0; - addr += insn.length; - } - - return 1; -} - -/* Check optimized_kprobe can actually be optimized. */ -int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) -{ - int i; - struct kprobe *p; - - for (i = 1; i < op->optinsn.size; i++) { - p = get_kprobe(op->kp.addr + i); - if (p && !kprobe_disabled(p)) - return -EEXIST; - } - - return 0; -} - -/* Check the addr is within the optimized instructions. */ -int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op, - unsigned long addr) -{ - return ((unsigned long)op->kp.addr <= addr && - (unsigned long)op->kp.addr + op->optinsn.size > addr); -} - -/* Free optimized instruction slot */ -static __kprobes -void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) -{ - if (op->optinsn.insn) { - free_optinsn_slot(op->optinsn.insn, dirty); - op->optinsn.insn = NULL; - op->optinsn.size = 0; - } -} - -void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) -{ - __arch_remove_optimized_kprobe(op, 1); -} - -/* - * Copy replacing target instructions - * Target instructions MUST be relocatable (checked inside) - */ -int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) -{ - u8 *buf; - int ret; - long rel; - - if (!can_optimize((unsigned long)op->kp.addr)) - return -EILSEQ; - - op->optinsn.insn = get_optinsn_slot(); - if (!op->optinsn.insn) - return -ENOMEM; - - /* - * Verify if the address gap is in 2GB range, because this uses - * a relative jump. 
- */ - rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; - if (abs(rel) > 0x7fffffff) - return -ERANGE; - - buf = (u8 *)op->optinsn.insn; - - /* Copy instructions into the out-of-line buffer */ - ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); - if (ret < 0) { - __arch_remove_optimized_kprobe(op, 0); - return ret; - } - op->optinsn.size = ret; - - /* Copy arch-dep-instance from template */ - memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); - - /* Set probe information */ - synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); - - /* Set probe function call */ - synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); - - /* Set returning jmp instruction at the tail of out-of-line buffer */ - synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, - (u8 *)op->kp.addr + op->optinsn.size); - - flush_icache_range((unsigned long) buf, - (unsigned long) buf + TMPL_END_IDX + - op->optinsn.size + RELATIVEJUMP_SIZE); - return 0; -} - -#define MAX_OPTIMIZE_PROBES 256 -static struct text_poke_param *jump_poke_params; -static struct jump_poke_buffer { - u8 buf[RELATIVEJUMP_SIZE]; -} *jump_poke_bufs; - -static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, - u8 *insn_buf, - struct optimized_kprobe *op) -{ - s32 rel = (s32)((long)op->optinsn.insn - - ((long)op->kp.addr + RELATIVEJUMP_SIZE)); - - /* Backup instructions which will be replaced by jump address */ - memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, - RELATIVE_ADDR_SIZE); - - insn_buf[0] = RELATIVEJUMP_OPCODE; - *(s32 *)(&insn_buf[1]) = rel; - - tprm->addr = op->kp.addr; - tprm->opcode = insn_buf; - tprm->len = RELATIVEJUMP_SIZE; -} - -/* - * Replace breakpoints (int3) with relative jumps. - * Caller must call with locking kprobe_mutex and text_mutex. - */ -void __kprobes arch_optimize_kprobes(struct list_head *oplist) -{ - struct optimized_kprobe *op, *tmp; - int c = 0; - - list_for_each_entry_safe(op, tmp, oplist, list) { - WARN_ON(kprobe_disabled(&op->kp)); - /* Setup param */ - setup_optimize_kprobe(&jump_poke_params[c], - jump_poke_bufs[c].buf, op); - list_del_init(&op->list); - if (++c >= MAX_OPTIMIZE_PROBES) - break; - } - - /* - * text_poke_smp doesn't support NMI/MCE code modifying. - * However, since kprobes itself also doesn't support NMI/MCE - * code probing, it's not a problem. - */ - text_poke_smp_batch(jump_poke_params, c); -} - -static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, - u8 *insn_buf, - struct optimized_kprobe *op) -{ - /* Set int3 to first byte for kprobes */ - insn_buf[0] = BREAKPOINT_INSTRUCTION; - memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); - - tprm->addr = op->kp.addr; - tprm->opcode = insn_buf; - tprm->len = RELATIVEJUMP_SIZE; -} - -/* - * Recover original instructions and breakpoints from relative jumps. - * Caller must call with locking kprobe_mutex. - */ -extern void arch_unoptimize_kprobes(struct list_head *oplist, - struct list_head *done_list) -{ - struct optimized_kprobe *op, *tmp; - int c = 0; - - list_for_each_entry_safe(op, tmp, oplist, list) { - /* Setup param */ - setup_unoptimize_kprobe(&jump_poke_params[c], - jump_poke_bufs[c].buf, op); - list_move(&op->list, done_list); - if (++c >= MAX_OPTIMIZE_PROBES) - break; - } - - /* - * text_poke_smp doesn't support NMI/MCE code modifying. - * However, since kprobes itself also doesn't support NMI/MCE - * code probing, it's not a problem. 
- */ - text_poke_smp_batch(jump_poke_params, c); -} - -/* Replace a relative jump with a breakpoint (int3). */ -void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) -{ - u8 buf[RELATIVEJUMP_SIZE]; - - /* Set int3 to first byte for kprobes */ - buf[0] = BREAKPOINT_INSTRUCTION; - memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); - text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); -} - -static int __kprobes setup_detour_execution(struct kprobe *p, - struct pt_regs *regs, - int reenter) -{ - struct optimized_kprobe *op; - - if (p->flags & KPROBE_FLAG_OPTIMIZED) { - /* This kprobe is really able to run optimized path. */ - op = container_of(p, struct optimized_kprobe, kp); - /* Detour through copied instructions */ - regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; - if (!reenter) - reset_current_kprobe(); - preempt_enable_no_resched(); - return 1; - } - return 0; -} - -static int __kprobes init_poke_params(void) -{ - /* Allocate code buffer and parameter array */ - jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * - MAX_OPTIMIZE_PROBES, GFP_KERNEL); - if (!jump_poke_bufs) - return -ENOMEM; - - jump_poke_params = kmalloc(sizeof(struct text_poke_param) * - MAX_OPTIMIZE_PROBES, GFP_KERNEL); - if (!jump_poke_params) { - kfree(jump_poke_bufs); - jump_poke_bufs = NULL; - return -ENOMEM; - } - - return 0; -} -#else /* !CONFIG_OPTPROBES */ -static int __kprobes init_poke_params(void) -{ - return 0; -} -#endif - int __init arch_init_kprobes(void) { - return init_poke_params(); + return arch_init_optprobes(); } int __kprobes arch_trampoline_kprobe(struct kprobe *p) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f0c6fd6f176..e554e5ad2fe 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -38,6 +38,7 @@ #include <asm/traps.h> #include <asm/desc.h> #include <asm/tlbflush.h> +#include <asm/idle.h> static int kvmapf = 1; @@ -78,7 +79,6 @@ struct kvm_task_sleep_node { u32 token; int cpu; bool halted; - struct mm_struct *mm; }; static struct kvm_task_sleep_head { @@ -125,9 +125,7 @@ void kvm_async_pf_task_wait(u32 token) n.token = token; n.cpu = smp_processor_id(); - n.mm = current->active_mm; n.halted = idle || preempt_count() > 1; - atomic_inc(&n.mm->mm_count); init_waitqueue_head(&n.wq); hlist_add_head(&n.link, &b->list); spin_unlock(&b->lock); @@ -160,9 +158,6 @@ EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait); static void apf_task_wake_one(struct kvm_task_sleep_node *n) { hlist_del_init(&n->link); - if (!n->mm) - return; - mmdrop(n->mm); if (n->halted) smp_send_reschedule(n->cpu); else if (waitqueue_active(&n->wq)) @@ -206,7 +201,7 @@ again: * async PF was not yet handled. * Add dummy entry for the token. */ - n = kmalloc(sizeof(*n), GFP_ATOMIC); + n = kzalloc(sizeof(*n), GFP_ATOMIC); if (!n) { /* * Allocation failed! 
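The kmalloc-to-kzalloc switch in the dummy-entry path above is what lets the explicit field initialization disappear: kzalloc hands back zeroed memory. The userspace analogue, sketched with a hypothetical node type:

#include <stdio.h>
#include <stdlib.h>

struct node {
        unsigned int token;
        int cpu;
        void *mm;       /* the code relies on this starting out NULL */
};

int main(void)
{
        /* malloc (like kmalloc) leaves fields indeterminate: */
        struct node *a = malloc(sizeof(*a));
        if (a)
                a->mm = NULL;           /* the kind of line the patch drops */

        /* calloc (like kzalloc) returns zeroed memory, no manual init: */
        struct node *b = calloc(1, sizeof(*b));
        if (b)
                printf("b->mm is NULL: %d\n", b->mm == NULL);

        free(a);
        free(b);
        return 0;
}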
Busy wait while other cpu @@ -218,7 +213,6 @@ again: } n->token = token; n->cpu = smp_processor_id(); - n->mm = NULL; init_waitqueue_head(&n->wq); hlist_add_head(&n->link, &b->list); } else @@ -253,7 +247,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) kvm_async_pf_task_wait((u32)read_cr2()); break; case KVM_PV_REASON_PAGE_READY: + rcu_irq_enter(); + exit_idle(); kvm_async_pf_task_wake((u32)read_cr2()); + rcu_irq_exit(); break; } } @@ -438,9 +435,9 @@ void __init kvm_guest_init(void) static __init int activate_jump_labels(void) { if (has_steal_clock) { - jump_label_inc(&paravirt_steal_enabled); + static_key_slow_inc(&paravirt_steal_enabled); if (steal_acc) - jump_label_inc(&paravirt_steal_rq_enabled); + static_key_slow_inc(&paravirt_steal_rq_enabled); } return 0; diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 44842d756b2..f8492da65bf 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -136,6 +136,15 @@ int kvm_register_clock(char *txt) return ret; } +static void kvm_save_sched_clock_state(void) +{ +} + +static void kvm_restore_sched_clock_state(void) +{ + kvm_register_clock("primary cpu clock, resume"); +} + #ifdef CONFIG_X86_LOCAL_APIC static void __cpuinit kvm_setup_secondary_clock(void) { @@ -144,8 +153,6 @@ static void __cpuinit kvm_setup_secondary_clock(void) * we shouldn't fail. */ WARN_ON(kvm_register_clock("secondary cpu clock")); - /* ok, done with our trickery, call native */ - setup_secondary_APIC_clock(); } #endif @@ -194,9 +201,11 @@ void __init kvmclock_init(void) x86_platform.get_wallclock = kvm_get_wallclock; x86_platform.set_wallclock = kvm_set_wallclock; #ifdef CONFIG_X86_LOCAL_APIC - x86_cpuinit.setup_percpu_clockev = + x86_cpuinit.early_percpu_clock_init = kvm_setup_secondary_clock; #endif + x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; + x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; machine_ops.shutdown = kvm_shutdown; #ifdef CONFIG_KEXEC machine_ops.crash_shutdown = kvm_crash_shutdown; diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index ea697263b37..ebc98739892 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -15,7 +15,6 @@ #include <linux/vmalloc.h> #include <linux/uaccess.h> -#include <asm/system.h> #include <asm/ldt.h> #include <asm/desc.h> #include <asm/mmu_context.h> diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index a3fa43ba5d3..5b19e4d78b0 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -23,7 +23,6 @@ #include <asm/apic.h> #include <asm/cpufeature.h> #include <asm/desc.h> -#include <asm/system.h> #include <asm/cacheflush.h> #include <asm/debugreg.h> diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c index 177183cbb6a..7eb1e2b9782 100644 --- a/arch/x86/kernel/mca_32.c +++ b/arch/x86/kernel/mca_32.c @@ -43,7 +43,6 @@ #include <linux/mca.h> #include <linux/kprobes.h> #include <linux/slab.h> -#include <asm/system.h> #include <asm/io.h> #include <linux/proc_fs.h> #include <linux/mman.h> diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 50a58754da1..c9bda6d6035 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -86,6 +86,7 @@ #include <asm/microcode.h> #include <asm/processor.h> +#include <asm/cpu_device_id.h> MODULE_DESCRIPTION("Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); @@ -502,6 +503,20 @@ static struct 
notifier_block __refdata mc_cpu_notifier = { .notifier_call = mc_cpu_callback, }; +#ifdef MODULE +/* Autoload on Intel and AMD systems */ +static const struct x86_cpu_id microcode_id[] = { +#ifdef CONFIG_MICROCODE_INTEL + { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, }, +#endif +#ifdef CONFIG_MICROCODE_AMD + { X86_VENDOR_AMD, X86_FAMILY_ANY, X86_MODEL_ANY, }, +#endif + {} +}; +MODULE_DEVICE_TABLE(x86cpu, microcode_id); +#endif + static int __init microcode_init(void) { struct cpuinfo_x86 *c = &cpu_data(0); diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 3ca42d0e43a..0327e2b3c40 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -147,12 +147,6 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) memset(csig, 0, sizeof(*csig)); - if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64)) { - pr_err("CPU%d not a capable Intel processor\n", cpu_num); - return -1; - } - csig->sig = cpuid_eax(0x00000001); if ((c->x86_model >= 5) || (c->x86 > 6)) { @@ -463,6 +457,14 @@ static struct microcode_ops microcode_intel_ops = { struct microcode_ops * __init init_intel_microcode(void) { + struct cpuinfo_x86 *c = &cpu_data(0); + + if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || + cpu_has(c, X86_FEATURE_IA64)) { + pr_err("Intel CPU family 0x%x not supported\n", c->x86); + return NULL; + } + return &microcode_intel_ops; } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 925179f871d..f21fd94ac89 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -26,7 +26,6 @@ #include <linux/gfp.h> #include <linux/jump_label.h> -#include <asm/system.h> #include <asm/page.h> #include <asm/pgtable.h> diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 96356762a51..eb113693f04 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -40,7 +40,6 @@ #include <asm/processor.h> #include <asm/msr.h> -#include <asm/system.h> static struct class *msr_class; diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c index 0d01a8ea4e1..2c39dcd510f 100644 --- a/arch/x86/kernel/nmi_selftest.c +++ b/arch/x86/kernel/nmi_selftest.c @@ -12,6 +12,7 @@ #include <linux/smp.h> #include <linux/cpumask.h> #include <linux/delay.h> +#include <linux/init.h> #include <asm/apic.h> #include <asm/nmi.h> @@ -20,35 +21,35 @@ #define FAILURE 1 #define TIMEOUT 2 -static int nmi_fail; +static int __initdata nmi_fail; /* check to see if NMI IPIs work on this machine */ -static DECLARE_BITMAP(nmi_ipi_mask, NR_CPUS) __read_mostly; +static DECLARE_BITMAP(nmi_ipi_mask, NR_CPUS) __initdata; -static int testcase_total; -static int testcase_successes; -static int expected_testcase_failures; -static int unexpected_testcase_failures; -static int unexpected_testcase_unknowns; +static int __initdata testcase_total; +static int __initdata testcase_successes; +static int __initdata expected_testcase_failures; +static int __initdata unexpected_testcase_failures; +static int __initdata unexpected_testcase_unknowns; -static int nmi_unk_cb(unsigned int val, struct pt_regs *regs) +static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs) { unexpected_testcase_unknowns++; return NMI_HANDLED; } -static void init_nmi_testsuite(void) +static void __init init_nmi_testsuite(void) { /* trap all the unknown NMIs we may generate */ register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); } -static void cleanup_nmi_testsuite(void) +static void __init 
cleanup_nmi_testsuite(void) { unregister_nmi_handler(NMI_UNKNOWN, "nmi_selftest_unk"); } -static int test_nmi_ipi_callback(unsigned int val, struct pt_regs *regs) +static int __init test_nmi_ipi_callback(unsigned int val, struct pt_regs *regs) { int cpu = raw_smp_processor_id(); @@ -58,7 +59,7 @@ static int test_nmi_ipi_callback(unsigned int val, struct pt_regs *regs) return NMI_DONE; } -static void test_nmi_ipi(struct cpumask *mask) +static void __init test_nmi_ipi(struct cpumask *mask) { unsigned long timeout; @@ -86,7 +87,7 @@ static void test_nmi_ipi(struct cpumask *mask) return; } -static void remote_ipi(void) +static void __init remote_ipi(void) { cpumask_copy(to_cpumask(nmi_ipi_mask), cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask)); @@ -94,19 +95,19 @@ static void remote_ipi(void) test_nmi_ipi(to_cpumask(nmi_ipi_mask)); } -static void local_ipi(void) +static void __init local_ipi(void) { cpumask_clear(to_cpumask(nmi_ipi_mask)); cpumask_set_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask)); test_nmi_ipi(to_cpumask(nmi_ipi_mask)); } -static void reset_nmi(void) +static void __init reset_nmi(void) { nmi_fail = 0; } -static void dotest(void (*testcase_fn)(void), int expected) +static void __init dotest(void (*testcase_fn)(void), int expected) { testcase_fn(); /* @@ -131,12 +132,12 @@ static void dotest(void (*testcase_fn)(void), int expected) reset_nmi(); } -static inline void print_testname(const char *testname) +static inline void __init print_testname(const char *testname) { printk("%12s:", testname); } -void nmi_selftest(void) +void __init nmi_selftest(void) { init_nmi_testsuite(); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index cc594fca64d..ab137605e69 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -26,6 +26,7 @@ #include <asm/bug.h> #include <asm/paravirt.h> +#include <asm/debugreg.h> #include <asm/desc.h> #include <asm/setup.h> #include <asm/pgtable.h> @@ -37,6 +38,7 @@ #include <asm/apic.h> #include <asm/tlbflush.h> #include <asm/timer.h> +#include <asm/special_insns.h> /* nop stub */ void _paravirt_nop(void) @@ -202,8 +204,8 @@ static void native_flush_tlb_single(unsigned long addr) __native_flush_tlb_single(addr); } -struct jump_label_key paravirt_steal_enabled; -struct jump_label_key paravirt_steal_rq_enabled; +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; static u64 native_steal_clock(int cpu) { @@ -358,9 +360,6 @@ struct pv_cpu_ops pv_cpu_ops = { .read_tscp = native_read_tscp, .load_tr_desc = native_load_tr_desc, .set_ldt = native_set_ldt, -#ifdef CONFIG_X86_32 - .load_user_cs_desc = native_load_user_cs_desc, -#endif /*CONFIG_X86_32*/ .load_gdt = native_load_gdt, .load_idt = native_load_idt, .store_gdt = native_store_gdt, diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 726494b5834..d0b2fb9ccbb 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -42,7 +42,6 @@ #include <asm/calgary.h> #include <asm/tce.h> #include <asm/pci-direct.h> -#include <asm/system.h> #include <asm/dma.h> #include <asm/rio.h> #include <asm/bios_ebda.h> @@ -431,7 +430,7 @@ static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr, } static void* calgary_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) + dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs) { void *ret = NULL; dma_addr_t mapping; @@ -464,7 +463,8 @@ error: } static void 
calgary_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle) + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) { unsigned int npages; struct iommu_table *tbl = find_iommu_table(dev); @@ -477,8 +477,8 @@ static void calgary_free_coherent(struct device *dev, size_t size, } static struct dma_map_ops calgary_dma_ops = { - .alloc_coherent = calgary_alloc_coherent, - .free_coherent = calgary_free_coherent, + .alloc = calgary_alloc_coherent, + .free = calgary_free_coherent, .map_sg = calgary_map_sg, .unmap_sg = calgary_unmap_sg, .map_page = calgary_map_page, diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1c4d769e21e..3003250ac51 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -96,7 +96,8 @@ void __init pci_iommu_alloc(void) } } void *dma_generic_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag) + dma_addr_t *dma_addr, gfp_t flag, + struct dma_attrs *attrs) { unsigned long dma_mask; struct page *page; @@ -262,10 +263,11 @@ rootfs_initcall(pci_iommu_init); static __devinit void via_no_dac(struct pci_dev *dev) { - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { + if (forbid_dac == 0) { dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n"); forbid_dac = 1; } } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, + PCI_CLASS_BRIDGE_PCI, 8, via_no_dac); #endif diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 3af4af810c0..f96050685b4 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -75,7 +75,7 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, } static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr) + dma_addr_t dma_addr, struct dma_attrs *attrs) { free_pages((unsigned long)vaddr, get_order(size)); } @@ -96,8 +96,8 @@ static void nommu_sync_sg_for_device(struct device *dev, } struct dma_map_ops nommu_dma_ops = { - .alloc_coherent = dma_generic_alloc_coherent, - .free_coherent = nommu_free_coherent, + .alloc = dma_generic_alloc_coherent, + .free = nommu_free_coherent, .map_sg = nommu_map_sg, .map_page = nommu_map_page, .sync_single_for_device = nommu_sync_single_for_device, diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 8f972cbddef..6c483ba98b9 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -15,21 +15,30 @@ int swiotlb __read_mostly; static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags) + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) { void *vaddr; - vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags); + vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags, + attrs); if (vaddr) return vaddr; return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); } +static void x86_swiotlb_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_addr, + struct dma_attrs *attrs) +{ + swiotlb_free_coherent(dev, size, vaddr, dma_addr); +} + static struct dma_map_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, - .alloc_coherent = x86_swiotlb_alloc_coherent, - .free_coherent = swiotlb_free_coherent, + .alloc = x86_swiotlb_alloc_coherent, + .free = x86_swiotlb_free_coherent, .sync_single_for_cpu = swiotlb_sync_single_for_cpu, .sync_single_for_device = 
swiotlb_sync_single_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 34e06e84ce3..0bc72e2069e 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -12,6 +12,7 @@ #include <linux/pci.h> #include <linux/export.h> +#include <asm/probe_roms.h> #include <asm/pci-direct.h> #include <asm/e820.h> #include <asm/mmzone.h> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 8a5fe440718..1d92a5ab6e8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -12,16 +12,37 @@ #include <linux/user-return-notifier.h> #include <linux/dmi.h> #include <linux/utsname.h> +#include <linux/stackprotector.h> +#include <linux/tick.h> +#include <linux/cpuidle.h> #include <trace/events/power.h> #include <linux/hw_breakpoint.h> #include <asm/cpu.h> -#include <asm/system.h> #include <asm/apic.h> #include <asm/syscalls.h> #include <asm/idle.h> #include <asm/uaccess.h> #include <asm/i387.h> +#include <asm/fpu-internal.h> #include <asm/debugreg.h> +#include <asm/nmi.h> + +#ifdef CONFIG_X86_64 +static DEFINE_PER_CPU(unsigned char, is_idle); +static ATOMIC_NOTIFIER_HEAD(idle_notifier); + +void idle_notifier_register(struct notifier_block *n) +{ + atomic_notifier_chain_register(&idle_notifier, n); +} +EXPORT_SYMBOL_GPL(idle_notifier_register); + +void idle_notifier_unregister(struct notifier_block *n) +{ + atomic_notifier_chain_unregister(&idle_notifier, n); +} +EXPORT_SYMBOL_GPL(idle_notifier_unregister); +#endif struct kmem_cache *task_xstate_cachep; EXPORT_SYMBOL_GPL(task_xstate_cachep); @@ -341,44 +362,113 @@ void (*pm_idle)(void); EXPORT_SYMBOL(pm_idle); #endif -#ifdef CONFIG_X86_32 -/* - * This halt magic was a workaround for ancient floppy DMA - * wreckage. It should be safe to remove. - */ -static int hlt_counter; -void disable_hlt(void) +static inline int hlt_use_halt(void) { - hlt_counter++; + return 1; } -EXPORT_SYMBOL(disable_hlt); -void enable_hlt(void) +#ifndef CONFIG_SMP +static inline void play_dead(void) { - hlt_counter--; + BUG(); } -EXPORT_SYMBOL(enable_hlt); +#endif -static inline int hlt_use_halt(void) +#ifdef CONFIG_X86_64 +void enter_idle(void) { - return (!hlt_counter && boot_cpu_data.hlt_works_ok); + percpu_write(is_idle, 1); + atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); } -#else -static inline int hlt_use_halt(void) + +static void __exit_idle(void) { - return 1; + if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) + return; + atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); +} + +/* Called from interrupts to signify idle end */ +void exit_idle(void) +{ + /* idle loop has pid 0 */ + if (current->pid) + return; + __exit_idle(); } #endif /* + * The idle thread. There's no useful work to be + * done, so just try to conserve power and have a + * low exit latency (ie sit in a loop waiting for + * somebody to say that they'd like to reschedule) + */ +void cpu_idle(void) +{ + /* + * If we're the non-boot CPU, nothing set the stack canary up + * for us. CPU0 already has it initialized but no harm in + * doing it again. This is a good place for updating it, as + * we wont ever return from this function (so the invalid + * canaries already on the stack wont ever trigger). 
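The dma_map_ops conversion running through the hunks above renames .alloc_coherent/.free_coherent to .alloc/.free and threads a struct dma_attrs * through both; where a backend lacks the new argument (swiotlb's free path), a thin forwarding wrapper is added. A userspace sketch of that adapter pattern, with invented names:

#include <stdio.h>
#include <stdlib.h>

struct dma_attrs;       /* opaque here; NULL means "no attributes" */

/* New-style ops table: alloc and free both take an attrs pointer. */
struct dma_ops {
        void *(*alloc)(size_t size, struct dma_attrs *attrs);
        void  (*free)(void *vaddr, struct dma_attrs *attrs);
};

/* An existing attr-less backend... */
static void *legacy_alloc(size_t size) { return malloc(size); }
static void  legacy_free(void *vaddr)  { free(vaddr); }

/* ...gets thin wrappers that accept (and here ignore) attrs, the same
 * shape as the x86_swiotlb_free_coherent wrapper in the hunks above. */
static void *wrap_alloc(size_t size, struct dma_attrs *attrs)
{
        (void)attrs;
        return legacy_alloc(size);
}

static void wrap_free(void *vaddr, struct dma_attrs *attrs)
{
        (void)attrs;
        legacy_free(vaddr);
}

static struct dma_ops ops = { .alloc = wrap_alloc, .free = wrap_free };

int main(void)
{
        void *p = ops.alloc(64, NULL);
        ops.free(p, NULL);
        puts("ok");
        return 0;
}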
+ */ + boot_init_stack_canary(); + current_thread_info()->status |= TS_POLLING; + + while (1) { + tick_nohz_idle_enter(); + + while (!need_resched()) { + rmb(); + + if (cpu_is_offline(smp_processor_id())) + play_dead(); + + /* + * Idle routines should keep interrupts disabled + * from here on, until they go to idle. + * Otherwise, idle callbacks can misfire. + */ + local_touch_nmi(); + local_irq_disable(); + + enter_idle(); + + /* Don't trace irqs off for idle */ + stop_critical_timings(); + + /* enter_idle() needs rcu for notifiers */ + rcu_idle_enter(); + + if (cpuidle_idle_call()) + pm_idle(); + + rcu_idle_exit(); + start_critical_timings(); + + /* In many cases the interrupt that ended idle + has already called exit_idle. But some idle + loops can be woken up without interrupt. */ + __exit_idle(); + } + + tick_nohz_idle_exit(); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + } +} + +/* * We use this if we don't have any better * idle routine.. */ void default_idle(void) { if (hlt_use_halt()) { - trace_power_start(POWER_CSTATE, 1, smp_processor_id()); - trace_cpu_idle(1, smp_processor_id()); + trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); + trace_cpu_idle_rcuidle(1, smp_processor_id()); current_thread_info()->status &= ~TS_POLLING; /* * TS_POLLING-cleared state must be visible before we @@ -391,8 +481,8 @@ void default_idle(void) else local_irq_enable(); current_thread_info()->status |= TS_POLLING; - trace_power_end(smp_processor_id()); - trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); + trace_power_end_rcuidle(smp_processor_id()); + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } else { local_irq_enable(); /* loop is done by the caller */ @@ -450,8 +540,8 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); static void mwait_idle(void) { if (!need_resched()) { - trace_power_start(POWER_CSTATE, 1, smp_processor_id()); - trace_cpu_idle(1, smp_processor_id()); + trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); + trace_cpu_idle_rcuidle(1, smp_processor_id()); if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)&current_thread_info()->flags); @@ -461,8 +551,8 @@ static void mwait_idle(void) __sti_mwait(0, 0); else local_irq_enable(); - trace_power_end(smp_processor_id()); - trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); + trace_power_end_rcuidle(smp_processor_id()); + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } else local_irq_enable(); } @@ -474,13 +564,13 @@ static void mwait_idle(void) */ static void poll_idle(void) { - trace_power_start(POWER_CSTATE, 0, smp_processor_id()); - trace_cpu_idle(0, smp_processor_id()); + trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id()); + trace_cpu_idle_rcuidle(0, smp_processor_id()); local_irq_enable(); while (!need_resched()) cpu_relax(); - trace_power_end(smp_processor_id()); - trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); + trace_power_end_rcuidle(smp_processor_id()); + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } /* @@ -663,16 +753,6 @@ unsigned long arch_align_stack(unsigned long sp) unsigned long arch_randomize_brk(struct mm_struct *mm) { unsigned long range_end = mm->brk + 0x02000000; - unsigned long bump = 0; -#ifdef CONFIG_X86_32 - /* in the case of NX emulation, shove the brk segment way out of the - way of the exec randomization area, since it can collide with - future allocations if not. 
*/ - if ( (mm->get_unmapped_exec_area == arch_get_unmapped_exec_area) && - (mm->brk < 0x08000000) ) { - bump = (TASK_SIZE/6); - } -#endif - return bump + (randomize_range(mm->brk, range_end, 0) ? : mm->brk); + return randomize_range(mm->brk, range_end, 0) ? : mm->brk; } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f9c2c9d9e2b..ae6847303e2 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -9,7 +9,6 @@ * This file handles the architecture-dependent parts of process handling.. */ -#include <linux/stackprotector.h> #include <linux/cpu.h> #include <linux/errno.h> #include <linux/sched.h> @@ -31,20 +30,18 @@ #include <linux/kallsyms.h> #include <linux/ptrace.h> #include <linux/personality.h> -#include <linux/tick.h> #include <linux/percpu.h> #include <linux/prctl.h> #include <linux/ftrace.h> #include <linux/uaccess.h> #include <linux/io.h> #include <linux/kdebug.h> -#include <linux/cpuidle.h> #include <asm/pgtable.h> -#include <asm/system.h> #include <asm/ldt.h> #include <asm/processor.h> #include <asm/i387.h> +#include <asm/fpu-internal.h> #include <asm/desc.h> #ifdef CONFIG_MATH_EMULATION #include <asm/math_emu.h> @@ -57,7 +54,7 @@ #include <asm/idle.h> #include <asm/syscalls.h> #include <asm/debugreg.h> -#include <asm/nmi.h> +#include <asm/switch_to.h> asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -69,62 +66,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return ((unsigned long *)tsk->thread.sp)[3]; } -#ifndef CONFIG_SMP -static inline void play_dead(void) -{ - BUG(); -} -#endif - -/* - * The idle thread. There's no useful work to be - * done, so just try to conserve power and have a - * low exit latency (ie sit in a loop waiting for - * somebody to say that they'd like to reschedule) - */ -void cpu_idle(void) -{ - int cpu = smp_processor_id(); - - /* - * If we're the non-boot CPU, nothing set the stack canary up - * for us. CPU0 already has it initialized but no harm in - * doing it again. This is a good place for updating it, as - * we wont ever return from this function (so the invalid - * canaries already on the stack wont ever trigger). 
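After the hunk above, arch_randomize_brk() simply picks a brk somewhere in a 32 MiB (0x02000000-byte) window above the current one, falling back to the current brk when the helper yields nothing. A rough userspace model of that selection (the kernel's randomize_range() page-aligns and uses a real entropy source; rand() here is purely illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Pick a randomized brk in [brk, brk + 32MB), mirroring
 * "randomize_range(mm->brk, range_end, 0) ? : mm->brk". */
static unsigned long randomize_brk(unsigned long brk)
{
        unsigned long range_end = brk + 0x02000000;     /* 32 MiB window */
        unsigned long r = brk + ((unsigned long)rand() % (range_end - brk));

        /* the kernel helper can return 0 on failure, hence the fallback */
        return r ? r : brk;
}

int main(void)
{
        srand((unsigned)time(NULL));
        unsigned long brk = 0x555555560000ul;   /* hypothetical heap start */
        printf("brk randomized to %#lx\n", randomize_brk(brk));
        return 0;
}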
- */ - boot_init_stack_canary(); - - current_thread_info()->status |= TS_POLLING; - - /* endless idle loop with no priority at all */ - while (1) { - tick_nohz_idle_enter(); - rcu_idle_enter(); - while (!need_resched()) { - - check_pgt_cache(); - rmb(); - - if (cpu_is_offline(cpu)) - play_dead(); - - local_touch_nmi(); - local_irq_disable(); - /* Don't trace irqs off for idle */ - stop_critical_timings(); - if (cpuidle_idle_call()) - pm_idle(); - start_critical_timings(); - } - rcu_idle_exit(); - tick_nohz_idle_exit(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); - } -} - void __show_regs(struct pt_regs *regs, int all) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; @@ -250,10 +191,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) { - int cpu; - set_user_gs(regs, 0); - regs->fs = 0; regs->ds = __USER_DS; regs->es = __USER_DS; @@ -261,11 +199,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) regs->cs = __USER_CS; regs->ip = new_ip; regs->sp = new_sp; - - cpu = get_cpu(); - load_user_cs_desc(cpu, current->mm); - put_cpu(); - /* * Free the old FP and other extended state */ @@ -331,9 +264,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ lazy_save_gs(prev->gs); - if (next_p->mm) - load_user_cs_desc(cpu, next_p->mm); - /* * Load the per-thread Thread-Local Storage descriptor. */ @@ -404,40 +334,3 @@ unsigned long get_wchan(struct task_struct *p) return 0; } -static void modify_cs(struct mm_struct *mm, unsigned long limit) -{ - mm->context.exec_limit = limit; - set_user_cs(&mm->context.user_cs, limit); - if (mm == current->mm) { - int cpu; - - cpu = get_cpu(); - load_user_cs_desc(cpu, mm); - put_cpu(); - } -} - -void arch_add_exec_range(struct mm_struct *mm, unsigned long limit) -{ - if (limit > mm->context.exec_limit) - modify_cs(mm, limit); -} - -void arch_remove_exec_range(struct mm_struct *mm, unsigned long old_end) -{ - struct vm_area_struct *vma; - unsigned long limit = PAGE_SIZE; - - if (old_end == mm->context.exec_limit) { - for (vma = mm->mmap; vma; vma = vma->vm_next) - if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit)) - limit = vma->vm_end; - modify_cs(mm, limit); - } -} - -void arch_flush_exec_range(struct mm_struct *mm) -{ - mm->context.exec_limit = 0; - set_user_cs(&mm->context.user_cs, 0); -} diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index cfa5c90c01d..43d8b48b23e 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -14,7 +14,6 @@ * This file handles the architecture-dependent parts of process handling.. 
*/ -#include <linux/stackprotector.h> #include <linux/cpu.h> #include <linux/errno.h> #include <linux/sched.h> @@ -32,17 +31,15 @@ #include <linux/notifier.h> #include <linux/kprobes.h> #include <linux/kdebug.h> -#include <linux/tick.h> #include <linux/prctl.h> #include <linux/uaccess.h> #include <linux/io.h> #include <linux/ftrace.h> -#include <linux/cpuidle.h> #include <asm/pgtable.h> -#include <asm/system.h> #include <asm/processor.h> #include <asm/i387.h> +#include <asm/fpu-internal.h> #include <asm/mmu_context.h> #include <asm/prctl.h> #include <asm/desc.h> @@ -51,116 +48,11 @@ #include <asm/idle.h> #include <asm/syscalls.h> #include <asm/debugreg.h> -#include <asm/nmi.h> +#include <asm/switch_to.h> asmlinkage extern void ret_from_fork(void); DEFINE_PER_CPU(unsigned long, old_rsp); -static DEFINE_PER_CPU(unsigned char, is_idle); - -static ATOMIC_NOTIFIER_HEAD(idle_notifier); - -void idle_notifier_register(struct notifier_block *n) -{ - atomic_notifier_chain_register(&idle_notifier, n); -} -EXPORT_SYMBOL_GPL(idle_notifier_register); - -void idle_notifier_unregister(struct notifier_block *n) -{ - atomic_notifier_chain_unregister(&idle_notifier, n); -} -EXPORT_SYMBOL_GPL(idle_notifier_unregister); - -void enter_idle(void) -{ - percpu_write(is_idle, 1); - atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); -} - -static void __exit_idle(void) -{ - if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) - return; - atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); -} - -/* Called from interrupts to signify idle end */ -void exit_idle(void) -{ - /* idle loop has pid 0 */ - if (current->pid) - return; - __exit_idle(); -} - -#ifndef CONFIG_SMP -static inline void play_dead(void) -{ - BUG(); -} -#endif - -/* - * The idle thread. There's no useful work to be - * done, so just try to conserve power and have a - * low exit latency (ie sit in a loop waiting for - * somebody to say that they'd like to reschedule) - */ -void cpu_idle(void) -{ - current_thread_info()->status |= TS_POLLING; - - /* - * If we're the non-boot CPU, nothing set the stack canary up - * for us. CPU0 already has it initialized but no harm in - * doing it again. This is a good place for updating it, as - * we wont ever return from this function (so the invalid - * canaries already on the stack wont ever trigger). - */ - boot_init_stack_canary(); - - /* endless idle loop with no priority at all */ - while (1) { - tick_nohz_idle_enter(); - while (!need_resched()) { - - rmb(); - - if (cpu_is_offline(smp_processor_id())) - play_dead(); - /* - * Idle routines should keep interrupts disabled - * from here on, until they go to idle. - * Otherwise, idle callbacks can misfire. - */ - local_touch_nmi(); - local_irq_disable(); - enter_idle(); - /* Don't trace irqs off for idle */ - stop_critical_timings(); - - /* enter_idle() needs rcu for notifiers */ - rcu_idle_enter(); - - if (cpuidle_idle_call()) - pm_idle(); - - rcu_idle_exit(); - start_critical_timings(); - - /* In many cases the interrupt that ended idle - has already called exit_idle. But some idle - loops can be woken up without interrupt. 
*/ - __exit_idle(); - } - - tick_nohz_idle_exit(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); - } -} /* Prints also some state that isn't saved in the pt_regs */ void __show_regs(struct pt_regs *regs, int all) @@ -342,6 +234,7 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip, loadsegment(es, _ds); loadsegment(ds, _ds); load_gs_index(0); + current->thread.usersp = new_sp; regs->ip = new_ip; regs->sp = new_sp; percpu_write(old_rsp, new_sp); @@ -365,7 +258,9 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp) { start_thread_common(regs, new_ip, new_sp, - __USER32_CS, __USER32_DS, __USER32_DS); + test_thread_flag(TIF_X32) + ? __USER_CS : __USER32_CS, + __USER_DS, __USER_DS); } #endif @@ -488,6 +383,8 @@ void set_personality_64bit(void) /* Make sure to be in 64bit mode */ clear_thread_flag(TIF_IA32); + clear_thread_flag(TIF_ADDR32); + clear_thread_flag(TIF_X32); /* Ensure the corresponding mm is not marked. */ if (current->mm) @@ -500,21 +397,33 @@ void set_personality_64bit(void) current->personality &= ~READ_IMPLIES_EXEC; } -void set_personality_ia32(void) +void set_personality_ia32(bool x32) { /* inherit personality from parent */ /* Make sure to be in 32bit mode */ - set_thread_flag(TIF_IA32); - current->personality |= force_personality32; + set_thread_flag(TIF_ADDR32); /* Mark the associated mm as containing 32-bit tasks. */ if (current->mm) current->mm->context.ia32_compat = 1; - /* Prepare the first "return" to user space */ - current_thread_info()->status |= TS_COMPAT; + if (x32) { + clear_thread_flag(TIF_IA32); + set_thread_flag(TIF_X32); + current->personality &= ~READ_IMPLIES_EXEC; + /* is_compat_task() uses the presence of the x32 + syscall bit flag to determine compat status */ + current_thread_info()->status &= ~TS_COMPAT; + } else { + set_thread_flag(TIF_IA32); + clear_thread_flag(TIF_X32); + current->personality |= force_personality32; + /* Prepare the first "return" to user space */ + current_thread_info()->status |= TS_COMPAT; + } } +EXPORT_SYMBOL_GPL(set_personality_ia32); unsigned long get_wchan(struct task_struct *p) { |
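
The arch_randomize_brk() hunk at the top of this section drops the exec-shield "bump" (which pushed the randomized brk away from the 32-bit NX-emulation area), leaving plain brk randomization. A minimal sketch of the resulting function, assuming the surrounding process.c context; the 0x02000000 window constant is an assumption here, since the line defining range_end sits above this hunk:

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	/* Window above the current brk in which the new brk may land
	 * (size assumed; the defining line is outside this hunk). */
	unsigned long range_end = mm->brk + 0x02000000;

	/* randomize_range() returns 0 on failure; the GNU "?:" operator
	 * then falls back to the unrandomized brk. */
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}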
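
Both process_32.c and process_64.c lose their private copies of cpu_idle() here; in this kernel series the idle loop appears to live in one shared file (arch/x86/kernel/process.c, outside this section) rather than go away. A sketch of the common skeleton the two removed copies shared, distilled from the removed lines above. The copies differ in small details (the 32-bit one calls check_pgt_cache() and places rcu_idle_enter() outside the inner loop; the 64-bit one brackets the body with enter_idle()/__exit_idle()), so this follows the 64-bit ordering:

void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;
	/* Secondary CPUs arrive here with no stack canary set up yet;
	 * this function never returns, so stale canaries never trip. */
	boot_init_stack_canary();

	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_idle_enter();
		while (!need_resched()) {
			rmb();
			if (cpu_is_offline(smp_processor_id()))
				play_dead();

			/* Interrupts stay off from here until the idle
			 * routine itself re-enables them. */
			local_touch_nmi();
			local_irq_disable();
			stop_critical_timings();
			rcu_idle_enter();
			if (cpuidle_idle_call())	/* nonzero: cpuidle unavailable */
				pm_idle();		/* legacy fallback hook */
			rcu_idle_exit();
			start_critical_timings();
		}
		tick_nohz_idle_exit();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}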
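
The idle-notifier block removed from process_64.c (idle_notifier_register() through exit_idle()) is the 64-bit hook that let code observe each idle transition through an atomic notifier chain; its removal here looks like part of the same consolidation rather than a deletion of the facility. A hypothetical subscriber, written only against the API visible in the removed lines (my_idle_notify and my_idle_nb are illustrative names):

#include <linux/notifier.h>
#include <asm/idle.h>

static int my_idle_notify(struct notifier_block *nb,
			  unsigned long action, void *data)
{
	switch (action) {
	case IDLE_START:	/* raised by enter_idle() */
		break;
	case IDLE_END:		/* raised by __exit_idle() */
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block my_idle_nb = {
	.notifier_call = my_idle_notify,
};

/* during driver init: */
idle_notifier_register(&my_idle_nb);

Note the per-CPU is_idle flag in the removed code: __exit_idle() test-and-clears it, so IDLE_END fires only once even when both the interrupt that ended idle and the idle loop itself call exit_idle().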
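
The remaining process_64.c hunks wire up the x32 ABI: start_thread_common() now also primes thread.usersp, start_thread_ia32() picks the 64-bit code segment for x32 tasks (x32 code executes in long mode, only with 32-bit pointers), and set_personality_ia32() takes a bool selecting x32 versus legacy ia32. A hypothetical loader-side sketch of how the two modes would be chosen; choose_32bit_personality() is an illustrative name, and using e_machine as the discriminator (EM_X86_64 for x32 ELFCLASS32 objects, EM_386 for ia32) is an assumption about the real call sites, which live in the ELF personality macros outside this diff:

/* Assumed discriminator: x32 binaries are ELFCLASS32 objects carrying
 * the 64-bit machine type, so e_machine tells the two ABIs apart. */
static void choose_32bit_personality(const struct elf32_hdr *ehdr)
{
	if (ehdr->e_machine == EM_X86_64)
		set_personality_ia32(true);	/* x32: TIF_X32 set, TS_COMPAT cleared */
	else
		set_personality_ia32(false);	/* ia32: TIF_IA32 and TS_COMPAT set */
}

The asymmetry in the hunk is deliberate: both modes set TIF_ADDR32 (32-bit address space), but only legacy ia32 gets TS_COMPAT, since, per the comment in the added lines, is_compat_task() keys off the x32 syscall bit rather than TS_COMPAT for x32 tasks.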