summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kbuild2
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/boot/compressed/eboot.c87
-rw-r--r--arch/x86/boot/compressed/eboot.h6
-rw-r--r--arch/x86/ia32/ia32_signal.c2
-rw-r--r--arch/x86/include/asm/acpi.h9
-rw-r--r--arch/x86/include/asm/bitops.h2
-rw-r--r--arch/x86/include/asm/ftrace.h2
-rw-r--r--arch/x86/include/asm/pgtable-3level.h50
-rw-r--r--arch/x86/include/asm/posix_types_32.h3
-rw-r--r--arch/x86/include/asm/processor.h7
-rw-r--r--arch/x86/include/asm/realmode.h62
-rw-r--r--arch/x86/include/asm/sighandling.h2
-rw-r--r--arch/x86/include/asm/sta2x11.h12
-rw-r--r--arch/x86/include/asm/thread_info.h18
-rw-r--r--arch/x86/include/asm/trampoline.h39
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/Makefile9
-rw-r--r--arch/x86/kernel/acpi/realmode/.gitignore3
-rw-r--r--arch/x86/kernel/acpi/realmode/Makefile59
-rw-r--r--arch/x86/kernel/acpi/realmode/bioscall.S1
-rw-r--r--arch/x86/kernel/acpi/realmode/copy.S1
-rw-r--r--arch/x86/kernel/acpi/realmode/regs.c1
-rw-r--r--arch/x86/kernel/acpi/realmode/video-bios.c1
-rw-r--r--arch/x86/kernel/acpi/realmode/video-mode.c1
-rw-r--r--arch/x86/kernel/acpi/realmode/video-vesa.c1
-rw-r--r--arch/x86/kernel/acpi/realmode/video-vga.c1
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.lds.S62
-rw-r--r--arch/x86/kernel/acpi/sleep.c33
-rw-r--r--arch/x86/kernel/acpi/sleep.h2
-rw-r--r--arch/x86/kernel/acpi/wakeup_rm.S12
-rw-r--r--arch/x86/kernel/cpu/common.c8
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c37
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c2
-rw-r--r--arch/x86/kernel/e820.c53
-rw-r--r--arch/x86/kernel/entry_32.S13
-rw-r--r--arch/x86/kernel/entry_64.S44
-rw-r--r--arch/x86/kernel/ftrace.c102
-rw-r--r--arch/x86/kernel/head32.c1
-rw-r--r--arch/x86/kernel/head64.c1
-rw-r--r--arch/x86/kernel/head_32.S5
-rw-r--r--arch/x86/kernel/head_64.S4
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/mpparse.c11
-rw-r--r--arch/x86/kernel/nmi.c6
-rw-r--r--arch/x86/kernel/ptrace.c6
-rw-r--r--arch/x86/kernel/reboot.c25
-rw-r--r--arch/x86/kernel/setup.c22
-rw-r--r--arch/x86/kernel/signal.c62
-rw-r--r--arch/x86/kernel/smpboot.c18
-rw-r--r--arch/x86/kernel/tboot.c7
-rw-r--r--arch/x86/kernel/trampoline.c42
-rw-r--r--arch/x86/kernel/trampoline_32.S83
-rw-r--r--arch/x86/kernel/traps.c8
-rw-r--r--arch/x86/kernel/vmlinux.lds.S12
-rw-r--r--arch/x86/kvm/mmu.c3
-rw-r--r--arch/x86/mm/init.c16
-rw-r--r--arch/x86/mm/numa.c32
-rw-r--r--arch/x86/mm/numa_emulation.c4
-rw-r--r--arch/x86/mm/pat.c98
-rw-r--r--arch/x86/mm/srat.c5
-rw-r--r--arch/x86/realmode/Makefile18
-rw-r--r--arch/x86/realmode/init.c115
-rw-r--r--arch/x86/realmode/rm/.gitignore3
-rw-r--r--arch/x86/realmode/rm/Makefile82
-rw-r--r--arch/x86/realmode/rm/bioscall.S1
-rw-r--r--arch/x86/realmode/rm/copy.S1
-rw-r--r--arch/x86/realmode/rm/header.S41
-rw-r--r--arch/x86/realmode/rm/realmode.h21
-rw-r--r--arch/x86/realmode/rm/realmode.lds.S76
-rw-r--r--arch/x86/realmode/rm/reboot_32.S (renamed from arch/x86/kernel/reboot_32.S)89
-rw-r--r--arch/x86/realmode/rm/regs.c1
-rw-r--r--arch/x86/realmode/rm/stack.S19
-rw-r--r--arch/x86/realmode/rm/trampoline_32.S74
-rw-r--r--arch/x86/realmode/rm/trampoline_64.S (renamed from arch/x86/kernel/trampoline_64.S)148
-rw-r--r--arch/x86/realmode/rm/trampoline_common.S7
-rw-r--r--arch/x86/realmode/rm/video-bios.c1
-rw-r--r--arch/x86/realmode/rm/video-mode.c1
-rw-r--r--arch/x86/realmode/rm/video-vesa.c1
-rw-r--r--arch/x86/realmode/rm/video-vga.c1
-rw-r--r--arch/x86/realmode/rm/wakemain.c (renamed from arch/x86/kernel/acpi/realmode/wakemain.c)3
-rw-r--r--arch/x86/realmode/rm/wakeup.h (renamed from arch/x86/kernel/acpi/realmode/wakeup.h)10
-rw-r--r--arch/x86/realmode/rm/wakeup_asm.S (renamed from arch/x86/kernel/acpi/realmode/wakeup.S)131
-rw-r--r--arch/x86/realmode/rmpiggy.S20
-rw-r--r--arch/x86/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/syscalls/syscall_64.tbl2
-rw-r--r--arch/x86/tools/relocs.c7
-rw-r--r--arch/x86/um/signal.c2
-rw-r--r--arch/x86/xen/enlighten.c3
89 files changed, 1260 insertions, 845 deletions
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 0e9dec6cadd1..e5287d8517aa 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -1,4 +1,3 @@
-
obj-$(CONFIG_KVM) += kvm/
# Xen paravirtualization support
@@ -7,6 +6,7 @@ obj-$(CONFIG_XEN) += xen/
# lguest paravirtualization support
obj-$(CONFIG_LGUEST_GUEST) += lguest/
+obj-y += realmode/
obj-y += kernel/
obj-y += mm/
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d700811785ea..c70684f859e1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1506,6 +1506,8 @@ config EFI_STUB
This kernel feature allows a bzImage to be loaded directly
by EFI firmware without the use of a bootloader.
+ See Documentation/x86/efi-stub.txt for more information.
+
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 2c14e76bb4c7..4e85f5f85837 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -16,6 +16,26 @@
static efi_system_table_t *sys_table;
+static void efi_printk(char *str)
+{
+ char *s8;
+
+ for (s8 = str; *s8; s8++) {
+ struct efi_simple_text_output_protocol *out;
+ efi_char16_t ch[2] = { 0 };
+
+ ch[0] = *s8;
+ out = (struct efi_simple_text_output_protocol *)sys_table->con_out;
+
+ if (*s8 == '\n') {
+ efi_char16_t nl[2] = { '\r', 0 };
+ efi_call_phys2(out->output_string, out, nl);
+ }
+
+ efi_call_phys2(out->output_string, out, ch);
+ }
+}
+
static efi_status_t __get_map(efi_memory_desc_t **map, unsigned long *map_size,
unsigned long *desc_size)
{
@@ -531,8 +551,10 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
EFI_LOADER_DATA,
nr_initrds * sizeof(*initrds),
&initrds);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for initrds\n");
goto fail;
+ }
str = (char *)(unsigned long)hdr->cmd_line_ptr;
for (i = 0; i < nr_initrds; i++) {
@@ -575,32 +597,42 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
status = efi_call_phys3(boottime->handle_protocol,
image->device_handle, &fs_proto, &io);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to handle fs_proto\n");
goto free_initrds;
+ }
status = efi_call_phys2(io->open_volume, io, &fh);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to open volume\n");
goto free_initrds;
+ }
}
status = efi_call_phys5(fh->open, fh, &h, filename_16,
EFI_FILE_MODE_READ, (u64)0);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to open initrd file\n");
goto close_handles;
+ }
initrd->handle = h;
info_sz = 0;
status = efi_call_phys4(h->get_info, h, &info_guid,
&info_sz, NULL);
- if (status != EFI_BUFFER_TOO_SMALL)
+ if (status != EFI_BUFFER_TOO_SMALL) {
+ efi_printk("Failed to get initrd info size\n");
goto close_handles;
+ }
grow:
status = efi_call_phys3(sys_table->boottime->allocate_pool,
EFI_LOADER_DATA, info_sz, &info);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for initrd info\n");
goto close_handles;
+ }
status = efi_call_phys4(h->get_info, h, &info_guid,
&info_sz, info);
@@ -612,8 +644,10 @@ grow:
file_sz = info->file_size;
efi_call_phys1(sys_table->boottime->free_pool, info);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to get initrd info\n");
goto close_handles;
+ }
initrd->size = file_sz;
initrd_total += file_sz;
@@ -629,11 +663,14 @@ grow:
*/
status = high_alloc(initrd_total, 0x1000,
&initrd_addr, hdr->initrd_addr_max);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc highmem for initrds\n");
goto close_handles;
+ }
/* We've run out of free low memory. */
if (initrd_addr > hdr->initrd_addr_max) {
+ efi_printk("We've run out of free low memory\n");
status = EFI_INVALID_PARAMETER;
goto free_initrd_total;
}
@@ -652,8 +689,10 @@ grow:
status = efi_call_phys3(fh->read,
initrds[j].handle,
&chunksize, addr);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to read initrd\n");
goto free_initrd_total;
+ }
addr += chunksize;
size -= chunksize;
}
@@ -674,7 +713,7 @@ free_initrd_total:
low_free(initrd_total, initrd_addr);
close_handles:
- for (k = j; k < nr_initrds; k++)
+ for (k = j; k < i; k++)
efi_call_phys1(fh->close, initrds[k].handle);
free_initrds:
efi_call_phys1(sys_table->boottime->free_pool, initrds);
@@ -732,8 +771,10 @@ static efi_status_t make_boot_params(struct boot_params *boot_params,
options_size++; /* NUL termination */
status = low_alloc(options_size, 1, &cmdline);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for cmdline\n");
goto fail;
+ }
s1 = (u8 *)(unsigned long)cmdline;
s2 = (u16 *)options;
@@ -895,12 +936,16 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
status = efi_call_phys3(sys_table->boottime->handle_protocol,
handle, &proto, (void *)&image);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
goto fail;
+ }
status = low_alloc(0x4000, 1, (unsigned long *)&boot_params);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc lowmem for boot params\n");
goto fail;
+ }
memset(boot_params, 0x0, 0x4000);
@@ -933,8 +978,10 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
if (status != EFI_SUCCESS) {
status = low_alloc(hdr->init_size, hdr->kernel_alignment,
&start);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for kernel\n");
goto fail;
+ }
}
hdr->code32_start = (__u32)start;
@@ -945,19 +992,25 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
status = efi_call_phys3(sys_table->boottime->allocate_pool,
EFI_LOADER_DATA, sizeof(*gdt),
(void **)&gdt);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for gdt structure\n");
goto fail;
+ }
gdt->size = 0x800;
status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for gdt\n");
goto fail;
+ }
status = efi_call_phys3(sys_table->boottime->allocate_pool,
EFI_LOADER_DATA, sizeof(*idt),
(void **)&idt);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for idt structure\n");
goto fail;
+ }
idt->size = 0;
idt->address = 0;
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index 39251663e65b..3b6e15627c55 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -58,4 +58,10 @@ struct efi_uga_draw_protocol {
void *blt;
};
+struct efi_simple_text_output_protocol {
+ void *reset;
+ void *output_string;
+ void *test_string;
+};
+
#endif /* BOOT_COMPRESSED_EBOOT_H */
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 98bd70faccc5..daeca56211e3 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -273,7 +273,6 @@ asmlinkage long sys32_sigreturn(struct pt_regs *regs)
sizeof(frame->extramask))))
goto badframe;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (ia32_restore_sigcontext(regs, &frame->sc, &ax))
@@ -299,7 +298,6 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 610001d385dd..0c44630d1789 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -29,7 +29,7 @@
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mpspec.h>
-#include <asm/trampoline.h>
+#include <asm/realmode.h>
#define COMPILER_DEPENDENT_INT64 long long
#define COMPILER_DEPENDENT_UINT64 unsigned long long
@@ -117,11 +117,8 @@ static inline void acpi_disable_pci(void)
/* Low-level suspend routine. */
extern int acpi_suspend_lowlevel(void);
-extern const unsigned char acpi_wakeup_code[];
-#define acpi_wakeup_address (__pa(TRAMPOLINE_SYM(acpi_wakeup_code)))
-
-/* early initialization routine */
-extern void acpi_reserve_wakeup_memory(void);
+/* Physical address to resume after wakeup */
+#define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start))
/*
* Check if the CPU can handle C2 and deeper
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index b97596e2b68c..a6983b277220 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -15,6 +15,8 @@
#include <linux/compiler.h>
#include <asm/alternative.h>
+#define BIT_64(n) (U64_C(1) << (n))
+
/*
* These have to be done with inline assembly: that way the bit-setting
* is guaranteed to be atomic. All bit operations return 0 if the bit
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 18d9005d9e4f..b0767bc08740 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -34,7 +34,7 @@
#ifndef __ASSEMBLY__
extern void mcount(void);
-extern int modifying_ftrace_code;
+extern atomic_t modifying_ftrace_code;
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index effff47a3c82..43876f16caf1 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -31,6 +31,56 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
ptep->pte_low = pte.pte_low;
}
+#define pmd_read_atomic pmd_read_atomic
+/*
+ * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with
+ * a "*pmdp" dereference done by gcc. Problem is, in certain places
+ * where pte_offset_map_lock is called, concurrent page faults are
+ * allowed, if the mmap_sem is hold for reading. An example is mincore
+ * vs page faults vs MADV_DONTNEED. On the page fault side
+ * pmd_populate rightfully does a set_64bit, but if we're reading the
+ * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen
+ * because gcc will not read the 64bit of the pmd atomically. To fix
+ * this all places running pmd_offset_map_lock() while holding the
+ * mmap_sem in read mode, shall read the pmdp pointer using this
+ * function to know if the pmd is null nor not, and in turn to know if
+ * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd
+ * operations.
+ *
+ * Without THP if the mmap_sem is hold for reading, the
+ * pmd can only transition from null to not null while pmd_read_atomic runs.
+ * So there's no need of literally reading it atomically.
+ *
+ * With THP if the mmap_sem is hold for reading, the pmd can become
+ * THP or null or point to a pte (and in turn become "stable") at any
+ * time under pmd_read_atomic, so it's mandatory to read it atomically
+ * with cmpxchg8b.
+ */
+#ifndef CONFIG_TRANSPARENT_HUGEPAGE
+static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
+{
+ pmdval_t ret;
+ u32 *tmp = (u32 *)pmdp;
+
+ ret = (pmdval_t) (*tmp);
+ if (ret) {
+ /*
+ * If the low part is null, we must not read the high part
+ * or we can end up with a partial pmd.
+ */
+ smp_rmb();
+ ret |= ((pmdval_t)*(tmp + 1)) << 32;
+ }
+
+ return (pmd_t) { ret };
+}
+#else /* CONFIG_TRANSPARENT_HUGEPAGE */
+static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
+{
+ return (pmd_t) { atomic64_read((atomic64_t *)pmdp) };
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
{
set_64bit((unsigned long long *)(ptep), native_pte_val(pte));
diff --git a/arch/x86/include/asm/posix_types_32.h b/arch/x86/include/asm/posix_types_32.h
index 99f262e04b91..8e525059e7d8 100644
--- a/arch/x86/include/asm/posix_types_32.h
+++ b/arch/x86/include/asm/posix_types_32.h
@@ -10,9 +10,6 @@
typedef unsigned short __kernel_mode_t;
#define __kernel_mode_t __kernel_mode_t
-typedef unsigned short __kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
typedef unsigned short __kernel_ipc_pid_t;
#define __kernel_ipc_pid_t __kernel_ipc_pid_t
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 7745b257f035..39bc5777211a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -544,13 +544,16 @@ static inline void load_sp0(struct tss_struct *tss,
* enable), so that any CPU's that boot up
* after us can get the correct flags.
*/
-extern unsigned long mmu_cr4_features;
+extern unsigned long mmu_cr4_features;
+extern u32 *trampoline_cr4_features;
static inline void set_in_cr4(unsigned long mask)
{
unsigned long cr4;
mmu_cr4_features |= mask;
+ if (trampoline_cr4_features)
+ *trampoline_cr4_features = mmu_cr4_features;
cr4 = read_cr4();
cr4 |= mask;
write_cr4(cr4);
@@ -561,6 +564,8 @@ static inline void clear_in_cr4(unsigned long mask)
unsigned long cr4;
mmu_cr4_features &= ~mask;
+ if (trampoline_cr4_features)
+ *trampoline_cr4_features = mmu_cr4_features;
cr4 = read_cr4();
cr4 &= ~mask;
write_cr4(cr4);
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
new file mode 100644
index 000000000000..fce3f4ae5bd6
--- /dev/null
+++ b/arch/x86/include/asm/realmode.h
@@ -0,0 +1,62 @@
+#ifndef _ARCH_X86_REALMODE_H
+#define _ARCH_X86_REALMODE_H
+
+#include <linux/types.h>
+#include <asm/io.h>
+
+/* This must match data at realmode.S */
+struct real_mode_header {
+ u32 text_start;
+ u32 ro_end;
+ /* SMP trampoline */
+ u32 trampoline_start;
+ u32 trampoline_status;
+ u32 trampoline_header;
+#ifdef CONFIG_X86_64
+ u32 trampoline_pgd;
+#endif
+ /* ACPI S3 wakeup */
+#ifdef CONFIG_ACPI_SLEEP
+ u32 wakeup_start;
+ u32 wakeup_header;
+#endif
+ /* APM/BIOS reboot */
+#ifdef CONFIG_X86_32
+ u32 machine_real_restart_asm;
+#endif
+};
+
+/* This must match data at trampoline_32/64.S */
+struct trampoline_header {
+#ifdef CONFIG_X86_32
+ u32 start;
+ u16 gdt_pad;
+ u16 gdt_limit;
+ u32 gdt_base;
+#else
+ u64 start;
+ u64 efer;
+ u32 cr4;
+#endif
+};
+
+extern struct real_mode_header *real_mode_header;
+extern unsigned char real_mode_blob_end[];
+
+extern unsigned long init_rsp;
+extern unsigned long initial_code;
+extern unsigned long initial_gs;
+
+extern unsigned char real_mode_blob[];
+extern unsigned char real_mode_relocs[];
+
+#ifdef CONFIG_X86_32
+extern unsigned char startup_32_smp[];
+extern unsigned char boot_gdt[];
+#else
+extern unsigned char secondary_startup_64[];
+#endif
+
+extern void __init setup_real_mode(void);
+
+#endif /* _ARCH_X86_REALMODE_H */
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index ada93b3b8c66..beff97f7df37 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -7,8 +7,6 @@
#include <asm/processor-flags.h>
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \
X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
diff --git a/arch/x86/include/asm/sta2x11.h b/arch/x86/include/asm/sta2x11.h
new file mode 100644
index 000000000000..e9d32df89ccc
--- /dev/null
+++ b/arch/x86/include/asm/sta2x11.h
@@ -0,0 +1,12 @@
+/*
+ * Header file for STMicroelectronics ConneXt (STA2X11) IOHub
+ */
+#ifndef __ASM_STA2X11_H
+#define __ASM_STA2X11_H
+
+#include <linux/pci.h>
+
+/* This needs to be called from the MFD to configure its sub-devices */
+struct sta2x11_instance *sta2x11_get_instance(struct pci_dev *pdev);
+
+#endif /* __ASM_STA2X11_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 5c25de07cba8..89f794f007ec 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -248,7 +248,23 @@ static inline void set_restore_sigmask(void)
{
struct thread_info *ti = current_thread_info();
ti->status |= TS_RESTORE_SIGMASK;
- set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags);
+ WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags));
+}
+static inline void clear_restore_sigmask(void)
+{
+ current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+}
+static inline bool test_restore_sigmask(void)
+{
+ return current_thread_info()->status & TS_RESTORE_SIGMASK;
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+ struct thread_info *ti = current_thread_info();
+ if (!(ti->status & TS_RESTORE_SIGMASK))
+ return false;
+ ti->status &= ~TS_RESTORE_SIGMASK;
+ return true;
}
static inline bool is_ia32_task(void)
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h
deleted file mode 100644
index feca3118a73b..000000000000
--- a/arch/x86/include/asm/trampoline.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef _ASM_X86_TRAMPOLINE_H
-#define _ASM_X86_TRAMPOLINE_H
-
-#ifndef __ASSEMBLY__
-
-#include <linux/types.h>
-#include <asm/io.h>
-
-/*
- * Trampoline 80x86 program as an array. These are in the init rodata
- * segment, but that's okay, because we only care about the relative
- * addresses of the symbols.
- */
-extern const unsigned char x86_trampoline_start [];
-extern const unsigned char x86_trampoline_end [];
-extern unsigned char *x86_trampoline_base;
-
-extern unsigned long init_rsp;
-extern unsigned long initial_code;
-extern unsigned long initial_gs;
-
-extern void __init setup_trampolines(void);
-
-extern const unsigned char trampoline_data[];
-extern const unsigned char trampoline_status[];
-
-#define TRAMPOLINE_SYM(x) \
- ((void *)(x86_trampoline_base + \
- ((const unsigned char *)(x) - x86_trampoline_start)))
-
-/* Address of the SMP trampoline */
-static inline unsigned long trampoline_address(void)
-{
- return virt_to_phys(TRAMPOLINE_SYM(trampoline_data));
-}
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _ASM_X86_TRAMPOLINE_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9bba5b79902b..8215e5652d97 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -35,7 +35,6 @@ obj-y += tsc.o io_delay.o rtc.o
obj-y += pci-iommu_table.o
obj-y += resource.o
-obj-y += trampoline.o trampoline_$(BITS).o
obj-y += process.o
obj-y += i387.o xsave.o
obj-y += ptrace.o
@@ -48,7 +47,6 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
obj-y += acpi/
obj-y += reboot.o
-obj-$(CONFIG_X86_32) += reboot_32.o
obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_PCI) += early-quirks.o
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 6f35260bb3ef..163b22581472 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -1,14 +1,7 @@
-subdir- := realmode
-
obj-$(CONFIG_ACPI) += boot.o
-obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_rm.o wakeup_$(BITS).o
+obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o
ifneq ($(CONFIG_ACPI_PROCESSOR),)
obj-y += cstate.o
endif
-$(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin
-
-$(obj)/realmode/wakeup.bin: FORCE
- $(Q)$(MAKE) $(build)=$(obj)/realmode
-
diff --git a/arch/x86/kernel/acpi/realmode/.gitignore b/arch/x86/kernel/acpi/realmode/.gitignore
deleted file mode 100644
index 58f1f48a58f8..000000000000
--- a/arch/x86/kernel/acpi/realmode/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-wakeup.bin
-wakeup.elf
-wakeup.lds
diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile
deleted file mode 100644
index 6a564ac67ef5..000000000000
--- a/arch/x86/kernel/acpi/realmode/Makefile
+++ /dev/null
@@ -1,59 +0,0 @@
-#
-# arch/x86/kernel/acpi/realmode/Makefile
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License. See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-
-always := wakeup.bin
-targets := wakeup.elf wakeup.lds
-
-wakeup-y += wakeup.o wakemain.o video-mode.o copy.o bioscall.o regs.o
-
-# The link order of the video-*.o modules can matter. In particular,
-# video-vga.o *must* be listed first, followed by video-vesa.o.
-# Hardware-specific drivers should follow in the order they should be
-# probed, and video-bios.o should typically be last.
-wakeup-y += video-vga.o
-wakeup-y += video-vesa.o
-wakeup-y += video-bios.o
-
-targets += $(wakeup-y)
-
-bootsrc := $(src)/../../../boot
-
-# ---------------------------------------------------------------------------
-
-# How to compile the 16-bit code. Note we always compile for -march=i386,
-# that way we can complain to the user if the CPU is insufficient.
-# Compile with _SETUP since this is similar to the boot-time setup code.
-KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D_WAKEUP -D__KERNEL__ \
- -I$(srctree)/$(bootsrc) \
- $(cflags-y) \
- -Wall -Wstrict-prototypes \
- -march=i386 -mregparm=3 \
- -include $(srctree)/$(bootsrc)/code16gcc.h \
- -fno-strict-aliasing -fomit-frame-pointer \
- $(call cc-option, -ffreestanding) \
- $(call cc-option, -fno-toplevel-reorder,\
- $(call cc-option, -fno-unit-at-a-time)) \
- $(call cc-option, -fno-stack-protector) \
- $(call cc-option, -mpreferred-stack-boundary=2)
-KBUILD_CFLAGS += $(call cc-option, -m32)
-KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
-GCOV_PROFILE := n
-
-WAKEUP_OBJS = $(addprefix $(obj)/,$(wakeup-y))
-
-LDFLAGS_wakeup.elf := -T
-
-CPPFLAGS_wakeup.lds += -P -C
-
-$(obj)/wakeup.elf: $(obj)/wakeup.lds $(WAKEUP_OBJS) FORCE
- $(call if_changed,ld)
-
-OBJCOPYFLAGS_wakeup.bin := -O binary
-
-$(obj)/wakeup.bin: $(obj)/wakeup.elf FORCE
- $(call if_changed,objcopy)
diff --git a/arch/x86/kernel/acpi/realmode/bioscall.S b/arch/x86/kernel/acpi/realmode/bioscall.S
deleted file mode 100644
index f51eb0bb56ce..000000000000
--- a/arch/x86/kernel/acpi/realmode/bioscall.S
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../boot/bioscall.S"
diff --git a/arch/x86/kernel/acpi/realmode/copy.S b/arch/x86/kernel/acpi/realmode/copy.S
deleted file mode 100644
index dc59ebee69d8..000000000000
--- a/arch/x86/kernel/acpi/realmode/copy.S
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../boot/copy.S"
diff --git a/arch/x86/kernel/acpi/realmode/regs.c b/arch/x86/kernel/acpi/realmode/regs.c
deleted file mode 100644
index 6206033ba202..000000000000
--- a/arch/x86/kernel/acpi/realmode/regs.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../boot/regs.c"
diff --git a/arch/x86/kernel/acpi/realmode/video-bios.c b/arch/x86/kernel/acpi/realmode/video-bios.c
deleted file mode 100644
index 7deabc144a27..000000000000
--- a/arch/x86/kernel/acpi/realmode/video-bios.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../boot/video-bios.c"
diff --git a/arch/x86/kernel/acpi/realmode/video-mode.c b/arch/x86/kernel/acpi/realmode/video-mode.c
deleted file mode 100644
index 328ad209f113..000000000000
--- a/arch/x86/kernel/acpi/realmode/video-mode.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../boot/video-mode.c"
diff --git a/arch/x86/kernel/acpi/realmode/video-vesa.c b/arch/x86/kernel/acpi/realmode/video-vesa.c
deleted file mode 100644
index 9dbb9672226a..000000000000
--- a/arch/x86/kernel/acpi/realmode/video-vesa.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../boot/video-vesa.c"
diff --git a/arch/x86/kernel/acpi/realmode/video-vga.c b/arch/x86/kernel/acpi/realmode/video-vga.c
deleted file mode 100644
index bcc81255f374..000000000000
--- a/arch/x86/kernel/acpi/realmode/video-vga.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../boot/video-vga.c"
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
deleted file mode 100644
index d4f8010a5b1b..000000000000
--- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * wakeup.ld
- *
- * Linker script for the real-mode wakeup code
- */
-#undef i386
-#include "wakeup.h"
-
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
-OUTPUT_ARCH(i386)
-ENTRY(_start)
-
-SECTIONS
-{
- . = 0;
- .jump : {
- *(.jump)
- } = 0x90909090
-
- . = WAKEUP_HEADER_OFFSET;
- .header : {
- *(.header)
- }
-
- . = ALIGN(16);
- .text : {
- *(.text*)
- } = 0x90909090
-
- . = ALIGN(16);
- .rodata : {
- *(.rodata*)
- }
-
- .videocards : {
- video_cards = .;
- *(.videocards)
- video_cards_end = .;
- }
-
- . = ALIGN(16);
- .data : {
- *(.data*)
- }
-
- . = ALIGN(16);
- .bss : {
- __bss_start = .;
- *(.bss)
- __bss_end = .;
- }
-
- .signature : {
- *(.signature)
- }
-
- _end = .;
-
- /DISCARD/ : {
- *(.note*)
- }
-}
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 146a49c763a4..95bf99de9058 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -14,8 +14,9 @@
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
+#include <asm/realmode.h>
-#include "realmode/wakeup.h"
+#include "../../realmode/rm/wakeup.h"
#include "sleep.h"
unsigned long acpi_realmode_flags;
@@ -36,13 +37,9 @@ asmlinkage void acpi_enter_s3(void)
*/
int acpi_suspend_lowlevel(void)
{
- struct wakeup_header *header;
- /* address in low memory of the wakeup routine. */
- char *acpi_realmode;
+ struct wakeup_header *header =
+ (struct wakeup_header *) __va(real_mode_header->wakeup_header);
- acpi_realmode = TRAMPOLINE_SYM(acpi_wakeup_code);
-
- header = (struct wakeup_header *)(acpi_realmode + WAKEUP_HEADER_OFFSET);
if (header->signature != WAKEUP_HEADER_SIGNATURE) {
printk(KERN_ERR "wakeup header does not match\n");
return -EINVAL;
@@ -50,27 +47,6 @@ int acpi_suspend_lowlevel(void)
header->video_mode = saved_video_mode;
- header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
-
- /*
- * Set up the wakeup GDT. We set these up as Big Real Mode,
- * that is, with limits set to 4 GB. At least the Lenovo
- * Thinkpad X61 is known to need this for the video BIOS
- * initialization quirk to work; this is likely to also
- * be the case for other laptops or integrated video devices.
- */
-
- /* GDT[0]: GDT self-pointer */
- header->wakeup_gdt[0] =
- (u64)(sizeof(header->wakeup_gdt) - 1) +
- ((u64)__pa(&header->wakeup_gdt) << 16);
- /* GDT[1]: big real mode-like code segment */
- header->wakeup_gdt[1] =
- GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff);
- /* GDT[2]: big real mode-like data segment */
- header->wakeup_gdt[2] =
- GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff);
-
#ifndef CONFIG_64BIT
store_gdt((struct desc_ptr *)&header->pmode_gdt);
@@ -95,7 +71,6 @@ int acpi_suspend_lowlevel(void)
header->pmode_cr3 = (u32)__pa(&initial_page_table);
saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
- header->trampoline_segment = trampoline_address() >> 4;
#ifdef CONFIG_SMP
stack_start = (unsigned long)temp_stack + sizeof(temp_stack);
early_gdt_descr.address =
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index d68677a2a010..5653a5791ec9 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -2,8 +2,8 @@
* Variables and functions used by the code in sleep.c
*/
-#include <asm/trampoline.h>
#include <linux/linkage.h>
+#include <asm/realmode.h>
extern unsigned long saved_video_mode;
extern long saved_magic;
diff --git a/arch/x86/kernel/acpi/wakeup_rm.S b/arch/x86/kernel/acpi/wakeup_rm.S
deleted file mode 100644
index 63b8ab524f2c..000000000000
--- a/arch/x86/kernel/acpi/wakeup_rm.S
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * Wrapper script for the realmode binary as a transport object
- * before copying to low memory.
- */
-#include <asm/page_types.h>
-
- .section ".x86_trampoline","a"
- .balign PAGE_SIZE
- .globl acpi_wakeup_code
-acpi_wakeup_code:
- .incbin "arch/x86/kernel/acpi/realmode/wakeup.bin"
- .size acpi_wakeup_code, .-acpi_wakeup_code
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 82f29e70d058..6b9333b429ba 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1101,14 +1101,20 @@ int is_debug_stack(unsigned long addr)
addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
}
+static DEFINE_PER_CPU(u32, debug_stack_use_ctr);
+
void debug_stack_set_zero(void)
{
+ this_cpu_inc(debug_stack_use_ctr);
load_idt((const struct desc_ptr *)&nmi_idt_descr);
}
void debug_stack_reset(void)
{
- load_idt((const struct desc_ptr *)&idt_descr);
+ if (WARN_ON(!this_cpu_read(debug_stack_use_ctr)))
+ return;
+ if (this_cpu_dec_return(debug_stack_use_ctr) == 0)
+ load_idt((const struct desc_ptr *)&idt_descr);
}
#else /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b772dd6ad450..0a687fd185e6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1251,15 +1251,15 @@ void mce_log_therm_throt_event(__u64 status)
* poller finds an MCE, poll 2x faster. When the poller finds no more
* errors, poll 2x slower (up to check_interval seconds).
*/
-static int check_interval = 5 * 60; /* 5 minutes */
+static unsigned long check_interval = 5 * 60; /* 5 minutes */
-static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
+static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);
-static void mce_start_timer(unsigned long data)
+static void mce_timer_fn(unsigned long data)
{
- struct timer_list *t = &per_cpu(mce_timer, data);
- int *n;
+ struct timer_list *t = &__get_cpu_var(mce_timer);
+ unsigned long iv;
WARN_ON(smp_processor_id() != data);
@@ -1272,13 +1272,14 @@ static void mce_start_timer(unsigned long data)
* Alert userspace if needed. If we logged an MCE, reduce the
* polling interval, otherwise increase the polling interval.
*/
- n = &__get_cpu_var(mce_next_interval);
+ iv = __this_cpu_read(mce_next_interval);
if (mce_notify_irq())
- *n = max(*n/2, HZ/100);
+ iv = max(iv, (unsigned long) HZ/100);
else
- *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
+ iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+ __this_cpu_write(mce_next_interval, iv);
- t->expires = jiffies + *n;
+ t->expires = jiffies + iv;
add_timer_on(t, smp_processor_id());
}
@@ -1472,9 +1473,9 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
rdmsrl(msrs[i], val);
/* CntP bit set? */
- if (val & BIT(62)) {
- val &= ~BIT(62);
- wrmsrl(msrs[i], val);
+ if (val & BIT_64(62)) {
+ val &= ~BIT_64(62);
+ wrmsrl(msrs[i], val);
}
}
@@ -1556,17 +1557,17 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
static void __mcheck_cpu_init_timer(void)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
- int *n = &__get_cpu_var(mce_next_interval);
+ unsigned long iv = __this_cpu_read(mce_next_interval);
- setup_timer(t, mce_start_timer, smp_processor_id());
+ setup_timer(t, mce_timer_fn, smp_processor_id());
if (mce_ignore_ce)
return;
- *n = check_interval * HZ;
- if (!*n)
+ __this_cpu_write(mce_next_interval, iv);
+ if (!iv)
return;
- t->expires = round_jiffies(jiffies + *n);
+ t->expires = round_jiffies(jiffies + iv);
add_timer_on(t, smp_processor_id());
}
@@ -2276,7 +2277,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_DOWN_FAILED_FROZEN:
if (!mce_ignore_ce && check_interval) {
t->expires = round_jiffies(jiffies +
- __get_cpu_var(mce_next_interval));
+ per_cpu(mce_next_interval, cpu));
add_timer_on(t, cpu);
}
smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index ac140c7be396..bdda2e6c673b 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -266,7 +266,7 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
if (align > max_align)
align = max_align;
- sizek = 1 << align;
+ sizek = 1UL << align;
if (debug_print) {
char start_factor = 'K', size_factor = 'K';
unsigned long start_base, size_base;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 62d61e9976eb..41857970517f 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -113,7 +113,9 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
int x = e820x->nr_map;
if (x >= ARRAY_SIZE(e820x->map)) {
- printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
+ printk(KERN_ERR "e820: too many entries; ignoring [mem %#010llx-%#010llx]\n",
+ (unsigned long long) start,
+ (unsigned long long) (start + size - 1));
return;
}
@@ -133,19 +135,19 @@ static void __init e820_print_type(u32 type)
switch (type) {
case E820_RAM:
case E820_RESERVED_KERN:
- printk(KERN_CONT "(usable)");
+ printk(KERN_CONT "usable");
break;
case E820_RESERVED:
- printk(KERN_CONT "(reserved)");
+ printk(KERN_CONT "reserved");
break;
case E820_ACPI:
- printk(KERN_CONT "(ACPI data)");
+ printk(KERN_CONT "ACPI data");
break;
case E820_NVS:
- printk(KERN_CONT "(ACPI NVS)");
+ printk(KERN_CONT "ACPI NVS");
break;
case E820_UNUSABLE:
- printk(KERN_CONT "(unusable)");
+ printk(KERN_CONT "unusable");
break;
default:
printk(KERN_CONT "type %u", type);
@@ -158,10 +160,10 @@ void __init e820_print_map(char *who)
int i;
for (i = 0; i < e820.nr_map; i++) {
- printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
+ printk(KERN_INFO "%s: [mem %#018Lx-%#018Lx] ", who,
(unsigned long long) e820.map[i].addr,
(unsigned long long)
- (e820.map[i].addr + e820.map[i].size));
+ (e820.map[i].addr + e820.map[i].size - 1));
e820_print_type(e820.map[i].type);
printk(KERN_CONT "\n");
}
@@ -428,9 +430,8 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
size = ULLONG_MAX - start;
end = start + size;
- printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ",
- (unsigned long long) start,
- (unsigned long long) end);
+ printk(KERN_DEBUG "e820: update [mem %#010Lx-%#010Lx] ",
+ (unsigned long long) start, (unsigned long long) (end - 1));
e820_print_type(old_type);
printk(KERN_CONT " ==> ");
e820_print_type(new_type);
@@ -509,9 +510,8 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
size = ULLONG_MAX - start;
end = start + size;
- printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
- (unsigned long long) start,
- (unsigned long long) end);
+ printk(KERN_DEBUG "e820: remove [mem %#010Lx-%#010Lx] ",
+ (unsigned long long) start, (unsigned long long) (end - 1));
if (checktype)
e820_print_type(old_type);
printk(KERN_CONT "\n");
@@ -567,7 +567,7 @@ void __init update_e820(void)
if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
return;
e820.nr_map = nr_map;
- printk(KERN_INFO "modified physical RAM map:\n");
+ printk(KERN_INFO "e820: modified physical RAM map:\n");
e820_print_map("modified");
}
static void __init update_e820_saved(void)
@@ -637,8 +637,8 @@ __init void e820_setup_gap(void)
if (!found) {
gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
printk(KERN_ERR
- "PCI: Warning: Cannot find a gap in the 32bit address range\n"
- "PCI: Unassigned devices with 32bit resource registers may break!\n");
+ "e820: cannot find a gap in the 32bit address range\n"
+ "e820: PCI devices with unassigned 32bit BARs may break!\n");
}
#endif
@@ -648,8 +648,8 @@ __init void e820_setup_gap(void)
pci_mem_start = gapstart;
printk(KERN_INFO
- "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
- pci_mem_start, gapstart, gapsize);
+ "e820: [mem %#010lx-%#010lx] available for PCI devices\n",
+ gapstart, gapstart + gapsize - 1);
}
/**
@@ -667,7 +667,7 @@ void __init parse_e820_ext(struct setup_data *sdata)
extmap = (struct e820entry *)(sdata->data);
__append_e820_map(extmap, entries);
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
- printk(KERN_INFO "extended physical RAM map:\n");
+ printk(KERN_INFO "e820: extended physical RAM map:\n");
e820_print_map("extended");
}
@@ -734,7 +734,7 @@ u64 __init early_reserve_e820(u64 size, u64 align)
addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
if (addr) {
e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED);
- printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
+ printk(KERN_INFO "e820: update e820_saved for early_reserve_e820\n");
update_e820_saved();
}
@@ -784,7 +784,7 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
if (last_pfn > max_arch_pfn)
last_pfn = max_arch_pfn;
- printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
+ printk(KERN_INFO "e820: last_pfn = %#lx max_arch_pfn = %#lx\n",
last_pfn, max_arch_pfn);
return last_pfn;
}
@@ -888,7 +888,7 @@ void __init finish_e820_parsing(void)
early_panic("Invalid user supplied memory map");
e820.nr_map = nr;
- printk(KERN_INFO "user-defined physical RAM map:\n");
+ printk(KERN_INFO "e820: user-defined physical RAM map:\n");
e820_print_map("user");
}
}
@@ -996,8 +996,9 @@ void __init e820_reserve_resources_late(void)
end = MAX_RESOURCE_SIZE;
if (start >= end)
continue;
- printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ",
- start, end);
+ printk(KERN_DEBUG
+ "e820: reserve RAM buffer [mem %#010llx-%#010llx]\n",
+ start, end);
reserve_region_with_split(&iomem_resource, start, end,
"RAM buffer");
}
@@ -1047,7 +1048,7 @@ void __init setup_memory_map(void)
who = x86_init.resources.memory_setup();
memcpy(&e820_saved, &e820, sizeof(struct e820map));
- printk(KERN_INFO "BIOS-provided physical RAM map:\n");
+ printk(KERN_INFO "e820: BIOS-provided physical RAM map:\n");
e820_print_map(who);
}
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 01ccf9b71473..623f28837476 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -316,7 +316,6 @@ ret_from_exception:
preempt_stop(CLBR_ANY)
ret_from_intr:
GET_THREAD_INFO(%ebp)
-resume_userspace_sig:
#ifdef CONFIG_VM86
movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
movb PT_CS(%esp), %al
@@ -615,9 +614,13 @@ work_notifysig: # deal with pending signals and
# vm86-space
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
+ movb PT_CS(%esp), %bl
+ andb $SEGMENT_RPL_MASK, %bl
+ cmpb $USER_RPL, %bl
+ jb resume_kernel
xorl %edx, %edx
call do_notify_resume
- jmp resume_userspace_sig
+ jmp resume_userspace
ALIGN
work_notifysig_v86:
@@ -630,9 +633,13 @@ work_notifysig_v86:
#endif
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
+ movb PT_CS(%esp), %bl
+ andb $SEGMENT_RPL_MASK, %bl
+ cmpb $USER_RPL, %bl
+ jb resume_kernel
xorl %edx, %edx
call do_notify_resume
- jmp resume_userspace_sig
+ jmp resume_userspace
END(work_pending)
# perform syscall exit tracing
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 320852d02026..7d65133b51be 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -191,6 +191,44 @@ ENDPROC(native_usergs_sysret64)
.endm
/*
+ * When dynamic function tracer is enabled it will add a breakpoint
+ * to all locations that it is about to modify, sync CPUs, update
+ * all the code, sync CPUs, then remove the breakpoints. In this time
+ * if lockdep is enabled, it might jump back into the debug handler
+ * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF).
+ *
+ * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to
+ * make sure the stack pointer does not get reset back to the top
+ * of the debug stack, and instead just reuses the current stack.
+ */
+#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
+
+.macro TRACE_IRQS_OFF_DEBUG
+ call debug_stack_set_zero
+ TRACE_IRQS_OFF
+ call debug_stack_reset
+.endm
+
+.macro TRACE_IRQS_ON_DEBUG
+ call debug_stack_set_zero
+ TRACE_IRQS_ON
+ call debug_stack_reset
+.endm
+
+.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
+ bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
+ jnc 1f
+ TRACE_IRQS_ON_DEBUG
+1:
+.endm
+
+#else
+# define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF
+# define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON
+# define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ
+#endif
+
+/*
* C code is not supposed to know about undefined top of stack. Every time
* a C function with an pt_regs argument is called from the SYSCALL based
* fast path FIXUP_TOP_OF_STACK is needed.
@@ -1098,7 +1136,7 @@ ENTRY(\sym)
subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
call save_paranoid
- TRACE_IRQS_OFF
+ TRACE_IRQS_OFF_DEBUG
movq %rsp,%rdi /* pt_regs pointer */
xorl %esi,%esi /* no error code */
subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
@@ -1393,7 +1431,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip)
ENTRY(paranoid_exit)
DEFAULT_FRAME
DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
+ TRACE_IRQS_OFF_DEBUG
testl %ebx,%ebx /* swapgs needed? */
jnz paranoid_restore
testl $3,CS(%rsp)
@@ -1404,7 +1442,7 @@ paranoid_swapgs:
RESTORE_ALL 8
jmp irq_return
paranoid_restore:
- TRACE_IRQS_IRETQ 0
+ TRACE_IRQS_IRETQ_DEBUG 0
RESTORE_ALL 8
jmp irq_return
paranoid_userspace:
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 32ff36596ab1..c3a7cb4bf6e6 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -100,7 +100,7 @@ static const unsigned char *ftrace_nop_replace(void)
}
static int
-ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
unsigned const char *new_code)
{
unsigned char replaced[MCOUNT_INSN_SIZE];
@@ -141,7 +141,20 @@ int ftrace_make_nop(struct module *mod,
old = ftrace_call_replace(ip, addr);
new = ftrace_nop_replace();
- return ftrace_modify_code(rec->ip, old, new);
+ /*
+ * On boot up, and when modules are loaded, the MCOUNT_ADDR
+ * is converted to a nop, and will never become MCOUNT_ADDR
+ * again. This code is either running before SMP (on boot up)
+ * or before the code will ever be executed (module load).
+ * We do not want to use the breakpoint version in this case,
+ * just modify the code directly.
+ */
+ if (addr == MCOUNT_ADDR)
+ return ftrace_modify_code_direct(rec->ip, old, new);
+
+ /* Normal cases use add_brk_on_nop */
+ WARN_ONCE(1, "invalid use of ftrace_make_nop");
+ return -EINVAL;
}
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
@@ -152,9 +165,47 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
old = ftrace_nop_replace();
new = ftrace_call_replace(ip, addr);
- return ftrace_modify_code(rec->ip, old, new);
+ /* Should only be called when module is loaded */
+ return ftrace_modify_code_direct(rec->ip, old, new);
}
+/*
+ * The modifying_ftrace_code is used to tell the breakpoint
+ * handler to call ftrace_int3_handler(). If it fails to
+ * call this handler for a breakpoint added by ftrace, then
+ * the kernel may crash.
+ *
+ * As atomic_writes on x86 do not need a barrier, we do not
+ * need to add smp_mb()s for this to work. It is also considered
+ * that we can not read the modifying_ftrace_code before
+ * executing the breakpoint. That would be quite remarkable if
+ * it could do that. Here's the flow that is required:
+ *
+ * CPU-0 CPU-1
+ *
+ * atomic_inc(mfc);
+ * write int3s
+ * <trap-int3> // implicit (r)mb
+ * if (atomic_read(mfc))
+ * call ftrace_int3_handler()
+ *
+ * Then when we are finished:
+ *
+ * atomic_dec(mfc);
+ *
+ * If we hit a breakpoint that was not set by ftrace, it does not
+ * matter if ftrace_int3_handler() is called or not. It will
+ * simply be ignored. But it is crucial that a ftrace nop/caller
+ * breakpoint is handled. No other user should ever place a
+ * breakpoint on an ftrace nop/caller location. It must only
+ * be done by this code.
+ */
+atomic_t modifying_ftrace_code __read_mostly;
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+ unsigned const char *new_code);
+
int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
@@ -163,13 +214,17 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
new = ftrace_call_replace(ip, (unsigned long)func);
+
+ /* See comment above by declaration of modifying_ftrace_code */
+ atomic_inc(&modifying_ftrace_code);
+
ret = ftrace_modify_code(ip, old, new);
+ atomic_dec(&modifying_ftrace_code);
+
return ret;
}
-int modifying_ftrace_code __read_mostly;
-
/*
* A breakpoint was added to the code address we are about to
* modify, and this is the handle that will just skip over it.
@@ -489,13 +544,46 @@ void ftrace_replace_code(int enable)
}
}
+static int
+ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+ unsigned const char *new_code)
+{
+ int ret;
+
+ ret = add_break(ip, old_code);
+ if (ret)
+ goto out;
+
+ run_sync();
+
+ ret = add_update_code(ip, new_code);
+ if (ret)
+ goto fail_update;
+
+ run_sync();
+
+ ret = ftrace_write(ip, new_code, 1);
+ if (ret) {
+ ret = -EPERM;
+ goto out;
+ }
+ run_sync();
+ out:
+ return ret;
+
+ fail_update:
+ probe_kernel_write((void *)ip, &old_code[0], 1);
+ goto out;
+}
+
void arch_ftrace_update_code(int command)
{
- modifying_ftrace_code++;
+ /* See comment above by declaration of modifying_ftrace_code */
+ atomic_inc(&modifying_ftrace_code);
ftrace_modify_all_code(command);
- modifying_ftrace_code--;
+ atomic_dec(&modifying_ftrace_code);
}
int __init ftrace_dyn_arch_init(void *data)
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 51ff18616d50..c18f59d10101 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -14,7 +14,6 @@
#include <asm/sections.h>
#include <asm/e820.h>
#include <asm/page.h>
-#include <asm/trampoline.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/bios_ebda.h>
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 3a3b779f41d3..037df57a99ac 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -24,7 +24,6 @@
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/e820.h>
-#include <asm/trampoline.h>
#include <asm/bios_ebda.h>
static void __init zap_identity_mappings(void)
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 463c9797ca6a..d42ab17b7397 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -274,10 +274,7 @@ num_subarch_entries = (. - subarch_entries) / 4
* If cpu hotplug is not supported then this code can go in init section
* which will be freed later
*/
-
__CPUINIT
-
-#ifdef CONFIG_SMP
ENTRY(startup_32_smp)
cld
movl $(__BOOT_DS),%eax
@@ -288,7 +285,7 @@ ENTRY(startup_32_smp)
movl pa(stack_start),%ecx
movl %eax,%ss
leal -__PAGE_OFFSET(%ecx),%esp
-#endif /* CONFIG_SMP */
+
default_entry:
/*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 7a40f2447321..94bf9cc2c7ee 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -139,10 +139,6 @@ ident_complete:
/* Fixup phys_base */
addq %rbp, phys_base(%rip)
- /* Fixup trampoline */
- addq %rbp, trampoline_level4_pgt + 0(%rip)
- addq %rbp, trampoline_level4_pgt + (511*8)(%rip)
-
/* Due to ENTRY(), sometimes the empty space gets filled with
* zeros. Better take a jmp than relying on empty space being
* filled with 0x90 (nop)
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 9cc7b4392f7c..1460a5df92f7 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -870,7 +870,7 @@ int __init hpet_enable(void)
else
pr_warn("HPET initial state will not be saved\n");
cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
- hpet_writel(cfg, HPET_Tn_CFG(i));
+ hpet_writel(cfg, HPET_CFG);
if (cfg)
pr_warn("HPET: Unrecognized bits %#x set in global cfg\n",
cfg);
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index b02d4dd6b8a3..d2b56489d70f 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -27,7 +27,6 @@
#include <asm/proto.h>
#include <asm/bios_ebda.h>
#include <asm/e820.h>
-#include <asm/trampoline.h>
#include <asm/setup.h>
#include <asm/smp.h>
@@ -568,8 +567,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
struct mpf_intel *mpf;
unsigned long mem;
- apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
- bp, length);
+ apic_printk(APIC_VERBOSE, "Scan for SMP in [mem %#010lx-%#010lx]\n",
+ base, base + length - 1);
BUILD_BUG_ON(sizeof(*mpf) != 16);
while (length > 0) {
@@ -584,8 +583,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
#endif
mpf_found = mpf;
- printk(KERN_INFO "found SMP MP-table at [%p] %llx\n",
- mpf, (u64)virt_to_phys(mpf));
+ printk(KERN_INFO "found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n",
+ (unsigned long long) virt_to_phys(mpf),
+ (unsigned long long) virt_to_phys(mpf) +
+ sizeof(*mpf) - 1, mpf);
mem = virt_to_phys(mpf);
memblock_reserve(mem, sizeof(*mpf));
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 90875279ef3d..a0b2f84457be 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -444,14 +444,16 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs)
*/
if (unlikely(is_debug_stack(regs->sp))) {
debug_stack_set_zero();
- __get_cpu_var(update_debug_stack) = 1;
+ this_cpu_write(update_debug_stack, 1);
}
}
static inline void nmi_nesting_postprocess(void)
{
- if (unlikely(__get_cpu_var(update_debug_stack)))
+ if (unlikely(this_cpu_read(update_debug_stack))) {
debug_stack_reset();
+ this_cpu_write(update_debug_stack, 0);
+ }
}
#endif
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 13b1990c7c58..c4c6a5c2bf0f 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1211,12 +1211,6 @@ static long x32_arch_ptrace(struct task_struct *child,
0, sizeof(struct user_i387_struct),
datap);
- /* normal 64bit interface to access TLS data.
- Works just like arch_prctl, except that the arguments
- are reversed. */
- case PTRACE_ARCH_PRCTL:
- return do_arch_prctl(child, data, addr);
-
default:
return compat_ptrace_request(child, request, addr, data);
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 77215c23fba1..79c45af81604 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -24,6 +24,7 @@
#ifdef CONFIG_X86_32
# include <linux/ctype.h>
# include <linux/mc146818rtc.h>
+# include <asm/realmode.h>
#else
# include <asm/x86_init.h>
#endif
@@ -156,15 +157,10 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)
return 0;
}
-extern const unsigned char machine_real_restart_asm[];
-extern const u64 machine_real_restart_gdt[3];
-
void machine_real_restart(unsigned int type)
{
- void *restart_va;
- unsigned long restart_pa;
- void (*restart_lowmem)(unsigned int);
- u64 *lowmem_gdt;
+ void (*restart_lowmem)(unsigned int) = (void (*)(unsigned int))
+ real_mode_header->machine_real_restart_asm;
local_irq_disable();
@@ -195,21 +191,6 @@ void machine_real_restart(unsigned int type)
* too. */
*((unsigned short *)0x472) = reboot_mode;
- /* Patch the GDT in the low memory trampoline */
- lowmem_gdt = TRAMPOLINE_SYM(machine_real_restart_gdt);
-
- restart_va = TRAMPOLINE_SYM(machine_real_restart_asm);
- restart_pa = virt_to_phys(restart_va);
- restart_lowmem = (void (*)(unsigned int))restart_pa;
-
- /* GDT[0]: GDT self-pointer */
- lowmem_gdt[0] =
- (u64)(sizeof(machine_real_restart_gdt) - 1) +
- ((u64)virt_to_phys(lowmem_gdt) << 16);
- /* GDT[1]: 64K real mode code segment */
- lowmem_gdt[1] =
- GDT_ENTRY(0x009b, restart_pa, 0xffff);
-
/* Jump to the identity-mapped low memory code */
restart_lowmem(type);
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f2afee6a19c1..16be6dc14db1 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -73,7 +73,7 @@
#include <asm/mtrr.h>
#include <asm/apic.h>
-#include <asm/trampoline.h>
+#include <asm/realmode.h>
#include <asm/e820.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
@@ -334,8 +334,8 @@ static void __init relocate_initrd(void)
memblock_reserve(ramdisk_here, area_size);
initrd_start = ramdisk_here + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size;
- printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
- ramdisk_here, ramdisk_here + ramdisk_size);
+ printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n",
+ ramdisk_here, ramdisk_here + ramdisk_size - 1);
q = (char *)initrd_start;
@@ -366,8 +366,8 @@ static void __init relocate_initrd(void)
/* high pages is not converted by early_res_to_bootmem */
ramdisk_image = boot_params.hdr.ramdisk_image;
ramdisk_size = boot_params.hdr.ramdisk_size;
- printk(KERN_INFO "Move RAMDISK from %016llx - %016llx to"
- " %08llx - %08llx\n",
+ printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to"
+ " [mem %#010llx-%#010llx]\n",
ramdisk_image, ramdisk_image + ramdisk_size - 1,
ramdisk_here, ramdisk_here + ramdisk_size - 1);
}
@@ -392,8 +392,8 @@ static void __init reserve_initrd(void)
ramdisk_size, end_of_lowmem>>1);
}
- printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image,
- ramdisk_end);
+ printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image,
+ ramdisk_end - 1);
if (ramdisk_end <= end_of_lowmem) {
@@ -906,10 +906,10 @@ void __init setup_arch(char **cmdline_p)
setup_bios_corruption_check();
#endif
- printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
- max_pfn_mapped<<PAGE_SHIFT);
+ printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n",
+ (max_pfn_mapped<<PAGE_SHIFT) - 1);
- setup_trampolines();
+ setup_real_mode();
init_gbpages();
@@ -968,6 +968,8 @@ void __init setup_arch(char **cmdline_p)
if (boot_cpu_data.cpuid_level >= 0) {
/* A CPU has %cr4 if and only if it has CPUID */
mmu_cr4_features = read_cr4();
+ if (trampoline_cr4_features)
+ *trampoline_cr4_features = mmu_cr4_features;
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 965dfda0fd5e..21af737053aa 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -555,7 +555,6 @@ unsigned long sys_sigreturn(struct pt_regs *regs)
sizeof(frame->extramask))))
goto badframe;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->sc, &ax))
@@ -581,7 +580,6 @@ long sys_rt_sigreturn(struct pt_regs *regs)
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
@@ -647,42 +645,28 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
struct pt_regs *regs)
{
int usig = signr_convert(sig);
- sigset_t *set = &current->blocked;
- int ret;
-
- if (current_thread_info()->status & TS_RESTORE_SIGMASK)
- set = &current->saved_sigmask;
+ sigset_t *set = sigmask_to_save();
/* Set up the stack frame */
if (is_ia32) {
if (ka->sa.sa_flags & SA_SIGINFO)
- ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
+ return ia32_setup_rt_frame(usig, ka, info, set, regs);
else
- ret = ia32_setup_frame(usig, ka, set, regs);
+ return ia32_setup_frame(usig, ka, set, regs);
#ifdef CONFIG_X86_X32_ABI
} else if (is_x32) {
- ret = x32_setup_rt_frame(usig, ka, info,
+ return x32_setup_rt_frame(usig, ka, info,
(compat_sigset_t *)set, regs);
#endif
} else {
- ret = __setup_rt_frame(sig, ka, info, set, regs);
- }
-
- if (ret) {
- force_sigsegv(sig, current);
- return -EFAULT;
+ return __setup_rt_frame(sig, ka, info, set, regs);
}
-
- current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
- return ret;
}
-static int
+static void
handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
struct pt_regs *regs)
{
- int ret;
-
/* Are we from a system call? */
if (syscall_get_nr(current, regs) >= 0) {
/* If so, check system call restarting.. */
@@ -713,10 +697,10 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
regs->flags &= ~X86_EFLAGS_TF;
- ret = setup_rt_frame(sig, ka, info, regs);
-
- if (ret)
- return ret;
+ if (setup_rt_frame(sig, ka, info, regs) < 0) {
+ force_sigsegv(sig, current);
+ return;
+ }
/*
* Clear the direction flag as per the ABI for function entry.
@@ -731,12 +715,8 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
*/
regs->flags &= ~X86_EFLAGS_TF;
- block_sigmask(ka, sig);
-
- tracehook_signal_handler(sig, info, ka, regs,
- test_thread_flag(TIF_SINGLESTEP));
-
- return 0;
+ signal_delivered(sig, info, ka, regs,
+ test_thread_flag(TIF_SINGLESTEP));
}
#ifdef CONFIG_X86_32
@@ -757,16 +737,6 @@ static void do_signal(struct pt_regs *regs)
siginfo_t info;
int signr;
- /*
- * We want the common case to go fast, which is why we may in certain
- * cases get here from kernel mode. Just return without doing anything
- * if so.
- * X86_32: vm86 regs switched out by assembly code before reaching
- * here, so testing against kernel CS suffices.
- */
- if (!user_mode(regs))
- return;
-
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
/* Whee! Actually deliver the signal. */
@@ -796,10 +766,7 @@ static void do_signal(struct pt_regs *regs)
* If there's no signal to deliver, we just put the saved sigmask
* back.
*/
- if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
- current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
- set_current_blocked(&current->saved_sigmask);
- }
+ restore_saved_sigmask();
}
/*
@@ -827,8 +794,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
- if (current->replacement_session_keyring)
- key_replace_session_keyring();
}
if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();
@@ -936,7 +901,6 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 433529e29be4..f56f96da77f5 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -57,7 +57,7 @@
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/idle.h>
-#include <asm/trampoline.h>
+#include <asm/realmode.h>
#include <asm/cpu.h>
#include <asm/numa.h>
#include <asm/pgtable.h>
@@ -73,6 +73,8 @@
#include <asm/smpboot_hooks.h>
#include <asm/i8259.h>
+#include <asm/realmode.h>
+
/* State of each CPU */
DEFINE_PER_CPU(int, cpu_state) = { 0 };
@@ -660,8 +662,12 @@ static void __cpuinit announce_cpu(int cpu, int apicid)
*/
static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
{
+ volatile u32 *trampoline_status =
+ (volatile u32 *) __va(real_mode_header->trampoline_status);
+ /* start_ip had better be page-aligned! */
+ unsigned long start_ip = real_mode_header->trampoline_start;
+
unsigned long boot_error = 0;
- unsigned long start_ip;
int timeout;
alternatives_smp_switch(1);
@@ -684,9 +690,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
initial_code = (unsigned long)start_secondary;
stack_start = idle->thread.sp;
- /* start_ip had better be page-aligned! */
- start_ip = trampoline_address();
-
/* So we see what's up */
announce_cpu(cpu, apicid);
@@ -749,8 +752,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
pr_debug("CPU%d: has booted.\n", cpu);
} else {
boot_error = 1;
- if (*(volatile u32 *)TRAMPOLINE_SYM(trampoline_status)
- == 0xA5A5A5A5)
+ if (*trampoline_status == 0xA5A5A5A5)
/* trampoline started but...? */
pr_err("CPU%d: Stuck ??\n", cpu);
else
@@ -776,7 +778,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
}
/* mark "stuck" area as not stuck */
- *(volatile u32 *)TRAMPOLINE_SYM(trampoline_status) = 0;
+ *trampoline_status = 0;
if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
/*
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 6410744ac5cb..f84fe00fad48 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -32,7 +32,7 @@
#include <linux/mm.h>
#include <linux/tboot.h>
-#include <asm/trampoline.h>
+#include <asm/realmode.h>
#include <asm/processor.h>
#include <asm/bootparam.h>
#include <asm/pgtable.h>
@@ -44,7 +44,7 @@
#include <asm/e820.h>
#include <asm/io.h>
-#include "acpi/realmode/wakeup.h"
+#include "../realmode/rm/wakeup.h"
/* Global pointer to shared data; NULL means no measured launch. */
struct tboot *tboot __read_mostly;
@@ -201,7 +201,8 @@ static int tboot_setup_sleep(void)
add_mac_region(e820.map[i].addr, e820.map[i].size);
}
- tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address;
+ tboot->acpi_sinfo.kernel_s3_resume_vector =
+ real_mode_header->wakeup_start;
return 0;
}
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
deleted file mode 100644
index a73b61055ad6..000000000000
--- a/arch/x86/kernel/trampoline.c
+++ /dev/null
@@ -1,42 +0,0 @@
-#include <linux/io.h>
-#include <linux/memblock.h>
-
-#include <asm/trampoline.h>
-#include <asm/cacheflush.h>
-#include <asm/pgtable.h>
-
-unsigned char *x86_trampoline_base;
-
-void __init setup_trampolines(void)
-{
- phys_addr_t mem;
- size_t size = PAGE_ALIGN(x86_trampoline_end - x86_trampoline_start);
-
- /* Has to be in very low memory so we can execute real-mode AP code. */
- mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
- if (!mem)
- panic("Cannot allocate trampoline\n");
-
- x86_trampoline_base = __va(mem);
- memblock_reserve(mem, size);
-
- printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
- x86_trampoline_base, (unsigned long long)mem, size);
-
- memcpy(x86_trampoline_base, x86_trampoline_start, size);
-}
-
-/*
- * setup_trampolines() gets called very early, to guarantee the
- * availability of low memory. This is before the proper kernel page
- * tables are set up, so we cannot set page permissions in that
- * function. Thus, we use an arch_initcall instead.
- */
-static int __init configure_trampolines(void)
-{
- size_t size = PAGE_ALIGN(x86_trampoline_end - x86_trampoline_start);
-
- set_memory_x((unsigned long)x86_trampoline_base, size >> PAGE_SHIFT);
- return 0;
-}
-arch_initcall(configure_trampolines);
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
deleted file mode 100644
index 451c0a7ef7fd..000000000000
--- a/arch/x86/kernel/trampoline_32.S
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- *
- * Trampoline.S Derived from Setup.S by Linus Torvalds
- *
- * 4 Jan 1997 Michael Chastain: changed to gnu as.
- *
- * This is only used for booting secondary CPUs in SMP machine
- *
- * Entry: CS:IP point to the start of our code, we are
- * in real mode with no stack, but the rest of the
- * trampoline page to make our stack and everything else
- * is a mystery.
- *
- * We jump into arch/x86/kernel/head_32.S.
- *
- * On entry to trampoline_data, the processor is in real mode
- * with 16-bit addressing and 16-bit data. CS has some value
- * and IP is zero. Thus, data addresses need to be absolute
- * (no relocation) and are taken with regard to r_base.
- *
- * If you work on this file, check the object module with
- * objdump --reloc to make sure there are no relocation
- * entries except for:
- *
- * TYPE VALUE
- * R_386_32 startup_32_smp
- * R_386_32 boot_gdt
- */
-
-#include <linux/linkage.h>
-#include <linux/init.h>
-#include <asm/segment.h>
-#include <asm/page_types.h>
-
-#ifdef CONFIG_SMP
-
- .section ".x86_trampoline","a"
- .balign PAGE_SIZE
- .code16
-
-ENTRY(trampoline_data)
-r_base = .
- wbinvd # Needed for NUMA-Q should be harmless for others
- mov %cs, %ax # Code and data in the same place
- mov %ax, %ds
-
- cli # We should be safe anyway
-
- movl $0xA5A5A5A5, trampoline_status - r_base
- # write marker for master knows we're running
-
- /* GDT tables in non default location kernel can be beyond 16MB and
- * lgdt will not be able to load the address as in real mode default
- * operand size is 16bit. Use lgdtl instead to force operand size
- * to 32 bit.
- */
-
- lidtl boot_idt_descr - r_base # load idt with 0, 0
- lgdtl boot_gdt_descr - r_base # load gdt with whatever is appropriate
-
- xor %ax, %ax
- inc %ax # protected mode (PE) bit
- lmsw %ax # into protected mode
- # flush prefetch and jump to startup_32_smp in arch/i386/kernel/head.S
- ljmpl $__BOOT_CS, $(startup_32_smp-__PAGE_OFFSET)
-
- # These need to be in the same 64K segment as the above;
- # hence we don't use the boot_gdt_descr defined in head.S
-boot_gdt_descr:
- .word __BOOT_DS + 7 # gdt limit
- .long boot_gdt - __PAGE_OFFSET # gdt base
-
-boot_idt_descr:
- .word 0 # idt limit = 0
- .long 0 # idt base = 0L
-
-ENTRY(trampoline_status)
- .long 0
-
-.globl trampoline_end
-trampoline_end:
-
-#endif /* CONFIG_SMP */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ff08457a025d..05b31d92f69c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -303,8 +303,12 @@ gp_in_kernel:
dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
{
#ifdef CONFIG_DYNAMIC_FTRACE
- /* ftrace must be first, everything else may cause a recursive crash */
- if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs))
+ /*
+ * ftrace must be first, everything else may cause a recursive crash.
+ * See note by declaration of modifying_ftrace_code in ftrace.c
+ */
+ if (unlikely(atomic_read(&modifying_ftrace_code)) &&
+ ftrace_int3_handler(regs))
return;
#endif
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 0f703f10901a..22a1530146a8 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -197,18 +197,6 @@ SECTIONS
INIT_DATA_SECTION(16)
- /*
- * Code and data for a variety of lowlevel trampolines, to be
- * copied into base memory (< 1 MiB) during initialization.
- * Since it is copied early, the main copy can be discarded
- * afterwards.
- */
- .x86_trampoline : AT(ADDR(.x86_trampoline) - LOAD_OFFSET) {
- x86_trampoline_start = .;
- *(.x86_trampoline)
- x86_trampoline_end = .;
- }
-
.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
__x86_cpu_dev_start = .;
*(.x86_cpu_dev.init)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 72102e0ab7cb..be3cea4407ff 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2595,8 +2595,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
*gfnp = gfn;
kvm_release_pfn_clean(pfn);
pfn &= ~mask;
- if (!get_page_unless_zero(pfn_to_page(pfn)))
- BUG();
+ kvm_get_pfn(pfn);
*pfnp = pfn;
}
}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 319b6f2fb8b9..97141c26a13a 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -84,8 +84,9 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en
pgt_buf_end = pgt_buf_start;
pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
- printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
- end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
+ printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n",
+ end - 1, pgt_buf_start << PAGE_SHIFT,
+ (pgt_buf_top << PAGE_SHIFT) - 1);
}
void __init native_pagetable_reserve(u64 start, u64 end)
@@ -132,7 +133,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
int nr_range, i;
int use_pse, use_gbpages;
- printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
+ printk(KERN_INFO "init_memory_mapping: [mem %#010lx-%#010lx]\n",
+ start, end - 1);
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
/*
@@ -251,8 +253,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
}
for (i = 0; i < nr_range; i++)
- printk(KERN_DEBUG " %010lx - %010lx page %s\n",
- mr[i].start, mr[i].end,
+ printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n",
+ mr[i].start, mr[i].end - 1,
(mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
(mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
@@ -350,8 +352,8 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
* create a kernel page fault:
*/
#ifdef CONFIG_DEBUG_PAGEALLOC
- printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
- begin, end);
+ printk(KERN_INFO "debug: unmapping init [mem %#010lx-%#010lx]\n",
+ begin, end - 1);
set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
#else
/*
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 19d3fa08b119..2d125be1bae9 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -141,8 +141,8 @@ static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
/* whine about and ignore invalid blks */
if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
- pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n",
- nid, start, end);
+ pr_warning("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
+ nid, start, end - 1);
return 0;
}
@@ -210,8 +210,8 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
start = roundup(start, ZONE_ALIGN);
- printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n",
- nid, start, end);
+ printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
+ nid, start, end - 1);
/*
* Allocate node data. Try remap allocator first, node-local
@@ -232,7 +232,7 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
}
/* report and initialize */
- printk(KERN_INFO " NODE_DATA [%016Lx - %016Lx]%s\n",
+ printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]%s\n",
nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : "");
tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
if (!remapped && tnid != nid)
@@ -291,14 +291,14 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
*/
if (bi->end > bj->start && bi->start < bj->end) {
if (bi->nid != bj->nid) {
- pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n",
- bi->nid, bi->start, bi->end,
- bj->nid, bj->start, bj->end);
+ pr_err("NUMA: node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n",
+ bi->nid, bi->start, bi->end - 1,
+ bj->nid, bj->start, bj->end - 1);
return -EINVAL;
}
- pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n",
- bi->nid, bi->start, bi->end,
- bj->start, bj->end);
+ pr_warning("NUMA: Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n",
+ bi->nid, bi->start, bi->end - 1,
+ bj->start, bj->end - 1);
}
/*
@@ -320,9 +320,9 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
}
if (k < mi->nr_blks)
continue;
- printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%Lx,%Lx)\n",
- bi->nid, bi->start, bi->end, bj->start, bj->end,
- start, end);
+ printk(KERN_INFO "NUMA: Node %d [mem %#010Lx-%#010Lx] + [mem %#010Lx-%#010Lx] -> [mem %#010Lx-%#010Lx]\n",
+ bi->nid, bi->start, bi->end - 1, bj->start,
+ bj->end - 1, start, end - 1);
bi->start = start;
bi->end = end;
numa_remove_memblk_from(j--, mi);
@@ -616,8 +616,8 @@ static int __init dummy_numa_init(void)
{
printk(KERN_INFO "%s\n",
numa_off ? "NUMA turned off" : "No NUMA configuration found");
- printk(KERN_INFO "Faking a node at %016Lx-%016Lx\n",
- 0LLU, PFN_PHYS(max_pfn));
+ printk(KERN_INFO "Faking a node at [mem %#018Lx-%#018Lx]\n",
+ 0LLU, PFN_PHYS(max_pfn) - 1);
node_set(0, numa_nodes_parsed);
numa_add_memblk(0, 0, PFN_PHYS(max_pfn));
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index 871dd8868170..dbbbb47260cc 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -68,8 +68,8 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
numa_remove_memblk_from(phys_blk, pi);
}
- printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
- eb->start, eb->end, (eb->end - eb->start) >> 20);
+ printk(KERN_INFO "Faking node %d at [mem %#018Lx-%#018Lx] (%LuMB)\n",
+ nid, eb->start, eb->end - 1, (eb->end - eb->start) >> 20);
return 0;
}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index f6ff57b7efa5..3d68ef6d2266 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -158,31 +158,47 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)
return req_type;
}
+struct pagerange_state {
+ unsigned long cur_pfn;
+ int ram;
+ int not_ram;
+};
+
+static int
+pagerange_is_ram_callback(unsigned long initial_pfn, unsigned long total_nr_pages, void *arg)
+{
+ struct pagerange_state *state = arg;
+
+ state->not_ram |= initial_pfn > state->cur_pfn;
+ state->ram |= total_nr_pages > 0;
+ state->cur_pfn = initial_pfn + total_nr_pages;
+
+ return state->ram && state->not_ram;
+}
+
static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end)
{
- int ram_page = 0, not_rampage = 0;
- unsigned long page_nr;
+ int ret = 0;
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long end_pfn = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ struct pagerange_state state = {start_pfn, 0, 0};
- for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
- ++page_nr) {
- /*
- * For legacy reasons, physical address range in the legacy ISA
- * region is tracked as non-RAM. This will allow users of
- * /dev/mem to map portions of legacy ISA region, even when
- * some of those portions are listed(or not even listed) with
- * different e820 types(RAM/reserved/..)
- */
- if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) &&
- page_is_ram(page_nr))
- ram_page = 1;
- else
- not_rampage = 1;
-
- if (ram_page == not_rampage)
- return -1;
+ /*
+ * For legacy reasons, physical address range in the legacy ISA
+ * region is tracked as non-RAM. This will allow users of
+ * /dev/mem to map portions of legacy ISA region, even when
+ * some of those portions are listed(or not even listed) with
+ * different e820 types(RAM/reserved/..)
+ */
+ if (start_pfn < ISA_END_ADDRESS >> PAGE_SHIFT)
+ start_pfn = ISA_END_ADDRESS >> PAGE_SHIFT;
+
+ if (start_pfn < end_pfn) {
+ ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn,
+ &state, pagerange_is_ram_callback);
}
- return ram_page;
+ return (ret > 0) ? -1 : (state.ram ? 1 : 0);
}
/*
@@ -209,9 +225,8 @@ static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
page = pfn_to_page(pfn);
type = get_page_memtype(page);
if (type != -1) {
- printk(KERN_INFO "reserve_ram_pages_type failed "
- "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n",
- start, end, type, req_type);
+ printk(KERN_INFO "reserve_ram_pages_type failed [mem %#010Lx-%#010Lx], track 0x%lx, req 0x%lx\n",
+ start, end - 1, type, req_type);
if (new_type)
*new_type = type;
@@ -314,9 +329,9 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
err = rbt_memtype_check_insert(new, new_type);
if (err) {
- printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, "
- "track %s, req %s\n",
- start, end, cattr_name(new->type), cattr_name(req_type));
+ printk(KERN_INFO "reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
+ start, end - 1,
+ cattr_name(new->type), cattr_name(req_type));
kfree(new);
spin_unlock(&memtype_lock);
@@ -325,8 +340,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
spin_unlock(&memtype_lock);
- dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
- start, end, cattr_name(new->type), cattr_name(req_type),
+ dprintk("reserve_memtype added [mem %#010Lx-%#010Lx], track %s, req %s, ret %s\n",
+ start, end - 1, cattr_name(new->type), cattr_name(req_type),
new_type ? cattr_name(*new_type) : "-");
return err;
@@ -360,14 +375,14 @@ int free_memtype(u64 start, u64 end)
spin_unlock(&memtype_lock);
if (!entry) {
- printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n",
- current->comm, current->pid, start, end);
+ printk(KERN_INFO "%s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
+ current->comm, current->pid, start, end - 1);
return -EINVAL;
}
kfree(entry);
- dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end);
+ dprintk("free_memtype request [mem %#010Lx-%#010Lx]\n", start, end - 1);
return 0;
}
@@ -491,9 +506,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
while (cursor < to) {
if (!devmem_is_allowed(pfn)) {
- printk(KERN_INFO
- "Program %s tried to access /dev/mem between %Lx->%Lx.\n",
- current->comm, from, to);
+ printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n",
+ current->comm, from, to - 1);
return 0;
}
cursor += PAGE_SIZE;
@@ -554,12 +568,11 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
size;
if (ioremap_change_attr((unsigned long)__va(base), id_sz, flags) < 0) {
- printk(KERN_INFO
- "%s:%d ioremap_change_attr failed %s "
- "for %Lx-%Lx\n",
+ printk(KERN_INFO "%s:%d ioremap_change_attr failed %s "
+ "for [mem %#010Lx-%#010Lx]\n",
current->comm, current->pid,
cattr_name(flags),
- base, (unsigned long long)(base + size));
+ base, (unsigned long long)(base + size-1));
return -EINVAL;
}
return 0;
@@ -591,12 +604,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
flags = lookup_memtype(paddr);
if (want_flags != flags) {
- printk(KERN_WARNING
- "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n",
+ printk(KERN_WARNING "%s:%d map pfn RAM range req %s for [mem %#010Lx-%#010Lx], got %s\n",
current->comm, current->pid,
cattr_name(want_flags),
(unsigned long long)paddr,
- (unsigned long long)(paddr + size),
+ (unsigned long long)(paddr + size - 1),
cattr_name(flags));
*vma_prot = __pgprot((pgprot_val(*vma_prot) &
(~_PAGE_CACHE_MASK)) |
@@ -614,11 +626,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
!is_new_memtype_allowed(paddr, size, want_flags, flags)) {
free_memtype(paddr, paddr + size);
printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
- " for %Lx-%Lx, got %s\n",
+ " for [mem %#010Lx-%#010Lx], got %s\n",
current->comm, current->pid,
cattr_name(want_flags),
(unsigned long long)paddr,
- (unsigned long long)(paddr + size),
+ (unsigned long long)(paddr + size - 1),
cattr_name(flags));
return -EINVAL;
}
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index efb5b4b93711..732af3a96183 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -176,8 +176,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
return;
}
- printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
- start, end);
+ printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n",
+ node, pxm,
+ (unsigned long long) start, (unsigned long long) end - 1);
}
void __init acpi_numa_arch_fixup(void) {}
diff --git a/arch/x86/realmode/Makefile b/arch/x86/realmode/Makefile
new file mode 100644
index 000000000000..94f7fbe97b08
--- /dev/null
+++ b/arch/x86/realmode/Makefile
@@ -0,0 +1,18 @@
+#
+# arch/x86/realmode/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+#
+
+subdir- := rm
+
+obj-y += init.o
+obj-y += rmpiggy.o
+
+$(obj)/rmpiggy.o: $(obj)/rm/realmode.bin
+
+$(obj)/rm/realmode.bin: FORCE
+ $(Q)$(MAKE) $(build)=$(obj)/rm $@
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
new file mode 100644
index 000000000000..cbca565af5bd
--- /dev/null
+++ b/arch/x86/realmode/init.c
@@ -0,0 +1,115 @@
+#include <linux/io.h>
+#include <linux/memblock.h>
+
+#include <asm/cacheflush.h>
+#include <asm/pgtable.h>
+#include <asm/realmode.h>
+
+struct real_mode_header *real_mode_header;
+u32 *trampoline_cr4_features;
+
+void __init setup_real_mode(void)
+{
+ phys_addr_t mem;
+ u16 real_mode_seg;
+ u32 *rel;
+ u32 count;
+ u32 *ptr;
+ u16 *seg;
+ int i;
+ unsigned char *base;
+ struct trampoline_header *trampoline_header;
+ size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
+#ifdef CONFIG_X86_64
+ u64 *trampoline_pgd;
+ u64 efer;
+#endif
+
+ /* Has to be in very low memory so we can execute real-mode AP code. */
+ mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
+ if (!mem)
+ panic("Cannot allocate trampoline\n");
+
+ base = __va(mem);
+ memblock_reserve(mem, size);
+ real_mode_header = (struct real_mode_header *) base;
+ printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
+ base, (unsigned long long)mem, size);
+
+ memcpy(base, real_mode_blob, size);
+
+ real_mode_seg = __pa(base) >> 4;
+ rel = (u32 *) real_mode_relocs;
+
+ /* 16-bit segment relocations. */
+ count = rel[0];
+ rel = &rel[1];
+ for (i = 0; i < count; i++) {
+ seg = (u16 *) (base + rel[i]);
+ *seg = real_mode_seg;
+ }
+
+ /* 32-bit linear relocations. */
+ count = rel[i];
+ rel = &rel[i + 1];
+ for (i = 0; i < count; i++) {
+ ptr = (u32 *) (base + rel[i]);
+ *ptr += __pa(base);
+ }
+
+ /* Must be perfomed *after* relocation. */
+ trampoline_header = (struct trampoline_header *)
+ __va(real_mode_header->trampoline_header);
+
+#ifdef CONFIG_X86_32
+ trampoline_header->start = __pa(startup_32_smp);
+ trampoline_header->gdt_limit = __BOOT_DS + 7;
+ trampoline_header->gdt_base = __pa(boot_gdt);
+#else
+ /*
+ * Some AMD processors will #GP(0) if EFER.LMA is set in WRMSR
+ * so we need to mask it out.
+ */
+ rdmsrl(MSR_EFER, efer);
+ trampoline_header->efer = efer & ~EFER_LMA;
+
+ trampoline_header->start = (u64) secondary_startup_64;
+ trampoline_cr4_features = &trampoline_header->cr4;
+ *trampoline_cr4_features = read_cr4();
+
+ trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
+ trampoline_pgd[0] = __pa(level3_ident_pgt) + _KERNPG_TABLE;
+ trampoline_pgd[511] = __pa(level3_kernel_pgt) + _KERNPG_TABLE;
+#endif
+}
+
+/*
+ * set_real_mode_permissions() gets called very early, to guarantee the
+ * availability of low memory. This is before the proper kernel page
+ * tables are set up, so we cannot set page permissions in that
+ * function. Thus, we use an arch_initcall instead.
+ */
+static int __init set_real_mode_permissions(void)
+{
+ unsigned char *base = (unsigned char *) real_mode_header;
+ size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
+
+ size_t ro_size =
+ PAGE_ALIGN(real_mode_header->ro_end) -
+ __pa(base);
+
+ size_t text_size =
+ PAGE_ALIGN(real_mode_header->ro_end) -
+ real_mode_header->text_start;
+
+ unsigned long text_start =
+ (unsigned long) __va(real_mode_header->text_start);
+
+ set_memory_nx((unsigned long) base, size >> PAGE_SHIFT);
+ set_memory_ro((unsigned long) base, ro_size >> PAGE_SHIFT);
+ set_memory_x((unsigned long) text_start, text_size >> PAGE_SHIFT);
+
+ return 0;
+}
+
+arch_initcall(set_real_mode_permissions);
diff --git a/arch/x86/realmode/rm/.gitignore b/arch/x86/realmode/rm/.gitignore
new file mode 100644
index 000000000000..b6ed3a2555cb
--- /dev/null
+++ b/arch/x86/realmode/rm/.gitignore
@@ -0,0 +1,3 @@
+pasyms.h
+realmode.lds
+realmode.relocs
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
new file mode 100644
index 000000000000..5b84a2d30888
--- /dev/null
+++ b/arch/x86/realmode/rm/Makefile
@@ -0,0 +1,82 @@
+#
+# arch/x86/realmode/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+#
+
+always := realmode.bin realmode.relocs
+
+wakeup-objs := wakeup_asm.o wakemain.o video-mode.o
+wakeup-objs += copy.o bioscall.o regs.o
+# The link order of the video-*.o modules can matter. In particular,
+# video-vga.o *must* be listed first, followed by video-vesa.o.
+# Hardware-specific drivers should follow in the order they should be
+# probed, and video-bios.o should typically be last.
+wakeup-objs += video-vga.o
+wakeup-objs += video-vesa.o
+wakeup-objs += video-bios.o
+
+realmode-y += header.o
+realmode-y += trampoline_$(BITS).o
+realmode-y += stack.o
+realmode-$(CONFIG_X86_32) += reboot_32.o
+realmode-$(CONFIG_ACPI_SLEEP) += $(wakeup-objs)
+
+targets += $(realmode-y)
+
+REALMODE_OBJS = $(addprefix $(obj)/,$(realmode-y))
+
+sed-pasyms := -n -r -e 's/^([0-9a-fA-F]+) [ABCDGRSTVW] (.+)$$/pa_\2 = \2;/p'
+
+quiet_cmd_pasyms = PASYMS $@
+ cmd_pasyms = $(NM) $(filter-out FORCE,$^) | \
+ sed $(sed-pasyms) | sort | uniq > $@
+
+targets += pasyms.h
+$(obj)/pasyms.h: $(REALMODE_OBJS) FORCE
+ $(call if_changed,pasyms)
+
+targets += realmode.lds
+$(obj)/realmode.lds: $(obj)/pasyms.h
+
+LDFLAGS_realmode.elf := --emit-relocs -T
+CPPFLAGS_realmode.lds += -P -C -I$(obj)
+
+targets += realmode.elf
+$(obj)/realmode.elf: $(obj)/realmode.lds $(REALMODE_OBJS) FORCE
+ $(call if_changed,ld)
+
+OBJCOPYFLAGS_realmode.bin := -O binary
+
+targets += realmode.bin
+$(obj)/realmode.bin: $(obj)/realmode.elf $(obj)/realmode.relocs
+ $(call if_changed,objcopy)
+
+quiet_cmd_relocs = RELOCS $@
+ cmd_relocs = arch/x86/tools/relocs --realmode $< > $@
+
+targets += realmode.relocs
+$(obj)/realmode.relocs: $(obj)/realmode.elf FORCE
+ $(call if_changed,relocs)
+
+# ---------------------------------------------------------------------------
+
+# How to compile the 16-bit code. Note we always compile for -march=i386,
+# that way we can complain to the user if the CPU is insufficient.
+KBUILD_CFLAGS := $(LINUXINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ -D_WAKEUP \
+ -I$(srctree)/arch/x86/boot \
+ -DDISABLE_BRANCH_PROFILING \
+ -Wall -Wstrict-prototypes \
+ -march=i386 -mregparm=3 \
+ -include $(srctree)/$(src)/../../boot/code16gcc.h \
+ -fno-strict-aliasing -fomit-frame-pointer \
+ $(call cc-option, -ffreestanding) \
+ $(call cc-option, -fno-toplevel-reorder,\
+ $(call cc-option, -fno-unit-at-a-time)) \
+ $(call cc-option, -fno-stack-protector) \
+ $(call cc-option, -mpreferred-stack-boundary=2)
+KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
+GCOV_PROFILE := n
diff --git a/arch/x86/realmode/rm/bioscall.S b/arch/x86/realmode/rm/bioscall.S
new file mode 100644
index 000000000000..16162d197918
--- /dev/null
+++ b/arch/x86/realmode/rm/bioscall.S
@@ -0,0 +1 @@
+#include "../../boot/bioscall.S"
diff --git a/arch/x86/realmode/rm/copy.S b/arch/x86/realmode/rm/copy.S
new file mode 100644
index 000000000000..b785e6f38fdd
--- /dev/null
+++ b/arch/x86/realmode/rm/copy.S
@@ -0,0 +1 @@
+#include "../../boot/copy.S"
diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S
new file mode 100644
index 000000000000..fadf48378ada
--- /dev/null
+++ b/arch/x86/realmode/rm/header.S
@@ -0,0 +1,41 @@
+/*
+ * Real-mode blob header; this should match realmode.h and be
+ * readonly; for mutable data instead add pointers into the .data
+ * or .bss sections as appropriate.
+ */
+
+#include <linux/linkage.h>
+#include <asm/page_types.h>
+
+#include "realmode.h"
+
+ .section ".header", "a"
+
+ .balign 16
+GLOBAL(real_mode_header)
+ .long pa_text_start
+ .long pa_ro_end
+ /* SMP trampoline */
+ .long pa_trampoline_start
+ .long pa_trampoline_status
+ .long pa_trampoline_header
+#ifdef CONFIG_X86_64
+ .long pa_trampoline_pgd;
+#endif
+ /* ACPI S3 wakeup */
+#ifdef CONFIG_ACPI_SLEEP
+ .long pa_wakeup_start
+ .long pa_wakeup_header
+#endif
+ /* APM/BIOS reboot */
+#ifdef CONFIG_X86_32
+ .long pa_machine_real_restart_asm
+#endif
+END(real_mode_header)
+
+ /* End signature, used to verify integrity */
+ .section ".signature","a"
+ .balign 4
+GLOBAL(end_signature)
+ .long REALMODE_END_SIGNATURE
+END(end_signature)
diff --git a/arch/x86/realmode/rm/realmode.h b/arch/x86/realmode/rm/realmode.h
new file mode 100644
index 000000000000..d74cff6350ed
--- /dev/null
+++ b/arch/x86/realmode/rm/realmode.h
@@ -0,0 +1,21 @@
+#ifndef ARCH_X86_REALMODE_RM_REALMODE_H
+#define ARCH_X86_REALMODE_RM_REALMODE_H
+
+#ifdef __ASSEMBLY__
+
+/*
+ * 16-bit ljmpw to the real_mode_seg
+ *
+ * This must be open-coded since gas will choke on using a
+ * relocatable symbol for the segment portion.
+ */
+#define LJMPW_RM(to) .byte 0xea ; .word (to), real_mode_seg
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Signature at the end of the realmode region
+ */
+#define REALMODE_END_SIGNATURE 0x65a22c82
+
+#endif /* ARCH_X86_REALMODE_RM_REALMODE_H */
diff --git a/arch/x86/realmode/rm/realmode.lds.S b/arch/x86/realmode/rm/realmode.lds.S
new file mode 100644
index 000000000000..86b2e8d6b1f1
--- /dev/null
+++ b/arch/x86/realmode/rm/realmode.lds.S
@@ -0,0 +1,76 @@
+/*
+ * realmode.lds.S
+ *
+ * Linker script for the real-mode code
+ */
+
+#include <asm/page_types.h>
+
+#undef i386
+
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+
+SECTIONS
+{
+ real_mode_seg = 0;
+
+ . = 0;
+ .header : {
+ pa_real_mode_base = .;
+ *(.header)
+ }
+
+ . = ALIGN(4);
+ .rodata : {
+ *(.rodata)
+ *(.rodata.*)
+ . = ALIGN(16);
+ video_cards = .;
+ *(.videocards)
+ video_cards_end = .;
+ }
+
+ . = ALIGN(PAGE_SIZE);
+ pa_text_start = .;
+ .text : {
+ *(.text)
+ *(.text.*)
+ }
+
+ .text32 : {
+ *(.text32)
+ *(.text32.*)
+ }
+
+ .text64 : {
+ *(.text64)
+ *(.text64.*)
+ }
+ pa_ro_end = .;
+
+ . = ALIGN(PAGE_SIZE);
+ .data : {
+ *(.data)
+ *(.data.*)
+ }
+
+ . = ALIGN(128);
+ .bss : {
+ *(.bss*)
+ }
+
+ /* End signature for integrity checking */
+ . = ALIGN(4);
+ .signature : {
+ *(.signature)
+ }
+
+ /DISCARD/ : {
+ *(.note*)
+ *(.debug*)
+ *(.eh_frame*)
+ }
+
+#include "pasyms.h"
+}
diff --git a/arch/x86/kernel/reboot_32.S b/arch/x86/realmode/rm/reboot_32.S
index 1d5c46df0d78..114044876b3d 100644
--- a/arch/x86/kernel/reboot_32.S
+++ b/arch/x86/realmode/rm/reboot_32.S
@@ -2,6 +2,7 @@
#include <linux/init.h>
#include <asm/segment.h>
#include <asm/page_types.h>
+#include "realmode.h"
/*
* The following code and data reboots the machine by switching to real
@@ -13,34 +14,20 @@
*
* This code is called with the restart type (0 = BIOS, 1 = APM) in %eax.
*/
- .section ".x86_trampoline","a"
- .balign 16
+ .section ".text32", "ax"
.code32
-ENTRY(machine_real_restart_asm)
-r_base = .
- /* Get our own relocated address */
- call 1f
-1: popl %ebx
- subl $(1b - r_base), %ebx
-
- /* Compute the equivalent real-mode segment */
- movl %ebx, %ecx
- shrl $4, %ecx
-
- /* Patch post-real-mode segment jump */
- movw (dispatch_table - r_base)(%ebx,%eax,2),%ax
- movw %ax, (101f - r_base)(%ebx)
- movw %cx, (102f - r_base)(%ebx)
+ .balign 16
+ENTRY(machine_real_restart_asm)
/* Set up the IDT for real mode. */
- lidtl (machine_real_restart_idt - r_base)(%ebx)
+ lidtl pa_machine_real_restart_idt
/*
* Set up a GDT from which we can load segment descriptors for real
* mode. The GDT is not used in real mode; it is just needed here to
* prepare the descriptors.
*/
- lgdtl (machine_real_restart_gdt - r_base)(%ebx)
+ lgdtl pa_machine_real_restart_gdt
/*
* Load the data segment registers with 16-bit compatible values
@@ -51,7 +38,7 @@ r_base = .
movl %ecx, %fs
movl %ecx, %gs
movl %ecx, %ss
- ljmpl $8, $1f - r_base
+ ljmpw $8, $1f
/*
* This is 16-bit protected mode code to disable paging and the cache,
@@ -76,27 +63,29 @@ r_base = .
*
* Most of this work is probably excessive, but it is what is tested.
*/
+ .text
.code16
+
+ .balign 16
+machine_real_restart_asm16:
1:
xorl %ecx, %ecx
- movl %cr0, %eax
- andl $0x00000011, %eax
- orl $0x60000000, %eax
- movl %eax, %cr0
+ movl %cr0, %edx
+ andl $0x00000011, %edx
+ orl $0x60000000, %edx
+ movl %edx, %cr0
movl %ecx, %cr3
movl %cr0, %edx
- andl $0x60000000, %edx /* If no cache bits -> no wbinvd */
+ testl $0x60000000, %edx /* If no cache bits -> no wbinvd */
jz 2f
wbinvd
2:
- andb $0x10, %al
- movl %eax, %cr0
- .byte 0xea /* ljmpw */
-101: .word 0 /* Offset */
-102: .word 0 /* Segment */
-
-bios:
- ljmpw $0xf000, $0xfff0
+ andb $0x10, %dl
+ movl %edx, %cr0
+ LJMPW_RM(3f)
+3:
+ andw %ax, %ax
+ jz bios
apm:
movw $0x1000, %ax
@@ -106,26 +95,34 @@ apm:
movw $0x0001, %bx
movw $0x0003, %cx
int $0x15
+ /* This should never return... */
-END(machine_real_restart_asm)
+bios:
+ ljmpw $0xf000, $0xfff0
- .balign 16
- /* These must match <asm/reboot.h */
-dispatch_table:
- .word bios - r_base
- .word apm - r_base
-END(dispatch_table)
+ .section ".rodata", "a"
- .balign 16
-machine_real_restart_idt:
+ .balign 16
+GLOBAL(machine_real_restart_idt)
.word 0xffff /* Length - real mode default value */
.long 0 /* Base - real mode default value */
END(machine_real_restart_idt)
- .balign 16
-ENTRY(machine_real_restart_gdt)
- .quad 0 /* Self-pointer, filled in by PM code */
- .quad 0 /* 16-bit code segment, filled in by PM code */
+ .balign 16
+GLOBAL(machine_real_restart_gdt)
+ /* Self-pointer */
+ .word 0xffff /* Length - real mode default value */
+ .long pa_machine_real_restart_gdt
+ .word 0
+
+ /*
+ * 16-bit code segment pointing to real_mode_seg
+ * Selector value 8
+ */
+ .word 0xffff /* Limit */
+ .long 0x9b000000 + pa_real_mode_base
+ .word 0
+
/*
* 16-bit data segment with the selector value 16 = 0x10 and
* base value 0x100; since this is consistent with real mode
diff --git a/arch/x86/realmode/rm/regs.c b/arch/x86/realmode/rm/regs.c
new file mode 100644
index 000000000000..fbb15b9f9ca9
--- /dev/null
+++ b/arch/x86/realmode/rm/regs.c
@@ -0,0 +1 @@
+#include "../../boot/regs.c"
diff --git a/arch/x86/realmode/rm/stack.S b/arch/x86/realmode/rm/stack.S
new file mode 100644
index 000000000000..867ae87adfae
--- /dev/null
+++ b/arch/x86/realmode/rm/stack.S
@@ -0,0 +1,19 @@
+/*
+ * Common heap and stack allocations
+ */
+
+#include <linux/linkage.h>
+
+ .data
+GLOBAL(HEAP)
+ .long rm_heap
+GLOBAL(heap_end)
+ .long rm_stack
+
+ .bss
+ .balign 16
+GLOBAL(rm_heap)
+ .space 2048
+GLOBAL(rm_stack)
+ .space 2048
+GLOBAL(rm_stack_end)
diff --git a/arch/x86/realmode/rm/trampoline_32.S b/arch/x86/realmode/rm/trampoline_32.S
new file mode 100644
index 000000000000..c1b2791183e7
--- /dev/null
+++ b/arch/x86/realmode/rm/trampoline_32.S
@@ -0,0 +1,74 @@
+/*
+ *
+ * Trampoline.S Derived from Setup.S by Linus Torvalds
+ *
+ * 4 Jan 1997 Michael Chastain: changed to gnu as.
+ *
+ * This is only used for booting secondary CPUs in SMP machine
+ *
+ * Entry: CS:IP point to the start of our code, we are
+ * in real mode with no stack, but the rest of the
+ * trampoline page to make our stack and everything else
+ * is a mystery.
+ *
+ * We jump into arch/x86/kernel/head_32.S.
+ *
+ * On entry to trampoline_start, the processor is in real mode
+ * with 16-bit addressing and 16-bit data. CS has some value
+ * and IP is zero. Thus, we load CS to the physical segment
+ * of the real mode code before doing anything further.
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/segment.h>
+#include <asm/page_types.h>
+#include "realmode.h"
+
+ .text
+ .code16
+
+ .balign PAGE_SIZE
+ENTRY(trampoline_start)
+ wbinvd # Needed for NUMA-Q should be harmless for others
+
+ LJMPW_RM(1f)
+1:
+ mov %cs, %ax # Code and data in the same place
+ mov %ax, %ds
+
+ cli # We should be safe anyway
+
+ movl tr_start, %eax # where we need to go
+
+ movl $0xA5A5A5A5, trampoline_status
+ # write marker for master knows we're running
+
+ /*
+ * GDT tables in non default location kernel can be beyond 16MB and
+ * lgdt will not be able to load the address as in real mode default
+ * operand size is 16bit. Use lgdtl instead to force operand size
+ * to 32 bit.
+ */
+ lidtl tr_idt # load idt with 0, 0
+ lgdtl tr_gdt # load gdt with whatever is appropriate
+
+ movw $1, %dx # protected mode (PE) bit
+ lmsw %dx # into protected mode
+
+ ljmpl $__BOOT_CS, $pa_startup_32
+
+ .section ".text32","ax"
+ .code32
+ENTRY(startup_32) # note: also used from wakeup_asm.S
+ jmp *%eax
+
+ .bss
+ .balign 8
+GLOBAL(trampoline_header)
+ tr_start: .space 4
+ tr_gdt_pad: .space 2
+ tr_gdt: .space 6
+END(trampoline_header)
+
+#include "trampoline_common.S"
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index 09ff51799e96..bb360dc39d21 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -5,12 +5,12 @@
* 4 Jan 1997 Michael Chastain: changed to gnu as.
* 15 Sept 2005 Eric Biederman: 64bit PIC support
*
- * Entry: CS:IP point to the start of our code, we are
- * in real mode with no stack, but the rest of the
+ * Entry: CS:IP point to the start of our code, we are
+ * in real mode with no stack, but the rest of the
* trampoline page to make our stack and everything else
* is a mystery.
*
- * On entry to trampoline_data, the processor is in real mode
+ * On entry to trampoline_start, the processor is in real mode
* with 16-bit addressing and 16-bit data. CS has some value
* and IP is zero. Thus, data addresses need to be absolute
* (no relocation) and are taken with regard to r_base.
@@ -31,43 +31,33 @@
#include <asm/msr.h>
#include <asm/segment.h>
#include <asm/processor-flags.h>
+#include "realmode.h"
- .section ".x86_trampoline","a"
- .balign PAGE_SIZE
+ .text
.code16
-ENTRY(trampoline_data)
-r_base = .
+ .balign PAGE_SIZE
+ENTRY(trampoline_start)
cli # We should be safe anyway
wbinvd
+
+ LJMPW_RM(1f)
+1:
mov %cs, %ax # Code and data in the same place
mov %ax, %ds
mov %ax, %es
mov %ax, %ss
+ movl $0xA5A5A5A5, trampoline_status
+ # write marker for master knows we're running
- movl $0xA5A5A5A5, trampoline_status - r_base
- # write marker for master knows we're running
-
- # Setup stack
- movw $(trampoline_stack_end - r_base), %sp
+ # Setup stack
+ movl $rm_stack_end, %esp
call verify_cpu # Verify the cpu supports long mode
testl %eax, %eax # Check for return code
jnz no_longmode
- mov %cs, %ax
- movzx %ax, %esi # Find the 32bit trampoline location
- shll $4, %esi
-
- # Fixup the absolute vectors
- leal (startup_32 - r_base)(%esi), %eax
- movl %eax, startup_32_vector - r_base
- leal (startup_64 - r_base)(%esi), %eax
- movl %eax, startup_64_vector - r_base
- leal (tgdt - r_base)(%esi), %eax
- movl %eax, (tgdt + 2 - r_base)
-
/*
* GDT tables in non default location kernel can be beyond 16MB and
* lgdt will not be able to load the address as in real mode default
@@ -75,36 +65,49 @@ r_base = .
* to 32 bit.
*/
- lidtl tidt - r_base # load idt with 0, 0
- lgdtl tgdt - r_base # load gdt with whatever is appropriate
+ lidtl tr_idt # load idt with 0, 0
+ lgdtl tr_gdt # load gdt with whatever is appropriate
+
+ movw $__KERNEL_DS, %dx # Data segment descriptor
- mov $X86_CR0_PE, %ax # protected mode (PE) bit
- lmsw %ax # into protected mode
+ # Enable protected mode
+ movl $X86_CR0_PE, %eax # protected mode (PE) bit
+ movl %eax, %cr0 # into protected mode
# flush prefetch and jump to startup_32
- ljmpl *(startup_32_vector - r_base)
+ ljmpl $__KERNEL32_CS, $pa_startup_32
+no_longmode:
+ hlt
+ jmp no_longmode
+#include "../kernel/verify_cpu.S"
+
+ .section ".text32","ax"
.code32
.balign 4
-startup_32:
- movl $__KERNEL_DS, %eax # Initialize the %ds segment register
- movl %eax, %ds
-
- movl $X86_CR4_PAE, %eax
+ENTRY(startup_32)
+ movl %edx, %ss
+ addl $pa_real_mode_base, %esp
+ movl %edx, %ds
+ movl %edx, %es
+ movl %edx, %fs
+ movl %edx, %gs
+
+ movl pa_tr_cr4, %eax
movl %eax, %cr4 # Enable PAE mode
- # Setup trampoline 4 level pagetables
- leal (trampoline_level4_pgt - r_base)(%esi), %eax
+ # Setup trampoline 4 level pagetables
+ movl $pa_trampoline_pgd, %eax
movl %eax, %cr3
+ # Set up EFER
+ movl pa_tr_efer, %eax
+ movl pa_tr_efer + 4, %edx
movl $MSR_EFER, %ecx
- movl $(1 << _EFER_LME), %eax # Enable Long Mode
- xorl %edx, %edx
wrmsr
# Enable paging and in turn activate Long Mode
- # Enable protected mode
- movl $(X86_CR0_PG | X86_CR0_PE), %eax
+ movl $(X86_CR0_PG | X86_CR0_WP | X86_CR0_PE), %eax
movl %eax, %cr0
/*
@@ -113,59 +116,38 @@ startup_32:
* EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use
* the new gdt/idt that has __KERNEL_CS with CS.L = 1.
*/
- ljmp *(startup_64_vector - r_base)(%esi)
+ ljmpl $__KERNEL_CS, $pa_startup_64
+ .section ".text64","ax"
.code64
.balign 4
-startup_64:
+ENTRY(startup_64)
# Now jump into the kernel using virtual addresses
- movq $secondary_startup_64, %rax
- jmp *%rax
-
- .code16
-no_longmode:
- hlt
- jmp no_longmode
-#include "verify_cpu.S"
-
- .balign 4
- # Careful these need to be in the same 64K segment as the above;
-tidt:
- .word 0 # idt limit = 0
- .word 0, 0 # idt base = 0L
+ jmpq *tr_start(%rip)
+ .section ".rodata","a"
# Duplicate the global descriptor table
# so the kernel can live anywhere
- .balign 4
-tgdt:
- .short tgdt_end - tgdt # gdt limit
- .long tgdt - r_base
- .short 0
+ .balign 16
+ .globl tr_gdt
+tr_gdt:
+ .short tr_gdt_end - tr_gdt - 1 # gdt limit
+ .long pa_tr_gdt
+ .short 0
.quad 0x00cf9b000000ffff # __KERNEL32_CS
.quad 0x00af9b000000ffff # __KERNEL_CS
.quad 0x00cf93000000ffff # __KERNEL_DS
-tgdt_end:
+tr_gdt_end:
- .balign 4
-startup_32_vector:
- .long startup_32 - r_base
- .word __KERNEL32_CS, 0
+ .bss
+ .balign PAGE_SIZE
+GLOBAL(trampoline_pgd) .space PAGE_SIZE
- .balign 4
-startup_64_vector:
- .long startup_64 - r_base
- .word __KERNEL_CS, 0
+ .balign 8
+GLOBAL(trampoline_header)
+ tr_start: .space 8
+ GLOBAL(tr_efer) .space 8
+ GLOBAL(tr_cr4) .space 4
+END(trampoline_header)
- .balign 4
-ENTRY(trampoline_status)
- .long 0
-
-trampoline_stack:
- .org 0x1000
-trampoline_stack_end:
-ENTRY(trampoline_level4_pgt)
- .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
- .fill 510,8,0
- .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
-
-ENTRY(trampoline_end)
+#include "trampoline_common.S"
diff --git a/arch/x86/realmode/rm/trampoline_common.S b/arch/x86/realmode/rm/trampoline_common.S
new file mode 100644
index 000000000000..b1ecdb9692ad
--- /dev/null
+++ b/arch/x86/realmode/rm/trampoline_common.S
@@ -0,0 +1,7 @@
+ .section ".rodata","a"
+ .balign 16
+tr_idt: .fill 1, 6, 0
+
+ .bss
+ .balign 4
+GLOBAL(trampoline_status) .space 4
diff --git a/arch/x86/realmode/rm/video-bios.c b/arch/x86/realmode/rm/video-bios.c
new file mode 100644
index 000000000000..848b25aaf11b
--- /dev/null
+++ b/arch/x86/realmode/rm/video-bios.c
@@ -0,0 +1 @@
+#include "../../boot/video-bios.c"
diff --git a/arch/x86/realmode/rm/video-mode.c b/arch/x86/realmode/rm/video-mode.c
new file mode 100644
index 000000000000..2a98b7e2368b
--- /dev/null
+++ b/arch/x86/realmode/rm/video-mode.c
@@ -0,0 +1 @@
+#include "../../boot/video-mode.c"
diff --git a/arch/x86/realmode/rm/video-vesa.c b/arch/x86/realmode/rm/video-vesa.c
new file mode 100644
index 000000000000..413edddb51e5
--- /dev/null
+++ b/arch/x86/realmode/rm/video-vesa.c
@@ -0,0 +1 @@
+#include "../../boot/video-vesa.c"
diff --git a/arch/x86/realmode/rm/video-vga.c b/arch/x86/realmode/rm/video-vga.c
new file mode 100644
index 000000000000..3085f5c9d288
--- /dev/null
+++ b/arch/x86/realmode/rm/video-vga.c
@@ -0,0 +1 @@
+#include "../../boot/video-vga.c"
diff --git a/arch/x86/kernel/acpi/realmode/wakemain.c b/arch/x86/realmode/rm/wakemain.c
index 883962d9eef2..91405d515ec6 100644
--- a/arch/x86/kernel/acpi/realmode/wakemain.c
+++ b/arch/x86/realmode/rm/wakemain.c
@@ -65,7 +65,8 @@ void main(void)
{
/* Kill machine if structures are wrong */
if (wakeup_header.real_magic != 0x12345678)
- while (1);
+ while (1)
+ ;
if (wakeup_header.realmode_flags & 4)
send_morse("...-");
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.h b/arch/x86/realmode/rm/wakeup.h
index 97a29e1430e3..9317e0042f24 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.h
+++ b/arch/x86/realmode/rm/wakeup.h
@@ -12,9 +12,8 @@
/* This must match data at wakeup.S */
struct wakeup_header {
u16 video_mode; /* Video mode number */
- u16 _jmp1; /* ljmpl opcode, 32-bit only */
u32 pmode_entry; /* Protected mode resume point, 32-bit only */
- u16 _jmp2; /* CS value, 32-bit only */
+ u16 pmode_cs;
u32 pmode_cr0; /* Protected mode cr0 */
u32 pmode_cr3; /* Protected mode cr3 */
u32 pmode_cr4; /* Protected mode cr4 */
@@ -26,12 +25,6 @@ struct wakeup_header {
u32 pmode_behavior; /* Wakeup routine behavior flags */
u32 realmode_flags;
u32 real_magic;
- u16 trampoline_segment; /* segment with trampoline code, 64-bit only */
- u8 _pad1;
- u8 wakeup_jmp;
- u16 wakeup_jmp_off;
- u16 wakeup_jmp_seg;
- u64 wakeup_gdt[3];
u32 signature; /* To check we have correct structure */
} __attribute__((__packed__));
@@ -40,7 +33,6 @@ extern struct wakeup_header wakeup_header;
#define WAKEUP_HEADER_OFFSET 8
#define WAKEUP_HEADER_SIGNATURE 0x51ee1111
-#define WAKEUP_END_SIGNATURE 0x65a22c82
/* Wakeup behavior bits */
#define WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE 0
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/realmode/rm/wakeup_asm.S
index b4fd836e4053..8905166b0bbb 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.S
+++ b/arch/x86/realmode/rm/wakeup_asm.S
@@ -1,50 +1,47 @@
/*
* ACPI wakeup real mode startup stub
*/
+#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/msr-index.h>
#include <asm/page_types.h>
#include <asm/pgtable_types.h>
#include <asm/processor-flags.h>
+#include "realmode.h"
#include "wakeup.h"
.code16
- .section ".jump", "ax"
- .globl _start
-_start:
- cli
- jmp wakeup_code
/* This should match the structure in wakeup.h */
- .section ".header", "a"
- .globl wakeup_header
-wakeup_header:
-video_mode: .short 0 /* Video mode number */
-pmode_return: .byte 0x66, 0xea /* ljmpl */
- .long 0 /* offset goes here */
- .short __KERNEL_CS
-pmode_cr0: .long 0 /* Saved %cr0 */
-pmode_cr3: .long 0 /* Saved %cr3 */
-pmode_cr4: .long 0 /* Saved %cr4 */
-pmode_efer: .quad 0 /* Saved EFER */
-pmode_gdt: .quad 0
-pmode_misc_en: .quad 0 /* Saved MISC_ENABLE MSR */
-pmode_behavior: .long 0 /* Wakeup behavior flags */
-realmode_flags: .long 0
-real_magic: .long 0
-trampoline_segment: .word 0
-_pad1: .byte 0
-wakeup_jmp: .byte 0xea /* ljmpw */
-wakeup_jmp_off: .word 3f
-wakeup_jmp_seg: .word 0
-wakeup_gdt: .quad 0, 0, 0
-signature: .long WAKEUP_HEADER_SIGNATURE
+ .section ".data", "aw"
+
+ .balign 16
+GLOBAL(wakeup_header)
+ video_mode: .short 0 /* Video mode number */
+ pmode_entry: .long 0
+ pmode_cs: .short __KERNEL_CS
+ pmode_cr0: .long 0 /* Saved %cr0 */
+ pmode_cr3: .long 0 /* Saved %cr3 */
+ pmode_cr4: .long 0 /* Saved %cr4 */
+ pmode_efer: .quad 0 /* Saved EFER */
+ pmode_gdt: .quad 0
+ pmode_misc_en: .quad 0 /* Saved MISC_ENABLE MSR */
+ pmode_behavior: .long 0 /* Wakeup behavior flags */
+ realmode_flags: .long 0
+ real_magic: .long 0
+ signature: .long WAKEUP_HEADER_SIGNATURE
+END(wakeup_header)
.text
.code16
-wakeup_code:
+
+ .balign 16
+ENTRY(wakeup_start)
+ cli
cld
+ LJMPW_RM(3f)
+3:
/* Apparently some dimwit BIOS programmers don't know how to
program a PM to RM transition, and we might end up here with
junk in the data segment descriptor registers. The only way
@@ -54,8 +51,7 @@ wakeup_code:
movl %cr0, %eax
orb $X86_CR0_PE, %al
movl %eax, %cr0
- jmp 1f
-1: ljmpw $8, $2f
+ ljmpw $8, $2f
2:
movw %cx, %ds
movw %cx, %es
@@ -65,16 +61,18 @@ wakeup_code:
andb $~X86_CR0_PE, %al
movl %eax, %cr0
- jmp wakeup_jmp
+ LJMPW_RM(3f)
3:
/* Set up segments */
movw %cs, %ax
+ movw %ax, %ss
+ movl $rm_stack_end, %esp
movw %ax, %ds
movw %ax, %es
- movw %ax, %ss
- lidtl wakeup_idt
+ movw %ax, %fs
+ movw %ax, %gs
- movl $wakeup_stack_end, %esp
+ lidtl wakeup_idt
/* Clear the EFLAGS */
pushl $0
@@ -87,7 +85,7 @@ wakeup_code:
/* Check we really have everything... */
movl end_signature, %eax
- cmpl $WAKEUP_END_SIGNATURE, %eax
+ cmpl $REALMODE_END_SIGNATURE, %eax
jne bogus_real_magic
/* Call the C code */
@@ -128,14 +126,13 @@ wakeup_code:
lgdtl pmode_gdt
/* This really couldn't... */
- movl pmode_cr0, %eax
- movl %eax, %cr0
- jmp pmode_return
+ movl pmode_entry, %eax
+ movl pmode_cr0, %ecx
+ movl %ecx, %cr0
+ ljmpl $__KERNEL_CS, $pa_startup_32
+ /* -> jmp *%eax in trampoline_32.S */
#else
- pushw $0
- pushw trampoline_segment
- pushw $0
- lret
+ jmp trampoline_start
#endif
bogus_real_magic:
@@ -143,28 +140,38 @@ bogus_real_magic:
hlt
jmp 1b
- .data
+ .section ".rodata","a"
+
+ /*
+ * Set up the wakeup GDT. We set these up as Big Real Mode,
+ * that is, with limits set to 4 GB. At least the Lenovo
+ * Thinkpad X61 is known to need this for the video BIOS
+ * initialization quirk to work; this is likely to also
+ * be the case for other laptops or integrated video devices.
+ */
+
+ .balign 16
+GLOBAL(wakeup_gdt)
+ .word 3*8-1 /* Self-descriptor */
+ .long pa_wakeup_gdt
+ .word 0
+
+ .word 0xffff /* 16-bit code segment @ real_mode_base */
+ .long 0x9b000000 + pa_real_mode_base
+ .word 0x008f /* big real mode */
+
+ .word 0xffff /* 16-bit data segment @ real_mode_base */
+ .long 0x93000000 + pa_real_mode_base
+ .word 0x008f /* big real mode */
+END(wakeup_gdt)
+
+ .section ".rodata","a"
.balign 8
/* This is the standard real-mode IDT */
-wakeup_idt:
+ .balign 16
+GLOBAL(wakeup_idt)
.word 0xffff /* limit */
.long 0 /* address */
.word 0
-
- .globl HEAP, heap_end
-HEAP:
- .long wakeup_heap
-heap_end:
- .long wakeup_stack
-
- .bss
-wakeup_heap:
- .space 2048
-wakeup_stack:
- .space 2048
-wakeup_stack_end:
-
- .section ".signature","a"
-end_signature:
- .long WAKEUP_END_SIGNATURE
+END(wakeup_idt)
diff --git a/arch/x86/realmode/rmpiggy.S b/arch/x86/realmode/rmpiggy.S
new file mode 100644
index 000000000000..204c6ece0e97
--- /dev/null
+++ b/arch/x86/realmode/rmpiggy.S
@@ -0,0 +1,20 @@
+/*
+ * Wrapper script for the realmode binary as a transport object
+ * before copying to low memory.
+ */
+#include <linux/linkage.h>
+#include <asm/page_types.h>
+
+ .section ".init.data","aw"
+
+ .balign PAGE_SIZE
+
+GLOBAL(real_mode_blob)
+ .incbin "arch/x86/realmode/rm/realmode.bin"
+END(real_mode_blob)
+
+GLOBAL(real_mode_blob_end);
+
+GLOBAL(real_mode_relocs)
+ .incbin "arch/x86/realmode/rm/realmode.relocs"
+END(real_mode_relocs)
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 29f9f0554f7d..7a35a6e71d44 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -355,3 +355,4 @@
346 i386 setns sys_setns
347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+349 i386 kcmp sys_kcmp
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index dd29a9ea27c5..51171aeff0dc 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -318,6 +318,8 @@
309 common getcpu sys_getcpu
310 64 process_vm_readv sys_process_vm_readv
311 64 process_vm_writev sys_process_vm_writev
+312 64 kcmp sys_kcmp
+
#
# x32-specific system call numbers start at 512 to avoid cache impact
# for native 64-bit operation.
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index b685296d4464..5a1847d61930 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -78,6 +78,13 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = {
static const char * const sym_regex_realmode[S_NSYMTYPES] = {
/*
+ * These symbols are known to be relative, even if the linker marks them
+ * as absolute (typically defined outside any section in the linker script.)
+ */
+ [S_REL] =
+ "^pa_",
+
+/*
* These are 16-bit segment symbols when compiling 16-bit code.
*/
[S_SEG] =
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index bb0fb03b9f85..a508cea13503 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -486,7 +486,6 @@ long sys_sigreturn(struct pt_regs *regs)
copy_from_user(&set.sig[1], extramask, sig_size))
goto segfault;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (copy_sc_from_user(&current->thread.regs, sc))
@@ -600,7 +599,6 @@ long sys_rt_sigreturn(struct pt_regs *regs)
if (copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
goto segfault;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext))
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 75f33b2a5933..e74df9548a02 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1116,7 +1116,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.wbinvd = native_wbinvd,
.read_msr = native_read_msr_safe,
+ .rdmsr_regs = native_rdmsr_safe_regs,
.write_msr = xen_write_msr_safe,
+ .wrmsr_regs = native_wrmsr_safe_regs,
+
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,