author	Jesse Keating <jkeating@redhat.com>	2010-07-29 17:18:45 -0700
committer	Jesse Keating <jkeating@redhat.com>	2010-07-29 17:18:45 -0700
commit	2f82dda4a9bf41e64e864889bf06564bdf826e25 (patch)
tree	118a7b483ae5de4dbf83d20001302f1404866ef0 /linux-2.6-execshield.patch
parent	64ba2e5ffde5f2418eb26c700cb0ab62b04e5013 (diff)
initial srpm import
Diffstat (limited to 'linux-2.6-execshield.patch')
-rw-r--r--	linux-2.6-execshield.patch	987
1 file changed, 987 insertions(+), 0 deletions(-)
diff --git a/linux-2.6-execshield.patch b/linux-2.6-execshield.patch
new file mode 100644
index 0000000..61e444f
--- /dev/null
+++ b/linux-2.6-execshield.patch
@@ -0,0 +1,987 @@
+diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
+index e8de2f6..538c2b6 100644
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -5,6 +5,7 @@
+ #include <asm/ldt.h>
+ #include <asm/mmu.h>
+ #include <linux/smp.h>
++#include <linux/mm_types.h>
+
+ static inline void fill_ldt(struct desc_struct *desc,
+ const struct user_desc *info)
+@@ -93,6 +94,9 @@ static inline int desc_empty(const void *ptr)
+
+ #define load_TLS(t, cpu) native_load_tls(t, cpu)
+ #define set_ldt native_set_ldt
++#ifdef CONFIG_X86_32
++#define load_user_cs_desc native_load_user_cs_desc
++#endif /*CONFIG_X86_32*/
+
+ #define write_ldt_entry(dt, entry, desc) \
+ native_write_ldt_entry(dt, entry, desc)
+@@ -392,4 +396,25 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
+ _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
+ }
+
++#ifdef CONFIG_X86_32
++static inline void set_user_cs(struct desc_struct *desc, unsigned long limit)
++{
++ limit = (limit - 1) / PAGE_SIZE;
++ desc->a = limit & 0xffff;
++ desc->b = (limit & 0xf0000) | 0x00c0fb00;
++}
++
++static inline void native_load_user_cs_desc(int cpu, struct mm_struct *mm)
++{
++ get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS] = (mm)->context.user_cs;
++}
++
++#define arch_add_exec_range arch_add_exec_range
++#define arch_remove_exec_range arch_remove_exec_range
++#define arch_flush_exec_range arch_flush_exec_range
++extern void arch_add_exec_range(struct mm_struct *mm, unsigned long limit);
++extern void arch_remove_exec_range(struct mm_struct *mm, unsigned long limit);
++extern void arch_flush_exec_range(struct mm_struct *mm);
++#endif /* CONFIG_X86_32 */
++
+ #endif /* _ASM_X86_DESC_H */
+diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
+index 80a1dee..8314c66 100644
+--- a/arch/x86/include/asm/mmu.h
++++ b/arch/x86/include/asm/mmu.h
+@@ -7,12 +7,19 @@
+ /*
+ * The x86 doesn't have a mmu context, but
+ * we put the segment information here.
++ *
++ * exec_limit is used to track the range PROT_EXEC
++ * mappings span.
+ */
+ typedef struct {
+ void *ldt;
+ int size;
+ struct mutex lock;
+ void *vdso;
++#ifdef CONFIG_X86_32
++ struct desc_struct user_cs;
++ unsigned long exec_limit;
++#endif
+ } mm_context_t;
+
+ #ifdef CONFIG_SMP
+diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
+index 8aebcc4..cbbd2b0 100644
+--- a/arch/x86/include/asm/paravirt.h
++++ b/arch/x86/include/asm/paravirt.h
+@@ -289,6 +289,12 @@ static inline void set_ldt(const void *addr, unsigned entries)
+ {
+ PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
+ }
++#ifdef CONFIG_X86_32
++static inline void load_user_cs_desc(unsigned int cpu, struct mm_struct *mm)
++{
++ PVOP_VCALL2(pv_cpu_ops.load_user_cs_desc, cpu, mm);
++}
++#endif /*CONFIG_X86_32*/
+ static inline void store_gdt(struct desc_ptr *dtr)
+ {
+ PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
+diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
+index dd0f5b3..c2727ef 100644
+--- a/arch/x86/include/asm/paravirt_types.h
++++ b/arch/x86/include/asm/paravirt_types.h
+@@ -118,6 +118,9 @@ struct pv_cpu_ops {
+ void (*store_gdt)(struct desc_ptr *);
+ void (*store_idt)(struct desc_ptr *);
+ void (*set_ldt)(const void *desc, unsigned entries);
++#ifdef CONFIG_X86_32
++ void (*load_user_cs_desc)(int cpu, struct mm_struct *mm);
++#endif
+ unsigned long (*store_tr)(void);
+ void (*load_tls)(struct thread_struct *t, unsigned int cpu);
+ #ifdef CONFIG_X86_64
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index c3429e8..62cc460 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -161,6 +161,9 @@ static inline int hlt_works(int cpu)
+
+ #define cache_line_size() (boot_cpu_data.x86_cache_alignment)
+
++#define __HAVE_ARCH_ALIGN_STACK
++extern unsigned long arch_align_stack(unsigned long sp);
++
+ extern void cpu_detect(struct cpuinfo_x86 *c);
+
+ extern struct pt_regs *idle_regs(struct pt_regs *);
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index cc25c2b..6ce4863 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -798,6 +798,20 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+ /* Filter out anything that depends on CPUID levels we don't have */
+ filter_cpuid_features(c, true);
+
++ /*
++ * emulation of NX with segment limits unfortunately means
++ * we have to disable the fast system calls, due to the way that
++ * sysexit clears the segment limits on return.
++ * If we have either disabled exec-shield on the boot command line,
++ * or we have NX, then we don't need to do this.
++ */
++ if (exec_shield != 0) {
++#ifdef CONFIG_X86_PAE
++ if (!test_cpu_cap(c, X86_FEATURE_NX))
++#endif
++ clear_cpu_cap(c, X86_FEATURE_SEP);
++ }
++
+ /* If the model name is still unset, do table lookup. */
+ if (!c->x86_model_id[0]) {
+ const char *p;
+diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
+index 1b1739d..c2dda16 100644
+--- a/arch/x86/kernel/paravirt.c
++++ b/arch/x86/kernel/paravirt.c
+@@ -345,6 +345,9 @@ struct pv_cpu_ops pv_cpu_ops = {
+ .read_tscp = native_read_tscp,
+ .load_tr_desc = native_load_tr_desc,
+ .set_ldt = native_set_ldt,
++#ifdef CONFIG_X86_32
++ .load_user_cs_desc = native_load_user_cs_desc,
++#endif /*CONFIG_X86_32*/
+ .load_gdt = native_load_gdt,
+ .load_idt = native_load_idt,
+ .store_gdt = native_store_gdt,
+diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
+index 4cf7956..b2407dc 100644
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -296,7 +296,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
+ void
+ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
+ {
++ int cpu;
++
+ set_user_gs(regs, 0);
++
+ regs->fs = 0;
+ set_fs(USER_DS);
+ regs->ds = __USER_DS;
+@@ -305,6 +308,11 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
+ regs->cs = __USER_CS;
+ regs->ip = new_ip;
+ regs->sp = new_sp;
++
++ cpu = get_cpu();
++ load_user_cs_desc(cpu, current->mm);
++ put_cpu();
++
+ /*
+ * Free the old FP and other extended state
+ */
+@@ -364,6 +372,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+ if (preload_fpu)
+ prefetch(next->xstate);
+
++ if (next_p->mm)
++ load_user_cs_desc(cpu, next_p->mm);
++
+ /*
+ * Reload esp0.
+ */
+@@ -497,3 +508,40 @@ unsigned long get_wchan(struct task_struct *p)
+ return 0;
+ }
+
++static void modify_cs(struct mm_struct *mm, unsigned long limit)
++{
++ mm->context.exec_limit = limit;
++ set_user_cs(&mm->context.user_cs, limit);
++ if (mm == current->mm) {
++ int cpu;
++
++ cpu = get_cpu();
++ load_user_cs_desc(cpu, mm);
++ put_cpu();
++ }
++}
++
++void arch_add_exec_range(struct mm_struct *mm, unsigned long limit)
++{
++ if (limit > mm->context.exec_limit)
++ modify_cs(mm, limit);
++}
++
++void arch_remove_exec_range(struct mm_struct *mm, unsigned long old_end)
++{
++ struct vm_area_struct *vma;
++ unsigned long limit = PAGE_SIZE;
++
++ if (old_end == mm->context.exec_limit) {
++ for (vma = mm->mmap; vma; vma = vma->vm_next)
++ if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
++ limit = vma->vm_end;
++ modify_cs(mm, limit);
++ }
++}
++
++void arch_flush_exec_range(struct mm_struct *mm)
++{
++ mm->context.exec_limit = 0;
++ set_user_cs(&mm->context.user_cs, 0);
++}
+diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
+index 7e37dce..92ae538 100644
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -115,6 +115,76 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err)
+ if (!user_mode_vm(regs))
+ die(str, regs, err);
+ }
++
++static inline int
++__compare_user_cs_desc(const struct desc_struct *desc1,
++ const struct desc_struct *desc2)
++{
++ return ((desc1->limit0 != desc2->limit0) ||
++ (desc1->limit != desc2->limit) ||
++ (desc1->base0 != desc2->base0) ||
++ (desc1->base1 != desc2->base1) ||
++ (desc1->base2 != desc2->base2));
++}
++
++/*
++ * lazy-check for CS validity on exec-shield binaries:
++ *
++ * the original non-exec stack patch was written by
++ * Solar Designer <solar at openwall.com>. Thanks!
++ */
++static int
++check_lazy_exec_limit(int cpu, struct pt_regs *regs, long error_code)
++{
++ struct desc_struct *desc1, *desc2;
++ struct vm_area_struct *vma;
++ unsigned long limit;
++
++ if (current->mm == NULL)
++ return 0;
++
++ limit = -1UL;
++ if (current->mm->context.exec_limit != -1UL) {
++ limit = PAGE_SIZE;
++ spin_lock(&current->mm->page_table_lock);
++ for (vma = current->mm->mmap; vma; vma = vma->vm_next)
++ if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
++ limit = vma->vm_end;
++ vma = get_gate_vma(current);
++ if (vma && (vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
++ limit = vma->vm_end;
++ spin_unlock(&current->mm->page_table_lock);
++ if (limit >= TASK_SIZE)
++ limit = -1UL;
++ current->mm->context.exec_limit = limit;
++ }
++ set_user_cs(&current->mm->context.user_cs, limit);
++
++ desc1 = &current->mm->context.user_cs;
++ desc2 = get_cpu_gdt_table(cpu) + GDT_ENTRY_DEFAULT_USER_CS;
++
++ if (__compare_user_cs_desc(desc1, desc2)) {
++ /*
++ * The CS was not in sync - reload it and retry the
++ * instruction. If the instruction still faults then
++ * we won't hit this branch next time around.
++ */
++ if (print_fatal_signals >= 2) {
++ printk(KERN_ERR "#GPF fixup (%ld[seg:%lx]) at %08lx, CPU#%d.\n",
++ error_code, error_code/8, regs->ip,
++ smp_processor_id());
++ printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x, CPU_cs: %08x/%08x.\n",
++ current->mm->context.exec_limit,
++ desc1->a, desc1->b, desc2->a, desc2->b);
++ }
++
++ load_user_cs_desc(cpu, current->mm);
++
++ return 1;
++ }
++
++ return 0;
++}
+ #endif
+
+ static void __kprobes
+@@ -273,6 +343,29 @@ do_general_protection(struct pt_regs *regs, long error_code)
+ if (!user_mode(regs))
+ goto gp_in_kernel;
+
++#ifdef CONFIG_X86_32
++{
++ int cpu;
++ int ok;
++
++ cpu = get_cpu();
++ ok = check_lazy_exec_limit(cpu, regs, error_code);
++ put_cpu();
++
++ if (ok)
++ return;
++
++ if (print_fatal_signals) {
++ printk(KERN_ERR "#GPF(%ld[seg:%lx]) at %08lx, CPU#%d.\n",
++ error_code, error_code/8, regs->ip, smp_processor_id());
++ printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x.\n",
++ current->mm->context.exec_limit,
++ current->mm->context.user_cs.a,
++ current->mm->context.user_cs.b);
++ }
++}
++#endif /*CONFIG_X86_32*/
++
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = 13;
+
+@@ -881,19 +974,37 @@ do_device_not_available(struct pt_regs *regs, long error_code)
+ }
+
+ #ifdef CONFIG_X86_32
++/*
++ * The fixup code for errors in iret jumps to here (iret_exc). It loses
++ * the original trap number and error code. The bogus trap 32 and error
++ * code 0 are what the vanilla kernel delivers via:
++ * DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
++ *
++ * NOTE: Because of the final "1" in the macro we need to enable interrupts.
++ *
++ * In case of a general protection fault in the iret instruction, we
++ * need to check for a lazy CS update for exec-shield.
++ */
+ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
+ {
+- siginfo_t info;
++ int ok;
++ int cpu;
++
+ local_irq_enable();
+
+- info.si_signo = SIGILL;
+- info.si_errno = 0;
+- info.si_code = ILL_BADSTK;
+- info.si_addr = NULL;
+- if (notify_die(DIE_TRAP, "iret exception",
+- regs, error_code, 32, SIGILL) == NOTIFY_STOP)
+- return;
+- do_trap(32, SIGILL, "iret exception", regs, error_code, &info);
++ cpu = get_cpu();
++ ok = check_lazy_exec_limit(cpu, regs, error_code);
++ put_cpu();
++
++ if (!ok && notify_die(DIE_TRAP, "iret exception", regs,
++ error_code, 32, SIGSEGV) != NOTIFY_STOP) {
++ siginfo_t info;
++ info.si_signo = SIGSEGV;
++ info.si_errno = 0;
++ info.si_code = ILL_BADSTK;
++ info.si_addr = 0;
++ do_trap(32, SIGSEGV, "iret exception", regs, error_code, &info);
++ }
+ }
+ #endif
+
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index 73ffd55..0cf2a7b 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -149,6 +149,12 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
+ set_nx();
+ if (nx_enabled)
+ printk(KERN_INFO "NX (Execute Disable) protection: active\n");
++#ifdef CONFIG_X86_32
++ else
++ if (exec_shield)
++ printk(KERN_INFO "Using x86 segment limits to approximate "
++ "NX protection\n");
++#endif
+
+ /* Enable PSE if available */
+ if (cpu_has_pse)
+diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
+index c8191de..7d84d01 100644
+--- a/arch/x86/mm/mmap.c
++++ b/arch/x86/mm/mmap.c
+@@ -124,13 +124,16 @@ static unsigned long mmap_legacy_base(void)
+ */
+ void arch_pick_mmap_layout(struct mm_struct *mm)
+ {
+- if (mmap_is_legacy()) {
++ if (!(2 & exec_shield) && mmap_is_legacy()) {
+ mm->mmap_base = mmap_legacy_base();
+ mm->get_unmapped_area = arch_get_unmapped_area;
+ mm->unmap_area = arch_unmap_area;
+ } else {
+ mm->mmap_base = mmap_base();
+ mm->get_unmapped_area = arch_get_unmapped_area_topdown;
++ if (!(current->personality & READ_IMPLIES_EXEC)
++ && mmap_is_ia32())
++ mm->get_unmapped_exec_area = arch_get_unmapped_exec_area;
+ mm->unmap_area = arch_unmap_area_topdown;
+ }
+ }
+diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
+index 513d8ed..c614a90 100644
+--- a/arch/x86/mm/setup_nx.c
++++ b/arch/x86/mm/setup_nx.c
+@@ -1,3 +1,4 @@
++#include <linux/sched.h>
+ #include <linux/spinlock.h>
+ #include <linux/errno.h>
+ #include <linux/init.h>
+@@ -27,6 +28,9 @@ static int __init noexec_setup(char *str)
+ } else if (!strncmp(str, "off", 3)) {
+ disable_nx = 1;
+ __supported_pte_mask &= ~_PAGE_NX;
++#ifdef CONFIG_X86_32
++ exec_shield = 0;
++#endif
+ }
+ return 0;
+ }
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 36fe08e..3806a45 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -6,6 +6,7 @@
+ #include <linux/interrupt.h>
+ #include <linux/module.h>
+
++#include <asm/desc.h>
+ #include <asm/tlbflush.h>
+ #include <asm/mmu_context.h>
+ #include <asm/apic.h>
+@@ -130,6 +131,12 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
+ union smp_flush_state *f;
+
+ cpu = smp_processor_id();
++
++#ifdef CONFIG_X86_32
++ if (current->active_mm)
++ load_user_cs_desc(cpu, current->active_mm);
++#endif
++
+ /*
+ * orig_rax contains the negated interrupt vector.
+ * Use that to determine where the sender put the data.
+diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
+index 58bc00f..1fdafb5 100644
+--- a/arch/x86/vdso/vdso32-setup.c
++++ b/arch/x86/vdso/vdso32-setup.c
+@@ -331,7 +331,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+ if (compat)
+ addr = VDSO_HIGH_BASE;
+ else {
+- addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
++ addr = get_unmapped_area_prot(NULL, 0, PAGE_SIZE, 0, 0, 1);
+ if (IS_ERR_VALUE(addr)) {
+ ret = addr;
+ goto up_fail;
+diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
+index 3439616..31e5c6f 100644
+--- a/arch/x86/xen/enlighten.c
++++ b/arch/x86/xen/enlighten.c
+@@ -323,6 +323,24 @@ static void xen_set_ldt(const void *addr, unsigned entries)
+ xen_mc_issue(PARAVIRT_LAZY_CPU);
+ }
+
++#ifdef CONFIG_X86_32
++static void xen_load_user_cs_desc(int cpu, struct mm_struct *mm)
++{
++ void *gdt;
++ xmaddr_t mgdt;
++ u64 descriptor;
++ struct desc_struct user_cs;
++
++ gdt = &get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS];
++ mgdt = virt_to_machine(gdt);
++
++ user_cs = mm->context.user_cs;
++ descriptor = (u64) user_cs.a | ((u64) user_cs.b) << 32;
++
++ HYPERVISOR_update_descriptor(mgdt.maddr, descriptor);
++}
++#endif /*CONFIG_X86_32*/
++
+ static void xen_load_gdt(const struct desc_ptr *dtr)
+ {
+ unsigned long va = dtr->address;
+@@ -949,6 +967,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
+
+ .load_tr_desc = paravirt_nop,
+ .set_ldt = xen_set_ldt,
++#ifdef CONFIG_X86_32
++ .load_user_cs_desc = xen_load_user_cs_desc,
++#endif /*CONFIG_X86_32*/
+ .load_gdt = xen_load_gdt,
+ .load_idt = xen_load_idt,
+ .load_tls = xen_load_tls,
+diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
+index b9b3bb5..1e55926 100644
+--- a/fs/binfmt_elf.c
++++ b/fs/binfmt_elf.c
+@@ -73,7 +73,7 @@ static struct linux_binfmt elf_format = {
+ .hasvdso = 1
+ };
+
+-#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
++#define BAD_ADDR(x) IS_ERR_VALUE(x)
+
+ static int set_brk(unsigned long start, unsigned long end)
+ {
+@@ -721,6 +721,11 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+ break;
+ }
+
++ if (current->personality == PER_LINUX && (exec_shield & 2)) {
++ executable_stack = EXSTACK_DISABLE_X;
++ current->flags |= PF_RANDOMIZE;
++ }
++
+ /* Some simple consistency checks for the interpreter */
+ if (elf_interpreter) {
+ retval = -ELIBBAD;
+@@ -740,6 +745,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+ if (retval)
+ goto out_free_dentry;
+
++#ifdef CONFIG_X86_32
++ /*
++ * Turn off the CS limit completely if exec-shield disabled or
++ * NX active:
++ */
++ if (!exec_shield || executable_stack != EXSTACK_DISABLE_X || nx_enabled)
++ arch_add_exec_range(current->mm, -1);
++#endif
++
+ /* OK, This is the point of no return */
+ current->flags &= ~PF_FORKNOEXEC;
+ current->mm->def_flags = def_flags;
+@@ -747,7 +761,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+ /* Do this immediately, since STACK_TOP as used in setup_arg_pages
+ may depend on the personality. */
+ SET_PERSONALITY(loc->elf_ex);
+- if (elf_read_implies_exec(loc->elf_ex, executable_stack))
++ if (!(exec_shield & 2) &&
++ elf_read_implies_exec(loc->elf_ex, executable_stack))
+ current->personality |= READ_IMPLIES_EXEC;
+
+ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+@@ -912,7 +927,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+ interpreter,
+ &interp_map_addr,
+ load_bias);
+- if (!IS_ERR((void *)elf_entry)) {
++ if (!BAD_ADDR(elf_entry)) {
+ /*
+ * load_elf_interp() returns relocation
+ * adjustment
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index 24c3956..88f944d 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1129,7 +1129,13 @@ extern int install_special_mapping(struct mm_struct *mm,
+ unsigned long addr, unsigned long len,
+ unsigned long flags, struct page **pages);
+
+-extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
++extern unsigned long get_unmapped_area_prot(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, int);
++
++static inline unsigned long get_unmapped_area(struct file *file, unsigned long addr,
++ unsigned long len, unsigned long pgoff, unsigned long flags)
++{
++ return get_unmapped_area_prot(file, addr, len, pgoff, flags, 0);
++}
+
+ extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long prot,
+diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
+index 84a524a..a81e0db 100644
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -206,6 +206,9 @@ struct mm_struct {
+ unsigned long (*get_unmapped_area) (struct file *filp,
+ unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags);
++ unsigned long (*get_unmapped_exec_area) (struct file *filp,
++ unsigned long addr, unsigned long len,
++ unsigned long pgoff, unsigned long flags);
+ void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
+ unsigned long mmap_base; /* base of mmap area */
+ unsigned long task_size; /* size of task vm space */
+diff --git a/include/linux/resource.h b/include/linux/resource.h
+index 40fc7e6..68c2549 100644
+--- a/include/linux/resource.h
++++ b/include/linux/resource.h
+@@ -55,8 +55,11 @@ struct rlimit {
+ /*
+ * Limit the stack by to some sane default: root can always
+ * increase this limit if needed.. 8MB seems reasonable.
++ *
++ * (2MB more to cover randomization effects.)
+ */
+-#define _STK_LIM (8*1024*1024)
++#define _STK_LIM (10*1024*1024)
++#define EXEC_STACK_BIAS (2*1024*1024)
+
+ /*
+ * GPG2 wants 64kB of mlocked memory, to make sure pass phrases
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 75e6e60..0bce489 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -102,6 +102,9 @@ struct fs_struct;
+ struct bts_context;
+ struct perf_event_context;
+
++extern int exec_shield;
++extern int print_fatal_signals;
++
+ /*
+ * List of flags we want to share for kernel threads,
+ * if only because they are not used by them anyway.
+@@ -378,6 +381,10 @@ extern int sysctl_max_map_count;
+ extern unsigned long
+ arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
+ unsigned long, unsigned long);
++
++extern unsigned long
++arch_get_unmapped_exec_area(struct file *, unsigned long, unsigned long,
++ unsigned long, unsigned long);
+ extern unsigned long
+ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+diff --git a/kernel/sysctl.c b/kernel/sysctl.c
+index 0d949c5..12ca319 100644
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -88,6 +88,26 @@ extern int sysctl_nr_open_min, sysctl_nr_open_max;
+ #ifndef CONFIG_MMU
+ extern int sysctl_nr_trim_pages;
+ #endif
++
++int exec_shield = (1<<0);
++/* exec_shield is a bitmask:
++ * 0: off; vdso at STACK_TOP, 1 page below TASK_SIZE
++ * (1<<0) 1: on [also on if !=0]
++ * (1<<1) 2: force noexecstack regardless of PT_GNU_STACK
++ * The old settings
++ * (1<<2) 4: vdso just below .text of main (unless too low)
++ * (1<<3) 8: vdso just below .text of PT_INTERP (unless too low)
++ * are ignored because the vdso is placed completely randomly
++ */
++
++static int __init setup_exec_shield(char *str)
++{
++ get_option(&str, &exec_shield);
++
++ return 1;
++}
++__setup("exec-shield=", setup_exec_shield);
++
+ #ifdef CONFIG_RCU_TORTURE_TEST
+ extern int rcutorture_runnable;
+ #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
+@@ -408,6 +428,14 @@ static struct ctl_table kern_table[] = {
+ .proc_handler = &proc_dointvec,
+ },
+ {
++ .ctl_name = CTL_UNNUMBERED,
++ .procname = "exec-shield",
++ .data = &exec_shield,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec,
++ },
++ {
+ .ctl_name = KERN_CORE_USES_PID,
+ .procname = "core_uses_pid",
+ .data = &core_uses_pid,
+diff --git a/mm/mmap.c b/mm/mmap.c
+index 73f5e4b..814b95f 100644
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -29,6 +29,7 @@
+ #include <linux/rmap.h>
+ #include <linux/mmu_notifier.h>
+ #include <linux/perf_event.h>
++#include <linux/random.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/cacheflush.h>
+@@ -45,6 +46,18 @@
+ #define arch_rebalance_pgtables(addr, len) (addr)
+ #endif
+
++/* No sane architecture will #define these to anything else */
++#ifndef arch_add_exec_range
++#define arch_add_exec_range(mm, limit) do { ; } while (0)
++#endif
++#ifndef arch_flush_exec_range
++#define arch_flush_exec_range(mm) do { ; } while (0)
++#endif
++#ifndef arch_remove_exec_range
++#define arch_remove_exec_range(mm, limit) do { ; } while (0)
++#endif
++
++
+ static void unmap_region(struct mm_struct *mm,
+ struct vm_area_struct *vma, struct vm_area_struct *prev,
+ unsigned long start, unsigned long end);
+@@ -389,6 +402,8 @@ static inline void
+ __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
+ struct vm_area_struct *prev, struct rb_node *rb_parent)
+ {
++ if (vma->vm_flags & VM_EXEC)
++ arch_add_exec_range(mm, vma->vm_end);
+ if (prev) {
+ vma->vm_next = prev->vm_next;
+ prev->vm_next = vma;
+@@ -491,6 +506,8 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
+ rb_erase(&vma->vm_rb, &mm->mm_rb);
+ if (mm->mmap_cache == vma)
+ mm->mmap_cache = prev;
++ if (vma->vm_flags & VM_EXEC)
++ arch_remove_exec_range(mm, vma->vm_end);
+ }
+
+ /*
+@@ -798,6 +815,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
+ } else /* cases 2, 5, 7 */
+ vma_adjust(prev, prev->vm_start,
+ end, prev->vm_pgoff, NULL);
++ if (prev->vm_flags & VM_EXEC)
++ arch_add_exec_range(mm, prev->vm_end);
+ return prev;
+ }
+
+@@ -970,7 +989,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+ /* Obtain the address to map to. we verify (or select) it and ensure
+ * that it represents a valid section of the address space.
+ */
+- addr = get_unmapped_area(file, addr, len, pgoff, flags);
++ addr = get_unmapped_area_prot(file, addr, len, pgoff, flags,
++ prot & PROT_EXEC);
+ if (addr & ~PAGE_MASK)
+ return addr;
+
+@@ -1453,21 +1473,25 @@ void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
+ }
+
+ unsigned long
+-get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
+- unsigned long pgoff, unsigned long flags)
++get_unmapped_area_prot(struct file *file, unsigned long addr, unsigned long len,
++ unsigned long pgoff, unsigned long flags, int exec)
+ {
+ unsigned long (*get_area)(struct file *, unsigned long,
+ unsigned long, unsigned long, unsigned long);
+
+ unsigned long error = arch_mmap_check(addr, len, flags);
+ if (error)
+ return error;
+
+ /* Careful about overflows.. */
+ if (len > TASK_SIZE)
+ return -ENOMEM;
+
+- get_area = current->mm->get_unmapped_area;
++ if (exec && current->mm->get_unmapped_exec_area)
++ get_area = current->mm->get_unmapped_exec_area;
++ else
++ get_area = current->mm->get_unmapped_area;
++
+ if (file && file->f_op && file->f_op->get_unmapped_area)
+ get_area = file->f_op->get_unmapped_area;
+ addr = get_area(file, addr, len, pgoff, flags);
+@@ -1473,8 +1497,76 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
+
+ return arch_rebalance_pgtables(addr, len);
+ }
++EXPORT_SYMBOL(get_unmapped_area_prot);
++
++#define SHLIB_BASE 0x00110000
++
++unsigned long
++arch_get_unmapped_exec_area(struct file *filp, unsigned long addr0,
++ unsigned long len0, unsigned long pgoff, unsigned long flags)
++{
++ unsigned long addr = addr0, len = len0;
++ struct mm_struct *mm = current->mm;
++ struct vm_area_struct *vma;
++ unsigned long tmp;
++
++ if (len > TASK_SIZE)
++ return -ENOMEM;
++
++ if (flags & MAP_FIXED)
++ return addr;
++
++ if (!addr)
++ addr = randomize_range(SHLIB_BASE, 0x01000000, len);
++
++ if (addr) {
++ addr = PAGE_ALIGN(addr);
++ vma = find_vma(mm, addr);
++ if (TASK_SIZE - len >= addr &&
++ (!vma || addr + len <= vma->vm_start))
++ return addr;
++ }
++
++ addr = SHLIB_BASE;
++ for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
++ /* At this point: (!vma || addr < vma->vm_end). */
++ if (TASK_SIZE - len < addr)
++ return -ENOMEM;
++
++ if (!vma || addr + len <= vma->vm_start) {
++ /*
++ * Must not let a PROT_EXEC mapping get into the
++ * brk area:
++ */
++ if (addr + len > mm->brk)
++ goto failed;
++
++ /*
++ * Up until the brk area we randomize addresses
++ * as much as possible:
++ */
++ if (addr >= 0x01000000) {
++ tmp = randomize_range(0x01000000,
++ PAGE_ALIGN(max(mm->start_brk,
++ (unsigned long)0x08000000)), len);
++ vma = find_vma(mm, tmp);
++ if (TASK_SIZE - len >= tmp &&
++ (!vma || tmp + len <= vma->vm_start))
++ return tmp;
++ }
++ /*
++ * Ok, randomization didn't work out - return
++ * the result of the linear search:
++ */
++ return addr;
++ }
++ addr = vma->vm_end;
++ }
++
++failed:
++ return current->mm->get_unmapped_area(filp, addr0, len0, pgoff, flags);
++}
+
+-EXPORT_SYMBOL(get_unmapped_area);
+
+ /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
+ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
+@@ -1549,6 +1641,14 @@ out:
+ return prev ? prev->vm_next : vma;
+ }
+
++static int over_stack_limit(unsigned long sz)
++{
++ if (sz < EXEC_STACK_BIAS)
++ return 0;
++ return (sz - EXEC_STACK_BIAS) >
++ current->signal->rlim[RLIMIT_STACK].rlim_cur;
++}
++
+ /*
+ * Verify that the stack growth is acceptable and
+ * update accounting. This is shared with both the
+@@ -1565,7 +1665,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
+ return -ENOMEM;
+
+ /* Stack limit test */
+- if (size > rlim[RLIMIT_STACK].rlim_cur)
++ if (over_stack_limit(size))
+ return -ENOMEM;
+
+ /* mlock limit tests */
+@@ -1875,10 +1975,14 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
+ if (new->vm_ops && new->vm_ops->open)
+ new->vm_ops->open(new);
+
+- if (new_below)
++ if (new_below) {
++ unsigned long old_end = vma->vm_end;
++
+ vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
+ ((addr - new->vm_start) >> PAGE_SHIFT), new);
+- else
++ if (vma->vm_flags & VM_EXEC)
++ arch_remove_exec_range(mm, old_end);
++ } else
+ vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+
+ return 0;
+@@ -2128,6 +2232,7 @@ void exit_mmap(struct mm_struct *mm)
+
+ free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
+ tlb_finish_mmu(tlb, 0, end);
++ arch_flush_exec_range(mm);
+
+ /*
+ * Walk the list again, actually closing and freeing it,
+diff --git a/mm/mprotect.c b/mm/mprotect.c
+index 8bc969d..3c9b4fc 100644
+--- a/mm/mprotect.c
++++ b/mm/mprotect.c
+@@ -26,9 +26,14 @@
+ #include <linux/perf_event.h>
+ #include <asm/uaccess.h>
+ #include <asm/pgtable.h>
++#include <asm/pgalloc.h>
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
+
++#ifndef arch_remove_exec_range
++#define arch_remove_exec_range(mm, limit) do { ; } while (0)
++#endif
++
+ #ifndef pgprot_modify
+ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
+ {
+@@ -139,7 +144,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long oldflags = vma->vm_flags;
+ long nrpages = (end - start) >> PAGE_SHIFT;
+- unsigned long charged = 0;
++ unsigned long charged = 0, old_end = vma->vm_end;
+ pgoff_t pgoff;
+ int error;
+ int dirty_accountable = 0;
+@@ -204,6 +209,9 @@ success:
+ dirty_accountable = 1;
+ }
+
++ if (oldflags & VM_EXEC)
++ arch_remove_exec_range(current->mm, old_end);
++
+ mmu_notifier_invalidate_range_start(mm, start, end);
+ if (is_vm_hugetlb_page(vma))
+ hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
+diff --git a/mm/mremap.c b/mm/mremap.c
+index 97bff25..17a9fd7 100644
+--- a/mm/mremap.c
++++ b/mm/mremap.c
+@@ -414,10 +414,10 @@ unsigned long do_mremap(unsigned long addr,
+ if (vma->vm_flags & VM_MAYSHARE)
+ map_flags |= MAP_SHARED;
+
+- new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
++ new_addr = get_unmapped_area_prot(vma->vm_file, 0, new_len,
+ vma->vm_pgoff +
+ ((addr - vma->vm_start) >> PAGE_SHIFT),
+- map_flags);
++ map_flags, vma->vm_flags & VM_EXEC);
+ if (new_addr & ~PAGE_MASK) {
+ ret = new_addr;
+ goto out;