author     Jesse Keating <jkeating@redhat.com>   2010-07-29 17:18:45 -0700
committer  Jesse Keating <jkeating@redhat.com>   2010-07-29 17:18:45 -0700
commit     2f82dda4a9bf41e64e864889bf06564bdf826e25 (patch)
tree       118a7b483ae5de4dbf83d20001302f1404866ef0 /linux-2.6-execshield.patch
parent     64ba2e5ffde5f2418eb26c700cb0ab62b04e5013 (diff)
initial srpm import
Diffstat (limited to 'linux-2.6-execshield.patch')
-rw-r--r--   linux-2.6-execshield.patch   987
1 file changed, 987 insertions, 0 deletions
diff --git a/linux-2.6-execshield.patch b/linux-2.6-execshield.patch
new file mode 100644
index 0000000..61e444f
--- /dev/null
+++ b/linux-2.6-execshield.patch
@@ -0,0 +1,987 @@
+diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
+index e8de2f6..538c2b6 100644
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -5,6 +5,7 @@
+ #include <asm/ldt.h>
+ #include <asm/mmu.h>
+ #include <linux/smp.h>
++#include <linux/mm_types.h>
+ 
+ static inline void fill_ldt(struct desc_struct *desc,
+                             const struct user_desc *info)
+@@ -93,6 +94,9 @@ static inline int desc_empty(const void *ptr)
+ 
+ #define load_TLS(t, cpu) native_load_tls(t, cpu)
+ #define set_ldt native_set_ldt
++#ifdef CONFIG_X86_32
++#define load_user_cs_desc native_load_user_cs_desc
++#endif /*CONFIG_X86_32*/
+ 
+ #define write_ldt_entry(dt, entry, desc) \
+         native_write_ldt_entry(dt, entry, desc)
+@@ -392,4 +396,25 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
+         _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
+ }
+ 
++#ifdef CONFIG_X86_32
++static inline void set_user_cs(struct desc_struct *desc, unsigned long limit)
++{
++        limit = (limit - 1) / PAGE_SIZE;
++        desc->a = limit & 0xffff;
++        desc->b = (limit & 0xf0000) | 0x00c0fb00;
++}
++
++static inline void native_load_user_cs_desc(int cpu, struct mm_struct *mm)
++{
++        get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS] = (mm)->context.user_cs;
++}
++
++#define arch_add_exec_range arch_add_exec_range
++#define arch_remove_exec_range arch_remove_exec_range
++#define arch_flush_exec_range arch_flush_exec_range
++extern void arch_add_exec_range(struct mm_struct *mm, unsigned long limit);
++extern void arch_remove_exec_range(struct mm_struct *mm, unsigned long limit);
++extern void arch_flush_exec_range(struct mm_struct *mm);
++#endif /* CONFIG_X86_32 */
++
+ #endif /* _ASM_X86_DESC_H */
+diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
+index 80a1dee..8314c66 100644
+--- a/arch/x86/include/asm/mmu.h
++++ b/arch/x86/include/asm/mmu.h
+@@ -7,12 +7,19 @@
+ /*
+  * The x86 doesn't have a mmu context, but
+  * we put the segment information here.
++ *
++ * exec_limit is used to track the range PROT_EXEC
++ * mappings span.
+  */
+ typedef struct {
+         void *ldt;
+         int size;
+         struct mutex lock;
+         void *vdso;
++#ifdef CONFIG_X86_32
++        struct desc_struct user_cs;
++        unsigned long exec_limit;
++#endif
+ } mm_context_t;
+ 
+ #ifdef CONFIG_SMP
+diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
+index 8aebcc4..cbbd2b0 100644
+--- a/arch/x86/include/asm/paravirt.h
++++ b/arch/x86/include/asm/paravirt.h
+@@ -289,6 +289,12 @@ static inline void set_ldt(const void *addr, unsigned entries)
+ {
+         PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
+ }
++#ifdef CONFIG_X86_32
++static inline void load_user_cs_desc(unsigned int cpu, struct mm_struct *mm)
++{
++        PVOP_VCALL2(pv_cpu_ops.load_user_cs_desc, cpu, mm);
++}
++#endif /*CONFIG_X86_32*/
+ static inline void store_gdt(struct desc_ptr *dtr)
+ {
+         PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
+diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
+index dd0f5b3..c2727ef 100644
+--- a/arch/x86/include/asm/paravirt_types.h
++++ b/arch/x86/include/asm/paravirt_types.h
+@@ -118,6 +118,9 @@ struct pv_cpu_ops {
+         void (*store_gdt)(struct desc_ptr *);
+         void (*store_idt)(struct desc_ptr *);
+         void (*set_ldt)(const void *desc, unsigned entries);
++#ifdef CONFIG_X86_32
++        void (*load_user_cs_desc)(int cpu, struct mm_struct *mm);
++#endif
+         unsigned long (*store_tr)(void);
+         void (*load_tls)(struct thread_struct *t, unsigned int cpu);
+ #ifdef CONFIG_X86_64
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index c3429e8..62cc460 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -161,6 +161,9 @@ static inline int hlt_works(int cpu)
+ 
+ #define cache_line_size()       (boot_cpu_data.x86_cache_alignment)
+ 
++#define __HAVE_ARCH_ALIGN_STACK
++extern unsigned long arch_align_stack(unsigned long sp);
++
+ extern void cpu_detect(struct cpuinfo_x86 *c);
+ 
+ extern struct pt_regs *idle_regs(struct pt_regs *);
+diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
+index cc25c2b..6ce4863 100644
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -798,6 +798,20 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+         /* Filter out anything that depends on CPUID levels we don't have */
+         filter_cpuid_features(c, true);
+ 
++        /*
++         * emulation of NX with segment limits unfortunately means
++         * we have to disable the fast system calls, due to the way that
++         * sysexit clears the segment limits on return.
++         * If we have either disabled exec-shield on the boot command line,
++         * or we have NX, then we don't need to do this.
++         */
++        if (exec_shield != 0) {
++#ifdef CONFIG_X86_PAE
++                if (!test_cpu_cap(c, X86_FEATURE_NX))
++#endif
++                        clear_cpu_cap(c, X86_FEATURE_SEP);
++        }
++
+         /* If the model name is still unset, do table lookup. */
+         if (!c->x86_model_id[0]) {
+                 const char *p;
+diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
+index 1b1739d..c2dda16 100644
+--- a/arch/x86/kernel/paravirt.c
++++ b/arch/x86/kernel/paravirt.c
+@@ -345,6 +345,9 @@ struct pv_cpu_ops pv_cpu_ops = {
+         .read_tscp = native_read_tscp,
+         .load_tr_desc = native_load_tr_desc,
+         .set_ldt = native_set_ldt,
++#ifdef CONFIG_X86_32
++        .load_user_cs_desc = native_load_user_cs_desc,
++#endif /*CONFIG_X86_32*/
+         .load_gdt = native_load_gdt,
+         .load_idt = native_load_idt,
+         .store_gdt = native_store_gdt,
+diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
+index 4cf7956..b2407dc 100644
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -296,7 +296,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
+ void
+ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
+ {
++        int cpu;
++
+         set_user_gs(regs, 0);
++
+         regs->fs = 0;
+         set_fs(USER_DS);
+         regs->ds = __USER_DS;
+@@ -305,6 +308,11 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
+         regs->cs = __USER_CS;
+         regs->ip = new_ip;
+         regs->sp = new_sp;
++
++        cpu = get_cpu();
++        load_user_cs_desc(cpu, current->mm);
++        put_cpu();
++
+         /*
+          * Free the old FP and other extended state
+          */
+@@ -364,6 +372,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+         if (preload_fpu)
+                 prefetch(next->xstate);
+ 
++        if (next_p->mm)
++                load_user_cs_desc(cpu, next_p->mm);
++
+         /*
+          * Reload esp0.
+          */
+@@ -497,3 +508,40 @@ unsigned long get_wchan(struct task_struct *p)
+         return 0;
+ }
+ 
++static void modify_cs(struct mm_struct *mm, unsigned long limit)
++{
++        mm->context.exec_limit = limit;
++        set_user_cs(&mm->context.user_cs, limit);
++        if (mm == current->mm) {
++                int cpu;
++
++                cpu = get_cpu();
++                load_user_cs_desc(cpu, mm);
++                put_cpu();
++        }
++}
++
++void arch_add_exec_range(struct mm_struct *mm, unsigned long limit)
++{
++        if (limit > mm->context.exec_limit)
++                modify_cs(mm, limit);
++}
++
++void arch_remove_exec_range(struct mm_struct *mm, unsigned long old_end)
++{
++        struct vm_area_struct *vma;
++        unsigned long limit = PAGE_SIZE;
++
++        if (old_end == mm->context.exec_limit) {
++                for (vma = mm->mmap; vma; vma = vma->vm_next)
++                        if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
++                                limit = vma->vm_end;
++                modify_cs(mm, limit);
++        }
++}
++
++void arch_flush_exec_range(struct mm_struct *mm)
++{
++        mm->context.exec_limit = 0;
++        set_user_cs(&mm->context.user_cs, 0);
++}
+diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
+index 7e37dce..92ae538 100644
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -115,6 +115,76 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err)
+         if (!user_mode_vm(regs))
+                 die(str, regs, err);
+ }
++
++static inline int
++__compare_user_cs_desc(const struct desc_struct *desc1,
++                       const struct desc_struct *desc2)
++{
++        return ((desc1->limit0 != desc2->limit0) ||
++                (desc1->limit != desc2->limit) ||
++                (desc1->base0 != desc2->base0) ||
++                (desc1->base1 != desc2->base1) ||
++                (desc1->base2 != desc2->base2));
++}
++
++/*
++ * lazy-check for CS validity on exec-shield binaries:
++ *
++ * the original non-exec stack patch was written by
++ * Solar Designer <solar at openwall.com>. Thanks!
++ */
++static int
++check_lazy_exec_limit(int cpu, struct pt_regs *regs, long error_code)
++{
++        struct desc_struct *desc1, *desc2;
++        struct vm_area_struct *vma;
++        unsigned long limit;
++
++        if (current->mm == NULL)
++                return 0;
++
++        limit = -1UL;
++        if (current->mm->context.exec_limit != -1UL) {
++                limit = PAGE_SIZE;
++                spin_lock(&current->mm->page_table_lock);
++                for (vma = current->mm->mmap; vma; vma = vma->vm_next)
++                        if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
++                                limit = vma->vm_end;
++                vma = get_gate_vma(current);
++                if (vma && (vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
++                        limit = vma->vm_end;
++                spin_unlock(&current->mm->page_table_lock);
++                if (limit >= TASK_SIZE)
++                        limit = -1UL;
++                current->mm->context.exec_limit = limit;
++        }
++        set_user_cs(&current->mm->context.user_cs, limit);
++
++        desc1 = &current->mm->context.user_cs;
++        desc2 = get_cpu_gdt_table(cpu) + GDT_ENTRY_DEFAULT_USER_CS;
++
++        if (__compare_user_cs_desc(desc1, desc2)) {
++                /*
++                 * The CS was not in sync - reload it and retry the
++                 * instruction. If the instruction still faults then
++                 * we won't hit this branch next time around.
++                 */
++                if (print_fatal_signals >= 2) {
++                        printk(KERN_ERR "#GPF fixup (%ld[seg:%lx]) at %08lx, CPU#%d.\n",
++                               error_code, error_code/8, regs->ip,
++                               smp_processor_id());
++                        printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x, CPU_cs: %08x/%08x.\n",
++                               current->mm->context.exec_limit,
++                               desc1->a, desc1->b, desc2->a, desc2->b);
++                }
++
++                load_user_cs_desc(cpu, current->mm);
++
++                return 1;
++        }
++
++        return 0;
++}
+ #endif
+ 
+ static void __kprobes
+@@ -273,6 +343,29 @@ do_general_protection(struct pt_regs *regs, long error_code)
+         if (!user_mode(regs))
+                 goto gp_in_kernel;
+ 
++#ifdef CONFIG_X86_32
++{
++        int cpu;
++        int ok;
++
++        cpu = get_cpu();
++        ok = check_lazy_exec_limit(cpu, regs, error_code);
++        put_cpu();
++
++        if (ok)
++                return;
++
++        if (print_fatal_signals) {
++                printk(KERN_ERR "#GPF(%ld[seg:%lx]) at %08lx, CPU#%d.\n",
++                       error_code, error_code/8, regs->ip, smp_processor_id());
++                printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x.\n",
++                       current->mm->context.exec_limit,
++                       current->mm->context.user_cs.a,
++                       current->mm->context.user_cs.b);
++        }
++}
++#endif /*CONFIG_X86_32*/
++
+         tsk->thread.error_code = error_code;
+         tsk->thread.trap_no = 13;
+ 
+@@ -881,19 +974,37 @@ do_device_not_available(struct pt_regs *regs, long error_code)
+ }
+ 
+ #ifdef CONFIG_X86_32
++/*
++ * The fixup code for errors in iret jumps to here (iret_exc). It loses
++ * the original trap number and erorr code. The bogus trap 32 and error
++ * code 0 are what the vanilla kernel delivers via:
++ * DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
++ *
++ * NOTE: Because of the final "1" in the macro we need to enable interrupts.
++ *
++ * In case of a general protection fault in the iret instruction, we
++ * need to check for a lazy CS update for exec-shield.
++ */
+ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
+ {
+-        siginfo_t info;
++        int ok;
++        int cpu;
++
+         local_irq_enable();
+ 
+-        info.si_signo = SIGILL;
+-        info.si_errno = 0;
+-        info.si_code = ILL_BADSTK;
+-        info.si_addr = NULL;
+-        if (notify_die(DIE_TRAP, "iret exception",
+-                        regs, error_code, 32, SIGILL) == NOTIFY_STOP)
+-                return;
+-        do_trap(32, SIGILL, "iret exception", regs, error_code, &info);
++        cpu = get_cpu();
++        ok = check_lazy_exec_limit(cpu, regs, error_code);
++        put_cpu();
++
++        if (!ok && notify_die(DIE_TRAP, "iret exception", regs,
++                              error_code, 32, SIGSEGV) != NOTIFY_STOP) {
++                siginfo_t info;
++                info.si_signo = SIGSEGV;
++                info.si_errno = 0;
++                info.si_code = ILL_BADSTK;
++                info.si_addr = 0;
++                do_trap(32, SIGSEGV, "iret exception", regs, error_code, &info);
++        }
+ }
+ #endif
+ 
+diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
+index 73ffd55..0cf2a7b 100644
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -149,6 +149,12 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
+         set_nx();
+         if (nx_enabled)
+                 printk(KERN_INFO "NX (Execute Disable) protection: active\n");
++#ifdef CONFIG_X86_32
++        else
++        if (exec_shield)
++                printk(KERN_INFO "Using x86 segment limits to approximate "
++                        "NX protection\n");
++#endif
+ 
+         /* Enable PSE if available */
+         if (cpu_has_pse)
+diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
+index c8191de..7d84d01 100644
+--- a/arch/x86/mm/mmap.c
++++ b/arch/x86/mm/mmap.c
+@@ -124,13 +124,16 @@ static unsigned long mmap_legacy_base(void)
+  */
+ void arch_pick_mmap_layout(struct mm_struct *mm)
+ {
+-        if (mmap_is_legacy()) {
++        if (!(2 & exec_shield) && mmap_is_legacy()) {
+                 mm->mmap_base = mmap_legacy_base();
+                 mm->get_unmapped_area = arch_get_unmapped_area;
+                 mm->unmap_area = arch_unmap_area;
+         } else {
+                 mm->mmap_base = mmap_base();
+                 mm->get_unmapped_area = arch_get_unmapped_area_topdown;
++                if (!(current->personality & READ_IMPLIES_EXEC)
++                    && mmap_is_ia32())
++                        mm->get_unmapped_exec_area = arch_get_unmapped_exec_area;
+                 mm->unmap_area = arch_unmap_area_topdown;
+         }
+ }
+diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
+index 513d8ed..c614a90 100644
+--- a/arch/x86/mm/setup_nx.c
++++ b/arch/x86/mm/setup_nx.c
+@@ -1,3 +1,4 @@
++#include <linux/sched.h>
+ #include <linux/spinlock.h>
+ #include <linux/errno.h>
+ #include <linux/init.h>
+@@ -27,6 +28,9 @@ static int __init noexec_setup(char *str)
+         } else if (!strncmp(str, "off", 3)) {
+                 disable_nx = 1;
+                 __supported_pte_mask &= ~_PAGE_NX;
++#ifdef CONFIG_X86_32
++                exec_shield = 0;
++#endif
+         }
+         return 0;
+ }
+diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
+index 36fe08e..3806a45 100644
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -6,6 +6,7 @@
+ #include <linux/interrupt.h>
+ #include <linux/module.h>
+ 
++#include <asm/desc.h>
+ #include <asm/tlbflush.h>
+ #include <asm/mmu_context.h>
+ #include <asm/apic.h>
+@@ -130,6 +131,12 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
+         union smp_flush_state *f;
+ 
+         cpu = smp_processor_id();
++
++#ifdef CONFIG_X86_32
++        if (current->active_mm)
++                load_user_cs_desc(cpu, current->active_mm);
++#endif
++
+         /*
+          * orig_rax contains the negated interrupt vector.
+          * Use that to determine where the sender put the data.
+diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
+index 58bc00f..1fdafb5 100644
+--- a/arch/x86/vdso/vdso32-setup.c
++++ b/arch/x86/vdso/vdso32-setup.c
+@@ -331,7 +331,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+         if (compat)
+                 addr = VDSO_HIGH_BASE;
+         else {
+-                addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
++                addr = get_unmapped_area_prot(NULL, 0, PAGE_SIZE, 0, 0, 1);
+                 if (IS_ERR_VALUE(addr)) {
+                         ret = addr;
+                         goto up_fail;
+diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
+index 3439616..31e5c6f 100644
+--- a/arch/x86/xen/enlighten.c
++++ b/arch/x86/xen/enlighten.c
+@@ -323,6 +323,24 @@ static void xen_set_ldt(const void *addr, unsigned entries)
+         xen_mc_issue(PARAVIRT_LAZY_CPU);
+ }
+ 
++#ifdef CONFIG_X86_32
++static void xen_load_user_cs_desc(int cpu, struct mm_struct *mm)
++{
++        void *gdt;
++        xmaddr_t mgdt;
++        u64 descriptor;
++        struct desc_struct user_cs;
++
++        gdt = &get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS];
++        mgdt = virt_to_machine(gdt);
++
++        user_cs = mm->context.user_cs;
++        descriptor = (u64) user_cs.a | ((u64) user_cs.b) << 32;
++
++        HYPERVISOR_update_descriptor(mgdt.maddr, descriptor);
++}
++#endif /*CONFIG_X86_32*/
++
+ static void xen_load_gdt(const struct desc_ptr *dtr)
+ {
+         unsigned long va = dtr->address;
+@@ -949,6 +967,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
+ 
+         .load_tr_desc = paravirt_nop,
+         .set_ldt = xen_set_ldt,
++#ifdef CONFIG_X86_32
++        .load_user_cs_desc = xen_load_user_cs_desc,
++#endif /*CONFIG_X86_32*/
+         .load_gdt = xen_load_gdt,
+         .load_idt = xen_load_idt,
+         .load_tls = xen_load_tls,
+diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
+index b9b3bb5..1e55926 100644
+--- a/fs/binfmt_elf.c
++++ b/fs/binfmt_elf.c
+@@ -73,7 +73,7 @@ static struct linux_binfmt elf_format = {
+         .hasvdso = 1
+ };
+ 
+-#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
++#define BAD_ADDR(x) IS_ERR_VALUE(x)
+ 
+ static int set_brk(unsigned long start, unsigned long end)
+ {
+@@ -721,6 +721,11 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+                         break;
+                 }
+ 
++        if (current->personality == PER_LINUX && (exec_shield & 2)) {
++                executable_stack = EXSTACK_DISABLE_X;
++                current->flags |= PF_RANDOMIZE;
++        }
++
+         /* Some simple consistency checks for the interpreter */
+         if (elf_interpreter) {
+                 retval = -ELIBBAD;
+@@ -740,6 +745,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+         if (retval)
+                 goto out_free_dentry;
+ 
++#ifdef CONFIG_X86_32
++        /*
++         * Turn off the CS limit completely if exec-shield disabled or
++         * NX active:
++         */
++        if (!exec_shield || executable_stack != EXSTACK_DISABLE_X || nx_enabled)
++                arch_add_exec_range(current->mm, -1);
++#endif
++
+         /* OK, This is the point of no return */
+         current->flags &= ~PF_FORKNOEXEC;
+         current->mm->def_flags = def_flags;
+@@ -747,7 +761,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
+            may depend on the personality. */
+         SET_PERSONALITY(loc->elf_ex);
+-        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
++        if (!(exec_shield & 2) &&
++            elf_read_implies_exec(loc->elf_ex, executable_stack))
+                 current->personality |= READ_IMPLIES_EXEC;
+ 
+         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+@@ -912,7 +927,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+                                                     interpreter,
+                                                     &interp_map_addr,
+                                                     load_bias);
+-                if (!IS_ERR((void *)elf_entry)) {
++                if (!BAD_ADDR(elf_entry)) {
+                         /*
+                          * load_elf_interp() returns relocation
+                          * adjustment
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index 24c3956..88f944d 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1129,7 +1129,13 @@ extern int install_special_mapping(struct mm_struct *mm,
+                                    unsigned long addr, unsigned long len,
+                                    unsigned long flags, struct page **pages);
+ 
+-extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
++extern unsigned long get_unmapped_area_prot(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, int);
++
++static inline unsigned long get_unmapped_area(struct file *file, unsigned long addr,
++                unsigned long len, unsigned long pgoff, unsigned long flags)
++{
++        return get_unmapped_area_prot(file, addr, len, pgoff, flags, 0);
++}
+ 
+ extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+         unsigned long len, unsigned long prot,
+diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
+index 84a524a..a81e0db 100644
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -206,6 +206,9 @@ struct mm_struct {
+         unsigned long (*get_unmapped_area) (struct file *filp,
+                                 unsigned long addr, unsigned long len,
+                                 unsigned long pgoff, unsigned long flags);
++        unsigned long (*get_unmapped_exec_area) (struct file *filp,
++                                unsigned long addr, unsigned long len,
++                                unsigned long pgoff, unsigned long flags);
+         void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
+         unsigned long mmap_base;                /* base of mmap area */
+         unsigned long task_size;                /* size of task vm space */
+diff --git a/include/linux/resource.h b/include/linux/resource.h
+index 40fc7e6..68c2549 100644
+--- a/include/linux/resource.h
++++ b/include/linux/resource.h
+@@ -55,8 +55,11 @@ struct rlimit {
+ /*
+  * Limit the stack by to some sane default: root can always
+  * increase this limit if needed..  8MB seems reasonable.
++ *
++ * (2MB more to cover randomization effects.)
+  */
+-#define _STK_LIM        (8*1024*1024)
++#define _STK_LIM        (10*1024*1024)
++#define EXEC_STACK_BIAS (2*1024*1024)
+ 
+ /*
+  * GPG2 wants 64kB of mlocked memory, to make sure pass phrases
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 75e6e60..0bce489 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -102,6 +102,9 @@ struct fs_struct;
+ struct bts_context;
+ struct perf_event_context;
+ 
++extern int exec_shield;
++extern int print_fatal_signals;
++
+ /*
+  * List of flags we want to share for kernel threads,
+  * if only because they are not used by them anyway.
+@@ -378,6 +381,10 @@ extern int sysctl_max_map_count;
+ extern unsigned long
+ arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
+                        unsigned long, unsigned long);
++
++extern unsigned long
++arch_get_unmapped_exec_area(struct file *, unsigned long, unsigned long,
++                            unsigned long, unsigned long);
+ extern unsigned long
+ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
+                                unsigned long len, unsigned long pgoff,
+diff --git a/kernel/sysctl.c b/kernel/sysctl.c
+index 0d949c5..12ca319 100644
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -88,6 +88,26 @@ extern int sysctl_nr_open_min, sysctl_nr_open_max;
+ #ifndef CONFIG_MMU
+ extern int sysctl_nr_trim_pages;
+ #endif
++
++int exec_shield = (1<<0);
++/* exec_shield is a bitmask:
++ *           0: off; vdso at STACK_TOP, 1 page below TASK_SIZE
++ * (1<<0)    1: on [also on if !=0]
++ * (1<<1)    2: force noexecstack regardless of PT_GNU_STACK
++ * The old settings
++ * (1<<2)    4: vdso just below .text of main (unless too low)
++ * (1<<3)    8: vdso just below .text of PT_INTERP (unless too low)
++ * are ignored because the vdso is placed completely randomly
++ */
++
++static int __init setup_exec_shield(char *str)
++{
++        get_option(&str, &exec_shield);
++
++        return 1;
++}
++__setup("exec-shield=", setup_exec_shield);
++
+ #ifdef CONFIG_RCU_TORTURE_TEST
+ extern int rcutorture_runnable;
+ #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
+@@ -408,6 +428,14 @@ static struct ctl_table kern_table[] = {
+                 .proc_handler   = &proc_dointvec,
+         },
+         {
++                .ctl_name       = CTL_UNNUMBERED,
++                .procname       = "exec-shield",
++                .data           = &exec_shield,
++                .maxlen         = sizeof(int),
++                .mode           = 0644,
++                .proc_handler   = &proc_dointvec,
++        },
++        {
+                 .ctl_name       = KERN_CORE_USES_PID,
+                 .procname       = "core_uses_pid",
+                 .data           = &core_uses_pid,
+diff --git a/mm/mmap.c b/mm/mmap.c
+index 73f5e4b..814b95f 100644
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -29,6 +29,7 @@
+ #include <linux/rmap.h>
+ #include <linux/mmu_notifier.h>
+ #include <linux/perf_event.h>
++#include <linux/random.h>
+ 
+ #include <asm/uaccess.h>
+ #include <asm/cacheflush.h>
+@@ -45,6 +46,18 @@
+ #define arch_rebalance_pgtables(addr, len)              (addr)
+ #endif
+ 
++/* No sane architecture will #define these to anything else */
++#ifndef arch_add_exec_range
++#define arch_add_exec_range(mm, limit)  do { ; } while (0)
++#endif
++#ifndef arch_flush_exec_range
++#define arch_flush_exec_range(mm)       do { ; } while (0)
++#endif
++#ifndef arch_remove_exec_range
++#define arch_remove_exec_range(mm, limit)       do { ; } while (0)
++#endif
++
++
+ static void unmap_region(struct mm_struct *mm,
+                 struct vm_area_struct *vma, struct vm_area_struct *prev,
+                 unsigned long start, unsigned long end);
+@@ -389,6 +402,8 @@ static inline void
+ __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
+                 struct vm_area_struct *prev, struct rb_node *rb_parent)
+ {
++        if (vma->vm_flags & VM_EXEC)
++                arch_add_exec_range(mm, vma->vm_end);
+         if (prev) {
+                 vma->vm_next = prev->vm_next;
+                 prev->vm_next = vma;
+@@ -491,6 +506,8 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
+         rb_erase(&vma->vm_rb, &mm->mm_rb);
+         if (mm->mmap_cache == vma)
+                 mm->mmap_cache = prev;
++        if (vma->vm_flags & VM_EXEC)
++                arch_remove_exec_range(mm, vma->vm_end);
+ }
+ 
+ /*
+@@ -798,6 +815,8 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
+         } else                                  /* cases 2, 5, 7 */
+                 vma_adjust(prev, prev->vm_start,
+                            end, prev->vm_pgoff, NULL);
++        if (prev->vm_flags & VM_EXEC)
++                arch_add_exec_range(mm, prev->vm_end);
+         return prev;
+ }
+ 
+@@ -970,7 +989,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+         /* Obtain the address to map to. we verify (or select) it and ensure
+          * that it represents a valid section of the address space.
+          */
+-        addr = get_unmapped_area(file, addr, len, pgoff, flags);
++        addr = get_unmapped_area_prot(file, addr, len, pgoff, flags,
++                prot & PROT_EXEC);
+         if (addr & ~PAGE_MASK)
+                 return addr;
+ 
+@@ -1453,21 +1473,25 @@ void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
+ }
+ 
+ unsigned long
+-get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
+-                unsigned long pgoff, unsigned long flags)
++get_unmapped_area_prot(struct file *file, unsigned long addr, unsigned long len,
++                unsigned long pgoff, unsigned long flags, int exec)
+ {
+         unsigned long (*get_area)(struct file *, unsigned long,
+                                   unsigned long, unsigned long, unsigned long);
+ 
+         unsigned long error = arch_mmap_check(addr, len, flags);
+         if (error)
+                 return error;
+ 
+         /* Careful about overflows.. */
+         if (len > TASK_SIZE)
+                 return -ENOMEM;
+ 
+-        get_area = current->mm->get_unmapped_area;
++        if (exec && current->mm->get_unmapped_exec_area)
++                get_area = current->mm->get_unmapped_exec_area;
++        else
++                get_area = current->mm->get_unmapped_area;
++
+         if (file && file->f_op && file->f_op->get_unmapped_area)
+                 get_area = file->f_op->get_unmapped_area;
+         addr = get_area(file, addr, len, pgoff, flags);
+@@ -1473,8 +1497,76 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
+ 
+         return arch_rebalance_pgtables(addr, len);
+ }
++EXPORT_SYMBOL(get_unmapped_area_prot);
++
++#define SHLIB_BASE      0x00110000
++
++unsigned long
++arch_get_unmapped_exec_area(struct file *filp, unsigned long addr0,
++                unsigned long len0, unsigned long pgoff, unsigned long flags)
++{
++        unsigned long addr = addr0, len = len0;
++        struct mm_struct *mm = current->mm;
++        struct vm_area_struct *vma;
++        unsigned long tmp;
++
++        if (len > TASK_SIZE)
++                return -ENOMEM;
++
++        if (flags & MAP_FIXED)
++                return addr;
++
++        if (!addr)
++                addr = randomize_range(SHLIB_BASE, 0x01000000, len);
++
++        if (addr) {
++                addr = PAGE_ALIGN(addr);
++                vma = find_vma(mm, addr);
++                if (TASK_SIZE - len >= addr &&
++                    (!vma || addr + len <= vma->vm_start))
++                        return addr;
++        }
++
++        addr = SHLIB_BASE;
++        for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
++                /* At this point:  (!vma || addr < vma->vm_end). */
++                if (TASK_SIZE - len < addr)
++                        return -ENOMEM;
++
++                if (!vma || addr + len <= vma->vm_start) {
++                        /*
++                         * Must not let a PROT_EXEC mapping get into the
++                         * brk area:
++                         */
++                        if (addr + len > mm->brk)
++                                goto failed;
++
++                        /*
++                         * Up until the brk area we randomize addresses
++                         * as much as possible:
++                         */
++                        if (addr >= 0x01000000) {
++                                tmp = randomize_range(0x01000000,
++                                        PAGE_ALIGN(max(mm->start_brk,
++                                        (unsigned long)0x08000000)), len);
++                                vma = find_vma(mm, tmp);
++                                if (TASK_SIZE - len >= tmp &&
++                                    (!vma || tmp + len <= vma->vm_start))
++                                        return tmp;
++                        }
++                        /*
++                         * Ok, randomization didnt work out - return
++                         * the result of the linear search:
++                         */
++                        return addr;
++                }
++                addr = vma->vm_end;
++        }
++
++failed:
++        return current->mm->get_unmapped_area(filp, addr0, len0, pgoff, flags);
++}
+ 
+-EXPORT_SYMBOL(get_unmapped_area);
+ 
+ /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
+ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
+@@ -1549,6 +1641,14 @@ out:
+         return prev ? prev->vm_next : vma;
+ }
+ 
++static int over_stack_limit(unsigned long sz)
++{
++        if (sz < EXEC_STACK_BIAS)
++                return 0;
++        return (sz - EXEC_STACK_BIAS) >
++                        current->signal->rlim[RLIMIT_STACK].rlim_cur;
++}
++
+ /*
+  * Verify that the stack growth is acceptable and
+  * update accounting. This is shared with both the
+@@ -1565,7 +1665,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
+                 return -ENOMEM;
+ 
+         /* Stack limit test */
+-        if (size > rlim[RLIMIT_STACK].rlim_cur)
++        if (over_stack_limit(size))
+                 return -ENOMEM;
+ 
+         /* mlock limit tests */
+@@ -1875,10 +1975,14 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
+         if (new->vm_ops && new->vm_ops->open)
+                 new->vm_ops->open(new);
+ 
+-        if (new_below)
++        if (new_below) {
++                unsigned long old_end = vma->vm_end;
++
+                 vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
+                         ((addr - new->vm_start) >> PAGE_SHIFT), new);
+-        else
++                if (vma->vm_flags & VM_EXEC)
++                        arch_remove_exec_range(mm, old_end);
++        } else
+                 vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+ 
+         return 0;
+@@ -2128,6 +2232,7 @@ void exit_mmap(struct mm_struct *mm)
+ 
+         free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
+         tlb_finish_mmu(tlb, 0, end);
++        arch_flush_exec_range(mm);
+ 
+         /*
+          * Walk the list again, actually closing and freeing it,
+diff --git a/mm/mprotect.c b/mm/mprotect.c
+index 8bc969d..3c9b4fc 100644
+--- a/mm/mprotect.c
++++ b/mm/mprotect.c
+@@ -26,9 +26,14 @@
+ #include <linux/perf_event.h>
+ #include <asm/uaccess.h>
+ #include <asm/pgtable.h>
++#include <asm/pgalloc.h>
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
+ 
++#ifndef arch_remove_exec_range
++#define arch_remove_exec_range(mm, limit)       do { ; } while (0)
++#endif
++
+ #ifndef pgprot_modify
+ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
+ {
+@@ -139,7 +144,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
+         struct mm_struct *mm = vma->vm_mm;
+         unsigned long oldflags = vma->vm_flags;
+         long nrpages = (end - start) >> PAGE_SHIFT;
+-        unsigned long charged = 0;
++        unsigned long charged = 0, old_end = vma->vm_end;
+         pgoff_t pgoff;
+         int error;
+         int dirty_accountable = 0;
+@@ -204,6 +209,9 @@ success:
+                 dirty_accountable = 1;
+         }
+ 
++        if (oldflags & VM_EXEC)
++                arch_remove_exec_range(current->mm, old_end);
++
+         mmu_notifier_invalidate_range_start(mm, start, end);
+         if (is_vm_hugetlb_page(vma))
+                 hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
+diff --git a/mm/mremap.c b/mm/mremap.c
+index 97bff25..17a9fd7 100644
+--- a/mm/mremap.c
++++ b/mm/mremap.c
+@@ -414,10 +414,10 @@ unsigned long do_mremap(unsigned long addr,
+                 if (vma->vm_flags & VM_MAYSHARE)
+                         map_flags |= MAP_SHARED;
+ 
+-                new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
++                new_addr = get_unmapped_area_prot(vma->vm_file, 0, new_len,
+                         vma->vm_pgoff +
+                         ((addr - vma->vm_start) >> PAGE_SHIFT),
+-                        map_flags);
++                        map_flags, vma->vm_flags & VM_EXEC);
+                 if (new_addr & ~PAGE_MASK) {
+                         ret = new_addr;
+                         goto out;
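
The mechanism worth calling out in the patch above is set_user_cs() in arch/x86/include/asm/desc.h: the end of the highest PROT_EXEC mapping (mm->context.exec_limit) is converted to a page-granular segment limit and packed into the two words of the GDT_ENTRY_DEFAULT_USER_CS descriptor, so on CPUs without NX an instruction fetch above that limit raises #GP, which do_general_protection() then resolves lazily via check_lazy_exec_limit(). The stand-alone C sketch below mirrors that encoding for illustration only; it is not part of the patch, and the PAGE_SIZE value, the sample exec_limit, and the main() harness are assumptions.

#include <stdio.h>

#define PAGE_SIZE 4096UL        /* assumption: i386 page size */

/*
 * Mirrors set_user_cs() from the patch: convert a byte limit into a
 * page-granular 20-bit segment limit and pack it into the two words
 * of a GDT code-segment descriptor. 0x00c0fb00 supplies the fixed
 * attribute bits: G=1 (page granularity), D=1 (32-bit), P=1, DPL=3,
 * type 0xb (execute/read code, accessed).
 */
static void set_user_cs(unsigned int *a, unsigned int *b, unsigned long limit)
{
        limit = (limit - 1) / PAGE_SIZE;
        *a = limit & 0xffff;                 /* limit bits 15..0          */
        *b = (limit & 0xf0000) | 0x00c0fb00; /* limit bits 19..16 + attrs */
}

int main(void)
{
        /* hypothetical exec_limit: end of a text mapping at 0x08049000 */
        unsigned long exec_limit = 0x08049000UL;
        unsigned int a, b;
        unsigned long pages;

        set_user_cs(&a, &b, exec_limit);
        pages = (a & 0xffff) | (b & 0xf0000);
        printf("user_cs: %08x/%08x -> CS limit %#lx\n",
               a, b, (pages + 1) * PAGE_SIZE);

        set_user_cs(&a, &b, -1UL);      /* exec-shield off / NX present */
        printf("flat cs: %08x/%08x\n", a, b);
        return 0;
}

With the sample limit 0x08049000 this prints user_cs: 00008048/00c0fb00 and a decoded CS limit of 0x8049000; the -1UL case reproduces the flat 4 GB segment (0000ffff/00cffb00) that binfmt_elf.c selects via arch_add_exec_range(current->mm, -1) when exec-shield is off or real NX is available.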