From c5c99429fa57dcf6e05203ebe3676db1ec646793 Mon Sep 17 00:00:00 2001
From: Larry Woodman
Date: Thu, 24 Jan 2008 05:49:25 -0800
Subject: fix hugepages leak due to pagetable page sharing

The shared page table code for hugetlb memory on x86 and x86_64 is causing a
leak.  When a user of hugepages exits using this code the system leaks some
of the hugepages.

-------------------------------------------------------
Part of /proc/meminfo just before database startup:
HugePages_Total:  5500
HugePages_Free:   5500
HugePages_Rsvd:      0
Hugepagesize:     2048 kB

Just before shutdown:
HugePages_Total:  5500
HugePages_Free:   4475
HugePages_Rsvd:      0
Hugepagesize:     2048 kB

After shutdown:
HugePages_Total:  5500
HugePages_Free:   4988
HugePages_Rsvd:      0
Hugepagesize:     2048 kB
----------------------------------------------------------

The problem occurs during a fork, in copy_hugetlb_page_range().  It locates
the dst_pte using huge_pte_alloc().  Since huge_pte_alloc() calls
huge_pmd_share(), it will share the pmd page if it can, yet the main loop in
copy_hugetlb_page_range() does a get_page() on every hugepage.  This is a
violation of the shared hugepmd pagetable protocol and creates additional
references to the hugepages, causing a leak when the VMA is unmapped.

We can skip the entire replication of the ptes when the hugepage pagetables
are shared.  The attached patch skips copying the ptes and the get_page()
calls if the hugetlbpage pagetable is shared.

[akpm@linux-foundation.org: coding-style cleanups]
Signed-off-by: Larry Woodman
Signed-off-by: Adam Litke
Cc: Badari Pulavarty
Cc: Ken Chen
Cc: David Gibson
Cc: William Lee Irwin III
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/hugetlb.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'mm')

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e0fda156f02..db861d8b6c2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -699,6 +699,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 		dst_pte = huge_pte_alloc(dst, addr);
 		if (!dst_pte)
 			goto nomem;
+
+		/* If the pagetables are shared don't copy or take references */
+		if (dst_pte == src_pte)
+			continue;
+
 		spin_lock(&dst->page_table_lock);
 		spin_lock(&src->page_table_lock);
 		if (!pte_none(*src_pte)) {
-- cgit

From 9c09a95cf431fcf5720f2e408befa24b32b8cf4d Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Thu, 24 Jan 2008 05:49:54 -0800
Subject: slab: partially revert list3 changes

Partially revert the changes made by 04231b3002ac53f8a64a7bd142fde3fa4b6808c6
to the kmem_list3 management.
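As a hedged aside (not part of either patch; the loop and printk below are purely illustrative): the iterator choice matters because for_each_node_state(nid, N_NORMAL_MEMORY) visits only nodes that actually have normal memory, so an online but memoryless node never gets its kmem_list3 set up, whereas for_each_online_node(nid) visits it as well.

	int nid;

	for_each_online_node(nid) {
		/* reached for every online node, with or without local memory */
		if (!node_state(nid, N_NORMAL_MEMORY))
			printk(KERN_DEBUG "node %d is online but memoryless\n", nid);
	}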
On a machine with a memoryless node, this BUG_ON was triggering static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { struct list_head *entry; struct slab *slabp; struct kmem_list3 *l3; void *obj; int x; l3 = cachep->nodelists[nodeid]; BUG_ON(!l3); Signed-off-by: Mel Gorman Cc: Pekka Enberg Acked-by: Christoph Lameter Cc: "Aneesh Kumar K.V" Cc: Nishanth Aravamudan Cc: KAMEZAWA Hiroyuki Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mm') diff --git a/mm/slab.c b/mm/slab.c index aebb9f68557..b03b2e46b80 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1590,7 +1590,7 @@ void __init kmem_cache_init(void) /* Replace the static kmem_list3 structures for the boot cpu */ init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], node); - for_each_node_state(nid, N_NORMAL_MEMORY) { + for_each_online_node(nid) { init_list(malloc_sizes[INDEX_AC].cs_cachep, &initkmem_list3[SIZE_AC + nid], nid); @@ -1968,7 +1968,7 @@ static void __init set_up_list3s(struct kmem_cache *cachep, int index) { int node; - for_each_node_state(node, N_NORMAL_MEMORY) { + for_each_online_node(node) { cachep->nodelists[node] = &initkmem_list3[index + node]; cachep->nodelists[node]->next_reap = jiffies + REAPTIMEOUT_LIST3 + @@ -3815,7 +3815,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep) struct array_cache *new_shared; struct array_cache **new_alien = NULL; - for_each_node_state(node, N_NORMAL_MEMORY) { + for_each_online_node(node) { if (use_alien_caches) { new_alien = alloc_alien_cache(node, cachep->limit); -- cgit From 88c3f7a8f2c86be264d326cf6f49a3e8c30d13a6 Mon Sep 17 00:00:00 2001 From: Richard Knutsson Date: Sat, 8 Dec 2007 12:02:48 +0100 Subject: Security: remove security_file_mmap hook sparse-warnings (NULL as 0). Fixing: CHECK mm/mmap.c mm/mmap.c:1623:29: warning: Using plain integer as NULL pointer mm/mmap.c:1623:29: warning: Using plain integer as NULL pointer mm/mmap.c:1944:29: warning: Using plain integer as NULL pointer Signed-off-by: Richard Knutsson Signed-off-by: James Morris --- mm/mmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm') diff --git a/mm/mmap.c b/mm/mmap.c index 15678aa6ec7..bfa389fc6de 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1620,7 +1620,7 @@ static inline int expand_downwards(struct vm_area_struct *vma, return -ENOMEM; address &= PAGE_MASK; - error = security_file_mmap(0, 0, 0, 0, address, 1); + error = security_file_mmap(NULL, 0, 0, 0, address, 1); if (error) return error; @@ -1941,7 +1941,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len) if (is_hugepage_only_range(mm, addr, len)) return -EINVAL; - error = security_file_mmap(0, 0, 0, 0, addr, 1); + error = security_file_mmap(NULL, 0, 0, 0, addr, 1); if (error) return error; -- cgit From 3514faca19a6fdc209734431c509631ea92b094e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 16 Oct 2007 10:11:44 -0600 Subject: kobject: remove struct kobj_type from struct kset We don't need a "default" ktype for a kset. We should set this explicitly every time for each kset. This change is needed so that we can make ksets dynamic, and cleans up one of the odd, undocumented assumption that the kset/kobject/ktype model has. This patch is based on a lot of help from Kay Sievers. 
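A minimal sketch of the convention this change assumes (illustrative only; obj, err, example_kset and example_ktype are hypothetical names, mirroring the slub hunk below): each kobject now names both its kset and its ktype explicitly before registration, instead of inheriting a default ktype from the kset.

	obj->kobj.kset = &example_kset;		/* hypothetical kset */
	obj->kobj.ktype = &example_ktype;	/* hypothetical ktype, no longer taken from the kset */
	kobject_init(&obj->kobj);
	err = kobject_add(&obj->kobj);
	if (err)
		return err;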
Nasty bug in the block code was found by Dave Young Cc: Kay Sievers Cc: Dave Young Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'mm') diff --git a/mm/slub.c b/mm/slub.c index 474945ecd89..40bdf41035e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3962,7 +3962,7 @@ static struct kset_uevent_ops slab_uevent_ops = { .filter = uevent_filter, }; -static decl_subsys(slab, &slab_ktype, &slab_uevent_ops); +static decl_subsys(slab, &slab_uevent_ops); #define ID_STR_LENGTH 64 @@ -4025,8 +4025,9 @@ static int sysfs_slab_add(struct kmem_cache *s) name = create_unique_id(s); } - kobj_set_kset_s(s, slab_subsys); kobject_set_name(&s->kobj, name); + s->kobj.kset = &slab_subsys; + s->kobj.ktype = &slab_ktype; kobject_init(&s->kobj); err = kobject_add(&s->kobj); if (err) -- cgit From 27c3a314d55b810f3b51902d1d815c714a7afbd2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 1 Nov 2007 09:29:06 -0600 Subject: kset: convert slub to use kset_create Dynamically create the kset instead of declaring it statically. Cc: Kay Sievers Cc: Christoph Lameter Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'mm') diff --git a/mm/slub.c b/mm/slub.c index 40bdf41035e..886131c5b5c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3962,7 +3962,7 @@ static struct kset_uevent_ops slab_uevent_ops = { .filter = uevent_filter, }; -static decl_subsys(slab, &slab_uevent_ops); +static struct kset *slab_kset; #define ID_STR_LENGTH 64 @@ -4015,7 +4015,7 @@ static int sysfs_slab_add(struct kmem_cache *s) * This is typically the case for debug situations. In that * case we can catch duplicate names easily. */ - sysfs_remove_link(&slab_subsys.kobj, s->name); + sysfs_remove_link(&slab_kset->kobj, s->name); name = s->name; } else { /* @@ -4026,7 +4026,7 @@ static int sysfs_slab_add(struct kmem_cache *s) } kobject_set_name(&s->kobj, name); - s->kobj.kset = &slab_subsys; + s->kobj.kset = slab_kset; s->kobj.ktype = &slab_ktype; kobject_init(&s->kobj); err = kobject_add(&s->kobj); @@ -4071,9 +4071,8 @@ static int sysfs_slab_alias(struct kmem_cache *s, const char *name) /* * If we have a leftover link then remove it. */ - sysfs_remove_link(&slab_subsys.kobj, name); - return sysfs_create_link(&slab_subsys.kobj, - &s->kobj, name); + sysfs_remove_link(&slab_kset->kobj, name); + return sysfs_create_link(&slab_kset->kobj, &s->kobj, name); } al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL); @@ -4092,8 +4091,8 @@ static int __init slab_sysfs_init(void) struct kmem_cache *s; int err; - err = subsystem_register(&slab_subsys); - if (err) { + slab_kset = kset_create_and_add("slab", &slab_uevent_ops, NULL); + if (!slab_kset) { printk(KERN_ERR "Cannot register slab subsystem.\n"); return -ENOSYS; } -- cgit From 081248de0a0288a0ce4e1447a07ccf56aa4fae01 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 1 Nov 2007 09:29:06 -0600 Subject: kset: move /sys/slab to /sys/kernel/slab /sys/kernel is where these things should go. Also updated the documentation and tool that used this directory. 
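As a hedged illustration of how the placement is chosen (not part of the patch; both calls are taken from the surrounding hunks): the third argument of kset_create_and_add() is the parent kobject, so a NULL parent registers the kset at the top of sysfs, while parenting it on the kernel kset's kobject puts it under /sys/kernel.

	/* parent NULL: the kset appears as /sys/slab */
	slab_kset = kset_create_and_add("slab", &slab_uevent_ops, NULL);

	/* parent &kernel_kset->kobj: the kset appears as /sys/kernel/slab */
	slab_kset = kset_create_and_add("slab", &slab_uevent_ops, &kernel_kset->kobj);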
Cc: Kay Sievers Acked-by: Christoph Lameter Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/slub.c b/mm/slub.c index 886131c5b5c..b6c79462157 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4091,7 +4091,8 @@ static int __init slab_sysfs_init(void) struct kmem_cache *s; int err; - slab_kset = kset_create_and_add("slab", &slab_uevent_ops, NULL); + slab_kset = kset_create_and_add("slab", &slab_uevent_ops, + &kernel_kset->kobj); if (!slab_kset) { printk(KERN_ERR "Cannot register slab subsystem.\n"); return -ENOSYS; -- cgit From 0ff21e46630abce11fdaaffabd72bbd4eed5ac2c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 6 Nov 2007 10:36:58 -0800 Subject: kobject: convert kernel_kset to be a kobject kernel_kset does not need to be a kset, but a much simpler kobject now that we have kobj_attributes. We also rename kernel_kset to kernel_kobj to catch all users of this symbol with a build error instead of an easy-to-ignore build warning. Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'mm') diff --git a/mm/slub.c b/mm/slub.c index b6c79462157..d26177fb293 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4091,8 +4091,7 @@ static int __init slab_sysfs_init(void) struct kmem_cache *s; int err; - slab_kset = kset_create_and_add("slab", &slab_uevent_ops, - &kernel_kset->kobj); + slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj); if (!slab_kset) { printk(KERN_ERR "Cannot register slab subsystem.\n"); return -ENOSYS; -- cgit From 1eada11c88251e0a30ce5690d2607bb4293b3564 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 17 Dec 2007 23:05:35 -0700 Subject: Kobject: convert mm/slub.c to use kobject_init/add_ng() This converts the code to use the new kobject functions, cleaning up the logic in doing so. Cc: Christoph Lameter Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'mm') diff --git a/mm/slub.c b/mm/slub.c index d26177fb293..5cc4b7dddb5 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4025,13 +4025,12 @@ static int sysfs_slab_add(struct kmem_cache *s) name = create_unique_id(s); } - kobject_set_name(&s->kobj, name); s->kobj.kset = slab_kset; - s->kobj.ktype = &slab_ktype; - kobject_init(&s->kobj); - err = kobject_add(&s->kobj); - if (err) + err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name); + if (err) { + kobject_put(&s->kobj); return err; + } err = sysfs_create_group(&s->kobj, &slab_attr_group); if (err) -- cgit From 556a169dab38b5100df6f4a45b655dddd3db94c1 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 25 Jan 2008 08:20:51 +0200 Subject: slab: fix bootstrap on memoryless node If the node we're booting on doesn't have memory, bootstrapping kmalloc() caches resorts to fallback_alloc() which requires ->nodelists set for all nodes. Fix that by calling set_up_list3s() for CACHE_CACHE in kmem_cache_init(). As kmem_getpages() is called with GFP_THISNODE set, this used to work before because of breakage in 2.6.22 and before with GFP_THISNODE returning pages from the wrong node if a node had no memory. So it may have worked accidentally and in an unsafe manner because the pages would have been associated with the wrong node which could trigger bug ons and locking troubles. 
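A hedged sketch of the bootstrap array layout the fix relies on (reconstructed from the hunks below, not quoted from them): initkmem_list3[] now carries 3 * MAX_NUMNODES entries, so cache_cache, the AC kmalloc cache and the kmem_list3-sized kmalloc cache each get one boot-time list per node, indexed as base + node.

	/*
	 * initkmem_list3[NUM_INIT_LISTS], NUM_INIT_LISTS == 3 * MAX_NUMNODES:
	 *   [CACHE_CACHE + node]	boot lists for cache_cache
	 *   [SIZE_AC     + node]	boot lists for malloc_sizes[INDEX_AC].cs_cachep
	 *   [SIZE_L3     + node]	boot lists for the kmem_list3-sized kmalloc cache
	 */
	cachep->nodelists[node] = &initkmem_list3[index + node];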
Tested-by: Mel Gorman Tested-by: Olaf Hering Reviewed-by: Christoph Lameter Signed-off-by: Pekka Enberg [ With additional one-liner by Olaf Hering - Linus ] Signed-off-by: Linus Torvalds --- mm/slab.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'mm') diff --git a/mm/slab.c b/mm/slab.c index b03b2e46b80..ff31261fd24 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -304,11 +304,11 @@ struct kmem_list3 { /* * Need this for bootstrapping a per node allocator. */ -#define NUM_INIT_LISTS (2 * MAX_NUMNODES + 1) +#define NUM_INIT_LISTS (3 * MAX_NUMNODES) struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; #define CACHE_CACHE 0 -#define SIZE_AC 1 -#define SIZE_L3 (1 + MAX_NUMNODES) +#define SIZE_AC MAX_NUMNODES +#define SIZE_L3 (2 * MAX_NUMNODES) static int drain_freelist(struct kmem_cache *cache, struct kmem_list3 *l3, int tofree); @@ -1409,6 +1409,22 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, local_irq_enable(); } +/* + * For setting up all the kmem_list3s for cache whose buffer_size is same as + * size of kmem_list3. + */ +static void __init set_up_list3s(struct kmem_cache *cachep, int index) +{ + int node; + + for_each_online_node(node) { + cachep->nodelists[node] = &initkmem_list3[index + node]; + cachep->nodelists[node]->next_reap = jiffies + + REAPTIMEOUT_LIST3 + + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; + } +} + /* * Initialisation. Called after the page allocator have been initialised and * before smp_init(). @@ -1432,6 +1448,7 @@ void __init kmem_cache_init(void) if (i < MAX_NUMNODES) cache_cache.nodelists[i] = NULL; } + set_up_list3s(&cache_cache, CACHE_CACHE); /* * Fragmentation resistance on low memory - only use bigger @@ -1587,10 +1604,9 @@ void __init kmem_cache_init(void) { int nid; - /* Replace the static kmem_list3 structures for the boot cpu */ - init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], node); - for_each_online_node(nid) { + init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], nid); + init_list(malloc_sizes[INDEX_AC].cs_cachep, &initkmem_list3[SIZE_AC + nid], nid); @@ -1960,22 +1976,6 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) } } -/* - * For setting up all the kmem_list3s for cache whose buffer_size is same as - * size of kmem_list3. - */ -static void __init set_up_list3s(struct kmem_cache *cachep, int index) -{ - int node; - - for_each_online_node(node) { - cachep->nodelists[node] = &initkmem_list3[index + node]; - cachep->nodelists[node]->next_reap = jiffies + - REAPTIMEOUT_LIST3 + - ((unsigned long)cachep) % REAPTIMEOUT_LIST3; - } -} - static void __kmem_cache_destroy(struct kmem_cache *cachep) { int i; @@ -2099,7 +2099,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) g_cpucache_up = PARTIAL_L3; } else { int node; - for_each_node_state(node, N_NORMAL_MEMORY) { + for_each_online_node(node) { cachep->nodelists[node] = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node); -- cgit From 95402b3829010fe1e208f44e4a158ccade88969a Mon Sep 17 00:00:00 2001 From: Gautham R Shenoy Date: Fri, 25 Jan 2008 21:08:02 +0100 Subject: cpu-hotplug: replace per-subsystem mutexes with get_online_cpus() This patch converts the known per-subsystem mutexes to get_online_cpus put_online_cpus. It also eliminates the CPU_LOCK_ACQUIRE and CPU_LOCK_RELEASE hotplug notification events. 
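A minimal hedged sketch of the replacement pattern (the comment placeholder stands in for whatever the subsystem does; it is not code from the patch): rather than the hotplug notifier taking the subsystem mutex on CPU_LOCK_ACQUIRE, each path that needs a stable cpu_online_map now brackets its own critical section.

	get_online_cpus();			/* block CPU hotplug */
	mutex_lock(&cache_chain_mutex);
	/* ... operate on the cache chain with cpu_online_map stable ... */
	mutex_unlock(&cache_chain_mutex);
	put_online_cpus();			/* re-enable CPU hotplug */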
Signed-off-by: Gautham R Shenoy Signed-off-by: Ingo Molnar --- mm/slab.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'mm') diff --git a/mm/slab.c b/mm/slab.c index ff31261fd24..40c00dacbe4 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -730,8 +730,7 @@ static inline void init_lock_keys(void) #endif /* - * 1. Guard access to the cache-chain. - * 2. Protect sanity of cpu_online_map against cpu hotplug events + * Guard access to the cache-chain. */ static DEFINE_MUTEX(cache_chain_mutex); static struct list_head cache_chain; @@ -1331,12 +1330,11 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, int err = 0; switch (action) { - case CPU_LOCK_ACQUIRE: - mutex_lock(&cache_chain_mutex); - break; case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: + mutex_lock(&cache_chain_mutex); err = cpuup_prepare(cpu); + mutex_unlock(&cache_chain_mutex); break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: @@ -1373,9 +1371,8 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, #endif case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: + mutex_lock(&cache_chain_mutex); cpuup_canceled(cpu); - break; - case CPU_LOCK_RELEASE: mutex_unlock(&cache_chain_mutex); break; } @@ -2170,6 +2167,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, * We use cache_chain_mutex to ensure a consistent view of * cpu_online_map as well. Please see cpuup_callback */ + get_online_cpus(); mutex_lock(&cache_chain_mutex); list_for_each_entry(pc, &cache_chain, next) { @@ -2396,6 +2394,7 @@ oops: panic("kmem_cache_create(): failed to create slab `%s'\n", name); mutex_unlock(&cache_chain_mutex); + put_online_cpus(); return cachep; } EXPORT_SYMBOL(kmem_cache_create); @@ -2547,9 +2546,11 @@ int kmem_cache_shrink(struct kmem_cache *cachep) int ret; BUG_ON(!cachep || in_interrupt()); + get_online_cpus(); mutex_lock(&cache_chain_mutex); ret = __cache_shrink(cachep); mutex_unlock(&cache_chain_mutex); + put_online_cpus(); return ret; } EXPORT_SYMBOL(kmem_cache_shrink); @@ -2575,6 +2576,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep) BUG_ON(!cachep || in_interrupt()); /* Find the cache in the chain of caches. */ + get_online_cpus(); mutex_lock(&cache_chain_mutex); /* * the chain is never empty, cache_cache is never destroyed @@ -2584,6 +2586,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep) slab_error(cachep, "Can't free all objects"); list_add(&cachep->next, &cache_chain); mutex_unlock(&cache_chain_mutex); + put_online_cpus(); return; } @@ -2592,6 +2595,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep) __kmem_cache_destroy(cachep); mutex_unlock(&cache_chain_mutex); + put_online_cpus(); } EXPORT_SYMBOL(kmem_cache_destroy); -- cgit From fa717060f1ab7eb6570f2fb49136f838fc9195a9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 25 Jan 2008 21:08:27 +0100 Subject: sched: sched_rt_entity Move the task_struct members specific to rt scheduling together. A future optimization could be to put sched_entity and sched_rt_entity into a union. Signed-off-by: Peter Zijlstra CC: Srivatsa Vaddagiri Signed-off-by: Ingo Molnar --- mm/oom_kill.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 91a081a82f5..96473b48209 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -286,7 +286,7 @@ static void __oom_kill_task(struct task_struct *p, int verbose) * all the memory it needs. That way it should be able to * exit() and clear out its resources quickly... 
*/ - p->time_slice = HZ; + p->rt.time_slice = HZ; set_tsk_thread_flag(p, TIF_MEMDIE); force_sig(SIGKILL, p); -- cgit From d5f68c6dbda8e63df258a0c639f03d7565cf1d50 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 22 Nov 2007 16:28:26 +0900 Subject: sh: Bump number of quicklists for SH-5. Sync up with the SH definitions. Signed-off-by: Paul Mundt --- mm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/Kconfig b/mm/Kconfig index 9ef97417a0b..0016ebd4dcb 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -187,7 +187,7 @@ config BOUNCE config NR_QUICK int depends on QUICKLIST - default "2" if (SUPERH && !SUPERH64) + default "2" if SUPERH default "1" config VIRT_TO_BUS -- cgit From c1d171a002942ea2d93b4fbd0c9583c56fce0772 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Wed, 30 Jan 2008 13:30:40 +0100 Subject: x86: randomize brk Randomize the location of the heap (brk) for i386 and x86_64. The range is randomized in the range starting at current brk location up to 0x02000000 offset for both architectures. This, together with pie-executable-randomization.patch and pie-executable-randomization-fix.patch, should make the address space randomization on i386 and x86_64 complete. Arjan says: This is known to break older versions of some emacs variants, whose dumper code assumed that the last variable declared in the program is equal to the start of the dynamically allocated memory region. (The dumper is the code where emacs effectively dumps core at the end of it's compilation stage; this coredump is then loaded as the main program during normal use) iirc this was 5 years or so; we found this way back when I was at RH and we first did the security stuff there (including this brk randomization). It wasn't all variants of emacs, and it got fixed as a result (I vaguely remember that emacs already had code to deal with it for other archs/oses, just ifdeffed wrongly). It's a rare and wrong assumption as a general thing, just on x86 it mostly happened to be true (but to be honest, it'll break too if gcc does something fancy or if the linker does a non-standard order). Still its something we should at least document. Note 2: afaik it only broke the emacs *build*. I'm not 100% sure about that (it IS 5 years ago) though. [ akpm@linux-foundation.org: deuglification ] Signed-off-by: Jiri Kosina Cc: Arjan van de Ven Cc: Roland McGrath Cc: Jakub Jelinek Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- mm/mmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/mmap.c b/mm/mmap.c index bfa389fc6de..d2b6d44962b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -251,7 +251,8 @@ asmlinkage unsigned long sys_brk(unsigned long brk) * not page aligned -Ram Gupta */ rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; - if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim) + if (rlim < RLIM_INFINITY && (brk - mm->start_brk) + + (mm->end_data - mm->start_data) > rlim) goto out; newbrk = PAGE_ALIGN(brk); -- cgit From 95c354fe9f7d6decc08a92aa26eb233ecc2155bf Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 30 Jan 2008 13:31:20 +0100 Subject: spinlock: lockbreak cleanup The break_lock data structure and code for spinlocks is quite nasty. Not only does it double the size of a spinlock but it changes locking to a potentially less optimal trylock. 
Put all of that under CONFIG_GENERIC_LOCKBREAK, and introduce a __raw_spin_is_contended that uses the lock data itself to determine whether there are waiters on the lock, to be used if CONFIG_GENERIC_LOCKBREAK is not set. Rename need_lockbreak to spin_needbreak, make it use spin_is_contended to decouple it from the spinlock implementation, and make it typesafe (rwlocks do not have any need_lockbreak sites -- why do they even get bloated up with that break_lock then?). Signed-off-by: Nick Piggin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- mm/memory.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'mm') diff --git a/mm/memory.c b/mm/memory.c index 4b0144b24c1..673ebbf499c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -513,8 +513,7 @@ again: if (progress >= 32) { progress = 0; if (need_resched() || - need_lockbreak(src_ptl) || - need_lockbreak(dst_ptl)) + spin_needbreak(src_ptl) || spin_needbreak(dst_ptl)) break; } if (pte_none(*src_pte)) { @@ -853,7 +852,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, tlb_finish_mmu(*tlbp, tlb_start, start); if (need_resched() || - (i_mmap_lock && need_lockbreak(i_mmap_lock))) { + (i_mmap_lock && spin_needbreak(i_mmap_lock))) { if (i_mmap_lock) { *tlbp = NULL; goto out; @@ -1768,8 +1767,7 @@ again: restart_addr = zap_page_range(vma, start_addr, end_addr - start_addr, details); - need_break = need_resched() || - need_lockbreak(details->i_mmap_lock); + need_break = need_resched() || spin_needbreak(details->i_mmap_lock); if (restart_addr >= end_addr) { /* We have now completed this vma: mark it so */ -- cgit From 03252919b79891063cf99145612360efbdf9500b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 30 Jan 2008 13:33:18 +0100 Subject: x86: print which shared library/executable faulted in segfault etc. messages v3 They now look like: hal-resmgr[13791]: segfault at 3c rip 2b9c8caec182 rsp 7fff1e825d30 error 4 in libacl.so.1.1.0[2b9c8caea000+6000] This makes it easier to pinpoint bugs to specific libraries. And printing the offset into a mapping also always allows to find the correct fault point in a library even with randomized mappings. Previously there was no way to actually find the correct code address inside the randomized mapping. Relies on earlier patch to shorten the printk formats. They are often now longer than 80 characters, but I think that's worth it. [includes fix from Eric Dumazet to check d_path error value] Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- mm/memory.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'mm') diff --git a/mm/memory.c b/mm/memory.c index 673ebbf499c..d902d0e25ed 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2754,3 +2754,34 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in return buf - old_buf; } + +/* + * Print the name of a VMA. + */ +void print_vma_addr(char *prefix, unsigned long ip) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + + down_read(&mm->mmap_sem); + vma = find_vma(mm, ip); + if (vma && vma->vm_file) { + struct file *f = vma->vm_file; + char *buf = (char *)__get_free_page(GFP_KERNEL); + if (buf) { + char *p, *s; + + p = d_path(f->f_dentry, f->f_vfsmnt, buf, PAGE_SIZE); + if (IS_ERR(p)) + p = "?"; + s = strrchr(p, '/'); + if (s) + p = s+1; + printk("%s%s[%lx+%lx]", prefix, p, + vma->vm_start, + vma->vm_end - vma->vm_start); + free_page((unsigned long)buf); + } + } + up_read(¤t->mm->mmap_sem); +} -- cgit
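A hedged usage sketch from the caller's side (not part of this diff; tsk, regs, address and the exact printk wording are illustrative and differ per architecture): an arch fault handler prints its usual one-line report and then lets print_vma_addr() append the mapping name and offset.

	printk(KERN_INFO "%s[%d]: segfault at %lx",
		tsk->comm, task_pid_nr(tsk), address);
	print_vma_addr(" in ", instruction_pointer(regs));
	printk("\n");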