diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memory.c | 5 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 1 | ||||
-rw-r--r-- | mm/mempolicy.c | 30 | ||||
-rw-r--r-- | mm/page_alloc.c | 17 | ||||
-rw-r--r-- | mm/rmap.c | 3 | ||||
-rw-r--r-- | mm/slab.c | 96 | ||||
-rw-r--r-- | mm/swap.c | 28 | ||||
-rw-r--r-- | mm/vmscan.c | 21 |
8 files changed, 154 insertions, 47 deletions
diff --git a/mm/memory.c b/mm/memory.c index 9abc6008544..85e80a57db2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -623,11 +623,12 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, (*zap_work)--; continue; } + + (*zap_work) -= PAGE_SIZE; + if (pte_present(ptent)) { struct page *page; - (*zap_work) -= PAGE_SIZE; - page = vm_normal_page(vma, addr, ptent); if (unlikely(details) && page) { /* diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a918f77f02f..1fe76d963ac 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -130,6 +130,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) onlined_pages++; } zone->present_pages += onlined_pages; + zone->zone_pgdat->node_present_pages += onlined_pages; setup_per_zone_pages_min(); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index d80fa7d8f72..b21869a39f0 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -330,9 +330,19 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, int err; struct vm_area_struct *first, *vma, *prev; - /* Clear the LRU lists so pages can be isolated */ - if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { + /* Must have swap device for migration */ + if (nr_swap_pages <= 0) + return ERR_PTR(-ENODEV); + + /* + * Clear the LRU lists so pages can be isolated. + * Note that pages may be moved off the LRU after we have + * drained them. Those pages will fail to migrate like other + * pages that may be busy. + */ lru_add_drain_all(); + } first = find_vma(mm, start); if (!first) @@ -748,7 +758,7 @@ long do_mbind(unsigned long start, unsigned long len, MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) || mode > MPOL_MAX) return -EINVAL; - if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_RESOURCE)) + if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE)) return -EPERM; if (start & ~PAGE_MASK) @@ -942,20 +952,20 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, */ if ((current->euid != task->suid) && (current->euid != task->uid) && (current->uid != task->suid) && (current->uid != task->uid) && - !capable(CAP_SYS_ADMIN)) { + !capable(CAP_SYS_NICE)) { err = -EPERM; goto out; } task_nodes = cpuset_mems_allowed(task); /* Is the user allowed to access the target nodes? */ - if (!nodes_subset(new, task_nodes) && !capable(CAP_SYS_ADMIN)) { + if (!nodes_subset(new, task_nodes) && !capable(CAP_SYS_NICE)) { err = -EPERM; goto out; } err = do_migrate_pages(mm, &old, &new, - capable(CAP_SYS_ADMIN) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE); + capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE); out: mmput(mm); return err; @@ -1789,6 +1799,7 @@ static void gather_stats(struct page *page, void *private, int pte_dirty) cond_resched(); } +#ifdef CONFIG_HUGETLB_PAGE static void check_huge_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, struct numa_maps *md) @@ -1814,6 +1825,13 @@ static void check_huge_range(struct vm_area_struct *vma, gather_stats(page, md, pte_dirty(*ptep)); } } +#else +static inline void check_huge_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct numa_maps *md) +{ +} +#endif int show_numa_map(struct seq_file *m, void *v) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 791690d7d3f..234bd4895d1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -590,21 +590,20 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, } #ifdef CONFIG_NUMA -/* Called from the slab reaper to drain remote pagesets */ -void drain_remote_pages(void) +/* + * Called from the slab reaper to drain pagesets on a particular node that + * belong to the currently executing processor. + */ +void drain_node_pages(int nodeid) { - struct zone *zone; - int i; + int i, z; unsigned long flags; local_irq_save(flags); - for_each_zone(zone) { + for (z = 0; z < MAX_NR_ZONES; z++) { + struct zone *zone = NODE_DATA(nodeid)->node_zones + z; struct per_cpu_pageset *pset; - /* Do not drain local pagesets */ - if (zone->zone_pgdat->node_id == numa_node_id()) - continue; - pset = zone_pcp(zone, smp_processor_id()); for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) { struct per_cpu_pages *pcp; diff --git a/mm/rmap.c b/mm/rmap.c index d8ce5ff6145..67f0e20b101 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -537,9 +537,6 @@ void page_add_new_anon_rmap(struct page *page, */ void page_add_file_rmap(struct page *page) { - BUG_ON(PageAnon(page)); - BUG_ON(!pfn_valid(page_to_pfn(page))); - if (atomic_inc_and_test(&page->_mapcount)) __inc_page_state(nr_mapped); } diff --git a/mm/slab.c b/mm/slab.c index f2e92dc1c9c..d0bd7f07ab0 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -789,6 +789,47 @@ static void __slab_error(const char *function, struct kmem_cache *cachep, char * dump_stack(); } +#ifdef CONFIG_NUMA +/* + * Special reaping functions for NUMA systems called from cache_reap(). + * These take care of doing round robin flushing of alien caches (containing + * objects freed on different nodes from which they were allocated) and the + * flushing of remote pcps by calling drain_node_pages. + */ +static DEFINE_PER_CPU(unsigned long, reap_node); + +static void init_reap_node(int cpu) +{ + int node; + + node = next_node(cpu_to_node(cpu), node_online_map); + if (node == MAX_NUMNODES) + node = 0; + + __get_cpu_var(reap_node) = node; +} + +static void next_reap_node(void) +{ + int node = __get_cpu_var(reap_node); + + /* + * Also drain per cpu pages on remote zones + */ + if (node != numa_node_id()) + drain_node_pages(node); + + node = next_node(node, node_online_map); + if (unlikely(node >= MAX_NUMNODES)) + node = first_node(node_online_map); + __get_cpu_var(reap_node) = node; +} + +#else +#define init_reap_node(cpu) do { } while (0) +#define next_reap_node(void) do { } while (0) +#endif + /* * Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz * via the workqueue/eventd. @@ -806,6 +847,7 @@ static void __devinit start_cpu_timer(int cpu) * at that time. */ if (keventd_up() && reap_work->func == NULL) { + init_reap_node(cpu); INIT_WORK(reap_work, cache_reap, NULL); schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu); } @@ -884,6 +926,23 @@ static void __drain_alien_cache(struct kmem_cache *cachep, } } +/* + * Called from cache_reap() to regularly drain alien caches round robin. + */ +static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) +{ + int node = __get_cpu_var(reap_node); + + if (l3->alien) { + struct array_cache *ac = l3->alien[node]; + if (ac && ac->avail) { + spin_lock_irq(&ac->lock); + __drain_alien_cache(cachep, ac, node); + spin_unlock_irq(&ac->lock); + } + } +} + static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien) { int i = 0; @@ -902,6 +961,7 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **al #else #define drain_alien_cache(cachep, alien) do { } while (0) +#define reap_alien(cachep, l3) do { } while (0) static inline struct array_cache **alloc_alien_cache(int node, int limit) { @@ -1124,6 +1184,7 @@ void __init kmem_cache_init(void) struct cache_sizes *sizes; struct cache_names *names; int i; + int order; for (i = 0; i < NUM_INIT_LISTS; i++) { kmem_list3_init(&initkmem_list3[i]); @@ -1167,11 +1228,15 @@ void __init kmem_cache_init(void) cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size()); - cache_estimate(0, cache_cache.buffer_size, cache_line_size(), 0, - &left_over, &cache_cache.num); + for (order = 0; order < MAX_ORDER; order++) { + cache_estimate(order, cache_cache.buffer_size, + cache_line_size(), 0, &left_over, &cache_cache.num); + if (cache_cache.num) + break; + } if (!cache_cache.num) BUG(); - + cache_cache.gfporder = order; cache_cache.colour = left_over / cache_cache.colour_off; cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) + sizeof(struct slab), cache_line_size()); @@ -1648,6 +1713,14 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep, left_over = remainder; /* + * A VFS-reclaimable slab tends to have most allocations + * as GFP_NOFS and we really don't want to have to be allocating + * higher-order pages when we are unable to shrink dcache. + */ + if (flags & SLAB_RECLAIM_ACCOUNT) + break; + + /* * Large number of objects is good, but very large slabs are * currently bad for the gfp()s. */ @@ -1869,17 +1942,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, size = ALIGN(size, align); - if ((flags & SLAB_RECLAIM_ACCOUNT) && size <= PAGE_SIZE) { - /* - * A VFS-reclaimable slab tends to have most allocations - * as GFP_NOFS and we really don't want to have to be allocating - * higher-order pages when we are unable to shrink dcache. - */ - cachep->gfporder = 0; - cache_estimate(cachep->gfporder, size, align, flags, - &left_over, &cachep->num); - } else - left_over = calculate_slab_order(cachep, size, align, flags); + left_over = calculate_slab_order(cachep, size, align, flags); if (!cachep->num) { printk("kmem_cache_create: couldn't create cache %s.\n", name); @@ -3494,8 +3557,7 @@ static void cache_reap(void *unused) check_irq_on(); l3 = searchp->nodelists[numa_node_id()]; - if (l3->alien) - drain_alien_cache(searchp, l3->alien); + reap_alien(searchp, l3); spin_lock_irq(&l3->list_lock); drain_array_locked(searchp, cpu_cache_get(searchp), 0, @@ -3545,7 +3607,7 @@ static void cache_reap(void *unused) } check_irq_on(); mutex_unlock(&cache_chain_mutex); - drain_remote_pages(); + next_reap_node(); /* Setup the next iteration */ schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); } diff --git a/mm/swap.c b/mm/swap.c index cce3dda59c5..b524ea90bdd 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -393,7 +393,8 @@ void pagevec_strip(struct pagevec *pvec) struct page *page = pvec->pages[i]; if (PagePrivate(page) && !TestSetPageLocked(page)) { - try_to_release_page(page, 0); + if (PagePrivate(page)) + try_to_release_page(page, 0); unlock_page(page); } } @@ -489,13 +490,34 @@ void percpu_counter_mod(struct percpu_counter *fbc, long amount) if (count >= FBC_BATCH || count <= -FBC_BATCH) { spin_lock(&fbc->lock); fbc->count += count; + *pcount = 0; spin_unlock(&fbc->lock); - count = 0; + } else { + *pcount = count; } - *pcount = count; put_cpu(); } EXPORT_SYMBOL(percpu_counter_mod); + +/* + * Add up all the per-cpu counts, return the result. This is a more accurate + * but much slower version of percpu_counter_read_positive() + */ +long percpu_counter_sum(struct percpu_counter *fbc) +{ + long ret; + int cpu; + + spin_lock(&fbc->lock); + ret = fbc->count; + for_each_cpu(cpu) { + long *pcount = per_cpu_ptr(fbc->counters, cpu); + ret += *pcount; + } + spin_unlock(&fbc->lock); + return ret < 0 ? 0 : ret; +} +EXPORT_SYMBOL(percpu_counter_sum); #endif /* diff --git a/mm/vmscan.c b/mm/vmscan.c index b0af7593d01..4fe7e3aa02e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -700,7 +700,7 @@ int migrate_page_remove_references(struct page *newpage, * the page. */ if (!mapping || page_mapcount(page) + nr_refs != page_count(page)) - return 1; + return -EAGAIN; /* * Establish swap ptes for anonymous pages or destroy pte @@ -721,13 +721,15 @@ int migrate_page_remove_references(struct page *newpage, * If the page was not migrated then the PageSwapCache bit * is still set and the operation may continue. */ - try_to_unmap(page, 1); + if (try_to_unmap(page, 1) == SWAP_FAIL) + /* A vma has VM_LOCKED set -> Permanent failure */ + return -EPERM; /* * Give up if we were unable to remove all mappings. */ if (page_mapcount(page)) - return 1; + return -EAGAIN; write_lock_irq(&mapping->tree_lock); @@ -738,7 +740,7 @@ int migrate_page_remove_references(struct page *newpage, if (!page_mapping(page) || page_count(page) != nr_refs || *radix_pointer != page) { write_unlock_irq(&mapping->tree_lock); - return 1; + return -EAGAIN; } /* @@ -813,10 +815,14 @@ EXPORT_SYMBOL(migrate_page_copy); */ int migrate_page(struct page *newpage, struct page *page) { + int rc; + BUG_ON(PageWriteback(page)); /* Writeback must be complete */ - if (migrate_page_remove_references(newpage, page, 2)) - return -EAGAIN; + rc = migrate_page_remove_references(newpage, page, 2); + + if (rc) + return rc; migrate_page_copy(newpage, page); @@ -1883,7 +1889,8 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) if (!(gfp_mask & __GFP_WAIT) || zone->all_unreclaimable || - atomic_read(&zone->reclaim_in_progress) > 0) + atomic_read(&zone->reclaim_in_progress) > 0 || + (p->flags & PF_MEMALLOC)) return 0; node_id = zone->zone_pgdat->node_id; |