summaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c160
1 files changed, 64 insertions, 96 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5c44ed49ca9..0727896a88a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -23,6 +23,7 @@
#include <linux/bootmem.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
+#include <linux/kmemcheck.h>
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/pagevec.h>
@@ -46,6 +47,7 @@
#include <linux/page-isolation.h>
#include <linux/page_cgroup.h>
#include <linux/debugobjects.h>
+#include <linux/kmemleak.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -149,10 +151,6 @@ static unsigned long __meminitdata dma_reserve;
static int __meminitdata nr_nodemap_entries;
static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
- static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES];
- static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
static unsigned long __initdata required_kernelcore;
static unsigned long __initdata required_movablecore;
static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
@@ -331,7 +329,7 @@ static int destroy_compound_page(struct page *page, unsigned long order)
for (i = 1; i < nr_pages; i++) {
struct page *p = page + i;
- if (unlikely(!PageTail(p) | (p->first_page != page))) {
+ if (unlikely(!PageTail(p) || (p->first_page != page))) {
bad_page(page);
bad++;
}
@@ -549,6 +547,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
int i;
int bad = 0;
+ kmemcheck_free_shadow(page, order);
+
for (i = 0 ; i < (1 << order) ; ++i)
bad += free_pages_check(page + i);
if (bad)
@@ -922,13 +922,10 @@ static void drain_pages(unsigned int cpu)
unsigned long flags;
struct zone *zone;
- for_each_zone(zone) {
+ for_each_populated_zone(zone) {
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
- if (!populated_zone(zone))
- continue;
-
pset = zone_pcp(zone, cpu);
pcp = &pset->pcp;
@@ -1000,6 +997,8 @@ static void free_hot_cold_page(struct page *page, int cold)
struct per_cpu_pages *pcp;
unsigned long flags;
+ kmemcheck_free_shadow(page, 0);
+
if (PageAnon(page))
page->mapping = NULL;
if (free_pages_check(page))
@@ -1053,6 +1052,16 @@ void split_page(struct page *page, unsigned int order)
VM_BUG_ON(PageCompound(page));
VM_BUG_ON(!page_count(page));
+
+#ifdef CONFIG_KMEMCHECK
+ /*
+ * Split shadow pages too, because free(page[0]) would
+ * otherwise free the whole shadow.
+ */
+ if (kmemcheck_page_is_tracked(page))
+ split_page(virt_to_page(page[0].shadow), order);
+#endif
+
for (i = 1; i < (1 << order); i++)
set_page_refcounted(page + i);
}
@@ -1479,6 +1488,8 @@ __alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
unsigned long did_some_progress;
unsigned long pages_reclaimed = 0;
+ lockdep_trace_alloc(gfp_mask);
+
might_sleep_if(wait);
if (should_fail_alloc_page(gfp_mask, order))
@@ -1578,12 +1589,16 @@ nofail_alloc:
*/
cpuset_update_task_memory_state();
p->flags |= PF_MEMALLOC;
+
+ lockdep_set_current_reclaim_state(gfp_mask);
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
- did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
+ did_some_progress = try_to_free_pages(zonelist, order,
+ gfp_mask, nodemask);
p->reclaim_state = NULL;
+ lockdep_clear_current_reclaim_state();
p->flags &= ~PF_MEMALLOC;
cond_resched();
@@ -1667,7 +1682,10 @@ nopage:
dump_stack();
show_mem();
}
+ return page;
got_pg:
+ if (kmemcheck_enabled)
+ kmemcheck_pagealloc_alloc(page, order, gfp_mask);
return page;
}
EXPORT_SYMBOL(__alloc_pages_internal);
@@ -1874,10 +1892,7 @@ void show_free_areas(void)
int cpu;
struct zone *zone;
- for_each_zone(zone) {
- if (!populated_zone(zone))
- continue;
-
+ for_each_populated_zone(zone) {
show_node(zone);
printk("%s per-cpu:\n", zone->name);
@@ -1917,12 +1932,9 @@ void show_free_areas(void)
global_page_state(NR_PAGETABLE),
global_page_state(NR_BOUNCE));
- for_each_zone(zone) {
+ for_each_populated_zone(zone) {
int i;
- if (!populated_zone(zone))
- continue;
-
show_node(zone);
printk("%s"
" free:%lukB"
@@ -1962,12 +1974,9 @@ void show_free_areas(void)
printk("\n");
}
- for_each_zone(zone) {
+ for_each_populated_zone(zone) {
unsigned long nr[MAX_ORDER], flags, order, total = 0;
- if (!populated_zone(zone))
- continue;
-
show_node(zone);
printk("%s: ", zone->name);
@@ -2134,7 +2143,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
int n, val;
int min_val = INT_MAX;
int best_node = -1;
- node_to_cpumask_ptr(tmp, 0);
+ const struct cpumask *tmp = cpumask_of_node(0);
/* Use the local node if we haven't already */
if (!node_isset(node, *used_node_mask)) {
@@ -2155,8 +2164,8 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
val += (n < node);
/* Give preference to headless and unused nodes */
- node_to_cpumask_ptr_next(tmp, n);
- if (!cpus_empty(*tmp))
+ tmp = cpumask_of_node(n);
+ if (!cpumask_empty(tmp))
val += PENALTY_FOR_NODE_WITH_CPUS;
/* Slight preference for less loaded node */
@@ -2687,6 +2696,7 @@ static void __meminit zone_init_free_lists(struct zone *zone)
static int zone_batchsize(struct zone *zone)
{
+#ifdef CONFIG_MMU
int batch;
/*
@@ -2712,9 +2722,26 @@ static int zone_batchsize(struct zone *zone)
* of pages of one half of the possible page colors
* and the other with pages of the other colors.
*/
- batch = (1 << (fls(batch + batch/2)-1)) - 1;
+ batch = rounddown_pow_of_two(batch + batch/2) - 1;
return batch;
+
+#else
+ /* The deferral and batching of frees should be suppressed under NOMMU
+ * conditions.
+ *
+ * The problem is that NOMMU needs to be able to allocate large chunks
+ * of contiguous memory as there's no hardware page translation to
+ * assemble apparent contiguous memory from discontiguous pages.
+ *
+ * Queueing large contiguous runs of pages for batching, however,
+ * causes the pages to actually be freed in smaller chunks. As there
+ * can be a significant delay between the individual batches being
+ * recycled, this leads to the once large chunks of space being
+ * fragmented and becoming unavailable for high-order allocations.
+ */
+ return 0;
+#endif
}
static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
@@ -2779,11 +2806,7 @@ static int __cpuinit process_zones(int cpu)
node_set_state(node, N_CPU); /* this node has a cpu */
- for_each_zone(zone) {
-
- if (!populated_zone(zone))
- continue;
-
+ for_each_populated_zone(zone) {
zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
GFP_KERNEL, node);
if (!zone_pcp(zone, cpu))
@@ -3095,64 +3118,6 @@ void __init sparse_memory_present_with_active_regions(int nid)
}
/**
- * push_node_boundaries - Push node boundaries to at least the requested boundary
- * @nid: The nid of the node to push the boundary for
- * @start_pfn: The start pfn of the node
- * @end_pfn: The end pfn of the node
- *
- * In reserve-based hot-add, mem_map is allocated that is unused until hotadd
- * time. Specifically, on x86_64, SRAT will report ranges that can potentially
- * be hotplugged even though no physical memory exists. This function allows
- * an arch to push out the node boundaries so mem_map is allocated that can
- * be used later.
- */
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-void __init push_node_boundaries(unsigned int nid,
- unsigned long start_pfn, unsigned long end_pfn)
-{
- mminit_dprintk(MMINIT_TRACE, "zoneboundary",
- "Entering push_node_boundaries(%u, %lu, %lu)\n",
- nid, start_pfn, end_pfn);
-
- /* Initialise the boundary for this node if necessary */
- if (node_boundary_end_pfn[nid] == 0)
- node_boundary_start_pfn[nid] = -1UL;
-
- /* Update the boundaries */
- if (node_boundary_start_pfn[nid] > start_pfn)
- node_boundary_start_pfn[nid] = start_pfn;
- if (node_boundary_end_pfn[nid] < end_pfn)
- node_boundary_end_pfn[nid] = end_pfn;
-}
-
-/* If necessary, push the node boundary out for reserve hotadd */
-static void __meminit account_node_boundary(unsigned int nid,
- unsigned long *start_pfn, unsigned long *end_pfn)
-{
- mminit_dprintk(MMINIT_TRACE, "zoneboundary",
- "Entering account_node_boundary(%u, %lu, %lu)\n",
- nid, *start_pfn, *end_pfn);
-
- /* Return if boundary information has not been provided */
- if (node_boundary_end_pfn[nid] == 0)
- return;
-
- /* Check the boundaries and update if necessary */
- if (node_boundary_start_pfn[nid] < *start_pfn)
- *start_pfn = node_boundary_start_pfn[nid];
- if (node_boundary_end_pfn[nid] > *end_pfn)
- *end_pfn = node_boundary_end_pfn[nid];
-}
-#else
-void __init push_node_boundaries(unsigned int nid,
- unsigned long start_pfn, unsigned long end_pfn) {}
-
-static void __meminit account_node_boundary(unsigned int nid,
- unsigned long *start_pfn, unsigned long *end_pfn) {}
-#endif
-
-
-/**
* get_pfn_range_for_nid - Return the start and end page frames for a node
* @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
* @start_pfn: Passed by reference. On return, it will have the node start_pfn.
@@ -3177,9 +3142,6 @@ void __meminit get_pfn_range_for_nid(unsigned int nid,
if (*start_pfn == -1UL)
*start_pfn = 0;
-
- /* Push the node boundaries out if requested */
- account_node_boundary(nid, start_pfn, end_pfn);
}
/*
@@ -3785,10 +3747,6 @@ void __init remove_all_active_ranges(void)
{
memset(early_node_map, 0, sizeof(early_node_map));
nr_nodemap_entries = 0;
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
- memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn));
- memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn));
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
}
/* Compare two active node_active_regions */
@@ -4607,6 +4565,16 @@ void *__init alloc_large_system_hash(const char *tablename,
if (_hash_mask)
*_hash_mask = (1 << log2qty) - 1;
+ /*
+ * If hashdist is set, the table allocation is done with __vmalloc()
+ * which invokes the kmemleak_alloc() callback. This function may also
+ * be called before the slab and kmemleak are initialised when
+ * kmemleak simply buffers the request to be executed later
+ * (GFP_ATOMIC flag ignored in this case).
+ */
+ if (!hashdist)
+ kmemleak_alloc(table, size, 1, GFP_ATOMIC);
+
return table;
}