From 922059227ce012ccb60adaefee0e4237f46bee46 Mon Sep 17 00:00:00 2001
From: Josh Boyer
Date: Mon, 17 Oct 2011 13:24:14 -0400
Subject: Add two patches to fix stalls in khugepaged (rhbz 735946)

---
 ...imit-direct-reclaim-for-higher-order-allo.patch | 54 +++++++++++++++
 ...claim-compaction-if-compaction-can-procee.patch | 81 ++++++++++++++++++++++
 TODO | 1 +
 kernel.spec | 11 +++
 4 files changed, 147 insertions(+)
 create mode 100644 0001-mm-vmscan-Limit-direct-reclaim-for-higher-order-allo.patch
 create mode 100644 0002-mm-Abort-reclaim-compaction-if-compaction-can-procee.patch

diff --git a/0001-mm-vmscan-Limit-direct-reclaim-for-higher-order-allo.patch b/0001-mm-vmscan-Limit-direct-reclaim-for-higher-order-allo.patch
new file mode 100644
index 000000000..77777f012
--- /dev/null
+++ b/0001-mm-vmscan-Limit-direct-reclaim-for-higher-order-allo.patch
@@ -0,0 +1,54 @@
+From 6b7025ea927d290a59d2772828435c1893f0267f Mon Sep 17 00:00:00 2001
+From: Rik van Riel
+Date: Fri, 7 Oct 2011 16:17:22 +0100
+Subject: [PATCH 1/2] mm: vmscan: Limit direct reclaim for higher order
+ allocations
+
+When suffering from memory fragmentation due to unfreeable pages,
+THP page faults will repeatedly try to compact memory. Due to the
+unfreeable pages, compaction fails.
+
+Needless to say, at that point page reclaim also fails to create
+free contiguous 2MB areas. However, that doesn't stop the current
+code from trying, over and over again, and freeing a minimum of 4MB
+(2UL << sc->order pages) at every single invocation.
+
+This resulted in my 12GB system having 2-3GB free memory, a
+corresponding amount of used swap and very sluggish response times.
+
+This can be avoided by having the direct reclaim code not reclaim from
+zones that already have plenty of free memory available for compaction.
+
+If compaction still fails due to unmovable memory, doing additional
+reclaim will only hurt the system, not help.
+
+Signed-off-by: Rik van Riel
+Signed-off-by: Mel Gorman
+---
+ mm/vmscan.c | 10 ++++++++++
+ 1 files changed, 10 insertions(+), 0 deletions(-)
+
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 6072d74..8c03534 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2022,6 +2022,16 @@ static void shrink_zones(int priority, struct zonelist *zonelist,
+ 				continue;
+ 			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+ 				continue;	/* Let kswapd poll it */
++			if (COMPACTION_BUILD) {
++				/*
++				 * If we already have plenty of memory free
++				 * for compaction, don't free any more.
++				 */
++				if (sc->order > PAGE_ALLOC_COSTLY_ORDER &&
++					(compaction_suitable(zone, sc->order) ||
++					 compaction_deferred(zone)))
++					continue;
++			}
+ 			/*
+ 			 * This steals pages from memory cgroups over softlimit
+ 			 * and returns the number of reclaimed pages and
+-- 
+1.7.6.4
+
diff --git a/0002-mm-Abort-reclaim-compaction-if-compaction-can-procee.patch b/0002-mm-Abort-reclaim-compaction-if-compaction-can-procee.patch
new file mode 100644
index 000000000..e74b64d91
--- /dev/null
+++ b/0002-mm-Abort-reclaim-compaction-if-compaction-can-procee.patch
@@ -0,0 +1,81 @@
+From c01043c9aa51a63bd01c60e53494ca4a7e994542 Mon Sep 17 00:00:00 2001
+From: Mel Gorman
+Date: Fri, 7 Oct 2011 16:17:23 +0100
+Subject: [PATCH 2/2] mm: Abort reclaim/compaction if compaction can proceed
+
+If compaction can proceed, shrink_zones() stops doing any work but
+the callers still call shrink_slab(), raise the priority and
+potentially sleep. This patch aborts direct reclaim/compaction
+entirely if compaction can proceed.
+
+Signed-off-by: Mel Gorman
+---
+ mm/vmscan.c | 20 ++++++++++++++++----
+ 1 files changed, 16 insertions(+), 4 deletions(-)
+
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 8c03534..b295a38 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2000,14 +2000,19 @@ restart:
+  *
+  * If a zone is deemed to be full of pinned pages then just give it a light
+  * scan then give up on it.
++ *
++ * This function returns true if a zone is being reclaimed for a costly
++ * high-order allocation and compaction is either ready to begin or deferred.
++ * This indicates to the caller that it should retry the allocation or fail.
+  */
+-static void shrink_zones(int priority, struct zonelist *zonelist,
++static bool shrink_zones(int priority, struct zonelist *zonelist,
+ 					struct scan_control *sc)
+ {
+ 	struct zoneref *z;
+ 	struct zone *zone;
+ 	unsigned long nr_soft_reclaimed;
+ 	unsigned long nr_soft_scanned;
++	bool should_abort_reclaim = false;
+ 
+ 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
+ 					gfp_zone(sc->gfp_mask), sc->nodemask) {
+@@ -2025,12 +2030,15 @@ static void shrink_zones(int priority, struct zonelist *zonelist,
+ 			if (COMPACTION_BUILD) {
+ 				/*
+ 				 * If we already have plenty of memory free
+-				 * for compaction, don't free any more.
++				 * for compaction in this zone, don't free any
++				 * more.
+ 				 */
+ 				if (sc->order > PAGE_ALLOC_COSTLY_ORDER &&
+ 					(compaction_suitable(zone, sc->order) ||
+-					 compaction_deferred(zone)))
++					 compaction_deferred(zone))) {
++					should_abort_reclaim = true;
+ 					continue;
++				}
+ 			}
+ 			/*
+ 			 * This steals pages from memory cgroups over softlimit
+@@ -2049,6 +2057,8 @@ static void shrink_zones(int priority, struct zonelist *zonelist,
+ 
+ 		shrink_zone(priority, zone, sc);
+ 	}
++
++	return should_abort_reclaim;
+ }
+ 
+ static bool zone_reclaimable(struct zone *zone)
+@@ -2113,7 +2123,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
+ 		sc->nr_scanned = 0;
+ 		if (!priority)
+ 			disable_swap_token(sc->mem_cgroup);
+-		shrink_zones(priority, zonelist, sc);
++		if (shrink_zones(priority, zonelist, sc))
++			break;
++
+ 		/*
+ 		 * Don't shrink slabs when reclaiming memory from
+ 		 * over limit cgroups
+-- 
+1.7.6.4
+
diff --git a/TODO b/TODO
index ff6db7d5e..647d48037 100644
--- a/TODO
+++ b/TODO
@@ -17,6 +17,7 @@
 * add-macbookair41-keyboard.patch
 * ucvideo-fix-crash-when-linking-entities.patch
 * mmc-Always-check-for-lower-base-frequency-quirk-for-.patch (also CC'd stable)
+* 000[12]-mm-*
 
 **** Other stuff that should go upstream (in decreasing likelyhood) ************************************
 
diff --git a/kernel.spec b/kernel.spec
index 9d8f2118d..ced743b4d 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -741,6 +741,10 @@ Patch21001: arm-smsc-support-reading-mac-address-from-device-tree.patch
 #rhbz #722509
 Patch21002: mmc-Always-check-for-lower-base-frequency-quirk-for-.patch
 
+#rhbz #735946
+Patch21020: 0001-mm-vmscan-Limit-direct-reclaim-for-higher-order-allo.patch
+Patch21021: 0002-mm-Abort-reclaim-compaction-if-compaction-can-procee.patch
+
 %endif
 
 BuildRoot: %{_tmppath}/kernel-%{KVERREL}-root
@@ -1358,6 +1362,10 @@ ApplyPatch mmc-Always-check-for-lower-base-frequency-quirk-for-.patch
 # utrace.
 ApplyPatch utrace.patch
 
+#rhbz #735946
+ApplyPatch 0001-mm-vmscan-Limit-direct-reclaim-for-higher-order-allo.patch
+ApplyPatch 0002-mm-Abort-reclaim-compaction-if-compaction-can-procee.patch
+
 # END OF PATCH APPLICATIONS
 
 %endif
@@ -2066,6 +2074,9 @@ fi
 #                 ||----w |
 #                 ||     ||
 %changelog
+* Mon Oct 17 2011 Josh Boyer
+- Add two patches to fix stalls in khugepaged (rhbz 735946)
+
 * Fri Oct 14 2011 Dave Jones
 - Disable CONFIG_ACPI_PROCFS_POWER which is supposed to be going away soon.
-- 
cgit
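
For quick reference, the combined effect of the two backported patches is: (1) when direct reclaim runs for a costly high-order allocation (order > PAGE_ALLOC_COSTLY_ORDER) and a zone already has enough free memory for compaction, or compaction there was recently deferred, that zone is skipped; and (2) shrink_zones() reports this so do_try_to_free_pages() can stop raising the reclaim priority and hand control back to compaction. The sketch below is a minimal userspace model of that control flow, not kernel code: struct zone, compaction_suitable() and compaction_deferred() are reduced to simple stand-ins, and PAGE_ALLOC_COSTLY_ORDER is assumed to be 3 as in mainline.

#include <stdbool.h>
#include <stdio.h>

#define PAGE_ALLOC_COSTLY_ORDER 3	/* as in the mainline kernel */

/* Simplified stand-in for the kernel's struct zone. */
struct zone {
	const char *name;
	bool enough_free_for_compaction;	/* models compaction_suitable() */
	bool compaction_recently_deferred;	/* models compaction_deferred() */
};

static bool compaction_suitable(const struct zone *z, int order)
{
	(void)order;
	return z->enough_free_for_compaction;
}

static bool compaction_deferred(const struct zone *z)
{
	return z->compaction_recently_deferred;
}

/*
 * Patch 1: skip reclaiming zones that compaction can already work with.
 * Patch 2: tell the caller so it can abort direct reclaim entirely.
 */
static bool shrink_zones(struct zone *zones, int nzones, int order)
{
	bool should_abort_reclaim = false;

	for (int i = 0; i < nzones; i++) {
		if (order > PAGE_ALLOC_COSTLY_ORDER &&
		    (compaction_suitable(&zones[i], order) ||
		     compaction_deferred(&zones[i]))) {
			should_abort_reclaim = true;
			continue;	/* don't free any more from this zone */
		}
		printf("reclaiming from %s\n", zones[i].name);
	}
	return should_abort_reclaim;
}

int main(void)
{
	struct zone zones[] = {
		{ "DMA32",  true,  false },
		{ "Normal", false, true  },
	};

	/* order 9 == a 2MB THP allocation on x86, i.e. a costly order */
	if (shrink_zones(zones, 2, 9))
		printf("compaction can proceed: abort direct reclaim\n");
	return 0;
}

Without the two patches, the equivalent kernel loop keeps reclaiming from every zone at every priority level even though compaction already has enough free pages to work with, which is the behaviour the commit messages above describe: gigabytes kept needlessly free, swap filling up, and khugepaged stalling.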