diff options
-rw-r--r-- | 0001-mm-don-t-warn-about-allocations-which-stall-for-too-.patch | 181 |
-rw-r--r-- | kernel.spec | 4 |
2 files changed, 185 insertions, 0 deletions
diff --git a/0001-mm-don-t-warn-about-allocations-which-stall-for-too-.patch b/0001-mm-don-t-warn-about-allocations-which-stall-for-too-.patch new file mode 100644 index 000000000..bc5921caf --- /dev/null +++ b/0001-mm-don-t-warn-about-allocations-which-stall-for-too-.patch @@ -0,0 +1,181 @@ +From 400e22499dd92613821374c8c6c88c7225359980 Mon Sep 17 00:00:00 2001 +From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> +Date: Wed, 15 Nov 2017 17:38:37 -0800 +Subject: [PATCH] mm: don't warn about allocations which stall for too long + +Commit 63f53dea0c98 ("mm: warn about allocations which stall for too +long") was a great step for reducing possibility of silent hang up +problem caused by memory allocation stalls. But this commit reverts it, +for it is possible to trigger OOM lockup and/or soft lockups when many +threads concurrently called warn_alloc() (in order to warn about memory +allocation stalls) due to current implementation of printk(), and it is +difficult to obtain useful information due to limitation of synchronous +warning approach. + +Current printk() implementation flushes all pending logs using the +context of a thread which called console_unlock(). printk() should be +able to flush all pending logs eventually unless somebody continues +appending to printk() buffer. + +Since warn_alloc() started appending to printk() buffer while waiting +for oom_kill_process() to make forward progress when oom_kill_process() +is processing pending logs, it became possible for warn_alloc() to force +oom_kill_process() loop inside printk(). As a result, warn_alloc() +significantly increased possibility of preventing oom_kill_process() +from making forward progress. + +---------- Pseudo code start ---------- +Before warn_alloc() was introduced: + + retry: + if (mutex_trylock(&oom_lock)) { + while (atomic_read(&printk_pending_logs) > 0) { + atomic_dec(&printk_pending_logs); + print_one_log(); + } + // Send SIGKILL here. 
+ mutex_unlock(&oom_lock) + } + goto retry; + +After warn_alloc() was introduced: + + retry: + if (mutex_trylock(&oom_lock)) { + while (atomic_read(&printk_pending_logs) > 0) { + atomic_dec(&printk_pending_logs); + print_one_log(); + } + // Send SIGKILL here. + mutex_unlock(&oom_lock) + } else if (waited_for_10seconds()) { + atomic_inc(&printk_pending_logs); + } + goto retry; +---------- Pseudo code end ---------- + +Although waited_for_10seconds() becomes true once per 10 seconds, +unbounded number of threads can call waited_for_10seconds() at the same +time. Also, since threads doing waited_for_10seconds() keep doing +almost busy loop, the thread doing print_one_log() can use little CPU +resource. Therefore, this situation can be simplified like + +---------- Pseudo code start ---------- + retry: + if (mutex_trylock(&oom_lock)) { + while (atomic_read(&printk_pending_logs) > 0) { + atomic_dec(&printk_pending_logs); + print_one_log(); + } + // Send SIGKILL here. + mutex_unlock(&oom_lock) + } else { + atomic_inc(&printk_pending_logs); + } + goto retry; +---------- Pseudo code end ---------- + +when printk() is called faster than print_one_log() can process a log. + +One of possible mitigation would be to introduce a new lock in order to +make sure that no other series of printk() (either oom_kill_process() or +warn_alloc()) can append to printk() buffer when one series of printk() +(either oom_kill_process() or warn_alloc()) is already in progress. + +Such serialization will also help obtaining kernel messages in readable +form. + +---------- Pseudo code start ---------- + retry: + if (mutex_trylock(&oom_lock)) { + mutex_lock(&oom_printk_lock); + while (atomic_read(&printk_pending_logs) > 0) { + atomic_dec(&printk_pending_logs); + print_one_log(); + } + // Send SIGKILL here. 
+ mutex_unlock(&oom_printk_lock); + mutex_unlock(&oom_lock) + } else { + if (mutex_trylock(&oom_printk_lock)) { + atomic_inc(&printk_pending_logs); + mutex_unlock(&oom_printk_lock); + } + } + goto retry; +---------- Pseudo code end ---------- + +But this commit does not go in that direction, for we don't want to +introduce a new lock dependency, and we are unlikely to be able to obtain +useful information even if we serialized oom_kill_process() and +warn_alloc(). + +Synchronous approach is prone to unexpected results (e.g. too late [1], +too frequent [2], overlooked [3]). As far as I know, warn_alloc() never +helped with providing information other than "something is going wrong". +I want to consider asynchronous approach which can obtain information +during stalls with possibly relevant threads (e.g. the owner of +oom_lock and kswapd-like threads) and serve as a trigger for actions +(e.g. turn on/off tracepoints, ask libvirt daemon to take a memory dump +of stalling KVM guest for diagnostic purpose). + +This commit temporarily loses ability to report e.g. OOM lockup caused by +being unable to invoke the OOM killer for a !__GFP_FS allocation request. +But asynchronous approach will be able to detect such situation and emit +warning. Thus, let's remove warn_alloc(). 
+ +[1] https://bugzilla.kernel.org/show_bug.cgi?id=192981 +[2] http://lkml.kernel.org/r/CAM_iQpWuPVGc2ky8M-9yukECtS+zKjiDasNymX7rMcBjBFyM_A@mail.gmail.com +[3] commit db73ee0d46379922 ("mm, vmscan: do not loop on too_many_isolated for ever")) + +Link: http://lkml.kernel.org/r/1509017339-4802-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp +Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> +Reported-by: Cong Wang <xiyou.wangcong@gmail.com> +Reported-by: yuwang.yuwang <yuwang.yuwang@alibaba-inc.com> +Reported-by: Johannes Weiner <hannes@cmpxchg.org> +Acked-by: Michal Hocko <mhocko@suse.com> +Acked-by: Johannes Weiner <hannes@cmpxchg.org> +Cc: Vlastimil Babka <vbabka@suse.cz> +Cc: Mel Gorman <mgorman@suse.de> +Cc: Dave Hansen <dave.hansen@intel.com> +Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> +Cc: Petr Mladek <pmladek@suse.com> +Cc: Steven Rostedt <rostedt@goodmis.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> + +Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> +--- + mm/page_alloc.c | 10 ---------- + 1 file changed, 10 deletions(-) + +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 04bf1ad50144..bd1a686e40fe 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3903,8 +3903,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, + enum compact_result compact_result; + int compaction_retries; + int no_progress_loops; +- unsigned long alloc_start = jiffies; +- unsigned int stall_timeout = 10 * HZ; + unsigned int cpuset_mems_cookie; + int reserve_flags; + +@@ -4036,14 +4034,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, + if (!can_direct_reclaim) + goto nopage; + +- /* Make sure we know about allocations which stall for too long */ +- if (time_after(jiffies, alloc_start + stall_timeout)) { +- warn_alloc(gfp_mask & ~__GFP_NOWARN, ac->nodemask, +- "page allocation stalls for %ums, order:%u", +- jiffies_to_msecs(jiffies-alloc_start), order); +- stall_timeout += 10 * HZ; +- } +- + 
/* Avoid recursion of direct reclaim */ + if (current->flags & PF_MEMALLOC) + goto nopage; +-- +2.14.3 + diff --git a/kernel.spec b/kernel.spec index 42cae8608..70248c224 100644 --- a/kernel.spec +++ b/kernel.spec @@ -683,6 +683,9 @@ Patch635: Add-support-for-One-by-Wacom-CTL-472-CTL-672.patch # CVE-2018-5750 rhbz 1539706 1539708 Patch636: ACPI-sbshc-remove-raw-pointer-from-printk-message.patch +# rhbz 1492664 1492665 +Patch637: 0001-mm-don-t-warn-about-allocations-which-stall-for-too-.patch + # END OF PATCH DEFINITIONS %endif @@ -2242,6 +2245,7 @@ fi %changelog * Mon Jan 29 2018 Justin M. Forbes <jforbes@fedoraproject.org> - Fix CVE-2018-5750 (rhbz 1539706 1539708) +- Fix softlockup (rhbz 1492664 1492665) * Sat Jan 27 2018 Laura Abbott <labbott@fedoraproject.org> - Add support for Wacom tablet (rhbz 1539238) |