summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--0001-mm-don-t-warn-about-allocations-which-stall-for-too-.patch181
-rw-r--r--kernel.spec4
2 files changed, 185 insertions, 0 deletions
diff --git a/0001-mm-don-t-warn-about-allocations-which-stall-for-too-.patch b/0001-mm-don-t-warn-about-allocations-which-stall-for-too-.patch
new file mode 100644
index 000000000..bc5921caf
--- /dev/null
+++ b/0001-mm-don-t-warn-about-allocations-which-stall-for-too-.patch
@@ -0,0 +1,181 @@
+From 400e22499dd92613821374c8c6c88c7225359980 Mon Sep 17 00:00:00 2001
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Date: Wed, 15 Nov 2017 17:38:37 -0800
+Subject: [PATCH] mm: don't warn about allocations which stall for too long
+
+Commit 63f53dea0c98 ("mm: warn about allocations which stall for too
+long") was a great step toward reducing the possibility of silent
+hang-ups caused by memory allocation stalls. But this commit reverts it,
+because it is possible to trigger OOM lockups and/or soft lockups when
+many threads concurrently call warn_alloc() (in order to warn about
+memory allocation stalls) due to the current implementation of printk(),
+and it is difficult to obtain useful information due to the limitations
+of the synchronous warning approach.
+
+Current printk() implementation flushes all pending logs using the
+context of a thread which called console_unlock(). printk() should be
+able to flush all pending logs eventually unless somebody continues
+appending to printk() buffer.
+
+Since warn_alloc() started appending to printk() buffer while waiting
+for oom_kill_process() to make forward progress when oom_kill_process()
+is processing pending logs, it became possible for warn_alloc() to force
+oom_kill_process() loop inside printk(). As a result, warn_alloc()
+significantly increased possibility of preventing oom_kill_process()
+from making forward progress.
+
+---------- Pseudo code start ----------
+Before warn_alloc() was introduced:
+
+ retry:
+ if (mutex_trylock(&oom_lock)) {
+ while (atomic_read(&printk_pending_logs) > 0) {
+ atomic_dec(&printk_pending_logs);
+ print_one_log();
+ }
+ // Send SIGKILL here.
+ mutex_unlock(&oom_lock)
+ }
+ goto retry;
+
+After warn_alloc() was introduced:
+
+ retry:
+ if (mutex_trylock(&oom_lock)) {
+ while (atomic_read(&printk_pending_logs) > 0) {
+ atomic_dec(&printk_pending_logs);
+ print_one_log();
+ }
+ // Send SIGKILL here.
+ mutex_unlock(&oom_lock)
+ } else if (waited_for_10seconds()) {
+ atomic_inc(&printk_pending_logs);
+ }
+ goto retry;
+---------- Pseudo code end ----------
+
+Although waited_for_10seconds() becomes true once per 10 seconds, an
+unbounded number of threads can call waited_for_10seconds() at the same
+time. Also, since threads doing waited_for_10seconds() keep running an
+almost-busy loop, the thread doing print_one_log() gets only a little
+CPU time. Therefore, this situation can be simplified like
+
+---------- Pseudo code start ----------
+ retry:
+ if (mutex_trylock(&oom_lock)) {
+ while (atomic_read(&printk_pending_logs) > 0) {
+ atomic_dec(&printk_pending_logs);
+ print_one_log();
+ }
+ // Send SIGKILL here.
+ mutex_unlock(&oom_lock)
+ } else {
+ atomic_inc(&printk_pending_logs);
+ }
+ goto retry;
+---------- Pseudo code end ----------
+
+when printk() is called faster than print_one_log() can process a log.
+
+One possible mitigation would be to introduce a new lock in order to
+make sure that no other series of printk() (either oom_kill_process() or
+warn_alloc()) can append to printk() buffer when one series of printk()
+(either oom_kill_process() or warn_alloc()) is already in progress.
+
+Such serialization would also help in obtaining kernel messages in
+readable form.
+
+---------- Pseudo code start ----------
+ retry:
+ if (mutex_trylock(&oom_lock)) {
+ mutex_lock(&oom_printk_lock);
+ while (atomic_read(&printk_pending_logs) > 0) {
+ atomic_dec(&printk_pending_logs);
+ print_one_log();
+ }
+ // Send SIGKILL here.
+ mutex_unlock(&oom_printk_lock);
+ mutex_unlock(&oom_lock)
+ } else {
+ if (mutex_trylock(&oom_printk_lock)) {
+ atomic_inc(&printk_pending_logs);
+ mutex_unlock(&oom_printk_lock);
+ }
+ }
+ goto retry;
+---------- Pseudo code end ----------
+
+But this commit does not go in that direction, because we don't want to
+introduce a new lock dependency, and we are unlikely to be able to obtain
+useful information even if we serialized oom_kill_process() and
+warn_alloc().
+
+Synchronous approach is prone to unexpected results (e.g. too late [1],
+too frequent [2], overlooked [3]). As far as I know, warn_alloc() never
+helped with providing information other than "something is going wrong".
+I want to consider asynchronous approach which can obtain information
+during stalls with possibly relevant threads (e.g. the owner of
+oom_lock and kswapd-like threads) and serve as a trigger for actions
+(e.g. turn on/off tracepoints, ask libvirt daemon to take a memory dump
+of stalling KVM guest for diagnostic purpose).
+
+This commit temporarily loses the ability to report e.g. an OOM lockup
+caused by being unable to invoke the OOM killer for a !__GFP_FS
+allocation request. But an asynchronous approach will be able to detect
+such a situation and emit a warning. Thus, let's remove warn_alloc().
+
+[1] https://bugzilla.kernel.org/show_bug.cgi?id=192981
+[2] http://lkml.kernel.org/r/CAM_iQpWuPVGc2ky8M-9yukECtS+zKjiDasNymX7rMcBjBFyM_A@mail.gmail.com
+[3] commit db73ee0d46379922 ("mm, vmscan: do not loop on too_many_isolated for ever")
+
+Link: http://lkml.kernel.org/r/1509017339-4802-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Reported-by: Cong Wang <xiyou.wangcong@gmail.com>
+Reported-by: yuwang.yuwang <yuwang.yuwang@alibaba-inc.com>
+Reported-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Cc: Petr Mladek <pmladek@suse.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+---
+ mm/page_alloc.c | 10 ----------
+ 1 file changed, 10 deletions(-)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 04bf1ad50144..bd1a686e40fe 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3903,8 +3903,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
+ enum compact_result compact_result;
+ int compaction_retries;
+ int no_progress_loops;
+- unsigned long alloc_start = jiffies;
+- unsigned int stall_timeout = 10 * HZ;
+ unsigned int cpuset_mems_cookie;
+ int reserve_flags;
+
+@@ -4036,14 +4034,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
+ if (!can_direct_reclaim)
+ goto nopage;
+
+- /* Make sure we know about allocations which stall for too long */
+- if (time_after(jiffies, alloc_start + stall_timeout)) {
+- warn_alloc(gfp_mask & ~__GFP_NOWARN, ac->nodemask,
+- "page allocation stalls for %ums, order:%u",
+- jiffies_to_msecs(jiffies-alloc_start), order);
+- stall_timeout += 10 * HZ;
+- }
+-
+ /* Avoid recursion of direct reclaim */
+ if (current->flags & PF_MEMALLOC)
+ goto nopage;
+--
+2.14.3
+
diff --git a/kernel.spec b/kernel.spec
index 42cae8608..70248c224 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -683,6 +683,9 @@ Patch635: Add-support-for-One-by-Wacom-CTL-472-CTL-672.patch
# CVE-2018-5750 rhbz 1539706 1539708
Patch636: ACPI-sbshc-remove-raw-pointer-from-printk-message.patch
+# rhbz 1492664 1492665
+Patch637: 0001-mm-don-t-warn-about-allocations-which-stall-for-too-.patch
+
# END OF PATCH DEFINITIONS
%endif
@@ -2242,6 +2245,7 @@ fi
%changelog
* Mon Jan 29 2018 Justin M. Forbes <jforbes@fedoraproject.org>
- Fix CVE-2018-5750 (rhbz 1539706 1539708)
+- Fix softlockup (rhbz 1492664 1492665)
* Sat Jan 27 2018 Laura Abbott <labbott@fedoraproject.org>
- Add support for Wacom tablet (rhbz 1539238)