From 4e55c736133f4545a4f1b8b3cbcdbc0fa6787b25 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 25 Aug 2016 07:31:24 -0700 Subject: Fix for TPROXY panic (rhbz 1370061) Fix for known oom regression --- 0001-OOM-detection-regressions-since-4.7.patch | 121 +++++++++++++++++++++++++ kernel-panic-TPROXY-vanilla-4.7.1.patch | 85 +++++++++++++++++ kernel.spec | 10 ++ 3 files changed, 216 insertions(+) create mode 100644 0001-OOM-detection-regressions-since-4.7.patch create mode 100644 kernel-panic-TPROXY-vanilla-4.7.1.patch diff --git a/0001-OOM-detection-regressions-since-4.7.patch b/0001-OOM-detection-regressions-since-4.7.patch new file mode 100644 index 000000000..4616c7f87 --- /dev/null +++ b/0001-OOM-detection-regressions-since-4.7.patch @@ -0,0 +1,121 @@ +From a7f80308bac4013728e33e2bcb9b60eee78f60fb Mon Sep 17 00:00:00 2001 +From: Michal Hocko +Date: Mon, 22 Aug 2016 11:32:49 +0200 +Subject: [PATCH] OOM detection regressions since 4.7 + +Hi, +there have been multiple reports [1][2][3][4][5] about pre-mature OOM +killer invocations since 4.7 which contains oom detection rework. All of +them were for order-2 (kernel stack) alloaction requests failing because +of a high fragmentation and compaction failing to make any forward +progress. While investigating this we have found out that the compaction +just gives up too early. Vlastimil has been working on compaction +improvement for quite some time and his series [6] is already sitting +in mmotm tree. This already helps a lot because it drops some heuristics +which are more aimed at lower latencies for high orders rather than +reliability. Joonsoo has then identified further problem with too many +blocks being marked as unmovable [7] and Vlastimil has prepared a patch +on top of his series [8] which is also in the mmotm tree now. + +That being said, the regression is real and should be fixed for 4.7 +stable users. [6][8] was reported to help and ooms are no longer +reproducible. 
I know we are quite late (rc3) in 4.8 but I would vote +for mergeing those patches and have them in 4.8. For 4.7 I would go +with a partial revert of the detection rework for high order requests +(see patch below). This patch is really trivial. If those compaction +improvements are just too large for 4.8 then we can use the same patch +as for 4.7 stable for now and revert it in 4.9 after compaction changes +are merged. + +Thoughts? + +[1] http://lkml.kernel.org/r/20160731051121.GB307@x4 +[2] http://lkml.kernel.org/r/201608120901.41463.a.miskiewicz@gmail.com +[3] http://lkml.kernel.org/r/20160801192620.GD31957@dhcp22.suse.cz +[4] https://lists.opensuse.org/opensuse-kernel/2016-08/msg00021.html +[5] https://bugzilla.opensuse.org/show_bug.cgi?id=994066 +[6] http://lkml.kernel.org/r/20160810091226.6709-1-vbabka@suse.cz +[7] http://lkml.kernel.org/r/20160816031222.GC16913@js1304-P5Q-DELUXE +[8] http://lkml.kernel.org/r/f7a9ea9d-bb88-bfd6-e340-3a933559305a@suse.cz +--- + mm/page_alloc.c | 50 ++------------------------------------------------ + 1 file changed, 2 insertions(+), 48 deletions(-) + +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 8b3e134..6e35419 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3254,53 +3254,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, + return NULL; + } + +-static inline bool +-should_compact_retry(struct alloc_context *ac, int order, int alloc_flags, +- enum compact_result compact_result, enum migrate_mode *migrate_mode, +- int compaction_retries) +-{ +- int max_retries = MAX_COMPACT_RETRIES; +- +- if (!order) +- return false; +- +- /* +- * compaction considers all the zone as desperately out of memory +- * so it doesn't really make much sense to retry except when the +- * failure could be caused by weak migration mode. 
+- */ +- if (compaction_failed(compact_result)) { +- if (*migrate_mode == MIGRATE_ASYNC) { +- *migrate_mode = MIGRATE_SYNC_LIGHT; +- return true; +- } +- return false; +- } +- +- /* +- * make sure the compaction wasn't deferred or didn't bail out early +- * due to locks contention before we declare that we should give up. +- * But do not retry if the given zonelist is not suitable for +- * compaction. +- */ +- if (compaction_withdrawn(compact_result)) +- return compaction_zonelist_suitable(ac, order, alloc_flags); +- +- /* +- * !costly requests are much more important than __GFP_REPEAT +- * costly ones because they are de facto nofail and invoke OOM +- * killer to move on while costly can fail and users are ready +- * to cope with that. 1/4 retries is rather arbitrary but we +- * would need much more detailed feedback from compaction to +- * make a better decision. +- */ +- if (order > PAGE_ALLOC_COSTLY_ORDER) +- max_retries /= 4; +- if (compaction_retries <= max_retries) +- return true; +- +- return false; +-} + #else + static inline struct page * + __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, +@@ -3311,6 +3264,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, + return NULL; + } + ++#endif /* CONFIG_COMPACTION */ ++ + static inline bool + should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags, + enum compact_result compact_result, +@@ -3337,7 +3292,6 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla + } + return false; + } +-#endif /* CONFIG_COMPACTION */ + + /* Perform direct synchronous page reclaim */ + static int +-- +2.7.4 + diff --git a/kernel-panic-TPROXY-vanilla-4.7.1.patch b/kernel-panic-TPROXY-vanilla-4.7.1.patch new file mode 100644 index 000000000..9d045cabe --- /dev/null +++ b/kernel-panic-TPROXY-vanilla-4.7.1.patch @@ -0,0 +1,85 @@ +From patchwork Wed Aug 17 16:04:31 2016 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 
+Content-Transfer-Encoding: 7bit +Subject: kernel panic TPROXY , vanilla 4.7.1 +From: Eric Dumazet +X-Patchwork-Id: 660174 +X-Patchwork-Delegate: davem@davemloft.net +Message-Id: <1471449871.29842.3.camel@edumazet-glaptop3.roam.corp.google.com> +To: Denys Fedoryshchenko +Cc: Linux Kernel Network Developers , + netfilter-devel@vger.kernel.org +Date: Wed, 17 Aug 2016 09:04:31 -0700 + +On Wed, 2016-08-17 at 08:42 -0700, Eric Dumazet wrote: +> On Wed, 2016-08-17 at 17:31 +0300, Denys Fedoryshchenko wrote: +> > Hi! +> > +> > Tried to run squid on latest kernel, and hit a panic +> > Sometimes it just shows warning in dmesg (but doesnt work properly) +> > [ 75.701666] IPv4: Attempt to release TCP socket in state 10 +> > ffff88102d430780 +> > [ 83.866974] squid (2700) used greatest stack depth: 12912 bytes left +> > [ 87.506644] IPv4: Attempt to release TCP socket in state 10 +> > ffff880078a48780 +> > [ 114.704295] IPv4: Attempt to release TCP socket in state 10 +> > ffff881029f8ad00 +> > +> > I cannot catch yet oops/panic message, netconsole not working. +> > +> > After triggering warning message 3 times, i am unable to run squid +> > anymore (without reboot), and in netstat it doesnt show port running. +> > +> > firewall is: +> > *mangle +> > -A PREROUTING -p tcp -m socket -j DIVERT +> > -A PREROUTING -p tcp -m tcp --dport 80 -i eno1 -j TPROXY --on-port 3129 +> > --on-ip 0.0.0.0 --tproxy-mark 0x1/0x1 +> > -A DIVERT -j MARK --set-xmark 0x1/0xffffffff +> > -A DIVERT -j ACCEPT +> > +> > routing +> > ip rule add fwmark 1 lookup 100 +> > ip route add local default dev eno1 table 100 +> > +> > +> > squid config is default with tproxy option +> > http_port 3129 tproxy +> > +> +> Hmppff... sorry for this, I will send a fix. +> +> Thanks for the report ! +> + + +Could you try the following ? + +Thanks ! 
+ + net/netfilter/xt_TPROXY.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c +index 7f4414d26a66..663c4c3c9072 100644 +--- a/net/netfilter/xt_TPROXY.c ++++ b/net/netfilter/xt_TPROXY.c +@@ -127,6 +127,8 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp, + daddr, dport, + in->ifindex); + ++ if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) ++ sk = NULL; + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound +@@ -195,6 +197,8 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, + daddr, ntohs(dport), + in->ifindex); + ++ if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) ++ sk = NULL; + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound diff --git a/kernel.spec b/kernel.spec index 352d747fb..90fbc8543 100644 --- a/kernel.spec +++ b/kernel.spec @@ -635,6 +635,12 @@ Patch855: aacraid-Check-size-values-after-double-fetch-from-us.patch #rhbz 1365940 Patch856: 0001-udp-fix-poll-issue-with-zero-sized-packets.patch +#rhbz 1370061 +Patch857: kernel-panic-TPROXY-vanilla-4.7.1.patch + +# lkml.kernel.org/r/<20160822093249.GA14916@dhcp22.suse.cz> +Patch858: 0001-OOM-detection-regressions-since-4.7.patch + # END OF PATCH DEFINITIONS %endif @@ -2162,6 +2168,10 @@ fi # # %changelog +* Thu Aug 25 2016 Laura Abbott +- Fix for TPROXY panic (rhbz 1370061) +- Fix for known OOM regression + * Tue Aug 23 2016 Laura Abbott - Fix for inabiltiy to send zero sized UDP packets (rhbz 1365940) -- cgit