summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- bpf-fix-bpf_jit_limit-knob.patch 173
-rw-r--r-- gitrev 2
-rw-r--r-- kernel.spec 11
-rw-r--r-- sources 1
4 files changed, 184 insertions, 3 deletions
diff --git a/bpf-fix-bpf_jit_limit-knob.patch b/bpf-fix-bpf_jit_limit-knob.patch
new file mode 100644
index 000000000..68c9d967c
--- /dev/null
+++ b/bpf-fix-bpf_jit_limit-knob.patch
@@ -0,0 +1,173 @@
+From fdadd04931c2d7cd294dc5b2b342863f94be53a3 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Tue, 11 Dec 2018 12:14:12 +0100
+Subject: bpf: fix bpf_jit_limit knob for PAGE_SIZE >= 64K
+
+Michael and Sandipan report:
+
+ Commit ede95a63b5 introduced a bpf_jit_limit tuneable to limit BPF
+ JIT allocations. At compile time it defaults to PAGE_SIZE * 40000,
+ and is adjusted again at init time if MODULES_VADDR is defined.
+
+ For ppc64 kernels, MODULES_VADDR isn't defined, so we're stuck with
+ the compile-time default at boot-time, which is 0x9c400000 when
+ using 64K page size. This overflows the signed 32-bit bpf_jit_limit
+ value:
+
+ root@ubuntu:/tmp# cat /proc/sys/net/core/bpf_jit_limit
+ -1673527296
+
+ and can cause various unexpected failures throughout the network
+ stack. In one case `strace dhclient eth0` reported:
+
+ setsockopt(5, SOL_SOCKET, SO_ATTACH_FILTER, {len=11, filter=0x105dd27f8},
+ 16) = -1 ENOTSUPP (Unknown error 524)
+
+ and similar failures can be seen with tools like tcpdump. This doesn't
+ always reproduce however, and I'm not sure why. The more consistent
+ failure I've seen is an Ubuntu 18.04 KVM guest booted on a POWER9
+ host would time out on systemd/netplan configuring a virtio-net NIC
+ with no noticeable errors in the logs.
+
+Given this and also given that in near future some architectures like
+arm64 will have a custom area for BPF JIT image allocations we should
+get rid of the BPF_JIT_LIMIT_DEFAULT fallback / default entirely. For
+4.21, we have an overridable bpf_jit_alloc_exec(), bpf_jit_free_exec()
+so therefore add another overridable bpf_jit_alloc_exec_limit() helper
+function which returns the possible size of the memory area for deriving
+the default heuristic in bpf_jit_charge_init().
+
+Like bpf_jit_alloc_exec() and bpf_jit_free_exec(), the new
+bpf_jit_alloc_exec_limit() assumes that module_alloc() is the default
+JIT memory provider, and therefore in case archs implement their custom
+module_alloc() we use MODULES_{END,_VADDR} for limits and otherwise for
+vmalloc_exec() cases like on ppc64 we use VMALLOC_{END,_START}.
+
+Additionally, for archs supporting large page sizes, we should change
+the sysctl to be handled as long to not run into sysctl restrictions
+in future.
+
+Fixes: ede95a63b5e8 ("bpf: add bpf_jit_limit knob to restrict unpriv allocations")
+Reported-by: Sandipan Das <sandipan@linux.ibm.com>
+Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+---
+ include/linux/filter.h | 2 +-
+ kernel/bpf/core.c | 21 +++++++++++++++------
+ net/core/sysctl_net_core.c | 20 +++++++++++++++++---
+ 3 files changed, 33 insertions(+), 10 deletions(-)
+
+diff --git a/include/linux/filter.h b/include/linux/filter.h
+index 795ff0b869bb..a8b9d90a8042 100644
+--- a/include/linux/filter.h
++++ b/include/linux/filter.h
+@@ -861,7 +861,7 @@ bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
+ extern int bpf_jit_enable;
+ extern int bpf_jit_harden;
+ extern int bpf_jit_kallsyms;
+-extern int bpf_jit_limit;
++extern long bpf_jit_limit;
+
+ typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
+
+diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
+index b1a3545d0ec8..b2890c268cb3 100644
+--- a/kernel/bpf/core.c
++++ b/kernel/bpf/core.c
+@@ -365,13 +365,11 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
+ }
+
+ #ifdef CONFIG_BPF_JIT
+-# define BPF_JIT_LIMIT_DEFAULT (PAGE_SIZE * 40000)
+-
+ /* All BPF JIT sysctl knobs here. */
+ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
+ int bpf_jit_harden __read_mostly;
+ int bpf_jit_kallsyms __read_mostly;
+-int bpf_jit_limit __read_mostly = BPF_JIT_LIMIT_DEFAULT;
++long bpf_jit_limit __read_mostly;
+
+ static __always_inline void
+ bpf_get_prog_addr_region(const struct bpf_prog *prog,
+@@ -580,16 +578,27 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
+
+ static atomic_long_t bpf_jit_current;
+
++/* Can be overridden by an arch's JIT compiler if it has a custom,
++ * dedicated BPF backend memory area, or if neither of the two
++ * below apply.
++ */
++u64 __weak bpf_jit_alloc_exec_limit(void)
++{
+ #if defined(MODULES_VADDR)
++ return MODULES_END - MODULES_VADDR;
++#else
++ return VMALLOC_END - VMALLOC_START;
++#endif
++}
++
+ static int __init bpf_jit_charge_init(void)
+ {
+ /* Only used as heuristic here to derive limit. */
+- bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2,
+- PAGE_SIZE), INT_MAX);
++ bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2,
++ PAGE_SIZE), LONG_MAX);
+ return 0;
+ }
+ pure_initcall(bpf_jit_charge_init);
+-#endif
+
+ static int bpf_jit_charge_modmem(u32 pages)
+ {
+diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
+index 37b4667128a3..d67ec17f2cc8 100644
+--- a/net/core/sysctl_net_core.c
++++ b/net/core/sysctl_net_core.c
+@@ -28,6 +28,8 @@ static int two __maybe_unused = 2;
+ static int min_sndbuf = SOCK_MIN_SNDBUF;
+ static int min_rcvbuf = SOCK_MIN_RCVBUF;
+ static int max_skb_frags = MAX_SKB_FRAGS;
++static long long_one __maybe_unused = 1;
++static long long_max __maybe_unused = LONG_MAX;
+
+ static int net_msg_warn; /* Unused, but still a sysctl */
+
+@@ -289,6 +291,17 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
+
+ return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ }
++
++static int
++proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write,
++ void __user *buffer, size_t *lenp,
++ loff_t *ppos)
++{
++ if (!capable(CAP_SYS_ADMIN))
++ return -EPERM;
++
++ return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
++}
+ #endif
+
+ static struct ctl_table net_core_table[] = {
+@@ -398,10 +411,11 @@ static struct ctl_table net_core_table[] = {
+ {
+ .procname = "bpf_jit_limit",
+ .data = &bpf_jit_limit,
+- .maxlen = sizeof(int),
++ .maxlen = sizeof(long),
+ .mode = 0600,
+- .proc_handler = proc_dointvec_minmax_bpf_restricted,
+- .extra1 = &one,
++ .proc_handler = proc_dolongvec_minmax_bpf_restricted,
++ .extra1 = &long_one,
++ .extra2 = &long_max,
+ },
+ #endif
+ {
+--
+cgit 1.2-0.3.lf.el7
+
diff --git a/gitrev b/gitrev
index fab6f201d..ff20bbe6e 100644
--- a/gitrev
+++ b/gitrev
@@ -1 +1 @@
-7566ec393f4161572ba6f11ad5171fd5d59b0fbd
+ddfbab46539f2d37a9e9d357b054486b51f7dc27
diff --git a/kernel.spec b/kernel.spec
index ec008b001..dd5fc0aad 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -77,7 +77,7 @@ Summary: The Linux kernel
# The rc snapshot level
%global rcrev 7
# The git snapshot level
-%define gitrev 0
+%define gitrev 1
# Set rpm version accordingly
%define rpmversion 4.%{upstream_sublevel}.0
%endif
@@ -130,7 +130,7 @@ Summary: The Linux kernel
# Set debugbuildsenabled to 1 for production (build separate debug kernels)
# and 0 for rawhide (all kernels are debug kernels).
# See also 'make debug' and 'make release'.
-%define debugbuildsenabled 1
+%define debugbuildsenabled 0
# Kernel headers are being split out into a separate package
%if 0%{?fedora}
@@ -629,6 +629,9 @@ Patch504: iio-accel-kxcjk1013-Add-more-hardware-ids.patch
# rhbz 1645070 patch queued upstream for merging into 4.21
Patch505: asus-fx503-keyb.patch
+# rhbz 1647947
+Patch506: bpf-fix-bpf_jit_limit-knob.patch
+
# END OF PATCH DEFINITIONS
%endif
@@ -1904,6 +1907,10 @@ fi
#
#
%changelog
+* Tue Dec 18 2018 Justin M. Forbes <jforbes@fedoraproject.org> - 4.20.0-0.rc7.git1.1
+- Linux v4.20-rc7-6-gddfbab46539f
+- Reenable debugging options.
+
* Mon Dec 17 2018 Justin M. Forbes <jforbes@fedoraproject.org> - 4.20.0-0.rc7.git0.1
- Linux v4.20-rc7
diff --git a/sources b/sources
index 431cf579f..7674e69d3 100644
--- a/sources
+++ b/sources
@@ -1,2 +1,3 @@
SHA512 (linux-4.19.tar.xz) = ab67cc746b375a8b135e8b23e35e1d6787930d19b3c26b2679787d62951cbdbc3bb66f8ededeb9b890e5008b2459397f9018f1a6772fdef67780b06a4cb9f6f4
SHA512 (patch-4.20-rc7.xz) = 84c35b95f08454f3920b1400e6fee8c6f30ebfdcc9a32f447d2124867b22a17da87c0d1496dd22512ddb4d6c0ce9457acddb6d6167e8c673d44b3f2a585486bd
+SHA512 (patch-4.20-rc7-git1.xz) = 1be1f4d521267a23b3682926dd7b6cf638d8bd1073dd14575007b7736714668229fd2e0b6532e50d9ff07a3079210741e3bd37c52ecab9706435db546e495f51