diff options
author | Thorsten Leemhuis <fedora@leemhuis.info> | 2016-09-07 18:34:56 +0200 |
---|---|---|
committer | Thorsten Leemhuis <fedora@leemhuis.info> | 2016-09-07 18:34:56 +0200 |
commit | 372e1be826c02046a5e489641247ad5b3e8645df (patch) | |
tree | 0fa2929b38b48ea48b322be841413b5f43bfd6d4 /0001-cgroup-reduce-read-locked-section-of-cgroup_threadgr.patch | |
parent | a236a78b22a3f14eaaa9ed7931c65d26a11e0680 (diff) | |
parent | 4b8d25bc3d43e4eff06c268281857e654ef6189c (diff) | |
download | kernel-4.7.2-201.vanilla.knurd.1.fc24.tar.gz kernel-4.7.2-201.vanilla.knurd.1.fc24.tar.xz kernel-4.7.2-201.vanilla.knurd.1.fc24.zip |
Merge remote-tracking branch 'origin/f24' into f24-user-thl-vanilla-fedorakernel-4.7.2-201.vanilla.knurd.1.fc24
Diffstat (limited to '0001-cgroup-reduce-read-locked-section-of-cgroup_threadgr.patch')
-rw-r--r-- | 0001-cgroup-reduce-read-locked-section-of-cgroup_threadgr.patch | 112 |
1 files changed, 112 insertions, 0 deletions
diff --git a/0001-cgroup-reduce-read-locked-section-of-cgroup_threadgr.patch b/0001-cgroup-reduce-read-locked-section-of-cgroup_threadgr.patch new file mode 100644 index 000000000..daad25310 --- /dev/null +++ b/0001-cgroup-reduce-read-locked-section-of-cgroup_threadgr.patch @@ -0,0 +1,112 @@ +From 568ac888215c7fb2fabe8ea739b00ec3c1f5d440 Mon Sep 17 00:00:00 2001 +From: Balbir Singh <bsingharora@gmail.com> +Date: Wed, 10 Aug 2016 15:43:06 -0400 +Subject: [PATCH] cgroup: reduce read locked section of + cgroup_threadgroup_rwsem during fork + +cgroup_threadgroup_rwsem is acquired in read mode during process exit +and fork. It is also grabbed in write mode during +__cgroup_procs_write(). I've recently run into a scenario with lots +of memory pressure and OOM and I am beginning to see + +systemd + + __switch_to+0x1f8/0x350 + __schedule+0x30c/0x990 + schedule+0x48/0xc0 + percpu_down_write+0x114/0x170 + __cgroup_procs_write.isra.12+0xb8/0x3c0 + cgroup_file_write+0x74/0x1a0 + kernfs_fop_write+0x188/0x200 + __vfs_write+0x6c/0xe0 + vfs_write+0xc0/0x230 + SyS_write+0x6c/0x110 + system_call+0x38/0xb4 + +This thread is waiting on the reader of cgroup_threadgroup_rwsem to +exit. The reader itself is under memory pressure and has gone into +reclaim after fork. There are times the reader also ends up waiting on +oom_lock as well. 
+ + __switch_to+0x1f8/0x350 + __schedule+0x30c/0x990 + schedule+0x48/0xc0 + jbd2_log_wait_commit+0xd4/0x180 + ext4_evict_inode+0x88/0x5c0 + evict+0xf8/0x2a0 + dispose_list+0x50/0x80 + prune_icache_sb+0x6c/0x90 + super_cache_scan+0x190/0x210 + shrink_slab.part.15+0x22c/0x4c0 + shrink_zone+0x288/0x3c0 + do_try_to_free_pages+0x1dc/0x590 + try_to_free_pages+0xdc/0x260 + __alloc_pages_nodemask+0x72c/0xc90 + alloc_pages_current+0xb4/0x1a0 + page_table_alloc+0xc0/0x170 + __pte_alloc+0x58/0x1f0 + copy_page_range+0x4ec/0x950 + copy_process.isra.5+0x15a0/0x1870 + _do_fork+0xa8/0x4b0 + ppc_clone+0x8/0xc + +In the meanwhile, all processes exiting/forking are blocked almost +stalling the system. + +This patch moves the threadgroup_change_begin from before +cgroup_fork() to just before cgroup_canfork(). There is no need to +worry about threadgroup changes till the task is actually added to the +threadgroup. This avoids having to call reclaim with +cgroup_threadgroup_rwsem held. + +tj: Subject and description edits. 
+ +Signed-off-by: Balbir Singh <bsingharora@gmail.com> +Acked-by: Zefan Li <lizefan@huawei.com> +Cc: Oleg Nesterov <oleg@redhat.com> +Cc: Andrew Morton <akpm@linux-foundation.org> +Cc: stable@vger.kernel.org # v4.2+ +Signed-off-by: Tejun Heo <tj@kernel.org> +--- + kernel/fork.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/fork.c b/kernel/fork.c +index 52e725d..aaf7823 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -1404,7 +1404,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, + p->real_start_time = ktime_get_boot_ns(); + p->io_context = NULL; + p->audit_context = NULL; +- threadgroup_change_begin(current); + cgroup_fork(p); + #ifdef CONFIG_NUMA + p->mempolicy = mpol_dup(p->mempolicy); +@@ -1556,6 +1555,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, + INIT_LIST_HEAD(&p->thread_group); + p->task_works = NULL; + ++ threadgroup_change_begin(current); + /* + * Ensure that the cgroup subsystem policies allow the new process to be + * forked. It should be noted the the new process's css_set can be changed +@@ -1656,6 +1656,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, + bad_fork_cancel_cgroup: + cgroup_cancel_fork(p); + bad_fork_free_pid: ++ threadgroup_change_end(current); + if (pid != &init_struct_pid) + free_pid(pid); + bad_fork_cleanup_thread: +@@ -1688,7 +1689,6 @@ bad_fork_cleanup_policy: + mpol_put(p->mempolicy); + bad_fork_cleanup_threadgroup_lock: + #endif +- threadgroup_change_end(current); + delayacct_tsk_free(p); + bad_fork_cleanup_count: + atomic_dec(&p->cred->user->processes); +-- +2.7.4 + |