summaryrefslogtreecommitdiffstats
path: root/cputimer-cure-lock-inversion.patch
diff options
context:
space:
mode:
Diffstat (limited to 'cputimer-cure-lock-inversion.patch')
-rw-r--r--cputimer-cure-lock-inversion.patch73
1 files changed, 73 insertions, 0 deletions
diff --git a/cputimer-cure-lock-inversion.patch b/cputimer-cure-lock-inversion.patch
new file mode 100644
index 000000000..cebb775bf
--- /dev/null
+++ b/cputimer-cure-lock-inversion.patch
@@ -0,0 +1,73 @@
+Subject: cputimer: Cure lock inversion
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Mon Oct 17 11:50:30 CEST 2011
+
+There's a lock inversion between the cputimer->lock and rq->lock; notably
+the two callchains involved are:
+
+  update_rlimit_cpu()
+    sighand->siglock
+    set_process_cpu_timer()
+      cpu_timer_sample_group()
+        thread_group_cputimer()
+          cputimer->lock
+          thread_group_cputime()
+            task_sched_runtime()
+              ->pi_lock
+              rq->lock
+
+  scheduler_tick()
+    rq->lock
+    task_tick_fair()
+      update_curr()
+        account_group_exec()
+          cputimer->lock
+
+Where the first one is enabling a CLOCK_PROCESS_CPUTIME_ID timer, and
+the second one is keeping it up-to-date.
+
+This problem was introduced by e8abccb7193 ("posix-cpu-timers: Cure
+SMP accounting oddities").
+
+Cure the problem by removing the cputimer->lock and rq->lock nesting.
+This leaves concurrent enablers doing duplicate work, but the time
+wasted should be of the same order as the time otherwise wasted spinning
+on the lock, and the greater-than assignment filter should ensure we
+preserve monotonicity.
+
+Reported-by: Dave Jones <davej@redhat.com>
+Reported-by: Simon Kirby <sim@hostway.ca>
+Cc: stable@kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+---
+ kernel/posix-cpu-timers.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+Index: linux-2.6/kernel/posix-cpu-timers.c
+===================================================================
+--- linux-2.6.orig/kernel/posix-cpu-timers.c
++++ linux-2.6/kernel/posix-cpu-timers.c
+@@ -274,9 +274,7 @@ void thread_group_cputimer(struct task_s
+ struct task_cputime sum;
+ unsigned long flags;
+
+- spin_lock_irqsave(&cputimer->lock, flags);
+ if (!cputimer->running) {
+- cputimer->running = 1;
+ /*
+ * The POSIX timer interface allows for absolute time expiry
+ * values through the TIMER_ABSTIME flag, therefore we have
+@@ -284,8 +282,11 @@ void thread_group_cputimer(struct task_s
+ * it.
+ */
+ thread_group_cputime(tsk, &sum);
++ spin_lock_irqsave(&cputimer->lock, flags);
++ cputimer->running = 1;
+ update_gt_cputime(&cputimer->cputime, &sum);
+- }
++ } else
++ spin_lock_irqsave(&cputimer->lock, flags);
+ *times = cputimer->cputime;
+ spin_unlock_irqrestore(&cputimer->lock, flags);
+ }
+