From e761b7725234276a802322549cee5255305a0930 Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Tue, 15 Jul 2008 04:43:49 -0700 Subject: cpu hotplug, sched: Introduce cpu_active_map and redo sched domain managment (take 2) This is based on Linus' idea of creating cpu_active_map that prevents scheduler load balancer from migrating tasks to the cpu that is going down. It allows us to simplify domain management code and avoid unecessary domain rebuilds during cpu hotplug event handling. Please ignore the cpusets part for now. It needs some more work in order to avoid crazy lock nesting. Although I did simplfy and unify domain reinitialization logic. We now simply call partition_sched_domains() in all the cases. This means that we're using exact same code paths as in cpusets case and hence the test below cover cpusets too. Cpuset changes to make rebuild_sched_domains() callable from various contexts are in the separate patch (right next after this one). This not only boots but also easily handles while true; do make clean; make -j 8; done and while true; do on-off-cpu 1; done at the same time. (on-off-cpu 1 simple does echo 0/1 > /sys/.../cpu1/online thing). Suprisingly the box (dual-core Core2) is quite usable. In fact I'm typing this on right now in gnome-terminal and things are moving just fine. Also this is running with most of the debug features enabled (lockdep, mutex, etc) no BUG_ONs or lockdep complaints so far. I believe I addressed all of the Dmitry's comments for original Linus' version. I changed both fair and rt balancer to mask out non-active cpus. And replaced cpu_is_offline() with !cpu_active() in the main scheduler code where it made sense (to me). Signed-off-by: Max Krasnyanskiy Acked-by: Linus Torvalds Acked-by: Peter Zijlstra Acked-by: Gregory Haskins Cc: dmitry.adamushko@gmail.com Cc: pj@sgi.com Signed-off-by: Ingo Molnar --- include/linux/cpumask.h | 6 +++++- include/linux/cpuset.h | 7 +++++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index c24875bd9c5..d614d247279 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -359,13 +359,14 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp, /* * The following particular system cpumasks and operations manage - * possible, present and online cpus. Each of them is a fixed size + * possible, present, active and online cpus. Each of them is a fixed size * bitmap of size NR_CPUS. * * #ifdef CONFIG_HOTPLUG_CPU * cpu_possible_map - has bit 'cpu' set iff cpu is populatable * cpu_present_map - has bit 'cpu' set iff cpu is populated * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler + * cpu_active_map - has bit 'cpu' set iff cpu available to migration * #else * cpu_possible_map - has bit 'cpu' set iff cpu is populated * cpu_present_map - copy of cpu_possible_map @@ -416,6 +417,7 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp, extern cpumask_t cpu_possible_map; extern cpumask_t cpu_online_map; extern cpumask_t cpu_present_map; +extern cpumask_t cpu_active_map; #if NR_CPUS > 1 #define num_online_cpus() cpus_weight(cpu_online_map) @@ -424,6 +426,7 @@ extern cpumask_t cpu_present_map; #define cpu_online(cpu) cpu_isset((cpu), cpu_online_map) #define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map) #define cpu_present(cpu) cpu_isset((cpu), cpu_present_map) +#define cpu_active(cpu) cpu_isset((cpu), cpu_active_map) #else #define num_online_cpus() 1 #define num_possible_cpus() 1 @@ -431,6 +434,7 @@ extern cpumask_t cpu_present_map; #define cpu_online(cpu) ((cpu) == 0) #define cpu_possible(cpu) ((cpu) == 0) #define cpu_present(cpu) ((cpu) == 0) +#define cpu_active(cpu) ((cpu) == 0) #endif #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 038578362b4..e8f450c499b 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -78,6 +78,8 @@ extern void cpuset_track_online_nodes(void); extern int current_cpuset_is_being_rebound(void); +extern void rebuild_sched_domains(void); + #else /* !CONFIG_CPUSETS */ static inline int cpuset_init_early(void) { return 0; } @@ -156,6 +158,11 @@ static inline int current_cpuset_is_being_rebound(void) return 0; } +static inline void rebuild_sched_domains(void) +{ + partition_sched_domains(0, NULL, NULL); +} + #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */ -- cgit From 1b427c153a08fdbc092c2bdbf845b92fda58d857 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 18 Jul 2008 14:01:39 +0200 Subject: sched: fix build error, provide partition_sched_domains() unconditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit provide an empty partition_sched_domains() definition for the UP case: include/linux/cpuset.h: In function ‘rebuild_sched_domains': include/linux/cpuset.h:163: error: implicit declaration of function ‘partition_sched_domains' Signed-off-by: Ingo Molnar --- include/linux/sched.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1941d8b5cf1..26da921530f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -824,7 +824,16 @@ extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, struct sched_domain_attr *dattr_new); extern int arch_reinit_sched_domains(void); -#endif /* CONFIG_SMP */ +#else /* CONFIG_SMP */ + +struct sched_domain_attr; + +static inline void +partition_sched_domains(int ndoms_new, cpumask_t *doms_new, + struct sched_domain_attr *dattr_new) +{ +} +#endif /* !CONFIG_SMP */ struct io_context; /* See blkdev.h */ #define NGROUPS_SMALL 32 -- cgit From 31656519e132f6612584815f128c83976a9aaaef Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 18 Jul 2008 18:01:23 +0200 Subject: sched, x86: clean up hrtick implementation random uvesafb failures were reported against Gentoo: http://bugs.gentoo.org/show_bug.cgi?id=222799 and Mihai Moldovan bisected it back to: > 8f4d37ec073c17e2d4aa8851df5837d798606d6f is first bad commit > commit 8f4d37ec073c17e2d4aa8851df5837d798606d6f > Author: Peter Zijlstra > Date: Fri Jan 25 21:08:29 2008 +0100 > > sched: high-res preemption tick Linus suspected it to be hrtick + vm86 interaction and observed: > Btw, Peter, Ingo: I think that commit is doing bad things. They aren't > _incorrect_ per se, but they are definitely bad. > > Why? > > Using random _TIF_WORK_MASK flags is really impolite for doing > "scheduling" work. There's a reason that arch/x86/kernel/entry_32.S > special-cases the _TIF_NEED_RESCHED flag: we don't want to exit out of > vm86 mode unnecessarily. > > See the "work_notifysig_v86" label, and how it does that > "save_v86_state()" thing etc etc. Right, I never liked having to fiddle with those TIF flags. Initially I needed it because the hrtimer base lock could not nest in the rq lock. That however is fixed these days. Currently the only reason left to fiddle with the TIF flags is remote wakeups. We cannot program a remote cpu's hrtimer. I've been thinking about using the new and improved IPI function call stuff to implement hrtimer_start_on(). However that does require that smp_call_function_single(.wait=0) works from interrupt context - /me looks at the latest series from Jens - Yes that does seem to be supported, good. Here's a stab at cleaning this stuff up ... Mihai reported test success as well. Signed-off-by: Peter Zijlstra Tested-by: Mihai Moldovan Cc: Michal Januszewski Cc: Antonino Daplas Signed-off-by: Ingo Molnar --- include/asm-x86/thread_info.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 895339d2bc0..d7012634ace 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -81,7 +81,6 @@ struct thread_info { #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ -#define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* 32bit process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -108,7 +107,6 @@ struct thread_info { #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) -#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -132,7 +130,7 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED) + (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ -- cgit