diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cpuset.c | 35 | ||||
-rw-r--r-- | kernel/exit.c | 14 | ||||
-rw-r--r-- | kernel/posix-timers.c | 8 | ||||
-rw-r--r-- | kernel/sched.c | 64 | ||||
-rw-r--r-- | kernel/signal.c | 4 | ||||
-rw-r--r-- | kernel/sysctl.c | 2 | ||||
-rw-r--r-- | kernel/workqueue.c | 2 |
7 files changed, 86 insertions, 43 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 2a75e44e1a4..fe2f71f92ae 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1554,7 +1554,7 @@ struct ctr_struct { * when reading out p->cpuset, as we don't really care if it changes * on the next cycle, and we are not going to try to dereference it. */ -static inline int pid_array_load(pid_t *pidarray, int npids, struct cpuset *cs) +static int pid_array_load(pid_t *pidarray, int npids, struct cpuset *cs) { int n = 0; struct task_struct *g, *p; @@ -2150,6 +2150,33 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) } /** + * cpuset_lock - lock out any changes to cpuset structures + * + * The out of memory (oom) code needs to lock down cpusets + * from being changed while it scans the tasklist looking for a + * task in an overlapping cpuset. Expose callback_sem via this + * cpuset_lock() routine, so the oom code can lock it, before + * locking the task list. The tasklist_lock is a spinlock, so + * must be taken inside callback_sem. + */ + +void cpuset_lock(void) +{ + down(&callback_sem); +} + +/** + * cpuset_unlock - release lock on cpuset changes + * + * Undo the lock taken in a previous cpuset_lock() call. + */ + +void cpuset_unlock(void) +{ + up(&callback_sem); +} + +/** * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors? * @p: pointer to task_struct of some other task. * @@ -2158,7 +2185,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) * determine if task @p's memory usage might impact the memory * available to the current task. * - * Acquires callback_sem - not suitable for calling from a fast path. + * Call while holding callback_sem. **/ int cpuset_excl_nodes_overlap(const struct task_struct *p) @@ -2166,8 +2193,6 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p) const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */ int overlap = 0; /* do cpusets overlap? */ - down(&callback_sem); - task_lock(current); if (current->flags & PF_EXITING) { task_unlock(current); @@ -2186,8 +2211,6 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p) overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed); done: - up(&callback_sem); - return overlap; } diff --git a/kernel/exit.c b/kernel/exit.c index f8e609ff189..93cee367133 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -193,7 +193,7 @@ int is_orphaned_pgrp(int pgrp) return retval; } -static inline int has_stopped_jobs(int pgrp) +static int has_stopped_jobs(int pgrp) { int retval = 0; struct task_struct *p; @@ -230,7 +230,7 @@ static inline int has_stopped_jobs(int pgrp) * * NOTE that reparent_to_init() gives the caller full capabilities. */ -static inline void reparent_to_init(void) +static void reparent_to_init(void) { write_lock_irq(&tasklist_lock); @@ -244,7 +244,9 @@ static inline void reparent_to_init(void) /* Set the exit signal to SIGCHLD so we signal init on exit */ current->exit_signal = SIGCHLD; - if ((current->policy == SCHED_NORMAL) && (task_nice(current) < 0)) + if ((current->policy == SCHED_NORMAL || + current->policy == SCHED_BATCH) + && (task_nice(current) < 0)) set_user_nice(current, 0); /* cpus_allowed? */ /* rt_priority? */ @@ -367,7 +369,7 @@ void daemonize(const char *name, ...) EXPORT_SYMBOL(daemonize); -static inline void close_files(struct files_struct * files) +static void close_files(struct files_struct * files) { int i, j; struct fdtable *fdt; @@ -541,7 +543,7 @@ static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_re p->real_parent = reaper; } -static inline void reparent_thread(task_t *p, task_t *father, int traced) +static void reparent_thread(task_t *p, task_t *father, int traced) { /* We don't want people slaying init. */ if (p->exit_signal != -1) @@ -605,7 +607,7 @@ static inline void reparent_thread(task_t *p, task_t *father, int traced) * group, and if no such member exists, give it to * the global child reaper process (ie "init") */ -static inline void forget_original_parent(struct task_struct * father, +static void forget_original_parent(struct task_struct * father, struct list_head *to_release) { struct task_struct *p, *reaper = father; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 9e66e614862..197208b3aa2 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -192,7 +192,7 @@ static inline int common_clock_set(const clockid_t which_clock, return do_sys_settimeofday(tp, NULL); } -static inline int common_timer_create(struct k_itimer *new_timer) +static int common_timer_create(struct k_itimer *new_timer) { hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock); new_timer->it.real.timer.data = new_timer; @@ -361,7 +361,7 @@ static int posix_timer_fn(void *data) return ret; } -static inline struct task_struct * good_sigevent(sigevent_t * event) +static struct task_struct * good_sigevent(sigevent_t * event) { struct task_struct *rtn = current->group_leader; @@ -687,7 +687,7 @@ sys_timer_getoverrun(timer_t timer_id) /* Set a POSIX.1b interval timer. */ /* timr->it_lock is taken. */ -static inline int +static int common_timer_set(struct k_itimer *timr, int flags, struct itimerspec *new_setting, struct itimerspec *old_setting) { @@ -829,7 +829,7 @@ retry_delete: /* * return timer owned by the process, used by exit_itimers */ -static inline void itimer_delete(struct k_itimer *timer) +static void itimer_delete(struct k_itimer *timer) { unsigned long flags; diff --git a/kernel/sched.c b/kernel/sched.c index c9dec2aa197..788ecce1e0e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -521,7 +521,7 @@ static inline void sched_info_dequeued(task_t *t) * long it was waiting to run. We also note when it began so that we * can keep stats on how long its timeslice is. */ -static inline void sched_info_arrive(task_t *t) +static void sched_info_arrive(task_t *t) { unsigned long now = jiffies, diff = 0; struct runqueue *rq = task_rq(t); @@ -748,10 +748,14 @@ static int recalc_task_prio(task_t *p, unsigned long long now) unsigned long long __sleep_time = now - p->timestamp; unsigned long sleep_time; - if (__sleep_time > NS_MAX_SLEEP_AVG) - sleep_time = NS_MAX_SLEEP_AVG; - else - sleep_time = (unsigned long)__sleep_time; + if (unlikely(p->policy == SCHED_BATCH)) + sleep_time = 0; + else { + if (__sleep_time > NS_MAX_SLEEP_AVG) + sleep_time = NS_MAX_SLEEP_AVG; + else + sleep_time = (unsigned long)__sleep_time; + } if (likely(sleep_time > 0)) { /* @@ -1003,7 +1007,7 @@ void kick_process(task_t *p) * We want to under-estimate the load of migration sources, to * balance conservatively. */ -static inline unsigned long __source_load(int cpu, int type, enum idle_type idle) +static unsigned long __source_load(int cpu, int type, enum idle_type idle) { runqueue_t *rq = cpu_rq(cpu); unsigned long running = rq->nr_running; @@ -1866,7 +1870,7 @@ void sched_exec(void) * pull_task - move a task from a remote runqueue to the local runqueue. * Both runqueues must be locked. */ -static inline +static void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) { @@ -1888,7 +1892,7 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, /* * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? */ -static inline +static int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, struct sched_domain *sd, enum idle_type idle, int *all_pinned) @@ -2374,7 +2378,7 @@ out_balanced: * idle_balance is called by schedule() if this_cpu is about to become * idle. Attempts to pull tasks from other CPUs. */ -static inline void idle_balance(int this_cpu, runqueue_t *this_rq) +static void idle_balance(int this_cpu, runqueue_t *this_rq) { struct sched_domain *sd; @@ -2758,7 +2762,7 @@ static inline void wakeup_busy_runqueue(runqueue_t *rq) resched_task(rq->idle); } -static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq) +static void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq) { struct sched_domain *tmp, *sd = NULL; cpumask_t sibling_map; @@ -2812,7 +2816,7 @@ static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd) return p->time_slice * (100 - sd->per_cpu_gain) / 100; } -static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq) +static int dependent_sleeper(int this_cpu, runqueue_t *this_rq) { struct sched_domain *tmp, *sd = NULL; cpumask_t sibling_map; @@ -3560,7 +3564,7 @@ void set_user_nice(task_t *p, long nice) * The RT priorities are set via sched_setscheduler(), but we still * allow the 'normal' nice value to be set - but as expected * it wont have any effect on scheduling until the task is - * not SCHED_NORMAL: + * not SCHED_NORMAL/SCHED_BATCH: */ if (rt_task(p)) { p->static_prio = NICE_TO_PRIO(nice); @@ -3706,10 +3710,16 @@ static void __setscheduler(struct task_struct *p, int policy, int prio) BUG_ON(p->array); p->policy = policy; p->rt_priority = prio; - if (policy != SCHED_NORMAL) + if (policy != SCHED_NORMAL && policy != SCHED_BATCH) { p->prio = MAX_RT_PRIO-1 - p->rt_priority; - else + } else { p->prio = p->static_prio; + /* + * SCHED_BATCH tasks are treated as perpetual CPU hogs: + */ + if (policy == SCHED_BATCH) + p->sleep_avg = 0; + } } /** @@ -3733,29 +3743,35 @@ recheck: if (policy < 0) policy = oldpolicy = p->policy; else if (policy != SCHED_FIFO && policy != SCHED_RR && - policy != SCHED_NORMAL) - return -EINVAL; + policy != SCHED_NORMAL && policy != SCHED_BATCH) + return -EINVAL; /* * Valid priorities for SCHED_FIFO and SCHED_RR are - * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0. + * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and + * SCHED_BATCH is 0. */ if (param->sched_priority < 0 || (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) || (!p->mm && param->sched_priority > MAX_RT_PRIO-1)) return -EINVAL; - if ((policy == SCHED_NORMAL) != (param->sched_priority == 0)) + if ((policy == SCHED_NORMAL || policy == SCHED_BATCH) + != (param->sched_priority == 0)) return -EINVAL; /* * Allow unprivileged RT tasks to decrease priority: */ if (!capable(CAP_SYS_NICE)) { - /* can't change policy */ - if (policy != p->policy && - !p->signal->rlim[RLIMIT_RTPRIO].rlim_cur) + /* + * can't change policy, except between SCHED_NORMAL + * and SCHED_BATCH: + */ + if (((policy != SCHED_NORMAL && p->policy != SCHED_BATCH) && + (policy != SCHED_BATCH && p->policy != SCHED_NORMAL)) && + !p->signal->rlim[RLIMIT_RTPRIO].rlim_cur) return -EPERM; /* can't increase priority */ - if (policy != SCHED_NORMAL && + if ((policy != SCHED_NORMAL && policy != SCHED_BATCH) && param->sched_priority > p->rt_priority && param->sched_priority > p->signal->rlim[RLIMIT_RTPRIO].rlim_cur) @@ -4233,6 +4249,7 @@ asmlinkage long sys_sched_get_priority_max(int policy) ret = MAX_USER_RT_PRIO-1; break; case SCHED_NORMAL: + case SCHED_BATCH: ret = 0; break; } @@ -4256,6 +4273,7 @@ asmlinkage long sys_sched_get_priority_min(int policy) ret = 1; break; case SCHED_NORMAL: + case SCHED_BATCH: ret = 0; } return ret; @@ -5990,7 +6008,7 @@ next_sg: * Detach sched domains from a group of cpus specified in cpu_map * These cpus will now be attached to the NULL domain */ -static inline void detach_destroy_domains(const cpumask_t *cpu_map) +static void detach_destroy_domains(const cpumask_t *cpu_map) { int i; diff --git a/kernel/signal.c b/kernel/signal.c index 1da2e74beb9..5dafbd36d62 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -476,7 +476,7 @@ unblock_all_signals(void) spin_unlock_irqrestore(¤t->sighand->siglock, flags); } -static inline int collect_signal(int sig, struct sigpending *list, siginfo_t *info) +static int collect_signal(int sig, struct sigpending *list, siginfo_t *info) { struct sigqueue *q, *first = NULL; int still_pending = 0; @@ -1881,7 +1881,7 @@ do_signal_stop(int signr) * We return zero if we still hold the siglock and should look * for another signal without checking group_stop_count again. */ -static inline int handle_group_stop(void) +static int handle_group_stop(void) { int stop_count; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 62d4d956687..f5d69b6e29f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -648,7 +648,7 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, -#if defined(CONFIG_S390) +#if defined(CONFIG_S390) && defined(CONFIG_SMP) { .ctl_name = KERN_SPIN_RETRY, .procname = "spin_retry", diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 82c4fa70595..b052e2c4c71 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -147,7 +147,7 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq, return ret; } -static inline void run_workqueue(struct cpu_workqueue_struct *cwq) +static void run_workqueue(struct cpu_workqueue_struct *cwq) { unsigned long flags; |