diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 28 | ||||
-rw-r--r-- | kernel/hrtimer.c | 41 | ||||
-rw-r--r-- | kernel/irq/chip.c | 2 | ||||
-rw-r--r-- | kernel/irq/handle.c | 16 | ||||
-rw-r--r-- | kernel/irq/manage.c | 10 | ||||
-rw-r--r-- | kernel/kallsyms.c | 16 | ||||
-rw-r--r-- | kernel/power/disk.c | 10 | ||||
-rw-r--r-- | kernel/time/tick-common.c | 26 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 27 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 15 | ||||
-rw-r--r-- | kernel/trace/trace.c | 5 | ||||
-rw-r--r-- | kernel/trace/trace_irqsoff.c | 1 | ||||
-rw-r--r-- | kernel/trace/trace_sched_wakeup.c | 1 |
13 files changed, 157 insertions, 41 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c29831076e7..5a54ff42874 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1115,8 +1115,10 @@ static void cgroup_kill_sb(struct super_block *sb) { } write_unlock(&css_set_lock); - list_del(&root->root_list); - root_count--; + if (!list_empty(&root->root_list)) { + list_del(&root->root_list); + root_count--; + } mutex_unlock(&cgroup_mutex); @@ -2434,7 +2436,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, err_remove: + cgroup_lock_hierarchy(root); list_del(&cgrp->sibling); + cgroup_unlock_hierarchy(root); root->number_of_cgroups--; err_destroy: @@ -2507,7 +2511,7 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) for_each_subsys(cgrp->root, ss) { struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; int refcnt; - do { + while (1) { /* We can only remove a CSS with a refcnt==1 */ refcnt = atomic_read(&css->refcnt); if (refcnt > 1) { @@ -2521,7 +2525,10 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) * css_tryget() to spin until we set the * CSS_REMOVED bits or abort */ - } while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt); + if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt) + break; + cpu_relax(); + } } done: for_each_subsys(cgrp->root, ss) { @@ -2991,20 +2998,21 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, mutex_unlock(&cgroup_mutex); return 0; } - task_lock(tsk); - cg = tsk->cgroups; - parent = task_cgroup(tsk, subsys->subsys_id); /* Pin the hierarchy */ - if (!atomic_inc_not_zero(&parent->root->sb->s_active)) { + if (!atomic_inc_not_zero(&root->sb->s_active)) { /* We race with the final deactivate_super() */ mutex_unlock(&cgroup_mutex); return 0; } /* Keep the cgroup alive */ + task_lock(tsk); + parent = task_cgroup(tsk, subsys->subsys_id); + cg = tsk->cgroups; get_css_set(cg); task_unlock(tsk); + mutex_unlock(&cgroup_mutex); /* Now do the VFS work to create a cgroup */ @@ -3043,7 +3051,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, mutex_unlock(&inode->i_mutex); put_css_set(cg); - deactivate_super(parent->root->sb); + deactivate_super(root->sb); /* The cgroup is still accessible in the VFS, but * we're not going to try to rmdir() it at this * point. */ @@ -3069,7 +3077,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, mutex_lock(&cgroup_mutex); put_css_set(cg); mutex_unlock(&cgroup_mutex); - deactivate_super(parent->root->sb); + deactivate_super(root->sb); return ret; } diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index f33afb0407b..f394d2a42ca 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -501,6 +501,13 @@ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base) continue; timer = rb_entry(base->first, struct hrtimer, node); expires = ktime_sub(hrtimer_get_expires(timer), base->offset); + /* + * clock_was_set() has changed base->offset so the + * result might be negative. Fix it up to prevent a + * false positive in clockevents_program_event() + */ + if (expires.tv64 < 0) + expires.tv64 = 0; if (expires.tv64 < cpu_base->expires_next.tv64) cpu_base->expires_next = expires; } @@ -1158,6 +1165,29 @@ static void __run_hrtimer(struct hrtimer *timer) #ifdef CONFIG_HIGH_RES_TIMERS +static int force_clock_reprogram; + +/* + * After 5 iteration's attempts, we consider that hrtimer_interrupt() + * is hanging, which could happen with something that slows the interrupt + * such as the tracing. Then we force the clock reprogramming for each future + * hrtimer interrupts to avoid infinite loops and use the min_delta_ns + * threshold that we will overwrite. + * The next tick event will be scheduled to 3 times we currently spend on + * hrtimer_interrupt(). This gives a good compromise, the cpus will spend + * 1/4 of their time to process the hrtimer interrupts. This is enough to + * let it running without serious starvation. + */ + +static inline void +hrtimer_interrupt_hanging(struct clock_event_device *dev, + ktime_t try_time) +{ + force_clock_reprogram = 1; + dev->min_delta_ns = (unsigned long)try_time.tv64 * 3; + printk(KERN_WARNING "hrtimer: interrupt too slow, " + "forcing clock min delta to %lu ns\n", dev->min_delta_ns); +} /* * High resolution timer interrupt * Called with interrupts disabled @@ -1167,6 +1197,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); struct hrtimer_clock_base *base; ktime_t expires_next, now; + int nr_retries = 0; int i; BUG_ON(!cpu_base->hres_active); @@ -1174,6 +1205,10 @@ void hrtimer_interrupt(struct clock_event_device *dev) dev->next_event.tv64 = KTIME_MAX; retry: + /* 5 retries is enough to notice a hang */ + if (!(++nr_retries % 5)) + hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now)); + now = ktime_get(); expires_next.tv64 = KTIME_MAX; @@ -1226,7 +1261,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) /* Reprogramming necessary ? */ if (expires_next.tv64 != KTIME_MAX) { - if (tick_program_event(expires_next, 0)) + if (tick_program_event(expires_next, force_clock_reprogram)) goto retry; } } @@ -1580,6 +1615,10 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, break; #ifdef CONFIG_HOTPLUG_CPU + case CPU_DYING: + case CPU_DYING_FROZEN: + clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu); + break; case CPU_DEAD: case CPU_DEAD_FROZEN: { diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index f63c706d25e..7de11bd64df 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -383,6 +383,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) out_unlock: spin_unlock(&desc->lock); } +EXPORT_SYMBOL_GPL(handle_level_irq); /** * handle_fasteoi_irq - irq handler for transparent controllers @@ -593,6 +594,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, } spin_unlock_irqrestore(&desc->lock, flags); } +EXPORT_SYMBOL_GPL(__set_irq_handler); void set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip, diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index c20db0be917..3aba8d12f32 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -39,6 +39,18 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc) ack_bad_irq(irq); } +#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) +static void __init init_irq_default_affinity(void) +{ + alloc_bootmem_cpumask_var(&irq_default_affinity); + cpumask_setall(irq_default_affinity); +} +#else +static void __init init_irq_default_affinity(void) +{ +} +#endif + /* * Linux has a controller-independent interrupt architecture. * Every controller has a 'controller-template', that is used @@ -134,6 +146,8 @@ int __init early_irq_init(void) int legacy_count; int i; + init_irq_default_affinity(); + desc = irq_desc_legacy; legacy_count = ARRAY_SIZE(irq_desc_legacy); @@ -219,6 +233,8 @@ int __init early_irq_init(void) int count; int i; + init_irq_default_affinity(); + desc = irq_desc; count = ARRAY_SIZE(irq_desc); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index cd0cd8dcb34..291f0366455 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -15,17 +15,9 @@ #include "internals.h" -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) cpumask_var_t irq_default_affinity; -static int init_irq_default_affinity(void) -{ - alloc_cpumask_var(&irq_default_affinity, GFP_KERNEL); - cpumask_setall(irq_default_affinity); - return 0; -} -core_initcall(init_irq_default_affinity); - /** * synchronize_irq - wait for pending IRQ handlers (on other CPUs) * @irq: interrupt number to wait for diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index e694afa0eb8..7b8b0f21a5b 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -30,19 +30,20 @@ #define all_var 0 #endif -extern const unsigned long kallsyms_addresses[]; -extern const u8 kallsyms_names[]; +/* These will be re-linked against their real values during the second link stage */ +extern const unsigned long kallsyms_addresses[] __attribute__((weak)); +extern const u8 kallsyms_names[] __attribute__((weak)); /* tell the compiler that the count isn't in the small data section if the arch * has one (eg: FRV) */ extern const unsigned long kallsyms_num_syms - __attribute__((__section__(".rodata"))); +__attribute__((weak, section(".rodata"))); -extern const u8 kallsyms_token_table[]; -extern const u16 kallsyms_token_index[]; +extern const u8 kallsyms_token_table[] __attribute__((weak)); +extern const u16 kallsyms_token_index[] __attribute__((weak)); -extern const unsigned long kallsyms_markers[]; +extern const unsigned long kallsyms_markers[] __attribute__((weak)); static inline int is_kernel_inittext(unsigned long addr) { @@ -167,6 +168,9 @@ static unsigned long get_symbol_pos(unsigned long addr, unsigned long symbol_start = 0, symbol_end = 0; unsigned long i, low, high, mid; + /* This kernel should never had been booted. */ + BUG_ON(!kallsyms_addresses); + /* do a binary search on the sorted kallsyms_addresses array */ low = 0; high = kallsyms_num_syms; diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 45e8541ab7e..432ee575c9e 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -71,6 +71,14 @@ void hibernation_set_ops(struct platform_hibernation_ops *ops) mutex_unlock(&pm_mutex); } +static bool entering_platform_hibernation; + +bool system_entering_hibernation(void) +{ + return entering_platform_hibernation; +} +EXPORT_SYMBOL(system_entering_hibernation); + #ifdef CONFIG_PM_DEBUG static void hibernation_debug_sleep(void) { @@ -411,6 +419,7 @@ int hibernation_platform_enter(void) if (error) goto Close; + entering_platform_hibernation = true; suspend_console(); error = device_suspend(PMSG_HIBERNATE); if (error) { @@ -445,6 +454,7 @@ int hibernation_platform_enter(void) Finish: hibernation_ops->finish(); Resume_devices: + entering_platform_hibernation = false; device_resume(PMSG_RESTORE); resume_console(); Close: diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 63e05d423a0..21a5ca84951 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -274,6 +274,21 @@ out_bc: } /* + * Transfer the do_timer job away from a dying cpu. + * + * Called with interrupts disabled. + */ +static void tick_handover_do_timer(int *cpup) +{ + if (*cpup == tick_do_timer_cpu) { + int cpu = cpumask_first(cpu_online_mask); + + tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu : + TICK_DO_TIMER_NONE; + } +} + +/* * Shutdown an event device on a given cpu: * * This is called on a life CPU, when a CPU is dead. So we cannot @@ -297,13 +312,6 @@ static void tick_shutdown(unsigned int *cpup) clockevents_exchange_device(dev, NULL); td->evtdev = NULL; } - /* Transfer the do_timer job away from this cpu */ - if (*cpup == tick_do_timer_cpu) { - int cpu = cpumask_first(cpu_online_mask); - - tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu : - TICK_DO_TIMER_NONE; - } spin_unlock_irqrestore(&tick_device_lock, flags); } @@ -357,6 +365,10 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason, tick_broadcast_oneshot_control(reason); break; + case CLOCK_EVT_NOTIFY_CPU_DYING: + tick_handover_do_timer(dev); + break; + case CLOCK_EVT_NOTIFY_CPU_DEAD: tick_shutdown_broadcast_oneshot(dev); tick_shutdown_broadcast(dev); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2f32969c09d..7dcf6e9f2b0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -17,6 +17,7 @@ #include <linux/clocksource.h> #include <linux/kallsyms.h> #include <linux/seq_file.h> +#include <linux/suspend.h> #include <linux/debugfs.h> #include <linux/hardirq.h> #include <linux/kthread.h> @@ -1965,6 +1966,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, #ifdef CONFIG_FUNCTION_GRAPH_TRACER static atomic_t ftrace_graph_active; +static struct notifier_block ftrace_suspend_notifier; int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) { @@ -2043,6 +2045,27 @@ static int start_graph_tracing(void) return ret; } +/* + * Hibernation protection. + * The state of the current task is too much unstable during + * suspend/restore to disk. We want to protect against that. + */ +static int +ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state, + void *unused) +{ + switch (state) { + case PM_HIBERNATION_PREPARE: + pause_graph_tracing(); + break; + + case PM_POST_HIBERNATION: + unpause_graph_tracing(); + break; + } + return NOTIFY_DONE; +} + int register_ftrace_graph(trace_func_graph_ret_t retfunc, trace_func_graph_ent_t entryfunc) { @@ -2050,6 +2073,9 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, mutex_lock(&ftrace_sysctl_lock); + ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; + register_pm_notifier(&ftrace_suspend_notifier); + atomic_inc(&ftrace_graph_active); ret = start_graph_tracing(); if (ret) { @@ -2075,6 +2101,7 @@ void unregister_ftrace_graph(void) ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ftrace_graph_entry = ftrace_graph_entry_stub; ftrace_shutdown(FTRACE_STOP_FUNC_RET); + unregister_pm_notifier(&ftrace_suspend_notifier); mutex_unlock(&ftrace_sysctl_lock); } diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 8b0daf0662e..bd38c5cfd8a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -246,7 +246,7 @@ static inline int test_time_stamp(u64 delta) return 0; } -#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page)) +#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data)) /* * head_page == tail_page && head == tail then buffer is empty. @@ -1025,12 +1025,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, } if (next_page == head_page) { - if (!(buffer->flags & RB_FL_OVERWRITE)) { - /* reset write */ - if (tail <= BUF_PAGE_SIZE) - local_set(&tail_page->write, tail); + if (!(buffer->flags & RB_FL_OVERWRITE)) goto out_unlock; - } /* tail_page has not moved yet? */ if (tail_page == cpu_buffer->tail_page) { @@ -1105,6 +1101,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, return event; out_unlock: + /* reset write */ + if (tail <= BUF_PAGE_SIZE) + local_set(&tail_page->write, tail); + __raw_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); return NULL; @@ -2174,6 +2174,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->overrun = 0; cpu_buffer->entries = 0; + + cpu_buffer->write_stamp = 0; + cpu_buffer->read_stamp = 0; } /** diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c580233add9..17bb88d86ac 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -40,7 +40,7 @@ #define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) -unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; +unsigned long __read_mostly tracing_max_latency; unsigned long __read_mostly tracing_thresh; /* @@ -3736,7 +3736,7 @@ static struct notifier_block trace_die_notifier = { * it if we decide to change what log level the ftrace dump * should be at. */ -#define KERN_TRACE KERN_INFO +#define KERN_TRACE KERN_EMERG static void trace_printk_seq(struct trace_seq *s) @@ -3770,6 +3770,7 @@ void ftrace_dump(void) dump_ran = 1; /* No turning back! */ + tracing_off(); ftrace_kill(); for_each_tracing_cpu(cpu) { diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 7c2e326bbc8..62a78d94353 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -380,6 +380,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr) static void __irqsoff_tracer_init(struct trace_array *tr) { + tracing_max_latency = 0; irqsoff_trace = tr; /* make sure that the tracer is visible */ smp_wmb(); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 43586b689e3..42ae1e77b6b 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -333,6 +333,7 @@ static void stop_wakeup_tracer(struct trace_array *tr) static int wakeup_tracer_init(struct trace_array *tr) { + tracing_max_latency = 0; wakeup_trace = tr; start_wakeup_tracer(tr); return 0; |