summaryrefslogtreecommitdiffstats
path: root/kernel/timer.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/timer.c')
-rw-r--r--kernel/timer.c374
1 files changed, 232 insertions, 142 deletions
diff --git a/kernel/timer.c b/kernel/timer.c
index c2a8ccfc288..cb1b86a9c52 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -34,6 +34,8 @@
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/delay.h>
+#include <linux/tick.h>
+#include <linux/kallsyms.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -85,7 +87,7 @@ static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
* @j: the time in (absolute) jiffies that should be rounded
* @cpu: the processor number on which the timeout will happen
*
- * __round_jiffies rounds an absolute time in the future (in jiffies)
+ * __round_jiffies() rounds an absolute time in the future (in jiffies)
* up or down to (approximately) full seconds. This is useful for timers
* for which the exact time they fire does not matter too much, as long as
* they fire approximately every X seconds.
@@ -98,7 +100,7 @@ static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
* processors firing at the exact same time, which could lead
* to lock contention or spurious cache line bouncing.
*
- * The return value is the rounded version of the "j" parameter.
+ * The return value is the rounded version of the @j parameter.
*/
unsigned long __round_jiffies(unsigned long j, int cpu)
{
@@ -142,7 +144,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies);
* @j: the time in (relative) jiffies that should be rounded
* @cpu: the processor number on which the timeout will happen
*
- * __round_jiffies_relative rounds a time delta in the future (in jiffies)
+ * __round_jiffies_relative() rounds a time delta in the future (in jiffies)
* up or down to (approximately) full seconds. This is useful for timers
* for which the exact time they fire does not matter too much, as long as
* they fire approximately every X seconds.
@@ -155,7 +157,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies);
* processors firing at the exact same time, which could lead
* to lock contention or spurious cache line bouncing.
*
- * The return value is the rounded version of the "j" parameter.
+ * The return value is the rounded version of the @j parameter.
*/
unsigned long __round_jiffies_relative(unsigned long j, int cpu)
{
@@ -173,7 +175,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative);
* round_jiffies - function to round jiffies to a full second
* @j: the time in (absolute) jiffies that should be rounded
*
- * round_jiffies rounds an absolute time in the future (in jiffies)
+ * round_jiffies() rounds an absolute time in the future (in jiffies)
* up or down to (approximately) full seconds. This is useful for timers
* for which the exact time they fire does not matter too much, as long as
* they fire approximately every X seconds.
@@ -182,7 +184,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative);
* at the same time, rather than at various times spread out. The goal
* of this is to have the CPU wake up less, which saves power.
*
- * The return value is the rounded version of the "j" parameter.
+ * The return value is the rounded version of the @j parameter.
*/
unsigned long round_jiffies(unsigned long j)
{
@@ -194,7 +196,7 @@ EXPORT_SYMBOL_GPL(round_jiffies);
* round_jiffies_relative - function to round jiffies to a full second
* @j: the time in (relative) jiffies that should be rounded
*
- * round_jiffies_relative rounds a time delta in the future (in jiffies)
+ * round_jiffies_relative() rounds a time delta in the future (in jiffies)
* up or down to (approximately) full seconds. This is useful for timers
* for which the exact time they fire does not matter too much, as long as
* they fire approximately every X seconds.
@@ -203,7 +205,7 @@ EXPORT_SYMBOL_GPL(round_jiffies);
* at the same time, rather than at various times spread out. The goal
* of this is to have the CPU wake up less, which saves power.
*
- * The return value is the rounded version of the "j" parameter.
+ * The return value is the rounded version of the @j parameter.
*/
unsigned long round_jiffies_relative(unsigned long j)
{
@@ -262,6 +264,18 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
list_add_tail(&timer->entry, vec);
}
+#ifdef CONFIG_TIMER_STATS
+void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
+{
+ if (timer->start_site)
+ return;
+
+ timer->start_site = addr;
+ memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
+ timer->start_pid = current->pid;
+}
+#endif
+
/**
* init_timer - initialize a timer.
* @timer: the timer to be initialized
@@ -273,11 +287,16 @@ void fastcall init_timer(struct timer_list *timer)
{
timer->entry.next = NULL;
timer->base = __raw_get_cpu_var(tvec_bases);
+#ifdef CONFIG_TIMER_STATS
+ timer->start_site = NULL;
+ timer->start_pid = -1;
+ memset(timer->start_comm, 0, TASK_COMM_LEN);
+#endif
}
EXPORT_SYMBOL(init_timer);
static inline void detach_timer(struct timer_list *timer,
- int clear_pending)
+ int clear_pending)
{
struct list_head *entry = &timer->entry;
@@ -324,6 +343,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
unsigned long flags;
int ret = 0;
+ timer_stats_timer_set_start_info(timer);
BUG_ON(!timer->function);
base = lock_timer_base(timer, &flags);
@@ -374,6 +394,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
tvec_base_t *base = per_cpu(tvec_bases, cpu);
unsigned long flags;
+ timer_stats_timer_set_start_info(timer);
BUG_ON(timer_pending(timer) || !timer->function);
spin_lock_irqsave(&base->lock, flags);
timer->base = base;
@@ -387,7 +408,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
* @timer: the timer to be modified
* @expires: new timeout in jiffies
*
- * mod_timer is a more efficient way to update the expire field of an
+ * mod_timer() is a more efficient way to update the expire field of an
* active timer (if the timer is inactive it will be activated)
*
* mod_timer(timer, expires) is equivalent to:
@@ -406,6 +427,7 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
{
BUG_ON(!timer->function);
+ timer_stats_timer_set_start_info(timer);
/*
* This is a common optimization triggered by the
* networking code - if the timer is re-modified
@@ -436,6 +458,7 @@ int del_timer(struct timer_list *timer)
unsigned long flags;
int ret = 0;
+ timer_stats_timer_clear_start_info(timer);
if (timer_pending(timer)) {
base = lock_timer_base(timer, &flags);
if (timer_pending(timer)) {
@@ -490,7 +513,7 @@ out:
* the timer it also makes sure the handler has finished executing on other
* CPUs.
*
- * Synchronization rules: callers must prevent restarting of the timer,
+ * Synchronization rules: Callers must prevent restarting of the timer,
* otherwise this function is meaningless. It must not be called from
* interrupt contexts. The caller must not hold locks which would prevent
* completion of the timer's handler. The timer's handler must not call
@@ -569,6 +592,8 @@ static inline void __run_timers(tvec_base_t *base)
fn = timer->function;
data = timer->data;
+ timer_stats_account_timer(timer);
+
set_running_timer(base, timer);
detach_timer(timer, 1);
spin_unlock_irq(&base->lock);
@@ -591,105 +616,124 @@ static inline void __run_timers(tvec_base_t *base)
spin_unlock_irq(&base->lock);
}
-#ifdef CONFIG_NO_IDLE_HZ
+#if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ)
/*
* Find out when the next timer event is due to happen. This
* is used on S/390 to stop all activity when a cpus is idle.
* This functions needs to be called disabled.
*/
-unsigned long next_timer_interrupt(void)
+static unsigned long __next_timer_interrupt(tvec_base_t *base)
{
- tvec_base_t *base;
- struct list_head *list;
+ unsigned long timer_jiffies = base->timer_jiffies;
+ unsigned long expires = timer_jiffies + (LONG_MAX >> 1);
+ int index, slot, array, found = 0;
struct timer_list *nte;
- unsigned long expires;
- unsigned long hr_expires = MAX_JIFFY_OFFSET;
- ktime_t hr_delta;
tvec_t *varray[4];
- int i, j;
-
- hr_delta = hrtimer_get_next_event();
- if (hr_delta.tv64 != KTIME_MAX) {
- struct timespec tsdelta;
- tsdelta = ktime_to_timespec(hr_delta);
- hr_expires = timespec_to_jiffies(&tsdelta);
- if (hr_expires < 3)
- return hr_expires + jiffies;
- }
- hr_expires += jiffies;
-
- base = __get_cpu_var(tvec_bases);
- spin_lock(&base->lock);
- expires = base->timer_jiffies + (LONG_MAX >> 1);
- list = NULL;
/* Look for timer events in tv1. */
- j = base->timer_jiffies & TVR_MASK;
+ index = slot = timer_jiffies & TVR_MASK;
do {
- list_for_each_entry(nte, base->tv1.vec + j, entry) {
+ list_for_each_entry(nte, base->tv1.vec + slot, entry) {
+ found = 1;
expires = nte->expires;
- if (j < (base->timer_jiffies & TVR_MASK))
- list = base->tv2.vec + (INDEX(0));
- goto found;
+ /* Look at the cascade bucket(s)? */
+ if (!index || slot < index)
+ goto cascade;
+ return expires;
}
- j = (j + 1) & TVR_MASK;
- } while (j != (base->timer_jiffies & TVR_MASK));
+ slot = (slot + 1) & TVR_MASK;
+ } while (slot != index);
+
+cascade:
+ /* Calculate the next cascade event */
+ if (index)
+ timer_jiffies += TVR_SIZE - index;
+ timer_jiffies >>= TVR_BITS;
/* Check tv2-tv5. */
varray[0] = &base->tv2;
varray[1] = &base->tv3;
varray[2] = &base->tv4;
varray[3] = &base->tv5;
- for (i = 0; i < 4; i++) {
- j = INDEX(i);
+
+ for (array = 0; array < 4; array++) {
+ tvec_t *varp = varray[array];
+
+ index = slot = timer_jiffies & TVN_MASK;
do {
- if (list_empty(varray[i]->vec + j)) {
- j = (j + 1) & TVN_MASK;
- continue;
- }
- list_for_each_entry(nte, varray[i]->vec + j, entry)
+ list_for_each_entry(nte, varp->vec + slot, entry) {
+ found = 1;
if (time_before(nte->expires, expires))
expires = nte->expires;
- if (j < (INDEX(i)) && i < 3)
- list = varray[i + 1]->vec + (INDEX(i + 1));
- goto found;
- } while (j != (INDEX(i)));
- }
-found:
- if (list) {
- /*
- * The search wrapped. We need to look at the next list
- * from next tv element that would cascade into tv element
- * where we found the timer element.
- */
- list_for_each_entry(nte, list, entry) {
- if (time_before(nte->expires, expires))
- expires = nte->expires;
- }
+ }
+ /*
+ * Do we still search for the first timer or are
+ * we looking up the cascade buckets ?
+ */
+ if (found) {
+ /* Look at the cascade bucket(s)? */
+ if (!index || slot < index)
+ break;
+ return expires;
+ }
+ slot = (slot + 1) & TVN_MASK;
+ } while (slot != index);
+
+ if (index)
+ timer_jiffies += TVN_SIZE - index;
+ timer_jiffies >>= TVN_BITS;
}
- spin_unlock(&base->lock);
+ return expires;
+}
- /*
- * It can happen that other CPUs service timer IRQs and increment
- * jiffies, but we have not yet got a local timer tick to process
- * the timer wheels. In that case, the expiry time can be before
- * jiffies, but since the high-resolution timer here is relative to
- * jiffies, the default expression when high-resolution timers are
- * not active,
- *
- * time_before(MAX_JIFFY_OFFSET + jiffies, expires)
- *
- * would falsely evaluate to true. If that is the case, just
- * return jiffies so that we can immediately fire the local timer
- */
- if (time_before(expires, jiffies))
- return jiffies;
+/*
+ * Check, if the next hrtimer event is before the next timer wheel
+ * event:
+ */
+static unsigned long cmp_next_hrtimer_event(unsigned long now,
+ unsigned long expires)
+{
+ ktime_t hr_delta = hrtimer_get_next_event();
+ struct timespec tsdelta;
- if (time_before(hr_expires, expires))
- return hr_expires;
+ if (hr_delta.tv64 == KTIME_MAX)
+ return expires;
+ if (hr_delta.tv64 <= TICK_NSEC)
+ return now;
+
+ tsdelta = ktime_to_timespec(hr_delta);
+ now += timespec_to_jiffies(&tsdelta);
+ if (time_before(now, expires))
+ return now;
return expires;
}
+
+/**
+ * next_timer_interrupt - return the jiffy of the next pending timer
+ */
+unsigned long get_next_timer_interrupt(unsigned long now)
+{
+ tvec_base_t *base = __get_cpu_var(tvec_bases);
+ unsigned long expires;
+
+ spin_lock(&base->lock);
+ expires = __next_timer_interrupt(base);
+ spin_unlock(&base->lock);
+
+ if (time_before_eq(expires, now))
+ return now;
+
+ return cmp_next_hrtimer_event(now, expires);
+}
+
+#ifdef CONFIG_NO_IDLE_HZ
+unsigned long next_timer_interrupt(void)
+{
+ return get_next_timer_interrupt(jiffies);
+}
+#endif
+
#endif
/******************************************************************/
@@ -832,32 +876,35 @@ EXPORT_SYMBOL(do_settimeofday);
*
* Accumulates current time interval and initializes new clocksource
*/
-static int change_clocksource(void)
+static void change_clocksource(void)
{
struct clocksource *new;
cycle_t now;
u64 nsec;
+
new = clocksource_get_next();
- if (clock != new) {
- now = clocksource_read(new);
- nsec = __get_nsec_offset();
- timespec_add_ns(&xtime, nsec);
-
- clock = new;
- clock->cycle_last = now;
- printk(KERN_INFO "Time: %s clocksource has been installed.\n",
- clock->name);
- return 1;
- } else if (clock->update_callback) {
- return clock->update_callback();
- }
- return 0;
+
+ if (clock == new)
+ return;
+
+ now = clocksource_read(new);
+ nsec = __get_nsec_offset();
+ timespec_add_ns(&xtime, nsec);
+
+ clock = new;
+ clock->cycle_last = now;
+
+ clock->error = 0;
+ clock->xtime_nsec = 0;
+ clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
+
+ tick_clock_notify();
+
+ printk(KERN_INFO "Time: %s clocksource has been installed.\n",
+ clock->name);
}
#else
-static inline int change_clocksource(void)
-{
- return 0;
-}
+static inline void change_clocksource(void) { }
#endif
/**
@@ -871,33 +918,56 @@ int timekeeping_is_continuous(void)
do {
seq = read_seqbegin(&xtime_lock);
- ret = clock->is_continuous;
+ ret = clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
} while (read_seqretry(&xtime_lock, seq));
return ret;
}
+/**
+ * read_persistent_clock - Return time in seconds from the persistent clock.
+ *
+ * Weak dummy function for arches that do not yet support it.
+ * Returns seconds from epoch using the battery backed persistent clock.
+ * Returns zero if unsupported.
+ *
+ * XXX - Do be sure to remove it once all arches implement it.
+ */
+unsigned long __attribute__((weak)) read_persistent_clock(void)
+{
+ return 0;
+}
+
/*
* timekeeping_init - Initializes the clocksource and common timekeeping values
*/
void __init timekeeping_init(void)
{
unsigned long flags;
+ unsigned long sec = read_persistent_clock();
write_seqlock_irqsave(&xtime_lock, flags);
ntp_clear();
clock = clocksource_get_next();
- clocksource_calculate_interval(clock, tick_nsec);
+ clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
clock->cycle_last = clocksource_read(clock);
+ xtime.tv_sec = sec;
+ xtime.tv_nsec = 0;
+ set_normalized_timespec(&wall_to_monotonic,
+ -xtime.tv_sec, -xtime.tv_nsec);
+
write_sequnlock_irqrestore(&xtime_lock, flags);
}
-
+/* flag for if timekeeping is suspended */
static int timekeeping_suspended;
+/* time in seconds when suspend began */
+static unsigned long timekeeping_suspend_time;
+
/**
* timekeeping_resume - Resumes the generic timekeeping subsystem.
* @dev: unused
@@ -909,13 +979,26 @@ static int timekeeping_suspended;
static int timekeeping_resume(struct sys_device *dev)
{
unsigned long flags;
+ unsigned long now = read_persistent_clock();
write_seqlock_irqsave(&xtime_lock, flags);
- /* restart the last cycle value */
+
+ if (now && (now > timekeeping_suspend_time)) {
+ unsigned long sleep_length = now - timekeeping_suspend_time;
+
+ xtime.tv_sec += sleep_length;
+ wall_to_monotonic.tv_sec -= sleep_length;
+ }
+ /* re-base the last cycle value */
clock->cycle_last = clocksource_read(clock);
clock->error = 0;
timekeeping_suspended = 0;
write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ touch_softlockup_watchdog();
+ /* Resume hrtimers */
+ clock_was_set();
+
return 0;
}
@@ -925,6 +1008,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
write_seqlock_irqsave(&xtime_lock, flags);
timekeeping_suspended = 1;
+ timekeeping_suspend_time = read_persistent_clock();
write_sequnlock_irqrestore(&xtime_lock, flags);
return 0;
}
@@ -1089,11 +1173,8 @@ static void update_wall_time(void)
clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
/* check to see if there is a new clocksource to use */
- if (change_clocksource()) {
- clock->error = 0;
- clock->xtime_nsec = 0;
- clocksource_calculate_interval(clock, tick_nsec);
- }
+ change_clocksource();
+ update_vsyscall(&xtime, clock);
}
/*
@@ -1162,11 +1243,9 @@ static inline void calc_load(unsigned long ticks)
* This read-write spinlock protects us from races in SMP while
* playing with xtime and avenrun.
*/
-#ifndef ARCH_HAVE_XTIME_LOCK
-__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
+__attribute__((weak)) __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
EXPORT_SYMBOL(xtime_lock);
-#endif
/*
* This function runs timers and the timer-tq in bottom half context.
@@ -1175,7 +1254,8 @@ static void run_timer_softirq(struct softirq_action *h)
{
tvec_base_t *base = __get_cpu_var(tvec_bases);
- hrtimer_run_queues();
+ hrtimer_run_queues();
+
if (time_after_eq(jiffies, base->timer_jiffies))
__run_timers(base);
}
@@ -1392,17 +1472,16 @@ asmlinkage long sys_gettid(void)
}
/**
- * sys_sysinfo - fill in sysinfo struct
+ * do_sysinfo - fill in sysinfo struct
* @info: pointer to buffer to fill
*/
-asmlinkage long sys_sysinfo(struct sysinfo __user *info)
+int do_sysinfo(struct sysinfo *info)
{
- struct sysinfo val;
unsigned long mem_total, sav_total;
unsigned int mem_unit, bitcount;
unsigned long seq;
- memset((char *)&val, 0, sizeof(struct sysinfo));
+ memset(info, 0, sizeof(struct sysinfo));
do {
struct timespec tp;
@@ -1422,17 +1501,17 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
tp.tv_sec++;
}
- val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
+ info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
- val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
- val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
- val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+ info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
+ info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
+ info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
- val.procs = nr_threads;
+ info->procs = nr_threads;
} while (read_seqretry(&xtime_lock, seq));
- si_meminfo(&val);
- si_swapinfo(&val);
+ si_meminfo(info);
+ si_swapinfo(info);
/*
* If the sum of all the available memory (i.e. ram + swap)
@@ -1443,11 +1522,11 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
* -Erik Andersen <andersee@debian.org>
*/
- mem_total = val.totalram + val.totalswap;
- if (mem_total < val.totalram || mem_total < val.totalswap)
+ mem_total = info->totalram + info->totalswap;
+ if (mem_total < info->totalram || mem_total < info->totalswap)
goto out;
bitcount = 0;
- mem_unit = val.mem_unit;
+ mem_unit = info->mem_unit;
while (mem_unit > 1) {
bitcount++;
mem_unit >>= 1;
@@ -1459,22 +1538,31 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
/*
* If mem_total did not overflow, multiply all memory values by
- * val.mem_unit and set it to 1. This leaves things compatible
+ * info->mem_unit and set it to 1. This leaves things compatible
* with 2.2.x, and also retains compatibility with earlier 2.4.x
* kernels...
*/
- val.mem_unit = 1;
- val.totalram <<= bitcount;
- val.freeram <<= bitcount;
- val.sharedram <<= bitcount;
- val.bufferram <<= bitcount;
- val.totalswap <<= bitcount;
- val.freeswap <<= bitcount;
- val.totalhigh <<= bitcount;
- val.freehigh <<= bitcount;
+ info->mem_unit = 1;
+ info->totalram <<= bitcount;
+ info->freeram <<= bitcount;
+ info->sharedram <<= bitcount;
+ info->bufferram <<= bitcount;
+ info->totalswap <<= bitcount;
+ info->freeswap <<= bitcount;
+ info->totalhigh <<= bitcount;
+ info->freehigh <<= bitcount;
+
+out:
+ return 0;
+}
+
+asmlinkage long sys_sysinfo(struct sysinfo __user *info)
+{
+ struct sysinfo val;
+
+ do_sysinfo(&val);
- out:
if (copy_to_user(info, &val, sizeof(struct sysinfo)))
return -EFAULT;
@@ -1613,6 +1701,8 @@ void __init init_timers(void)
int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
+ init_timer_stats();
+
BUG_ON(err == NOTIFY_BAD);
register_cpu_notifier(&timers_nb);
open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL);
@@ -1624,7 +1714,7 @@ struct time_interpolator *time_interpolator __read_mostly;
static struct time_interpolator *time_interpolator_list __read_mostly;
static DEFINE_SPINLOCK(time_interpolator_lock);
-static inline u64 time_interpolator_get_cycles(unsigned int src)
+static inline cycles_t time_interpolator_get_cycles(unsigned int src)
{
unsigned long (*x)(void);
@@ -1650,8 +1740,8 @@ static inline u64 time_interpolator_get_counter(int writelock)
if (time_interpolator->jitter)
{
- u64 lcycle;
- u64 now;
+ cycles_t lcycle;
+ cycles_t now;
do {
lcycle = time_interpolator->last_cycle;