Diffstat (limited to 'kernel/timer.c')
-rw-r--r-- | kernel/timer.c | 374
1 file changed, 232 insertions, 142 deletions
diff --git a/kernel/timer.c b/kernel/timer.c
index c2a8ccfc288..cb1b86a9c52 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -34,6 +34,8 @@
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
 #include <linux/delay.h>
+#include <linux/tick.h>
+#include <linux/kallsyms.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -85,7 +87,7 @@ static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
  * @j: the time in (absolute) jiffies that should be rounded
  * @cpu: the processor number on which the timeout will happen
  *
- * __round_jiffies rounds an absolute time in the future (in jiffies)
+ * __round_jiffies() rounds an absolute time in the future (in jiffies)
  * up or down to (approximately) full seconds. This is useful for timers
  * for which the exact time they fire does not matter too much, as long as
  * they fire approximately every X seconds.
@@ -98,7 +100,7 @@ static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
  * processors firing at the exact same time, which could lead
  * to lock contention or spurious cache line bouncing.
  *
- * The return value is the rounded version of the "j" parameter.
+ * The return value is the rounded version of the @j parameter.
  */
 unsigned long __round_jiffies(unsigned long j, int cpu)
 {
@@ -142,7 +144,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies);
  * @j: the time in (relative) jiffies that should be rounded
  * @cpu: the processor number on which the timeout will happen
  *
- * __round_jiffies_relative rounds a time delta in the future (in jiffies)
+ * __round_jiffies_relative() rounds a time delta in the future (in jiffies)
  * up or down to (approximately) full seconds. This is useful for timers
  * for which the exact time they fire does not matter too much, as long as
  * they fire approximately every X seconds.
@@ -155,7 +157,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies);
  * processors firing at the exact same time, which could lead
  * to lock contention or spurious cache line bouncing.
  *
- * The return value is the rounded version of the "j" parameter.
+ * The return value is the rounded version of the @j parameter.
  */
 unsigned long __round_jiffies_relative(unsigned long j, int cpu)
 {
@@ -173,7 +175,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative);
  * round_jiffies - function to round jiffies to a full second
  * @j: the time in (absolute) jiffies that should be rounded
  *
- * round_jiffies rounds an absolute time in the future (in jiffies)
+ * round_jiffies() rounds an absolute time in the future (in jiffies)
  * up or down to (approximately) full seconds. This is useful for timers
  * for which the exact time they fire does not matter too much, as long as
  * they fire approximately every X seconds.
@@ -182,7 +184,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative);
  * at the same time, rather than at various times spread out. The goal
  * of this is to have the CPU wake up less, which saves power.
  *
- * The return value is the rounded version of the "j" parameter.
+ * The return value is the rounded version of the @j parameter.
  */
 unsigned long round_jiffies(unsigned long j)
 {
@@ -194,7 +196,7 @@ EXPORT_SYMBOL_GPL(round_jiffies);
  * round_jiffies_relative - function to round jiffies to a full second
  * @j: the time in (relative) jiffies that should be rounded
  *
- * round_jiffies_relative rounds a time delta in the future (in jiffies)
+ * round_jiffies_relative() rounds a time delta in the future (in jiffies)
  * up or down to (approximately) full seconds. This is useful for timers
  * for which the exact time they fire does not matter too much, as long as
  * they fire approximately every X seconds.
@@ -203,7 +205,7 @@ EXPORT_SYMBOL_GPL(round_jiffies);
  * at the same time, rather than at various times spread out. The goal
  * of this is to have the CPU wake up less, which saves power.
  *
- * The return value is the rounded version of the "j" parameter.
+ * The return value is the rounded version of the @j parameter.
  */
 unsigned long round_jiffies_relative(unsigned long j)
 {
@@ -262,6 +264,18 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
 	list_add_tail(&timer->entry, vec);
 }
 
+#ifdef CONFIG_TIMER_STATS
+void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
+{
+	if (timer->start_site)
+		return;
+
+	timer->start_site = addr;
+	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
+	timer->start_pid = current->pid;
+}
+#endif
+
 /**
  * init_timer - initialize a timer.
  * @timer: the timer to be initialized
@@ -273,11 +287,16 @@ void fastcall init_timer(struct timer_list *timer)
 {
 	timer->entry.next = NULL;
 	timer->base = __raw_get_cpu_var(tvec_bases);
+#ifdef CONFIG_TIMER_STATS
+	timer->start_site = NULL;
+	timer->start_pid = -1;
+	memset(timer->start_comm, 0, TASK_COMM_LEN);
+#endif
 }
 EXPORT_SYMBOL(init_timer);
 
 static inline void detach_timer(struct timer_list *timer,
-				int clear_pending)
+			int clear_pending)
 {
 	struct list_head *entry = &timer->entry;
 
@@ -324,6 +343,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
 	unsigned long flags;
 	int ret = 0;
 
+	timer_stats_timer_set_start_info(timer);
 	BUG_ON(!timer->function);
 
 	base = lock_timer_base(timer, &flags);
@@ -374,6 +394,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
 	tvec_base_t *base = per_cpu(tvec_bases, cpu);
 	unsigned long flags;
 
+	timer_stats_timer_set_start_info(timer);
 	BUG_ON(timer_pending(timer) || !timer->function);
 	spin_lock_irqsave(&base->lock, flags);
 	timer->base = base;
@@ -387,7 +408,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
  * @timer: the timer to be modified
  * @expires: new timeout in jiffies
  *
- * mod_timer is a more efficient way to update the expire field of an
+ * mod_timer() is a more efficient way to update the expire field of an
  * active timer (if the timer is inactive it will be activated)
 *
 * mod_timer(timer, expires) is equivalent to:
@@ -406,6 +427,7 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
 {
 	BUG_ON(!timer->function);
 
+	timer_stats_timer_set_start_info(timer);
 	/*
 	 * This is a common optimization triggered by the
 	 * networking code - if the timer is re-modified
@@ -436,6 +458,7 @@ int del_timer(struct timer_list *timer)
 	unsigned long flags;
 	int ret = 0;
 
+	timer_stats_timer_clear_start_info(timer);
 	if (timer_pending(timer)) {
 		base = lock_timer_base(timer, &flags);
 		if (timer_pending(timer)) {
@@ -490,7 +513,7 @@ out:
  * the timer it also makes sure the handler has finished executing on other
  * CPUs.
 *
- * Synchronization rules: callers must prevent restarting of the timer,
+ * Synchronization rules: Callers must prevent restarting of the timer,
  * otherwise this function is meaningless. It must not be called from
  * interrupt contexts. The caller must not hold locks which would prevent
  * completion of the timer's handler. The timer's handler must not call
@@ -569,6 +592,8 @@ static inline void __run_timers(tvec_base_t *base)
 			fn = timer->function;
 			data = timer->data;
 
+			timer_stats_account_timer(timer);
+
 			set_running_timer(base, timer);
 			detach_timer(timer, 1);
 			spin_unlock_irq(&base->lock);
@@ -591,105 +616,124 @@ static inline void __run_timers(tvec_base_t *base)
 	spin_unlock_irq(&base->lock);
 }
 
-#ifdef CONFIG_NO_IDLE_HZ
+#if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ)
 /*
  * Find out when the next timer event is due to happen. This
  * is used on S/390 to stop all activity when a cpus is idle.
  * This functions needs to be called disabled.
  */
-unsigned long next_timer_interrupt(void)
+static unsigned long __next_timer_interrupt(tvec_base_t *base)
 {
-	tvec_base_t *base;
-	struct list_head *list;
+	unsigned long timer_jiffies = base->timer_jiffies;
+	unsigned long expires = timer_jiffies + (LONG_MAX >> 1);
+	int index, slot, array, found = 0;
 	struct timer_list *nte;
-	unsigned long expires;
-	unsigned long hr_expires = MAX_JIFFY_OFFSET;
-	ktime_t hr_delta;
 	tvec_t *varray[4];
-	int i, j;
-
-	hr_delta = hrtimer_get_next_event();
-	if (hr_delta.tv64 != KTIME_MAX) {
-		struct timespec tsdelta;
-		tsdelta = ktime_to_timespec(hr_delta);
-		hr_expires = timespec_to_jiffies(&tsdelta);
-		if (hr_expires < 3)
-			return hr_expires + jiffies;
-	}
-	hr_expires += jiffies;
-
-	base = __get_cpu_var(tvec_bases);
-	spin_lock(&base->lock);
-	expires = base->timer_jiffies + (LONG_MAX >> 1);
-	list = NULL;
 
 	/* Look for timer events in tv1. */
-	j = base->timer_jiffies & TVR_MASK;
+	index = slot = timer_jiffies & TVR_MASK;
 	do {
-		list_for_each_entry(nte, base->tv1.vec + j, entry) {
+		list_for_each_entry(nte, base->tv1.vec + slot, entry) {
+			found = 1;
 			expires = nte->expires;
-			if (j < (base->timer_jiffies & TVR_MASK))
-				list = base->tv2.vec + (INDEX(0));
-			goto found;
+			/* Look at the cascade bucket(s)? */
+			if (!index || slot < index)
+				goto cascade;
+			return expires;
 		}
-		j = (j + 1) & TVR_MASK;
-	} while (j != (base->timer_jiffies & TVR_MASK));
+		slot = (slot + 1) & TVR_MASK;
+	} while (slot != index);
+
+cascade:
+	/* Calculate the next cascade event */
+	if (index)
+		timer_jiffies += TVR_SIZE - index;
+	timer_jiffies >>= TVR_BITS;
 
 	/* Check tv2-tv5. */
 	varray[0] = &base->tv2;
 	varray[1] = &base->tv3;
 	varray[2] = &base->tv4;
 	varray[3] = &base->tv5;
-	for (i = 0; i < 4; i++) {
-		j = INDEX(i);
+
+	for (array = 0; array < 4; array++) {
+		tvec_t *varp = varray[array];
+
+		index = slot = timer_jiffies & TVN_MASK;
 		do {
-			if (list_empty(varray[i]->vec + j)) {
-				j = (j + 1) & TVN_MASK;
-				continue;
-			}
-			list_for_each_entry(nte, varray[i]->vec + j, entry)
+			list_for_each_entry(nte, varp->vec + slot, entry) {
+				found = 1;
 				if (time_before(nte->expires, expires))
 					expires = nte->expires;
-			if (j < (INDEX(i)) && i < 3)
-				list = varray[i + 1]->vec + (INDEX(i + 1));
-			goto found;
-		} while (j != (INDEX(i)));
-	}
-found:
-	if (list) {
-		/*
-		 * The search wrapped. We need to look at the next list
-		 * from next tv element that would cascade into tv element
-		 * where we found the timer element.
-		 */
-		list_for_each_entry(nte, list, entry) {
-			if (time_before(nte->expires, expires))
-				expires = nte->expires;
-		}
+			}
+			/*
+			 * Do we still search for the first timer or are
+			 * we looking up the cascade buckets ?
+			 */
+			if (found) {
+				/* Look at the cascade bucket(s)? */
+				if (!index || slot < index)
+					break;
+				return expires;
+			}
+			slot = (slot + 1) & TVN_MASK;
+		} while (slot != index);
+
+		if (index)
+			timer_jiffies += TVN_SIZE - index;
+		timer_jiffies >>= TVN_BITS;
 	}
-	spin_unlock(&base->lock);
+	return expires;
+}
 
-	/*
-	 * It can happen that other CPUs service timer IRQs and increment
-	 * jiffies, but we have not yet got a local timer tick to process
-	 * the timer wheels. In that case, the expiry time can be before
-	 * jiffies, but since the high-resolution timer here is relative to
-	 * jiffies, the default expression when high-resolution timers are
-	 * not active,
-	 *
-	 *   time_before(MAX_JIFFY_OFFSET + jiffies, expires)
-	 *
-	 * would falsely evaluate to true. If that is the case, just
-	 * return jiffies so that we can immediately fire the local timer
-	 */
-	if (time_before(expires, jiffies))
-		return jiffies;
+/*
+ * Check, if the next hrtimer event is before the next timer wheel
+ * event:
+ */
+static unsigned long cmp_next_hrtimer_event(unsigned long now,
+					    unsigned long expires)
+{
+	ktime_t hr_delta = hrtimer_get_next_event();
+	struct timespec tsdelta;
 
-	if (time_before(hr_expires, expires))
-		return hr_expires;
+	if (hr_delta.tv64 == KTIME_MAX)
+		return expires;
 
+	if (hr_delta.tv64 <= TICK_NSEC)
+		return now;
+
+	tsdelta = ktime_to_timespec(hr_delta);
+	now += timespec_to_jiffies(&tsdelta);
+	if (time_before(now, expires))
+		return now;
 	return expires;
 }
+
+/**
+ * next_timer_interrupt - return the jiffy of the next pending timer
+ */
+unsigned long get_next_timer_interrupt(unsigned long now)
+{
+	tvec_base_t *base = __get_cpu_var(tvec_bases);
+	unsigned long expires;
+
+	spin_lock(&base->lock);
+	expires = __next_timer_interrupt(base);
+	spin_unlock(&base->lock);
+
+	if (time_before_eq(expires, now))
+		return now;
+
+	return cmp_next_hrtimer_event(now, expires);
+}
+
+#ifdef CONFIG_NO_IDLE_HZ
+unsigned long next_timer_interrupt(void)
+{
+	return get_next_timer_interrupt(jiffies);
+}
+#endif
+
 #endif
 
 /******************************************************************/
@@ -832,32 +876,35 @@ EXPORT_SYMBOL(do_settimeofday);
 *
  * Accumulates current time interval and initializes new clocksource
 */
-static int change_clocksource(void)
+static void change_clocksource(void)
 {
 	struct clocksource *new;
 	cycle_t now;
 	u64 nsec;
+
 	new = clocksource_get_next();
-	if (clock != new) {
-		now = clocksource_read(new);
-		nsec = __get_nsec_offset();
-		timespec_add_ns(&xtime, nsec);
-
-		clock = new;
-		clock->cycle_last = now;
-		printk(KERN_INFO "Time: %s clocksource has been installed.\n",
-		       clock->name);
-		return 1;
-	} else if (clock->update_callback) {
-		return clock->update_callback();
-	}
-	return 0;
+
+	if (clock == new)
+		return;
+
+	now = clocksource_read(new);
+	nsec = __get_nsec_offset();
+	timespec_add_ns(&xtime, nsec);
+
+	clock = new;
+	clock->cycle_last = now;
+
+	clock->error = 0;
+	clock->xtime_nsec = 0;
+	clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
+
+	tick_clock_notify();
+
+	printk(KERN_INFO "Time: %s clocksource has been installed.\n",
+	       clock->name);
 }
 #else
-static inline int change_clocksource(void)
-{
-	return 0;
-}
+static inline void change_clocksource(void) { }
 #endif
 
/**
@@ -871,33 +918,56 @@ int timekeeping_is_continuous(void)
 	do {
 		seq = read_seqbegin(&xtime_lock);
 
-		ret = clock->is_continuous;
+		ret = clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
 
 	} while (read_seqretry(&xtime_lock, seq));
 
 	return ret;
 }
 
+/**
+ * read_persistent_clock - Return time in seconds from the persistent clock.
+ *
+ * Weak dummy function for arches that do not yet support it.
+ * Returns seconds from epoch using the battery backed persistent clock.
+ * Returns zero if unsupported.
+ *
+ * XXX - Do be sure to remove it once all arches implement it.
+ */
+unsigned long __attribute__((weak)) read_persistent_clock(void)
+{
+	return 0;
+}
+
 /*
 * timekeeping_init - Initializes the clocksource and common timekeeping values
 */
 void __init timekeeping_init(void)
 {
 	unsigned long flags;
+	unsigned long sec = read_persistent_clock();
 
 	write_seqlock_irqsave(&xtime_lock, flags);
 
 	ntp_clear();
 
 	clock = clocksource_get_next();
-	clocksource_calculate_interval(clock, tick_nsec);
+	clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
 	clock->cycle_last = clocksource_read(clock);
 
+	xtime.tv_sec = sec;
+	xtime.tv_nsec = 0;
+	set_normalized_timespec(&wall_to_monotonic,
+		-xtime.tv_sec, -xtime.tv_nsec);
+
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 }
 
-
+/* flag for if timekeeping is suspended */
 static int timekeeping_suspended;
+/* time in seconds when suspend began */
+static unsigned long timekeeping_suspend_time;
+
 /**
  * timekeeping_resume - Resumes the generic timekeeping subsystem.
  * @dev: unused
@@ -909,13 +979,26 @@ static int timekeeping_suspended;
 static int timekeeping_resume(struct sys_device *dev)
 {
 	unsigned long flags;
+	unsigned long now = read_persistent_clock();
 
 	write_seqlock_irqsave(&xtime_lock, flags);
-	/* restart the last cycle value */
+
+	if (now && (now > timekeeping_suspend_time)) {
+		unsigned long sleep_length = now - timekeeping_suspend_time;
+
+		xtime.tv_sec += sleep_length;
+		wall_to_monotonic.tv_sec -= sleep_length;
+	}
+	/* re-base the last cycle value */
 	clock->cycle_last = clocksource_read(clock);
 	clock->error = 0;
 	timekeeping_suspended = 0;
 	write_sequnlock_irqrestore(&xtime_lock, flags);
+
+	touch_softlockup_watchdog();
+	/* Resume hrtimers */
+	clock_was_set();
+
 	return 0;
 }
 
@@ -925,6 +1008,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
 
 	write_seqlock_irqsave(&xtime_lock, flags);
 	timekeeping_suspended = 1;
+	timekeeping_suspend_time = read_persistent_clock();
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 	return 0;
 }
@@ -1089,11 +1173,8 @@ static void update_wall_time(void)
 	clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift;
 
 	/* check to see if there is a new clocksource to use */
-	if (change_clocksource()) {
-		clock->error = 0;
-		clock->xtime_nsec = 0;
-		clocksource_calculate_interval(clock, tick_nsec);
-	}
+	change_clocksource();
+	update_vsyscall(&xtime, clock);
 }
 
 /*
@@ -1162,11 +1243,9 @@ static inline void calc_load(unsigned long ticks)
 * This read-write spinlock protects us from races in SMP while
 * playing with xtime and avenrun.
 */
-#ifndef ARCH_HAVE_XTIME_LOCK
-__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
+__attribute__((weak)) __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
 
 EXPORT_SYMBOL(xtime_lock);
-#endif
 
 /*
 * This function runs timers and the timer-tq in bottom half context.
@@ -1175,7 +1254,8 @@ static void run_timer_softirq(struct softirq_action *h)
 {
 	tvec_base_t *base = __get_cpu_var(tvec_bases);
 
-	hrtimer_run_queues();
+	hrtimer_run_queues();
+
 	if (time_after_eq(jiffies, base->timer_jiffies))
 		__run_timers(base);
 }
@@ -1392,17 +1472,16 @@ asmlinkage long sys_gettid(void)
 }
 
 /**
- * sys_sysinfo - fill in sysinfo struct
+ * do_sysinfo - fill in sysinfo struct
  * @info: pointer to buffer to fill
 */
-asmlinkage long sys_sysinfo(struct sysinfo __user *info)
+int do_sysinfo(struct sysinfo *info)
 {
-	struct sysinfo val;
 	unsigned long mem_total, sav_total;
 	unsigned int mem_unit, bitcount;
 	unsigned long seq;
 
-	memset((char *)&val, 0, sizeof(struct sysinfo));
+	memset(info, 0, sizeof(struct sysinfo));
 
 	do {
 		struct timespec tp;
@@ -1422,17 +1501,17 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
 			tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
 			tp.tv_sec++;
 		}
-		val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
+		info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
 
-		val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
-		val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
-		val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+		info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
+		info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
+		info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
 
-		val.procs = nr_threads;
+		info->procs = nr_threads;
 	} while (read_seqretry(&xtime_lock, seq));
 
-	si_meminfo(&val);
-	si_swapinfo(&val);
+	si_meminfo(info);
+	si_swapinfo(info);
 
 	/*
 	 * If the sum of all the available memory (i.e. ram + swap)
@@ -1443,11 +1522,11 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
 	 * -Erik Andersen <andersee@debian.org>
 	 */
 
-	mem_total = val.totalram + val.totalswap;
-	if (mem_total < val.totalram || mem_total < val.totalswap)
+	mem_total = info->totalram + info->totalswap;
+	if (mem_total < info->totalram || mem_total < info->totalswap)
 		goto out;
 	bitcount = 0;
-	mem_unit = val.mem_unit;
+	mem_unit = info->mem_unit;
 	while (mem_unit > 1) {
 		bitcount++;
 		mem_unit >>= 1;
@@ -1459,22 +1538,31 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
 
 	/*
 	 * If mem_total did not overflow, multiply all memory values by
-	 * val.mem_unit and set it to 1. This leaves things compatible
+	 * info->mem_unit and set it to 1. This leaves things compatible
 	 * with 2.2.x, and also retains compatibility with earlier 2.4.x
 	 * kernels...
 	 */
 
-	val.mem_unit = 1;
-	val.totalram <<= bitcount;
-	val.freeram <<= bitcount;
-	val.sharedram <<= bitcount;
-	val.bufferram <<= bitcount;
-	val.totalswap <<= bitcount;
-	val.freeswap <<= bitcount;
-	val.totalhigh <<= bitcount;
-	val.freehigh <<= bitcount;
+	info->mem_unit = 1;
+	info->totalram <<= bitcount;
+	info->freeram <<= bitcount;
+	info->sharedram <<= bitcount;
+	info->bufferram <<= bitcount;
+	info->totalswap <<= bitcount;
+	info->freeswap <<= bitcount;
+	info->totalhigh <<= bitcount;
+	info->freehigh <<= bitcount;
+
+out:
+	return 0;
+}
+
+asmlinkage long sys_sysinfo(struct sysinfo __user *info)
+{
+	struct sysinfo val;
+
+	do_sysinfo(&val);
 
- out:
 	if (copy_to_user(info, &val, sizeof(struct sysinfo)))
 		return -EFAULT;
 
@@ -1613,6 +1701,8 @@ void __init init_timers(void)
 	int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
 				(void *)(long)smp_processor_id());
 
+	init_timer_stats();
+
 	BUG_ON(err == NOTIFY_BAD);
 	register_cpu_notifier(&timers_nb);
 	open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL);
@@ -1624,7 +1714,7 @@ struct time_interpolator *time_interpolator __read_mostly;
 static struct time_interpolator *time_interpolator_list __read_mostly;
 static DEFINE_SPINLOCK(time_interpolator_lock);
 
-static inline u64 time_interpolator_get_cycles(unsigned int src)
+static inline cycles_t time_interpolator_get_cycles(unsigned int src)
 {
 	unsigned long (*x)(void);
 
@@ -1650,8 +1740,8 @@ static inline u64 time_interpolator_get_counter(int writelock)
 
 	if (time_interpolator->jitter)
 	{
-		u64 lcycle;
-		u64 now;
+		cycles_t lcycle;
+		cycles_t now;
 
 		do {
 			lcycle = time_interpolator->last_cycle;
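
For context, here is a minimal sketch (not part of the patch) of how a driver
might use the round_jiffies() helpers whose kernel-doc is touched above. The
module name and the 5-second period are invented for illustration; the timer
API shown is the 2.6-era one used by this file:

/* Hypothetical example module; assumes CONFIG and APIs of this kernel era. */
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list housekeeping_timer;

static void housekeeping_fn(unsigned long data)
{
	/* ... periodic work whose exact firing time does not matter ... */

	/*
	 * Re-arm ~5 seconds out, rounded to a full second, so that many
	 * such timers across the system expire together and the CPU
	 * wakes up less often.
	 */
	mod_timer(&housekeeping_timer,
		  jiffies + round_jiffies_relative(5 * HZ));
}

static int __init housekeeping_init(void)
{
	init_timer(&housekeeping_timer);
	housekeeping_timer.function = housekeeping_fn;
	housekeeping_timer.data = 0;
	housekeeping_timer.expires = jiffies + round_jiffies_relative(5 * HZ);
	add_timer(&housekeeping_timer);
	return 0;
}

static void __exit housekeeping_exit(void)
{
	del_timer_sync(&housekeeping_timer);
}

module_init(housekeeping_init);
module_exit(housekeeping_exit);
MODULE_LICENSE("GPL");	/* round_jiffies_relative() is EXPORT_SYMBOL_GPL */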
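The timer_stats_timer_set_start_info() calls added to __mod_timer(),
add_timer_on() and mod_timer() above rely on a small wrapper that lives in
include/linux/timer.h when CONFIG_TIMER_STATS=y. A simplified sketch of that
wrapper (reconstructed from memory of the 2.6.21 header, so treat the exact
form as an assumption):

/* Simplified sketch; see include/linux/timer.h for the real definition. */
static inline void timer_stats_timer_set_start_info(struct timer_list *timer)
{
	/*
	 * Record the call site that (re)armed the timer, so that
	 * /proc/timer_stats can attribute timer wakeups to it.
	 */
	__timer_stats_timer_set_start_info(timer, __builtin_return_address(0));
}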
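get_next_timer_interrupt() is what a dynticks (CONFIG_NO_HZ) idle path
consults to decide how long the periodic tick can be stopped. A hedged sketch
of such a caller follows; the function name is invented, and the real logic
in kernel/time/tick-sched.c handles many more cases:

/* Hypothetical caller; assumes interrupts are already disabled. */
static unsigned long idle_sleep_ticks(void)
{
	unsigned long now = jiffies;
	/*
	 * Earliest pending timer-wheel event, already clipped against the
	 * next hrtimer event by cmp_next_hrtimer_event(). A return value
	 * equal to 'now' means a timer is (over)due: do not stop the tick.
	 */
	unsigned long next = get_next_timer_interrupt(now);

	return next - now;
}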
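Finally, splitting do_sysinfo() out of sys_sysinfo() lets in-kernel code fill
a struct sysinfo without a __user pointer (for example, a compat wrapper can
convert the result afterwards). An illustrative sketch with an invented
caller name; it assumes struct sysinfo and the do_sysinfo() prototype are
visible via linux/kernel.h:

#include <linux/kernel.h>

static long report_uptime_seconds(void)
{
	struct sysinfo si;

	do_sysinfo(&si);	/* fills the struct entirely in kernel space */
	return si.uptime;
}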