/* -*- linux-c -*- 
 * time-estimation with minimal dependency on xtime
 * Copyright (C) 2006 Intel Corporation.
 *
 * This file is part of systemtap, and is free software.  You can
 * redistribute it and/or modify it under the terms of the GNU General
 * Public License (GPL); either version 2, or (at your option) any
 * later version.
 */

#include <linux/cpufreq.h>
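
/* MAXTRYLOCK and TRYLOCKDELAY bound the seqlock read-retry loop in
 * _stp_gettimeofday_us() and are normally supplied by the enclosing
 * runtime.  The fallback values below are illustrative defaults only,
 * not the runtime's canonical settings. */
#ifndef MAXTRYLOCK
#define MAXTRYLOCK 1000
#endif
#ifndef TRYLOCKDELAY
#define TRYLOCKDELAY 100 /* ns */
#endif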

typedef struct __stp_time_t {
    /*
     * A write lock is taken by __stp_time_timer_callback() and
     * __stp_time_cpufreq_callback().  The timer callback runs in
     * softirq context, and the cpufreq notifier is guaranteed not to
     * run in interrupt context, so there should be no opportunity for
     * a deadlock between writers.
     *
     * A read is taken by _stp_gettimeofday_us(), which may run at any
     * time, including while a writer holds the lock.  In that case the
     * read sequence keeps failing, so the reader bounds its retries
     * (MAXTRYLOCK attempts, TRYLOCKDELAY ns apart) rather than spin
     * indefinitely.
     *
     * A seqlock is used rather than an rwlock because some kernels
     * lack read_trylock, which would be needed to bound an rwlock
     * reader in the same way.
     */
    seqlock_t lock;

    /* These provide a reference time to correlate cycles to real time */
    struct timeval base_time;
    cycles_t base_cycles;

    /* The frequency in MHz of this CPU, for interpolating
     * cycle counts from the base time. */
    unsigned int cpufreq;

    /* Callback used to schedule updates of the base_time */
    struct timer_list timer;
} stp_time_t;

DEFINE_PER_CPU(stp_time_t, stp_time);

/* Estimate the number of CPU cycles in a microsecond - i.e. MHz.  This
 * relies heavily on the accuracy of udelay().  udelay() is called twice
 * with different delays so that the fixed per-call overhead cancels out
 * of the difference.
 *
 * NB: interrupts should be disabled when calling this.
 */
static unsigned int
__stp_estimate_cpufreq(void)
{
    cycles_t beg, mid, end;
    beg = get_cycles(); barrier();
    udelay(2); barrier();
    mid = get_cycles(); barrier();
    udelay(10); barrier();
    end = get_cycles(); barrier();
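    /* (mid - beg) measures the call overhead plus 2us of cycles, and
     * (end - mid) measures the same overhead plus 10us of cycles.
     * Their difference,
     *   (end - mid) - (mid - beg) = beg - 2*mid + end,
     * is the cycle count for 8us, so dividing by 8 yields cycles per
     * microsecond, i.e. MHz. */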
    return (beg - 2*mid + end)/8;
}

static void
__stp_time_timer_callback(unsigned long val)
{
    unsigned long flags;
    stp_time_t *time;
    struct timeval tv;
    cycles_t cycles;

    local_irq_save(flags);

    do_gettimeofday(&tv);
    cycles = get_cycles();

    time = &__get_cpu_var(stp_time);
    write_seqlock(&time->lock);
    time->base_time = tv;
    time->base_cycles = cycles;
    write_sequnlock(&time->lock);

    /* Re-arm for the next jiffy so the base timestamp never grows more
     * than about one tick stale. */
    mod_timer(&time->timer, jiffies + 1);

    local_irq_restore(flags);
}

/* This is called on each CPU as an IPI (from on_each_cpu), with
 * interrupts disabled, so each CPU initializes and arms its own state. */
static void
__stp_init_time(void *info)
{
    stp_time_t *time = &__get_cpu_var(stp_time);

    seqlock_init(&time->lock);
    do_gettimeofday(&time->base_time);
    time->base_cycles = get_cycles();
    time->cpufreq = __stp_estimate_cpufreq();

    init_timer(&time->timer);
    time->timer.expires = jiffies + 1;
    time->timer.function = __stp_time_timer_callback;
    add_timer(&time->timer);
}

#ifdef CONFIG_CPU_FREQ
static int
__stp_time_cpufreq_callback(struct notifier_block *self,
        unsigned long state, void *vfreqs)
{
    unsigned long flags;
    struct cpufreq_freqs *freqs;
    unsigned int freq_mhz;
    stp_time_t *time;

    switch (state) {
        case CPUFREQ_POSTCHANGE:
        case CPUFREQ_RESUMECHANGE:
            freqs = (struct cpufreq_freqs *)vfreqs;
            /* cpufreq reports frequency in kHz; convert to MHz. */
            freq_mhz = freqs->new / 1000;

            time = &per_cpu(stp_time, freqs->cpu);
            write_seqlock_irqsave(&time->lock, flags);
            time->cpufreq = freq_mhz;
            write_sequnlock_irqrestore(&time->lock, flags);
            break;
    }

    return NOTIFY_OK;
}

struct notifier_block __stp_time_notifier = {
    .notifier_call = __stp_time_cpufreq_callback,
};
#endif /* CONFIG_CPU_FREQ */

void
_stp_kill_time(void)
{
    int cpu;
    for_each_online_cpu(cpu) {
        stp_time_t *time = &per_cpu(stp_time, cpu);
        /* del_timer_sync() also waits for a running callback. */
        del_timer_sync(&time->timer);
    }
#ifdef CONFIG_CPU_FREQ
    cpufreq_unregister_notifier(&__stp_time_notifier,
            CPUFREQ_TRANSITION_NOTIFIER);
#endif
}

int
_stp_init_time(void)
{
    int ret;

    /* Initialize each CPU's clock state in place; this old
     * four-argument on_each_cpu() takes (func, info, retry, wait) and
     * here waits for all CPUs to finish. */
    ret = on_each_cpu(__stp_init_time, NULL, 0, 1);

#ifdef CONFIG_CPU_FREQ
    if (ret == 0) {
        int cpu;
        unsigned long flags;
        for_each_online_cpu(cpu) {
            /* cpufreq_get() reports kHz; convert to MHz. */
            int freq_mhz = cpufreq_get(cpu) / 1000;
            if (freq_mhz > 0) {
                stp_time_t *time = &per_cpu(stp_time, cpu);
                write_seqlock_irqsave(&time->lock, flags);
                time->cpufreq = freq_mhz;
                write_sequnlock_irqrestore(&time->lock, flags);
            }
        }

        ret = cpufreq_register_notifier(&__stp_time_notifier,
                CPUFREQ_TRANSITION_NOTIFIER);
    }
#endif

    return ret;
}
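
/* Illustrative usage only (the calling context is assumed, not defined
 * here): a module would typically pair these in its init/exit paths:
 *
 *     if (_stp_init_time() != 0)
 *         return -1;        (bail out; timers/notifier not armed)
 *     ...
 *     _stp_kill_time();     (tear down per-cpu timers and notifier)
 */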

int64_t
_stp_gettimeofday_us(void)
{
    struct timeval base;
    cycles_t last, delta;
    unsigned int freq;
    unsigned int seq;
    stp_time_t *time;
    int i = 0;

    preempt_disable();

    time = &__get_cpu_var(stp_time);

    /* Snapshot the base time, cycle count, and frequency under the
     * seqlock.  If a writer is active, the sequence check fails and we
     * retry, up to MAXTRYLOCK times, rather than spin forever. */
    do {
        seq = read_seqbegin(&time->lock);
        base = time->base_time;
        last = time->base_cycles;
        freq = time->cpufreq;
        if (likely(!read_seqretry(&time->lock, seq)))
            break;
        if (unlikely(++i >= MAXTRYLOCK)) {
            /* Don't leak the disabled preempt count on failure. */
            preempt_enable();
            return 0;
        }
        ndelay(TRYLOCKDELAY);
    } while (1);

    /* Interpolate: elapsed cycles divided by cycles-per-microsecond
     * (MHz) gives elapsed microseconds.  do_div() divides the 64-bit
     * delta in place. */
    delta = get_cycles() - last;
    do_div(delta, freq);

    preempt_enable();

    return (USEC_PER_SEC * (int64_t)base.tv_sec) + base.tv_usec + delta;
}
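
/* Illustrative usage only: a probe handler might timestamp an event as
 *
 *     int64_t us = _stp_gettimeofday_us();
 *
 * where a return of 0 means a consistent reading could not be obtained
 * within MAXTRYLOCK retries.  The result is microseconds since the
 * epoch, interpolated from the per-cpu base via the cycle counter. */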