/* -*- linux-c -*-
 * uprobe functions
 * Copyright (C) 2010 Red Hat Inc.
 *
 * This file is part of systemtap, and is free software.  You can
 * redistribute it and/or modify it under the terms of the GNU General
 * Public License (GPL); either version 2, or (at your option) any
 * later version.
 */

#ifndef _UPROBE_COMMON_C_
#define _UPROBE_COMMON_C_

/* NB: Because these utrace callbacks only occur before / after
   userspace instructions run, there is no concurrency control issue
   between active uprobe callbacks and these registration /
   unregistration pieces.

   We protect each stap_uprobe->spec_index value (which also serves as
   a free/busy flag) with the outer stap_uprobes_lock mutex, guarding
   it against concurrent registration / unregistration.
*/
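
/* For orientation, the spec_index states used throughout this file,
   as read from the checks and assignments below:
     >= 0  slot busy; index into stap_uprobe_specs[]
     -1    slot free (pre-UPROBES_API_VERSION-2, a freed uprobe slot is
           reusable only once uprobes clears up.kdata)
     -2    freed uretprobe slot awaiting urp.u.kdata == NULL
           (pre-UPROBES_API_VERSION-2 only) */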

static int stap_uprobe_change_plus (struct task_struct *tsk, unsigned long relocation, unsigned long length, const struct stap_uprobe_tf *stf, unsigned long offset, unsigned long vm_flags) {
  int tfi = (stf - stap_uprobe_finders);
  int spec_index;
  /* iterate over stap_uprobe_spec[] that use this same stap_uprobe_tf */
  for (spec_index=0; spec_index<sizeof(stap_uprobe_specs)/sizeof(stap_uprobe_specs[0]); spec_index++) {
    int handled_p = 0;
    int slotted_p = 0;
    const struct stap_uprobe_spec *sups = &stap_uprobe_specs [spec_index];
    struct stap_uprobe *sup;
    pid_t sdt_sem_pid;
    int rc = 0;
    int i;
    if (likely(sups->tfi != tfi)) continue;
    /* Skip probes with an address beyond this map event; this should
       not happen unless a shlib/exec got mmapped in weirdly piecemeal
       fashion. */
    if (likely((vm_flags & VM_EXEC) && sups->address >= length)) continue;

    /* Found a uprobe_spec for this stap_uprobe_tf.  Need to lock the
       stap_uprobes[] array to allocate a free spot, but then we can
       unlock and do the register_*probe subsequently. */

    mutex_lock (& stap_uprobes_lock);
    for (i=0; i<MAXUPROBES; i++) { /* XXX: slow linear search */
      sup = & stap_uprobes[i];

      /* Register a new uprobe.
         We make two passes for semaphores;
         see stap_uprobe_change_semaphore_plus below. */

      if (sup->spec_index < 0 || (sups->sdt_sem_offset && vm_flags & VM_WRITE && sup->spec_index == spec_index)) {
        #if (UPROBES_API_VERSION < 2)
	/* See PR6829 comment. */
        if (sup->spec_index == -1 && sup->up.kdata != NULL) continue;
        else if (sup->spec_index == -2 && sup->urp.u.kdata != NULL) continue;
        #endif
        sup->spec_index = spec_index;
        slotted_p = 1;
        break;
      }
    }
    mutex_unlock (& stap_uprobes_lock);
    #ifdef DEBUG_UPROBES
    _stp_dbug(__FUNCTION__,__LINE__, "+uprobe spec %d idx %d process %s[%d] addr %p pp %s\n", spec_index, (slotted_p ? i : -1), tsk->comm, tsk->tgid, (void*)(relocation+sups->address), sups->pp);
    #endif

    /* Here, slotted_p implies that `i' points to the single
       stap_uprobes[] element that has been slotted in for registration
       or unregistration processing.  !slotted_p implies that the table
       was full (registration; MAXUPROBES) or that no matching entry was
       found (unregistration; should not happen). */

    sdt_sem_pid = (sups->return_p ? sup->urp.u.pid : sup->up.pid);
    if (sups->sdt_sem_offset && (sdt_sem_pid != tsk->tgid || sup->sdt_sem_address == 0)) {
      /* If the probe is in the executable itself, the offset *is* the address. */
      if (vm_flags & VM_EXECUTABLE) {
        sup->sdt_sem_address = relocation + sups->sdt_sem_offset;
      }
      else {
        sup->sdt_sem_address = (relocation - offset) + sups->sdt_sem_offset;
      }
    } /* sdt_sem_offset */
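    /* Worked example with hypothetical numbers, matching the libtcl.so
       maps sketched in stap_uprobe_change_semaphore_plus below: a
       writeable segment mapped at relocation 0x7f0898846000 with file
       offset 0x339000, and an sdt_sem_offset of 0x33a010, yield
         sdt_sem_address = (0x7f0898846000 - 0x339000) + 0x33a010
                         = 0x7f0898847010
       which falls inside that rw-p mapping.  In the VM_EXECUTABLE case
       the offset already *is* the address, so nothing is subtracted. */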
    if (slotted_p) {
      struct stap_uprobe *sup = & stap_uprobes[i];
      if (sups->return_p) {
        sup->urp.u.pid = tsk->tgid;
        sup->urp.u.vaddr = relocation + sups->address;
        sup->urp.handler = &enter_uretprobe_probe;
        rc = register_uretprobe (& sup->urp);
      } else {
        sup->up.pid = tsk->tgid;
        sup->up.vaddr = relocation + sups->address;
        sup->up.handler = &enter_uprobe_probe;
        rc = register_uprobe (& sup->up);
      }
      if (rc) { /* failed to register */
        _stp_warn ("u*probe failed %s[%d] '%s' addr %p rc %d\n", tsk->comm, tsk->tgid, sups->pp, (void*)(relocation + sups->address), rc);
	/* NB: we need to release this slot,
	   so we need to borrow the mutex temporarily. */
        mutex_lock (& stap_uprobes_lock);
        sup->spec_index = -1;
        mutex_unlock (& stap_uprobes_lock);
      } else {
        handled_p = 1;
      }
    }
    /* NB: handled_p implies slotted_p */
    if (unlikely (! handled_p)) {
      #ifdef STP_TIMING
      atomic_inc (& skipped_count_uprobe_reg);
      #endif
      /* NB: duplicates common_entryfn_epilogue,
	 but then this is not a probe entry fn epilogue. */
      if (unlikely (atomic_inc_return (& skipped_count) > MAXSKIPPED)) {
        if (unlikely (pseudo_atomic_cmpxchg(& session_state, STAP_SESSION_RUNNING, STAP_SESSION_ERROR) == STAP_SESSION_RUNNING))
          _stp_error ("Skipped too many probes, check MAXSKIPPED or try again with stap -t for more details.");
      }
    }
  }  /* close iteration over stap_uprobe_spec[] */
  return 0; /* XXX: or rc? */
}

static int stap_uprobe_change_semaphore_plus (struct task_struct *tsk, unsigned long relocation, unsigned long length, const struct stap_uprobe_tf *stf) {
  int tfi = (stf - stap_uprobe_finders);
  int spec_index;
  int rc = 0;
  struct stap_uprobe *sup;
  int i;

  /* We make two passes for semaphores.
     The first pass, stap_uprobe_change_plus, calculates the address of
     the semaphore.  If the probe is in a .so, we calculate the address
     when the initial mmap maps in the executable segment, e.g.
     7f089850d000-7f0898647000  r-xp  libtcl.so
     Subsequent mmaps map in the remaining segments, including the
     writeable one where the semaphore control variable lives, e.g.
     7f0898647000-7f0898846000  ---p  libtcl.so
     7f0898846000-7f089885b000  rw-p  libtcl.so
     The second pass, stap_uprobe_change_semaphore_plus, sets the
     semaphore.  If the probe is in a .so, this happens when that
     writeable segment is mapped in.  If the task changes, the address
     is recalculated.  (See the illustrative userspace sketch after
     this function.)
  */

  for (i=0; i<MAXUPROBES; i++) {  /* XXX: slow linear search */
    sup = & stap_uprobes[i];
    if (sup->spec_index == -1) continue;
    if (sup->sdt_sem_address != 0 && !(sup->up.pid == tsk->tgid && sup->sdt_sem_address >= relocation && sup->sdt_sem_address < relocation+length)) continue;
    if (sup->sdt_sem_address) {
      unsigned short sdt_semaphore = 0; /* NB: fixed size */
      if ((rc = get_user (sdt_semaphore, (unsigned short __user*) sup->sdt_sem_address)) == 0) {
        sdt_semaphore ++;
        #ifdef DEBUG_UPROBES
        {
          const struct stap_uprobe_spec *sups = &stap_uprobe_specs [sup->spec_index];
          _stp_dbug(__FUNCTION__,__LINE__, "+semaphore %#x @ %#lx spec %d idx %d task %d\n", sdt_semaphore, sup->sdt_sem_address, sup->spec_index, i, tsk->tgid);
        }
        #endif
	rc = put_user (sdt_semaphore, (unsigned short __user*) sup->sdt_sem_address);
	/* XXX: need to analyze possibility of race condition */
      }
    }
  }
  return rc;
}
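
/* For illustration only (not part of this runtime): a minimal sketch of
   what the semaphore above looks like from the userspace side.  The
   provider/probe names are hypothetical, and the MYPROV_MYPROBE* macros
   are assumed to come from a header generated by systemtap's dtrace(1)
   emulation from a corresponding provider.d.  The semaphore is the
   "unsigned short" that stap_uprobe_change_semaphore_plus increments
   via put_user, so the _ENABLED() test is true only while a probe is
   attached. */
#if 0
#include "myprov_probes.h"            /* hypothetical dtrace -h output */

static void work (int x) {
  if (MYPROV_MYPROBE_ENABLED ())      /* reads myprov_myprobe_semaphore */
    MYPROV_MYPROBE (x);               /* the probe point proper */
}
#endif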

/* Removing/unmapping a uprobe is simpler than adding one (in the
   _plus function above).  We need not care about stap_uprobe_finders
   or anything; we just scan through stap_uprobes[] for a live probe
   within the given address range, and kill it.  */
static int stap_uprobe_change_minus (struct task_struct *tsk, unsigned long relocation, unsigned long length, const struct stap_uprobe_tf *stf) {
  int i;

  /* NB: it's not an error for us not to find a live uprobe within the
     given range.  We might have received a callback for a part of a
     shlib that was unmapped and unprobed. */

  for (i=0; i<MAXUPROBES; i++) { /* XXX: slow linear search */
    struct stap_uprobe *sup = & stap_uprobes[i];
    struct stap_uprobe_spec *sups;
    if (sup->spec_index < 0) continue; /* skip free uprobes slot */
    sups = (struct stap_uprobe_spec*) & stap_uprobe_specs[sup->spec_index];
    mutex_lock (& stap_uprobes_lock);

    /* PR6829, PR9940:
       Here we're unregistering for one of two reasons:
       1. the process image is going away (or gone) due to exit or exec; or
       2. the vma containing the probepoint has been unmapped.
       In case 1, it's sort of a nop, because uprobes will notice the event
       and dispose of the probes eventually, if it hasn't already.  But by
       calling unmap_u[ret]probe() ourselves, we free up sup right away.
       
       In both cases, we must use unmap_u[ret]probe instead of
       unregister_u[ret]probe, so uprobes knows not to try to restore the
       original opcode.
    */

    /* URETPROBE */
    if (sups->return_p && sup->urp.u.pid == tsk->tgid && sup->urp.u.vaddr >= relocation && sup->urp.u.vaddr < relocation+length) { /* in range */
      
      #ifdef DEBUG_UPROBES
      _stp_dbug (__FUNCTION__,__LINE__, "-uretprobe spec %d idx %d process %s[%d] addr %p pp %s\n", sup->spec_index, i, tsk->comm, tsk->tgid, (void*) sup->urp.u.vaddr, sups->pp);
      #endif
      #if (UPROBES_API_VERSION >= 2)
      unmap_uretprobe (& sup->urp);
      sup->spec_index = -1;
      #else
      /* Uprobes lacks unmap_uretprobe.  Before reusing sup, we must wait
	 until uprobes turns loose of the uretprobe on its own, as indicated
	 by uretprobe.kdata = NULL. */
      sup->spec_index = -2;
      #endif
      /* UPROBE */
    } else if (!sups->return_p && sup->up.pid == tsk->tgid && sup->up.vaddr >= relocation && sup->up.vaddr < relocation+length) { /* in range */
      
      #ifdef DEBUG_UPROBES
      _stp_dbug (__FUNCTION__,__LINE__, "-uprobe spec %d idx %d process %s[%d] reloc %p pp %s\n", sup->spec_index, i, tsk->comm, tsk->tgid, (void*) sup->up.vaddr, sups->pp);
      #endif
      #if (UPROBES_API_VERSION >= 2)
      unmap_uprobe (& sup->up);
      sup->spec_index = -1;
      #else
      /* Uprobes lacks unmap_uprobe.  Before reusing sup, we must wait
	 until uprobes turns loose of the uprobe on its own, as indicated
	 by uprobe.kdata = NULL. */
      sup->spec_index = -1;
      #endif
      /* PR10655: we don't need to fidget with the ENABLED semaphore either,
	 as the process is gone, buh-bye, toodaloo, au revoir, see ya later! */
    }
    mutex_unlock (& stap_uprobes_lock);
  }  /* close iteration over stap_uprobes[] */
  return 0; /* XXX: or !handled_p */
}

/* The task_finder_callback we use for ET_EXEC targets.
   We used to perform uprobe insertion/removal here, but not any more.
   (PR10524) */
static int stap_uprobe_process_found (struct stap_task_finder_target *tgt, struct task_struct *tsk, int register_p, int process_p) {
  const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder);
  if (! process_p) return 0; /* ignore threads */
  #ifdef DEBUG_TASK_FINDER_VMA
  _stp_dbug (__FUNCTION__,__LINE__, "%cproc pid %d stf %p %p path %s\n", register_p?'+':'-', tsk->tgid, tgt, stf, stf->pathname);
  #endif
  /* ET_EXEC events are like shlib events, but with 0 relocation bases */
  if (register_p) {
    int rc = stap_uprobe_change_plus (tsk, 0, TASK_SIZE, stf, 0, 0);
    stap_uprobe_change_semaphore_plus (tsk, 0, TASK_SIZE, stf);
    return rc;
  } else
    return stap_uprobe_change_minus (tsk, 0, TASK_SIZE, stf);
}

/* The task_finder_mmap_callback */
static int stap_uprobe_mmap_found (struct stap_task_finder_target *tgt, struct task_struct *tsk, char *path, unsigned long addr, unsigned long length, unsigned long offset, unsigned long vm_flags) {
  const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder);
  /* Notes:
     1 - shared libraries' executable segments load from offset 0;
         by ld.so convention an offset != 0 is now also allowed, so
         that stap_uprobe_change_plus can set a semaphore (i.e. a
         static extern) in a shared object;
     2 - the path must match the shared library we're interested in;
     3 - the mapping should be executable or writeable (the latter
         for a semaphore in a .so). */
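  /* Illustrative (hypothetical) sequence of mmap events for one task
     loading libtcl.so, and which branch below handles each:
       mmap r-xp offset 0x0       -> stap_uprobe_change_plus
       mmap ---p                  -> ignored (neither VM_EXEC nor VM_WRITE)
       mmap rw-p offset 0x339000  -> stap_uprobe_change_semaphore_plus */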
  if (path == NULL || strcmp (path, stf->pathname)) return 0;
  if (vm_flags & VM_EXEC) {
    #ifdef DEBUG_TASK_FINDER_VMA
    _stp_dbug (__FUNCTION__,__LINE__, "+mmap R-X pid %d path %s addr %p length %u offset %p stf %p %p path %s\n", tsk->tgid, path, (void *) addr, (unsigned)length, (void*) offset, tgt, stf, stf->pathname);
    #endif
    return stap_uprobe_change_plus (tsk, addr, length, stf, offset, vm_flags);
  } else if (vm_flags & VM_WRITE) {
    #ifdef DEBUG_TASK_FINDER_VMA
    _stp_dbug (__FUNCTION__,__LINE__, "+mmap RW- pid %d path %s addr %p length %u offset %p stf %p %p path %s\n", tsk->tgid, path, (void *) addr, (unsigned)length, (void*) offset, tgt, stf, stf->pathname);
    #endif
    return stap_uprobe_change_semaphore_plus (tsk, addr, length, stf);
  } else return 0;
}

/* The task_finder_munmap_callback */
static int stap_uprobe_munmap_found (struct stap_task_finder_target *tgt, struct task_struct *tsk, unsigned long addr, unsigned long length) {
  const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder);
  #ifdef DEBUG_TASK_FINDER_VMA
  _stp_dbug (__FUNCTION__,__LINE__, "-mmap pid %d addr %p length %lu stf %p %p path %s\n", tsk->tgid, (void *) addr, length, tgt, stf, stf->pathname);
  #endif
  return stap_uprobe_change_minus (tsk, addr, length, stf);
}

/* The task_finder_callback we use for ET_DYN targets.
   This just forces an unmap of everything as the process exits.
   (PR11151) */
static int stap_uprobe_process_munmap (struct stap_task_finder_target *tgt, struct task_struct *tsk, int register_p, int process_p) {
  const struct stap_uprobe_tf *stf = container_of(tgt, struct stap_uprobe_tf, finder);
  if (! process_p) return 0; /* ignore threads */
  #ifdef DEBUG_TASK_FINDER_VMA
  _stp_dbug (__FUNCTION__,__LINE__, "%cproc pid %d stf %p %p path %s\n", register_p?'+':'-', tsk->tgid, tgt, stf, stf->pathname);
  #endif
  /* Covering 0->TASK_SIZE means "unmap everything" */
  if (!register_p)
    return stap_uprobe_change_minus (tsk, 0, TASK_SIZE, stf);
  return 0;
}

#endif /* _UPROBE_COMMON_C_ */