summary refs log tree commit diff stats
path: root/runtime
diff options
context:
space:
mode:
Diffstat (limited to 'runtime')
-rw-r--r-- runtime/access_process_vm.h 54
-rw-r--r-- runtime/addr-map.c 4
-rw-r--r-- runtime/itrace.c 54
-rw-r--r-- runtime/print.c 12
-rw-r--r-- runtime/runtime.h 28
-rw-r--r-- runtime/staprun/relay.c 83
-rw-r--r-- runtime/staprun/relay_old.c 82
-rw-r--r-- runtime/uprobes2/uprobes.c 35
8 files changed, 258 insertions, 94 deletions
diff --git a/runtime/access_process_vm.h b/runtime/access_process_vm.h
new file mode 100644
index 00000000..70489d48
--- /dev/null
+++ b/runtime/access_process_vm.h
@@ -0,0 +1,54 @@
+/* The kernel's access_process_vm is not exported in kernel.org kernels, although
+ * some distros export it on some architectures. To work around this inconsistency,
+ * we copied and pasted it here. Fortunately, everything it calls is exported.
+ * It moves up to len bytes between buf and tsk's memory at addr, one page at a
+ * time (write != 0 copies buf into the task), and returns the bytes transferred. */
+#include <linux/pagemap.h>
+#include <asm/cacheflush.h>
+static int __access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
+{
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ struct page *page;
+ void *old_buf = buf; /* start of buf, used to compute the return value */
+
+ mm = get_task_mm(tsk); /* paired with mmput() at the end */
+ if (!mm)
+ return 0; /* no mm for this task: nothing transferred */
+
+ down_read(&mm->mmap_sem); /* held across the whole loop; released by up_read() below */
+ /* ignore errors, just check how much was successfully transferred */
+ while (len) {
+ int bytes, ret, offset;
+ void *maddr;
+
+ ret = get_user_pages(tsk, mm, addr, 1,
+ write, 1, &page, &vma); /* one page per call; NOTE(review): the literal 1 after write looks like a 'force' flag -- confirm */
+ if (ret <= 0)
+ break; /* stop at the first page that cannot be obtained */
+
+ bytes = len;
+ offset = addr & (PAGE_SIZE-1); /* offset of addr within its page */
+ if (bytes > PAGE_SIZE-offset)
+ bytes = PAGE_SIZE-offset; /* never cross the page boundary in one pass */
+
+ maddr = kmap(page);
+ if (write) {
+ copy_to_user_page(vma, page, addr,
+ maddr + offset, buf, bytes);
+ set_page_dirty_lock(page);
+ } else {
+ copy_from_user_page(vma, page, addr,
+ buf, maddr + offset, bytes);
+ }
+ kunmap(page);
+ page_cache_release(page);
+ len -= bytes;
+ buf += bytes;
+ addr += bytes;
+ }
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+
+ return buf - old_buf; /* bytes actually transferred, possibly fewer than len */
+}
diff --git a/runtime/addr-map.c b/runtime/addr-map.c
index a9aa8d88..35de7a64 100644
--- a/runtime/addr-map.c
+++ b/runtime/addr-map.c
@@ -115,6 +115,10 @@ lookup_bad_addr(unsigned long addr, size_t size)
return 1;
#ifndef STP_PRIVILEGED
+ /* Unprivileged users must not access memory while the context
+ does not refer to their own process. */
+ if (! is_myproc ())
+ return 1;
/* Unprivileged users must not access kernel space memory. */
if (addr + size > TASK_SIZE)
return 1;
diff --git a/runtime/itrace.c b/runtime/itrace.c
index 6fe39db4..03e1e403 100644
--- a/runtime/itrace.c
+++ b/runtime/itrace.c
@@ -77,60 +77,8 @@ static struct itrace_info *create_itrace_info(
struct task_struct *tsk, u32 step_flag,
struct stap_itrace_probe *itrace_probe);
-/*
- * The kernel's access_process_vm is not exported in kernel.org kernels, although
- * some distros export it on some architectures. To workaround this inconsistency,
- * we copied and pasted it here. Fortunately, everything it calls is exported.
- */
-#include <linux/pagemap.h>
-#include <asm/cacheflush.h>
-static int __access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
-{
- struct mm_struct *mm;
- struct vm_area_struct *vma;
- struct page *page;
- void *old_buf = buf;
-
- mm = get_task_mm(tsk);
- if (!mm)
- return 0;
-
- down_read(&mm->mmap_sem);
- /* ignore errors, just check how much was sucessfully transfered */
- while (len) {
- int bytes, ret, offset;
- void *maddr;
- ret = get_user_pages(tsk, mm, addr, 1,
- write, 1, &page, &vma);
- if (ret <= 0)
- break;
-
- bytes = len;
- offset = addr & (PAGE_SIZE-1);
- if (bytes > PAGE_SIZE-offset)
- bytes = PAGE_SIZE-offset;
-
- maddr = kmap(page);
- if (write) {
- copy_to_user_page(vma, page, addr,
- maddr + offset, buf, bytes);
- set_page_dirty_lock(page);
- } else {
- copy_from_user_page(vma, page, addr,
- buf, maddr + offset, bytes);
- }
- kunmap(page);
- page_cache_release(page);
- len -= bytes;
- buf += bytes;
- addr += bytes;
- }
- up_read(&mm->mmap_sem);
- mmput(mm);
-
- return buf - old_buf;
-}
+/* Note: __access_process_vm moved to access_process_vm.h */
#ifdef UTRACE_ORIG_VERSION
static u32 usr_itrace_report_quiesce(struct utrace_attached_engine *engine,
diff --git a/runtime/print.c b/runtime/print.c
index 945f7a72..335403fb 100644
--- a/runtime/print.c
+++ b/runtime/print.c
@@ -225,7 +225,11 @@ static void _stp_print_kernel_info(char *vstr, int ctx, int num_probes)
#ifdef DEBUG_MEM
"+alloc"
#endif
- ", probes: %d\n",
+ ", probes: %d"
+#ifndef STP_PRIVILEGED
+ ", unpriv-uid: %d"
+#endif
+ "\n",
THIS_MODULE->name,
vstr,
#ifndef STAPCONF_GRSECURITY
@@ -242,7 +246,11 @@ static void _stp_print_kernel_info(char *vstr, int ctx, int num_probes)
#ifdef DEBUG_MEM
_stp_allocated_memory - _stp_allocated_net_memory,
#endif
- num_probes);
+ num_probes
+#ifndef STP_PRIVILEGED
+ , _stp_uid
+#endif
+ );
}
/** @} */
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 064ded7b..7087d435 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -54,6 +54,32 @@ static void _stp_warn (const char *fmt, ...) __attribute__ ((format (printf, 1,
static void _stp_exit(void);
+
+
+/* unprivileged user support: euid checks against _stp_uid */
+
+#ifdef STAPCONF_TASK_UID /* selects how the current task's effective uid is read */
+#define STP_CURRENT_EUID (current->euid)
+#else
+#define STP_CURRENT_EUID (task_euid(current))
+#endif
+
+#define is_myproc() (STP_CURRENT_EUID == _stp_uid) /* nonzero when current's euid equals _stp_uid */
+
+#ifndef STP_PRIVILEGED /* unprivileged build: on mismatch, record an error in CONTEXT and jump to the expanding function's 'out' label */
+#define assert_is_myproc() do { \
+ if (! is_myproc()) { \
+ snprintf (CONTEXT->error_buffer, MAXSTRINGLEN, "semi-privileged tapset function called without is_myproc checking for pid %d (euid %d)", \
+ current->tgid, STP_CURRENT_EUID); \
+ CONTEXT->last_error = CONTEXT->error_buffer; \
+ goto out; \
+ } } while (0)
+#else /* privileged build: the assertion expands to an empty statement */
+#define assert_is_myproc() do {} while (0)
+#endif
+
+
+
#include "debug.h"
/* atomic globals */
@@ -106,6 +132,8 @@ static struct
#endif
#include "addr-map.c"
+
+
/* Support functions for int64_t module parameters. */
static int param_set_int64_t(const char *val, struct kernel_param *kp)
{
diff --git a/runtime/staprun/relay.c b/runtime/staprun/relay.c
index f4aa139f..0c009235 100644
--- a/runtime/staprun/relay.c
+++ b/runtime/staprun/relay.c
@@ -15,6 +15,7 @@
int out_fd[NR_CPUS];
static pthread_t reader[NR_CPUS];
static int relay_fd[NR_CPUS];
+static int switch_file[NR_CPUS];
static int bulkmode = 0;
static volatile int stop_threads = 0;
static time_t *time_backlog[NR_CPUS];
@@ -107,11 +108,25 @@ static int open_outfile(int fnum, int cpu, int remove_file)
return 0;
}
+static int switch_outfile(int cpu, int *fnum) /* close cpu's current output and open the next numbered one; 0 on success, -1 if the open fails */
+{
+ int remove_file = 0;
+
+ dbug(3, "thread %d switching file\n", cpu);
+ close(out_fd[cpu]);
+ *fnum += 1; /* the caller's counter advances even if the open below fails */
+ if (fnum_max && *fnum >= fnum_max) /* fnum_max == 0 disables this check */
+ remove_file = 1; /* handed to open_outfile(); presumably asks it to delete an old file -- confirm there */
+ if (open_outfile(*fnum, cpu, remove_file) < 0) {
+ perr("Couldn't open file for cpu %d, exiting.", cpu);
+ return -1;
+ }
+ return 0;
+}
+
/**
* reader_thread - per-cpu channel buffer reader
*/
-static void empty_handler(int __attribute__((unused)) sig) { /* do nothing */ }
-
static void *reader_thread(void *data)
{
char buf[131072];
@@ -119,10 +134,8 @@ static void *reader_thread(void *data)
struct pollfd pollfd;
struct timespec tim = {.tv_sec=0, .tv_nsec=200000000}, *timeout = &tim;
sigset_t sigs;
- struct sigaction sa;
off_t wsize = 0;
int fnum = 0;
- int remove_file = 0;
sigemptyset(&sigs);
sigaddset(&sigs,SIGUSR2);
@@ -131,11 +144,6 @@ static void *reader_thread(void *data)
sigfillset(&sigs);
sigdelset(&sigs,SIGUSR2);
- sa.sa_handler = empty_handler;
- sa.sa_flags = 0;
- sigemptyset(&sa.sa_mask);
- sigaction(SIGUSR2, &sa, NULL);
-
if (bulkmode) {
cpu_set_t cpu_mask;
CPU_ZERO(&cpu_mask);
@@ -156,33 +164,39 @@ static void *reader_thread(void *data)
pollfd.events = POLLIN;
do {
+ dbug(3, "thread %d start ppoll\n", cpu);
rc = ppoll(&pollfd, 1, timeout, &sigs);
+ dbug(3, "thread %d end ppoll:%d\n", cpu, rc);
if (rc < 0) {
dbug(3, "cpu=%d poll=%d errno=%d\n", cpu, rc, errno);
- if (errno != EINTR) {
+ if (errno == EINTR) {
+ if (stop_threads)
+ break;
+ if (switch_file[cpu]) {
+ switch_file[cpu] = 0;
+ if (switch_outfile(cpu, &fnum) < 0)
+ goto error_out;
+ wsize = 0;
+ }
+ } else {
_perr("poll error");
goto error_out;
- }
+ }
}
+
while ((rc = read(relay_fd[cpu], buf, sizeof(buf))) > 0) {
- wsize += rc;
/* Switching file */
- if (fsize_max && wsize > fsize_max) {
- close(out_fd[cpu]);
- fnum++;
- if (fnum_max && fnum == fnum_max)
- remove_file = 1;
- if (open_outfile(fnum, cpu, remove_file) < 0) {
- perr("Couldn't open file for cpu %d, exiting.", cpu);
+ if (fsize_max && wsize + rc > fsize_max) {
+ if (switch_outfile(cpu, &fnum) < 0)
goto error_out;
- }
- wsize = rc;
+ wsize = 0;
}
if (write(out_fd[cpu], buf, rc) != rc) {
if (errno != EPIPE)
perr("Couldn't write to output %d for cpu %d, exiting.", out_fd[cpu], cpu);
goto error_out;
}
+ wsize += rc;
}
} while (!stop_threads);
dbug(3, "exiting thread for cpu %d\n", cpu);
@@ -195,6 +209,25 @@ error_out:
return(NULL);
}
+static void switchfile_handler(int sig) /* SIGUSR2 handler: ask every running reader thread to switch its output file */
+{
+ int i;
+ if (stop_threads) /* shutting down: do not start a new switch */
+ return;
+ for (i = 0; i < ncpus; i++)
+ if (reader[i] && switch_file[i]) { /* a thread has not yet acted on the previous request */
+ dbug(2, "file switching is progressing, signal %d ignored.\n", sig);
+ return;
+ }
+ for (i = 0; i < ncpus; i++) {
+ if (reader[i]) {
+ switch_file[i] = 1; /* cleared by the reader thread when it performs the switch */
+ pthread_kill(reader[i], SIGUSR2); /* makes that thread's ppoll() return with EINTR */
+ } else
+ break;
+ }
+}
+
/**
* init_relayfs - create files and threads for relayfs processing
*
@@ -308,6 +341,12 @@ int init_relayfs(void)
}
if (!load_only) {
+ struct sigaction sa;
+
+ sa.sa_handler = switchfile_handler;
+ sa.sa_flags = 0;
+ sigemptyset(&sa.sa_mask);
+ sigaction(SIGUSR2, &sa, NULL);
dbug(2, "starting threads\n");
for (i = 0; i < ncpus; i++) {
if (pthread_create(&reader[i], NULL, reader_thread,
@@ -327,7 +366,7 @@ void close_relayfs(void)
stop_threads = 1;
dbug(2, "closing\n");
for (i = 0; i < ncpus; i++) {
- if (reader[i])
+ if (reader[i])
pthread_kill(reader[i], SIGUSR2);
else
break;
diff --git a/runtime/staprun/relay_old.c b/runtime/staprun/relay_old.c
index 0254173f..8dfcc16b 100644
--- a/runtime/staprun/relay_old.c
+++ b/runtime/staprun/relay_old.c
@@ -19,6 +19,7 @@ static int proc_fd[NR_CPUS];
static FILE *percpu_tmpfile[NR_CPUS];
static char *relay_buffer[NR_CPUS];
static pthread_t reader[NR_CPUS];
+static int switch_file[NR_CPUS];
static int bulkmode = 0;
unsigned subbuf_size = 0;
unsigned n_subbufs = 0;
@@ -214,6 +215,22 @@ err1:
}
+static int switch_oldoutfile(int cpu, struct switchfile_ctrl_block *scb) /* close cpu's current output and open the next numbered one; 0 on success, -1 if the open fails */
+{
+ dbug(3, "thread %d switching file\n", cpu);
+ if (percpu_tmpfile[cpu]) /* this cpu writes through a FILE stream */
+ fclose(percpu_tmpfile[cpu]);
+ else /* otherwise the raw descriptor in out_fd[] is in use */
+ close(out_fd[cpu]);
+ scb->fnum ++;
+ if (fnum_max && scb->fnum == fnum_max) /* fnum_max == 0 disables this check */
+ scb->rmfile = 1; /* never reset here, so it stays set for every later switch */
+ if (open_oldoutfile(scb->fnum, cpu, scb->rmfile) < 0) {
+ perr("Couldn't open file for cpu %d, exiting.", cpu); /* NOTE(review): the callers in this file also perr() on failure, so the error is reported twice */
+ return -1;
+ }
+ return 0;
+}
/**
* process_subbufs - write ready subbufs to disk
*/
@@ -238,11 +255,7 @@ static int process_subbufs(struct _stp_buf_info *info,
len = (subbuf_size - sizeof(padding)) - padding;
scb->wsize += len;
if (fsize_max && scb->wsize > fsize_max) {
- fclose(percpu_tmpfile[cpu]);
- scb->fnum ++;
- if (fnum_max && scb->fnum == fnum_max)
- scb->rmfile = 1;
- if (open_oldoutfile(scb->fnum, cpu, scb->rmfile) < 0) {
+ if (switch_oldoutfile(cpu, scb) < 0) {
perr("Couldn't open file for cpu %d, exiting.", cpu);
return -1;
}
@@ -272,8 +285,17 @@ static void *reader_thread(void *data)
struct _stp_consumed_info consumed_info;
unsigned subbufs_consumed;
cpu_set_t cpu_mask;
+ struct timespec tim = {.tv_sec=0, .tv_nsec=200000000}, *timeout = &tim;
struct switchfile_ctrl_block scb = {0, 0, 0};
+ sigset_t sigs;
+
+ sigemptyset(&sigs);
+ sigaddset(&sigs,SIGUSR2);
+ pthread_sigmask(SIG_BLOCK, &sigs, NULL);
+ sigfillset(&sigs);
+ sigdelset(&sigs,SIGUSR2);
+
CPU_ZERO(&cpu_mask);
CPU_SET(cpu, &cpu_mask);
if( sched_setaffinity( 0, sizeof(cpu_mask), &cpu_mask ) < 0 )
@@ -281,15 +303,29 @@ static void *reader_thread(void *data)
pollfd.fd = relay_fd[cpu];
pollfd.events = POLLIN;
+#ifdef NEED_PPOLL
+ /* Without a real ppoll, there is a small race condition that could */
+ /* block ppoll(). So use a timeout to prevent that. */
+ timeout->tv_sec = 10;
+ timeout->tv_nsec = 0;
+#else
+ timeout = NULL;
+#endif
do {
- rc = poll(&pollfd, 1, -1);
+ rc = ppoll(&pollfd, 1, timeout, &sigs);
if (rc < 0) {
- if (errno != EINTR) {
+ if (errno == EINTR) {
+ if (switch_file[cpu]) {
+ switch_file[cpu] = 0;
+ if (switch_oldoutfile(cpu, &scb) < 0)
+ break;
+ scb.wsize = 0;
+ }
+ } else {
_perr("poll error");
break;
}
- err("WARNING: poll warning: %s\n", strerror(errno));
rc = 0;
}
@@ -324,12 +360,7 @@ int write_realtime_data(void *data, ssize_t nb)
ssize_t bw;
global_scb.wsize += nb;
if (fsize_max && global_scb.wsize > fsize_max) {
- close(out_fd[0]);
- global_scb.fnum++;
- if (fnum_max && global_scb.fnum == fnum_max)
- global_scb.rmfile = 1;
- if (open_oldoutfile(global_scb.fnum, 0,
- global_scb.rmfile) < 0) {
+ if (switch_oldoutfile(0, &global_scb) < 0) {
perr("Couldn't open file, exiting.");
return -1;
}
@@ -343,6 +374,23 @@ int write_realtime_data(void *data, ssize_t nb)
return bw != nb;
}
+static void switchfile_handler(int sig) /* SIGUSR2 handler: ask every running reader thread to switch its output file */
+{
+ int i;
+ for (i = 0; i < ncpus; i++)
+ if (reader[i] && switch_file[i]) { /* a thread has not yet acted on the previous request */
+ dbug(2, "file switching is progressing, signal %d ignored.\n", sig);
+ return;
+ }
+ for (i = 0; i < ncpus; i++) {
+ if (reader[i]) {
+ switch_file[i] = 1; /* cleared by the reader thread when it performs the switch */
+ pthread_kill(reader[i], SIGUSR2); /* makes that thread's ppoll() return with EINTR */
+ } else
+ break;
+ }
+}
+
/**
* init_relayfs - create files and threads for relayfs processing
*
@@ -353,6 +401,12 @@ int init_oldrelayfs(void)
int i, j;
struct statfs st;
char relay_filebase[PATH_MAX], proc_filebase[PATH_MAX];
+ struct sigaction sa;
+
+ sa.sa_handler = switchfile_handler;
+ sa.sa_flags = 0;
+ sigemptyset(&sa.sa_mask);
+ sigaction(SIGUSR2, &sa, NULL);
dbug(2, "initializing relayfs.n_subbufs=%d subbuf_size=%d\n", n_subbufs, subbuf_size);
diff --git a/runtime/uprobes2/uprobes.c b/runtime/uprobes2/uprobes.c
index 07ad3984..edf882a6 100644
--- a/runtime/uprobes2/uprobes.c
+++ b/runtime/uprobes2/uprobes.c
@@ -17,6 +17,7 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2006
+ * Copyright (C) Red Hat, Inc. 2009
*/
#include <linux/types.h>
#include <linux/hash.h>
@@ -28,6 +29,8 @@
#include <linux/kref.h>
#include <linux/utrace.h>
#include <linux/regset.h>
+#include <linux/file.h>
+#include <linux/version.h>
#define UPROBES_IMPLEMENTATION 1
/* PR9974: Adapt to struct renaming. */
@@ -1387,6 +1390,9 @@ static noinline unsigned long uprobe_setup_ssol_vma(unsigned long nbytes)
unsigned long addr;
struct mm_struct *mm;
struct vm_area_struct *vma;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28)
+ struct file *file;
+#endif
BUG_ON(nbytes & ~PAGE_MASK);
if ((addr = find_old_ssol_vma()) != 0)
@@ -1400,17 +1406,40 @@ static noinline unsigned long uprobe_setup_ssol_vma(unsigned long nbytes)
* Find the end of the top mapping and skip a page.
* If there is no space for PAGE_SIZE above
* that, mmap will ignore our address hint.
+ *
+ * We allocate a "fake" unlinked shmem file because anonymous
+ * memory might not be granted execute permission when the selinux
+ * security hooks have their way. Only do this for 2.6.28 or higher
+ * since shmem_file_setup() isn't exported before that.
*/
vma = rb_entry(rb_last(&mm->mm_rb), struct vm_area_struct, vm_rb);
addr = vma->vm_end + PAGE_SIZE;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28)
+ file = shmem_file_setup("uprobes/ssol", nbytes, VM_NORESERVE);
+ if (file) {
+ addr = do_mmap_pgoff(file, addr, nbytes, PROT_EXEC,
+ MAP_PRIVATE, 0);
+ fput(file);
+ }
+ if (!file || addr & ~PAGE_MASK) {
+#else
addr = do_mmap_pgoff(NULL, addr, nbytes, PROT_EXEC,
MAP_PRIVATE|MAP_ANONYMOUS, 0);
if (addr & ~PAGE_MASK) {
+#endif
up_write(&mm->mmap_sem);
mmput(mm);
- printk(KERN_ERR "Uprobes failed to allocate a vma for"
- " pid/tgid %d/%d for single-stepping out of line.\n",
- current->pid, current->tgid);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28)
+ if (!file)
+ printk(KERN_ERR "Uprobes shmem_file_setup failed while"
+ " allocating vma for pid/tgid %d/%d for"
+ " single-stepping out of line.\n",
+ current->pid, current->tgid);
+ else
+#endif
+ printk(KERN_ERR "Uprobes failed to allocate a vma for"
+ " pid/tgid %d/%d for single-stepping out of"
+ " line.\n", current->pid, current->tgid);
return addr;
}