diff options
Diffstat (limited to 'runtime')
-rw-r--r-- | runtime/access_process_vm.h | 54 | ||||
-rw-r--r-- | runtime/addr-map.c | 4 | ||||
-rw-r--r-- | runtime/itrace.c | 54 | ||||
-rw-r--r-- | runtime/print.c | 12 | ||||
-rw-r--r-- | runtime/runtime.h | 28 | ||||
-rw-r--r-- | runtime/staprun/relay.c | 83 | ||||
-rw-r--r-- | runtime/staprun/relay_old.c | 82 | ||||
-rw-r--r-- | runtime/uprobes2/uprobes.c | 35 |
8 files changed, 258 insertions, 94 deletions
diff --git a/runtime/access_process_vm.h b/runtime/access_process_vm.h new file mode 100644 index 00000000..70489d48 --- /dev/null +++ b/runtime/access_process_vm.h @@ -0,0 +1,54 @@ +/* + * The kernel's access_process_vm is not exported in kernel.org kernels, although + * some distros export it on some architectures. To workaround this inconsistency, + * we copied and pasted it here. Fortunately, everything it calls is exported. + */ +#include <linux/pagemap.h> +#include <asm/cacheflush.h> +static int __access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) +{ + struct mm_struct *mm; + struct vm_area_struct *vma; + struct page *page; + void *old_buf = buf; + + mm = get_task_mm(tsk); + if (!mm) + return 0; + + down_read(&mm->mmap_sem); + /* ignore errors, just check how much was sucessfully transfered */ + while (len) { + int bytes, ret, offset; + void *maddr; + + ret = get_user_pages(tsk, mm, addr, 1, + write, 1, &page, &vma); + if (ret <= 0) + break; + + bytes = len; + offset = addr & (PAGE_SIZE-1); + if (bytes > PAGE_SIZE-offset) + bytes = PAGE_SIZE-offset; + + maddr = kmap(page); + if (write) { + copy_to_user_page(vma, page, addr, + maddr + offset, buf, bytes); + set_page_dirty_lock(page); + } else { + copy_from_user_page(vma, page, addr, + buf, maddr + offset, bytes); + } + kunmap(page); + page_cache_release(page); + len -= bytes; + buf += bytes; + addr += bytes; + } + up_read(&mm->mmap_sem); + mmput(mm); + + return buf - old_buf; +} diff --git a/runtime/addr-map.c b/runtime/addr-map.c index a9aa8d88..35de7a64 100644 --- a/runtime/addr-map.c +++ b/runtime/addr-map.c @@ -115,6 +115,10 @@ lookup_bad_addr(unsigned long addr, size_t size) return 1; #ifndef STP_PRIVILEGED + /* Unprivileged users must not access memory while the context + does not refer to their own process. */ + if (! is_myproc ()) + return 1; /* Unprivileged users must not access kernel space memory. */ if (addr + size > TASK_SIZE) return 1; diff --git a/runtime/itrace.c b/runtime/itrace.c index 6fe39db4..03e1e403 100644 --- a/runtime/itrace.c +++ b/runtime/itrace.c @@ -77,60 +77,8 @@ static struct itrace_info *create_itrace_info( struct task_struct *tsk, u32 step_flag, struct stap_itrace_probe *itrace_probe); -/* - * The kernel's access_process_vm is not exported in kernel.org kernels, although - * some distros export it on some architectures. To workaround this inconsistency, - * we copied and pasted it here. Fortunately, everything it calls is exported. - */ -#include <linux/pagemap.h> -#include <asm/cacheflush.h> -static int __access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) -{ - struct mm_struct *mm; - struct vm_area_struct *vma; - struct page *page; - void *old_buf = buf; - - mm = get_task_mm(tsk); - if (!mm) - return 0; - - down_read(&mm->mmap_sem); - /* ignore errors, just check how much was sucessfully transfered */ - while (len) { - int bytes, ret, offset; - void *maddr; - ret = get_user_pages(tsk, mm, addr, 1, - write, 1, &page, &vma); - if (ret <= 0) - break; - - bytes = len; - offset = addr & (PAGE_SIZE-1); - if (bytes > PAGE_SIZE-offset) - bytes = PAGE_SIZE-offset; - - maddr = kmap(page); - if (write) { - copy_to_user_page(vma, page, addr, - maddr + offset, buf, bytes); - set_page_dirty_lock(page); - } else { - copy_from_user_page(vma, page, addr, - buf, maddr + offset, bytes); - } - kunmap(page); - page_cache_release(page); - len -= bytes; - buf += bytes; - addr += bytes; - } - up_read(&mm->mmap_sem); - mmput(mm); - - return buf - old_buf; -} +/* Note: __access_process_vm moved to access_process_vm.h */ #ifdef UTRACE_ORIG_VERSION static u32 usr_itrace_report_quiesce(struct utrace_attached_engine *engine, diff --git a/runtime/print.c b/runtime/print.c index 945f7a72..335403fb 100644 --- a/runtime/print.c +++ b/runtime/print.c @@ -225,7 +225,11 @@ static void _stp_print_kernel_info(char *vstr, int ctx, int num_probes) #ifdef DEBUG_MEM "+alloc" #endif - ", probes: %d\n", + ", probes: %d" +#ifndef STP_PRIVILEGED + ", unpriv-uid: %d" +#endif + "\n", THIS_MODULE->name, vstr, #ifndef STAPCONF_GRSECURITY @@ -242,7 +246,11 @@ static void _stp_print_kernel_info(char *vstr, int ctx, int num_probes) #ifdef DEBUG_MEM _stp_allocated_memory - _stp_allocated_net_memory, #endif - num_probes); + num_probes +#ifndef STP_PRIVILEGED + , _stp_uid +#endif + ); } /** @} */ diff --git a/runtime/runtime.h b/runtime/runtime.h index 064ded7b..7087d435 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -54,6 +54,32 @@ static void _stp_warn (const char *fmt, ...) __attribute__ ((format (printf, 1, static void _stp_exit(void); + + +/* unprivileged user support */ + +#ifdef STAPCONF_TASK_UID +#define STP_CURRENT_EUID (current->euid) +#else +#define STP_CURRENT_EUID (task_euid(current)) +#endif + +#define is_myproc() (STP_CURRENT_EUID == _stp_uid) + +#ifndef STP_PRIVILEGED +#define assert_is_myproc() do { \ + if (! is_myproc()) { \ + snprintf (CONTEXT->error_buffer, MAXSTRINGLEN, "semi-privileged tapset function called without is_myproc checking for pid %d (euid %d)", \ + current->tgid, STP_CURRENT_EUID); \ + CONTEXT->last_error = CONTEXT->error_buffer; \ + goto out; \ + } } while (0) +#else +#define assert_is_myproc() do {} while (0) +#endif + + + #include "debug.h" /* atomic globals */ @@ -106,6 +132,8 @@ static struct #endif #include "addr-map.c" + + /* Support functions for int64_t module parameters. */ static int param_set_int64_t(const char *val, struct kernel_param *kp) { diff --git a/runtime/staprun/relay.c b/runtime/staprun/relay.c index f4aa139f..0c009235 100644 --- a/runtime/staprun/relay.c +++ b/runtime/staprun/relay.c @@ -15,6 +15,7 @@ int out_fd[NR_CPUS]; static pthread_t reader[NR_CPUS]; static int relay_fd[NR_CPUS]; +static int switch_file[NR_CPUS]; static int bulkmode = 0; static volatile int stop_threads = 0; static time_t *time_backlog[NR_CPUS]; @@ -107,11 +108,25 @@ static int open_outfile(int fnum, int cpu, int remove_file) return 0; } +static int switch_outfile(int cpu, int *fnum) +{ + int remove_file = 0; + + dbug(3, "thread %d switching file\n", cpu); + close(out_fd[cpu]); + *fnum += 1; + if (fnum_max && *fnum >= fnum_max) + remove_file = 1; + if (open_outfile(*fnum, cpu, remove_file) < 0) { + perr("Couldn't open file for cpu %d, exiting.", cpu); + return -1; + } + return 0; +} + /** * reader_thread - per-cpu channel buffer reader */ -static void empty_handler(int __attribute__((unused)) sig) { /* do nothing */ } - static void *reader_thread(void *data) { char buf[131072]; @@ -119,10 +134,8 @@ static void *reader_thread(void *data) struct pollfd pollfd; struct timespec tim = {.tv_sec=0, .tv_nsec=200000000}, *timeout = &tim; sigset_t sigs; - struct sigaction sa; off_t wsize = 0; int fnum = 0; - int remove_file = 0; sigemptyset(&sigs); sigaddset(&sigs,SIGUSR2); @@ -131,11 +144,6 @@ static void *reader_thread(void *data) sigfillset(&sigs); sigdelset(&sigs,SIGUSR2); - sa.sa_handler = empty_handler; - sa.sa_flags = 0; - sigemptyset(&sa.sa_mask); - sigaction(SIGUSR2, &sa, NULL); - if (bulkmode) { cpu_set_t cpu_mask; CPU_ZERO(&cpu_mask); @@ -156,33 +164,39 @@ static void *reader_thread(void *data) pollfd.events = POLLIN; do { + dbug(3, "thread %d start ppoll\n", cpu); rc = ppoll(&pollfd, 1, timeout, &sigs); + dbug(3, "thread %d end ppoll:%d\n", cpu, rc); if (rc < 0) { dbug(3, "cpu=%d poll=%d errno=%d\n", cpu, rc, errno); - if (errno != EINTR) { + if (errno == EINTR) { + if (stop_threads) + break; + if (switch_file[cpu]) { + switch_file[cpu] = 0; + if (switch_outfile(cpu, &fnum) < 0) + goto error_out; + wsize = 0; + } + } else { _perr("poll error"); goto error_out; - } + } } + while ((rc = read(relay_fd[cpu], buf, sizeof(buf))) > 0) { - wsize += rc; /* Switching file */ - if (fsize_max && wsize > fsize_max) { - close(out_fd[cpu]); - fnum++; - if (fnum_max && fnum == fnum_max) - remove_file = 1; - if (open_outfile(fnum, cpu, remove_file) < 0) { - perr("Couldn't open file for cpu %d, exiting.", cpu); + if (fsize_max && wsize + rc > fsize_max) { + if (switch_outfile(cpu, &fnum) < 0) goto error_out; - } - wsize = rc; + wsize = 0; } if (write(out_fd[cpu], buf, rc) != rc) { if (errno != EPIPE) perr("Couldn't write to output %d for cpu %d, exiting.", out_fd[cpu], cpu); goto error_out; } + wsize += rc; } } while (!stop_threads); dbug(3, "exiting thread for cpu %d\n", cpu); @@ -195,6 +209,25 @@ error_out: return(NULL); } +static void switchfile_handler(int sig) +{ + int i; + if (stop_threads) + return; + for (i = 0; i < ncpus; i++) + if (reader[i] && switch_file[i]) { + dbug(2, "file switching is progressing, signal ignored.\n", sig); + return; + } + for (i = 0; i < ncpus; i++) { + if (reader[i]) { + switch_file[i] = 1; + pthread_kill(reader[i], SIGUSR2); + } else + break; + } +} + /** * init_relayfs - create files and threads for relayfs processing * @@ -308,6 +341,12 @@ int init_relayfs(void) } if (!load_only) { + struct sigaction sa; + + sa.sa_handler = switchfile_handler; + sa.sa_flags = 0; + sigemptyset(&sa.sa_mask); + sigaction(SIGUSR2, &sa, NULL); dbug(2, "starting threads\n"); for (i = 0; i < ncpus; i++) { if (pthread_create(&reader[i], NULL, reader_thread, @@ -327,7 +366,7 @@ void close_relayfs(void) stop_threads = 1; dbug(2, "closing\n"); for (i = 0; i < ncpus; i++) { - if (reader[i]) + if (reader[i]) pthread_kill(reader[i], SIGUSR2); else break; diff --git a/runtime/staprun/relay_old.c b/runtime/staprun/relay_old.c index 0254173f..8dfcc16b 100644 --- a/runtime/staprun/relay_old.c +++ b/runtime/staprun/relay_old.c @@ -19,6 +19,7 @@ static int proc_fd[NR_CPUS]; static FILE *percpu_tmpfile[NR_CPUS]; static char *relay_buffer[NR_CPUS]; static pthread_t reader[NR_CPUS]; +static int switch_file[NR_CPUS]; static int bulkmode = 0; unsigned subbuf_size = 0; unsigned n_subbufs = 0; @@ -214,6 +215,22 @@ err1: } +static int switch_oldoutfile(int cpu, struct switchfile_ctrl_block *scb) +{ + dbug(3, "thread %d switching file\n", cpu); + if (percpu_tmpfile[cpu]) + fclose(percpu_tmpfile[cpu]); + else + close(out_fd[cpu]); + scb->fnum ++; + if (fnum_max && scb->fnum == fnum_max) + scb->rmfile = 1; + if (open_oldoutfile(scb->fnum, cpu, scb->rmfile) < 0) { + perr("Couldn't open file for cpu %d, exiting.", cpu); + return -1; + } + return 0; +} /** * process_subbufs - write ready subbufs to disk */ @@ -238,11 +255,7 @@ static int process_subbufs(struct _stp_buf_info *info, len = (subbuf_size - sizeof(padding)) - padding; scb->wsize += len; if (fsize_max && scb->wsize > fsize_max) { - fclose(percpu_tmpfile[cpu]); - scb->fnum ++; - if (fnum_max && scb->fnum == fnum_max) - scb->rmfile = 1; - if (open_oldoutfile(scb->fnum, cpu, scb->rmfile) < 0) { + if (switch_oldoutfile(cpu, scb) < 0) { perr("Couldn't open file for cpu %d, exiting.", cpu); return -1; } @@ -272,8 +285,17 @@ static void *reader_thread(void *data) struct _stp_consumed_info consumed_info; unsigned subbufs_consumed; cpu_set_t cpu_mask; + struct timespec tim = {.tv_sec=0, .tv_nsec=200000000}, *timeout = &tim; struct switchfile_ctrl_block scb = {0, 0, 0}; + sigset_t sigs; + + sigemptyset(&sigs); + sigaddset(&sigs,SIGUSR2); + pthread_sigmask(SIG_BLOCK, &sigs, NULL); + sigfillset(&sigs); + sigdelset(&sigs,SIGUSR2); + CPU_ZERO(&cpu_mask); CPU_SET(cpu, &cpu_mask); if( sched_setaffinity( 0, sizeof(cpu_mask), &cpu_mask ) < 0 ) @@ -281,15 +303,29 @@ static void *reader_thread(void *data) pollfd.fd = relay_fd[cpu]; pollfd.events = POLLIN; +#ifdef NEED_PPOLL + /* Without a real ppoll, there is a small race condition that could */ + /* block ppoll(). So use a timeout to prevent that. */ + timeout->tv_sec = 10; + timeout->tv_nsec = 0; +#else + timeout = NULL; +#endif do { - rc = poll(&pollfd, 1, -1); + rc = ppoll(&pollfd, 1, timeout, &sigs); if (rc < 0) { - if (errno != EINTR) { + if (errno == EINTR) { + if (switch_file[cpu]) { + switch_file[cpu] = 0; + if (switch_oldoutfile(cpu, &scb) < 0) + break; + scb.wsize = 0; + } + } else { _perr("poll error"); break; } - err("WARNING: poll warning: %s\n", strerror(errno)); rc = 0; } @@ -324,12 +360,7 @@ int write_realtime_data(void *data, ssize_t nb) ssize_t bw; global_scb.wsize += nb; if (fsize_max && global_scb.wsize > fsize_max) { - close(out_fd[0]); - global_scb.fnum++; - if (fnum_max && global_scb.fnum == fnum_max) - global_scb.rmfile = 1; - if (open_oldoutfile(global_scb.fnum, 0, - global_scb.rmfile) < 0) { + if (switch_oldoutfile(0, &global_scb) < 0) { perr("Couldn't open file, exiting."); return -1; } @@ -343,6 +374,23 @@ int write_realtime_data(void *data, ssize_t nb) return bw != nb; } +static void switchfile_handler(int sig) +{ + int i; + for (i = 0; i < ncpus; i++) + if (reader[i] && switch_file[i]) { + dbug(2, "file switching is progressing, signal ignored.\n", sig); + return; + } + for (i = 0; i < ncpus; i++) { + if (reader[i]) { + switch_file[i] = 1; + pthread_kill(reader[i], SIGUSR2); + } else + break; + } +} + /** * init_relayfs - create files and threads for relayfs processing * @@ -353,6 +401,12 @@ int init_oldrelayfs(void) int i, j; struct statfs st; char relay_filebase[PATH_MAX], proc_filebase[PATH_MAX]; + struct sigaction sa; + + sa.sa_handler = switchfile_handler; + sa.sa_flags = 0; + sigemptyset(&sa.sa_mask); + sigaction(SIGUSR2, &sa, NULL); dbug(2, "initializing relayfs.n_subbufs=%d subbuf_size=%d\n", n_subbufs, subbuf_size); diff --git a/runtime/uprobes2/uprobes.c b/runtime/uprobes2/uprobes.c index 07ad3984..edf882a6 100644 --- a/runtime/uprobes2/uprobes.c +++ b/runtime/uprobes2/uprobes.c @@ -17,6 +17,7 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * * Copyright (C) IBM Corporation, 2006 + * Copyright (C) Red Hat, Inc. 2009 */ #include <linux/types.h> #include <linux/hash.h> @@ -28,6 +29,8 @@ #include <linux/kref.h> #include <linux/utrace.h> #include <linux/regset.h> +#include <linux/file.h> +#include <linux/version.h> #define UPROBES_IMPLEMENTATION 1 /* PR9974: Adapt to struct renaming. */ @@ -1387,6 +1390,9 @@ static noinline unsigned long uprobe_setup_ssol_vma(unsigned long nbytes) unsigned long addr; struct mm_struct *mm; struct vm_area_struct *vma; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28) + struct file *file; +#endif BUG_ON(nbytes & ~PAGE_MASK); if ((addr = find_old_ssol_vma()) != 0) @@ -1400,17 +1406,40 @@ static noinline unsigned long uprobe_setup_ssol_vma(unsigned long nbytes) * Find the end of the top mapping and skip a page. * If there is no space for PAGE_SIZE above * that, mmap will ignore our address hint. + * + * We allocate a "fake" unlinked shmem file because anonymous + * memory might not be granted execute permission when the selinux + * security hooks have their way. Only do this for 2.6.28 or higher + * since shmem_file_setup() isn't exported before that. */ vma = rb_entry(rb_last(&mm->mm_rb), struct vm_area_struct, vm_rb); addr = vma->vm_end + PAGE_SIZE; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28) + file = shmem_file_setup("uprobes/ssol", nbytes, VM_NORESERVE); + if (file) { + addr = do_mmap_pgoff(file, addr, nbytes, PROT_EXEC, + MAP_PRIVATE, 0); + fput(file); + } + if (!file || addr & ~PAGE_MASK) { +#else addr = do_mmap_pgoff(NULL, addr, nbytes, PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, 0); if (addr & ~PAGE_MASK) { +#endif up_write(&mm->mmap_sem); mmput(mm); - printk(KERN_ERR "Uprobes failed to allocate a vma for" - " pid/tgid %d/%d for single-stepping out of line.\n", - current->pid, current->tgid); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28) + if (!file) + printk(KERN_ERR "Uprobes shmem_file_setup failed while" + " allocating vma for pid/tgid %d/%d for" + " single-stepping out of line.\n", + current->pid, current->tgid); + else +#endif + printk(KERN_ERR "Uprobes failed to allocate a vma for" + " pid/tgid %d/%d for single-stepping out of" + " line.\n", current->pid, current->tgid); return addr; } |