diff options
author | David Smith <dsmith@redhat.com> | 2010-02-03 11:56:58 -0600 |
---|---|---|
committer | David Smith <dsmith@redhat.com> | 2010-02-03 11:56:58 -0600 |
commit | 23b7dbfaf1e9860f77b6bf1aa3da8610bf31b03c (patch) | |
tree | e04422de3d0c79bd1a2a62d97419a0cd01b292f7 | |
parent | 0d1ad607311857dc0b4666ce8a84c1a59c615ab9 (diff) | |
download | systemtap-steved-23b7dbfaf1e9860f77b6bf1aa3da8610bf31b03c.tar.gz systemtap-steved-23b7dbfaf1e9860f77b6bf1aa3da8610bf31b03c.tar.xz systemtap-steved-23b7dbfaf1e9860f77b6bf1aa3da8610bf31b03c.zip |
Fixed PR 11078. Changed code to avoid a procfs race condition.
* runtime/procfs.c: Allow STP_MAX_PROCFS_FILES define to be overridden.
(_stp_create_procfs): Call proc_create() instead of create_proc_entry()
to avoid a race condition.
* runtime/procfs-probes.c: New file containing procfs probe support
routines.
* tapset-procfs.cxx (procfs_derived_probe::join_group): Update struct
_stp_procfs_data definition.
(procfs_derived_probe::emit_module_decls): Include procfs-probes.c,
which is where struct stap_procfs_probe is now defined.
Update generated routines to read/write procfs data.
(procfs_derived_probe_group::emit_module_init): Pass file_operations
argument to _stp_create_procfs(). Initialize mutex.
(procfs_var_expanding_visitor::visit_target_symbol): Update generated
code.
-rw-r--r-- | runtime/procfs-probes.c | 204 | ||||
-rw-r--r-- | runtime/procfs.c | 15 | ||||
-rw-r--r-- | tapset-procfs.cxx | 114 |
3 files changed, 265 insertions, 68 deletions
diff --git a/runtime/procfs-probes.c b/runtime/procfs-probes.c new file mode 100644 index 00000000..73aa7e5f --- /dev/null +++ b/runtime/procfs-probes.c @@ -0,0 +1,204 @@ +#ifndef _STP_PROCFS_PROBES_C_ +#define _STP_PROCFS_PROBES_C_ + +#include <linux/mutex.h> +#include <linux/fs.h> + +#if 0 +// Currently we have to output _stp_procfs_data early in the +// translation process. It really should go here. +struct _stp_procfs_data { + char *buffer; + unsigned long count; +}; +#endif + +struct stap_procfs_probe { + const char *path; + const char *read_pp; + void (*read_ph) (struct context*); + const char *write_pp; + void (*write_ph) (struct context*); + + // FIXME: Eventually, this could get bigger than MAXSTRINGLEN + // when we support 'probe procfs("file").read.maxbuf(8192)' + // (bug 10690). + string_t buffer; + size_t count; + + int needs_fill; + struct mutex lock; +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,16) + atomic_t lockcount; +#endif +}; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,16) +/* + * Kernels 2.6.16 or less don't really have mutexes. The 'mutex_*' + * functions are defined as their similar semaphore equivalents. + * However, there is no semaphore equivalent of 'mutex_is_locked'. + * So, we'll fake it with an atomic counter. 
+ */ +static inline void _spp_lock_init(struct stap_procfs_probe *spp) +{ + atomic_set(&spp->lockcount, 0); + mutex_init(&spp->lock); +} +static inline int _spp_trylock(struct stap_procfs_probe *spp) +{ + int ret = mutex_trylock(&spp->lock); + if (ret) { + atomic_inc(&spp->lockcount); + } + return(ret); +} +static inline void _spp_lock(struct stap_procfs_probe *spp) +{ + mutex_lock(&spp->lock); + atomic_inc(&spp->lockcount); +} +static inline void _spp_unlock(struct stap_procfs_probe *spp) +{ + atomic_dec(&spp->lockcount); + mutex_unlock(&spp->lock); +} +static inline void _spp_lock_shutdown(struct stap_procfs_probe *spp) +{ + if (atomic_read(&spp->lockcount) != 0) { + _spp_unlock(spp); + } + mutex_destroy(&spp->lock); +} +#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,16) */ +#define _spp_lock_init(spp) mutex_init(&(spp)->lock) +#define _spp_trylock(spp) mutex_trylock(&(spp)->lock) +#define _spp_lock(spp) mutex_lock(&(spp)->lock) +#define _spp_unlock(spp) mutex_unlock(&(spp)->lock) +static inline void _spp_lock_shutdown(struct stap_procfs_probe *spp) +{ + if (mutex_is_locked(&spp->lock)) { + mutex_unlock(&spp->lock); + } + mutex_destroy(&spp->lock); +} +#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,16) */ + +static int _stp_proc_fill_read_buffer(struct stap_procfs_probe *spp); + +static int _stp_process_write_buffer(struct stap_procfs_probe *spp, + const char __user *buf, size_t count); + +static int +_stp_proc_open_file(struct inode *inode, struct file *filp) +{ + struct stap_procfs_probe *spp; + int err; + + spp = (struct stap_procfs_probe *)PDE(inode)->data; + if (spp == NULL) { + return -EINVAL; + } + + err = generic_file_open(inode, filp); + if (err) + return err; + + /* To avoid concurrency problems, we only allow 1 open at a + * time. (Grabbing a mutex here doesn't really work. The + * debug kernel can OOPS with "BUG: lock held when returning + * to user space!".) + * + * If open() was called with + * O_NONBLOCK, don't block, just return EAGAIN. 
*/ + if (filp->f_flags & O_NONBLOCK) { + if (_spp_trylock(spp) == 0) { + return -EAGAIN; + } + } + else { + _spp_lock(spp); + } + + filp->private_data = spp; + if ((filp->f_flags & O_ACCMODE) == O_RDONLY) { + spp->buffer[0] = '\0'; + spp->count = 0; + spp->needs_fill = 1; + } + return 0; +} + +static int +_stp_proc_release_file(struct inode *inode, struct file *filp) +{ + struct stap_procfs_probe *spp; + + spp = (struct stap_procfs_probe *)filp->private_data; + if (spp != NULL) { + _spp_unlock(spp); + } + return 0; +} + +static ssize_t +_stp_proc_read_file(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct stap_procfs_probe *spp = file->private_data; + ssize_t retval = 0; + + /* If we don't have a probe read function, just return 0 to + * indicate there isn't any data here. */ + if (spp == NULL || spp->read_ph == NULL) { + goto out; + } + + /* If needed, fill up the buffer.*/ + if (spp->needs_fill) { + if ((retval = _stp_proc_fill_read_buffer(spp))) { + goto out; + } + } + + /* Return bytes from the buffer. */ + retval = simple_read_from_buffer(buf, count, ppos, spp->buffer, + spp->count); +out: + return retval; +} + +static ssize_t +_stp_proc_write_file(struct file *file, const char __user *buf, size_t count, + loff_t *ppos) +{ + struct stap_procfs_probe *spp = file->private_data; + struct _stp_procfs_data pdata; + ssize_t len; + + /* If we don't have a write probe, return EIO. */ + if (spp->write_ph == NULL) { + len = -EIO; + goto out; + } + + /* Handle the input buffer. 
*/ + len = _stp_process_write_buffer(spp, buf, count); + if (len > 0) { + *ppos += len; + } + +out: + return len; +} + +static struct file_operations _stp_proc_fops = { + .owner = THIS_MODULE, + .open = _stp_proc_open_file, + .read = _stp_proc_read_file, + .write = _stp_proc_write_file, + .llseek = generic_file_llseek, + .release = _stp_proc_release_file +}; + +#endif /* _STP_PROCFS_PROBES_C_ */ diff --git a/runtime/procfs.c b/runtime/procfs.c index 1d2ad837..1fd9fcc5 100644 --- a/runtime/procfs.c +++ b/runtime/procfs.c @@ -17,7 +17,10 @@ #ifndef _STP_PROCFS_C_ #define _STP_PROCFS_C_ +#ifndef STP_MAX_PROCFS_FILES #define STP_MAX_PROCFS_FILES 16 +#endif + static int _stp_num_pde = 0; static struct proc_dir_entry *_stp_pde[STP_MAX_PROCFS_FILES]; static struct proc_dir_entry *_stp_procfs_files[STP_MAX_PROCFS_FILES]; @@ -131,7 +134,8 @@ static struct proc_dir_entry *_stp_procfs_lookup(const char *dir, struct proc_di return NULL; } -static int _stp_create_procfs(const char *path, int num) +static int _stp_create_procfs(const char *path, int num, + const struct file_operations *fops) { const char *p; char *next; @@ -182,15 +186,18 @@ static int _stp_create_procfs(const char *path, int num) if (_stp_num_pde == STP_MAX_PROCFS_FILES) goto too_many; - de = create_proc_entry (p, 0600, last_dir); + de = proc_create(p, 0600, last_dir, fops); if (de == NULL) { _stp_error("Could not create file \"%s\" in path \"%s\"\n", p, path); goto err; } - _stp_pde[_stp_num_pde++] = de; - _stp_procfs_files[num] = de; +#ifdef AUTOCONF_PROCFS_OWNER + de->owner = THIS_MODULE; +#endif de->uid = _stp_uid; de->gid = _stp_gid; + _stp_pde[_stp_num_pde++] = de; + _stp_procfs_files[num] = de; return 0; too_many: diff --git a/tapset-procfs.cxx b/tapset-procfs.cxx index f5ab95f8..c4eb54f3 100644 --- a/tapset-procfs.cxx +++ b/tapset-procfs.cxx @@ -110,8 +110,7 @@ procfs_derived_probe::join_group (systemtap_session& s) embeddedcode *ec = new embeddedcode; ec->tok = NULL; ec->code = string("struct 
_stp_procfs_data {\n") - + string(" const char *buffer;\n") - + string(" off_t off;\n") + + string(" char *buffer;\n") + string(" unsigned long count;\n") + string("};\n"); s.embeds.push_back(ec); @@ -164,15 +163,10 @@ procfs_derived_probe_group::emit_module_decls (systemtap_session& s) s.op->newline() << "/* ---- procfs probes ---- */"; s.op->newline() << "#include \"procfs.c\""; + s.op->newline() << "#include \"procfs-probes.c\""; // Emit the procfs probe data list - s.op->newline() << "static struct stap_procfs_probe {"; - s.op->newline(1)<< "const char *path;"; - s.op->newline() << "const char *read_pp;"; - s.op->newline() << "void (*read_ph) (struct context*);"; - s.op->newline() << "const char *write_pp;"; - s.op->newline() << "void (*write_ph) (struct context*);"; - s.op->newline(-1) << "} stap_procfs_probes[] = {"; + s.op->newline() << "static struct stap_procfs_probe stap_procfs_probes[] = {"; s.op->indent(1); for (p_b_p_iterator it = probes_by_path.begin(); it != probes_by_path.end(); @@ -212,21 +206,20 @@ procfs_derived_probe_group::emit_module_decls (systemtap_session& s) } s.op->newline(-1) << "};"; + // Output routine to fill in the buffer with our data. Note that we + // need to do this even in the case where we have no read probes, + // but we can skip most of it then. + s.op->newline(); + + s.op->newline() << "static int _stp_proc_fill_read_buffer(struct stap_procfs_probe *spp) {"; + s.op->indent(1); if (has_read_probes) { - // Output routine to fill in 'page' with our data. 
- s.op->newline(); - - s.op->newline() << "static int _stp_procfs_read(char *page, char **start, off_t off, int count, int *eof, void *data) {"; - - s.op->newline(1) << "struct stap_procfs_probe *spp = (struct stap_procfs_probe *)data;"; s.op->newline() << "struct _stp_procfs_data pdata;"; common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "spp->read_pp"); - s.op->newline() << "pdata.buffer = page;"; - s.op->newline() << "pdata.off = off;"; - s.op->newline() << "pdata.count = count;"; + s.op->newline() << "pdata.buffer = spp->buffer;"; s.op->newline() << "if (c->data == NULL)"; s.op->newline(1) << "c->data = &pdata;"; s.op->newline(-1) << "else {"; @@ -242,29 +235,37 @@ procfs_derived_probe_group::emit_module_decls (systemtap_session& s) // call probe function s.op->newline() << "(*spp->read_ph) (c);"; - // Note that _procfs_value_set copied string data into 'page' + // Note that _procfs_value_set copied string data into spp->buffer s.op->newline() << "c->data = NULL;"; + s.op->newline() << "spp->needs_fill = 0;"; + s.op->newline() << "spp->count = strlen(spp->buffer);"; + common_probe_entryfn_epilogue (s.op); - s.op->newline() << "if (pdata.count == 0)"; - s.op->newline(1) << "*eof = 1;"; - s.op->indent(-1); - s.op->newline() << "return pdata.count;"; + s.op->newline() << "if (spp->needs_fill) {"; + s.op->newline(1) << "spp->needs_fill = 0;"; + s.op->newline() << "return -EIO;"; s.op->newline(-1) << "}"; } + s.op->newline() << "return 0;"; + s.op->newline(-1) << "}"; + + // Output routine to read data. Note that we need to do this even + // in the case where we have no write probes, but we can skip most + // of it then. 
+ s.op->newline() << "static int _stp_process_write_buffer(struct stap_procfs_probe *spp, const char __user *buf, size_t count) {"; + s.op->indent(1); + s.op->newline() << "int retval = 0;"; if (has_write_probes) { - s.op->newline() << "static int _stp_procfs_write(struct file *file, const char *buffer, unsigned long count, void *data) {"; - - s.op->newline(1) << "struct stap_procfs_probe *spp = (struct stap_procfs_probe *)data;"; s.op->newline() << "struct _stp_procfs_data pdata;"; common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "spp->write_pp"); - s.op->newline() << "if (count > (MAXSTRINGLEN - 1))"; + s.op->newline() << "if (count >= MAXSTRINGLEN)"; s.op->newline(1) << "count = MAXSTRINGLEN - 1;"; s.op->indent(-1); - s.op->newline() << "pdata.buffer = buffer;"; + s.op->newline() << "pdata.buffer = (char *)buf;"; s.op->newline() << "pdata.count = count;"; s.op->newline() << "if (c->data == NULL)"; @@ -283,11 +284,14 @@ procfs_derived_probe_group::emit_module_decls (systemtap_session& s) s.op->newline() << "(*spp->write_ph) (c);"; s.op->newline() << "c->data = NULL;"; - common_probe_entryfn_epilogue (s.op); - - s.op->newline() << "return count;"; + s.op->newline() << "if (c->last_error == 0) {"; + s.op->newline(1) << "retval = count;"; s.op->newline(-1) << "}"; + + common_probe_entryfn_epilogue (s.op); } + s.op->newline() << "return retval;"; + s.op->newline(-1) << "}"; } @@ -304,36 +308,21 @@ procfs_derived_probe_group::emit_module_init (systemtap_session& s) s.op->newline(1) << "probe_point = spp->read_pp;"; s.op->newline(-1) << "else"; s.op->newline(1) << "probe_point = spp->write_pp;"; + s.op->indent(-1); - s.op->newline(-1) << "rc = _stp_create_procfs(spp->path, i);"; + s.op->newline() << "_spp_lock_init(spp);"; + s.op->newline() << "rc = _stp_create_procfs(spp->path, i, &_stp_proc_fops);"; s.op->newline() << "if (rc) {"; s.op->newline(1) << "_stp_close_procfs();"; + + s.op->newline() << "for (i = 0; i < " << probes_by_path.size() << "; i++) {"; 
+ s.op->newline(1) << "spp = &stap_procfs_probes[i];"; + s.op->newline() << "_spp_lock_shutdown(spp);"; + s.op->newline(-1) << "}"; s.op->newline() << "break;"; s.op->newline(-1) << "}"; - if (has_read_probes) - { - s.op->newline() << "if (spp->read_pp)"; - s.op->newline(1) << "_stp_procfs_files[i]->read_proc = &_stp_procfs_read;"; - s.op->newline(-1) << "else"; - s.op->newline(1) << "_stp_procfs_files[i]->read_proc = NULL;"; - s.op->indent(-1); - } - else - s.op->newline() << "_stp_procfs_files[i]->read_proc = NULL;"; - - if (has_write_probes) - { - s.op->newline() << "if (spp->write_pp)"; - s.op->newline(1) << "_stp_procfs_files[i]->write_proc = &_stp_procfs_write;"; - s.op->newline(-1) << "else"; - s.op->newline(1) << "_stp_procfs_files[i]->write_proc = NULL;"; - s.op->indent(-1); - } - else - s.op->newline() << "_stp_procfs_files[i]->write_proc = NULL;"; - s.op->newline() << "_stp_procfs_files[i]->data = spp;"; s.op->newline(-1) << "}"; // for loop } @@ -346,6 +335,10 @@ procfs_derived_probe_group::emit_module_exit (systemtap_session& s) return; s.op->newline() << "_stp_close_procfs();"; + s.op->newline() << "for (i = 0; i < " << probes_by_path.size() << "; i++) {"; + s.op->newline(1) << "struct stap_procfs_probe *spp = &stap_procfs_probes[i];"; + s.op->newline() << "_spp_lock_shutdown(spp);"; + s.op->newline(-1) << "}"; } @@ -378,8 +371,7 @@ procfs_var_expanding_visitor::visit_target_symbol (target_symbol* e) embeddedcode *ec = new embeddedcode; ec->tok = e->tok; - string fname = (string(lvalue ? "_procfs_value_set" : "_procfs_value_get") - + "_" + lex_cast(tick++)); + string fname = (string(lvalue ? "_procfs_value_set" : "_procfs_value_get")); string locvalue = "CONTEXT->data"; if (! 
lvalue) @@ -391,14 +383,8 @@ procfs_var_expanding_visitor::visit_target_symbol (target_symbol* e) ec->code = string("int bytes = 0;\n") + string(" struct _stp_procfs_data *data = (struct _stp_procfs_data *)(") + locvalue + string(");\n") + string(" bytes = strnlen(THIS->value, MAXSTRINGLEN - 1);\n") - + string(" if (data->off >= bytes)\n") - + string(" bytes = 0;\n") - + string(" else {\n") - + string(" bytes -= data->off;\n") - + string(" if (bytes > data->count)\n") - + string(" bytes = data->count;\n") - + string(" memcpy((void *)data->buffer, THIS->value + data->off, bytes);\n") - + string(" }\n") + + string(" memcpy((void *)data->buffer, THIS->value, bytes);\n") + + string(" data->buffer[bytes] = '\\0';\n") + string(" data->count = bytes;\n"); fdecl->name = fname; |