summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Smith <dsmith@redhat.com>2010-02-03 11:56:58 -0600
committerDavid Smith <dsmith@redhat.com>2010-02-03 11:56:58 -0600
commit23b7dbfaf1e9860f77b6bf1aa3da8610bf31b03c (patch)
treee04422de3d0c79bd1a2a62d97419a0cd01b292f7
parent0d1ad607311857dc0b4666ce8a84c1a59c615ab9 (diff)
downloadsystemtap-steved-23b7dbfaf1e9860f77b6bf1aa3da8610bf31b03c.tar.gz
systemtap-steved-23b7dbfaf1e9860f77b6bf1aa3da8610bf31b03c.tar.xz
systemtap-steved-23b7dbfaf1e9860f77b6bf1aa3da8610bf31b03c.zip
Fixed PR 11078. Changed code to avoided procfs race condition.
* runtime/procfs.c: Allow STP_MAX_PROCFS_FILES define to be overridden. (_stp_create_procfs): Calls proc_create() instead of create_proc_entry() to avoid a race condition. * runtime/procfs-probes.c: New file containing procfs probe support routines. * tapset-procfs.cxx (procfs_derived_probe::join_group): Update struct _stp_procfs_data definition. (procfs_derived_probe::emit_module_decls): Include procfs-probes.c, which is where the definition of struct stap_procfs_probe exists. Update generated routines to read/write procfs data. (procfs_derived_probe_group::emit_module_init): Pass file_operations argument to _stp_create_procfs(). Initialize mutex. (procfs_var_expanding_visitor::visit_target_symbol): Update generated code.
-rw-r--r--runtime/procfs-probes.c204
-rw-r--r--runtime/procfs.c15
-rw-r--r--tapset-procfs.cxx114
3 files changed, 265 insertions, 68 deletions
diff --git a/runtime/procfs-probes.c b/runtime/procfs-probes.c
new file mode 100644
index 00000000..73aa7e5f
--- /dev/null
+++ b/runtime/procfs-probes.c
@@ -0,0 +1,204 @@
+#ifndef _STP_PROCFS_PROBES_C_
+#define _STP_PROCFS_PROBES_C_
+
+#include <linux/mutex.h>
+#include <linux/fs.h>
+
+#if 0
+// Currently we have to output _stp_procfs_data early in the
+// translation process. It really should go here.
+struct _stp_procfs_data {
+ char *buffer;
+ unsigned long count;
+};
+#endif
+
+struct stap_procfs_probe {
+ const char *path;
+ const char *read_pp;
+ void (*read_ph) (struct context*);
+ const char *write_pp;
+ void (*write_ph) (struct context*);
+
+ // FIXME: Eventually, this could get bigger than MAXSTRINGLEN
+ // when we support 'probe procfs("file").read.maxbuf(8192)'
+ // (bug 10690).
+ string_t buffer;
+ size_t count;
+
+ int needs_fill;
+ struct mutex lock;
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,16)
+ atomic_t lockcount;
+#endif
+};
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,16)
+/*
+ * Kernels 2.6.16 or less don't really have mutexes. The 'mutex_*'
+ * functions are defined as their similar semaphore equivalents.
+ * However, there is no semaphore equivalent of 'mutex_is_locked'.
+ * So, we'll fake it with an atomic counter.
+ */
+static inline void _spp_lock_init(struct stap_procfs_probe *spp)
+{
+ atomic_set(&spp->lockcount, 0);
+ mutex_init(&spp->lock);
+}
+static inline int _spp_trylock(struct stap_procfs_probe *spp)
+{
+ int ret = mutex_trylock(&spp->lock);
+ if (ret) {
+ atomic_inc(&spp->lockcount);
+ }
+ return(ret);
+}
+static inline void _spp_lock(struct stap_procfs_probe *spp)
+{
+ mutex_lock(&spp->lock);
+ atomic_inc(&spp->lockcount);
+}
+static inline void _spp_unlock(struct stap_procfs_probe *spp)
+{
+ atomic_dec(&spp->lockcount);
+ mutex_unlock(&spp->lock);
+}
+static inline void _spp_lock_shutdown(struct stap_procfs_probe *spp)
+{
+ if (atomic_read(&spp->lockcount) != 0) {
+ _spp_unlock(spp);
+ }
+ mutex_destroy(&spp->lock);
+}
+#else /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,16) */
+#define _spp_lock_init(spp) mutex_init(&(spp)->lock)
+#define _spp_trylock(spp) mutex_trylock(&(spp)->lock)
+#define _spp_lock(spp) mutex_lock(&(spp)->lock)
+#define _spp_unlock(spp) mutex_unlock(&(spp)->lock)
+static inline void _spp_lock_shutdown(struct stap_procfs_probe *spp)
+{
+ if (mutex_is_locked(&spp->lock)) {
+ mutex_unlock(&spp->lock);
+ }
+ mutex_destroy(&spp->lock);
+}
+#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,6,16) */
+
+static int _stp_proc_fill_read_buffer(struct stap_procfs_probe *spp);
+
+static int _stp_process_write_buffer(struct stap_procfs_probe *spp,
+ const char __user *buf, size_t count);
+
+static int
+_stp_proc_open_file(struct inode *inode, struct file *filp)
+{
+ struct stap_procfs_probe *spp;
+ int err;
+
+ spp = (struct stap_procfs_probe *)PDE(inode)->data;
+ if (spp == NULL) {
+ return -EINVAL;
+ }
+
+ err = generic_file_open(inode, filp);
+ if (err)
+ return err;
+
+ /* To avoid concurrency problems, we only allow 1 open at a
+ * time. (Grabbing a mutex here doesn't really work. The
+ * debug kernel can OOPS with "BUG: lock held when returning
+ * to user space!".)
+ *
+ * If open() was called with
+ * O_NONBLOCK, don't block, just return EAGAIN. */
+ if (filp->f_flags & O_NONBLOCK) {
+ if (_spp_trylock(spp) == 0) {
+ return -EAGAIN;
+ }
+ }
+ else {
+ _spp_lock(spp);
+ }
+
+ filp->private_data = spp;
+ if ((filp->f_flags & O_ACCMODE) == O_RDONLY) {
+ spp->buffer[0] = '\0';
+ spp->count = 0;
+ spp->needs_fill = 1;
+ }
+ return 0;
+}
+
+static int
+_stp_proc_release_file(struct inode *inode, struct file *filp)
+{
+ struct stap_procfs_probe *spp;
+
+ spp = (struct stap_procfs_probe *)filp->private_data;
+ if (spp != NULL) {
+ _spp_unlock(spp);
+ }
+ return 0;
+}
+
+static ssize_t
+_stp_proc_read_file(struct file *file, char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ struct stap_procfs_probe *spp = file->private_data;
+ ssize_t retval = 0;
+
+ /* If we don't have a probe read function, just return 0 to
+ * indicate there isn't any data here. */
+ if (spp == NULL || spp->read_ph == NULL) {
+ goto out;
+ }
+
+ /* If needed, fill up the buffer.*/
+ if (spp->needs_fill) {
+ if ((retval = _stp_proc_fill_read_buffer(spp))) {
+ goto out;
+ }
+ }
+
+ /* Return bytes from the buffer. */
+ retval = simple_read_from_buffer(buf, count, ppos, spp->buffer,
+ spp->count);
+out:
+ return retval;
+}
+
+static ssize_t
+_stp_proc_write_file(struct file *file, const char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ struct stap_procfs_probe *spp = file->private_data;
+ struct _stp_procfs_data pdata;
+ ssize_t len;
+
+ /* If we don't have a write probe, return EIO. */
+ if (spp->write_ph == NULL) {
+ len = -EIO;
+ goto out;
+ }
+
+ /* Handle the input buffer. */
+ len = _stp_process_write_buffer(spp, buf, count);
+ if (len > 0) {
+ *ppos += len;
+ }
+
+out:
+ return len;
+}
+
+static struct file_operations _stp_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = _stp_proc_open_file,
+ .read = _stp_proc_read_file,
+ .write = _stp_proc_write_file,
+ .llseek = generic_file_llseek,
+ .release = _stp_proc_release_file
+};
+
+#endif /* _STP_PROCFS_PROBES_C_ */
diff --git a/runtime/procfs.c b/runtime/procfs.c
index 1d2ad837..1fd9fcc5 100644
--- a/runtime/procfs.c
+++ b/runtime/procfs.c
@@ -17,7 +17,10 @@
#ifndef _STP_PROCFS_C_
#define _STP_PROCFS_C_
+#ifndef STP_MAX_PROCFS_FILES
#define STP_MAX_PROCFS_FILES 16
+#endif
+
static int _stp_num_pde = 0;
static struct proc_dir_entry *_stp_pde[STP_MAX_PROCFS_FILES];
static struct proc_dir_entry *_stp_procfs_files[STP_MAX_PROCFS_FILES];
@@ -131,7 +134,8 @@ static struct proc_dir_entry *_stp_procfs_lookup(const char *dir, struct proc_di
return NULL;
}
-static int _stp_create_procfs(const char *path, int num)
+static int _stp_create_procfs(const char *path, int num,
+ const struct file_operations *fops)
{
const char *p;
char *next;
@@ -182,15 +186,18 @@ static int _stp_create_procfs(const char *path, int num)
if (_stp_num_pde == STP_MAX_PROCFS_FILES)
goto too_many;
- de = create_proc_entry (p, 0600, last_dir);
+ de = proc_create(p, 0600, last_dir, fops);
if (de == NULL) {
_stp_error("Could not create file \"%s\" in path \"%s\"\n", p, path);
goto err;
}
- _stp_pde[_stp_num_pde++] = de;
- _stp_procfs_files[num] = de;
+#ifdef AUTOCONF_PROCFS_OWNER
+ de->owner = THIS_MODULE;
+#endif
de->uid = _stp_uid;
de->gid = _stp_gid;
+ _stp_pde[_stp_num_pde++] = de;
+ _stp_procfs_files[num] = de;
return 0;
too_many:
diff --git a/tapset-procfs.cxx b/tapset-procfs.cxx
index f5ab95f8..c4eb54f3 100644
--- a/tapset-procfs.cxx
+++ b/tapset-procfs.cxx
@@ -110,8 +110,7 @@ procfs_derived_probe::join_group (systemtap_session& s)
embeddedcode *ec = new embeddedcode;
ec->tok = NULL;
ec->code = string("struct _stp_procfs_data {\n")
- + string(" const char *buffer;\n")
- + string(" off_t off;\n")
+ + string(" char *buffer;\n")
+ string(" unsigned long count;\n")
+ string("};\n");
s.embeds.push_back(ec);
@@ -164,15 +163,10 @@ procfs_derived_probe_group::emit_module_decls (systemtap_session& s)
s.op->newline() << "/* ---- procfs probes ---- */";
s.op->newline() << "#include \"procfs.c\"";
+ s.op->newline() << "#include \"procfs-probes.c\"";
// Emit the procfs probe data list
- s.op->newline() << "static struct stap_procfs_probe {";
- s.op->newline(1)<< "const char *path;";
- s.op->newline() << "const char *read_pp;";
- s.op->newline() << "void (*read_ph) (struct context*);";
- s.op->newline() << "const char *write_pp;";
- s.op->newline() << "void (*write_ph) (struct context*);";
- s.op->newline(-1) << "} stap_procfs_probes[] = {";
+ s.op->newline() << "static struct stap_procfs_probe stap_procfs_probes[] = {";
s.op->indent(1);
for (p_b_p_iterator it = probes_by_path.begin(); it != probes_by_path.end();
@@ -212,21 +206,20 @@ procfs_derived_probe_group::emit_module_decls (systemtap_session& s)
}
s.op->newline(-1) << "};";
+ // Output routine to fill in the buffer with our data. Note that we
+ // need to do this even in the case where we have no read probes,
+ // but we can skip most of it then.
+ s.op->newline();
+
+ s.op->newline() << "static int _stp_proc_fill_read_buffer(struct stap_procfs_probe *spp) {";
+ s.op->indent(1);
if (has_read_probes)
{
- // Output routine to fill in 'page' with our data.
- s.op->newline();
-
- s.op->newline() << "static int _stp_procfs_read(char *page, char **start, off_t off, int count, int *eof, void *data) {";
-
- s.op->newline(1) << "struct stap_procfs_probe *spp = (struct stap_procfs_probe *)data;";
s.op->newline() << "struct _stp_procfs_data pdata;";
common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "spp->read_pp");
- s.op->newline() << "pdata.buffer = page;";
- s.op->newline() << "pdata.off = off;";
- s.op->newline() << "pdata.count = count;";
+ s.op->newline() << "pdata.buffer = spp->buffer;";
s.op->newline() << "if (c->data == NULL)";
s.op->newline(1) << "c->data = &pdata;";
s.op->newline(-1) << "else {";
@@ -242,29 +235,37 @@ procfs_derived_probe_group::emit_module_decls (systemtap_session& s)
// call probe function
s.op->newline() << "(*spp->read_ph) (c);";
- // Note that _procfs_value_set copied string data into 'page'
+ // Note that _procfs_value_set copied string data into spp->buffer
s.op->newline() << "c->data = NULL;";
+ s.op->newline() << "spp->needs_fill = 0;";
+ s.op->newline() << "spp->count = strlen(spp->buffer);";
+
common_probe_entryfn_epilogue (s.op);
- s.op->newline() << "if (pdata.count == 0)";
- s.op->newline(1) << "*eof = 1;";
- s.op->indent(-1);
- s.op->newline() << "return pdata.count;";
+ s.op->newline() << "if (spp->needs_fill) {";
+ s.op->newline(1) << "spp->needs_fill = 0;";
+ s.op->newline() << "return -EIO;";
s.op->newline(-1) << "}";
}
+ s.op->newline() << "return 0;";
+ s.op->newline(-1) << "}";
+
+ // Output routine to read data. Note that we need to do this even
+ // in the case where we have no write probes, but we can skip most
+ // of it then.
+ s.op->newline() << "static int _stp_process_write_buffer(struct stap_procfs_probe *spp, const char __user *buf, size_t count) {";
+ s.op->indent(1);
+ s.op->newline() << "int retval = 0;";
if (has_write_probes)
{
- s.op->newline() << "static int _stp_procfs_write(struct file *file, const char *buffer, unsigned long count, void *data) {";
-
- s.op->newline(1) << "struct stap_procfs_probe *spp = (struct stap_procfs_probe *)data;";
s.op->newline() << "struct _stp_procfs_data pdata;";
common_probe_entryfn_prologue (s.op, "STAP_SESSION_RUNNING", "spp->write_pp");
- s.op->newline() << "if (count > (MAXSTRINGLEN - 1))";
+ s.op->newline() << "if (count >= MAXSTRINGLEN)";
s.op->newline(1) << "count = MAXSTRINGLEN - 1;";
s.op->indent(-1);
- s.op->newline() << "pdata.buffer = buffer;";
+ s.op->newline() << "pdata.buffer = (char *)buf;";
s.op->newline() << "pdata.count = count;";
s.op->newline() << "if (c->data == NULL)";
@@ -283,11 +284,14 @@ procfs_derived_probe_group::emit_module_decls (systemtap_session& s)
s.op->newline() << "(*spp->write_ph) (c);";
s.op->newline() << "c->data = NULL;";
- common_probe_entryfn_epilogue (s.op);
-
- s.op->newline() << "return count;";
+ s.op->newline() << "if (c->last_error == 0) {";
+ s.op->newline(1) << "retval = count;";
s.op->newline(-1) << "}";
+
+ common_probe_entryfn_epilogue (s.op);
}
+ s.op->newline() << "return retval;";
+ s.op->newline(-1) << "}";
}
@@ -304,36 +308,21 @@ procfs_derived_probe_group::emit_module_init (systemtap_session& s)
s.op->newline(1) << "probe_point = spp->read_pp;";
s.op->newline(-1) << "else";
s.op->newline(1) << "probe_point = spp->write_pp;";
+ s.op->indent(-1);
- s.op->newline(-1) << "rc = _stp_create_procfs(spp->path, i);";
+ s.op->newline() << "_spp_lock_init(spp);";
+ s.op->newline() << "rc = _stp_create_procfs(spp->path, i, &_stp_proc_fops);";
s.op->newline() << "if (rc) {";
s.op->newline(1) << "_stp_close_procfs();";
+
+ s.op->newline() << "for (i = 0; i < " << probes_by_path.size() << "; i++) {";
+ s.op->newline(1) << "spp = &stap_procfs_probes[i];";
+ s.op->newline() << "_spp_lock_shutdown(spp);";
+ s.op->newline(-1) << "}";
s.op->newline() << "break;";
s.op->newline(-1) << "}";
- if (has_read_probes)
- {
- s.op->newline() << "if (spp->read_pp)";
- s.op->newline(1) << "_stp_procfs_files[i]->read_proc = &_stp_procfs_read;";
- s.op->newline(-1) << "else";
- s.op->newline(1) << "_stp_procfs_files[i]->read_proc = NULL;";
- s.op->indent(-1);
- }
- else
- s.op->newline() << "_stp_procfs_files[i]->read_proc = NULL;";
-
- if (has_write_probes)
- {
- s.op->newline() << "if (spp->write_pp)";
- s.op->newline(1) << "_stp_procfs_files[i]->write_proc = &_stp_procfs_write;";
- s.op->newline(-1) << "else";
- s.op->newline(1) << "_stp_procfs_files[i]->write_proc = NULL;";
- s.op->indent(-1);
- }
- else
- s.op->newline() << "_stp_procfs_files[i]->write_proc = NULL;";
-
s.op->newline() << "_stp_procfs_files[i]->data = spp;";
s.op->newline(-1) << "}"; // for loop
}
@@ -346,6 +335,10 @@ procfs_derived_probe_group::emit_module_exit (systemtap_session& s)
return;
s.op->newline() << "_stp_close_procfs();";
+ s.op->newline() << "for (i = 0; i < " << probes_by_path.size() << "; i++) {";
+ s.op->newline(1) << "struct stap_procfs_probe *spp = &stap_procfs_probes[i];";
+ s.op->newline() << "_spp_lock_shutdown(spp);";
+ s.op->newline(-1) << "}";
}
@@ -378,8 +371,7 @@ procfs_var_expanding_visitor::visit_target_symbol (target_symbol* e)
embeddedcode *ec = new embeddedcode;
ec->tok = e->tok;
- string fname = (string(lvalue ? "_procfs_value_set" : "_procfs_value_get")
- + "_" + lex_cast(tick++));
+ string fname = (string(lvalue ? "_procfs_value_set" : "_procfs_value_get"));
string locvalue = "CONTEXT->data";
if (! lvalue)
@@ -391,14 +383,8 @@ procfs_var_expanding_visitor::visit_target_symbol (target_symbol* e)
ec->code = string("int bytes = 0;\n")
+ string(" struct _stp_procfs_data *data = (struct _stp_procfs_data *)(") + locvalue + string(");\n")
+ string(" bytes = strnlen(THIS->value, MAXSTRINGLEN - 1);\n")
- + string(" if (data->off >= bytes)\n")
- + string(" bytes = 0;\n")
- + string(" else {\n")
- + string(" bytes -= data->off;\n")
- + string(" if (bytes > data->count)\n")
- + string(" bytes = data->count;\n")
- + string(" memcpy((void *)data->buffer, THIS->value + data->off, bytes);\n")
- + string(" }\n")
+ + string(" memcpy((void *)data->buffer, THIS->value, bytes);\n")
+ + string(" data->buffer[bytes] = '\\0';\n")
+ string(" data->count = bytes;\n");
fdecl->name = fname;