Merge branch 'master' of ssh://sources.redhat.com/git/systemtap

author: Dave Brolley <brolley@redhat.com> 2009-07-10 11:10:51 -0400
committer: Dave Brolley <brolley@redhat.com> 2009-07-10 11:10:51 -0400
commit: 1e9ab8199dff90c1b6e7290f0f7b4eb424a9ff9f (patch)
tree: a0c10b78ad3e7142a59ffd6f6e75b75abf90d88e
parent: 7d54db1a2c0b3831b6fbc8282f1155426c4be540 (diff)
parent: c728b7da8be430367aa33f9fbacda93d4add9ea2 (diff)
download: systemtap-steved-1e9ab8199dff90c1b6e7290f0f7b4eb424a9ff9f.tar.gz
systemtap-steved-1e9ab8199dff90c1b6e7290f0f7b4eb424a9ff9f.tar.xz
systemtap-steved-1e9ab8199dff90c1b6e7290f0f7b4eb424a9ff9f.zip
14 files changed, 338 insertions, 188 deletions
diff --git a/buildrun.cxx b/buildrun.cxx
index 54aa5d4f..ccf1ca15 100644
--- a/buildrun.cxx
+++ b/buildrun.cxx
@@ -171,6 +171,7 @@ compile_pass (systemtap_session& s)
                   "STAPCONF_KERNEL_STACKTRACE", NULL);
   output_autoconf(s, o, "autoconf-asm-syscall.c",
 		  "STAPCONF_ASM_SYSCALL_H", NULL);
+  output_autoconf(s, o, "autoconf-ring_buffer-flags.c", "STAPCONF_RING_BUFFER_FLAGS", NULL);
 
   o << module_cflags << " += -include $(STAPCONF_HEADER)" << endl;
 
diff --git a/coveragedb.h b/coveragedb.h
index 3675e3b4..f0f071c4 100644
--- a/coveragedb.h
+++ b/coveragedb.h
@@ -10,6 +10,7 @@
 #define COVERAGEDB_H
 
 #include "session.h"
+#include "staptree.h"
 
 #include <string>
 
@@ -62,12 +63,12 @@ public:
   int compiled;
   int executed;
 
-  coverage_element() { line = 0; col = 0;
-	  compiled = 0; executed = 0; }
+  coverage_element():
+    line(0), col(0), compiled(0), executed(0) {}
 
-  coverage_element(source_loc &place) {
-	  file = place.file; line = place.line; col = place.column;
-	  compiled = 0; executed = 0; }
+  coverage_element(source_loc &place):
+    file(place.file->name), line(place.line), col(place.column),
+    compiled(0), executed(0) {}
 };
 
 
diff --git a/elaborate.cxx b/elaborate.cxx
index 30e9a775..fafc5e63 100644
--- a/elaborate.cxx
+++ b/elaborate.cxx
@@ -1491,9 +1491,9 @@ systemtap_session::print_token (ostream& o, const token* tok)
       tmpo << *tok;
       string ts = tmpo.str();
       // search & replace the file name with nothing
-      size_t idx = ts.find (tok->location.file);
+      size_t idx = ts.find (tok->location.file->name);
       if (idx != string::npos)
-          ts.replace (idx, tok->location.file.size(), "");
+          ts.replace (idx, tok->location.file->name.size(), "");
 
       o << ts;
     }
@@ -1560,16 +1560,16 @@ systemtap_session::print_error_source (std::ostream& message,
                                        std::string& align, const token* tok)
 {
   unsigned i = 0;
-  unsigned line = tok->location.line;
-  unsigned col = tok->location.column;
-  string file_contents;
 
   assert (tok);
-  if (tok->location.stap_file)
-    file_contents = tok->location.stap_file->file_contents;
-  else
+  if (!tok->location.file)
     //No source to print, silently exit
     return;
+
+  unsigned line = tok->location.line;
+  unsigned col = tok->location.column;
+  const string &file_contents = tok->location.file->file_contents;
+
   size_t start_pos = 0, end_pos = 0;
   //Navigate to the appropriate line
   while (i != line && end_pos != std::string::npos)
@@ -1937,7 +1937,7 @@ void semantic_pass_opt1 (systemtap_session& s, bool& relaxed_p)
       functiondecl* fd = it->second;
       if (ftv.traversed.find(fd) == ftv.traversed.end())
         {
-          if (fd->tok->location.file == s.user_file->name && // !tapset
+          if (fd->tok->location.file->name == s.user_file->name && // !tapset
               ! s.suppress_warnings)
 	    s.print_warning ("eliding unused function '" + fd->name + "'", fd->tok);
           else if (s.verbose>2)
@@ -1993,7 +1993,7 @@ void semantic_pass_opt2 (systemtap_session& s, bool& relaxed_p, unsigned iterati
         if (vut.read.find (l) == vut.read.end() &&
             vut.written.find (l) == vut.written.end())
           {
-            if (l->tok->location.file == s.user_file->name && // !tapset
+            if (l->tok->location.file->name == s.user_file->name && // !tapset
                 ! s.suppress_warnings)
 	      s.print_warning ("eliding unused variable '" + l->name + "'", l->tok);
             else if (s.verbose>2)
@@ -2037,7 +2037,7 @@ void semantic_pass_opt2 (systemtap_session& s, bool& relaxed_p, unsigned iterati
           if (vut.read.find (l) == vut.read.end() &&
               vut.written.find (l) == vut.written.end())
             {
-              if (l->tok->location.file == s.user_file->name && // !tapset
+              if (l->tok->location.file->name == s.user_file->name && // !tapset
                   ! s.suppress_warnings)
                 s.print_warning ("eliding unused variable '" + l->name + "'", l->tok);
               else if (s.verbose>2)
@@ -2083,7 +2083,7 @@ void semantic_pass_opt2 (systemtap_session& s, bool& relaxed_p, unsigned iterati
       if (vut.read.find (l) == vut.read.end() &&
           vut.written.find (l) == vut.written.end())
         {
-          if (l->tok->location.file == s.user_file->name && // !tapset
+          if (l->tok->location.file->name == s.user_file->name && // !tapset
               ! s.suppress_warnings)
             s.print_warning ("eliding unused variable '" + l->name + "'", l->tok);
           else if (s.verbose>2)
diff --git a/includes/sys/sdt.h b/includes/sys/sdt.h
index e448c90e..3b788b88 100644
--- a/includes/sys/sdt.h
+++ b/includes/sys/sdt.h
@@ -17,9 +17,17 @@
 #define STAP_PROBE_ADDR "\t.long "
 #endif
 
+/* Allocated section needs to be writable when creating pic shared objects
+   because we store relocatable addresses in them. */
+#ifdef __PIC__
+#define ALLOCSEC "\"aw\""
+#else
+#define ALLOCSEC "\"a\""
+#endif
+
 /* An allocated section .probes that holds the probe names and addrs. */
 #define STAP_PROBE_DATA_(probe,guard,arg)	\
-  __asm__ volatile (".section .probes, \"a\"\n" \
+  __asm__ volatile (".section .probes," ALLOCSEC "\n" \
 		    "\t.align 8\n"		\
 		    "1:\n\t.asciz " #probe "\n" \
 		    "\t.align 4\n"		\
diff --git a/parse.cxx b/parse.cxx
index a26d594c..cfa33cb4 100644
--- a/parse.cxx
+++ b/parse.cxx
@@ -583,47 +583,68 @@ parser::peek_kw (std::string const & kw)
 
 
 
-lexer::lexer (istream& i, const string& in, systemtap_session& s):
-  input (i), input_name (in), input_contents (""),
-  input_pointer (0), cursor_suspend_count(0),
-  cursor_line (1), cursor_column (1), session(s),
-  current_file (0)
+lexer::lexer (istream& input, const string& in, systemtap_session& s):
+  input_name (in), input_pointer (0), input_end (0),
+  cursor_suspend_count(0), cursor_line (1), cursor_column (1),
+  session(s), current_file (0)
 {
-  char c;
-  while(input.get(c))
-    input_contents.push_back(c);
-}
+  getline(input, input_contents, '\0');
 
-std::string
-lexer::get_input_contents ()
-{
-  return input_contents;
+  input_pointer = input_contents.data();
+  input_end = input_contents.data() + input_contents.size();
+
+  if (keywords.empty())
+    {
+      keywords.insert("probe");
+      keywords.insert("global");
+      keywords.insert("function");
+      keywords.insert("if");
+      keywords.insert("else");
+      keywords.insert("for");
+      keywords.insert("foreach");
+      keywords.insert("in");
+      keywords.insert("limit");
+      keywords.insert("return");
+      keywords.insert("delete");
+      keywords.insert("while");
+      keywords.insert("break");
+      keywords.insert("continue");
+      keywords.insert("next");
+      keywords.insert("string");
+      keywords.insert("long");
+    }
 }
 
+set<string> lexer::keywords;
+
 void
 lexer::set_current_file (stapfile* f)
 {
   current_file = f;
+  if (f)
+    {
+      f->file_contents = input_contents;
+      f->name = input_name;
+    }
 }
 
 int
 lexer::input_peek (unsigned n)
 {
-  if (input_contents.size() > (input_pointer + n))
-    return (int)(unsigned char)input_contents[input_pointer+n];
-  else
-    return -1;
+  if (input_pointer + n >= input_end)
+    return -1; // EOF
+  return (unsigned char)*(input_pointer + n);
 }
 
 
 int
 lexer::input_get ()
 {
-  int c = input_peek (0);
-  input_pointer ++;
-
+  int c = input_peek();
   if (c < 0) return c; // EOF
 
+  ++input_pointer;
+
   if (cursor_suspend_count)
     // Track effect of input_put: preserve previous cursor/line_column
     // until all of its characters are consumed.
@@ -648,9 +669,12 @@ lexer::input_get ()
 void
 lexer::input_put (const string& chars)
 {
-  // clog << "[put:" << chars << " @" << input_pointer << "]";
-  input_contents.insert (input_contents.begin() + input_pointer, chars.begin(), chars.end());
+  size_t pos = input_pointer - input_contents.data();
+  // clog << "[put:" << chars << " @" << pos << "]";
+  input_contents.insert (pos, chars);
   cursor_suspend_count += chars.size();
+  input_pointer = input_contents.data() + pos;
+  input_end = input_contents.data() + input_contents.size();
 }
 
 
@@ -658,9 +682,7 @@ token*
 lexer::scan (bool wildcard)
 {
   token* n = new token;
-  n->location.file = input_name;
-  if (current_file)
-    n->location.stap_file = current_file;
+  n->location.file = current_file;
 
   unsigned semiskipped_p = 0;
 
@@ -676,7 +698,6 @@ lexer::scan (bool wildcard)
     }
 
   int c = input_get();
-  int c2 = input_peek ();
   // clog << "{" << (char)c << (char)c2 << "}";
   if (c < 0)
     {
@@ -687,6 +708,8 @@ lexer::scan (bool wildcard)
   if (isspace (c))
     goto skip;
 
+  int c2 = input_peek ();
+
   // Paste command line arguments as character streams into
   // the beginning of a token.  $1..$999 go through as raw
   // characters; @1..@999 are quoted/escaped as strings.
@@ -740,23 +763,7 @@ lexer::scan (bool wildcard)
           c2 = input_peek ();
         }
 
-      if (n->content    == "probe"
-          || n->content == "global"
-          || n->content == "function"
-          || n->content == "if"
-          || n->content == "else"
-          || n->content == "for"
-          || n->content == "foreach"
-          || n->content == "in"
-          || n->content == "limit"
-          || n->content == "return"
-          || n->content == "delete"
-          || n->content == "while"
-          || n->content == "break"
-          || n->content == "continue"
-          || n->content == "next"
-          || n->content == "string"
-          || n->content == "long")
+      if (keywords.count(n->content))
         n->type = tok_keyword;
 
       return n;
@@ -767,23 +774,15 @@ lexer::scan (bool wildcard)
       n->type = tok_number;
       n->content = (char) c;
 
-      while (1)
+      while (isalnum (c2))
 	{
-	  int c2 = input_peek ();
-	  if (c2 < 0)
-	    break;
-
           // NB: isalnum is very permissive.  We rely on strtol, called in
           // parser::parse_literal below, to confirm that the number string
           // is correctly formatted and in range.
 
-	  if (isalnum (c2))
-	    {
-	      n->content.push_back (c2);
-	      input_get ();
-	    }
-	  else
-	    break;
+          input_get ();
+          n->content.push_back (c2);
+          c2 = input_peek ();
 	}
       return n;
     }
@@ -835,25 +834,21 @@ lexer::scan (bool wildcard)
 
   else if (ispunct (c))
     {
-      int c2 = input_peek ();
       int c3 = input_peek (1);
-      string s1 = string("") + (char) c;
-      string s2 = (c2 > 0 ? s1 + (char) c2 : s1);
-      string s3 = (c3 > 0 ? s2 + (char) c3 : s2);
 
       // NB: if we were to recognize negative numeric literals here,
       // we'd introduce another grammar ambiguity:
       // 1-1 would be parsed as tok_number(1) and tok_number(-1)
       // instead of tok_number(1) tok_operator('-') tok_number(1)
 
-      if (s1 == "#") // shell comment
+      if (c == '#') // shell comment
         {
           unsigned this_line = cursor_line;
           do { c = input_get (); }
           while (c >= 0 && cursor_line == this_line);
           goto skip;
         }
-      else if (s2 == "//") // C++ comment
+      else if ((c == '/' && c2 == '/')) // C++ comment
         {
           unsigned this_line = cursor_line;
           do { c = input_get (); }
@@ -862,15 +857,15 @@ lexer::scan (bool wildcard)
         }
       else if (c == '/' && c2 == '*') // C comment
 	{
+          (void) input_get (); // swallow '*' already in c2
+          c = input_get ();
           c2 = input_get ();
-          unsigned chars = 0;
           while (c2 >= 0)
             {
-              chars ++; // track this to prevent "/*/" from being accepted
+              if (c == '*' && c2 == '/')
+                break;
               c = c2;
               c2 = input_get ();
-              if (chars > 1 && c == '*' && c2 == '/')
-                break;
             }
           goto skip;
 	}
@@ -878,73 +873,63 @@ lexer::scan (bool wildcard)
         {
           n->type = tok_embedded;
           (void) input_get (); // swallow '{' already in c2
-          while (true)
+          c = input_get ();
+          c2 = input_get ();
+          while (c2 >= 0)
             {
-              c = input_get ();
-              if (c < 0) // EOF
-                {
-                  n->type = tok_junk;
-                  break;
-                }
-              if (c == '%')
-                {
-                  c2 = input_peek ();
-                  if (c2 == '}')
-                    {
-                      (void) input_get (); // swallow '}' too
-                      break;
-                    }
-                }
+              if (c == '%' && c2 == '}')
+                return n;
               n->content += c;
+              c = c2;
+              c2 = input_get ();
             }
+          n->type = tok_junk;
           return n;
         }
 
       // We're committed to recognizing at least the first character
       // as an operator.
       n->type = tok_operator;
+      n->content = c;
 
       // match all valid operators, in decreasing size order
-      if (s3 == "<<<" ||
-          s3 == "<<=" ||
-          s3 == ">>=")
+      if ((c == '<' && c2 == '<' && c3 == '<') ||
+          (c == '<' && c2 == '<' && c3 == '=') ||
+          (c == '>' && c2 == '>' && c3 == '='))
         {
-          n->content = s3;
+          n->content += c2;
+          n->content += c3;
           input_get (); input_get (); // swallow other two characters
         }
-      else if (s2 == "==" ||
-               s2 == "!=" ||
-               s2 == "<=" ||
-               s2 == ">=" ||
-               s2 == "+=" ||
-               s2 == "-=" ||
-               s2 == "*=" ||
-               s2 == "/=" ||
-               s2 == "%=" ||
-               s2 == "&=" ||
-               s2 == "^=" ||
-               s2 == "|=" ||
-               s2 == ".=" ||
-               s2 == "&&" ||
-               s2 == "||" ||
-               s2 == "++" ||
-               s2 == "--" ||
-               s2 == "->" ||
-               s2 == "<<" ||
-               s2 == ">>" ||
+      else if ((c == '=' && c2 == '=') ||
+               (c == '!' && c2 == '=') ||
+               (c == '<' && c2 == '=') ||
+               (c == '>' && c2 == '=') ||
+               (c == '+' && c2 == '=') ||
+               (c == '-' && c2 == '=') ||
+               (c == '*' && c2 == '=') ||
+               (c == '/' && c2 == '=') ||
+               (c == '%' && c2 == '=') ||
+               (c == '&' && c2 == '=') ||
+               (c == '^' && c2 == '=') ||
+               (c == '|' && c2 == '=') ||
+               (c == '.' && c2 == '=') ||
+               (c == '&' && c2 == '&') ||
+               (c == '|' && c2 == '|') ||
+               (c == '+' && c2 == '+') ||
+               (c == '-' && c2 == '-') ||
+               (c == '-' && c2 == '>') ||
+               (c == '<' && c2 == '<') ||
+               (c == '>' && c2 == '>') ||
                // preprocessor tokens
-               s2 == "%(" ||
-               s2 == "%?" ||
-               s2 == "%:" ||
-               s2 == "%)")
+               (c == '%' && c2 == '(') ||
+               (c == '%' && c2 == '?') ||
+               (c == '%' && c2 == ':') ||
+               (c == '%' && c2 == ')'))
         {
-          n->content = s2;
+          n->content += c2;
           input_get (); // swallow other character
         }
-      else
-        {
-          n->content = s1;
-        }
 
       return n;
     }
@@ -965,8 +950,6 @@ parser::parse ()
 {
   stapfile* f = new stapfile;
   input.set_current_file (f);
-  f->file_contents = input.get_input_contents ();
-  f->name = input_name;
 
   bool empty = true;
 
@@ -1034,18 +1017,16 @@ parser::parse ()
     {
       cerr << "Input file '" << input_name << "' is empty or missing." << endl;
       delete f;
-      input.set_current_file (0);
-      return 0;
+      f = 0;
     }
   else if (num_errors > 0)
     {
       cerr << num_errors << " parse error(s)." << endl;
       delete f;
-      input.set_current_file (0);
-      return 0;
+      f = 0;
     }
 
-  input.set_current_file (0);
+  input.set_current_file(0);
   return f;
 }
 
diff --git a/parse.h b/parse.h
index 59046bf3..cae49b65 100644
--- a/parse.h
+++ b/parse.h
@@ -15,6 +15,7 @@
 #include <fstream>
 #include <iostream>
 #include <vector>
+#include <set>
 #include <stdexcept>
 #include <stdint.h>
 
@@ -22,10 +23,9 @@ struct stapfile;
 
 struct source_loc
 {
-  std::string file;
+  stapfile* file;
   unsigned line;
   unsigned column;
-  stapfile* stap_file;
 };
 
 std::ostream& operator << (std::ostream& o, const source_loc& loc);
@@ -74,23 +74,22 @@ class lexer
 public:
   token* scan (bool wildcard=false);
   lexer (std::istream&, const std::string&, systemtap_session&);
-  std::string get_input_contents ();
   void set_current_file (stapfile* f);
 
 private:
-  int input_get ();
-  void input_put (int);
+  inline int input_get ();
+  inline int input_peek (unsigned n=0);
   void input_put (const std::string&);
-  int input_peek (unsigned n=0);
-  std::istream& input;
   std::string input_name;
   std::string input_contents;
-  int input_pointer; // index into input_contents
+  const char *input_pointer; // current read position within input_contents
+  const char *input_end;
   unsigned cursor_suspend_count;
   unsigned cursor_line;
   unsigned cursor_column;
   systemtap_session& session;
   stapfile* current_file;
+  static std::set<std::string> keywords;
 };
 
 struct probe;
diff --git a/runtime/autoconf-ring_buffer-flags.c b/runtime/autoconf-ring_buffer-flags.c
new file mode 100644
index 00000000..7d7b8df0
--- /dev/null
+++ b/runtime/autoconf-ring_buffer-flags.c
@@ -0,0 +1,6 @@
+#include <linux/ring_buffer.h>
+
+void ___autoconf_func(void)
+{
+    (void)ring_buffer_lock_reserve(NULL, 0, 0);
+}
diff --git a/runtime/transport/ring_buffer.c b/runtime/transport/ring_buffer.c
index 0b73d4b4..fe63bc83 100644
--- a/runtime/transport/ring_buffer.c
+++ b/runtime/transport/ring_buffer.c
@@ -4,6 +4,11 @@
 #include <linux/poll.h>
 #include <linux/cpumask.h>
 
+#ifndef STP_RELAY_TIMER_INTERVAL
+/* Wakeup timer interval in jiffies (default 10 ms) */
+#define STP_RELAY_TIMER_INTERVAL		((HZ + 99) / 100)
+#endif
+
 struct _stp_data_entry {
 	size_t			len;
 	unsigned char		buf[];
@@ -23,6 +28,8 @@ struct _stp_relay_data_type {
 	struct ring_buffer *rb;
 	struct _stp_ring_buffer_data rb_data;
 	cpumask_var_t trace_reader_cpumask;
+	struct timer_list timer;
+	int overwrite_flag;
 };
 static struct _stp_relay_data_type _stp_relay_data;
 
@@ -134,48 +141,33 @@ _stp_event_to_user(struct ring_buffer_event *event, char __user *ubuf,
 	return cnt;
 }
 
-static ssize_t tracing_wait_pipe(struct file *filp)
+static ssize_t _stp_tracing_wait_pipe(struct file *filp)
 {
-	while (ring_buffer_empty(_stp_relay_data.rb)) {
-
+	if (ring_buffer_empty(_stp_relay_data.rb)) {
 		if ((filp->f_flags & O_NONBLOCK)) {
 			dbug_trans(1, "returning -EAGAIN\n");
 			return -EAGAIN;
 		}
 
-		/*
-		 * This is a make-shift waitqueue. The reason we don't use
-		 * an actual wait queue is because:
-		 *  1) we only ever have one waiter
-		 *  2) the tracing, traces all functions, we don't want
-		 *     the overhead of calling wake_up and friends
-		 *     (and tracing them too)
-		 *     Anyway, this is really very primitive wakeup.
-		 */
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		/* sleep for 100 msecs, and try again. */
-		schedule_timeout(HZ/10);
-
 		if (signal_pending(current)) {
 			dbug_trans(1, "returning -EINTR\n");
 			return -EINTR;
 		}
+		dbug_trans(1, "returning 0\n");
+		return 0;
 	}
 
 	dbug_trans(1, "returning 1\n");
 	return 1;
 }
 
-static struct ring_buffer_event *
-peek_next_event(int cpu, u64 *ts)
+static struct ring_buffer_event *_stp_peek_next_event(int cpu, u64 *ts)
 {
 	return ring_buffer_peek(_stp_relay_data.rb, cpu, ts);
 }
 
 /* Find the next real event */
-static struct ring_buffer_event *
-_stp_find_next_event(long cpu_file)
+static struct ring_buffer_event *_stp_find_next_event(long cpu_file)
 {
 	struct ring_buffer_event *event;
 
@@ -186,7 +178,7 @@ _stp_find_next_event(long cpu_file)
 	 */
 	if (ring_buffer_empty_cpu(_stp_relay_data.rb, (int)cpu_file))
 		return NULL;
-	event = peek_next_event(cpu_file, &_stp_relay_data.rb_data.ts);
+	event = _stp_peek_next_event(cpu_file, &_stp_relay_data.rb_data.ts);
 	_stp_relay_data.rb_data.cpu = cpu_file;
 
 	return event;
@@ -201,7 +193,7 @@ _stp_find_next_event(long cpu_file)
 		if (ring_buffer_empty_cpu(_stp_relay_data.rb, cpu))
 			continue;
 
-		event = peek_next_event(cpu, &ts);
+		event = _stp_peek_next_event(cpu, &ts);
 
 		/*
 		 * Pick the event with the smallest timestamp:
@@ -234,8 +226,8 @@ _stp_data_read_trace(struct file *filp, char __user *ubuf,
 
 	dbug_trans(1, "%lu\n", (unsigned long)cnt);
 
-	sret = tracing_wait_pipe(filp);
-	dbug_trans(1, "tracing_wait_pipe returned %ld\n", sret);
+	sret = _stp_tracing_wait_pipe(filp);
+	dbug_trans(1, "_stp_tracing_wait_pipe returned %ld\n", sret);
 	if (sret <= 0)
 		goto out;
 
@@ -291,9 +283,6 @@ static struct file_operations __stp_data_fops = {
 	.release	= _stp_data_release_trace,
 	.poll		= _stp_data_poll_trace,
 	.read		= _stp_data_read_trace,
-#if 0
-	.splice_read	= tracing_splice_read_pipe,
-#endif
 };
 
 /*
@@ -331,13 +320,56 @@ _stp_data_write_reserve(size_t size_request, void **entry)
 		size_request = __STP_MAX_RESERVE_SIZE;
 	}
 
+#ifdef STAPCONF_RING_BUFFER_FLAGS
+	event = ring_buffer_lock_reserve(_stp_relay_data.rb,
+					 (sizeof(struct _stp_data_entry)
+					  + size_request), 0);
+#else
 	event = ring_buffer_lock_reserve(_stp_relay_data.rb,
-					 sizeof(struct _stp_data_entry) + size_request,
-					 0);
+					 (sizeof(struct _stp_data_entry)
+					  + size_request));
+#endif
 	if (unlikely(! event)) {
-		dbug_trans(1, "event = NULL (%p)?\n", event);
-		entry = NULL;
-		return 0;
+		int cpu;
+
+		dbug_trans(0, "event = NULL (%p)?\n", event);
+		if (! _stp_relay_data.overwrite_flag) {
+			entry = NULL;
+			return 0;
+		}
+
+		/* If we're in overwrite mode and all the buffers are
+		 * full, take an event out of the buffer and consume it
+		 * (throw it away).  This should make room for the new
+		 * data. */
+		cpu = raw_smp_processor_id();
+		event = _stp_find_next_event(cpu);
+		if (event) {
+			ssize_t len;
+
+			sde = (struct _stp_data_entry *)ring_buffer_event_data(event);
+			if (sde->len < size_request)
+				size_request = sde->len;
+			ring_buffer_consume(_stp_relay_data.rb, cpu,
+					    &_stp_relay_data.rb_data.ts);
+			_stp_relay_data.rb_data.cpu = cpu;
+
+			/* Try to reserve again. */
+#ifdef STAPCONF_RING_BUFFER_FLAGS
+			event = ring_buffer_lock_reserve(_stp_relay_data.rb,
+							 sizeof(struct _stp_data_entry) + size_request,
+							 0);
+#else
+			event = ring_buffer_lock_reserve(_stp_relay_data.rb,
+							 sizeof(struct _stp_data_entry) + size_request);
+#endif
+			dbug_trans(0, "overwritten event = 0x%p\n", event);
+		}
+
+		if (unlikely(! event)) {
+			entry = NULL;
+			return 0;
+		}
 	}
 
 	sde = (struct _stp_data_entry *)ring_buffer_event_data(event);
@@ -361,7 +393,6 @@ static unsigned char *_stp_data_entry_data(void *entry)
 
 static int _stp_data_write_commit(void *entry)
 {
-	int ret;
 	struct ring_buffer_event *event = (struct ring_buffer_event *)entry;
 
 	if (unlikely(! entry)) {
@@ -369,14 +400,35 @@ static int _stp_data_write_commit(void *entry)
 		return -EINVAL;
 	}
 
-	ret = ring_buffer_unlock_commit(_stp_relay_data.rb, event, 0);
-	dbug_trans(1, "after commit, empty returns %d\n",
-		   ring_buffer_empty(_stp_relay_data.rb));
+#ifdef STAPCONF_RING_BUFFER_FLAGS
+	return ring_buffer_unlock_commit(_stp_relay_data.rb, event, 0);
+#else
+	return ring_buffer_unlock_commit(_stp_relay_data.rb, event);
+#endif
+}
+
+static void __stp_relay_wakeup_timer(unsigned long val)
+{
+	if (waitqueue_active(&_stp_poll_wait)
+	    && ! ring_buffer_empty(_stp_relay_data.rb))
+		wake_up_interruptible(&_stp_poll_wait);
+ 	mod_timer(&_stp_relay_data.timer, jiffies + STP_RELAY_TIMER_INTERVAL);
+}
 
-	wake_up_interruptible(&_stp_poll_wait);
-	return ret;
+static void __stp_relay_timer_start(void)
+{
+	init_timer(&_stp_relay_data.timer);
+	_stp_relay_data.timer.expires = jiffies + STP_RELAY_TIMER_INTERVAL;
+	_stp_relay_data.timer.function = __stp_relay_wakeup_timer;
+	_stp_relay_data.timer.data = 0;
+	add_timer(&_stp_relay_data.timer);
+	smp_mb();
 }
 
+static void __stp_relay_timer_stop(void)
+{
+	del_timer_sync(&_stp_relay_data.timer);
+}
 
 static struct dentry *__stp_entry[NR_CPUS] = { NULL };
 
@@ -422,6 +474,7 @@ static int _stp_transport_data_fs_init(void)
 
 		__stp_entry[cpu]->d_inode->i_uid = _stp_uid;
 		__stp_entry[cpu]->d_inode->i_gid = _stp_gid;
+		__stp_entry[cpu]->d_inode->i_private = (void *)cpu;
 
 #ifndef STP_BULKMODE
 		if (cpu != 0)
@@ -437,6 +490,7 @@ static int _stp_transport_data_fs_init(void)
 static void _stp_transport_data_fs_start(void)
 {
 	if (_stp_relay_data.transport_state == STP_TRANSPORT_INITIALIZED) {
+		__stp_relay_timer_start();
 		_stp_relay_data.transport_state = STP_TRANSPORT_RUNNING;
 	}
 }
@@ -444,6 +498,7 @@ static void _stp_transport_data_fs_start(void)
 static void _stp_transport_data_fs_stop(void)
 {
 	if (_stp_relay_data.transport_state == STP_TRANSPORT_RUNNING) {
+		__stp_relay_timer_stop();
 		_stp_relay_data.transport_state = STP_TRANSPORT_STOPPED;
 	}
 }
@@ -468,5 +523,6 @@ static enum _stp_transport_state _stp_transport_get_state(void)
 
 static void _stp_transport_data_fs_overwrite(int overwrite)
 {
-	/* FIXME: Just a place holder for now. */
+	dbug_trans(0, "setting ovewrite to %d\n", overwrite);
+	_stp_relay_data.overwrite_flag = overwrite;
 }
diff --git a/testsuite/systemtap.examples/index.html b/testsuite/systemtap.examples/index.html
index 5435829f..e186b615 100644
--- a/testsuite/systemtap.examples/index.html
+++ b/testsuite/systemtap.examples/index.html
@@ -94,6 +94,9 @@ keywords: <a href="keyword-index.html#LOCKING">LOCKING</a> <br>
 <li><a href="memory/kmalloc-top">memory/kmalloc-top</a> - Show Paths to Kernel Malloc (kmalloc) Invocations<br>
 keywords: <a href="keyword-index.html#MEMORY">MEMORY</a> <br>
 <p>The kmalloc-top perl program runs a small systemtap script to collect stack traces for each call to the kmalloc function and counts the time that each stack trace is observed. When kmalloc-top exits it prints out sorted list. The output can be be filtered to print only only the first stack traces (-t) stack traces with more a minimum counts (-m), or exclude certain stack traces (-e).</p></li>
+<li><a href="memory/numa_faults.stp">memory/numa_faults.stp</a> - Summarize Process Misses across NUMA Nodes<br>
+keywords: <a href="keyword-index.html#MEMORY">MEMORY</a> <a href="keyword-index.html#NUMA">NUMA</a> <br>
+<p>The numa_faults.stp script tracks the read and write page faults for each process. When the script exits it prints out the total read and write page faults for each process. The script also provides a break down of page faults per node for each process. This script is useful for determining whether the program has good locality (page faults limited to a single node) on a NUMA computer.</p></li>
 <li><a href="memory/pfaults.stp">memory/pfaults.stp</a> - Generate Log of Major and Minor Page Faults<br>
 keywords: <a href="keyword-index.html#MEMORY">MEMORY</a> <br>
 <p>The pfaults.stp script generates a simple log for each major and minor page fault that occurs on the system. Each line contains a timestamp (in microseconds) when the page fault servicing was completed, the pid of the process, the address of the page fault, the type of access (read or write), the type of fault (major or minor), and the elapsed time for page fault. This log can be examined to determine where the page faults are occuring.</p></li>
diff --git a/testsuite/systemtap.examples/index.txt b/testsuite/systemtap.examples/index.txt
index 53270b01..35decb82 100644
--- a/testsuite/systemtap.examples/index.txt
+++ b/testsuite/systemtap.examples/index.txt
@@ -175,6 +175,17 @@ keywords: memory
   counts (-m), or exclude certain stack traces (-e).
 
 
+memory/numa_faults.stp - Summarize Process Misses across NUMA Nodes
+keywords: memory numa
+
+  The numa_faults.stp script tracks the read and write page faults for
+  each process. When the script exits it prints out the total read and
+  write page faults for each process. The script also provides a break
+  down of page faults per node for each process. This script is useful
+  for determining whether the program has good locality (page faults
+  limited to a single node) on a NUMA computer.
+
+
 memory/pfaults.stp - Generate Log of Major and Minor Page Faults
 keywords: memory
 
diff --git a/testsuite/systemtap.examples/keyword-index.html b/testsuite/systemtap.examples/keyword-index.html
index f3db1429..4de28426 100644
--- a/testsuite/systemtap.examples/keyword-index.html
+++ b/testsuite/systemtap.examples/keyword-index.html
@@ -39,7 +39,7 @@
          	</ul>
 
 <h2>Examples by Keyword</h2>
-<p><tt><a href="#BACKTRACE">BACKTRACE</a> <a href="#BUFFER">BUFFER</a> <a href="#CALLGRAPH">CALLGRAPH</a> <a href="#CPU">CPU</a> <a href="#DISK">DISK</a> <a href="#FORMAT">FORMAT</a> <a href="#FREE">FREE</a> <a href="#FUNCTIONS">FUNCTIONS</a> <a href="#FUTEX">FUTEX</a> <a href="#GRAPH">GRAPH</a> <a href="#INTERRUPT">INTERRUPT</a> <a href="#IO">IO</a> <a href="#LOCKING">LOCKING</a> <a href="#MEMORY">MEMORY</a> <a href="#MONITOR">MONITOR</a> <a href="#NETWORK">NETWORK</a> <a href="#PER-PROCESS">PER-PROCESS</a> <a href="#PROCESS">PROCESS</a> <a href="#PROFILING">PROFILING</a> <a href="#READ">READ</a> <a href="#SCHEDULER">SCHEDULER</a> <a href="#SIGNALS">SIGNALS</a> <a href="#SIMPLE">SIMPLE</a> <a href="#SLEEP">SLEEP</a> <a href="#SOCKET">SOCKET</a> <a href="#SYSCALL">SYSCALL</a> <a href="#TCP">TCP</a> <a href="#TIME">TIME</a> <a href="#TRACE">TRACE</a> <a href="#TRACEPOINT">TRACEPOINT</a> <a href="#TRAFFIC">TRAFFIC</a> <a href="#TTY">TTY</a> <a href="#USE">USE</a> <a href="#WAIT4">WAIT4</a> <a href="#WRITE">WRITE</a> </tt></p>
+<p><tt><a href="#BACKTRACE">BACKTRACE</a> <a href="#BUFFER">BUFFER</a> <a href="#CALLGRAPH">CALLGRAPH</a> <a href="#CPU">CPU</a> <a href="#DISK">DISK</a> <a href="#FORMAT">FORMAT</a> <a href="#FREE">FREE</a> <a href="#FUNCTIONS">FUNCTIONS</a> <a href="#FUTEX">FUTEX</a> <a href="#GRAPH">GRAPH</a> <a href="#INTERRUPT">INTERRUPT</a> <a href="#IO">IO</a> <a href="#LOCKING">LOCKING</a> <a href="#MEMORY">MEMORY</a> <a href="#MONITOR">MONITOR</a> <a href="#NETWORK">NETWORK</a> <a href="#NUMA">NUMA</a> <a href="#PER-PROCESS">PER-PROCESS</a> <a href="#PROCESS">PROCESS</a> <a href="#PROFILING">PROFILING</a> <a href="#READ">READ</a> <a href="#SCHEDULER">SCHEDULER</a> <a href="#SIGNALS">SIGNALS</a> <a href="#SIMPLE">SIMPLE</a> <a href="#SLEEP">SLEEP</a> <a href="#SOCKET">SOCKET</a> <a href="#SYSCALL">SYSCALL</a> <a href="#TCP">TCP</a> <a href="#TIME">TIME</a> <a href="#TRACE">TRACE</a> <a href="#TRACEPOINT">TRACEPOINT</a> <a href="#TRAFFIC">TRAFFIC</a> <a href="#TTY">TTY</a> <a href="#USE">USE</a> <a href="#WAIT4">WAIT4</a> <a href="#WRITE">WRITE</a> </tt></p>
 <h3><a name="BACKTRACE">BACKTRACE</a></h3>
 <ul>
 <li><a href="interrupt/scf.stp">interrupt/scf.stp</a> - Tally Backtraces for Inter-Processor Interrupt (IPI)<br>
@@ -168,6 +168,9 @@ keywords: <a href="keyword-index.html#SYSCALL">SYSCALL</a> <a href="keyword-inde
 <li><a href="memory/kmalloc-top">memory/kmalloc-top</a> - Show Paths to Kernel Malloc (kmalloc) Invocations<br>
 keywords: <a href="keyword-index.html#MEMORY">MEMORY</a> <br>
 <p>The kmalloc-top perl program runs a small systemtap script to collect stack traces for each call to the kmalloc function and counts the time that each stack trace is observed. When kmalloc-top exits it prints out sorted list. The output can be be filtered to print only only the first stack traces (-t) stack traces with more a minimum counts (-m), or exclude certain stack traces (-e).</p></li>
+<li><a href="memory/numa_faults.stp">memory/numa_faults.stp</a> - Summarize Process Misses across NUMA Nodes<br>
+keywords: <a href="keyword-index.html#MEMORY">MEMORY</a> <a href="keyword-index.html#NUMA">NUMA</a> <br>
+<p>The numa_faults.stp script tracks the read and write page faults for each process. When the script exits it prints out the total read and write page faults for each process. The script also provides a breakdown of page faults per node for each process. This script is useful for determining whether the program has good locality (page faults limited to a single node) on a NUMA computer.</p></li>
 <li><a href="memory/pfaults.stp">memory/pfaults.stp</a> - Generate Log of Major and Minor Page Faults<br>
 keywords: <a href="keyword-index.html#MEMORY">MEMORY</a> <br>
 <p>The pfaults.stp script generates a simple log for each major and minor page fault that occurs on the system. Each line contains a timestamp (in microseconds) when the page fault servicing was completed, the pid of the process, the address of the page fault, the type of access (read or write), the type of fault (major or minor), and the elapsed time for page fault. This log can be examined to determine where the page faults are occuring.</p></li>
@@ -202,6 +205,12 @@ keywords: <a href="keyword-index.html#NETWORK">NETWORK</a> <a href="keyword-inde
 keywords: <a href="keyword-index.html#NETWORK">NETWORK</a> <a href="keyword-index.html#TRAFFIC">TRAFFIC</a> <br>
 <p>The tcpdumplike.stp prints out a line for each TCP packet received. Each line includes the source and destination IP addresses, the source and destination ports, and flags.</p></li>
 </ul>
+<h3><a name="NUMA">NUMA</a></h3>
+<ul>
+<li><a href="memory/numa_faults.stp">memory/numa_faults.stp</a> - Summarize Process Misses across NUMA Nodes<br>
+keywords: <a href="keyword-index.html#MEMORY">MEMORY</a> <a href="keyword-index.html#NUMA">NUMA</a> <br>
+<p>The numa_faults.stp script tracks the read and write page faults for each process. When the script exits it prints out the total read and write page faults for each process. The script also provides a breakdown of page faults per node for each process. This script is useful for determining whether the program has good locality (page faults limited to a single node) on a NUMA computer.</p></li>
+</ul>
 <h3><a name="PER-PROCESS">PER-PROCESS</a></h3>
 <ul>
 <li><a href="io/ttyspy.stp">io/ttyspy.stp</a> - Monitor tty typing.<br>
diff --git a/testsuite/systemtap.examples/keyword-index.txt b/testsuite/systemtap.examples/keyword-index.txt
index d3d66fe2..bbee341f 100644
--- a/testsuite/systemtap.examples/keyword-index.txt
+++ b/testsuite/systemtap.examples/keyword-index.txt
@@ -299,6 +299,17 @@ keywords: memory
   counts (-m), or exclude certain stack traces (-e).
 
 
+memory/numa_faults.stp - Summarize Process Misses across NUMA Nodes
+keywords: memory numa
+
+  The numa_faults.stp script tracks the read and write page faults for
+  each process. When the script exits it prints out the total read and
+  write page faults for each process. The script also provides a
+  breakdown of page faults per node for each process. This script is useful
+  for determining whether the program has good locality (page faults
+  limited to a single node) on a NUMA computer.
+
+
 memory/pfaults.stp - Generate Log of Major and Minor Page Faults
 keywords: memory
 
@@ -386,6 +397,19 @@ keywords: network traffic
   source and destination ports, and flags.
 
 
+= NUMA =
+
+memory/numa_faults.stp - Summarize Process Misses across NUMA Nodes
+keywords: memory numa
+
+  The numa_faults.stp script tracks the read and write page faults for
+  each process. When the script exits it prints out the total read and
+  write page faults for each process. The script also provides a
+  breakdown of page faults per node for each process. This script is useful
+  for determining whether the program has good locality (page faults
+  limited to a single node) on a NUMA computer.
+
+
 = PER-PROCESS =
 
 io/ttyspy.stp - Monitor tty typing.
diff --git a/testsuite/systemtap.examples/memory/numa_faults.meta b/testsuite/systemtap.examples/memory/numa_faults.meta
new file mode 100644
index 00000000..34034bef
--- /dev/null
+++ b/testsuite/systemtap.examples/memory/numa_faults.meta
@@ -0,0 +1,13 @@
+title: Summarize Process Misses across NUMA Nodes
+name: numa_faults.stp
+version: 1.0
+author: IBM
+keywords: memory numa
+subsystem: memory
+status: production
+exit: user-controlled
+output: list
+scope: system-wide
+description: The numa_faults.stp script tracks the read and write page faults for each process. When the script exits it prints out the total read and write page faults for each process. The script also provides a breakdown of page faults per node for each process. This script is useful for determining whether the program has good locality (page faults limited to a single node) on a NUMA computer.
+test_check: stap -p4 numa_faults.stp
+test_installcheck: stap numa_faults.stp -c "sleep 1"
diff --git a/testsuite/systemtap.examples/memory/numa_faults.stp b/testsuite/systemtap.examples/memory/numa_faults.stp
new file mode 100755
index 00000000..34a0ace7
--- /dev/null
+++ b/testsuite/systemtap.examples/memory/numa_faults.stp
@@ -0,0 +1,38 @@
+#! /usr/bin/env stap
+
+global execnames, page_faults, node_faults, nodes  // execnames[pid] = name; page_faults[pid, 0|1], node_faults[pid, node], nodes[node] = fault statistics
+
+probe vm.pagefault {  // per-fault bookkeeping, keyed by pid and by node
+  p = pid(); n=addr_to_node(address)  // NOTE(review): presumably n is the NUMA node holding the faulting address -- confirm against the tapset
+  execnames[p] = execname()  // remember a printable name for this pid
+  page_faults[p, write_access ? 1 : 0] <<< 1  // second index: 1 when write_access is set, otherwise 0
+  node_faults[p, n] <<< 1  // one sample per fault for this (pid, node) pair
+  nodes[n] <<< 1  // records every node value seen; print_pf iterates over this array
+}
+
+function print_pf () {  // prints a header, then one row per pid: name, pid, two fault counts, and node:count pairs
+  printf ("\n")
+  printf ("%-16s %-6s %10s %10s %-20s\n",
+          "Execname", "PID", "RD Faults", "WR Faults", "Node:Faults")
+  print ("======================= ========== ========== =============\n")
+  foreach (pid in execnames) {  // one output row per pid recorded in execnames
+    printf ("%-16s %6d %10d %10d ", execnames[pid], pid,
+            @count(page_faults[pid,0]), @count(page_faults[pid,1]))  // index 0 fills the RD column, index 1 the WR column
+    foreach ([node+] in nodes) {  // the + suffix iterates node keys in ascending order
+      if ([pid, node] in node_faults)  // only print nodes that have an entry for this pid
+        printf ("%d:%d ", node, @count(node_faults[pid, node]))
+    }
+    printf ("\n")
+  }
+  printf("\n")
+}
+
+probe begin {  // prints a startup banner
+ printf("Starting pagefault counters \n")
+}
+
+probe end {  // prints the collected table via print_pf, bracketed by status messages
+ printf("Printing counters: \n")
+ print_pf ()
+ printf("Done\n")
+}
author	Dave Brolley <brolley@redhat.com>	2009-07-10 11:10:51 -0400
committer	Dave Brolley <brolley@redhat.com>	2009-07-10 11:10:51 -0400
commit	1e9ab8199dff90c1b6e7290f0f7b4eb424a9ff9f (patch)
tree	a0c10b78ad3e7142a59ffd6f6e75b75abf90d88e
parent	7d54db1a2c0b3831b6fbc8282f1155426c4be540 (diff)
parent	c728b7da8be430367aa33f9fbacda93d4add9ea2 (diff)
download	systemtap-steved-1e9ab8199dff90c1b6e7290f0f7b4eb424a9ff9f.tar.gz systemtap-steved-1e9ab8199dff90c1b6e7290f0f7b4eb424a9ff9f.tar.xz systemtap-steved-1e9ab8199dff90c1b6e7290f0f7b4eb424a9ff9f.zip