33 files changed, 5743 insertions, 552 deletions
diff --git a/ChangeLog b/ChangeLog
index 4326326c..5ae4fc4b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,4 @@
-2008-02-21  Dave Brolley  <brolley@redhat.com>
+2008-02-27  Dave Brolley  <brolley@redhat.com>
 
 	PR5189
 	* staptree.h (print_format::conv_memory): New enumerator.
diff --git a/doc/ChangeLog b/doc/ChangeLog
new file mode 100644
index 00000000..d9e609d2
--- /dev/null
+++ b/doc/ChangeLog
@@ -0,0 +1,7 @@
+2008-02-27  Frank Ch. Eigler  <fche@redhat.com>
+
+	* langref.tex, tutorial.tex: Copied over & aggregated
+	from former comfy digs under /cvs/doc.
+	* tutorial/*: Samples scripts from tutorial.
+	* Makefile.am: New build instructions.
+	* Makefile.in: New generated file.
diff --git a/doc/Makefile.am b/doc/Makefile.am
new file mode 100644
index 00000000..bf80fbd3
--- /dev/null
+++ b/doc/Makefile.am
@@ -0,0 +1,28 @@
+# Makefile.am --- automake input file for systemtap docs
+## process this file with automake to produce Makefile.in
+
+if BUILD_DOCS
+all-local: tutorial.pdf langref.pdf
+
+clean-local:
+	rm -f *.pdf *.out *.log *.aux *.toc *.lot *.idx *.glo
+endif
+
+SUFFIXES = .tex .dvi .ps .pdf
+
+.ps.pdf:
+	ps2pdf -r600 $<
+
+.dvi.ps:
+	dvips -t letter -o $@ $<
+
+.tex.dvi:
+	pwd=`pwd`; cd $(srcdir); \
+	latex -output-directory=$$pwd $<; \
+	touch $*.glo; \
+	makeindex $*.glo -s nomencl.ist -o $*.gls; \
+	latex -output-directory=$$pwd $<; \
+	latex -output-directory=$$pwd $<; \
+	latex -output-directory=$$pwd $<
+
+EXTRA_DIST = tutorial.tex langref.tex tutorial
diff --git a/doc/Makefile.in b/doc/Makefile.in
new file mode 100644
index 00000000..27a4ab8b
--- /dev/null
+++ b/doc/Makefile.in
@@ -0,0 +1,336 @@
+# Makefile.in generated by automake 1.10 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006  Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Makefile.am --- automake input file for systemtap docs
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+subdir = doc
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+SOURCES =
+DIST_SOURCES =
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DATE = @DATE@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PROCFLAGS = @PROCFLAGS@
+RANLIB = @RANLIB@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+U = @U@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build_alias = @build_alias@
+builddir = @builddir@
+cap_LIBS = @cap_LIBS@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+elfutils_abs_srcdir = @elfutils_abs_srcdir@
+exec_prefix = @exec_prefix@
+have_dvips = @have_dvips@
+have_latex = @have_latex@
+have_ps2pdf = @have_ps2pdf@
+host_alias = @host_alias@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+sqlite3_LIBS = @sqlite3_LIBS@
+srcdir = @srcdir@
+stap_LIBS = @stap_LIBS@
+staplog_CPPFLAGS = @staplog_CPPFLAGS@
+subdirs = @subdirs@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+SUFFIXES = ps pdf dvi ps tex
+EXTRA_DIST = tutorial.tex langref.tex tutorial
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: ps pdf dvi ps tex .dvi .pdf .ps .tex
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+		&& exit 0; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu  doc/Makefile'; \
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu  doc/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+tags: TAGS
+TAGS:
+
+ctags: CTAGS
+CTAGS:
+
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+	    fi; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+	  else \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+@BUILD_DOCS_FALSE@all-local:
+all-am: Makefile all-local
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+@BUILD_DOCS_FALSE@clean-local:
+clean: clean-am
+
+clean-am: clean-generic clean-local mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-generic
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-info: install-info-am
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-ps: install-ps-am
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: all all-am all-local check check-am clean clean-generic \
+	clean-local distclean distclean-generic distdir dvi dvi-am \
+	html html-am info info-am install install-am install-data \
+	install-data-am install-dvi install-dvi-am install-exec \
+	install-exec-am install-html install-html-am install-info \
+	install-info-am install-man install-pdf install-pdf-am \
+	install-ps install-ps-am install-strip installcheck \
+	installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-generic pdf \
+	pdf-am ps ps-am uninstall uninstall-am
+
+
+@BUILD_DOCS_TRUE@all-local: tutorial.pdf langref.pdf
+
+@BUILD_DOCS_TRUE@clean-local:
+@BUILD_DOCS_TRUE@	rm -f *.pdf *.out *.log *.aux *.toc *.lot *.idx *.glo
+
+.ps.pdf:
+	ps2pdf -r600 $<
+
+.dvi.ps:
+	dvips -t letter -o $@ $<
+
+.tex.dvi:
+	pwd=`pwd`; cd $(srcdir); \
+	latex -output-directory=$$pwd $<; \
+	touch $*.glo; \
+	makeindex $*.glo -s nomencl.ist -o $*.gls; \
+	latex -output-directory=$$pwd $<; \
+	latex -output-directory=$$pwd $<; \
+	latex -output-directory=$$pwd $<
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/doc/langref.tex b/doc/langref.tex
new file mode 100644
index 00000000..5b91d01d
--- /dev/null
+++ b/doc/langref.tex
@@ -0,0 +1,3285 @@
+% SystemTap Language Reference
+\documentclass[twoside,english]{article}
+\usepackage{geometry}
+\geometry{verbose,letterpaper,tmargin=1.5in,bmargin=1.5in,lmargin=1in,rmargin=1in}
+\usepackage{fancyhdr}
+\pagestyle{fancy}
+\usepackage{array}
+\usepackage{varioref}
+\usepackage{float}
+\usepackage{makeidx}
+\usepackage{verbatim}
+\usepackage{url}
+\makeindex
+
+\makeatletter
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% LyX specific LaTeX commands.
+\newcommand{\noun}[1]{\textsc{#1}}
+%% Bold symbol macro for standard LaTeX users
+%\providecommand{\boldsymbol}[1]{\mbox{\boldmath $#1$}}
+
+%% Because html converters don't know tabularnewline
+\providecommand{\tabularnewline}{\\}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% User specified LaTeX commands.
+\setlength{\parindent}{0pt}
+%\setlength{\parskip}{3pt plus 2pt minus 1pt}
+\setlength{\parskip}{5pt}
+
+%
+% this makes list spacing much better.
+%
+\newenvironment{my_itemize}{
+\begin{itemize}
+  \setlength{\itemsep}{1pt}
+  \setlength{\parskip}{0pt}
+  \setlength{\parsep}{0pt}}{\end{itemize}
+}
+
+\newenvironment{vindent}
+{\begin{list}{}{\setlength{\listparindent}{6pt}}
+\item[]}
+{\end{list}}
+
+\usepackage{babel}
+\makeatother
+\begin{document}
+
+\title{SystemTap Language Reference}
+
+\maketitle
+\newpage{}
+This document was derived from other documents contributed to the SystemTap project by employees of Red Hat, IBM and Intel.\newline
+
+Copyright \copyright\space  2007 Red Hat Inc.\newline
+Copyright \copyright\space  2007 IBM Corp.\newline
+Copyright \copyright\space  2007 Intel Corporation.\newline
+
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.2
+or any later version published by the Free Software Foundation;
+with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.\newline
+
+The GNU Free Documentation License is available from 
+\url{http://www.gnu.org/licenses/fdl.html} or by writing to
+the Free Software Foundation, Inc., 51 Franklin Street,
+Fifth Floor, Boston, MA 02110-1301, USA.
+\newpage{}
+\tableofcontents{}
+\listoftables
+\newpage{}
+
+\section{SystemTap overview\label{sec:SystemTap-Overview}}
+
+\subsection{About this guide}
+
+This guide is a comprehensive reference of SystemTap's language constructs
+and syntax. The contents borrow heavily from existing SystemTap documentation
+found in manual pages and the tutorial. The presentation of information here
+provides the reader with a single place to find language syntax and recommended
+usage. In order to successfully use this guide, you should be familiar with
+the general theory and operation of SystemTap. If you are new to SystemTap,
+you will find the tutorial to be an excellent place to start learning. For
+detailed information about tapsets, see the manual pages provided with the
+distribution. For information about the entire collection of SystemTap reference
+material, see Section~\ref{sec:For-Further-Reference}.
+
+\subsection{Reasons to use SystemTap}
+
+SystemTap provides infrastructure to simplify the gathering of information
+about a running Linux kernel so that it may be further analyzed. This analysis
+assists in identifying the underlying cause of a performance or functional
+problem. SystemTap was designed to eliminate the need for a developer to
+go through the tedious instrument, recompile, install, and reboot sequence
+normally required to collect this kind of data. To do this, it provides a
+simple command-line interface and scripting language for writing kernel instrumentation.
+With SystemTap, developers, system administrators, and users can easily write
+scripts that gather and manipulate kernel data that is not otherwise available
+using standard Linux tools. Users of SystemTap will find it to be a significant
+improvement over older methods.
+
+\subsection{Event-action language}
+\index{language}
+SystemTap's language is strictly typed, declaration free, procedural, and
+inspired by dtrace and awk. Source code points or events in the kernel are
+associated with handlers, which are subroutines that are executed synchronously.
+These probes are conceptually similar to \char`\"{}breakpoint command lists\char`\"{}
+in the GDB debugger.
+
+There are two main outermost constructs: probes and functions. Within these,
+statements and expressions use C-like operator syntax and precedence.
+
+\subsection{Sample SystemTap scripts}
+\index{example scripts}
+Following are some example scripts that illustrate the basic operation of
+SystemTap. For more examples, see the examples/small\_demos/ directory in
+the source directory, the SystemTap wiki at \url{http://sourceware.org/systemtap/wiki/HomePage},
+or the SystemTap War Stories page at \url{http://sourceware.org/systemtap/wiki/WarStories}.
+
+\subsubsection{Basic SystemTap syntax and control structures}
+
+The following code examples demonstrate SystemTap syntax and control structures.
+
+\begin{vindent}
+\begin{verbatim}
+global odds, evens
+
+probe begin {
+    # "no" and "ne" are local integers
+    for (i = 0; i < 10; i++) {
+        if (i % 2) odds [no++] = i
+            else evens [ne++] = i
+    }
+
+    delete odds[2]
+    delete evens[3]
+    exit()
+}
+
+probe end {
+    foreach (x+ in odds)     # ascending by index
+        printf ("odds[%d] = %d\n", x, odds[x])
+
+    foreach (x in evens-)    # descending by value
+        printf ("evens[%d] = %d\n", x, evens[x])
+}
+\end{verbatim}
+\end{vindent}
+This prints:
+
+\begin{vindent}
+\begin{verbatim}
+odds[0] = 1
+odds[1] = 3
+odds[3] = 7
+odds[4] = 9
+evens[4] = 8
+evens[2] = 4
+evens[1] = 2
+evens[0] = 0
+\end{verbatim}
+\end{vindent}
+Note that all variable types are inferred, and that all locals and globals
+are initialized.
+
+\subsubsection{Primes between 0 and 49}
+
+\begin{vindent}
+\begin{verbatim}
+function isprime (x) {
+    if (x < 2) return 0
+    for (i = 2; i < x; i++) {
+        if (x % i == 0) return 0
+        if (i * i > x) break
+    }
+    return 1
+}
+
+probe begin {
+    for (i = 0; i < 50; i++)
+        if (isprime (i)) printf("%d\n", i)
+    exit()
+}
+\end{verbatim}
+\end{vindent}
+This prints:
+
+\begin{vindent}
+\begin{verbatim}
+2
+3
+5
+7
+11
+13
+17
+19
+23
+29
+31
+37
+41
+43
+47
+\end{verbatim}
+\end{vindent}
+
+\subsubsection{Recursive functions}
+\index{recursion}
+\begin{vindent}
+\begin{verbatim}
+function fibonacci(i) {
+    if (i < 1) error ("bad number")
+    if (i == 1) return 1
+    if (i == 2) return 2
+    return fibonacci (i-1) + fibonacci (i-2)
+}
+
+probe begin {
+    printf ("11th fibonacci number: %d", fibonacci (11))
+    exit ()
+}
+\end{verbatim}
+\end{vindent}
+This prints:
+
+\begin{vindent}
+\begin{verbatim}
+11th fibonacci number: 144
+\end{verbatim}
+\end{vindent}
+Any larger number input to the function may exceed the MAXACTION or MAXNESTING
+limits, which will be caught at run time and result in an error. For more
+about limits see Section~\ref{sub:SystemTap-safety}.
+\newpage{}
+\subsection{The stap command}
+\index{stap}
+The stap program is the front-end to the SystemTap tool. It accepts probing
+instructions written in its scripting language, translates those instructions
+into C code, compiles this C code, and loads the resulting kernel module
+into a running Linux kernel to perform the requested system trace or probe
+functions. You can supply the script in a named file, from standard input,
+or from the command line. The program runs until it is interrupted by the
+user, until a sufficient number of soft errors occurs, or until the script
+voluntarily invokes the exit() function.
+
+The stap command does the following:
+
+\begin{itemize}
+\item Translates the script
+\item Generates and compiles a kernel module
+\item Inserts the module; output to stap's stdout
+\item CTRL-C unloads the module and terminates stap
+\end{itemize}
+For a full list of options to the stap command, see the stap(1) manual page.
+
+\subsection{Safety and security\label{sub:SystemTap-safety}}
+\index{limits}
+SystemTap is an administrative tool. It exposes kernel internal data structures
+and potentially private user information. It requires root privileges to
+actually run the kernel objects it builds using the \textbf{sudo} command,
+applied to the \textbf{staprun} program.
+
+staprun is a part of the SystemTap package, dedicated to module loading and
+unloading and kernel-to-user data transfer. Since staprun does not perform
+any additional security checks on the kernel objects it is given, do not
+give elevated privileges via sudo to untrusted users.
+
+The translator asserts certain safety constraints. \index{constraints}It
+ensures that no handler routine can run for too long, allocate memory, perform
+unsafe operations, or unintentionally interfere with the kernel. Use of script
+global variables is locked to protect against manipulation by concurrent
+probe handlers. Use of \emph{guru mode} constructs such as embedded C (see
+Section~\ref{sub:Embedded-C}) can violate these constraints, leading to
+a kernel crash or data corruption.
+
+The resource use limits are set by macros in the generated C code. These
+may be overridden with the -D flag. The following list describes a selection
+of these macros:
+
+\textbf{MAXNESTING} -- The maximum number of recursive function call levels. The default is 10.
+
+\textbf{MAXSTRINGLEN} -- The maximum length of strings. The default is 128.
+
+\textbf{MAXTRYLOCK} -- The maximum number of iterations to wait for locks on global variables before
+declaring possible deadlock and skipping the probe. The default is 1000.
+
+\textbf{MAXACTION} -- The maximum number of statements to execute during any single probe hit. The default is 1000. 
+
+\textbf{MAXMAPENTRIES} -- The maximum number of rows in an array if the array size is not specified
+explicitly when declared. The default is 2048.
+
+\textbf{MAXERRORS} -- The maximum number of soft errors before an exit is triggered. The default is 0.
+
+\textbf{MAXSKIPPED} -- The maximum number of skipped reentrant probes before an exit is triggered. The default is 100.
+
+\textbf{MINSTACKSPACE} -- The minimum number of free kernel stack bytes required in order to run a
+probe handler. This number should be large enough for the probe handler's
+own needs, plus a safety margin.  The default is 1024.
+
+If something goes wrong with stap or staprun after a probe has started running,
+you may safely kill both user processes, and remove the active probe kernel
+module with the rmmod command. Any pending trace messages may be lost.
+
+\section{Types of SystemTap scripts\label{sec:Types-of-SystemTap}}
+
+\subsection{Probe scripts}
+
+Probe scripts are analogous to programs; these scripts identify probe points
+and associated handlers.
+
+\subsection{Tapset scripts}
+
+Tapset scripts are libraries of probe aliases and auxiliary functions.
+
+The /usr/share/systemtap/tapset directory contains tapset scripts. While
+these scripts look like regular SystemTap scripts, they cannot be run directly.
+
+\section{Components of a SystemTap script}
+
+The main construct in the scripting language identifies probes. Probes associate
+abstract events with a statement block, or probe handler, that is to be executed
+when any of those events occur.
+
+The following example shows how to trace entry and exit from a function using
+two probes.
+
+\begin{vindent}
+\begin{verbatim}
+probe kernel.function("sys_mkdir") { log ("enter") }
+probe kernel.function("sys_mkdir").return { log ("exit") }
+\end{verbatim}
+\end{vindent}
+
+To list the probe-able functions in the kernel, use the last-pass option
+to the translator. The output needs to be filtered because each inlined function
+instance is listed separately. The following statement is an example.
+
+\begin{vindent}
+\begin{verbatim}
+# stap -p2 -e 'probe kernel.function("*") {}' | sort | uniq
+\end{verbatim}
+\end{vindent}
+
+\subsection{Probe definitions}
+
+The general syntax is as follows.
+
+\begin{vindent}
+\begin{verbatim}
+probe PROBEPOINT [, PROBEPOINT] { [STMT ...] }
+\end{verbatim}
+\end{vindent}
+Events are specified in a special syntax called \emph{probe points}. There
+are several varieties of probe points defined by the translator, and tapset
+scripts may define others using aliases. The provided probe points are listed
+in the stapprobes(5) man pages.
+
+The probe handler is interpreted relative to the context of each event. For
+events associated with kernel code, this context may include variables defined
+in the source code at that location. These \emph{target variables}\index{target variables}
+are presented to the script as variables whose names are prefixed with a
+dollar sign (\$). They may be accessed only if the compiler used to compile
+the kernel preserved them, despite optimization. This is the same constraint
+imposed by a debugger when working with optimized code. Other events may
+have very little context.
+
+
+\subsection{Probe aliases\label{sub:Probe-aliases}}
+\index{probe aliases}
+The general syntax is as follows.
+
+\begin{vindent}
+\begin{verbatim}
+probe <alias> = <probepoint> { <prologue_stmts> }
+probe <alias> += <probepoint> { <epilogue_stmts> }
+\end{verbatim}
+\end{vindent}
+New probe points may be defined using \emph{aliases}. A probe point alias
+looks similar to probe definitions, but instead of activating a probe at
+the given point, it defines a new probe point name as an alias to an existing
+one. New probe aliases may refer to one or more existing probe aliases. The
+following is an example.
+
+\begin{vindent}
+\begin{verbatim}
+probe socket.sendmsg = kernel.function ("sock_sendmsg") { ... }
+probe socket.do_write = kernel.function ("do_sock_write") { ... }
+probe socket.send = socket.sendmsg, socket.do_write { ... }
+\end{verbatim}
+\end{vindent}
+There are two types of aliases, the prologue style and the epilogue style
+which are identified by the equal sign (\texttt{\textbf{=}}) and \char`\"{}\texttt{\textbf{+=}}\char`\"{}
+respectively.
+
+A probe that names the new probe point will create an actual probe, with
+the handler of the alias \emph{pre-pended}.
+
+This pre-pending behavior serves several purposes. It allows the alias definition
+to pre-process the context of the probe before passing control to the handler
+specified by the user. This has several possible uses, demonstrated as follows.
+
+\begin{vindent}
+\begin{verbatim}
+# Skip probe unless given condition is met:
+if ($flag1 != $flag2) next
+
+# Supply values describing probes:
+name = "foo"
+
+# Extract the target variable to a plain local variable:
+var = $var
+\end{verbatim}
+\end{vindent}
+
+\subsubsection{Prologue-style aliases (=)}
+\index{prologue-style aliases}
+\index{=}
+For a prologue style alias, the statement block that follows an alias definition
+is implicitly added as a prologue to any probe that refers to the alias.
+The following is an example.
+
+\begin{vindent}
+\begin{verbatim}
+# Defines a new probe point syscall.read, which expands to
+# kernel.function("sys_read"), with the given statement as
+# a prologue.
+#
+probe syscall.read = kernel.function("sys_read") {
+    fildes = $fd
+}
+\end{verbatim}
+\end{vindent}
+
+\subsubsection{Epilogue-style aliases (+=)}
+\index{epilogue-style aliases}
+\index{+=}
+The statement block that follows an alias definition is implicitly added
+as an epilogue to any probe that refers to the alias. The following is an
+example:
+
+\begin{vindent}
+\begin{verbatim}
+# Defines a new probe point with the given statement as an
+# epilogue.
+#
+probe syscall.read += kernel.function("sys_read") {
+    fildes = $fd
+}
+\end{verbatim}
+\end{vindent}
+
+\subsubsection{Probe alias usage}
+
+Another probe definition may use a previously defined alias. The following
+is an example.
+
+\begin{vindent}
+\begin{verbatim}
+probe syscall.read {
+    printf("reading fd=%d\n", fildes)
+}
+\end{verbatim}
+\end{vindent}
+
+\subsubsection{Unused alias variables}
+\index{unused variables}
+An unused alias variable is a variable defined in a probe alias, usually
+as one of a group of \texttt{var = \$var} assignments, which is not actually
+used by the script probe that instantiates the alias. These variables are
+discarded.
+
+\subsection{Variables\label{sub:Variables}}
+\index{variables}
+Identifiers for variables and functions are alphanumeric sequences, and may
+include the underscore (\_) and the dollar sign (\$) characters. They may
+not start with a plain digit. Each variable is by default local to the probe
+or function statement block where it is mentioned, and therefore its scope
+and lifetime are limited to a particular probe or function invocation. Scalar
+variables are implicitly typed as either string or integer. Associative arrays
+also have a string or integer value, and a tuple of strings or integers serves
+as a key. Arrays must be declared as global. Local arrays\index{local arrays}
+are not allowed.
+
+The translator performs \emph{type inference} on all identifiers, including
+array indexes and function parameters. Inconsistent type-related use of identifiers
+results in an error.
+
+Variables may be declared global. Global variables are shared among all probes
+and remain instantiated as long as the SystemTap session. There is one namespace
+for all global variables, regardless of the script file in which they are
+found. Because of possible concurrency, such as multiple probe handlers running at once,
+each global variable used by a probe is automatically read- or write-locked
+while the handler is running. A global declaration may be written at the
+outermost level anywhere in a script file, not just within a block of code.
+The following declaration marks \texttt{var1} and \texttt{var2} as global.
+The translator will infer a value type for each, and if the variable is used
+as an array, its key types.
+
+\begin{vindent}
+\begin{verbatim}
+global var1[=<value>], var2[=<value>]
+\end{verbatim}
+\end{vindent}
+
+\subsection{Auxiliary functions\label{sub:Auxiliary-functions}}
+\index{auxiliary functions}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+function <name>[:<type>] ( <arg1>[:<type>], ... ) { <stmts> }
+\end{verbatim}
+\end{vindent}
+SystemTap scripts may define subroutines to factor out common work. Functions
+may take any number of scalar arguments, and must return a single scalar
+value. Scalars in this context are integers or strings. For more information
+on scalars, see Section~\ref{sub:Variables} and Section~\ref{sub:Data-types}.
+The following is an example function declaration.
+
+\begin{vindent}
+\begin{verbatim}
+function thisfn (arg1, arg2) {
+    return arg1 + arg2
+}
+\end{verbatim}
+\end{vindent}
+Note the general absence of type declarations, which are inferred by the
+translator. If desired, a function definition may include explicit type declarations
+for its return value, its arguments, or both. This is helpful for embedded-C
+functions. In the following example, the type inference engine need only
+infer the type of arg2, a string.
+
+\begin{vindent}
+\begin{verbatim}
+function thatfn:string(arg1:long, arg2) {
+    return sprintf("%d%s", arg1, arg2)
+}
+\end{verbatim}
+\end{vindent}
+Functions may call others or themselves recursively, up to a fixed nesting
+limit. See Section~\ref{sub:SystemTap-safety}.
+
+
+\subsection{Embedded C\label{sub:Embedded-C}}
+\index{embedded C}
+SystemTap supports a \emph{guru\index{guru mode} mode} where script safety
+features such as code and data memory reference protection are removed. Guru
+mode is set by passing the ``-g'' flag to the stap command. When in guru
+mode, the translator accepts embedded code enclosed between {}``\%\{''
+and {}``\%\}'' markers in the script file. Embedded code is transcribed
+verbatim, without analysis, in sequence, into generated C code. At the outermost
+level of a script, guru mode may be useful to add \#include instructions,
+or any auxiliary definitions for use by other embedded code.
+
+
+\subsection{Embedded C functions}
+
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+function <name>:<type> ( <arg1>:<type>, ... ) %{ <C_stmts> %}
+\end{verbatim}
+\end{vindent}
+Embedded code is permitted in a function body. In that case, the script language
+body is replaced entirely by a piece of C code enclosed between \%\{ and
+\%\} markers. The enclosed code may do anything reasonable and safe as allowed
+by the parser.
+
+There are a number of undocumented but complex safety constraints on concurrency,
+resource consumption and runtime limits that are applied to code written
+in the SystemTap language. These constraints are not applied to embedded
+C code, so use such code with caution as it is used verbatim. Be especially
+careful when dereferencing pointers. Use the kread() macro to dereference
+any pointers that could potentially be invalid or dangerous. If you are unsure,
+err on the side of caution and use kread(). The kread() macro is one of the
+safety mechanisms used in code generated by embedded C. It protects against
+pointer accesses that could crash the system.
+
+For example, to access the pointer chain \texttt{name = skb->dev->name} in
+embedded C, use the following code.
+
+\begin{vindent}
+\begin{verbatim}
+struct net_device *dev;
+char *name;
+dev = kread(&(skb->dev));
+name = kread(&(dev->name));
+\end{verbatim}
+\end{vindent}
+The memory locations reserved for input and output values are provided to
+a function using a macro named \texttt{THIS}\index{THIS}. The following
+are examples.
+
+\begin{vindent}
+\begin{verbatim}
+function add_one (val) %{
+    THIS->__retvalue = THIS->val + 1;
+%}
+function add_one_str (val) %{
+    strlcpy (THIS->__retvalue, THIS->val, MAXSTRINGLEN);
+    strlcat (THIS->__retvalue, "one", MAXSTRINGLEN);
+%}
+\end{verbatim}
+\end{vindent}
+The function argument and return value types must be inferred by the translator
+from the call sites in order for this method to work. You should examine
+C code generated for ordinary script language functions to write compatible
+embedded-C. Note that all SystemTap functions and probes run with interrupts
+disabled, thus you cannot call functions that might sleep from within embedded
+C.
+
+\section{Probe points\label{sec:Probe-Points}}
+\index{probe points}
+\subsection{General syntax}
+\index{probe syntax}
+The general probe point syntax is a dotted-symbol sequence. This divides
+the event namespace into parts, analogous to the style of the Domain Name
+System. Each component identifier is parameterized by a string or number
+literal, with a syntax analogous to a function call.
+
+The following are all syntactically valid probe points.
+
+\begin{vindent}
+\begin{verbatim}
+kernel.function("foo")
+kernel.function("foo").return
+module("ext3").function("ext3_*")
+kernel.function("no_such_function") ?
+syscall.*
+end
+timer.ms(5000)
+\end{verbatim}
+\end{vindent}
+Probes may be broadly classified into \emph{synchronous}\index{synchronous}
+or \emph{asynchronous}.\index{asynchronous} A synchronous event occurs when
+any processor executes an instruction matched by the specification. This
+gives these probes a reference point (instruction address) from which more
+contextual data may be available. Other families of probe points refer to
+asynchronous events such as timers, where no fixed reference point is related.
+Each probe point specification may match multiple locations, such as by using
+wildcards or aliases, and all are probed. A probe declaration may contain
+several specifications separated by commas, which are all probed.
+
+\subsubsection{Prefixes}
+\index{prefixes}
+Prefixes specify the probe target, such as \textbf{kernel}, \textbf{module},
+\textbf{timer}, and so on.
+
+\subsubsection{Suffixes}
+\index{suffixes}
+Suffixes further qualify the point to probe, such as \textbf{.return} for the
+exit point of a probed function. The absence of a suffix implies the function 
+entry point.
+
+\subsubsection{Wildcarded file names, function names}
+\index{wildcards}
+A component may include an asterisk ({*}) character, which expands to other
+matching probe points. An example follows.
+
+\begin{vindent}
+\begin{verbatim}
+kernel.syscall.*
+kernel.function("sys_*")
+\end{verbatim}
+\end{vindent}
+
+\subsubsection{Optional probe points\label{sub:Optional-probe-points}}
+\index{?}
+A probe point may be followed by a question mark (?) character, to indicate
+that it is optional, and that no error should result if it fails to expand.
+This effect passes down through all levels of alias or wildcard expansion.
+
+The following is the general syntax.
+
+\begin{vindent}
+\begin{verbatim}
+kernel.function("no_such_function") ?
+\end{verbatim}
+\end{vindent}
+
+\subsection{Built-in probe point types (DWARF probes)}
+\index{built-in probes}
+\index{dwarf probes}
+This family of probe points uses symbolic debugging information for the target
+kernel or module, as may be found in executables that have not
+been stripped, or in the separate \textbf{debuginfo} packages. They allow
+logical placement of probes into the execution path of the target 
+by specifying a set of points in the source or object code. When a matching
+statement executes on any processor, the probe handler is run in that context.
+
+Points in a kernel are identified by module, source file, line number, function
+name or some combination of these.
+
+Here is a list of probe point specifications currently supported: 
+
+\begin{vindent}
+\begin{verbatim}
+kernel.function(PATTERN)
+kernel.function(PATTERN).call
+kernel.function(PATTERN).return
+kernel.function(PATTERN).return.maxactive(VALUE)
+kernel.function(PATTERN).inline
+module(MPATTERN).function(PATTERN)
+module(MPATTERN).function(PATTERN).call
+module(MPATTERN).function(PATTERN).return.maxactive(VALUE)
+module(MPATTERN).function(PATTERN).inline
+kernel.statement(PATTERN)
+kernel.statement(ADDRESS).absolute
+module(MPATTERN).statement(PATTERN)
+\end{verbatim}
+\end{vindent}
+
+The \textbf{.function} variant places a probe near the beginning of the named
+function, so that parameters are available as context variables. 
+
+The \textbf{.return} variant places a probe at the moment of return from the named
+function, so the return value is available as the \$return context variable.
+The entry parameters are also available, though the function may have changed
+their values.  Return probes may be further qualified with \textbf{.maxactive}, 
+which specifies how many instances of the specified function can be probed simultaneously.
+You can leave off \textbf{.maxactive} in most cases, as the default should be sufficient.
+However, if you notice an excessive number of skipped probes, try setting \textbf{.maxactive}
+to incrementally higher values to see if the number of skipped probes decreases.
+
+The \textbf{.inline} modifier for \textbf{.function} filters the results to include only 
+instances of inlined functions. The \textbf{.call} modifier selects the opposite subset.
+Inline functions do not have an identifiable return point, so \textbf{.return}
+is not supported on \textbf{.inline} probes.
+
+The \textbf{.statement} variant places a probe at the exact spot, exposing those local
+variables that are visible there.
+
+In the above probe descriptions, MPATTERN stands for a string literal
+that identifies the loaded kernel module of interest. It may include asterisk
+({*}), square brackets \char`\"{}{[}]\char`\"{}, and question mark (?) wildcards.
+PATTERN stands for a string literal that identifies a point in the program.
+It is composed of three parts:
+
+\begin{enumerate}
+\item The first part is the name of a function, as would appear in the nm program's
+output. This part may use the asterisk and question mark wildcard operators
+to match multiple names.
+\item The second part is optional, and begins with the at sign (@) character.
+It is followed by the path to the source file containing the function,
+which may include a wildcard pattern, such as mm/slab{*}.
+In most cases, the path should be relative to the top of the
+linux source directory, although an absolute path may be necessary for some kernels.
+If a relative pathname doesn't work, try absolute.
+\item The third part is optional if the file name part was given. It identifies
+the line number in the source file, preceded by a colon. 
+\end{enumerate}
+Alternately, specify PATTERN as a numeric constant to indicate a relative
+module address or an absolute kernel address.
+
+Some of the source-level variables, such as function parameters, locals,
+or globals visible in the compilation unit, are visible to probe handlers.
+Refer to these variables by prefixing their name with a dollar sign within
+the scripts. In addition, a special syntax allows limited traversal of structures,
+pointers, and arrays.
+
+\texttt{\$var} refers to an in-scope variable var. If it is a type similar
+to an integer, it will be cast to a 64-bit integer for script use. Pointers
+similar to a string (char {*}) are copied to SystemTap string values by the
+kernel\_string() or user\_string() functions.
+
+\texttt{\$var->field} traverses a structure's field. The indirection operator
+may be repeated to follow additional levels of pointers.
+
+\texttt{\$var{[}N]} indexes into an array. The index is given with a literal
+number.
+
+\subsubsection{kernel.function, module().function}
+\index{kernel.function}
+\index{module().function}
+The \textbf{.function} variant places a probe near the beginning of the named function,
+so that parameters are available as context variables.
+
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+kernel.function("func[@file]")
+module("modname").function("func[@file]")
+\end{verbatim}
+\end{vindent}
+Examples:
+
+\begin{vindent}
+\begin{verbatim}
+# Refers to all kernel functions with "init" or "exit"
+# in the name:
+kernel.function("*init*"), kernel.function("*exit*")
+
+# Refers to any functions within the "kernel/sched.c"
+# file that span line 240:
+kernel.function("*@kernel/sched.c:240")
+
+# Refers to all functions in the ext3 module:
+module("ext3").function("*")
+\end{verbatim}
+\end{vindent}
+
+\subsubsection{kernel.statement, module().statement}
+\index{kernel.statement}
+\index{module().statement}
+The \textbf{.statement} variant places a probe at the exact spot, exposing those local
+variables that are visible there.
+
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+kernel.statement("func@file:linenumber")
+module("modname").statement("func@file:linenumber")
+\end{verbatim}
+\end{vindent}
+Example:
+
+\begin{vindent}
+\begin{verbatim}
+# Refers to the statement at line 2917 within the
+# kernel/sched.c file:
+kernel.statement("*@kernel/sched.c:2917")
+\end{verbatim}
+\end{vindent}
+
+\begin{comment}
+\subsection{Marker probes}
+
+This family of probe points connects to static probe markers inserted into
+the kernel or a module. These markers are special macro calls in the kernel
+that make probing faster and more reliable than with DWARF-based probes.
+DWARF debugging information is not required to use probe markers.
+
+Marker probe points begin with a kernel or module(\char`\"{}\emph{name}\char`\"{})
+prefix, the same as DWARF probes. This prefix identifies the source of the
+symbol table used for finding markers. The suffix names the marker itself:
+mark(\char`\"{}\emph{name}\char`\"{}). The marker name string, which may
+contain wildcard characters, is matched against the names given to the marker
+macros when the kernel or module was compiled.
+
+The handler associated with a marker probe reads any optional parameters
+specified at the macro call site named \$arg1 through \$argNN, where NN is
+the number of parameters supplied by the macro. Number and string parameters
+are passed in a type-safe manner.
+\end{comment}
+
+\subsection{Timer probes}
+\index{timer probes}
+You can use intervals defined by the standard kernel jiffies\index{jiffies}
+timer to trigger probe handlers asynchronously. A \emph{jiffy} is a kernel-defined
+unit of time typically between 1 and 60 msec. Two probe point variants are
+supported by the translator: 
+
+\begin{vindent}
+\begin{verbatim}
+timer.jiffies(N)
+timer.jiffies(N).randomize(M)
+\end{verbatim}
+\end{vindent}
+The probe handler runs every N jiffies. If the \texttt{randomize}\index{randomize}
+component is given, a linearly distributed random value in the range {[}-M
+\ldots{} +M] is added to N every time the handler executes. N is restricted
+to a reasonable range (1 to approximately 1,000,000), and M is restricted
+to be less than N. There are no target variables provided in either context.
+Probes can be run concurrently on multiple processors.
+
+Intervals may be specified in units of time. There are two probe point variants
+similar to the jiffies timer:
+
+\begin{vindent}
+\begin{verbatim}
+timer.ms(N)
+timer.ms(N).randomize(M)
+\end{verbatim}
+\end{vindent}
+Here, N and M are specified in milliseconds\index{milliseconds}, but the
+full options for units are seconds (s or sec), milliseconds (ms or msec),
+microseconds (us or usec), nanoseconds (ns or nsec), and hertz (hz). Randomization
+is not supported for hertz timers.
+
+The resolution of the timers depends on the target kernel. For kernels prior
+to 2.6.17, timers are limited to jiffies resolution, so intervals are rounded
+up to the nearest jiffies interval. After 2.6.17, the implementation uses
+hrtimers for tighter precision, though the resulting resolution will be dependent
+upon architecture. In either case, if the randomize component is given, then
+the random value will be added to the interval before any rounding occurs.
+
+Profiling timers are available to provide probes that execute on all CPUs
+at each system tick. This probe takes no parameters, as follows.
+
+\begin{vindent}
+\begin{verbatim}
+timer.profile
+\end{verbatim}
+\end{vindent}
+Full context information of the interrupted process is available, making
+this probe suitable for implementing a time-based sampling profiler.
+
+The following is an example of timer usage.
+
+\begin{vindent}
+\begin{verbatim}
+# Refers to a periodic interrupt, every 1000 jiffies:
+timer.jiffies(1000)
+
+# Fires every 5 seconds:
+timer.sec(5)
+
+# Refers to a periodic interrupt, every 1000 +/- 200 jiffies:
+timer.jiffies(1000).randomize(200)
+\end{verbatim}
+\end{vindent}
+
+\subsection{Return probes}
+\index{return probes}
+The \texttt{.return} variant places a probe at the moment of return from
+the named function, so that the return value is available as the \$return
+context variable. The entry parameters are also accessible in the context
+of the return probe, though their values may have been changed by the function.
+Inline functions do not have an identifiable return point, so \texttt{.return}
+is not supported on \texttt{.inline} probes.
+
+
+\subsection{Special probe points}
+
+The probe points \texttt{begin} and \texttt{end} are defined by the translator
+to refer to the time of session startup and shutdown. There are no target
+variables available in either context.
+
+
+\subsubsection{begin}
+\index{begin}
+The \texttt{begin} probe is the start of the SystemTap session. All \texttt{begin}
+probe handlers are run during the startup of the session. All global variables
+must be declared prior to this point.
+
+
+\subsubsection{end}
+\index{end}
+The \texttt{end} probe is the end of the SystemTap session. All \texttt{end}
+probes are run during the normal shutdown of a session, such as in the aftermath
+of an \texttt{exit} function call, or an interruption from the user. In the
+case of a shutdown triggered by error, \texttt{end} probes are not run.
+
+
+\subsubsection{begin and end probe sequence}
+\index{sequence}
+\texttt{begin} and \texttt{end} probes are specified with an optional sequence
+number that controls the order in which they are run. If no sequence number
+is provided, the sequence number defaults to zero and probes are run in the
+order that they occur in the script file. Sequence numbers may be either
+positive or negative, and are especially useful for tapset writers who want
+to do initialization in a \texttt{begin} probe. The following are examples.
+
+\begin{vindent}
+\begin{verbatim}
+# In a tapset file:
+probe begin(-1000) { ... }
+
+# In a user script:
+probe begin { ... }
+\end{verbatim}
+\end{vindent}
+The user script \texttt{begin} probe defaults to sequence number zero, so
+the tapset \texttt{begin} probe will run first.
+
+
+\subsubsection{never}
+\index{never}
+The \texttt{never} probe point is defined by the translator to mean \emph{never}.
+Its statements are analyzed for symbol and type correctness, but its probe
+handler is never run. This probe point may be useful in conjunction with
+optional probes. See Section~\ref{sub:Optional-probe-points}.
+
+
+\begin{comment} % Comment out until perfmon code is reactivated
+\subsection{Probes to monitor performance}
+
+The perfmon family of probe points is used to access the performance monitoring
+hardware available in modern processors. These probe points require perfmon2
+support in the kernel to access the hardware.
+
+Performance monitor hardware points have a \texttt{perfmon} prefix. The suffix
+names the event being counted, for example \texttt{counter(event)}. The event
+names are specific to the processor implementation, except for generic cycle
+and instructions events, which are available on all processors. The probe
+\texttt{perfmon.counter(event)} starts a counter on the processor which counts
+the number of events that occur on that processor. For more details about
+the performance monitoring events available on a specific processor, see
+the help text returned by typing the perfmon2 command \texttt{pfmon -l.}
+
+\subsubsection{\$counter}
+
+\$counter is a handle used in the body of a probe for operations involving
+the counter associated with the probe.
+
+\subsubsection{read\_counter}
+
+read\_counter is a function passed to the handle for a perfmon probe. It
+returns the current count for the event.
+\end{comment}
+
+\section{Language elements\label{sec:Language-Elements}}
+
+
+\subsection{Identifiers}
+\index{identifiers}
+\emph{Identifiers} are used to name variables and functions. They are an
+alphanumeric sequence that may include the underscore (\_) and dollar sign
+(\$) characters. They have the same syntax as C identifiers, except that
+the dollar sign is also a legal character. Identifiers that begin with a
+dollar sign are interpreted as references to variables in the target software,
+rather than to SystemTap script variables. Identifiers may not start with
+a plain digit. 
+
+
+\subsection{Data types\label{sub:Data-types}}
+\index{data types}
+The SystemTap language includes a small number of data types, but no type
+declarations. A variable's type is inferred\index{inference} from its use.
+To support this inference, the translator enforces consistent typing of function
+arguments and return values, array indices and values. There are no implicit
+type conversions between strings and numbers. Inconsistent type-related use
+of identifiers signals an error.
+
+
+\subsubsection{Numbers}
+\index{numbers}
+Numbers are 64-bit signed integers. The parser will also accept (and wrap
+around) values above positive $2^{63}$.
+
+
+\subsubsection{Literals}
+\index{literals}
+Literals are either strings or integers. Literals can be expressed as decimal,
+octal, or hexadecimal, using C notation. Type suffixes (e.g., \emph{L} or
+\emph{U}) are not used. 
+
+
+\subsubsection{Integers\label{sub:Integers}}
+\index{integers}
+Integers are decimal, hexadecimal, or octal, and use the same notation as
+in C. Integers are 64-bit signed quantities, although the parser also accepts
+(and wraps around) values above positive $2^{63}$.
+
+
+\subsubsection{Strings\label{sub:Strings}}
+\index{strings}
+Strings are enclosed in quotation marks ({}``string''), and pass through
+standard C escape codes with backslashes. Strings are limited in length to
+MAXSTRINGLEN. For more information about this and other limits, see Section~\ref{sub:SystemTap-safety}.
+
+
+\subsubsection{Associative arrays}
+
+See Section~\ref{sec:Associative-Arrays}.
+
+
+\subsubsection{Statistics}
+
+See Section~\ref{sec:Statistics}.
+
+
+\subsection{Semicolons}
+\index{;}
+The semicolon is the null statement, or do nothing statement. It is optional,
+and useful as a separator between statements to improve detection of syntax
+errors and to reduce ambiguities in grammar.
+
+
+\subsection{Comments}
+\index{comments}
+Three forms of comments are supported, as follows.
+
+\begin{vindent}
+\begin{verbatim}
+# ... shell style, to the end of line
+// ... C++ style, to the end of line
+/* ... C style ... */
+\end{verbatim}
+\end{vindent}
+
+\subsection{Whitespace}
+\index{whitespace}
+As in C, spaces, tabs, returns, newlines, and comments are treated as whitespace.
+Whitespace is ignored by the parser.
+
+
+\subsection{Expressions}
+\index{expressions}
+SystemTap supports a number of operators that use the same general syntax,
+semantics, and precedence as in C and awk. Arithmetic is performed per C
+rules for signed integers. If the parser detects division by zero or an overflow,
+it generates an error. The following subsections list these operators.
+
+
+\subsubsection{Binary numeric operators}
+\index{binary}
+\texttt{{*} / \% + - >\,{}> <\,{}< \& \textasciicircum{}
+| \&\& ||}
+
+
+\subsubsection{Binary string operators}
+\index{binary}
+\texttt{\textbf{.}} (string concatenation)
+
+
+\subsubsection{Numeric assignment operators}
+\index{numeric}
+\texttt{= {*}= /= \%= += -= >\,{}>= <\,{}<=
+\&= \textasciicircum{}= |=}
+
+
+\subsubsection{String assignment operators}
+
+\texttt{= .=}
+
+
+\subsubsection{Unary numeric operators}
+\index{unary}
+\texttt{+ - ! \textasciitilde{} ++ -{}-}
+
+
+\subsubsection{Binary numeric or string comparison operators}
+\index{comparison}
+\texttt{< > <= >= == !=}
+
+
+\subsubsection{Ternary operator\label{sub:Ternary-operator}}
+\index{?}
+\texttt{cond ? exp1 : exp2}
+
+
+\subsubsection{Grouping operator}
+\index{grouping}
+\texttt{( exp )}
+
+
+\subsubsection{Function call}
+\index{fn}
+General syntax:
+
+\texttt{fn ({[} arg1, arg2, ... ])}
+
+
+\subsubsection{\$ptr-\textgreater member}
+\index{pointer}
+\texttt{ptr} is a kernel pointer available in a probed context.
+
+
+\subsubsection{\textless value\textgreater\ in \textless array\_name\textgreater}
+\index{index}
+This expression evaluates to true if the array contains an element with the
+specified index.
+
+
+\subsubsection{{[} \textless value\textgreater, ... ] in \textless array\_name\textgreater}
+
+The number of index values must match the number of indexes previously specified.
+
+
+\subsection{Literals passed in from the stap command line\label{sub:Literals-passed-in}}
+\index{literals}
+\emph{Literals} are either strings enclosed in double quotes ('' '') or
+integers. For information about integers, see Section~\ref{sub:Integers}.
+For information about strings, see Section~\ref{sub:Strings}.
+
+Script arguments at the end of a command line are expanded as literals. You
+can use these in all contexts where literals are accepted. A reference to
+a nonexistent argument number is an error.
+
+
+\subsubsection{\$1 \ldots{} \$\textless NN\textgreater\ for integers}
+\index{\$}
+Use \texttt{\$1 \ldots{} \$<NN>} for casting as a numeric literal.
+
+
+\subsubsection{@1 \ldots{} @\textless NN\textgreater\ for strings}
+
+Use \texttt{@1 \ldots{} @<NN>} for casting as a string literal.
+
+
+\subsubsection{Examples}
+
+For example, if the following script named example.stp
+
+\begin{vindent}
+\begin{verbatim}
+probe begin { printf("%d, %s\n", $1, @2) }
+\end{verbatim}
+\end{vindent}
+is invoked as follows
+
+\begin{vindent}
+\begin{verbatim}
+# stap example.stp 10 mystring
+\end{verbatim}
+\end{vindent}
+then 10 is substituted for \$1 and \char`\"{}mystring\char`\"{} for @2. The
+output will be
+
+\begin{vindent}
+\begin{verbatim}
+10, mystring
+\end{verbatim}
+\end{vindent}
+
+\subsection{Conditional compilation}
+
+
+\subsubsection{Conditions}
+\index{conditions}
+One of the steps of parsing is a simple conditional preprocessing stage.
+The general form of this is similar to the ternary operator (Section~\ref{sub:Ternary-operator}).
+
+\begin{vindent}
+\begin{verbatim}
+%( CONDITION %? TRUE-TOKENS %)
+%( CONDITION %? TRUE-TOKENS %: FALSE-TOKENS %)
+\end{verbatim}
+\end{vindent}
+The CONDITION is a limited expression whose format is determined by its first
+keyword. The following is the general syntax.
+
+\begin{vindent}
+\begin{verbatim}
+%( <condition> %? <code> [ %: <code> ] %)
+\end{verbatim}
+\end{vindent}
+
+\subsubsection{Conditions based on kernel version: kernel\_v, kernel\_vr}
+\index{kernel version}
+\index{kernel\_vr}
+\index{kernel\_v}
+If the first part of a conditional expression is the identifier \texttt{kernel\_v}
+or \texttt{kernel\_vr}, the second part must be one of six standard numeric
+comparison operators {}``\textless'', {}``\textless ='', {}``=='', {}``!='', {}``\textgreater'',
+or {}``\textgreater ='',
+and the third part must be a string literal that contains an RPM-style version-release
+value. The condition returns true if the version of the target kernel (as
+optionally overridden by the \textbf{-r} option) matches the given version
+string. The comparison is performed by the glibc function strverscmp.
+
+\texttt{kernel\_v} refers to the kernel version number only, such as {}``2.6.13''.
+
+\texttt{kernel\_vr} refers to the kernel version number including the release
+code suffix, such as {}``2.6.13-1.322FC3smp''.
+
+
+\subsubsection{Conditions based on architecture: arch}
+\index{arch}
+If the first part of the conditional expression is the identifier \texttt{arch}
+which refers to the processor architecture, then the second part is a string
+comparison operator {}``=='' or {}``!='', and the third part is a string
+literal for matching it. This comparison is a simple string equality or inequality.
+The currently supported architecture strings are i386, i686, x86\_64, ia64,
+s390x and ppc64.
+
+
+\subsubsection{True and False Tokens}
+\index{tokens}
+TRUE-TOKENS and FALSE-TOKENS are zero or more general parser tokens, possibly
+including nested preprocessor conditionals, that are pasted into the input
+stream if the condition is true or false. For example, the following code
+induces a parse error unless the target kernel version is newer than 2.6.5.
+
+\begin{vindent}
+\begin{verbatim}
+%( kernel_v <= "2.6.5" %? **ERROR** %) # invalid token sequence
+\end{verbatim}
+\end{vindent}
+The following code adapts to hypothetical kernel version drift.
+
+\begin{vindent}
+\begin{verbatim}
+probe kernel.function (
+    %( kernel_v <= "2.6.12" %? "__mm_do_fault" %:
+        %( kernel_vr == "2.6.13-1.8273FC3smp" %? "do_page_fault" %: UNSUPPORTED %)
+    %)) { /* ... */ }
+
+%( arch == "ia64" %?
+    probe syscall.vliw = kernel.function("vliw_widget") {}
+%)
+\end{verbatim}
+\end{vindent}
+
+\section{Statement types\label{sec:Statement-Types}}
+
+Statements enable procedural control flow within functions and probe handlers.
+The total number of statements executed in response to any single probe event
+is limited to MAXACTION, which defaults to 1000. See Section~\ref{sub:SystemTap-safety}.
+
+
+\subsection{break and continue}
+\index{break}
+\index{continue}
+Use \texttt{break} or \texttt{continue} to exit or iterate the innermost
+nesting loop statement, such as within a \texttt{while, for,} or \texttt{foreach}
+statement. The syntax and semantics are the same as those used in C.
+
+
+\subsection{delete}
+\index{delete}
+\texttt{delete} removes an element.
+
+The following statement removes from ARRAY the element specified by the index
+tuple. The value will no longer be available, and subsequent iterations will
+not report the element. It is not an error to delete an element that does
+not exist.
+
+\begin{vindent}
+\begin{verbatim}
+delete ARRAY[INDEX1, INDEX2, ...]
+\end{verbatim}
+\end{vindent}
+The following syntax removes all elements from ARRAY:
+
+\begin{vindent}
+\begin{verbatim}
+delete ARRAY
+\end{verbatim}
+\end{vindent}
+The following statement removes the value of SCALAR. Integers and strings
+are cleared to zero and null (\char`\"{}\char`\"{}) respectively, while statistics
+are reset to their initial empty state.
+
+\begin{vindent}
+\begin{verbatim}
+delete SCALAR
+\end{verbatim}
+\end{vindent}
+
+\subsection{do}
+\index{do}
+The \texttt{do} statement has the same syntax and semantics as in C.
+
+\begin{vindent}
+\begin{verbatim}
+do STMT while (EXP)
+\end{verbatim}
+\end{vindent}
+
+\subsection{EXP (expression)}
+\index{expression}
+An \texttt{expression} executes a string- or integer-valued expression and
+discards the value.
+
+
+\subsection{for}
+\index{for}
+General syntax:
+\begin{vindent}
+\begin{verbatim}
+for (EXP1; EXP2; EXP3) STMT
+\end{verbatim}
+\end{vindent}
+The \texttt{for} statement is similar to the \texttt{for} statement in C.
+The \texttt{for} expression executes EXP1 as initialization. While EXP2 is
+non-zero, it executes STMT, then the iteration expression EXP3.
+
+\subsection{foreach\label{sub:foreach}}
+\index{foreach}
+General syntax:
+\begin{vindent}
+\begin{verbatim}
+foreach (VAR in ARRAY) STMT
+\end{verbatim}
+\end{vindent}
+The \texttt{foreach} statement loops over each element of a named global array, assigning
+the current key to VAR. The array must not be modified within the statement.
+If you add a single plus (+) or minus (-) operator after the VAR or the ARRAY
+identifier, the iteration order will be sorted by the ascending or descending
+index or value. 
+
+The following statement behaves the same as the first example, except it
+is used when an array is indexed with a tuple of keys.  Use a sorting suffix
+on at most one VAR or ARRAY identifier.
+
+\begin{vindent}
+\begin{verbatim}
+foreach ([VAR1, VAR2, ...] in ARRAY) STMT
+\end{verbatim}
+\end{vindent}
+The following statement is the same as the first example, except that the
+\texttt{limit} keyword limits the number of loop iterations to EXP times.
+EXP is evaluated once at the beginning of the loop.
+
+\begin{vindent}
+\begin{verbatim}
+foreach (VAR in ARRAY limit EXP) STMT
+\end{verbatim}
+\end{vindent}
+
+\subsection{if}
+\index{if}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+if (EXP) STMT1 [ else STMT2 ]
+\end{verbatim}
+\end{vindent}
+The \texttt{if} statement compares an integer-valued EXP to zero. It executes
+the first STMT if non-zero, or the second STMT if zero.
+
+The \texttt{if} statement has the same syntax and semantics as used in C.
+
+
+\subsection{next}
+\index{next}
+The \texttt{next} statement returns immediately from the enclosing probe
+handler.
+
+
+\subsection{; (null statement)}
+\index{;}
+\index{null statement}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+statement1
+;
+statement2
+\end{verbatim}
+\end{vindent}
+The semicolon represents the null statement, or do nothing. It is useful
+as an optional separator between statements to improve syntax error detection
+and to handle certain grammar ambiguities.
+
+
+\subsection{return}
+\index{return}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+return EXP
+\end{verbatim}
+\end{vindent}
+The \texttt{return} statement returns the EXP value from the enclosing function.
+If the value of the function is not returned, then a return statement is
+not needed, and the function will have a special \emph{unknown} type with
+no return value.
+
+\subsection{\{ \} (statement block)}
+\index{\{ \}}
+\index{statement block}
+This is the statement block with zero or more statements enclosed within
+braces. The following is the general syntax:
+
+\begin{vindent}
+\begin{verbatim}
+{ STMT1 STMT2 ... }
+\end{verbatim}
+\end{vindent}
+The statement block executes each statement in sequence in the block. Separators
+or terminators are generally not necessary between statements. The statement
+block uses the same syntax and semantics as in C.
+
+
+\subsection{while}
+\index{while}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+while (EXP) STMT
+\end{verbatim}
+\end{vindent}
+The \texttt{while} statement uses the same syntax and semantics as in C.
+In the statement above, while the integer-valued EXP evaluates to non-zero,
+the parser will execute STMT. 
+
+
+\section{Associative arrays\label{sec:Associative-Arrays}}
+\index{associative arrays}
+Associative arrays are implemented as hash tables with a maximum size set
+at startup. Associative arrays are too large to be created dynamically for
+individual probe handler runs, so they must be declared as global. The basic
+operations for arrays are setting and looking up elements. These operations
+are expressed in awk syntax: the array name followed by an opening bracket
+({[}), a comma-separated list of up to five index expressions, and
+a closing bracket (]). Each index expression may be a string or a number,
+as long as it is consistently typed throughout the script.
+
+
+\subsection{Examples}
+
+\begin{vindent}
+\begin{verbatim}
+# Increment the named array slot:
+foo [4,"hello"] ++
+
+# Update a statistic:
+processusage [uid(),execname()] ++
+
+# Set a timestamp reference point:
+times [tid()] = get_cycles()
+
+# Compute a timestamp delta:
+delta = get_cycles() - times [tid()]
+\end{verbatim}
+\end{vindent}
+
+\subsection{Types of values}
+
+Array elements may be set to a number or a string. The type must be consistent
+throughout the use of the array. The first assignment to the array defines
+the type of the elements. Unset array elements may be fetched and return
+a null value (zero or empty string) as appropriate, but they are not seen
+by a membership test.
+
+
+\subsection{Array capacity}
+
+Array sizes can be specified explicitly or allowed to default to the maximum
+size as defined by MAXMAPENTRIES. See Section~\ref{sub:SystemTap-safety}
+for details on changing MAXMAPENTRIES.
+
+You can explicitly specify the size of an array as follows:
+
+\begin{vindent}
+\begin{verbatim}
+global ARRAY[<size>]
+\end{verbatim}
+\end{vindent}
+If you do not specify the size parameter, then the array is created to hold
+MAXMAPENTRIES number of elements.
+
+
+\subsection{Iteration, foreach}
+\index{foreach}
+Like awk, SystemTap's foreach creates a loop that iterates over key tuples
+of an array, not only values. The iteration may be sorted by any single key
+or a value by adding an extra plus symbol (+) or minus symbol (-) to the
+code. The following are examples.
+
+\begin{vindent}
+\begin{verbatim}
+# Simple loop in arbitrary sequence:
+foreach ([a,b] in foo)
+    fuss_with(foo[a,b])
+
+# Loop in increasing sequence of value:
+foreach ([a,b] in foo+) { ... }
+
+# Loop in decreasing sequence of first key:
+foreach ([a-,b] in foo) { ... }
+\end{verbatim}
+\end{vindent}
+The \texttt{break} and \texttt{continue} statements also work inside foreach
+loops. Since arrays can be large but probe handlers must execute quickly,
+you should write scripts that exit iteration early, if possible. For simplicity,
+SystemTap forbids any modification of an array during iteration with a foreach.
+
+
+\section{Statistics (aggregates)\label{sec:Statistics}}
+\index{aggregates}
+Aggregate instances are used to collect statistics on numerical values, when
+it is important to accumulate new data quickly and in large volume. These
+instances operate without exclusive locks, and store only aggregated stream
+statistics. Aggregates make sense only for global variables. They are stored
+individually or as elements of an array.
+
+\subsection{The aggregation (\textless\hspace{1 sp}\textless\hspace{1 sp}\textless) operator}
+\index{\textless\hspace{1 sp}\textless\hspace{1 sp}\textless}
+The aggregation operator is {}``\textless\hspace{1 sp}\textless\hspace{1 sp}\textless'',
+and its effect is similar to an assignment or a C++ output streaming operation.
+The left operand specifies a scalar or array-index \emph{l-value}, which
+must be declared global. The right operand is a numeric expression. The meaning
+is intuitive: add the given number to the set of numbers to compute their
+statistics. The specific list of statistics to gather is given separately
+by the extraction functions. The following is an example.
+
+\begin{vindent}
+\begin{verbatim}
+a <<< delta_timestamp
+writes[execname()] <<< count
+\end{verbatim}
+\end{vindent}
+
+\subsection{Extraction functions}
+\index{extraction}
+For each instance of a distinct extraction function operating on a given
+identifier, the translator computes a set of statistics. With each execution
+of an extraction function, the aggregation is computed for that moment across
+all processors. The first argument of each function is the same style of
+l-value as used on the left side of the aggregation operation.
+
+
+\subsection{Integer extractors}
+
+The following functions provide methods to extract information about integer
+values.
+
+
+\subsubsection{@count(s)}
+\index{count}
+This statement returns the number of all values accumulated into s.
+
+
+\subsubsection{@sum(s)}
+\index{sum}
+This statement returns the total of all values accumulated into s.
+
+
+\subsubsection{@min(s)}
+\index{min}
+This statement returns the minimum of all values accumulated into s.
+
+
+\subsubsection{@max(s)}
+\index{max}
+This statement returns the maximum of all values accumulated into s.
+
+
+\subsubsection{@avg(s)}
+\index{avg}
+This statement returns the average of all values accumulated into s.
+
+
+\subsection{Histogram extractors}
+\index{histograms}
+The following functions provide methods to extract histogram information.
+Printing a histogram with the print family of functions renders a histogram
+object as a tabular ``ASCII art'' bar chart.
+
+\subsubsection{@hist\_linear}
+\index{hist\_linear}
+The statement \texttt{@hist\_linear(v,L,H,W)} represents a linear histogram
+\texttt{v}, where \emph{L} and \emph{H} represent the lower and upper end of
+a range of values and \emph{W} represents the width (or size) of each bucket
+within the range.  The low and high values can be negative, but the overall
+difference (high minus low) must be positive. The width parameter must also
+be positive.
+
+In the output, a range of consecutive empty buckets may be replaced with a tilde
+(\textasciitilde{}) character.  This can be controlled on the command line
+with -DHIST\_ELISION=\textless\hspace{1 sp}num\textgreater\hspace{1 sp},
+where \textless\hspace{1 sp}num\textgreater\hspace{1 sp} specifies how many
+empty buckets at the top and bottom of the range to print.
+The default is 2.  A \textless\hspace{1 sp}num\textgreater\hspace{1 sp} of 0
+removes all empty buckets. A negative \textless\hspace{1 sp}num\textgreater\hspace{1 sp}
+turns off bucket removal altogether.
+
+For example, if you specify -DHIST\_ELISION=3 and the histogram has 10 
+consecutive empty buckets, the first 3 and last 3 empty buckets will
+be printed and the middle 4 empty buckets will be represented by a
+tilde (\textasciitilde{}).
+
+The following is an example.
+
+\begin{vindent}
+\begin{verbatim}
+global reads
+probe netdev.receive {
+    reads <<< length
+}
+probe end {
+    print(@hist_linear(reads, 0, 10240, 200))
+}
+\end{verbatim}
+\end{vindent}
+This generates the following output.
+
+\pagebreak
+\begin{vindent}
+\begin{verbatim}
+value |-------------------------------------------------- count
+    0 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1650
+  200 |                                                      8
+  400 |                                                      0
+  600 |                                                      0
+      ~
+ 1000 |                                                      0
+ 1200 |                                                      0
+ 1400 |                                                      1
+ 1600 |                                                      0
+ 1800 |                                                      0
+\end{verbatim}
+\end{vindent}
+This shows that 1650 network reads were of a size between 0 and 200 bytes,
+8 reads were between 200 and 400 bytes, and 1 read was between
+1400 and 1600 bytes.  The tilde (\textasciitilde{}) character indicates
+that the bucket for 800 was removed because it was empty.
+Empty buckets at the upper end were also removed.
+
+\subsubsection{@hist\_log}
+\index{hist\_log}
+The statement \texttt{@hist\_log(v)} represents a base-2 logarithmic 
+histogram.  Empty buckets are replaced with a tilde (\textasciitilde{})
+character in the same way as \texttt{@hist\_linear()} (see above).
+
+The following is an example.
+
+\begin{vindent}
+\begin{verbatim}
+global reads
+probe netdev.receive {
+    reads <<< length
+}
+probe end {
+    print(@hist_log(reads))
+}
+\end{verbatim}
+\end{vindent}
+This generates the following output.
+
+\begin{vindent}
+\begin{verbatim}
+value |-------------------------------------------------- count
+    8 |                                                      0
+   16 |                                                      0
+   32 |                                                    254
+   64 |                                                      3
+  128 |                                                      2
+  256 |                                                      2
+  512 |                                                      4
+ 1024 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 16689
+ 2048 |                                                      0
+ 4096 |                                                      0
+\end{verbatim}
+\end{vindent}
+
+\section{Predefined functions\label{sec:Predefined-Functions}}
+
+Unlike built-in functions, predefined functions are implemented in tapsets.
+
+
+\subsection{Output functions}
+
+The following sections describe the functions you can use to output data.
+
+
+\subsubsection{error}
+\index{error}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+error:unknown (msg:string)
+\end{verbatim}
+\end{vindent}
+This function logs the given string to the error stream. It appends an implicit
+end-of-line. It blocks any further execution of statements in this probe.
+If the number of errors exceeds the MAXERRORS parameter, it triggers an \texttt{exit}.
+
+
+\subsubsection{log}
+\index{log}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+log:unknown (msg:string)
+log (const char *fmt, )
+\end{verbatim}
+\end{vindent}
+This function logs data. \texttt{log} sends the message immediately to staprun
+and to the bulk transport (relayfs) if it is being used. If the last character
+given is not a newline, then one is added.
+
+This function is not as efficient as printf and should only be used for urgent
+messages.
+
+\subsubsection{print}
+\index{print}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+print:unknown ()
+\end{verbatim}
+\end{vindent}
+This function prints a single value of any type.
+
+
+\subsubsection{printf}
+\index{printf}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+printf:unknown (fmt:string, )
+\end{verbatim}
+\end{vindent}
+The printf function takes a formatting string as an argument, and a number
+of values of corresponding types, and prints them all. The format must be a
+literal string constant. The printf formatting directives are similar to those
+of C, except that they are fully checked for type by the translator.
+
+The formatting string can contain tags that are defined as follows:
+
+\begin{vindent}
+\begin{verbatim}
+%[flags][width][.precision][length]specifier
+\end{verbatim}
+\end{vindent}
+Where \texttt{specifier} is required and defines the type and the interpretation
+of the value of the corresponding argument. The following table shows the
+details of the specifier parameter:
+
+\begin{table}[H]
+\caption{printf specifier values}
+\begin{tabular}{|>{\raggedright}p{1in}|>{\raggedright}p{3.5in}|>{\raggedright}p{1.25in}|}
+\hline
+\textbf{Specifier}&
+\textbf{Output}&
+\textbf{Example}\tabularnewline
+\hline
+\hline 
+d or i&
+Signed decimal&
+392\tabularnewline
+\hline 
+o&
+Unsigned octal&
+610\tabularnewline
+\hline 
+s&
+String&
+sample\tabularnewline
+\hline 
+u&
+Unsigned decimal&
+7235\tabularnewline
+\hline 
+x&
+Unsigned hexadecimal (lowercase letters)&
+7fa\tabularnewline
+\hline 
+X&
+Unsigned hexadecimal (uppercase letters)&
+7FA\tabularnewline
+\hline 
+p&
+Pointer address&
+0x0000000000bc614e\tabularnewline
+\hline 
+n&
+Writes a binary value that is the total length of the string written by printf.
+The field width specifies the number of bytes to write. Valid specifications
+are \%n, \%1n, \%2n and \%4n. The default is 2.&
+See below\tabularnewline
+\hline 
+b&
+Writes a binary value as text. The field width specifies the number of bytes
+to write. Valid specifications are \%b, \%1b, \%2b, \%4b and \%8b. The default
+width is 4 (32-bits).&
+See below\tabularnewline
+\hline 
+\%&
+A \% followed by another \% character will write \% to stdout.&
+\%\tabularnewline
+\hline
+\end{tabular}
+\end{table}
+The tag can also contain \texttt{flags}, \texttt{width}, \texttt{.precision}
+and \texttt{modifiers} sub-specifiers, which are optional and follow these
+specifications:
+
+\begin{table}[H]
+\caption{printf flag values}
+\begin{tabular}{|>{\raggedright}p{1.5in}|>{\raggedright}p{4.5in}|}
+\hline
+\textbf{Flags}&
+\textbf{Description}\tabularnewline
+\hline
+\hline
+- (minus sign)&
+Left-justify within the given field width. Right justification is the default
+(see \texttt{width} sub-specifier).\tabularnewline
+\hline 
++ (plus sign)&
+Precede the result with a plus or minus sign even for positive numbers. By
+default, only negative numbers are preceded with a minus sign.\tabularnewline
+\hline 
+(space)&
+If no sign is going to be written, a blank space is inserted before the value.\tabularnewline
+\hline 
+\#&
+When used with the \texttt{o}, \texttt{x} or \texttt{X} specifiers, the value is preceded
+with \texttt{0}, \texttt{0x} or \texttt{0X} respectively for non-zero values.\tabularnewline
+\hline 
+0&
+Left-pads the number with zeroes instead of spaces, where padding is specified
+(see \texttt{width} sub-specifier).\tabularnewline
+\hline
+\end{tabular}
+\end{table}
+
+\begin{table}[H]
+\caption{printf width values}
+\begin{tabular}{|>{\raggedright}p{1.5in}|>{\raggedright}p{4.5in}|}
+\hline
+\textbf{Width}&
+\textbf{Description}\tabularnewline
+\hline
+\hline
+(number)&
+Minimum number of characters to be printed. If the value to be printed is
+shorter than this number, the result is padded with blank spaces. The value
+is not truncated even if the result is larger.\tabularnewline
+\hline
+\end{tabular}
+\end{table}
+
+%
+\begin{table}[H]
+
+\caption{printf precision values}
+
+\begin{tabular}{|>{\raggedright}p{1.5in}|>{\raggedright}p{4.5in}|}
+\hline 
+\textbf{Precision}&
+\textbf{Description}\tabularnewline
+\hline
+\hline 
+.number&
+For integer specifiers (\texttt{d, i, o, u, x, X}): \texttt{precision} specifies
+the minimum number of digits to be written. If the value to be written is
+shorter than this number, the result is padded with leading zeros. The value
+is not truncated even if the result is longer. A precision of 0 means that
+no character is written for the value 0. For s: this is the maximum number
+of characters to be printed. By default all characters are printed until
+the ending null character is encountered. When no \texttt{precision} is specified,
+the default is 1. If the period is specified without an explicit value for
+\texttt{precision}, 0 is assumed.\tabularnewline
+\hline
+\end{tabular}
+\end{table}
+
+\textbf{Binary Write Examples}
+
+The following is an example of using the binary write functions:
+
+\begin{vindent}
+\begin{verbatim}
+probe begin {
+    for (i = 97; i < 110; i++)
+        printf("%3d: %1b%1b%1b\n", i, i, i-32, i-64)
+    exit()
+}
+\end{verbatim}
+\end{vindent}
+This prints:
+
+\begin{vindent}
+\begin{verbatim}
+ 97: aA!
+ 98: bB"
+ 99: cC#
+100: dD$
+101: eE%
+102: fF&
+103: gG'
+104: hH(
+105: iI)
+106: jJ*
+107: kK+
+108: lL,
+109: mM-
+\end{verbatim}
+\end{vindent}
+Another example:
+
+\begin{vindent}
+\begin{verbatim}
+stap -e 'probe begin{printf("%1n%b%b", 0xc0dedbad, \
+0x12345678);exit()}' | hexdump -C
+
+\end{verbatim}
+\end{vindent}
+This prints:
+
+\begin{vindent}
+\begin{verbatim}
+00000000  08 ad db de c0 78 56 34  12                       |.....xV4.|
+00000009
+\end{verbatim}
+\end{vindent}
+Another example:
+
+\begin{vindent}
+\begin{verbatim}
+probe begin{
+    printf("%1b%1b%1blo %1b%1brld\n", 72,101,108,87,111)
+    exit()
+}
+\end{verbatim}
+\end{vindent}
+This prints:
+
+\begin{vindent}
+\begin{verbatim}
+Hello World
+\end{verbatim}
+\end{vindent}
+
+\subsubsection{printd}
+\index{printd}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+printd:unknown (delimiter:string, )
+\end{verbatim}
+\end{vindent}
+This function takes a string delimiter and two or more values of any type, then
+prints the values with the delimiter interposed. The delimiter must be a
+literal string constant.
+
+For example:
+\begin{vindent}
+\begin{verbatim}
+printd("/", "one", "two", "three", 4, 5, 6)
+\end{verbatim}
+\end{vindent}
+prints:
+\begin{vindent}
+\begin{verbatim}
+one/two/three/4/5/6
+\end{verbatim}
+\end{vindent}
+
+\subsubsection{printdln}
+\index{printdln}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+printdln:unknown ()
+\end{verbatim}
+\end{vindent}
+This function operates like \texttt{printd}, but also appends a newline.
+
+\subsubsection{println}
+\index{println}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+println:unknown ()
+\end{verbatim}
+\end{vindent}
+This function operates like \texttt{print}, but also appends a newline.
+
+\subsubsection{sprint}
+\index{sprint}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+sprint:unknown ()
+\end{verbatim}
+\end{vindent}
+This function operates like \texttt{print}, but returns the string rather
+than printing it.
+
+\subsubsection{sprintf}
+\index{sprintf}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+sprintf:unknown (fmt:string, )
+\end{verbatim}
+\end{vindent}
+This function operates like \texttt{printf}, but returns the formatted string
+rather than printing it.
+
+
+\subsubsection{system}
+\index{system}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+system (cmd:string)
+\end{verbatim}
+\end{vindent}
+The system function runs a command on the system. The specified command runs
+in the background once the current probe completes. 
+
+
+\subsubsection{warn}
+\index{warn}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+warn:unknown (msg:string)
+\end{verbatim}
+\end{vindent}
+This function sends a warning message immediately to staprun. It is also
+sent over the bulk transport (relayfs) if it is being used. If the last character
+is not a newline, then one is added.
+
+\subsection{Context at the probe point}
+
+The following functions provide ways to access the current task context 
+at a probe point. Note that these may not return correct values when
+a probe is hit in interrupt context.
+
+\subsubsection{backtrace}
+\index{backtrace}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+backtrace:string ()
+\end{verbatim}
+\end{vindent}
+Returns a string of hex addresses that are a backtrace of the
+stack. The output is truncated to MAXSTRINGLEN.
+
+\subsubsection{caller}
+\index{caller}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+caller:string()
+\end{verbatim}
+\end{vindent}
+Returns the address and name of the calling function. It works
+only for return probes.
+
+\subsubsection{caller\_addr}
+\index{caller\_addr}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+caller_addr:long ()
+\end{verbatim}
+\end{vindent}
+Returns the address of the calling function. It works only
+for return probes.
+
+
+\subsubsection{cpu}
+\index{cpu}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+cpu:long ()
+\end{verbatim}
+\end{vindent}
+Returns the current cpu number.
+
+
+\subsubsection{egid}
+\index{egid}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+egid:long ()
+\end{verbatim}
+\end{vindent}
+Returns the effective group ID of the current process.
+
+
+\subsubsection{euid}
+\index{euid}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+euid:long ()
+\end{verbatim}
+\end{vindent}
+Returns the effective user ID of the current process.
+
+
+\subsubsection{execname}
+\index{execname}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+execname:string ()
+\end{verbatim}
+\end{vindent}
+Returns the name of the current process.
+
+
+\subsubsection{gid}
+\index{gid}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+gid:long ()
+\end{verbatim}
+\end{vindent}
+Returns the group ID of the current process.
+
+
+\subsubsection{is\_return}
+\index{is\_return}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+is_return:long ()
+\end{verbatim}
+\end{vindent}
+Returns 1 if the probe point is a return probe, else it returns
+zero.
+
+\noun{Deprecated}.
+
+
+\subsubsection{pexecname}
+\index{pexecname}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+pexecname:string ()
+\end{verbatim}
+\end{vindent}
+Returns the name of the parent process.
+
+
+\subsubsection{pid}
+\index{pid}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+pid:long ()
+\end{verbatim}
+\end{vindent}
+Returns the process ID of the current process.
+
+
+\subsubsection{ppid}
+\index{ppid}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+ppid:long ()
+\end{verbatim}
+\end{vindent}
+Returns the process ID of the parent process.
+
+
+\subsubsection{tid}
+\index{tid}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+tid:long ()
+\end{verbatim}
+\end{vindent}
+Returns the ID of the current thread.
+
+
+\subsubsection{uid}
+\index{uid}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+uid:long ()
+\end{verbatim}
+\end{vindent}
+Returns the user ID of the current task.
+
+
+\subsubsection{print\_backtrace}
+\index{print\_backtrace}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+print_backtrace:unknown ()
+\end{verbatim}
+\end{vindent}
+This function is equivalent to \texttt{print\_stack(backtrace())}, except
+that deeper stack nesting is supported. The function does not return a value.
+
+
+\subsubsection{print\_regs}
+\index{print\_regs}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+print_regs:unknown ()
+\end{verbatim}
+\end{vindent}
+This function prints a register dump.
+
+
+\subsubsection{print\_stack}
+\index{print\_stack}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+print_stack:unknown (stk:string)
+\end{verbatim}
+\end{vindent}
+This function performs a symbolic lookup of the addresses in the given string,
+which is assumed to be the result of a prior call to \texttt{backtrace()}.
+It prints one line per address. Each printed line includes the address, the
+name of the function containing the address, and an estimate of its position
+within that function. The function does not return a value.
+
+
+\subsubsection{stack\_size}
+\index{stack\_size}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+stack_size:long ()
+\end{verbatim}
+\end{vindent}
+Returns the size of the stack.
+
+
+\subsubsection{stack\_unused}
+\index{stack\_unused}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+stack_unused:long ()
+\end{verbatim}
+\end{vindent}
+Returns how many bytes are currently unused in the stack.
+
+
+\subsubsection{stack\_used}
+\index{stack\_used}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+stack_used:long ()
+\end{verbatim}
+\end{vindent}
+Returns how many bytes are currently used in the stack.
+
+
+\subsubsection{stp\_pid}
+\index{stp\_pid}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+stp_pid:long ()
+\end{verbatim}
+\end{vindent}
+Returns the process ID of the staprun process.
+
+
+\subsubsection{target}
+\index{target}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+target:long ()
+\end{verbatim}
+\end{vindent}
+Returns the process ID of the target process. This is useful
+in conjunction with the -x PID or -c CMD command-line options to stap. An
+example of its use is to create scripts that filter on a specific process.
+
+\begin{verbatim}
+-x <pid>
+\end{verbatim}
+target() returns the pid specified by -x.
+
+\begin{verbatim}
+-c <command>
+\end{verbatim}
+target() returns the pid for the executed command specified
+by -c.
+
+\subsection{Task data}
+
+These functions return data about a task.  They all require a task handle as
+input, such as the value returned by task\_current() or the variables
+prev\_task and next\_task in the scheduler.ctxswitch probe alias.
+
+\subsubsection{task\_cpu}
+\index{task\_cpu}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+task_cpu:long (task:long)
+\end{verbatim}
+\end{vindent}
+Returns the scheduled cpu for the given task.
+
+
+\subsubsection{task\_current}
+\index{task\_current}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+task_current:long ()
+\end{verbatim}
+\end{vindent}
+Returns the address of the task\_struct representing
+the current process. This address can be passed to the various task\_{*}()
+functions to extract more task-specific data.
+
+
+\subsubsection{task\_egid}
+\index{task\_egid}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+task_egid:long (task:long)
+\end{verbatim}
+\end{vindent}
+Returns the effective group ID of the given task.
+
+
+\subsubsection{task\_execname}
+\index{task\_execname}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+task_execname:string (task:long)
+\end{verbatim}
+\end{vindent}
+Returns the name of the given task.
+
+
+\subsubsection{task\_euid}
+\index{task\_euid}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+task_euid:long (task:long)
+\end{verbatim}
+\end{vindent}
+Returns the effective user ID of the given task.
+
+
+\subsubsection{task\_gid}
+\index{task\_gid}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+task_gid:long (task:long)
+\end{verbatim}
+\end{vindent}
+Returns the group ID of the given task.
+
+
+\subsubsection{task\_nice}
+\index{task\_nice}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+task_nice:long (task:long)
+\end{verbatim}
+\end{vindent}
+Returns the nice value of the given task.
+
+
+\subsubsection{task\_parent}
+\index{task\_parent}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+task_parent:long (task:long)
+\end{verbatim}
+\end{vindent}
+Returns the address of the parent task\_struct of the given
+task. This address can be passed to the various task\_{*}() functions to
+extract more task-specific data.
+
+
+\subsubsection{task\_pid}
+\index{task\_pid}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+task_pid:long (task:long)
+\end{verbatim}
+\end{vindent}
+Returns the process ID of the given task.
+
+
+\subsubsection{task\_prio}
+\index{task\_prio}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+task_prio:long (task:long)
+\end{verbatim}
+\end{vindent}
+Returns the priority value of the given task.
+
+
+\subsubsection{task\_state}
+\index{task\_state}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+task_state:long (task:long)
+\end{verbatim}
+\end{vindent}
+Returns the state of the given task. Possible states are:
+
+\begin{vindent}
+\begin{verbatim}
+TASK_RUNNING           0
+TASK_INTERRUPTIBLE     1
+TASK_UNINTERRUPTIBLE   2
+TASK_STOPPED           4
+TASK_TRACED            8
+EXIT_ZOMBIE           16
+EXIT_DEAD             32
+\end{verbatim}
+\end{vindent}
+
+\subsubsection{task\_tid}
+\index{task\_tid}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+task_tid:long (task:long)
+\end{verbatim}
+\end{vindent}
+Returns the thread ID of the given task.
+
+
+\subsubsection{task\_uid}
+\index{task\_uid}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+task_uid:long (task:long)
+\end{verbatim}
+\end{vindent}
+Returns the user ID of the given task.
+
+
+\subsubsection{task\_open\_file\_handles}
+\index{task\_open\_file\_handles}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+task_open_file_handles:long(task:long)
+\end{verbatim}
+\end{vindent}
+Returns the number of open file handles for the given task.
+
+
+\subsubsection{task\_max\_file\_handles}
+\index{task\_max\_file\_handles}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+task_max_file_handles:long(task:long)
+\end{verbatim}
+\end{vindent}
+Returns the maximum number of file handles for the given task.
+
+
+\subsection{Accessing string data at a probe point}
+
+The following functions provide methods to access string data at a probe
+point.
+
+
+\subsubsection{kernel\_string}
+\index{kernel\_string}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+kernel_string:string (addr:long)
+\end{verbatim}
+\end{vindent}
+Copies a string from kernel space at a given address. The validation of this
+address is only partial.
+
+
+\subsubsection{user\_string\label{sub:user_string}}
+\index{user\_string}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+user_string:string (addr:long)
+\end{verbatim}
+\end{vindent}
+This function copies a string from user space at a given address. The validation
+of this address is only partial. In rare cases when userspace data is not
+accessible, this function returns the string \texttt{<unknown>}.
+
+
+\subsubsection{user\_string2}
+\index{user\_string2}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+user_string2:string (addr:long, err_msg:string)
+\end{verbatim}
+\end{vindent}
+This function is similar to \texttt{user\_string} (Section~\ref{sub:user_string}),
+but allows passing an error message as an argument to be returned if userspace
+data is not available.
+
+
+\subsubsection{user\_string\_warn}
+\index{user\_string\_warn}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+user_string_warn:string (addr:long)
+\end{verbatim}
+\end{vindent}
+This function copies a string from userspace at a given address. It prints
+a verbose error message on failure.
+
+
+\subsubsection{user\_string\_quoted}
+\index{user\_string\_quoted}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+user_string_quoted:string (addr:long)
+\end{verbatim}
+\end{vindent}
+This function copies a string from userspace at a given address. Any ASCII
+characters that are not printable are replaced by the corresponding escape
+sequence in the returned string. 
+
+
+\subsection{Initializing queue statistics}
+\index{queue statistics}
+The queue\_stats tapset provides functions that, when given notification
+of queuing events like wait, run, or done, track averages such as queue length,
+service and wait times, and utilization. Call the following three functions
+from appropriate probes, in sequence.
+
+
+\subsubsection{qs\_wait}
+\index{qs\_wait}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+qs_wait:unknown (qname:string)
+\end{verbatim}
+\end{vindent}
+This function records that a new request was enqueued for the given queue
+name.
+
+
+\subsubsection{qs\_run}
+\index{qs\_run}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+qs_run:unknown (qname:string)
+\end{verbatim}
+\end{vindent}
+This function records that a previously enqueued request was removed from
+the given wait queue and is now being serviced.
+
+
+\subsubsection{qs\_done}
+\index{qs\_done}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+qs_done:unknown (qname:string)
+\end{verbatim}
+\end{vindent}
+This function records that a request originally from the given queue has
+completed being serviced.
+
+
+\subsection{Using queue statistics}
+
+Functions with the qsq\_ prefix query the statistics averaged since the first
+queue operation or when qsq\_start was called. Since statistics are often
+fractional, a scale parameter multiplies the result to a more useful scale.
+For some fractions, a scale of 100 returns percentage numbers.
+
+
+\subsubsection{qsq\_blocked}
+\index{qsq\_blocked}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+qsq_blocked:long (qname:string, scale:long)
+\end{verbatim}
+\end{vindent}
+This function returns the fraction of elapsed time during which one or more
+requests were on the wait queue.
+
+
+\subsubsection{qsq\_print}
+\index{qsq\_print}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+qsq_print:unknown (qname:string)
+\end{verbatim}
+\end{vindent}
+This function prints a line containing the following statistics for the given
+queue: 
+
+\begin{itemize}
+\item queue name
+\item average rate of requests per second
+\item average wait queue length
+\item average time on the wait queue
+\item average time to service a request
+\item percentage of time the wait queue was used
+\item percentage of time any request was being serviced
+\end{itemize}
+
+\subsubsection{qsq\_service\_time}
+\index{qsq\_service\_time}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+qsq_service_time:long (qname:string, scale:long)
+\end{verbatim}
+\end{vindent}
+This function returns the average time in microseconds required to service
+a request once it is removed from the wait queue.
+
+
+\subsubsection{qsq\_start}
+\index{qsq\_start}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+qsq_start:unknown (qname:string)
+\end{verbatim}
+\end{vindent}
+This function resets the statistics counters for the given queue, and restarts
+tracking from the moment the function was called. This command is used to
+create a queue.
+
+
+\subsubsection{qsq\_throughput}
+\index{qsq\_throughput}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+qsq_throughput:long (qname:string, scale:long)
+\end{verbatim}
+\end{vindent}
+This function returns the average number of requests served per microsecond.
+
+
+\subsubsection{qsq\_utilization}
+\index{qsq\_utilization}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+qsq_utilization:long (qname:string, scale:long)
+\end{verbatim}
+\end{vindent}
+This function returns the average time in microseconds that at least one
+request was being serviced.
+
+
+\subsubsection{qsq\_wait\_queue\_length}
+\index{qsq\_wait\_queue\_length}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+qsq_wait_queue_length:long (qname:string, scale:long)
+\end{verbatim}
+\end{vindent}
+This function returns the average length of the wait queue.
+
+
+\subsubsection{qsq\_wait\_time}
+\index{qsq\_wait\_time}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+qsq_wait_time:long (qname:string, scale:long)
+\end{verbatim}
+\end{vindent}
+This function returns the average time in microseconds that it took for a
+request to be serviced (qs\_wait() to qs\_done()).
+
+
+\subsubsection{A queue example}
+
+What follows is an example from src/testsuite/systemtap.samples/queue\_demo.stp.
+It uses the randomize feature of the timer probe to simulate queuing activity.
+
+\begin{vindent}
+\begin{verbatim}
+probe begin {
+    qsq_start ("block-read")
+    qsq_start ("block-write")
+}
+
+probe timer.ms(3500), end {
+    qsq_print ("block-read")
+    qsq_start ("block-read")
+    qsq_print ("block-write")
+    qsq_start ("block-write")
+}
+
+probe timer.ms(10000) {
+    exit ()
+}
+
+# synthesize queue work/service using three randomized "threads" for each queue.
+global tc
+
+function qs_doit (thread, name) {
+    n = tc[thread] = (tc[thread]+1) % 3 # per-thread state counter
+    if (n==1) qs_wait (name)
+    else if (n==2) qs_run (name)
+    else if (n==0) qs_done (name)
+}
+
+probe timer.ms(100).randomize(100) { qs_doit (0, "block-read") }
+probe timer.ms(100).randomize(100) { qs_doit (1, "block-read") }
+probe timer.ms(100).randomize(100) { qs_doit (2, "block-read") }
+probe timer.ms(100).randomize(100) { qs_doit (3, "block-write") }
+probe timer.ms(100).randomize(100) { qs_doit (4, "block-write") }
+probe timer.ms(100).randomize(100) { qs_doit (5, "block-write") }
+\end{verbatim}
+\end{vindent}
+This prints:
+
+\begin{vindent}
+\begin{verbatim}
+block-read: 9 ops/s, 1.090 qlen, 215749 await, 96382 svctm, 69% wait, 64% util
+block-write: 9 ops/s, 0.992 qlen, 208485 await, 103150 svctm, 69% wait, 61% util
+block-read: 9 ops/s, 0.968 qlen, 197411 await, 97762 svctm, 63% wait, 63% util
+block-write: 8 ops/s, 0.930 qlen, 202414 await, 93870 svctm, 60% wait, 56% util
+block-read: 8 ops/s, 0.774 qlen, 192957 await, 99995 svctm, 58% wait, 62% util
+block-write: 9 ops/s, 0.861 qlen, 193857 await, 101573 svctm, 56% wait, 64% util
+\end{verbatim}
+\end{vindent}
+
+\subsection{Probe point identification}
+
+The following functions help you identify probe points.
+
+
+\subsubsection{pp}
+\index{pp}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+pp:string ()
+\end{verbatim}
+\end{vindent}
+This function returns the probe point associated with a currently running
+probe handler, including alias and wild-card expansion effects.
+
+
+\subsubsection{probefunc}
+\index{probefunc}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+probefunc:string ()
+\end{verbatim}
+\end{vindent}
+This function returns the name of the function being probed.
+
+
+\subsubsection{probemod}
+\index{probemod}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+probemod:string ()
+\end{verbatim}
+\end{vindent}
+This function returns the name of the module containing the probe point.
+
+
+\subsection{Formatting functions}
+\index{formatting}
+The following functions help you format output.
+
+
+\subsubsection{ctime}
+\index{ctime}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+ctime:string(epochsecs:long)
+\end{verbatim}
+\end{vindent}
+This function accepts an argument of seconds since the epoch as returned
+by \texttt{gettimeofday\_s()}. It returns a date string in UTC of the form:
+
+\begin{vindent}
+\begin{verbatim}
+"Wed Jun 30 21:49:08 2006"
+\end{verbatim}
+\end{vindent}
+This function does not adjust for timezones. The returned time is always
+in GMT. Your script must manually adjust epochsecs before passing it to ctime()
+if you want to print local time.
+
+
+\subsubsection{errno\_str}
+\index{errno\_str}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+errno_str:string (err:long)
+\end{verbatim}
+\end{vindent}
+This function returns the symbolic string associated with the given error
+code, such as ENOENT for the number 2, or E\#3333 for an out-of-range value
+such as 3333.
+
+
+\subsubsection{returnstr}
+\index{returnstr}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+returnstr:string (returnp:long)
+\end{verbatim}
+\end{vindent}
+This function is used by the syscall tapset, and returns a string. Set \texttt{returnp}
+equal to 1 for decimal, or 2 for hex.
+
+
+\subsubsection{thread\_indent}
+\index{thread\_indent}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+thread_indent:string (delta:long)
+\end{verbatim}
+\end{vindent}
+This function returns a string with appropriate indentation for a thread.
+Call it with a small positive or matching negative delta. If this is the
+outermost, initial level of indentation, then the function resets the relative
+timestamp base to zero.
+
+The following example uses thread\_indent() to trace the functions called
+in the drivers/usb/core kernel source. It prints a relative timestamp and
+the name and ID of the current process, followed by the appropriate indent
+and the function name. Note that \char`\"{}swapper(0)\char`\"{} indicates
+the kernel is running in interrupt context and there is no valid current
+process.
+
+\begin{vindent}
+\begin{verbatim}
+probe kernel.function("*@drivers/usb/core/*") {
+    printf ("%s -> %s\n", thread_indent(1), probefunc())
+}
+probe kernel.function("*@drivers/usb/core/*").return {
+    printf ("%s <- %s\n", thread_indent(-1), probefunc())
+}
+\end{verbatim}
+\end{vindent}
+This prints:
+
+\begin{vindent}
+\begin{verbatim}
+ 0 swapper(0): -> usb_hcd_irq
+ 8 swapper(0): <- usb_hcd_irq
+ 0 swapper(0): -> usb_hcd_irq
+10 swapper(0):  -> usb_hcd_giveback_urb
+16 swapper(0):   -> urb_unlink
+22 swapper(0):   <- urb_unlink
+29 swapper(0):   -> usb_free_urb
+35 swapper(0):   <- usb_free_urb
+39 swapper(0):  <- usb_hcd_giveback_urb
+45 swapper(0): <- usb_hcd_irq
+ 0 usb-storage(1338): -> usb_submit_urb
+ 6 usb-storage(1338):  -> usb_hcd_submit_urb
+12 usb-storage(1338):   -> usb_get_urb
+18 usb-storage(1338):   <- usb_get_urb
+25 usb-storage(1338):  <- usb_hcd_submit_urb
+29 usb-storage(1338): <- usb_submit_urb
+ 0 swapper(0): -> usb_hcd_irq
+ 7 swapper(0): <- usb_hcd_irq
+\end{verbatim}
+\end{vindent}
+
+\subsubsection{thread\_timestamp}
+\index{thread\_timestamp}
+
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+thread_timestamp:long ()
+\end{verbatim}
+\end{vindent}
+This function returns an absolute timestamp value for use by the indentation
+function. The default function uses \texttt{gettimeofday\_us}.
+
+
+\subsection{String functions}
+\index{string}
+The following are string functions you can use.
+
+
+\subsubsection{isinstr}
+\index{isinstr}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+isinstr:long (s1:string, s2:string)
+\end{verbatim}
+\end{vindent}
+This function returns 1 if string s1 contains string s2, otherwise zero.
+
+
+\subsubsection{strlen}
+\index{strlen}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+strlen:long (str:string)
+\end{verbatim}
+\end{vindent}
+This function returns the number of characters in str.
+
+
+\subsubsection{strtol}
+\index{strtol}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+strtol:long (str:string, base:long)
+\end{verbatim}
+\end{vindent}
+This function converts the string representation of a number to an integer.
+The base parameter indicates the number base to assume for the string (e.g.
+16 for hex, 8 for octal, 2 for binary).
+
+
+\subsubsection{substr}
+\index{substr}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+substr:string (str:string, start:long, stop:long)
+\end{verbatim}
+\end{vindent}
+This function returns the substring of \texttt{str} starting from character
+position \texttt{start} and ending at character position \texttt{stop}.
+
+
+\subsubsection{text\_str}
+\index{text\_str}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+text_str:string (input:string)
+\end{verbatim}
+\end{vindent}
+This function accepts a string argument. Any ASCII characters in the string
+that are not printable are replaced by a corresponding escape sequence in
+the returned string.
+
+
+\subsubsection{text\_strn}
+\index{text\_strn}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+text_strn:string (input:string, len:long, quoted:long)
+\end{verbatim}
+\end{vindent}
+This function accepts a string of length \texttt{len}. Any ASCII characters
+that are not printable are replaced by a corresponding escape sequence in
+the returned string. If \texttt{quoted} is not null, the function adds a
+backslash character to the output.
+
+
+\subsubsection{tokenize}
+\index{tokenize}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+tokenize:string (input:string, delim:string)
+\end{verbatim}
+\end{vindent}
+This function returns the next token in the given input string, where 
+the tokens are delimited by one of the characters in the delim string.
+If the input string is non-NULL, it returns the first token. If the input string
+is NULL, it returns the next token in the string passed in the previous call
+to tokenize. If no delimiter is found, the entire remaining input string
+is returned.  It returns NULL when no more tokens are available.
+
+
+\subsection{Timestamps}
+\index{timestamps}
+The following functions provide methods to extract time data.
+
+
+\subsubsection{get\_cycles}
+\index{get\_cycles}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+get_cycles:long ()
+\end{verbatim}
+\end{vindent}
+This function returns the processor cycle counter value if available, else
+it returns zero.
+
+
+\subsubsection{gettimeofday\_ms}
+\index{gettimeofday\_ms}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+gettimeofday_ms:long ()
+\end{verbatim}
+\end{vindent}
+This function returns the number of milliseconds since the UNIX epoch.
+
+
+\subsubsection{gettimeofday\_ns}
+\index{gettimeofday\_ns}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+gettimeofday_ns:long ()
+\end{verbatim}
+\end{vindent}
+This function returns the number of nanoseconds since the UNIX epoch.
+
+
+\subsubsection{gettimeofday\_s}
+\index{gettimeofday\_s}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+gettimeofday_s:long ()
+\end{verbatim}
+\end{vindent}
+This function returns the number of seconds since the UNIX epoch.
+
+
+\subsubsection{gettimeofday\_us}
+\index{gettimeofday\_us}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+gettimeofday_us:long ()
+\end{verbatim}
+\end{vindent}
+This function returns the number of microseconds since the UNIX epoch.
+
+
+\subsection{Miscellaneous tapset functions}
+
+The following are miscellaneous functions.
+
+
+\subsubsection{addr\_to\_node}
+\index{addr\_to\_node}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+addr_to_node:long (addr:long)
+\end{verbatim}
+\end{vindent}
+This function accepts an address, and returns the node that the given address
+belongs to in a NUMA system.
+
+
+\subsubsection{exit}
+\index{exit}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+exit:unknown ()
+\end{verbatim}
+\end{vindent}
+This function enqueues a request to shut down the SystemTap session. It does
+not unwind the current probe handler, nor block new probe handlers. The stap
+daemon will respond to the request and initiate an ordered shutdown.
+
+
+\subsubsection{system}
+\index{system}
+General syntax:
+
+\begin{vindent}
+\begin{verbatim}
+system (cmd:string)
+\end{verbatim}
+\end{vindent}
+This function runs a command on the system. The command will run in the background
+when the current probe completes.
+
+
+\section{For Further Reference\label{sec:For-Further-Reference}}
+
+For more information, see:
+\begin{itemize}
+\item The SystemTap tutorial at \url{http://sourceware.org/systemtap/tutorial/}
+\item The SystemTap wiki at \url{http://sourceware.org/systemtap/wiki}
+\item The SystemTap documentation page at \url{http://sourceware.org/systemtap/documentation.html}
+\item From an unpacked source tarball or CVS directory, the examples in the
+src/examples directory, the tapsets in the src/tapset directory, and the
+test scripts in the src/testsuite directory.
+\item The man pages for tapsets. For a list, run the command ``\texttt{man -k
+stapprobes}''.
+\end {itemize}
+
+\setcounter{secnumdepth}{0}
+\newpage{}
+\addcontentsline{toc}{section}{Index}
+\printindex{}
+\end{document}
diff --git a/doc/tutorial.tex b/doc/tutorial.tex
new file mode 100644
index 00000000..d465bf0b
--- /dev/null
+++ b/doc/tutorial.tex
@@ -0,0 +1,1210 @@
+% Copyright (C) 2005-2007 Red Hat Inc.
+% This file is part of systemtap, and is free software.  You can
+% redistribute it and/or modify it under the terms of the GNU General
+% Public License (GPL); either version 2, or (at your option) any
+% later version.
+
+\documentclass{article}
+\usepackage{html}
+\usepackage{graphicx}
+% \usepackage{moreverb}
+\usepackage{fancyvrb}
+\usepackage{listings}
+\usepackage{fullpage}
+\usepackage{fancybox}
+\usepackage[compatible]{nomencl}
+% \usepackage{geometry}
+% \geometry{letterpaper,text={7in,8.5in}}
+\usepackage{charter}
+
+\newenvironment{boxedminipage}%% Boxed minipage
+    {\begin{makeimage}\begin{center}\begin{Sbox}\begin{minipage}}%
+    {\end{minipage}\end{Sbox}\fbox{\TheSbox}\end{center}\end{makeimage}}
+
+\begin{htmlonly}
+\renewcommand{\nomenclature}[2]{}
+\end{htmlonly}
+
+% \usepackage{draftcopy} % ugly
+\bibliographystyle{plain}
+\makeglossary
+\parindent0.0cm
+\parskip0.2cm
+
+\begin{document}
+
+\begin{center}
+\LARGE {\bf Systemtap tutorial}
+\end{center}
+
+\hfill \begin{minipage}{2.5in}
+% contributors please add your names to the list
+Frank Ch. Eigler {\tt \small <fche@redhat.com>} \\
+
+\hfill \today
+\end{minipage}
+
+\tableofcontents
+
+\section{Introduction}
+
+Systemtap is a tool that allows developers and administrators to write
+and reuse simple scripts to deeply examine the activities of a live
+Linux system.  Data may be extracted, filtered, and summarized quickly
+and safely, to enable diagnoses of complex performance or functional
+problems.
+
+\nomenclature{script}{A simple programming language understood by systemtap.}
+
+The essential idea behind a systemtap script is to name {\em events},
+and to give them {\em handlers}.  Whenever a specified event occurs,
+the Linux kernel runs the handler as if it were a quick subroutine,
+then resumes.  There are several kinds of events, such as entering or
+exiting a function, a timer expiring, or the entire systemtap session
+starting or stopping.  A handler is a series of script language
+statements that specify the work to be done whenever the event occurs.
+This work normally includes extracting data from the event context,
+storing them into internal variables, or printing results.
+
+\nomenclature{event}{An identifiable instant in the operating system's
+execution state, such as entry to a function, or expiry of a timer.}
+\nomenclature{session}{A complete run of a systemtap script program.}
+\nomenclature{handler}{A series of statements, written in script, which
+is to be performed whenever an event occurs.}
+\nomenclature{\tt .stp}{The standard file name extension for systemtap
+scripts.}
+
+Systemtap works by translating the script to C, running the system C
+compiler to create a kernel module from that.  When the module is
+loaded, it activates all the probed events by hooking into the kernel.
+Then, as events occur on any processor, the compiled handlers run.
+Eventually, the session stops, the hooks are disconnected, and the
+module removed.  This entire process is driven from a single
+command-line program, \verb+stap+.
+
+\begin{figure}[h!]
+\begin{boxedminipage}{4.5in}
+\begin{verbatim}
+# cat hello-world.stp
+probe begin 
+{
+  print ("hello world\n")
+  exit ()
+}
+
+# stap hello-world.stp
+hello world
+\end{verbatim}
+\end{boxedminipage}
+\caption{A systemtap smoke test.}
+\label{fig:hello-world}
+\end{figure}
+
+This paper assumes that you have installed systemtap and its
+prerequisite kernel development tools and debugging data, so that you
+can run the scripts such as the simple one in
+Figure~\ref{fig:hello-world}.  Log on as \verb+root+, or even better,
+as a user authorized to \verb+sudo+, before running systemtap.
+
+\begin{figure}[h]
+\begin{boxedminipage}{4.5in}
+\begin{verbatim}
+# cat strace-open.stp
+probe syscall.open
+{
+  printf ("%s(%d) open (%s)\n", execname(), pid(), argstr) 
+}
+probe timer.ms(4000) # after 4 seconds
+{
+  exit ()
+}
+
+# stap strace-open.stp
+vmware-guestd(2206) open ("/etc/redhat-release", O_RDONLY)
+hald(2360) open ("/dev/hdc", O_RDONLY|O_EXCL|O_NONBLOCK)
+hald(2360) open ("/dev/hdc", O_RDONLY|O_EXCL|O_NONBLOCK)
+hald(2360) open ("/dev/hdc", O_RDONLY|O_EXCL|O_NONBLOCK)
+df(3433) open ("/etc/ld.so.cache", O_RDONLY)
+df(3433) open ("/lib/tls/libc.so.6", O_RDONLY)
+df(3433) open ("/etc/mtab", O_RDONLY)
+hald(2360) open ("/dev/hdc", O_RDONLY|O_EXCL|O_NONBLOCK)
+\end{verbatim}
+\end{boxedminipage}
+\caption{A taste of systemtap: a system-wide {\tt strace}, just for
+the {\tt open} system call.}
+\label{fig:strace-open}
+\end{figure}
+\nomenclature{strace}{A standard ptrace-based command line tool to trace system call activity of a process.}
+
+\section{Tracing}
+
+The simplest kind of probe is simply to {\em trace} an event.
+\nomenclature{trace}{A compact textual record of an event occurrence.}
+This is the effect of inserting strategically located \verb+print+
+statements into a program.  This is often the first step of problem
+solving: explore by seeing a history of what has happened.
+
+This style of instrumentation is the simplest.  It just asks systemtap
+to print something at each event.  To express this in the script
+language, you need to say where to probe and what to print there.
+
+\subsection{Where to probe}
+
+Systemtap supports a number of built-in events.  The library of
+scripts that comes with systemtap, each called a ``tapset'', may
+define additional ones in terms of the built-in family.  See
+the \verb+stapprobes+ man page for details.  \nomenclature{tapset}{A
+reusable script forming part of the automatically searched tapset
+library.}  All these events are named using a unified syntax that
+looks like dot-separated parameterized identifiers:
+
+\begin{tabular}{rl}
+\verb+begin+ & The startup of the systemtap session. \\
+\verb+end+ & The end of the systemtap session. \\
+\verb+kernel.function("sys_open")+ & The entry to the function named
+\verb+sys_open+ in the kernel. \\
+\verb+syscall.close.return+ & The return from the \verb+close+ system
+call. \\
+\verb+module("ext3").statement(0xdeadbeef)+ & The addressed instruction
+in the \verb+ext3+ filesystem driver. \\
+\verb+timer.ms(200)+ & A timer that fires every 200 milliseconds. \\
+\end{tabular}
+
+Let's say that you would like to trace all function entries and exits
+in a source file, say \verb+net/socket.c+ in the kernel.  The
+\verb+kernel.function+ probe point lets you express that easily, since
+systemtap examines the kernel's debugging information to relate object
+code to source code.  It works like a debugger: if you can name or
+place it, you can probe it.  Use
+\verb+kernel.function("*@net/socket.c")+ for the function entries, and
+\verb+kernel.function("*@net/socket.c").return+ for the exits.  Note
+the use of wildcards in the function name part, and the subsequent
+\verb+@FILENAME+ part.  You can also put wildcards into the file name,
+and even add a colon (\verb+:+) and a line number, if you want to
+restrict the search that precisely.  Since systemtap will put a
+separate probe in every place that matches a probe point, a few
+wildcards can expand to hundreds or thousands of probes, so be careful
+what you ask for.  \nomenclature{debug information}{Data created by the
+compiler when the kernel or application was built, sometimes packaged into
+{\tt debuginfo} files, for use by a symbolic debugger.}
+\nomenclature{wildcard}{Presence of \verb+*+ globbing patterns in probe points.}
+
+Once you identify the probe points, the skeleton of the systemtap
+script appears.  The \verb+probe+ keyword introduces a probe point, or
+a comma-separated list of them.  The following \verb+{+ and \verb+}+
+braces enclose the handler for all listed probe points.
+\begin{verbatim}
+probe kernel.function("*@net/socket.c") { }
+probe kernel.function("*@net/socket.c").return { }
+\end{verbatim}
+You can run this script as is, though with empty handlers there will
+be no output.  Put the two lines into a new file.  Run
+\verb+stap -v FILE+.  Terminate it any time with \verb+^C+.  (The
+\verb+-v+ option tells systemtap to print more verbose messages during
+its processing.  Try the \verb+-h+ option to see more options.)
+
+\subsection{What to print}
+
+Since you are interested in each function that was entered and exited,
+a line should be printed for each, containing the function name.  In
+order to make that list easy to read, systemtap should indent the
+lines so that functions called by other traced functions are nested
+deeper.  To tell each single process apart from any others that may be
+running concurrently, systemtap should also print the process ID in
+the line.
+
+Systemtap provides a variety of such contextual data, ready for
+formatting.  They usually appear as function calls within the handler,
+like you already saw in Figure~\ref{fig:strace-open}.  See the
+\verb+stapfuncs+ man page for those functions and more defined in the
+tapset library, but here's a sampling:
+
+\begin{tabular}{rl}
+\verb+tid()+ & The id of the current thread. \\
+\verb+pid()+ & The process (task group) id of the current thread. \\
+\verb+uid()+ & The id of the current user. \\
+\verb+execname()+ & The name of the current process. \\
+\verb+cpu()+ & The current cpu number. \\
+\verb+gettimeofday_s()+ & Number of seconds since epoch. \\
+\verb+get_cycles()+ & Snapshot of hardware cycle counter. \\
+\verb+pp()+ & A string describing the probe point being currently handled. \\
+\verb+probefunc()+ & If known, the name of the function in which
+                     this probe was placed. \\
+\end{tabular}
+
+The values returned may be strings or numbers.  The \verb+print()+
+built-in function accepts either as its sole argument.  Or, you can
+use the C-style \verb+printf()+ built-in, whose formatting argument
+may include \verb+%s+ for a string, \verb+%d+ for a number.
+\verb+printf+ and other functions take comma-separated arguments.
+Don't forget a \verb+"\n"+ at the end.
+
+A particularly handy function in the tapset library is
+\verb+thread_indent+.  Given an indentation delta parameter, it stores
+internally an indentation counter for each thread (\verb+tid()+), and
+returns a string with some generic trace data plus an appropriate
+number of indentation spaces.  That generic data includes a timestamp
+(number of microseconds since the most recent initial indentation), a
+process name and the thread id itself.  It therefore gives an idea not
+only about what functions were called, but who called them, and how
+long they took.  Figure~\ref{fig:socket-trace} shows the finished
+script.  It lacks a call to the \verb+exit()+ function, so you need to
+interrupt it with \verb+^C+ when you want the tracing to stop.
+
+\begin{figure}[h!]
+\begin{boxedminipage}{4.5in}
+\begin{verbatim}
+# cat socket-trace.stp
+probe kernel.function("*@net/socket.c") {
+  printf ("%s -> %s\n", thread_indent(1), probefunc())
+}
+probe kernel.function("*@net/socket.c").return {
+  printf ("%s <- %s\n", thread_indent(-1), probefunc())
+}
+
+# stap socket-trace.stp
+     0 hald(2632): -> sock_poll
+    28 hald(2632): <- sock_poll
+[...]
+     0 ftp(7223): -> sys_socketcall
+  1159 ftp(7223):  -> sys_socket
+  2173 ftp(7223):   -> __sock_create
+  2286 ftp(7223):    -> sock_alloc_inode
+  2737 ftp(7223):    <- sock_alloc_inode
+  3349 ftp(7223):    -> sock_alloc
+  3389 ftp(7223):    <- sock_alloc
+  3417 ftp(7223):   <- __sock_create
+  4117 ftp(7223):   -> sock_create
+  4160 ftp(7223):   <- sock_create
+  4301 ftp(7223):   -> sock_map_fd
+  4644 ftp(7223):    -> sock_map_file
+  4699 ftp(7223):    <- sock_map_file
+  4715 ftp(7223):   <- sock_map_fd
+  4732 ftp(7223):  <- sys_socket
+  4775 ftp(7223): <- sys_socketcall
+[...]
+\end{verbatim}
+\end{boxedminipage}
+\caption{Tracing and timing functions in {\tt net/socket.c}.}
+\label{fig:socket-trace}
+\end{figure}
+
+\subsection{Exercises}
+
+\begin{enumerate}
+\item Use the \verb+-p2+ option to systemtap to list all the kernel
+functions named with the word ``nit'' in them.  The probe handlers
+might as well be empty.
+
+\item Trace some system calls (use \verb+syscall.NAME+ and \verb+.return+
+probe points), with the same \verb+thread_indent+ probe handler as in
+Figure~\ref{fig:socket-trace}.  Interpret the results.
+
+\end{enumerate}
+
+\section{Analysis}
+
+Pages of generic tracing text may give you enough information for
+exploring a system.  With systemtap, it is possible to analyze that
+data, to filter, aggregate, transform, and summarize it.  Different
+probes can work together to share data.  Probe handlers can use a rich
+set of control constructs to describe algorithms, with a syntax taken
+roughly from \verb+awk+.  With these tools, systemtap scripts can
+focus on a specific question and provide a compact response: no
+\verb+grep+ needed.
+\nomenclature{awk}{A classic UNIX stream processing language.}
+
+\subsection{Basic constructs}
+
+Most systemtap scripts include conditionals, to limit tracing or other
+logic to those processes or users or {\em whatever} of interest.  The
+syntax is simple:
+
+\begin{tabular}{rl}
+\verb+if (+{\em EXPR}\verb+)+ {\em STATEMENT} [\verb+else+ {\em STATEMENT}\verb+]+ & if/else statement \\
+\verb+while (+{\em EXPR}\verb+)+ {\em STATEMENT} & while loop \\
+\verb+for (+{\em A}\verb+;+ {\em B}\verb+;+ {\em C}\verb+)+ {\em STATEMENT} & for loop \\
+\end{tabular}
+
+Scripts may use \verb+break+/\verb+continue+ as in C.
+Probe handlers can return early using \verb+next+ as in \verb+awk+.
+Blocks of statements are enclosed in \verb+{+ and \verb+}+.  In
+systemtap, the semicolon (\verb+;+) is accepted as a null statement
+rather than as a statement terminator, so is only rarely\footnote{Use
+them between consecutive expressions that place unary {\tt +},{\tt -}
+or mixed pre/post {\tt ++},{\tt --} in an ambiguous manner.}
+necessary.  Shell-style (\verb+#+), C-style (\verb+/* */+), and
+C++-style (\verb+//+) comments are all accepted.
+
+Expressions look like C or \verb+awk+, and support the usual
+operators, precedences, and numeric literals.  Strings are treated as
+atomic values rather than arrays of characters.  String concatenation
+is done with the dot (\verb+"a" . "b"+).  Some examples:
+
+\begin{tabular}{rl}
+\verb+(uid() > 100)+ & probably an ordinary user \\
+\verb+(execname() == "sed")+ & current process is sed \\
+\verb+(cpu() == 0 && gettimeofday_s() > 1140498000)+ & after Feb. 21, 2006, on CPU 0 \\
+\verb+"hello" . " " . "world"+ & a string in three easy pieces \\
+\end{tabular}
+
+Variables may be used as well.  Just pick a name, assign to it, and
+use it in expressions.  They are automatically initialized and
+declared.  The type of each identifier -- string vs. number -- is
+automatically inferred by systemtap from the kinds of operators and
+literals used on it.  Any inconsistencies will be reported as errors.
+Conversion between string and number types is done through explicit
+function calls.
+
+\nomenclature{type}{A designation of each identifier such as a
+variable, or function, or array value or index, as containing a string
+or number.}  \nomenclature{string}{A \verb+\0+-terminated character
+string of up to a fixed limit in length.}  \nomenclature{number}{A
+64-bit signed integer.}  \nomenclature{type inference}{The automatic
+determination of the type of each variable, function parameter, array
+value and index, based on their use.}
+
+\begin{tabular}{rl}
+\verb+foo = gettimeofday_s()+ & foo is a number \\
+\verb+bar = "/usr/bin/" . execname()+ & bar is a string \\
+\verb|c++| & c is a number \\
+\verb+s = sprint(2345)+ & s becomes the string "2345" \\
+\end{tabular}
+
+By default, variables are local to the probe they are used in.  That
+is, they are initialized, used, and disposed of at each probe handler
+invocation.  To share variables between probes, declare them global
+anywhere in the script.  Because of possible concurrency (multiple
+probe handlers running on different CPUs), each global variable used
+by a probe is automatically read- or write-locked while the handler is
+running.  \nomenclature{global variable}{A scalar, array, or aggregate that was
+named in a \verb+global+ declaration, sharing that object amongst all
+probe handlers and functions executed during a systemtap session.}
+\nomenclature{locking}{An automated facility used by systemtap to
+protect global variables against concurrent modification and/or
+access.}
+
+\begin{figure}[h!]
+\begin{boxedminipage}{4.5in}
+\begin{verbatim}
+# cat timer-jiffies.stp
+global count_jiffies, count_ms
+probe timer.jiffies(100) { count_jiffies ++ }
+probe timer.ms(100) { count_ms ++ }
+probe timer.ms(12345) 
+{
+  hz=(1000*count_jiffies) / count_ms
+  printf ("jiffies:ms ratio %d:%d => CONFIG_HZ=%d\n",
+    count_jiffies, count_ms, hz)
+  exit ()
+}
+
+# stap timer-jiffies.stp
+jiffies:ms ratio 30:123 => CONFIG_HZ=243
+\end{verbatim}
+\end{boxedminipage}
+\caption{Experimentally measuring {\tt CONFIG\_HZ}.}
+\label{fig:timer-jiffies}
+\end{figure}
+
+\subsection{Target variables}
+
+A class of special ``target variables'' allow access to the probe
+point context.  \nomenclature{target variable}{A value that may be
+extracted from the kernel context of the probe point, such as a
+parameter or local variable within a probed function.}  In a symbolic
+debugger, when you're stopped at a breakpoint, you can print values
+from the program's context.  In systemtap scripts, for those probe
+points that match a specific executable point (rather than an
+asynchronous event like a timer), you can do the same.  To know which
+variables are likely to be available, you will need to be familiar
+with the kernel source you are probing.  In addition, you will need to
+check that the compiler has not optimized those values into
+unreachable nonexistence.
+
+Let's say that you are trying to trace filesystem reads/writes to a
+particular device/inode.  From your knowledge of the kernel, you know
+that two functions of interest could be \verb+vfs_read+ and
+\verb+vfs_write+.  Each takes a \verb+struct file *+ argument, inside
+which there is a \verb+struct dentry *+, a \verb+struct inode *+, and
+so on.  Systemtap allows limited dereferencing of such pointer chains.
+Two functions, \verb+user_string+ and \verb+kernel_string+, can copy
+\verb+char *+ target variables into systemtap strings.
+Figure~\ref{fig:inode-watch} demonstrates one way to monitor a
+particular file (identified by device number and inode number).  This
+example also demonstrates pasting numeric command-line arguments
+(\verb+$1+ etc.) into scripts.
+%$
+
+\begin{figure}[h!]
+\begin{boxedminipage}{4.5in}
+\begin{verbatim}
+# cat inode-watch.stp
+probe kernel.function ("vfs_write"),
+      kernel.function ("vfs_read")
+{
+  dev_nr = $file->f_dentry->d_inode->i_sb->s_dev
+  inode_nr = $file->f_dentry->d_inode->i_ino
+
+  if (dev_nr == ($1 << 20 | $2) # major/minor device
+      && inode_nr == $3)
+    printf ("%s(%d) %s 0x%x/%u\n",
+      execname(), pid(), probefunc(), dev_nr, inode_nr)
+}
+# stat -c '%D %i' /etc/crontab
+803 988136
+# stap inode-watch.stp 8 3 988136
+crond(2419) vfs_read 0x800003/988136
+crond(2419) vfs_read 0x800003/988136
+crond(2419) vfs_read 0x800003/988136
+\end{verbatim}
+% $
+\end{boxedminipage}
+\caption{Watching for reads/writes to a particular file.}
+\label{fig:inode-watch}
+\end{figure}
+
+\subsection{Functions}
+
+Functions are conveniently packaged reusable software: it would be a
+shame to have to duplicate a complex condition expression or logging
+directive in every place it's used.  So, systemtap lets you define
+functions of your own.  Like global variables, systemtap functions may
+be defined anywhere in the script.  They may take any number of string
+or numeric arguments (by value), and may return a single string or
+number.  The parameter types are inferred as for ordinary variables,
+and must be consistent throughout the program.  Local and global
+script variables are available, but target variables are {\em not}.
+That's because there is no specific debugging-level context associated
+with a function.
+\nomenclature{function}{A clump of parametrized script statements that
+may be repeatedly and recursively called from probe handlers and other
+functions.}
+
+A function is defined with the keyword \verb+function+ followed by a
+name.  Then comes a comma-separated formal argument list (just a list
+of variable names).  The \verb+{ }+-enclosed body consists of any list
+of statements, including expressions that call functions.  Recursion
+is possible, up to a nesting depth limit.  Figure~\ref{fig:functions}
+displays function syntax.
+
+
+\begin{figure}[h!]
+\begin{boxedminipage}{4.5in}
+\begin{verbatim}
+# Red Hat convention
+function system_uid_p (u) { return u < 500 }
+
+# kernel device number assembly macro
+function makedev (major,minor) { return major << 20 | minor }
+
+function trace_common ()
+{
+  printf("%d %s(%d)", gettimeofday_s(), execname(), pid())
+  # no return value necessary
+} 
+
+function fibonacci (i)
+{
+  if (i < 1) return 0
+  else if (i < 2) return 1
+  else return fibonacci(i-1) + fibonacci(i-2)
+}
+\end{verbatim}
+\end{boxedminipage}
+\caption{Some functions of dubious utility.}
+\label{fig:functions}
+\end{figure}
+
+\subsection{Arrays}
+
+Often, probes will want to share data that cannot be represented as a
+simple scalar value.  Much data is naturally tabular in nature,
+indexed by some tuple of thread numbers, processor ids, names, time,
+and so on.  Systemtap offers associative arrays for this purpose.
+These arrays are implemented as hash tables with a maximum size that
+is fixed at startup.  Because they are too large to be created
+dynamically for individual probe handler runs, they must be declared
+as global.  \nomenclature{array}{A global
+\verb+[+$k_1,k_2,\ldots,k_n\verb+]+\rightarrow value$
+associative lookup table, with a string or
+number for each index; the value may be a string, number, or an aggregate.}
+
+\begin{tabular}{rl}
+\verb|global a| & declare global scalar or array variable \\
+\verb|global b[400]| & declare array, reserving space for up to 400 tuples \\
+\end{tabular}
+
+The basic operations for arrays are setting and looking up elements.
+These are expressed in \verb+awk+ syntax: the array name followed by
+an opening \verb+[+ bracket, a comma-separated list of index
+expressions, and a closing \verb+]+ bracket.  Each index expression
+may be string or numeric, as long as it is consistently typed
+throughout the script.
+\nomenclature{arity}{Number of indexes to an array, or number of parameters
+to a function.}
+
+\begin{tabular}{rl}
+\verb|foo [4,"hello"] ++ | & increment the named array slot \\
+\verb|processusage [uid(),execname()] ++| & update a statistic \\
+\verb|times [tid()] = get_cycles()| & set a timestamp reference point \\
+\verb|delta = get_cycles() - times [tid()]| & compute a timestamp delta \\
+\end{tabular}
+
+Array elements that have not been set {\em may} be fetched, and return
+a dummy null value (zero or an empty string) as appropriate.  However,
+assigning a null value does not delete the element: an explicit
+\verb|delete| statement is required.  \nomenclature{null value}{A
+default initialized value for globals and array elements: a zero or an
+empty string, depending on type.}  Systemtap provides syntactic sugar
+for these operations, in the form of explicit membership testing and
+deletion.
+
+\begin{tabular}{rl}
+\verb|if ([4,"hello"] in foo) { }| & membership test \\
+\verb|delete times[tid()]| & deletion of a single element \\
+\verb|delete times| & deletion of all elements \\
+\end{tabular}
+
+One final and important operation is iteration over arrays.  This uses
+the keyword \verb+foreach+.  Like \verb+awk+, this creates a loop that
+{\em iterates over key tuples} of an array, not just {\em values}.  In
+addition, the iteration may be {\em sorted} by any single key or the
+value by adding an extra \verb|+| or \verb|-| code.
+
+The \verb+break+ and \verb+continue+ statements work inside
+\verb+foreach+ loops, too.  Since arrays can be large but probe
+handlers must not run for long, it is a good idea to exit iteration
+early if possible.  The \verb+limit+ option in the \verb+foreach+
+expression is one way.  For simplicity, systemtap forbids any {\em
+modification} of an array while it is being iterated using a
+\verb+foreach+.
+
+\begin{tabular}{rl}
+\verb|foreach ([a,b] in foo) { fuss_with(foo[a,b]) }| & simple loop in arbitrary sequence \\
+\verb|foreach ([a,b] in foo+ limit 5) { }| & loop in increasing sequence of value, stop after 5 \\
+\verb|foreach ([a-,b] in foo) { }| & loop in decreasing sequence of first key \\
+\end{tabular}
+
+\subsection{Aggregates}
+
+When we said above that values can only be strings or numbers, we lied
+a little.  There is a third type: statistics aggregates, or aggregates
+for short.  Instances of this type are used to collect statistics on
+numerical values, where it is important to accumulate new data quickly
+({\em without} exclusive locks) and in large volume (storing only
+aggregated stream statistics).  This type only makes sense for global
+variables, and may be stored individually or as elements of an array.
+\nomenclature{aggregate}{A special ``write-mostly'' data type used to
+efficiently store aggregated statistical values of a potentially huge
+data stream.}
+
+To add a value to a statistics aggregate, systemtap uses the special
+operator \verb+<<<+.  Think of it like C++'s \verb+<<+ output
+streamer: the left hand side object accumulates the data sample given
+on the right hand side.  This operation is efficient (taking a shared
+lock) because the aggregate values are kept separately on each
+processor, and are only aggregated across processors on request.
+
+\begin{verbatim}
+a <<< delta_timestamp
+writes[execname()] <<< count
+\end{verbatim}
+
+To read the aggregate value, special functions are available to
+extract a selected statistical function. {\em The aggregate value
+cannot be read by simply naming it as if it were an ordinary
+variable.}  These operations take an exclusive lock on the respective
+globals, and should therefore be relatively rare.  The simple ones
+are: \verb+@min+, \verb+@max+, \verb+@count+, \verb+@avg+, and
+\verb+@sum+, and evaluate to a single number.  In addition, histograms
+of the data stream may be extracted using the \verb+@hist_log+ and
+\verb+@hist_linear+.  These evaluate to a special sort of array that
+may at present\footnote{We anticipate support for indexing and looping
+using {\tt foreach} shortly.} only be printed.
+\nomenclature{extractor}{A function-like expression in a script that
+computes a single statistic for a given aggregate.}
+
+\begin{tabular}{rl}
+\verb+@avg(a)+ & the average of all the values accumulated
+                        into \verb+a+ \\
+\verb+print(@hist_linear(a,0,100,10))+ & print an ``ascii art'' linear
+                  histogram of the same data stream, \\
+    & bounds $0 \ldots 100$, bucket width is $10$ \\
+\verb|@count(writes["zsh"])| & the number of times ``zsh''
+                ran the probe handler \\
+\verb+print(@hist_log(writes["zsh"]))+ & print an ``ascii art'' logarithmic
+                  histogram of the same data stream \\
+\end{tabular}
+
+\subsection{Safety}
+\label{sec:safety}
+
+The full expressivity of the scripting language raises good questions
+of safety.  Here is a set of Q\&A:
+
+\begin{description}
+\item{\bf What about infinite loops?  recursion?} A probe handler is
+bounded in time.  The C code generated by systemtap includes explicit
+checks that limit the total number of statements executed to a small
+number.  A similar limit is imposed on the nesting depth of function
+calls.  When either limit is exceeded, that probe handler cleanly
+aborts and signals an error.  The systemtap session is normally
+configured to abort as a whole at that time.
+
+\item{\bf What about running out of memory?}  No dynamic memory
+allocation whatsoever takes place during the execution of probe
+handlers.  Arrays, function contexts, and buffers are allocated during
+initialization.  These resources may run out during a session, and
+generally result in errors.
+
+\item{\bf What about locking?}  If multiple probes seek conflicting
+locks on the same global variables, one or more of them will time out,
+and be aborted.  Such events are tallied as ``skipped'' probes, and a
+count is displayed at session end.  A configurable number of skipped
+probes can trigger an abort of the session.
+
+\item{\bf What about null pointers? division by zero?}  The C code
+generated by systemtap translates potentially dangerous operations to
+routines that check their arguments at run time.  These signal errors
+if they are invalid.  Many arithmetic and string operations silently
+overflow if the results exceed representation limits.
+
+\item{\bf What about bugs in the translator?  compiler?}  While bugs
+in the translator or the runtime layer certainly exist\footnote{See
+\tt http://sources.redhat.com/bugzilla}, our test suite gives some
+assurance.  Plus, the entire generated C code may be inspected (try
+the \verb+-p3+ option).  Compiler bugs are unlikely to be of any
+greater concern for systemtap than for the kernel as a whole.  In
+other words, if it was reliable enough to build the kernel, it will
+build the systemtap modules properly too.
+
+\item{\bf Is that the whole truth?}  In practice, there are several
+weak points in systemtap and the underlying kprobes system at the time
+of writing.  Putting probes indiscriminately into unusually sensitive
+parts of the kernel (low level context switching, interrupt
+dispatching) has reportedly caused crashes in the past.  We are
+fixing these bugs as they are found, and
+constructing a probe point ``blacklist'', but it is not complete.
+\nomenclature{blacklist}{A list of probe point patterns encoded into
+the translator or the kernel, where probing is prohibited for safety
+reasons.}  \nomenclature{kprobes}{A breakpoint dispatching system for
+dynamic kernel probes, used by systemtap to implement some families of
+probe points.}
+
+\end{description}
+
+
+\subsection{Exercises}
+\begin{enumerate}
+\item Alter the last probe in \verb+timer-jiffies.stp+ to reset the
+counters and continue reporting instead of exiting.
+
+\item Write a script that, every ten seconds, displays the top five
+most frequent users of the \verb+open+ system call during that interval.
+
+\item Write a script that experimentally measures the speed of the
+\verb+get_cycles()+ counter on each processor.
+
+\item Use any suitable probe point to get an approximate profile of
+process CPU usage: which processes/users use how much of each CPU.
+\end{enumerate}
+
+\section{Tapsets}
+
+After writing enough analysis scripts for yourself, you may become
+known as an expert to your colleagues, who will want to use your
+scripts.  Systemtap makes it possible to share in a controlled manner;
+to build libraries of scripts that build on each other.  In fact, all
+of the functions (\verb+pid()+, etc.) used in the scripts above come
+from tapset scripts like that.  A ``tapset'' is just a script that is
+designed for reuse by installation into a special directory.
+
+\subsection{Automatic selection}
+
+Systemtap attempts to resolve references to global symbols (probes,
+functions, variables) that are not defined within the script by a
+systematic search through the tapset library for scripts that define
+those symbols.  Tapset scripts are installed under the default
+directory named \verb+/usr/share/systemtap/tapset+.  A user may give
+additional directories with the \verb+-I DIR+ option.  Systemtap
+searches these directories for script (\verb+.stp+) files.
+
+The search process includes subdirectories that are specialized for a
+particular kernel version and/or architecture, and ones that name only
+larger kernel families.  Naturally, the search is ordered from
+specific to general, as shown in Figure~\ref{fig:tapset-search}.
+\nomenclature{tapset search path}{A list of subdirectories searched by
+systemtap for tapset scripts, allowing specialization by version and
+architecture.}
+
+\begin{figure}[h!]
+\begin{boxedminipage}{6in}
+\begin{verbatim}
+# stap -p1 -vv -e 'probe begin { }' > /dev/null
+Created temporary directory "/tmp/staplnEBh7"
+Searched '/usr/share/systemtap/tapset/2.6.15/i686/*.stp', match count 0
+Searched '/usr/share/systemtap/tapset/2.6.15/*.stp', match count 0
+Searched '/usr/share/systemtap/tapset/2.6/i686/*.stp', match count 0
+Searched '/usr/share/systemtap/tapset/2.6/*.stp', match count 0
+Searched '/usr/share/systemtap/tapset/i686/*.stp', match count 1
+Searched '/usr/share/systemtap/tapset/*.stp', match count 12
+Pass 1: parsed user script and 13 library script(s) in 350usr/10sys/375real ms.
+Running rm -rf /tmp/staplnEBh7
+\end{verbatim}
+\end{boxedminipage}
+\caption{Listing the tapset search path.}
+\label{fig:tapset-search}
+\end{figure}
+
+When a script file is found that {\em defines} one of the undefined
+symbols, that {\em entire file} is added to the probing session being
+analyzed.  This search is repeated until no more references can become
+satisfied.  Systemtap signals an error if any are still unresolved.
+
+This mechanism enables several programming idioms.  First, it allows
+some global symbols to be defined only for applicable kernel
+version/architecture pairs, and cause an error if their use is
+attempted on an inapplicable host.  Similarly, the same symbol can be
+defined differently depending on kernels, in much the same way that
+different kernel \verb+include/asm/ARCH/+ files contain macros that
+provide a porting layer.
+
+Another use is to separate the default parameters of a tapset routine
+from its implementation.  For example, consider a tapset that defines
+code for relating elapsed time intervals to process scheduling
+activities.  The data collection code can be generic with respect to
+which time unit (jiffies, wall-clock seconds, cycle counts) it can
+use.  It should have a default, but should not require additional
+run-time checks to let a user choose another.
+Figure~\ref{fig:tapset-default} shows a way.
+
+\begin{figure}[h!]
+\begin{boxedminipage}{6in}
+\begin{verbatim}
+# cat tapset/time-common.stp
+global __time_vars
+function timer_begin (name) { __time_vars[name] = __time_value () }
+function timer_end (name) { return __time_value() - __time_vars[name] }
+
+# cat tapset/time-default.stp
+function __time_value () { return gettimeofday_us () }
+
+# cat tapset-time-user.stp
+probe begin
+{
+  timer_begin ("bench")
+  for (i=0; i<100; i++) ; 
+  printf ("%d cycles\n", timer_end ("bench"))
+  exit ()
+}
+function __time_value () { return get_ticks () } # override for greater precision
+
+\end{verbatim}
+\end{boxedminipage}
+\caption{Providing an overrideable default.}
+\label{fig:tapset-default}
+\end{figure}
+
+A tapset that exports only {\em data} may be as useful as ones that
+export functions or probe point aliases (see below).  Such global
+data can be computed and kept up-to-date using probes internal to the
+tapset.  Any outside reference to the global variable would
+incidentally activate all the required probes.
+
+\subsection{Probe point aliases}
+
+\nomenclature{probe point alias}{A probe point that is defined in
+terms of another probe point.}  Probe point aliases allow creation of
+new probe points from existing ones.  This is useful if the new probe
+points are named to provide a higher level of abstraction.  For
+example, the system-calls tapset defines probe point aliases of the
+form \verb+syscall.open+ etc., in terms of lower level ones like
+\verb+kernel.function("sys_open")+.  Even if some future kernel
+renames \verb+sys_open+, the aliased name can remain valid.
+
+A probe point alias definition looks like a normal probe.  Both start
+with the keyword \verb+probe+ and have a probe handler statement block
+at the end.  But where a normal probe just lists its probe points, an
+alias creates a new name using the assignment (\verb+=+) operator.
+Another probe that names the new probe point will create an actual
+probe, with the handler of the alias {\em prepended}.
+
+This prepending behavior serves several purposes.  It allows the alias
+definition to ``preprocess'' the context of the probe before passing
+control to the user-specified handler.  This has several possible uses:
+\begin{tabular}{rl}
+\verb+if ($flag1 != $flag2) next+ & skip probe unless given condition is met \\
+\verb+name = "foo"+ & supply probe-describing values \\
+\verb+var = $var+ & extract target variable to plain local variable \\ %$
+\end{tabular}
+
+Figure~\ref{fig:probe-alias} demonstrates a probe point alias
+definition as well as its use.  It demonstrates how a single probe
+point alias can expand to multiple probe points, even to other
+aliases.  It also includes probe point wildcarding.  These functions
+are designed to compose sensibly.
+
+\begin{figure}[h!]
+\begin{boxedminipage}{4.5in}
+\begin{verbatim}
+# cat probe-alias.stp
+probe syscallgroup.io = syscall.open, syscall.close, 
+                        syscall.read, syscall.write
+{ groupname = "io" }
+
+probe syscallgroup.process = syscall.fork, syscall.execve
+{ groupname = "process" }
+
+probe syscallgroup.* 
+{ groups [execname() . "/" . groupname] ++ }
+
+probe end
+{
+  foreach (eg+ in groups)
+    printf ("%s: %d\n", eg, groups[eg])
+}
+
+global groups
+
+# stap probe-alias.stp
+05-wait_for_sys/io: 19
+10-udev.hotplug/io: 17
+20-hal.hotplug/io: 12
+X/io: 73
+apcsmart/io: 59
+[...]
+make/io: 515
+make/process: 16
+[...]
+xfce-mcs-manage/io: 3
+xfdesktop/io: 5
+[...]
+xmms/io: 7070
+zsh/io: 78
+zsh/process: 5
+\end{verbatim}
+\end{boxedminipage}
+\caption{Classified system call activity.}
+\label{fig:probe-alias}
+\end{figure}
+
+\subsection{Embedded C}
+\label{embedded-c}
+
+Sometimes, a tapset needs to provide data values from the kernel that
+cannot be extracted using ordinary target variables (\verb+$var+).  %$
+This may be because the values are in complicated data structures, may
+require lock awareness, or are defined by layers of macros.  Systemtap
+provides an ``escape hatch'' to go beyond what the language can safely
+offer.  In certain contexts, you may embed plain raw C in tapsets,
+exchanging power for the safety guarantees listed in
+section~\ref{sec:safety}.  End-user scripts {\em may not} include
+embedded C code, unless systemtap is run with the \verb+-g+ (``guru''
+mode) option.  Tapset scripts get guru mode privileges automatically.
+\nomenclature{embedded C}{Special syntax permitting tapsets to include
+literal C code.}
+
+Embedded C can be the body of a script function.  Instead of enclosing
+the function body statements in \verb+{+ and \verb+}+, use \verb+%{+
+and \verb+%}+.  Any enclosed C code is literally transcribed into the
+kernel module: it is up to you to make it safe and correct.  In order
+to take parameters and return a value, a pointer macro \verb+THIS+ is
+available.  Function parameters and a place for the return value are
+available as fields of that pointer.  The familiar data-gathering
+functions \verb+pid()+, \verb+execname()+, and their neighbours are
+all embedded C functions.  Figure~\ref{fig:embedded-C} contains
+another example.
+
+Since systemtap cannot examine the C code to infer these types, an
+optional\footnote{This is only necessary if the types cannot be
+inferred from other sources, such as the call sites.} annotation
+syntax is available to assist the type inference process.  Simply
+suffix parameter names and/or the function name with \verb+:string+ or
+\verb+:long+ to designate the string or numeric type.  In addition,
+the script may include a \verb+%{+ \verb+%}+ block at the outermost
+level of the script, in order to transcribe declarative code like
+\verb+#include <linux/foo.h>+.  These enable the embedded C functions
+to refer to general kernel types.
+
+There are a number of safety-related constraints that should be
+observed by developers of embedded C code.
+\begin{enumerate}
+\item Do not dereference pointers that are not known or testable to be valid.
+\item Do not call any kernel routine that may cause a sleep or fault.
+\item Consider possible undesirable recursion, where your embedded C
+function calls a routine that may be the subject of a probe.  If that
+probe handler calls your embedded C function, you may suffer infinite
+regress.  Similar problems may arise with respect to non-reentrant
+locks.
+\item If locking of a data structure is necessary, use a
+\verb+trylock+ type call to attempt to take the lock.  If that fails,
+give up, do not block.
+\end{enumerate}
+
+\begin{figure}[h!]
+\begin{boxedminipage}{4.5in}
+\begin{verbatim}
+# cat embedded-C.stp
+%{
+#include <linux/utsname.h>
+%}
+
+function utsname:string (field:long)
+%{
+  if (down_read_trylock (& uts_sem))
+    {
+      const char *f =
+         (THIS->field == 0 ? system_utsname.sysname :
+          THIS->field == 1 ? system_utsname.nodename :
+          THIS->field == 2 ? system_utsname.release :
+          THIS->field == 3 ? system_utsname.version :
+          THIS->field == 4 ? system_utsname.machine :
+          THIS->field == 5 ? system_utsname.domainname : "");
+      strlcpy (THIS->__retvalue, f, MAXSTRINGLEN);
+      up_read (& uts_sem);
+    }
+%}
+
+probe begin
+{
+  printf ("%s %s\n", utsname(0), utsname(2))
+  exit ()
+}
+
+# stap -g embedded-C.stp
+Linux 2.6.15
+\end{verbatim}
+\end{boxedminipage}
+\caption{Embedded C function.}
+\label{fig:embedded-C}
+\end{figure}
+
+\subsection{Naming conventions}
+
+Using the tapset search mechanism just described, potentially many
+script files can become selected for inclusion in a single session.
+This raises the problem of name collisions, where different tapsets
+accidentally use the same names for functions/globals.  This can
+result in errors at translate or run time.
+
+To control this problem, systemtap tapset developers are advised to
+follow naming conventions.  Here is some of the guidance.
+\nomenclature{naming convention}{Guidelines for naming variables and
+functions to prevent unintentional duplication.}
+\begin{enumerate}
+\item Pick a unique name for your tapset, and substitute it for
+{\em TAPSET} below.
+\item Separate identifiers meant to be used by tapset users from
+those that are internal implementation artifacts.
+\item Document the first set in the appropriate \verb+man+ pages.
+\item Prefix the names of external identifiers with {\em TAPSET}\_ if
+there is any likelihood of collision with other tapsets or end-user
+scripts.
+\item Prefix any probe point aliases with an appropriate prefix.
+\item Prefix the names of internal identifiers with \_\_{\em TAPSET}\_.
+\end{enumerate}
+
+\subsection{Exercises}
+
+\begin{enumerate}
+\item Write a tapset that implements deferred and ``cancelable''
+logging.  Export a function that enqueues a text string (into some
+private array), returning an id token.  Include a timer-based probe
+that periodically flushes the array to the standard log output.
+Export another function that, if the entry was not already flushed,
+allows a text string to be cancelled from the queue.
+
+\item Create a ``relative timestamp'' tapset with functions that return all
+the same values as the ones in the timestamp tapset, except that they
+are made relative to the start time of the script.
+
+\item Create a tapset that exports a global array that contains a
+mapping of recently seen process ID numbers to process names.
+Intercept key system calls (\verb+execve+?) to update the list
+incrementally.
+
+\item Send your tapset ideas to the mailing list!
+\end{enumerate}
+
+\section{Further information}
+
+For further information about systemtap, several sources are available.
+
+There are \verb+man+ pages:
+
+\begin{tabular}{rl}
+\verb+stap+ & systemtap program usage, language summary \\
+\verb+stapfuncs+ & functions provided by tapsets \\
+\verb+stapprobes+ & probes / probe aliases provided by tapsets \\
+\verb+stapex+ & some example scripts \\
+\end{tabular}
+
+Then, there is the source code itself.  Since systemtap is {\em free
+software}, you should have available the entire source code.  The
+source files in the \verb+tapset/+ directory are also packaged along
+with the systemtap binary.  Since systemtap reads these files rather
+than their documentation, they are the most reliable way to see what's
+inside all the tapsets.  Use the \verb+-v+ (verbose) command line
+option, several times if you like, to show inner workings.
+\nomenclature{free software}{Software licensed under terms such as the
+GNU GPL, which aims to enforce certain specified user freedoms such
+as study, modification, and sharing.}
+
+Finally, there is the project web site
+(\verb+http://sources.redhat.com/systemtap/+) with several articles,
+an archived public mailing list for users and developers
+(\verb+systemtap@sources.redhat.com+), and a live CVS source
+repository.  Come join us!
+
+
+\appendix
+
+\section{Glossary}
+\renewcommand{\nomname}{}
+\printglossary
+\begin{htmlonly}
+{\em Sorry, not available in HTML.}
+\end{htmlonly}
+
+\section{Errors}
+
+We explain some common systemtap error messages in this section.  Most
+error messages include line/character numbers with which one can
+locate the precise location of the error in the script code.  There is
+sometimes a subsequent or prior line that elaborates.
+
+{\large {\em error} {\tt at:} {\em filename}:{\em line}:{\em column}: {\em details}}
+
+\subsection{Parse errors}
+
+\begin{description}
+\item{\bf parse error: expected {\em foo}, saw {\em bar} $\ldots$} \\
+The script contained a grammar error.  A different type of construct
+was expected in the given context.
+
+\item{\bf parse error: embedded code in unprivileged script} \\ The
+script contained unsafe constructs such as embedded C (section
+\ref{embedded-c}), but was run without the \verb+-g+ (guru mode)
+option.  Confirm that the constructs are used safely, then try
+again with \verb+-g+.
+\end{description}
+
+\subsection{Type errors}
+
+\begin{description}
+\item{\bf semantic error: type mismatch for identifier '{\em foo}'
+$\ldots$ string vs. long} \\ In this case, the identifier {\em foo}
+was previously inferred as a numeric type (``long''), but at the given
+point is being used as a string.  Similar messages appear if an array
+index or function parameter slot is used with conflicting types.
+
+\item{\bf semantic error: unresolved type for identifier '{\em foo}'}
+\\ The identifier {\em foo} was used, for example in a \verb+print+,
+but without any operations that could assign it a type.  Similar
+messages may appear if a symbol is misspelled by a typo.
+
+\item{\bf semantic error: Expecting symbol or array index expression}
+\\ Something other than an assignable lvalue was on the left hand side
+of an assignment.
+\end{description}
+
+\subsection{Symbol errors}
+
+\begin{description}
+\item{\bf while searching for arity {\em N} function, semantic error:
+unresolved function call} \\ The script calls a function with {\em N}
+arguments that does not exist.  The function may exist with different
+arity.
+
+\item{\bf semantic error: array locals not supported: $\ldots$} \\ An
+array operation is present for which no matching global declaration
+was found.  Similar messages appear if an array is used with
+inconsistent arities.
+
+\item{\bf semantic error: variable '{\em foo}' modified during 'foreach'} \\
+The array {\em foo} is being modified (being assigned to or deleted from)
+within an active \verb+foreach+ loop.  This invalid operation is also
+detected within a function called from within the loop. 
+\end{description}
+
+\subsection{Probing errors }
+
+\begin{description}
+\item{\bf semantic error: probe point mismatch at position {\em N},
+while resolving probe point {\em foo}} \\ A probe point was named that is
+neither directly understood by systemtap, nor defined as an alias by a
+tapset script.  The divergence from the ``tree'' of probe point
+namespace is at position {\em N} (starting with zero at left).
+
+\item{\bf semantic error: no match for probe point, while resolving
+probe point {\em foo}} \\ A probe point cannot be resolved for any of
+a variety of reasons.  It may be a debuginfo-based probe point such as
+\verb+kernel.function("foobar")+ where no \verb+foobar+ function was
+found.  This can occur if the script specifies a wildcard on function
+names, or an invalid file name or source line number.
+
+\item{\bf semantic error: unresolved target-symbol expression} \\ A
+target variable was referred to in a probe handler that was not
+resolvable.  Or, a target variable is not valid at all in a context
+such as a script function.  This variable may have been elided by an
+optimizing compiler, or may not have a suitable type, or there might
+just be an annoying bug somewhere.  Try again with a slightly
+different probe point (use \verb+statement()+ instead of
+\verb+function()+) to search for a more cooperative neighbour in the
+same area.
+
+\item{\bf semantic error: libdwfl failure $\ldots$} \\ There was a
+problem processing the debugging information.  It may simply be
+missing, or may have some consistency / correctness problems.  Later
+compilers tend to produce better debugging information, so if you can
+upgrade and recompile your kernel/application, it may help.
+
+\item{\bf semantic error: cannot find {\em foo} debuginfo} \\ Similarly,
+suitable debugging information was not found.  Check that your kernel
+build/installation includes a matching version of debugging data.
+\end{description}
+
+\subsection{Runtime errors}
+
+\begin{description}
+
+\item{\bf WARNING: Number of errors: {\em N}, skipped probes: {\em M}} \\
+Errors and/or skipped probes occurred during this run.
+\nomenclature{skipped probe}{A probe handler that should have run but
+couldn't, due to contention or temporary resource problems.}
+
+\item{\bf division by 0} \\ The script code performed an invalid
+division.
+
+\item{\bf aggregate element not found} \\ A statistics extractor
+function other than \verb+@count+ was invoked on an aggregate that has
+not had any values accumulated yet.  This is similar to a division by
+zero.
+
+\item{\bf aggregation overflow} \\ An array containing aggregate
+values contains too many distinct key tuples at this time.
+
+\item{\bf MAXNESTING exceeded} \\ Too many levels of function call nesting
+were attempted.
+
+\item{\bf MAXACTION exceeded} \\ The probe handler attempted to execute
+too many statements.
+
+\item{\bf kernel/user string copy fault at {\em 0xaddr}} \\
+The probe handler attempted to copy a string from kernel or user space
+at an invalid address. 
+
+\item{\bf pointer dereference fault} \\ 
+There was a fault encountered during a pointer dereference operation such
+as a target variable evaluation.
+
+\end{description}
+
+
+\section{Acknowledgments}
+
+The author thanks Martin Hunt, Will Cohen, and Jim Keniston for
+improvement advice for this paper.
+
+\end{document}
diff --git a/doc/tutorial/embedded-C.stp b/doc/tutorial/embedded-C.stp
new file mode 100644
index 00000000..6834d728
--- /dev/null
+++ b/doc/tutorial/embedded-C.stp
@@ -0,0 +1,25 @@
+%{
+#include <linux/utsname.h>
+%}
+
+function utsname:string (field:long)
+%{
+  if (down_read_trylock (& uts_sem))
+    {
+      const char *f =
+         (THIS->field == 0 ? system_utsname.sysname :
+          THIS->field == 1 ? system_utsname.nodename :
+          THIS->field == 2 ? system_utsname.release :
+          THIS->field == 3 ? system_utsname.version :
+          THIS->field == 4 ? system_utsname.machine :
+          THIS->field == 5 ? system_utsname.domainname : "");
+      strlcpy (THIS->__retvalue, f, MAXSTRINGLEN);
+      up_read (& uts_sem);
+    }
+%}
+
+probe begin
+{
+  printf ("%s %s\n", utsname(0), utsname(2))
+  exit ()
+}
diff --git a/doc/tutorial/functions.stp b/doc/tutorial/functions.stp
new file mode 100644
index 00000000..6a825722
--- /dev/null
+++ b/doc/tutorial/functions.stp
@@ -0,0 +1,18 @@
+# Red Hat convention
+function system_uid_p (u) { return u < 500 }
+
+# kernel device number assembly macro
+function makedev (major,minor) { return major << 20 | minor }
+
+function trace_common ()
+{
+  printf("%d %s(%d)", gettimeofday_s(), execname(), pid())
+  # no return value
+} 
+
+function fibonacci (i)
+{
+  if (i < 1) return 0
+  else if (i < 2) return 1
+  else return fibonacci(i-1) + fibonacci(i-2)
+}
diff --git a/doc/tutorial/hello-world.stp b/doc/tutorial/hello-world.stp
new file mode 100644
index 00000000..6a9037a7
--- /dev/null
+++ b/doc/tutorial/hello-world.stp
@@ -0,0 +1,5 @@
+probe begin 
+{
+  print ("hello world\n")
+  exit ()
+}
diff --git a/doc/tutorial/inode-watch.stp b/doc/tutorial/inode-watch.stp
new file mode 100644
index 00000000..caf04b9a
--- /dev/null
+++ b/doc/tutorial/inode-watch.stp
@@ -0,0 +1,13 @@
+probe kernel.function ("vfs_write"),
+      kernel.function ("vfs_read")
+{
+  dev_nr = $file->f_dentry->d_inode->i_sb->s_dev
+  inode_nr = $file->f_dentry->d_inode->i_ino
+
+  if (dev_nr == ($1 << 20 | $2) # major/minor device
+      && inode_nr == $3)
+    printf ("%s(%d) %s 0x%x/%u\n",
+      execname(), pid(), probefunc(), dev_nr, inode_nr)
+}
+
+# dev_name = kernel_string ($file->f_dentry->d_inode->i_sb->s_id)
diff --git a/doc/tutorial/probe-alias.stp b/doc/tutorial/probe-alias.stp
new file mode 100644
index 00000000..aa5feb1b
--- /dev/null
+++ b/doc/tutorial/probe-alias.stp
@@ -0,0 +1,17 @@
+probe syscallgroup.io = syscall.open, syscall.close, 
+                        syscall.read, syscall.write
+{ groupname = "io" }
+
+probe syscallgroup.process = syscall.fork, syscall.execve
+{ groupname = "process" }
+
+probe syscallgroup.* 
+{ groups [execname() . "/" . groupname] ++ }
+
+probe end
+{
+  foreach (eg+ in groups)
+    printf ("%s: %d\n", eg, groups[eg])
+}
+
+global groups
diff --git a/doc/tutorial/socket-trace.stp b/doc/tutorial/socket-trace.stp
new file mode 100644
index 00000000..53b69ecc
--- /dev/null
+++ b/doc/tutorial/socket-trace.stp
@@ -0,0 +1,6 @@
+probe kernel.function("*@net/socket.c") {
+  printf ("%s -> %s\n", thread_indent(1), probefunc())
+}
+probe kernel.function("*@net/socket.c").return {
+  printf ("%s <- %s\n", thread_indent(-1), probefunc())
+}
diff --git a/doc/tutorial/strace-open.stp b/doc/tutorial/strace-open.stp
new file mode 100644
index 00000000..fb87cec1
--- /dev/null
+++ b/doc/tutorial/strace-open.stp
@@ -0,0 +1,8 @@
+probe syscall.open
+{
+  printf ("%s(%d) open (%s)\n", execname(), pid(), argstr) 
+}
+probe timer.ms(4000) # after 4 seconds
+{
+  exit ()
+}
diff --git a/doc/tutorial/tapset-time-user.stp b/doc/tutorial/tapset-time-user.stp
new file mode 100644
index 00000000..32069b03
--- /dev/null
+++ b/doc/tutorial/tapset-time-user.stp
@@ -0,0 +1,8 @@
+probe begin
+{
+  timer_begin ("bench")
+  for (i=0; i<100; i++) ;
+  printf ("%d cycles\n", timer_end ("bench"))
+  exit ()
+}
+function __time_value () { return get_cycles () } # override
diff --git a/doc/tutorial/tapset/time-common.stp b/doc/tutorial/tapset/time-common.stp
new file mode 100644
index 00000000..cec5a4ea
--- /dev/null
+++ b/doc/tutorial/tapset/time-common.stp
@@ -0,0 +1,4 @@
+global __time_vars
+function timer_begin (name) { __time_vars[name] = __time_value () }
+function timer_end (name) { return __time_value() - __time_vars[name] }
+
diff --git a/doc/tutorial/tapset/time-default.stp b/doc/tutorial/tapset/time-default.stp
new file mode 100644
index 00000000..614ff506
--- /dev/null
+++ b/doc/tutorial/tapset/time-default.stp
@@ -0,0 +1,2 @@
+function __time_value () { return gettimeofday_us () }
+
diff --git a/doc/tutorial/timer-jiffies.stp b/doc/tutorial/timer-jiffies.stp
new file mode 100644
index 00000000..d5e92e4a
--- /dev/null
+++ b/doc/tutorial/timer-jiffies.stp
@@ -0,0 +1,10 @@
+global count_jiffies, count_ms
+probe timer.jiffies(100) { count_jiffies ++ }
+probe timer.ms(100) { count_ms ++ }
+probe timer.ms(12345) 
+{
+  hz=(1000*count_jiffies) / count_ms
+  printf ("jiffies:ms ratio %d:%d => CONFIG_HZ=%d\n",
+    count_jiffies, count_ms, hz)
+  exit ()
+}
diff --git a/runtime/ChangeLog b/runtime/ChangeLog
index 512fa061..497b9d5b 100644
--- a/runtime/ChangeLog
+++ b/runtime/ChangeLog
@@ -1,3 +1,19 @@
+2008-02-27  Martin Hunt  <hunt@redhat.com>
+
+	* sym.h (_stp_module): Add text_size, lock, and unwind data
+	pointer.	
+	* sym.c (_stp_find_module_by_addr): New function.
+	(_stp_kallsyms_lookup): Call _stp_find_module_by_addr().
+	(_stp_get_unwind_info): New.
+	
+	* runtime.h: Move debug macros to debug.h. Include it.
+	* debug.h: New file.
+	* map.c: Update debug calls.
+	* map-gen.c: Update debug calls.
+	* pmap-gen.c: Update debug calls.
+	
+	* mempool.c: New file. 
+	
 2008-02-27  Dave Brolley  <brolley@redhat.com>
 
 	PR5189
diff --git a/runtime/debug.h b/runtime/debug.h
new file mode 100644
index 00000000..8f877ede
--- /dev/null
+++ b/runtime/debug.h
@@ -0,0 +1,66 @@
+/* Systemtap Debug Macros
+ * Copyright (C) 2008 Red Hat Inc.
+ *
+ * This file is part of systemtap, and is free software.  You can
+ * redistribute it and/or modify it under the terms of the GNU General
+ * Public License (GPL); either version 2, or (at your option) any
+ * later version.
+ */
+
+#ifndef _STP_DEBUG_H_
+#define _STP_DEBUG_H_
+
+/* These are always on.
+ * _dbug() writes to systemtap stderr.
+ * errk() writes to the system log.
+ */
+#define _dbug(args...) _stp_dbug(__FUNCTION__, __LINE__, args)
+
+#define errk(args...) do {						\
+		printk("Systemtap Error at %s:%d ",__FUNCTION__, __LINE__); \
+		printk(args);						\
+	} while (0)
+
+#ifdef DEBUG_TRANSPORT
+#undef DEBUG_TRANSPORT
+#define DEBUG_TRANSPORT 1
+#else
+#define DEBUG_TRANSPORT 0
+#endif
+
+#ifdef DEBUG_UNWIND
+#undef DEBUG_UNWIND
+#define DEBUG_UNWIND 2
+#else
+#define DEBUG_UNWIND 0
+#endif
+
+#ifdef DEBUG_SYMBOLS
+#undef DEBUG_SYMBOLS
+#define DEBUG_SYMBOLS 4
+#else
+#define DEBUG_SYMBOLS 0
+#endif
+
+#define DEBUG_TYPE (DEBUG_TRANSPORT|DEBUG_UNWIND|DEBUG_SYMBOLS)
+
+#if DEBUG_TYPE > 0
+
+#define dbug(type, args...) do {					\
+		if ((type) & DEBUG_TYPE)				\
+			_stp_dbug(__FUNCTION__, __LINE__, args);	\
+	} while (0)
+
+#define kbug(type, args...) do {					\
+		if ((type) & DEBUG_TYPE) {				\
+			printk("%s:%d ",__FUNCTION__, __LINE__);	\
+			printk(args);					\
+		}							\
+	} while (0)
+
+#else
+#define dbug(type, args...) ;
+#define kbug(type, args...) ;
+#endif /* DEBUG_TYPE > 0 */
+
+#endif /* _STP_DEBUG_H_ */
diff --git a/runtime/map-gen.c b/runtime/map-gen.c
index a17f7e34..ce6e8742 100644
--- a/runtime/map-gen.c
+++ b/runtime/map-gen.c
@@ -229,7 +229,6 @@ static key_data KEYSYM(map_get_key) (struct map_node *mn, int n, int *type)
 	key_data ptr;
 	struct KEYSYM(map_node) *m = (struct KEYSYM(map_node) *)mn;	
 
-	dbug ("n = %d type=%lx\n", n, type);
 	if (n > KEY_ARITY || n < 1) {
 		if (type)
 			*type = END;
@@ -359,7 +358,6 @@ MAP KEYSYM(_stp_map_new) (unsigned max_entries, int htype, ...)
 		start = va_arg(ap, int);
 		stop = va_arg(ap, int);
 		interval = va_arg(ap, int);
-		// dbug ("start=%d stop=%d interval=%d\n", start, stop, interval);
 		va_end (ap);
 	}
 
@@ -404,7 +402,6 @@ int KEYSYM(__stp_map_set) (MAP map, ALLKEYSD(key), VSTYPE val, int add)
 
 	hlist_for_each(e, head) {
 		n = (struct KEYSYM(map_node) *)((long)e - sizeof(struct list_head));
-		//dbug ("n=%lx  key1=%ld n->key1=%ld\n", (long)n, key1, n->key1);
 		if (KEY1_EQ_P(n->key1, key1)
 #if KEY_ARITY > 1
 		    && KEY2_EQ_P(n->key2, key2)
@@ -423,8 +420,6 @@ int KEYSYM(__stp_map_set) (MAP map, ALLKEYSD(key), VSTYPE val, int add)
 		}
 	}
 	/* key not found */
-	dbug("key not found\n");
-
 	n = (struct KEYSYM(map_node)*)_new_map_create (map, head);
 	if (n == NULL)
 		return -1;
diff --git a/runtime/map.c b/runtime/map.c
index 70990876..513e27df 100644
--- a/runtime/map.c
+++ b/runtime/map.c
@@ -138,7 +138,6 @@ int64_t _stp_key_get_int64 (struct map_node *mn, int n)
 
 	if (mn) {
 		res = (*mn->map->get_key)(mn, n, &type).val;
-		dbug("type=%d\n", type);
 		if (type != INT64)
 			res = 0;
 	}
@@ -159,7 +158,6 @@ char *_stp_key_get_str (struct map_node *mn, int n)
 
 	if (mn) {
 		str = (*mn->map->get_key)(mn, n, &type).strp;
-		dbug("type=%d\n", type);
 		if (type != STRING)
 			str = "bad type";
 	}
@@ -716,7 +714,6 @@ void _stp_map_printn (MAP map, int n, const char *fmt)
 	struct map_node *ptr;
 	int type, num;
 	key_data kd;
-	dbug ("print map %lx fmt=%s\n", (long)map, fmt);
 
 	if (n < 0)
 		return;
@@ -763,7 +760,6 @@ static struct map_node *_stp_new_agg(MAP agg, struct hlist_head *ahead, struct m
 {
 	struct map_node *aptr;
 	/* copy keys and aggregate */
-	dbug("creating new entry in %lx\n", (long)agg);
 	aptr = _new_map_create(agg, ahead);
 	if (aptr == NULL)
 		return NULL;
@@ -952,12 +948,10 @@ static struct map_node *_new_map_create (MAP map, struct hlist_head *head)
 			return NULL;
 		}
 		m = (struct map_node *)map->head.next;
-		dbug ("got %lx off head\n", (long)m);
 		hlist_del_init(&m->hnode);
 	} else {
 		m = (struct map_node *)map->pool.next;
 		map->num++;
-		dbug ("got %lx off pool\n", (long)m);
 	}
 	list_move_tail(&m->lnode, &map->head);
 	
diff --git a/runtime/mempool.c b/runtime/mempool.c
new file mode 100644
index 00000000..0fbb4326
--- /dev/null
+++ b/runtime/mempool.c
@@ -0,0 +1,135 @@
+/*  -*- linux-c -*-
+ * Preallocated memory pools
+ * Copyright (C) 2008 Red Hat Inc.
+ *
+ * This file is part of systemtap, and is free software.  You can
+ * redistribute it and/or modify it under the terms of the GNU General
+ * Public License (GPL); either version 2, or (at your option) any
+ * later version.
+ */
+
+#ifndef _STP_MEMPOOL_C_
+#define _STP_MEMPOOL_C_
+
+/* An opaque struct identifying the memory pool. */
+typedef struct {
+	struct list_head free_list;	/* buffers currently available for allocation */
+	unsigned num;	/* buffer count, set by _stp_mempool_init/_stp_mempool_resize */
+	unsigned size;	/* bytes allocated per buffer, bookkeeping header included */
+	spinlock_t lock;	/* taken around free_list updates in resize/alloc/free */
+} _stp_mempool_t;
+
+/* for internal use only: bookkeeping header placed in front of each buffer's data */
+struct _stp_mem_buffer {
+	struct list_head list;	/* must stay first: the pool code casts buffer <-> list_head */
+	_stp_mempool_t *pool;	/* owning pool, read back by _stp_mempool_free() */
+	void *buf;	/* marks where the data starts; &buf is what _stp_mempool_alloc() returns */
+};
+
+/* Delete a memory pool: frees the buffers on its free list (only those), then the pool. */
+static void _stp_mempool_destroy(_stp_mempool_t *pool)
+{
+	struct list_head *p, *tmp;
+	if (pool) {	/* a NULL pool is a no-op */
+		list_for_each_safe(p, tmp, &pool->free_list) {
+			list_del(p);
+			_stp_kfree(p);	/* p is the list member at the start of a struct _stp_mem_buffer */
+		}
+		_stp_kfree(pool);
+	}
+}
+
+/* Create a new memory pool of num buffers, each with room for size data bytes; NULL on failure. */
+static _stp_mempool_t *_stp_mempool_init(size_t size, size_t num)
+{
+	int i, alloc_size;
+	struct _stp_mem_buffer *m;
+
+	_stp_mempool_t *pool = (_stp_mempool_t *)_stp_kmalloc(sizeof(_stp_mempool_t));
+	if (unlikely(pool == NULL)) {
+		errk("Memory allocation failed.\n");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&pool->free_list);
+	spin_lock_init(&pool->lock);
+
+	alloc_size = size + sizeof(struct _stp_mem_buffer) - sizeof(void *);	/* data overlays the buf member */
+
+	for (i = 0; i < num; i++) {
+		m = (struct _stp_mem_buffer *)_stp_kmalloc(alloc_size);
+		if (unlikely(m == NULL))
+			goto err;
+		m->pool = pool;
+		list_add((struct list_head *)m, &pool->free_list);	/* cast relies on list being the first member */
+	}
+	pool->num = num;
+	pool->size = alloc_size;	/* stored so _stp_mempool_resize() can allocate matching buffers */
+	return pool;
+
+err:
+	_stp_mempool_destroy(pool);	/* releases the buffers added so far and the pool itself */
+	return NULL;
+}
+
+/* Resize a pool to num buffers (0 = query only). Returns the buffer count actually reached. */
+static int _stp_mempool_resize(_stp_mempool_t *pool, size_t num)
+{
+	unsigned long flags;
+	struct _stp_mem_buffer *m;
+
+	if (unlikely(num == 0 || num == pool->num))
+		return pool->num;
+
+	/* Test pool->num directly: a bound of (num - pool->num) shrinks as buffers are added. */
+	while (pool->num < num) {
+		m = (struct _stp_mem_buffer *)_stp_kmalloc(pool->size);
+		if (unlikely(m == NULL))
+			break;	/* out of memory: keep what we have and report it */
+		m->pool = pool;
+		spin_lock_irqsave(&pool->lock, flags);
+		list_add(&m->list, &pool->free_list);
+		spin_unlock_irqrestore(&pool->lock, flags);
+		pool->num++;
+	}
+	while (pool->num > num) {
+		spin_lock_irqsave(&pool->lock, flags);
+		if (list_empty(&pool->free_list)) {
+			spin_unlock_irqrestore(&pool->lock, flags);
+			break;	/* the rest are in use; never unlink and free the list head */
+		}
+		m = (struct _stp_mem_buffer *)pool->free_list.next;
+		list_del(&m->list);
+		spin_unlock_irqrestore(&pool->lock, flags);
+		_stp_kfree(m);
+		pool->num--;
+	}
+	return pool->num;
+}
+
+/* allocate a buffer from a memory pool; returns NULL when the free list is empty */
+static void *_stp_mempool_alloc(_stp_mempool_t *pool)
+{
+	unsigned long flags;
+	struct _stp_mem_buffer *ptr = NULL;
+	spin_lock_irqsave(&pool->lock, flags);
+	if (likely(!list_empty(&pool->free_list))) {
+		ptr = (struct _stp_mem_buffer *)pool->free_list.next;	/* take the first free buffer */
+		list_del_init(&ptr->list);
+		spin_unlock_irqrestore(&pool->lock, flags);
+		return &ptr->buf;	/* address of the buf member, i.e. the data area after the header */
+	}
+	spin_unlock_irqrestore(&pool->lock, flags);
+	return NULL;
+}
+
+/* return a buffer to its memory pool; buf must be a pointer handed out by _stp_mempool_alloc() */
+static void _stp_mempool_free(void *buf)
+{
+	unsigned long flags;
+	struct _stp_mem_buffer *m = container_of(buf, struct _stp_mem_buffer, buf);	/* step back to the header */
+	spin_lock_irqsave(&m->pool->lock, flags);
+	list_add(&m->list, &m->pool->free_list);
+	spin_unlock_irqrestore(&m->pool->lock, flags);
+}
+#endif /* _STP_MEMPOOL_C_ */
diff --git a/runtime/pmap-gen.c b/runtime/pmap-gen.c
index 0efffdb6..8666549b 100644
--- a/runtime/pmap-gen.c
+++ b/runtime/pmap-gen.c
@@ -437,7 +437,6 @@ PMAP KEYSYM(_stp_pmap_new) (unsigned max_entries, int htype, ...)
 		start = va_arg(ap, int);
 		stop = va_arg(ap, int);
 		interval = va_arg(ap, int);
-		// dbug ("start=%d stop=%d interval=%d\n", start, stop, interval);
 		va_end (ap);
 	}
 
@@ -515,8 +514,6 @@ int KEYSYM(__stp_pmap_set) (MAP map, ALLKEYSD(key), VSTYPE val, int add)
 	}
 
 	/* key not found */
-	dbug("key not found\n");
-
 	n = (struct KEYSYM(pmap_node)*)_new_map_create (map, head);
 	if (n == NULL)
 		return -1;
@@ -678,7 +675,6 @@ VALTYPE KEYSYM(_stp_pmap_get) (PMAP pmap, ALLKEYSD(key))
 #endif
 				) {
 				if (anode == NULL) {
-					// dbug("agg=%lx ahead=%lx\n", (long)agg, (long)ahead);
 					anode = _stp_new_agg(agg, ahead, (struct map_node *)n);
 				} else {
 					if (clear_agg) {
@@ -738,7 +734,6 @@ int KEYSYM(__stp_pmap_del) (MAP map, ALLKEYSD(key))
 	}
 
 	/* key not found */
-	dbug("key not found\n");
 	return 0;
 }
 
diff --git a/runtime/runtime.h b/runtime/runtime.h
index d951833d..318d3038 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -1,5 +1,5 @@
 /* main header file
- * Copyright (C) 2005-2007 Red Hat Inc.
+ * Copyright (C) 2005-2008 Red Hat Inc.
  * Copyright (C) 2005, 2006 Intel Corporation.
  *
  * This file is part of systemtap, and is free software.  You can
@@ -46,21 +46,7 @@
 static void _stp_dbug (const char *func, int line, const char *fmt, ...);
 void _stp_error (const char *fmt, ...);
 
-#ifdef DEBUG
-/** Prints debug line.
- * This function prints a debug message immediately to staprun. 
- * If the last character is not a newline, then one is added. 
- * @param args A variable number of args in a format like printf.
- * @ingroup io
- */
-#define dbug(args...) _stp_dbug(__FUNCTION__, __LINE__, args)
-#define kbug(args...) {printk("%s:%d ",__FUNCTION__, __LINE__); printk(args); }
-#else
-#define dbug(args...) ;
-#define kbug(args...) ;
-#endif /* DEBUG */
-#define _dbug(args...) _stp_dbug(__FUNCTION__, __LINE__, args)
-#define errk(args...) {printk("Systemtap Error at %s:%d ",__FUNCTION__, __LINE__); printk(args); }
+#include "debug.h"
 
 /* atomic globals */
 static atomic_t _stp_transport_failures = ATOMIC_INIT (0);
diff --git a/runtime/staprun/symbols.c b/runtime/staprun/symbols.c
index e33ee624..c7362d9e 100644
--- a/runtime/staprun/symbols.c
+++ b/runtime/staprun/symbols.c
@@ -19,9 +19,10 @@ static int send_data(int32_t type, void *data, int len)
 	return write(control_channel, data, len);
 }
 
+
 /* Get the sections for a module. Put them in the supplied buffer */
 /* in the following order: */
-/* [struct _stp_msg_module][struct _stp_symbol sections ...][string data]*/
+/* [struct _stp_msg_module][struct _stp_symbol sections ...][string data][unwind data] */
 /* Return the total length of all the data. */
 
 #define SECDIR "/sys/module/%s/sections"
@@ -31,8 +32,9 @@ static int get_sections(char *name, char *data_start, int datalen)
 	char filename[STP_MODULE_NAME_LEN + 256]; 
 	char buf[32], strdata_start[32768];
 	char *strdata=strdata_start, *data=data_start;
-	int fd, len, res;
+	int fd, len, res, unwind_data_len=0;
 	struct _stp_msg_module *mod = (struct _stp_msg_module *)data_start;
+
 	struct dirent *d;
 	DIR *secdir;
 	void *sec;
@@ -63,6 +65,9 @@ static int get_sections(char *name, char *data_start, int datalen)
 		return -1;
 	}
 
+	/* FIXME: optionally fill in unwind data here */
+	mod->unwind_len = unwind_data_len;
+
 	while ((d = readdir(secdir))) {
 		char *secname = d->d_name;
 
@@ -138,6 +143,14 @@ static int get_sections(char *name, char *data_start, int datalen)
 	while (len--)
 		*data++ = *strdata++;
 	
+#if 0
+	if (unwind_data_len) {
+		if ((unwind_data_len + data - data_start) > datalen)
+			goto err0;
+		memcpy(data, unwind_data, unwind_data_len);
+		data += unwind_data_len;
+	}
+#endif
 	return data - data_start;
 
 err1:
@@ -211,7 +224,7 @@ int do_kernel_symbols(void)
 	int ret, num_syms, i = 0, struct_symbol_size;
 	int max_syms= MAX_SYMBOLS, data_basesize = MAX_SYMBOLS*32;
 
-	if (kernel_ptr_size == 8) 
+	if (kernel_ptr_size == 8)
 		struct_symbol_size = sizeof(struct _stp_symbol64);
 	else
 		struct_symbol_size = sizeof(struct _stp_symbol32);
@@ -285,10 +298,12 @@ int do_kernel_symbols(void)
 	if (num_syms <= 0)
 		goto err;
 
+
 	/* send header */
 	struct _stp_msg_symbol_hdr smsh;
 	smsh.num_syms = num_syms;
 	smsh.sym_size = (uint32_t)(dataptr - data_base);
+	smsh.unwind_size = (uint32_t)0;
 	if (send_request(STP_SYMBOLS, &smsh, sizeof(smsh)) <= 0)
 		goto err;
 
diff --git a/runtime/sym.c b/runtime/sym.c
index 56c93064..3c2f859a 100644
--- a/runtime/sym.c
+++ b/runtime/sym.c
@@ -1,6 +1,6 @@
 /* -*- linux-c -*- 
  * Symbolic Lookup Functions
- * Copyright (C) 2005, 2006, 2007 Red Hat Inc.
+ * Copyright (C) 2005-2008 Red Hat Inc.
  * Copyright (C) 2006 Intel Corporation.
  *
  * This file is part of systemtap, and is free software.  You can
@@ -9,8 +9,8 @@
  * later version.
  */
 
-#ifndef _SYM_C_
-#define _SYM_C_
+#ifndef _STP_SYM_C_
+#define _STP_SYM_C_
 
 #include "string.c"
 
@@ -20,11 +20,12 @@
  * @{
  */
 
-unsigned long _stp_module_relocate (const char *module, const char *section, unsigned long offset) {
+unsigned long _stp_module_relocate(const char *module, const char *section, unsigned long offset)
+{
 	static struct _stp_module *last = NULL;
 	static struct _stp_symbol *last_sec;
 	unsigned long flags;
-	int i,j;
+	int i, j;
 
 	/* if module is -1, we invalidate last. _stp_del_module calls this when modules are deleted. */
 	if ((long)module == -1) {
@@ -32,53 +33,52 @@ unsigned long _stp_module_relocate (const char *module, const char *section, uns
 		return 0;
 	}
 
-	dbug("%s, %s, %lx\n", module, section, offset);
+	dbug(DEBUG_SYMBOLS, "%s, %s, %lx\n", module, section, offset);
 
-	STP_LOCK_MODULES;
-	if (! module
-            || !strcmp (section, "") /* absolute, unrelocated address */
-            || _stp_num_modules == 0) {
-		STP_UNLOCK_MODULES;
-		return offset; 
+	STP_RLOCK_MODULES;
+	if (!module || !strcmp(section, "")	/* absolute, unrelocated address */
+	    ||_stp_num_modules == 0) {
+		STP_RUNLOCK_MODULES;
+		return offset;
 	}
 
 	/* Most likely our relocation is in the same section of the same module as the last. */
 	if (last) {
-		if (!strcmp (module, last->name) && !strcmp (section, last_sec->symbol)) {
+		if (!strcmp(module, last->name) && !strcmp(section, last_sec->symbol)) {
 			offset += last_sec->addr;
-			STP_UNLOCK_MODULES;
-			dbug("offset = %lx\n", offset);
+			STP_RUNLOCK_MODULES;
+			dbug(DEBUG_SYMBOLS, "offset = %lx\n", offset);
 			return offset;
 		}
 	}
 
 	/* not cached. need to scan all modules */
-        if (! strcmp (module, "kernel")) {
-		STP_UNLOCK_MODULES;
+	if (!strcmp(module, "kernel")) {
+		STP_RUNLOCK_MODULES;
 
 		/* See also transport/symbols.c (_stp_do_symbols). */
-		if (strcmp (section, "_stext"))
+		if (strcmp(section, "_stext"))
 			return 0;
 		else
 			return offset + _stp_modules[0]->text;
 	} else {
 		/* relocatable module */
-		for (i = 1; i < _stp_num_modules; i++) { /* skip over [0]=kernel */
+		for (i = 1; i < _stp_num_modules; i++) {	/* skip over [0]=kernel */
 			last = _stp_modules[i];
 			if (strcmp(module, last->name))
 				continue;
 			for (j = 0; j < (int)last->num_sections; j++) {
 				last_sec = &last->sections[j];
-				if (!strcmp (section, last_sec->symbol)) {
+				if (!strcmp(section, last_sec->symbol)) {
 					offset += last_sec->addr;
-					STP_UNLOCK_MODULES;
-					dbug("offset = %lx\n", offset);
+					STP_RUNLOCK_MODULES;
+					dbug(DEBUG_SYMBOLS, "offset = %lx\n", offset);
 					return offset;
 				}
 			}
 		}
 	}
-	STP_UNLOCK_MODULES;
+	STP_RUNLOCK_MODULES;
 	last = NULL;
 	return 0;
 }
@@ -97,24 +97,15 @@ static unsigned long _stp_kallsyms_lookup_name(const char *name)
 	return 0;
 }
 
-static const char * _stp_kallsyms_lookup (
-	unsigned long addr,
-	unsigned long *symbolsize,
-	unsigned long *offset,
-	char **modname,
-	char *namebuf)
+static struct _stp_module *_stp_find_module_by_addr(unsigned long addr)
 {
-	struct _stp_module *m;
-	struct _stp_symbol *s;
-	unsigned long flags;
-	unsigned end, begin = 0;
+	unsigned begin = 0;
+	unsigned end = _stp_num_modules;
 
-	if (STP_TRYLOCK_MODULES)
+	if (unlikely(addr < _stp_modules_by_addr[0]->text))
 		return NULL;
 
-	end = _stp_num_modules;
-
-	if (_stp_num_modules >= 2 && addr > _stp_modules_by_addr[1]->text) {
+	if (_stp_num_modules > 1 && addr > _stp_modules_by_addr[0]->data) {
 		/* binary search on index [begin,end) */
 		do {
 			unsigned mid = (begin + end) / 2;
@@ -125,18 +116,51 @@ static const char * _stp_kallsyms_lookup (
 		} while (begin + 1 < end);
 		/* result index in $begin, guaranteed between [0,_stp_num_modules) */
 	}
-	m = _stp_modules_by_addr[begin];
-	begin = 0;
-	end = m->num_symbols;
+	/* check if addr is past the last module */
+	if (unlikely(begin == _stp_num_modules - 1
+		     && (addr > _stp_modules_by_addr[begin]->text + _stp_modules_by_addr[begin]->text_size)))
+		return NULL;
+
+	return _stp_modules_by_addr[begin];
+}
 
-	/* m->data is the lowest address of a data section. It should be  */
-	/* after the text section. */
-	/* If our address is in the data section, then return now. */
-	if (m->data > m->text && addr >= m->data) {
-		STP_UNLOCK_MODULES;
+static struct _stp_module *_stp_get_unwind_info(unsigned long addr)
+{
+	struct _stp_module *m;
+	struct _stp_symbol *s;	/* NOTE(review): not referenced in this function */
+	unsigned long flags;	/* used by the STP_RLOCK_MODULES/STP_RUNLOCK_MODULES macros */
+
+	STP_RLOCK_MODULES;
+	m = _stp_find_module_by_addr(addr);
+	if (unlikely(m == NULL)) {
+		STP_RUNLOCK_MODULES;
+		return NULL;
+	}
+	/* Lock the module struct so it doesn't go away while being used (probably can't happen, but be sure for now). */
+	/* Nothing in this function unlocks it: m is returned with m->lock still read-held. */
+	read_lock(&m->lock);
+
+	STP_RUNLOCK_MODULES;
+	return m;
+}
+
+static const char *_stp_kallsyms_lookup(unsigned long addr,
+					unsigned long *symbolsize, unsigned long *offset, char **modname, char *namebuf)
+{
+	struct _stp_module *m;
+	struct _stp_symbol *s;
+	unsigned long flags;
+	unsigned end, begin = 0;
+
+	STP_RLOCK_MODULES;
+	m = _stp_find_module_by_addr(addr);
+	if (unlikely(m == NULL)) {
+		STP_RUNLOCK_MODULES;
+		return NULL;
+	}
+
+	end = m->num_symbols;
+
 	/* binary search for symbols within the module */
 	do {
 		unsigned mid = (begin + end) / 2;
@@ -148,31 +172,29 @@ static const char * _stp_kallsyms_lookup (
 	/* result index in $begin */
 
 	s = &m->symbols[begin];
-	if (addr < s->addr) {
-		STP_UNLOCK_MODULES;
-		return NULL;
-	} else {
-		if (offset) *offset = addr - s->addr;
-		if (modname) *modname = m->name;
+	if (likely(addr >= s->addr)) {
+		if (offset)
+			*offset = addr - s->addr;
+		if (modname)
+			*modname = m->name;
 		if (symbolsize) {
 			if ((begin + 1) < m->num_symbols)
-				*symbolsize = m->symbols[begin+1].addr - s->addr;
+				*symbolsize = m->symbols[begin + 1].addr - s->addr;
 			else
 				*symbolsize = 0;
 			// NB: This is only a heuristic.  Sometimes there are large
 			// gaps between text areas of modules.
 		}
 		if (namebuf) {
-			strlcpy (namebuf, s->symbol, KSYM_NAME_LEN+1);
-			STP_UNLOCK_MODULES;
+			strlcpy(namebuf, s->symbol, KSYM_NAME_LEN + 1);
+			STP_RUNLOCK_MODULES;
 			return namebuf;
-		}
-		else {
-			STP_UNLOCK_MODULES;
+		} else {
+			STP_RUNLOCK_MODULES;
 			return s->symbol;
 		}
 	}
-	STP_UNLOCK_MODULES;
+	STP_RUNLOCK_MODULES;
 	return NULL;
 }
 
@@ -182,31 +204,31 @@ static const char * _stp_kallsyms_lookup (
  * a probe because it is too time-consuming. Use at module exit time.
  */
 
-void _stp_symbol_print (unsigned long address)
-{ 
+void _stp_symbol_print(unsigned long address)
+{
 	char *modname;
-        const char *name;
-        unsigned long offset, size;
+	const char *name;
+	unsigned long offset, size;
 
-        name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL);
+	name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL);
 
-	_stp_printf ("%p", (int64_t)address);
+	_stp_printf("%p", (int64_t) address);
 
-	if (name) {		
+	if (name) {
 		if (modname && *modname)
-			_stp_printf (" : %s+%#lx/%#lx [%s]", name, offset, size, modname);
+			_stp_printf(" : %s+%#lx/%#lx [%s]", name, offset, size, modname);
 		else
-			_stp_printf (" : %s+%#lx/%#lx", name, offset, size);
+			_stp_printf(" : %s+%#lx/%#lx", name, offset, size);
 	}
 }
 
 /* Like _stp_symbol_print, except only print if the address is a valid function address */
 
-void _stp_func_print (unsigned long address, int verbose, int exact)
-{ 
+void _stp_func_print(unsigned long address, int verbose, int exact)
+{
 	char *modname;
-        const char *name;
-        unsigned long offset, size;
+	const char *name;
+	unsigned long offset, size;
 	char *exstr;
 
 	if (exact)
@@ -214,33 +236,32 @@ void _stp_func_print (unsigned long address, int verbose, int exact)
 	else
 		exstr = " (inexact)";
 
-        name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL);
+	name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL);
 
 	if (name) {
 		if (verbose) {
 			if (modname && *modname)
-				_stp_printf (" %p : %s+%#lx/%#lx [%s]%s\n", 
-					     (int64_t)address, name, offset, size, modname, exstr);
+				_stp_printf(" %p : %s+%#lx/%#lx [%s]%s\n",
+					    (int64_t) address, name, offset, size, modname, exstr);
 			else
-				_stp_printf (" %p : %s+%#lx/%#lx%s\n", 
-					     (int64_t)address, name, offset, size, exstr);
-		} else 
-			_stp_printf ("%p ", (int64_t)address);
+				_stp_printf(" %p : %s+%#lx/%#lx%s\n", (int64_t) address, name, offset, size, exstr);
+		} else
+			_stp_printf("%p ", (int64_t) address);
 	}
 }
 
-void _stp_symbol_snprint (char *str, size_t len, unsigned long address)
-{ 
-    char *modname;
-    const char *name;
-    unsigned long offset, size;
-
-    name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL);
-    if (name)
-	    strlcpy(str, name, len);
-    else
-	    _stp_snprintf(str, len, "%p", (int64_t)address);
+void _stp_symbol_snprint(char *str, size_t len, unsigned long address)
+{
+	char *modname;
+	const char *name;
+	unsigned long offset, size;
+
+	name = _stp_kallsyms_lookup(address, &size, &offset, &modname, NULL);
+	if (name)
+		strlcpy(str, name, len);
+	else
+		_stp_snprintf(str, len, "%p", (int64_t) address);
 }
 
 /** @} */
-#endif /* _SYM_C_ */
+#endif /* _STP_SYM_C_ */
diff --git a/runtime/sym.h b/runtime/sym.h
index 6a55a22e..b124882a 100644
--- a/runtime/sym.h
+++ b/runtime/sym.h
@@ -1,5 +1,5 @@
-/*
- * Copyright (C) 2005, 2006 Red Hat Inc.
+/* -*- linux-c -*- 
+ * Copyright (C) 2005-2008 Red Hat Inc.
  *
  * This file is part of systemtap, and is free software.  You can
  * redistribute it and/or modify it under the terms of the GNU General
@@ -13,39 +13,59 @@
 #define STP_MODULE_NAME_LEN 64
 
 struct _stp_symbol {
-  unsigned long addr;
-  const char *symbol;
+	unsigned long addr;
+	const char *symbol;
+};
+struct stap_symbol {
+	unsigned long addr;
+	const char *symbol;
+	const char *module;
 };
 
-struct _stp_module {
-  /* the module name, or "" for kernel */
-  char name[STP_MODULE_NAME_LEN];
-
-  /* A pointer to the struct module. Note that we cannot */
-  /* trust this because as of 2.6.19, there are not yet */
-  /* any notifier hooks that will tell us when a module */
-  /* is unloading. */
-  unsigned long module;
-
-  /* the start of the module's text and data sections */
-  unsigned long text;
-  unsigned long data;
-
-  /* how many symbols this module has that we are interested in */
-  uint32_t num_symbols;
-
-  /* how many sections this module has */
-  uint32_t num_sections;
-  struct _stp_symbol *sections;
-
-  /* how the symbol_data below was allocated */
-  int32_t allocated;  /* 0 = kmalloc, 1 = vmalloc */
-  
-  /* an array of num_symbols _stp_symbol structs */
-  struct _stp_symbol *symbols; /* ordered by address */
+DEFINE_RWLOCK(_stp_module_lock);
+#define STP_RLOCK_MODULES  read_lock_irqsave(&_stp_module_lock, flags)
+#define STP_WLOCK_MODULES  write_lock_irqsave(&_stp_module_lock, flags)
+#define STP_RUNLOCK_MODULES read_unlock_irqrestore(&_stp_module_lock, flags)
+#define STP_WUNLOCK_MODULES write_unlock_irqrestore(&_stp_module_lock, flags)
 
-  /* where we stash our copy of the strtab */
-  void *symbol_data; /* private */
+struct _stp_module {
+	/* the module name, or "" for kernel */
+	char name[STP_MODULE_NAME_LEN];
+	
+	/* A pointer to the struct module. Note that we cannot */
+	/* trust this because as of 2.6.19, there are not yet */
+	/* any notifier hooks that will tell us when a module */
+	/* is unloading. */
+	unsigned long module;
+	
+	/* the start of the module's text and data sections */
+	unsigned long text;
+	unsigned long data;
+	
+	uint32_t text_size;
+	
+	/* how many symbols this module has that we are interested in */
+	uint32_t num_symbols;
+	
+	/* how many sections this module has */
+	uint32_t num_sections;
+	
+	/* how the symbol_data below was allocated */
+	int32_t allocated;  /* 0 = kmalloc, 1 = vmalloc */
+	
+	struct _stp_symbol *sections;
+	
+	/* an array of num_symbols _stp_symbol structs */
+	struct _stp_symbol *symbols; /* ordered by address */
+	
+	/* where we stash our copy of the strtab */
+	void *symbol_data;
+	
+	/* the stack unwind data for this module */
+	void *unwind_data;
+	uint32_t unwind_data_len;
+	rwlock_t lock; /* lock while unwinding is happening */
+	
 };
 
 #ifndef STP_MAX_MODULES
@@ -62,4 +82,5 @@ struct _stp_module *_stp_modules_by_addr[STP_MAX_MODULES];
 int _stp_num_modules = 0;
 
 unsigned long _stp_module_relocate (const char *module, const char *section, unsigned long offset);
+static struct _stp_module *_stp_get_unwind_info (unsigned long addr);
 #endif /* _STAP_SYMBOLS_H_ */
diff --git a/runtime/transport/ChangeLog b/runtime/transport/ChangeLog
index 764e3579..c3837f86 100644
--- a/runtime/transport/ChangeLog
+++ b/runtime/transport/ChangeLog
@@ -1,3 +1,16 @@
+2008-02-27  Martin Hunt  <hunt@redhat.com>
+
+	* symbols.c: Use rwlocks. Use new dbug macros. Handle
+	unwind info if present.
+
+	* transport.c: Include mempool.c. Update dbug and kbug calls
+	to new macros.
+	* transport_msgs.h (_stp_command_name): Add
+	struct containing message names for debugging.
+
+	* control.c, procfs.c: Use new dbug macros. Use
+	new mempool functions.
+
 2008-01-28  Martin Hunt  <hunt@redhat.com>
 
 	* control.c, procfs.c, symbols.c: Use DEFINE_SPINLOCK
diff --git a/runtime/transport/control.c b/runtime/transport/control.c
index 0bf99fc8..6a5b272d 100644
--- a/runtime/transport/control.c
+++ b/runtime/transport/control.c
@@ -12,15 +12,13 @@
 #define STP_DEFAULT_BUFFERS 50
 static int _stp_current_buffers = STP_DEFAULT_BUFFERS;
 
+static _stp_mempool_t *_stp_pool_q;
 static struct list_head _stp_ctl_ready_q;
 static struct list_head _stp_sym_ready_q;
-static struct list_head _stp_pool_q;
-DEFINE_SPINLOCK(_stp_pool_lock);
 DEFINE_SPINLOCK(_stp_ctl_ready_lock);
 DEFINE_SPINLOCK(_stp_sym_ready_lock);
 
-static ssize_t _stp_sym_write_cmd (struct file *file, const char __user *buf,
-				    size_t count, loff_t *ppos)
+static ssize_t _stp_sym_write_cmd(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 {
 	static int saved_type = 0;
 	int type;
@@ -28,7 +26,7 @@ static ssize_t _stp_sym_write_cmd (struct file *file, const char __user *buf,
 	if (count < sizeof(int32_t))
 		return 0;
 
-	/* Allow sending of packet type followed by data in the next packet.*/
+	/* Allow sending of packet type followed by data in the next packet. */
 	if (count == sizeof(int32_t)) {
 		if (get_user(saved_type, (int __user *)buf))
 			return -EFAULT;
@@ -42,11 +40,14 @@ static ssize_t _stp_sym_write_cmd (struct file *file, const char __user *buf,
 		count -= sizeof(int);
 		buf += sizeof(int);
 	}
-	
-	kbug ("count:%d type:%d\n", (int)count, type);
+
+#if DEBUG_TRANSPORT > 0
+	if (type >= 0 && type < STP_MAX_CMD)	/* type is a signed int: a negative value must not index the name table */
+		_dbug("Got %s. len=%d\n", _stp_command_name[type], (int)count);
+#endif
 
 	switch (type) {
-	case STP_SYMBOLS:		
+	case STP_SYMBOLS:
 		count = _stp_do_symbols(buf, count);
 		break;
 	case STP_MODULE:
@@ -54,21 +55,20 @@ static ssize_t _stp_sym_write_cmd (struct file *file, const char __user *buf,
 			count = _stp_do_module(buf, count);
 		else {
 			/* count == 1 indicates end of initial modules list */
-			_stp_ctl_send(STP_TRANSPORT, NULL, 0);			
+			_stp_ctl_send(STP_TRANSPORT, NULL, 0);
 		}
 		break;
 	case STP_EXIT:
 		_stp_exit_flag = 1;
 		break;
 	default:
-		errk ("invalid symbol command type %d\n", type);
+		errk("invalid symbol command type %d\n", type);
 		return -EINVAL;
 	}
 
 	return count;
 }
-static ssize_t _stp_ctl_write_cmd (struct file *file, const char __user *buf,
-				    size_t count, loff_t *ppos)
+static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 {
 	int type;
 	static int started = 0;
@@ -79,7 +79,10 @@ static ssize_t _stp_ctl_write_cmd (struct file *file, const char __user *buf,
 	if (get_user(type, (int __user *)buf))
 		return -EFAULT;
 
-	kbug ("count:%d type:%d\n", (int)count, type);
+#if DEBUG_TRANSPORT > 0
+	if (type >= 0 && type < STP_MAX_CMD)	/* type was read with get_user(): reject negative indexes as well */
+		_dbug("Got %s. len=%d\n", _stp_command_name[type], (int)count);
+#endif
 
 	count -= sizeof(int);
 	buf += sizeof(int);
@@ -90,9 +93,9 @@ static ssize_t _stp_ctl_write_cmd (struct file *file, const char __user *buf,
 			struct _stp_msg_start st;
 			if (count < sizeof(st))
 				return 0;
-			if (copy_from_user (&st, buf, sizeof(st)))
+			if (copy_from_user(&st, buf, sizeof(st)))
 				return -EFAULT;
-			_stp_handle_start (&st);
+			_stp_handle_start(&st);
 			started = 1;
 		}
 		break;
@@ -107,11 +110,11 @@ static ssize_t _stp_ctl_write_cmd (struct file *file, const char __user *buf,
 #endif
 	case STP_READY:
 		/* request symbolic information */
-		_stp_ask_for_symbols();		
+		_stp_ask_for_symbols();
 		break;
-		
+
 	default:
-		errk ("invalid command type %d\n", type);
+		errk("invalid command type %d\n", type);
 		return -EINVAL;
 	}
 
@@ -130,55 +133,55 @@ struct _stp_buffer {
 static DECLARE_WAIT_QUEUE_HEAD(_stp_ctl_wq);
 static DECLARE_WAIT_QUEUE_HEAD(_stp_sym_wq);
 
-#ifdef DEBUG
-static void _stp_ctl_write_dbug (int type, void *data, int len)
+#if DEBUG_TRANSPORT > 0
+static void _stp_ctl_write_dbug(int type, void *data, int len)
 {
 	char buf[64];
 	switch (type) {
 	case STP_START:
-		printk("_stp_ctl_write: sending STP_START\n");
+		_dbug("sending STP_START\n");
 		break;
 	case STP_EXIT:
-		printk("_stp_ctl_write: sending STP_EXIT\n");
+		_dbug("sending STP_EXIT\n");
 		break;
 	case STP_OOB_DATA:
-		snprintf(buf, sizeof(buf), "%s", (char *)data); 
-		printk("_stp_ctl_write: sending %d bytes of STP_OOB_DATA: %s\n", len, buf);
+		snprintf(buf, sizeof(buf), "%s", (char *)data);
+		_dbug("sending %d bytes of STP_OOB_DATA: %s\n", len, buf);
 		break;
 	case STP_SYSTEM:
-		snprintf(buf, sizeof(buf), "%s", (char *)data); 
-		printk("_stp_ctl_write: sending STP_SYSTEM: %s\n", buf);
+		snprintf(buf, sizeof(buf), "%s", (char *)data);
+		_dbug("sending STP_SYSTEM: %s\n", buf);
 		break;
 	case STP_TRANSPORT:
-		printk("_stp_ctl_write: sending STP_TRANSPORT\n");
+		_dbug("sending STP_TRANSPORT\n");
 		break;
 	default:
-		printk("_stp_ctl_write: ERROR: unknown message type: %d\n", type);
+		_dbug("ERROR: unknown message type: %d\n", type);
 		break;
 	}
 }
-static void _stp_sym_write_dbug (int type, void *data, int len)
+static void _stp_sym_write_dbug(int type, void *data, int len)
 {
 	switch (type) {
 	case STP_SYMBOLS:
-		printk("_stp_sym_write: sending STP_SYMBOLS\n");
+		_dbug("sending STP_SYMBOLS\n");
 		break;
 	case STP_MODULE:
-		printk("_stp_sym_write: sending STP_MODULE\n");
+		_dbug("sending STP_MODULE\n");
 		break;
 	default:
-		printk("_stp_sym_write: ERROR: unknown message type: %d\n", type);
+		_dbug("ERROR: unknown message type: %d\n", type);
 		break;
 	}
 }
 #endif
 
-static int _stp_ctl_write (int type, void *data, unsigned len)
+static int _stp_ctl_write(int type, void *data, unsigned len)
 {
 	struct _stp_buffer *bptr;
 	unsigned long flags;
-	unsigned numtrylock;
-#ifdef DEBUG
+
+#if DEBUG_TRANSPORT > 0
 	_stp_ctl_write_dbug(type, data, len);
 #endif
 
@@ -186,47 +189,29 @@ static int _stp_ctl_write (int type, void *data, unsigned len)
 	if (unlikely(len > STP_CTL_BUFFER_SIZE))
 		return 0;
 
-	numtrylock = 0;
-	while (!spin_trylock_irqsave (&_stp_pool_lock, flags) && (++numtrylock < MAXTRYLOCK)) 
-		ndelay (TRYLOCKDELAY);
-	if (unlikely (numtrylock >= MAXTRYLOCK))
-		return 0;
-
-	if (unlikely(list_empty(&_stp_pool_q))) {
-		spin_unlock_irqrestore(&_stp_pool_lock, flags); 
-		dbug("_stp_pool_q empty\n");
+	/* get a buffer from the free pool */
+	bptr = _stp_mempool_alloc(_stp_pool_q);
+	if (unlikely(bptr == NULL))
 		return -1;
-	}
-
-	/* get the next buffer from the pool */
-	bptr = (struct _stp_buffer *)_stp_pool_q.next;
-	list_del_init(&bptr->list);
-	spin_unlock_irqrestore(&_stp_pool_lock, flags);
 
 	bptr->type = type;
 	memcpy(bptr->buf, data, len);
 	bptr->len = len;
-	
-	/* put it on the pool of ready buffers */
-	numtrylock = 0;
-	while (!spin_trylock_irqsave (&_stp_ctl_ready_lock, flags) && (++numtrylock < MAXTRYLOCK)) 
-		ndelay (TRYLOCKDELAY);
-
-	if (unlikely (numtrylock >= MAXTRYLOCK)) 
-		return 0;
 
+	/* put it on the pool of ready buffers */
+	spin_lock_irqsave(&_stp_ctl_ready_lock, flags);
 	list_add_tail(&bptr->list, &_stp_ctl_ready_q);
 	spin_unlock_irqrestore(&_stp_ctl_ready_lock, flags);
 
 	return len;
 }
 
-static int _stp_sym_write (int type, void *data, unsigned len)
+static int _stp_sym_write(int type, void *data, unsigned len)
 {
 	struct _stp_buffer *bptr;
 	unsigned long flags;
 
-#ifdef DEBUG
+#if DEBUG_TRANSPORT > 0
 	_stp_sym_write_dbug(type, data, len);
 #endif
 
@@ -234,24 +219,17 @@ static int _stp_sym_write (int type, void *data, unsigned len)
 	if (unlikely(len > STP_CTL_BUFFER_SIZE))
 		return 0;
 
-	spin_lock_irqsave (&_stp_pool_lock, flags);
-	if (unlikely(list_empty(&_stp_pool_q))) {
-		spin_unlock_irqrestore(&_stp_pool_lock, flags); 
-		dbug("_stp_pool_q empty\n");
+	/* get a buffer from the free pool */
+	bptr = _stp_mempool_alloc(_stp_pool_q);
+	if (unlikely(bptr == NULL))
 		return -1;
-	}
-
-	/* get the next buffer from the pool */
-	bptr = (struct _stp_buffer *)_stp_pool_q.next;
-	list_del_init(&bptr->list);
-	spin_unlock_irqrestore(&_stp_pool_lock, flags);
 
 	bptr->type = type;
 	memcpy(bptr->buf, data, len);
 	bptr->len = len;
-	
+
 	/* put it on the pool of ready buffers */
-	spin_lock_irqsave (&_stp_sym_ready_lock, flags);
+	spin_lock_irqsave(&_stp_sym_ready_lock, flags);
 	list_add_tail(&bptr->list, &_stp_sym_ready_q);
 	spin_unlock_irqrestore(&_stp_sym_ready_lock, flags);
 
@@ -262,25 +240,24 @@ static int _stp_sym_write (int type, void *data, unsigned len)
 }
 
 /* send commands with timeout and retry */
-static int _stp_ctl_send (int type, void *data, int len)
+static int _stp_ctl_send(int type, void *data, int len)
 {
 	int err, trylimit = 50;
-	kbug("ctl_send: type=%d len=%d\n", type, len);
+	kbug(DEBUG_TRANSPORT, "ctl_send: type=%d len=%d\n", type, len);
 	if (unlikely(type == STP_SYMBOLS || type == STP_MODULE)) {
 		while ((err = _stp_sym_write(type, data, len)) < 0 && trylimit--)
-			msleep (5);
+			msleep(5);
 	} else {
 		while ((err = _stp_ctl_write(type, data, len)) < 0 && trylimit--)
-			msleep (5);
+			msleep(5);
 		if (err > 0)
 			wake_up_interruptible(&_stp_ctl_wq);
 	}
-	kbug("returning %d\n", err);
+	kbug(DEBUG_TRANSPORT, "returning %d\n", err);
 	return err;
 }
 
-static ssize_t
-_stp_sym_read_cmd (struct file *file, char __user *buf, size_t count, loff_t *ppos)
+static ssize_t _stp_sym_read_cmd(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
 	struct _stp_buffer *bptr;
 	int len;
@@ -296,7 +273,7 @@ _stp_sym_read_cmd (struct file *file, char __user *buf, size_t count, loff_t *pp
 			return -ERESTARTSYS;
 		spin_lock_irqsave(&_stp_sym_ready_lock, flags);
 	}
-  
+
 	/* get the next buffer off the ready list */
 	bptr = (struct _stp_buffer *)_stp_sym_ready_q.next;
 	list_del_init(&bptr->list);
@@ -314,15 +291,12 @@ _stp_sym_read_cmd (struct file *file, char __user *buf, size_t count, loff_t *pp
 	}
 
 	/* put it on the pool of free buffers */
-	spin_lock_irqsave(&_stp_pool_lock, flags);
-	list_add_tail(&bptr->list, &_stp_pool_q);
-	spin_unlock_irqrestore(&_stp_pool_lock, flags);
+	_stp_mempool_free(bptr);
 
 	return len;
 }
 
-static ssize_t
-_stp_ctl_read_cmd (struct file *file, char __user *buf, size_t count, loff_t *ppos)
+static ssize_t _stp_ctl_read_cmd(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
 	struct _stp_buffer *bptr;
 	int len;
@@ -338,7 +312,7 @@ _stp_ctl_read_cmd (struct file *file, char __user *buf, size_t count, loff_t *pp
 			return -ERESTARTSYS;
 		spin_lock_irqsave(&_stp_ctl_ready_lock, flags);
 	}
-  
+
 	/* get the next buffer off the ready list */
 	bptr = (struct _stp_buffer *)_stp_ctl_ready_q.next;
 	list_del_init(&bptr->list);
@@ -356,15 +330,13 @@ _stp_ctl_read_cmd (struct file *file, char __user *buf, size_t count, loff_t *pp
 	}
 
 	/* put it on the pool of free buffers */
-	spin_lock_irqsave(&_stp_pool_lock, flags);
-	list_add_tail(&bptr->list, &_stp_pool_q);
-	spin_unlock_irqrestore(&_stp_pool_lock, flags);
+	_stp_mempool_free(bptr);
 
 	return len;
 }
 
 static int _stp_sym_opens = 0;
-static int _stp_sym_open_cmd (struct inode *inode, struct file *file)
+static int _stp_sym_open_cmd(struct inode *inode, struct file *file)
 {
 	/* only allow one reader */
 	if (_stp_sym_opens)
@@ -374,14 +346,14 @@ static int _stp_sym_open_cmd (struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int _stp_sym_close_cmd (struct inode *inode, struct file *file)
+static int _stp_sym_close_cmd(struct inode *inode, struct file *file)
 {
 	if (_stp_sym_opens)
 		_stp_sym_opens--;
 	return 0;
 }
 
-static int _stp_ctl_open_cmd (struct inode *inode, struct file *file)
+static int _stp_ctl_open_cmd(struct inode *inode, struct file *file)
 {
 	if (_stp_attached)
 		return -1;
@@ -390,7 +362,7 @@ static int _stp_ctl_open_cmd (struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int _stp_ctl_close_cmd (struct inode *inode, struct file *file)
+static int _stp_ctl_close_cmd(struct inode *inode, struct file *file)
 {
 	if (_stp_attached)
 		_stp_detach();
@@ -416,12 +388,12 @@ static struct file_operations _stp_sym_fops_cmd = {
 static struct dentry *_stp_cmd_file = NULL;
 static struct dentry *_stp_sym_file = NULL;
 
-static int _stp_register_ctl_channel (void)
+static int _stp_register_ctl_channel(void)
 {
 	int i;
 	struct list_head *p, *tmp;
 	char buf[32];
-	
+
 	if (_stp_utt == NULL) {
 		errk("_expected _stp_utt to be set.\n");
 		return -1;
@@ -429,21 +401,16 @@ static int _stp_register_ctl_channel (void)
 
 	INIT_LIST_HEAD(&_stp_ctl_ready_q);
 	INIT_LIST_HEAD(&_stp_sym_ready_q);
-	INIT_LIST_HEAD(&_stp_pool_q);
 
 	/* allocate buffers */
-	for (i = 0; i < STP_DEFAULT_BUFFERS; i++) {
-		p = (struct list_head *)_stp_kmalloc(sizeof(struct _stp_buffer));
-		// printk("allocated buffer at %lx\n", (long)p);
-		if (!p)
-			goto err0;
-		_stp_allocated_net_memory += sizeof(struct _stp_buffer);
-		list_add (p, &_stp_pool_q);
-	}
+	_stp_pool_q = _stp_mempool_init(sizeof(struct _stp_buffer), STP_DEFAULT_BUFFERS);
+	if (unlikely(_stp_pool_q == NULL))
+		goto err0;
+	_stp_allocated_net_memory += sizeof(struct _stp_buffer) * STP_DEFAULT_BUFFERS;
 
 	/* create [debugfs]/systemtap/module_name/.cmd  */
 	_stp_cmd_file = debugfs_create_file(".cmd", 0600, _stp_utt->dir, NULL, &_stp_ctl_fops_cmd);
-	if (_stp_cmd_file == NULL) 
+	if (_stp_cmd_file == NULL)
 		goto err0;
 	_stp_cmd_file->d_inode->i_uid = _stp_uid;
 	_stp_cmd_file->d_inode->i_gid = _stp_gid;
@@ -455,35 +422,29 @@ static int _stp_register_ctl_channel (void)
 	return 0;
 
 err0:
-	if (_stp_cmd_file) debugfs_remove(_stp_cmd_file);
-
-	list_for_each_safe(p, tmp, &_stp_pool_q) {
-		list_del(p);
-		_stp_kfree(p);
-	}
-	errk ("Error creating systemtap debugfs entries.\n");
+	if (_stp_cmd_file)
+		debugfs_remove(_stp_cmd_file);
+	_stp_mempool_destroy(_stp_pool_q);
+	errk("Error creating systemtap debugfs entries.\n");
 	return -1;
 }
 
-
-static void _stp_unregister_ctl_channel (void)
+static void _stp_unregister_ctl_channel(void)
 {
 	struct list_head *p, *tmp;
-	if (_stp_sym_file) debugfs_remove(_stp_sym_file);
-	if (_stp_cmd_file) debugfs_remove(_stp_cmd_file);
+	if (_stp_sym_file)
+		debugfs_remove(_stp_sym_file);
+	if (_stp_cmd_file)
+		debugfs_remove(_stp_cmd_file);
 
-	/* free memory pools */
-	list_for_each_safe(p, tmp, &_stp_pool_q) {
-		list_del(p);
-		_stp_kfree(p);
-	}
+	/* Return memory to pool and free it. */
 	list_for_each_safe(p, tmp, &_stp_sym_ready_q) {
 		list_del(p);
-		_stp_kfree(p);
+		_stp_mempool_free(p);
 	}
 	list_for_each_safe(p, tmp, &_stp_ctl_ready_q) {
 		list_del(p);
-		_stp_kfree(p);
+		_stp_mempool_free(p);
 	}
+	_stp_mempool_destroy(_stp_pool_q);
 }
-
diff --git a/runtime/transport/procfs.c b/runtime/transport/procfs.c
index 33f6db33..2afea1c9 100644
--- a/runtime/transport/procfs.c
+++ b/runtime/transport/procfs.c
@@ -12,18 +12,16 @@
 #define STP_DEFAULT_BUFFERS 256
 static int _stp_current_buffers = STP_DEFAULT_BUFFERS;
 
+static _stp_mempool_t *_stp_pool_q;
 static struct list_head _stp_ctl_ready_q;
 static struct list_head _stp_sym_ready_q;
-static struct list_head _stp_pool_q;
-DEFINE_SPINLOCK(_stp_pool_lock);
 DEFINE_SPINLOCK(_stp_ctl_ready_lock);
 DEFINE_SPINLOCK(_stp_sym_ready_lock);
 
 #ifdef STP_BULKMODE
 extern int _stp_relay_flushing;
 /* handle the per-cpu subbuf info read for relayfs */
-static ssize_t
-_stp_proc_read (struct file *file, char __user *buf, size_t count, loff_t *ppos)
+static ssize_t _stp_proc_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
 	int num;
 	struct _stp_buf_info out;
@@ -46,8 +44,7 @@ _stp_proc_read (struct file *file, char __user *buf, size_t count, loff_t *ppos)
 }
 
 /* handle the per-cpu subbuf info write for relayfs */
-static ssize_t _stp_proc_write (struct file *file, const char __user *buf,
-				size_t count, loff_t *ppos)
+static ssize_t _stp_proc_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 {
 	struct _stp_consumed_info info;
 	int cpu = *(int *)(PDE(file->f_dentry->d_inode)->data);
@@ -65,8 +62,7 @@ static struct file_operations _stp_proc_fops = {
 };
 #endif /* STP_BULKMODE */
 
-static ssize_t _stp_sym_write_cmd (struct file *file, const char __user *buf,
-				    size_t count, loff_t *ppos)
+static ssize_t _stp_sym_write_cmd(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 {
 	static int saved_type = 0;
 	int type;
@@ -74,7 +70,7 @@ static ssize_t _stp_sym_write_cmd (struct file *file, const char __user *buf,
 	if (count < sizeof(int32_t))
 		return 0;
 
-	/* Allow sending of packet type followed by data in the next packet.*/
+	/* Allow sending of packet type followed by data in the next packet. */
 	if (count == sizeof(int32_t)) {
 		if (get_user(saved_type, (int __user *)buf))
 			return -EFAULT;
@@ -88,11 +84,14 @@ static ssize_t _stp_sym_write_cmd (struct file *file, const char __user *buf,
 		count -= sizeof(int);
 		buf += sizeof(int);
 	}
-	
-	// kbug ("count:%d type:%d\n", (int)count, type);
+
+#if DEBUG_TRANSPORT > 0
+	if (type >= 0 && type < STP_MAX_CMD)	/* type is a signed int: a negative value must not index the name table */
+		_dbug("Got %s. len=%d\n", _stp_command_name[type], (int)count);
+#endif
 
 	switch (type) {
-	case STP_SYMBOLS:		
+	case STP_SYMBOLS:
 		count = _stp_do_symbols(buf, count);
 		break;
 	case STP_MODULE:
@@ -100,19 +99,19 @@ static ssize_t _stp_sym_write_cmd (struct file *file, const char __user *buf,
 			count = _stp_do_module(buf, count);
 		else {
 			/* count == 1 indicates end of initial modules list */
-			_stp_ctl_send(STP_TRANSPORT, NULL, 0);			
+			_stp_ctl_send(STP_TRANSPORT, NULL, 0);
 		}
 		break;
 	default:
-		errk ("invalid symbol command type %d\n", type);
+		errk("invalid symbol command type %d\n", type);
 		return -EINVAL;
 	}
 
 	return count;
 
 }
-static ssize_t _stp_ctl_write_cmd (struct file *file, const char __user *buf,
-				    size_t count, loff_t *ppos)
+
+static ssize_t _stp_ctl_write_cmd(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 {
 	int type;
 	static int started = 0;
@@ -123,7 +122,10 @@ static ssize_t _stp_ctl_write_cmd (struct file *file, const char __user *buf,
 	if (get_user(type, (int __user *)buf))
 		return -EFAULT;
 
-	// kbug ("count:%d type:%d\n", count, type);
+#if DEBUG_TRANSPORT > 0
+	if (type >= 0 && type < STP_MAX_CMD)	/* type was read with get_user(): reject negative indexes as well */
+		_dbug("Got %s. len=%d\n", _stp_command_name[type], (int)count);
+#endif
 
 	count -= sizeof(int);
 	buf += sizeof(int);
@@ -134,9 +136,9 @@ static ssize_t _stp_ctl_write_cmd (struct file *file, const char __user *buf,
 			struct _stp_msg_start st;
 			if (count < sizeof(st))
 				return 0;
-			if (copy_from_user (&st, buf, sizeof(st)))
+			if (copy_from_user(&st, buf, sizeof(st)))
 				return -EFAULT;
-			_stp_handle_start (&st);
+			_stp_handle_start(&st);
 			started = 1;
 		}
 		break;
@@ -145,10 +147,10 @@ static ssize_t _stp_ctl_write_cmd (struct file *file, const char __user *buf,
 		break;
 	case STP_READY:
 		/* request symbolic information */
-		_stp_ask_for_symbols();		
+		_stp_ask_for_symbols();
 		break;
 	default:
-		errk ("invalid command type %d\n", type);
+		errk("invalid command type %d\n", type);
 		return -EINVAL;
 	}
 
@@ -165,76 +167,66 @@ struct _stp_buffer {
 static DECLARE_WAIT_QUEUE_HEAD(_stp_ctl_wq);
 static DECLARE_WAIT_QUEUE_HEAD(_stp_sym_wq);
 
-#ifdef DEBUG
-static void _stp_ctl_write_dbug (int type, void *data, int len)
+#if DEBUG_TRANSPORT > 0
+static void _stp_ctl_write_dbug(int type, void *data, int len)
 {
 	char buf[64];
 	switch (type) {
-	case STP_REALTIME_DATA:
-		break;
 	case STP_START:
-		printk("_stp_ctl_write: sending STP_START\n");
+		_dbug("sending STP_START\n");
 		break;
 	case STP_EXIT:
-		printk("_stp_ctl_write: sending STP_EXIT\n");
+		_dbug("sending STP_EXIT\n");
 		break;
 	case STP_OOB_DATA:
-		snprintf(buf, sizeof(buf), "%s", (char *)data); 
-		printk("_stp_ctl_write: sending %d bytes of STP_OOB_DATA: %s\n", len, buf);
+		snprintf(buf, sizeof(buf), "%s", (char *)data);
+		_dbug("sending %d bytes of STP_OOB_DATA: %s\n", len, buf);
 		break;
 	case STP_SYSTEM:
-		snprintf(buf, sizeof(buf), "%s", (char *)data); 
-		printk("_stp_ctl_write: sending STP_SYSTEM: %s\n", buf);
+		snprintf(buf, sizeof(buf), "%s", (char *)data);
+		_dbug("sending STP_SYSTEM: %s\n", buf);
 		break;
 	case STP_TRANSPORT:
-		printk("_stp_ctl_write: sending STP_TRANSPORT\n");
+		_dbug("sending STP_TRANSPORT\n");
 		break;
 	default:
-		printk("_stp_ctl_write: ERROR: unknown message type: %d\n", type);
+		_dbug("ERROR: unknown message type: %d\n", type);
 		break;
 	}
 }
-static void _stp_sym_write_dbug (int type, void *data, int len)
+static void _stp_sym_write_dbug(int type, void *data, int len)
 {
 	switch (type) {
 	case STP_SYMBOLS:
-		printk("_stp_sym_write: sending STP_SYMBOLS\n");
+		_dbug("sending STP_SYMBOLS\n");
 		break;
 	case STP_MODULE:
-		printk("_stp_sym_write: sending STP_MODULE\n");
+		_dbug("sending STP_MODULE\n");
 		break;
 	default:
-		printk("_stp_sym_write: ERROR: unknown message type: %d\n", type);
+		_dbug("ERROR: unknown message type: %d\n", type);
 		break;
 	}
 }
 #endif
 
-static int _stp_ctl_write (int type, void *data, int len)
+static int _stp_ctl_write(int type, void *data, int len)
 {
 	struct _stp_buffer *bptr;
 	unsigned long flags;
-	unsigned numtrylock;
 
-#ifdef DEBUG
+#if DEBUG_TRANSPORT > 0
 	_stp_ctl_write_dbug(type, data, len);
 #endif
 
 #define WRITE_AGG
 #ifdef WRITE_AGG
 
-	numtrylock = 0;
-	while (!spin_trylock_irqsave (&_stp_ctl_ready_lock, flags) && (++numtrylock < MAXTRYLOCK)) 
-		ndelay (TRYLOCKDELAY);
-	if (unlikely (numtrylock >= MAXTRYLOCK))
-		return 0;
-
+	spin_lock_irqsave(&_stp_ctl_ready_lock, flags);
 	if (!list_empty(&_stp_ctl_ready_q)) {
 		bptr = (struct _stp_buffer *)_stp_ctl_ready_q.prev;
-		if (bptr->len + len <= STP_BUFFER_SIZE 
-		    && type == STP_REALTIME_DATA 
-		    && bptr->type == STP_REALTIME_DATA) {
-			memcpy (bptr->buf + bptr->len, data, len);
+		if (bptr->len + len <= STP_BUFFER_SIZE && type == STP_REALTIME_DATA && bptr->type == STP_REALTIME_DATA) {
+			memcpy(bptr->buf + bptr->len, data, len);
 			bptr->len += len;
 			spin_unlock_irqrestore(&_stp_ctl_ready_lock, flags);
 			return len;
@@ -247,47 +239,29 @@ static int _stp_ctl_write (int type, void *data, int len)
 	if (unlikely(len > STP_BUFFER_SIZE))
 		return 0;
 
-	numtrylock = 0;
-	while (!spin_trylock_irqsave (&_stp_pool_lock, flags) && (++numtrylock < MAXTRYLOCK)) 
-		ndelay (TRYLOCKDELAY);
-	if (unlikely (numtrylock >= MAXTRYLOCK))
-		return 0;
-
-	if (unlikely(list_empty(&_stp_pool_q))) {
-		spin_unlock_irqrestore(&_stp_pool_lock, flags);
-		dbug("_stp_pool_q empty\n");
+	/* get a buffer from the free pool */
+	bptr = _stp_mempool_alloc(_stp_pool_q);
+	if (unlikely(bptr == NULL))
 		return -1;
-	}
-
-	/* get the next buffer from the pool */
-	bptr = (struct _stp_buffer *)_stp_pool_q.next;
-	list_del_init(&bptr->list);
-	spin_unlock_irqrestore(&_stp_pool_lock, flags);
 
 	bptr->type = type;
-	memcpy (bptr->buf, data, len);
+	memcpy(bptr->buf, data, len);
 	bptr->len = len;
-	
-	/* put it on the pool of ready buffers */
-	numtrylock = 0;
-	while (!spin_trylock_irqsave (&_stp_ctl_ready_lock, flags) && (++numtrylock < MAXTRYLOCK)) 
-		ndelay (TRYLOCKDELAY);
-
-	if (unlikely (numtrylock >= MAXTRYLOCK))
-		return 0;
 
+	/* put it on the pool of ready buffers */
+	spin_lock_irqsave(&_stp_ctl_ready_lock, flags);
 	list_add_tail(&bptr->list, &_stp_ctl_ready_q);
 	spin_unlock_irqrestore(&_stp_ctl_ready_lock, flags);
 
 	return len;
 }
 
-static int _stp_sym_write (int type, void *data, unsigned len)
+static int _stp_sym_write(int type, void *data, unsigned len)
 {
 	struct _stp_buffer *bptr;
 	unsigned long flags;
 
-#ifdef DEBUG
+#if DEBUG_TRANSPORT > 0
 	_stp_sym_write_dbug(type, data, len);
 #endif
 
@@ -295,24 +269,17 @@ static int _stp_sym_write (int type, void *data, unsigned len)
 	if (unlikely(len > STP_BUFFER_SIZE))
 		return 0;
 
-	spin_lock_irqsave (&_stp_pool_lock, flags);
-	if (unlikely(list_empty(&_stp_pool_q))) {
-		spin_unlock_irqrestore(&_stp_pool_lock, flags); 
-		dbug("_stp_pool_q empty\n");
+	/* get a buffer from the free pool */
+	bptr = _stp_mempool_alloc(_stp_pool_q);
+	if (unlikely(bptr == NULL))
 		return -1;
-	}
-
-	/* get the next buffer from the pool */
-	bptr = (struct _stp_buffer *)_stp_pool_q.next;
-	list_del_init(&bptr->list);
-	spin_unlock_irqrestore(&_stp_pool_lock, flags);
 
 	bptr->type = type;
 	memcpy(bptr->buf, data, len);
 	bptr->len = len;
 
 	/* put it on the pool of ready buffers */
-	spin_lock_irqsave (&_stp_sym_ready_lock, flags);
+	spin_lock_irqsave(&_stp_sym_ready_lock, flags);
 	list_add_tail(&bptr->list, &_stp_sym_ready_q);
 	spin_unlock_irqrestore(&_stp_sym_ready_lock, flags);
 
@@ -323,23 +290,24 @@ static int _stp_sym_write (int type, void *data, unsigned len)
 }
 
 /* send commands with timeout and retry */
-static int _stp_ctl_send (int type, void *data, int len)
+static int _stp_ctl_send(int type, void *data, int len)
 {
 	int err, trylimit = 50;
+	kbug(DEBUG_TRANSPORT, "ctl_send: type=%d len=%d\n", type, len);
 	if (unlikely(type == STP_SYMBOLS || type == STP_MODULE)) {
 		while ((err = _stp_sym_write(type, data, len)) < 0 && trylimit--)
-			msleep (5);
+			msleep(5);
 	} else {
 		while ((err = _stp_ctl_write(type, data, len)) < 0 && trylimit--)
-			msleep (5);
+			msleep(5);
 		if (err > 0)
 			wake_up_interruptible(&_stp_ctl_wq);
 	}
+	kbug(DEBUG_TRANSPORT, "returning %d\n", err);
 	return err;
 }
 
-static ssize_t
-_stp_sym_read_cmd (struct file *file, char __user *buf, size_t count, loff_t *ppos)
+static ssize_t _stp_sym_read_cmd(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
 	struct _stp_buffer *bptr;
 	int len;
@@ -355,7 +323,7 @@ _stp_sym_read_cmd (struct file *file, char __user *buf, size_t count, loff_t *pp
 			return -ERESTARTSYS;
 		spin_lock_irqsave(&_stp_sym_ready_lock, flags);
 	}
-  
+
 	/* get the next buffer off the ready list */
 	bptr = (struct _stp_buffer *)_stp_sym_ready_q.next;
 	list_del_init(&bptr->list);
@@ -373,15 +341,12 @@ _stp_sym_read_cmd (struct file *file, char __user *buf, size_t count, loff_t *pp
 	}
 
 	/* put it on the pool of free buffers */
-	spin_lock_irqsave(&_stp_pool_lock, flags);
-	list_add_tail(&bptr->list, &_stp_pool_q);
-	spin_unlock_irqrestore(&_stp_pool_lock, flags);
+	_stp_mempool_free(bptr);
 
 	return len;
 }
 
-static ssize_t
-_stp_ctl_read_cmd (struct file *file, char __user *buf, size_t count, loff_t *ppos)
+static ssize_t _stp_ctl_read_cmd(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
 	struct _stp_buffer *bptr;
 	int len;
@@ -397,7 +362,7 @@ _stp_ctl_read_cmd (struct file *file, char __user *buf, size_t count, loff_t *pp
 			return -ERESTARTSYS;
 		spin_lock_irqsave(&_stp_ctl_ready_lock, flags);
 	}
-  
+
 	/* get the next buffer off the ready list */
 	bptr = (struct _stp_buffer *)_stp_ctl_ready_q.next;
 	list_del_init(&bptr->list);
@@ -415,15 +380,13 @@ _stp_ctl_read_cmd (struct file *file, char __user *buf, size_t count, loff_t *pp
 	}
 
 	/* put it on the pool of free buffers */
-	spin_lock_irqsave(&_stp_pool_lock, flags);
-	list_add_tail(&bptr->list, &_stp_pool_q);
-	spin_unlock_irqrestore(&_stp_pool_lock, flags);
+	_stp_mempool_free(bptr);
 
 	return len;
 }
 
 static int _stp_sym_opens = 0;
-static int _stp_sym_open_cmd (struct inode *inode, struct file *file)
+static int _stp_sym_open_cmd(struct inode *inode, struct file *file)
 {
 	/* only allow one reader */
 	if (_stp_sym_opens)
@@ -433,14 +396,14 @@ static int _stp_sym_open_cmd (struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int _stp_sym_close_cmd (struct inode *inode, struct file *file)
+static int _stp_sym_close_cmd(struct inode *inode, struct file *file)
 {
 	if (_stp_sym_opens)
 		_stp_sym_opens--;
 	return 0;
 }
 
-static int _stp_ctl_open_cmd (struct inode *inode, struct file *file)
+static int _stp_ctl_open_cmd(struct inode *inode, struct file *file)
 {
 	if (_stp_attached)
 		return -1;
@@ -449,7 +412,7 @@ static int _stp_ctl_open_cmd (struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int _stp_ctl_close_cmd (struct inode *inode, struct file *file)
+static int _stp_ctl_close_cmd(struct inode *inode, struct file *file)
 {
 	if (_stp_attached)
 		_stp_detach();
@@ -482,46 +445,14 @@ static int my_proc_match(int len, const char *name, struct proc_dir_entry *de)
 /* set the number of buffers to use to 'num' */
 static int _stp_set_buffers(int num)
 {
-	int i;
-	struct list_head *p;
-	unsigned long flags;
-
-	//printk("stp_set_buffers %d\n", num);
-
-	if (num == 0 || num == _stp_current_buffers)
-		return _stp_current_buffers;
-	
-	if (num > _stp_current_buffers) {
-		for (i = 0; i < num - _stp_current_buffers; i++) {
-			p = (struct list_head *)_stp_kmalloc(sizeof(struct _stp_buffer));
-			if (!p)	{
-				_stp_current_buffers += i;
-				goto err;
-			}
-			_stp_allocated_net_memory += sizeof(struct _stp_buffer);
-			spin_lock_irqsave(&_stp_pool_lock, flags);
-			list_add (p, &_stp_pool_q);
-			spin_unlock_irqrestore(&_stp_pool_lock, flags);
-		}
-	} else {
-		for (i = 0; i < _stp_current_buffers - num; i++) {
-			spin_lock_irqsave(&_stp_pool_lock, flags);
-			p = _stp_pool_q.next;
-			list_del(p);
-			spin_unlock_irqrestore(&_stp_pool_lock, flags);
-			_stp_kfree(p);
-		}
-	}
-	_stp_current_buffers = num;
-err:
-	return _stp_current_buffers;
+	kbug(DEBUG_TRANSPORT, "stp_set_buffers %d\n", num);
+	return _stp_mempool_resize(_stp_pool_q, num);
 }
 
-static int _stp_ctl_read_bufsize (char *page, char **start, off_t off,
-				  int count, int *eof, void *data)
+static int _stp_ctl_read_bufsize(char *page, char **start, off_t off, int count, int *eof, void *data)
 {
 	int len = sprintf(page, "%d,%d\n", _stp_nsubbufs, _stp_subbuf_size);
-	if (len <= off+count)
+	if (len <= off + count)
 		*eof = 1;
 	*start = page + off;
 	len -= off;
@@ -532,7 +463,7 @@ static int _stp_ctl_read_bufsize (char *page, char **start, off_t off,
 	return len;
 }
 
-static int _stp_register_ctl_channel (void)
+static int _stp_register_ctl_channel(void)
 {
 	int i;
 	const char *dirname = "systemtap";
@@ -546,17 +477,12 @@ static int _stp_register_ctl_channel (void)
 
 	INIT_LIST_HEAD(&_stp_ctl_ready_q);
 	INIT_LIST_HEAD(&_stp_sym_ready_q);
-	INIT_LIST_HEAD(&_stp_pool_q);
 
 	/* allocate buffers */
-	for (i = 0; i < STP_DEFAULT_BUFFERS; i++) {
-		p = (struct list_head *)_stp_kmalloc(sizeof(struct _stp_buffer));
-		// printk("allocated buffer at %lx\n", (long)p);
-		if (!p)
-			goto err0;
-		_stp_allocated_net_memory += sizeof(struct _stp_buffer);
-		list_add (p, &_stp_pool_q);
-	}
+	_stp_pool_q = _stp_mempool_init(sizeof(struct _stp_buffer), STP_DEFAULT_BUFFERS);
+	if (unlikely(_stp_pool_q == NULL))
+		goto err0;
+	_stp_allocated_net_memory += sizeof(struct _stp_buffer) * STP_DEFAULT_BUFFERS;
 
 	if (!_stp_mkdir_proc_module())
 		goto err0;
@@ -565,15 +491,15 @@ static int _stp_register_ctl_channel (void)
 	/* now for each cpu "n", create /proc/systemtap/module_name/n  */
 	for_each_cpu(i) {
 		sprintf(buf, "%d", i);
-		de = create_proc_entry (buf, 0600, _stp_proc_root);
-		if (de == NULL) 
+		de = create_proc_entry(buf, 0600, _stp_proc_root);
+		if (de == NULL)
 			goto err1;
 		de->uid = _stp_uid;
 		de->gid = _stp_gid;
 		de->proc_fops = &_stp_proc_fops;
 		de->data = _stp_kmalloc(sizeof(int));
 		if (de->data == NULL) {
-			remove_proc_entry (buf, _stp_proc_root);
+			remove_proc_entry(buf, _stp_proc_root);
 			goto err1;
 		}
 		*(int *)de->data = i;
@@ -582,48 +508,44 @@ static int _stp_register_ctl_channel (void)
 #endif /* STP_BULKMODE */
 
 	/* create /proc/systemtap/module_name/.cmd  */
-	de = create_proc_entry (".cmd", 0600, _stp_proc_root);
-	if (de == NULL) 
+	de = create_proc_entry(".cmd", 0600, _stp_proc_root);
+	if (de == NULL)
 		goto err1;
 	de->uid = _stp_uid;
 	de->gid = _stp_gid;
 	de->proc_fops = &_stp_proc_fops_cmd;
 
 	/* create /proc/systemtap/module_name/.symbols  */
-	de = create_proc_entry (".symbols", 0600, _stp_proc_root);
-	if (de == NULL) 
+	de = create_proc_entry(".symbols", 0600, _stp_proc_root);
+	if (de == NULL)
 		goto err2;
 	de->proc_fops = &_stp_sym_fops_cmd;
 
 	return 0;
 err2:
-	remove_proc_entry (".cmd", _stp_proc_root);	
+	remove_proc_entry(".cmd", _stp_proc_root);
 err1:
 #ifdef STP_BULKMODE
 	for (de = _stp_proc_root->subdir; de; de = de->next)
-		_stp_kfree (de->data);
+		_stp_kfree(de->data);
 	for_each_cpu(j) {
 		if (j == i)
 			break;
 		sprintf(buf, "%d", j);
-		remove_proc_entry (buf, _stp_proc_root);
-		
+		remove_proc_entry(buf, _stp_proc_root);
+
 	}
-	if (bs) remove_proc_entry ("bufsize", _stp_proc_root);
+	if (bs)
+		remove_proc_entry("bufsize", _stp_proc_root);
 #endif /* STP_BULKMODE */
 	_stp_rmdir_proc_module();
 err0:
-	list_for_each_safe(p, tmp, &_stp_pool_q) {
-		list_del(p);
-		_stp_kfree(p);
-	}
-
-	errk ("Error creating systemtap /proc entries.\n");
+	_stp_mempool_destroy(_stp_pool_q);
+	errk("Error creating systemtap /proc entries.\n");
 	return -1;
 }
 
-
-static void _stp_unregister_ctl_channel (void)
+static void _stp_unregister_ctl_channel(void)
 {
 	struct list_head *p, *tmp;
 	char buf[32];
@@ -632,31 +554,27 @@ static void _stp_unregister_ctl_channel (void)
 	struct proc_dir_entry *de;
 	kbug("unregistering procfs\n");
 	for (de = _stp_proc_root->subdir; de; de = de->next)
-		_stp_kfree (de->data);
+		_stp_kfree(de->data);
 
 	for_each_cpu(i) {
 		sprintf(buf, "%d", i);
-		remove_proc_entry (buf, _stp_proc_root);
+		remove_proc_entry(buf, _stp_proc_root);
 	}
-	remove_proc_entry ("bufsize", _stp_proc_root);
+	remove_proc_entry("bufsize", _stp_proc_root);
 #endif /* STP_BULKMODE */
 
-	remove_proc_entry (".symbols", _stp_proc_root);
-	remove_proc_entry (".cmd", _stp_proc_root);
+	remove_proc_entry(".symbols", _stp_proc_root);
+	remove_proc_entry(".cmd", _stp_proc_root);
 	_stp_rmdir_proc_module();
 
-	/* free memory pools */
-	list_for_each_safe(p, tmp, &_stp_pool_q) {
-		list_del(p);
-		_stp_kfree(p);
-	}
+	/* Return memory to pool and free it. */
 	list_for_each_safe(p, tmp, &_stp_sym_ready_q) {
 		list_del(p);
-		_stp_kfree(p);
+		_stp_mempool_free(p);
 	}
 	list_for_each_safe(p, tmp, &_stp_ctl_ready_q) {
 		list_del(p);
-		_stp_kfree(p);
+		_stp_mempool_free(p);
 	}
+	_stp_mempool_destroy(_stp_pool_q);	
 }
-
diff --git a/runtime/transport/symbols.c b/runtime/transport/symbols.c
index e740dde8..8c453a55 100644
--- a/runtime/transport/symbols.c
+++ b/runtime/transport/symbols.c
@@ -16,16 +16,6 @@
 #define _SYMBOLS_C_
 #include "../sym.h"
 
-DEFINE_SPINLOCK(_stp_module_lock);
-#define STP_TRYLOCK_MODULES  ({						\
-		int numtrylock = 0;					\
-		while (!spin_trylock_irqsave (&_stp_module_lock, flags) && (++numtrylock < MAXTRYLOCK)) \
-			ndelay (TRYLOCKDELAY);				\
-		(numtrylock >= MAXTRYLOCK);				\
-			})
-#define STP_LOCK_MODULES  spin_lock_irqsave(&_stp_module_lock, flags)
-#define STP_UNLOCK_MODULES spin_unlock_irqrestore(&_stp_module_lock, flags)
-
 static char *_stp_symbol_data = NULL;
 static int _stp_symbol_state = 0;
 static char *_stp_module_data = NULL;
@@ -63,7 +53,7 @@ static unsigned _stp_get_sym_sizes(struct module *m, unsigned *dsize)
 }
 
 /* allocate space for a module and symbols */
-static struct _stp_module * _stp_alloc_module(unsigned num, unsigned datasize)
+static struct _stp_module * _stp_alloc_module(unsigned num, unsigned datasize, unsigned unwindsize)
 {
 	struct _stp_module *mod = (struct _stp_module *)_stp_kzalloc(sizeof(struct _stp_module));
 	if (mod == NULL)
@@ -85,6 +75,14 @@ static struct _stp_module * _stp_alloc_module(unsigned num, unsigned datasize)
 		mod->allocated |= 2;
 	}
 
+	mod->unwind_data = _stp_kmalloc(unwindsize);
+	if (mod->unwind_data == NULL) {
+		mod->unwind_data = _stp_vmalloc(unwindsize);
+		if (mod->unwind_data == NULL)
+			goto bad;
+		mod->allocated |= 4;
+	}
+	
 	mod->num_symbols = num;
 	return mod;
 
@@ -97,19 +95,33 @@ bad:
 				_stp_kfree(mod->symbols);
 			mod->symbols = NULL;
 		}
+		/* symbol_data is live here when the later unwind_data allocation fails */
+		if (mod->symbol_data) {
+			if (mod->allocated & 2)
+				_stp_vfree(mod->symbol_data);
+			else
+				_stp_kfree(mod->symbol_data);
+			mod->symbol_data = NULL;
+		}
 		_stp_kfree(mod); 
 	}
 	return NULL;
 }
 
-static struct _stp_module * _stp_alloc_module_from_module (struct module *m)
+static struct _stp_module * _stp_alloc_module_from_module (struct module *m, uint32_t unwind_len)
 {
 	unsigned datasize, num = _stp_get_sym_sizes(m, &datasize);
-	return _stp_alloc_module(num, datasize);
+	return _stp_alloc_module(num, datasize, unwind_len);
 }
 
 static void _stp_free_module(struct _stp_module *mod)
 {
+	/* The module write lock is held. Any prior readers of this */
+	/* module's data will have read locks and need to finish before */
+	/* the memory is freed. */
+	write_lock(&mod->lock);
+	write_unlock(&mod->lock); /* there will be no more readers */
+
 	/* free symbol memory */
 	if (mod->symbols) {
 		if (mod->allocated & 1)
@@ -126,21 +145,30 @@ static void _stp_free_module(struct _stp_module *mod)
 		mod->symbol_data = NULL;
 
 	}
+	if (mod->unwind_data) {
+		if (mod->allocated & 4)
+			_stp_vfree(mod->unwind_data);
+		else
+			_stp_kfree(mod->unwind_data);
+		mod->unwind_data = NULL;
+
+	}
 	if (mod->sections) {
 		_stp_kfree(mod->sections);
 		mod->sections = NULL;
 	}
+
 	/* free module memory */
 	_stp_kfree(mod);
 }
 
 /* Delete a module and free its memory. */
-/* The lock should already be held before calling this. */
+/* The module lock should already be held before calling this. */
 static void _stp_del_module(struct _stp_module *mod)
 {
 	int i, num;
 
-	// kbug("deleting %s\n", mod->name);
+	// kbug(DEBUG_SYMBOLS, "deleting %s\n", mod->name);
 
 	/* signal relocation code to clear its cache */
 	_stp_module_relocate((char *)-1, NULL, 0);
@@ -185,7 +213,7 @@ static unsigned long _stp_kallsyms_lookup_name(const char *name);
 static int _stp_do_symbols(const char __user *buf, int count)
 {
 	struct _stp_symbol *s;
-	unsigned datasize, num;	
+	unsigned datasize, num, unwindsize;	
 	int i;
 
 	switch (_stp_symbol_state) {
@@ -198,23 +226,26 @@ static int _stp_do_symbols(const char __user *buf, int count)
 			return -EFAULT;
 		if (get_user(datasize, (unsigned __user *)(buf+4)))
 			return -EFAULT;
-		//kbug("num=%d datasize=%d\n", num, datasize);
+		if (get_user(unwindsize, (unsigned __user *)(buf+8)))
+			return -EFAULT;
+		dbug(DEBUG_UNWIND, "num=%d datasize=%d unwindsize=%d\n", num, datasize, unwindsize);
 
-		_stp_modules[0] = _stp_alloc_module(num, datasize);
+		_stp_modules[0] = _stp_alloc_module(num, datasize, unwindsize);
 		if (_stp_modules[0] == NULL) {
 			errk("cannot allocate memory\n");
 			return -EFAULT;
 		}
+		rwlock_init(&_stp_modules[0]->lock);
 		_stp_symbol_state = 1;
 		break;
 	case 1:
-		//kbug("got stap_symbols, count=%d\n", count);
+		dbug(DEBUG_SYMBOLS, "got stap_symbols, count=%d\n", count);
 		if (copy_from_user ((char *)_stp_modules[0]->symbols, buf, count))
 			return -EFAULT;
 		_stp_symbol_state = 2;
 		break;
 	case 2:
-		//kbug("got symbol data, count=%d buf=%p\n", count, buf);
+		dbug(DEBUG_SYMBOLS, "got symbol data, count=%d buf=%p\n", count, buf);
 		if (copy_from_user (_stp_modules[0]->symbol_data, buf, count))
 			return -EFAULT;
 		_stp_num_modules = 1;
@@ -227,8 +258,19 @@ static int _stp_do_symbols(const char __user *buf, int count)
                 /* NB: this mapping is used by kernel/_stext pseudo-relocations. */
 		_stp_modules[0]->text = _stp_kallsyms_lookup_name("_stext");
 		_stp_modules[0]->data = _stp_kallsyms_lookup_name("_etext");
+		_stp_modules[0]->text_size = _stp_modules[0]->data - _stp_modules[0]->text;		
 		_stp_modules_by_addr[0] = _stp_modules[0];
-		//kbug("done with symbol data\n");
+		dbug(DEBUG_SYMBOLS, "Got kernel symbols. text=%p len=%u\n", /* %p needs a pointer argument */
+		     (void *)(unsigned long)_stp_modules[0]->text, _stp_modules[0]->text_size);
+		break;
+	case 3:
+		dbug(DEBUG_UNWIND, "got unwind data, count=%d\n", count);
+		_stp_symbol_state = 4;
+		if (copy_from_user (_stp_modules[0]->unwind_data, buf, count)) {
+			_dbug("cfu failed\n");
+			return -EFAULT;
+		}
+		_stp_modules[0]->unwind_data_len = count;
 		break;
 	default:
 		errk("unexpected symbol data of size %d.\n", count);
@@ -266,10 +308,8 @@ static void u32_swap(void *a, void *b, int size)
 
 static void generic_swap(void *a, void *b, int size)
 {
-	char t;
-
 	do {
-		t = *(char *)a;
+		char t = *(char *)a;
 		*(char *)a++ = *(char *)b;
 		*(char *)b++ = t;
 	} while (--size > 0);
@@ -328,7 +368,7 @@ void _stp_sort(void *base, size_t num, size_t size,
 }
 
 /* Create a new _stp_module and load the symbols */
-static struct _stp_module *_stp_load_module_symbols (struct _stp_module *imod)
+static struct _stp_module *_stp_load_module_symbols (struct _stp_module *imod, uint32_t unwind_len)
 {
 	unsigned i, num=0;
 	struct module *m = (struct module *)imod->module;
@@ -336,12 +376,12 @@ static struct _stp_module *_stp_load_module_symbols (struct _stp_module *imod)
 	char *dataptr;
 
 	if (m == NULL) {
-		kbug("imod->module is NULL\n");
+		kbug(DEBUG_SYMBOLS, "imod->module is NULL\n");
 		return NULL;
 	}
 	if (try_module_get(m)) {
 
-		mod = _stp_alloc_module_from_module(m);
+		mod = _stp_alloc_module_from_module(m, unwind_len);
 		if (mod == NULL) {
 			module_put(m);
 			errk("failed to allocate memory for module.\n");
@@ -354,6 +394,8 @@ static struct _stp_module *_stp_load_module_symbols (struct _stp_module *imod)
 		mod->data = imod->data;
 		mod->num_sections = imod->num_sections;
 		mod->sections = imod->sections;
+		mod->text_size = m->core_text_size;
+		rwlock_init(&mod->lock);
 
 		/* now copy all the symbols we are interested in */
 		dataptr = mod->symbol_data;
@@ -375,24 +417,32 @@ static struct _stp_module *_stp_load_module_symbols (struct _stp_module *imod)
 	return mod;
 }
 
-/* Do we already have this module? */
-static int _stp_module_exists(struct _stp_module *mod)
+/* Remove any old module info from our database */
+static void _stp_module_exists_delete (struct _stp_module *mod)
 {
-	int i, res;
-	unsigned long flags;
-	// kbug("exists? %s\n", mod->name);
-	STP_LOCK_MODULES;
-	for (i = 1; i < _stp_num_modules; i++) {
-		res = strcmp(_stp_modules[i]->name, mod->name);
-		if (res > 0)
+	int i, num;
+
+	/* remove any old modules with the same name */
+	for (num = 1; num < _stp_num_modules; num++) {
+		if (strcmp(_stp_modules[num]->name, mod->name) == 0) {
+			dbug(DEBUG_SYMBOLS, "found existing module with name %s. Deleting.\n", mod->name);
+			_stp_del_module(_stp_modules[num]);
 			break;
-		if (res == 0 && _stp_modules[i]->module == mod->module) {
-			STP_UNLOCK_MODULES;
-			return 1;
 		}
 	}
-	STP_UNLOCK_MODULES;
-	return 0;
+
+	/* remove modules with overlapping addresses */
+	for (num = 1; num < _stp_num_modules; num++) {
+		if (mod->text + mod->text_size < _stp_modules_by_addr[num]->text)
+			continue;
+		if (mod->text < _stp_modules_by_addr[num]->text 
+		    + _stp_modules_by_addr[num]->text_size) {
+			dbug(DEBUG_SYMBOLS, "New module %s overlaps with old module %s. Deleting old.\n", 
+			     mod->name, _stp_modules_by_addr[num]->name);
+			_stp_del_module(_stp_modules_by_addr[num]);
+		}
+	}
+
 }
 
 static int _stp_ins_module(struct _stp_module *mod)
@@ -400,9 +450,11 @@ static int _stp_ins_module(struct _stp_module *mod)
 	int i, num, res, ret = 0;
 	unsigned long flags;
 
-	// kbug("insert %s\n", mod->name);
+	// kbug(DEBUG_SYMBOLS, "insert %s\n", mod->name);
 
-	STP_LOCK_MODULES;
+	STP_WLOCK_MODULES;
+
+	_stp_module_exists_delete(mod);
 
 	/* check for overflow */
 	if (_stp_num_modules == STP_MAX_MODULES) {
@@ -412,32 +464,25 @@ static int _stp_ins_module(struct _stp_module *mod)
 	}
 	
 	/* insert alphabetically in _stp_modules[] */
-	for (num = 1; num < _stp_num_modules; num++) {
-		res = strcmp(_stp_modules[num]->name, mod->name);
-		if (res < 0)
-			continue;
-		if (res > 0)
+	for (num = 1; num < _stp_num_modules; num++)
+		if (strcmp(_stp_modules[num]->name, mod->name) > 0)
 			break;
-		_stp_del_module(_stp_modules[num]);
-		break;
-	}
 	for (i = _stp_num_modules; i > num; i--)
 		_stp_modules[i] = _stp_modules[i-1];
 	_stp_modules[num] = mod;
 
 	/* insert by text address in _stp_modules_by_addr[] */
-	for (num = 1; num < _stp_num_modules; num++) {
-		if (_stp_modules_by_addr[num]->text > mod->text)
+	for (num = 1; num < _stp_num_modules; num++)
+		if (mod->text < _stp_modules_by_addr[num]->text)
 			break;
-	}
 	for (i = _stp_num_modules; i > num; i--)
 		_stp_modules_by_addr[i] = _stp_modules_by_addr[i-1];
 	_stp_modules_by_addr[num] = mod;
-
+	
 	_stp_num_modules++;
 
 done:
-	STP_UNLOCK_MODULES;
+	STP_WUNLOCK_MODULES;
 	return ret;
 }
 
@@ -456,13 +501,13 @@ static int _stp_do_module(const char __user *buf, int count)
 	if (copy_from_user ((char *)&tmpmod, buf, sizeof(tmpmod)))
 		return -EFAULT;
 
-	section_len = count - sizeof(tmpmod);
+	section_len = count - sizeof(tmpmod) - tmpmod.unwind_len;
 	if (section_len <= 0) {
 		errk("section_len = %d\n", section_len);
 		return -EFAULT;
 	}
-	kbug("Got module %s, count=%d section_len=%d\n", 
-	     tmpmod.name, count, section_len);
+	dbug(DEBUG_SYMBOLS, "Got module %s, count=%d section_len=%d unwind_len=%d\n", 
+	     tmpmod.name, count, section_len, tmpmod.unwind_len);
 
 	strcpy(mod.name, tmpmod.name);
 	mod.module = tmpmod.module;
@@ -470,9 +515,6 @@ static int _stp_do_module(const char __user *buf, int count)
 	mod.data = tmpmod.data;
 	mod.num_sections = tmpmod.num_sections;
 	
-	if (_stp_module_exists(&mod))
-		return count;
-
 	/* copy in section data */
 	mod.sections = _stp_kmalloc(section_len);
 	if (mod.sections == NULL) {
@@ -489,18 +531,27 @@ static int _stp_do_module(const char __user *buf, int count)
 				 + (long)((long)mod.sections + mod.num_sections * sizeof(struct _stp_symbol)));
 	}
 
-	#ifdef DEBUG_SYMBOLS
+	#if 0
 	for (i = 0; i < mod.num_sections; i++)
-		printk("section %d (stored at %p): %s %lx\n", i, &mod.sections[i], mod.sections[i].symbol, mod.sections[i].addr);
+		_dbug("section %d (stored at %p): %s %lx\n", i, &mod.sections[i], mod.sections[i].symbol, mod.sections[i].addr);
 	#endif
 
 	/* load symbols from tmpmod.module to mod */
-	m = _stp_load_module_symbols(&mod);	
+	m = _stp_load_module_symbols(&mod, tmpmod.unwind_len); 
 	if (m == NULL) {
 		_stp_kfree(mod.sections);
 		return 0;
 	}
 
+	dbug(DEBUG_SYMBOLS, "module %s loaded.  Text=%p text_size=%u\n", m->name, (void *)(unsigned long)m->text, m->text_size);
+	/* finally copy unwind info */
+	if (copy_from_user (m->unwind_data, buf+sizeof(tmpmod)+section_len, tmpmod.unwind_len)) {
+		/* _stp_free_module() also frees m->sections, which aliases mod.sections */
+		_stp_free_module(m);
+		return -EFAULT;
+	}
+	m->unwind_data_len = tmpmod.unwind_len;
+
 	if (_stp_ins_module(m) < 0) {
 		_stp_free_module(m);
 		return -ENOMEM;
@@ -513,20 +564,18 @@ static int _stp_ctl_send (int type, void *data, int len);
 
 static int _stp_module_load_notify(struct notifier_block * self, unsigned long val, void * data)
 {
-#ifdef CONFIG_MODULES
 	struct module *mod = (struct module *)data;
 	struct _stp_module rmod;
 
 	switch (val) {
 	case MODULE_STATE_COMING:
-		dbug("module %s loaded\n", mod->name);
+		dbug(DEBUG_SYMBOLS, "module %s load notify\n", mod->name);
 		strlcpy(rmod.name, mod->name, STP_MODULE_NAME_LEN);
 		_stp_ctl_send(STP_MODULE, &rmod, sizeof(struct _stp_module));
 		break;
 	default:
 		errk("module loaded? val=%ld\n", val);
 	}
-#endif
 	return 0;
 }
 
diff --git a/runtime/transport/transport.c b/runtime/transport/transport.c
index 6b90ee64..8335e44b 100644
--- a/runtime/transport/transport.c
+++ b/runtime/transport/transport.c
@@ -18,6 +18,7 @@
 #include <linux/namei.h>
 #include "transport.h"
 #include "time.c"
+#include "../mempool.c"
 #include "symbols.c"
 #include "../procfs.c"
 
@@ -76,7 +77,7 @@ static void _stp_ask_for_symbols(void)
 
 	if (sent_symbols == 0) {
 		/* ask for symbols and modules */
-		kbug("AFS\n");
+		kbug(DEBUG_SYMBOLS|DEBUG_TRANSPORT, "AFS\n");
 		
 		req.endian = 0x1234;
 		req.ptr_size = sizeof(char *);
@@ -94,7 +95,7 @@ static void _stp_ask_for_symbols(void)
 
 void _stp_handle_start (struct _stp_msg_start *st)
 {
-	kbug ("stp_handle_start\n");
+	kbug (DEBUG_TRANSPORT, "stp_handle_start\n");
 
 	if (register_module_notifier(&_stp_module_load_nb))
 		errk("failed to load module notifier\n");
@@ -116,7 +117,7 @@ void _stp_handle_start (struct _stp_msg_start *st)
 /* when someone does /sbin/rmmod on a loaded systemtap module. */
 static void _stp_cleanup_and_exit (int dont_rmmod)
 {
-	kbug("cleanup_and_exit (%d)\n", dont_rmmod);
+	kbug(DEBUG_TRANSPORT, "cleanup_and_exit (%d)\n", dont_rmmod);
 	if (!_stp_exit_called) {
 		int failures;
 
@@ -127,23 +128,23 @@ static void _stp_cleanup_and_exit (int dont_rmmod)
 		_stp_exit_called = 1;
 
 		if (_stp_probes_started) {
-			kbug("calling probe_exit\n");
+			kbug(DEBUG_TRANSPORT, "calling probe_exit\n");
 			/* tell the stap-generated code to unload its probes, etc */
 			probe_exit();
-			kbug("done with probe_exit\n");
+			kbug(DEBUG_TRANSPORT, "done with probe_exit\n");
 		}
 
 		failures = atomic_read(&_stp_transport_failures);
 		if (failures)
 			_stp_warn ("There were %d transport failures.\n", failures);
 
-		kbug("************** calling startstop 0 *************\n");
+		kbug(DEBUG_TRANSPORT, "************** calling startstop 0 *************\n");
 		if (_stp_utt) utt_trace_startstop(_stp_utt, 0, &utt_seq);
 
-		kbug("ctl_send STP_EXIT\n");
+		kbug(DEBUG_TRANSPORT, "ctl_send STP_EXIT\n");
 		/* tell staprun to exit (if it is still there) */
 		_stp_ctl_send(STP_EXIT, &dont_rmmod, sizeof(int));
-		kbug("done with ctl_send STP_EXIT\n");
+		kbug(DEBUG_TRANSPORT, "done with ctl_send STP_EXIT\n");
 	}
 }
 
@@ -152,7 +153,7 @@ static void _stp_cleanup_and_exit (int dont_rmmod)
  */
 static void _stp_detach(void)
 {
-	kbug("detach\n");
+	kbug(DEBUG_TRANSPORT, "detach\n");
 	_stp_attached = 0;
 	_stp_pid = 0;
 
@@ -168,7 +169,7 @@ static void _stp_detach(void)
  */
 static void _stp_attach(void)
 {
-	kbug("attach\n");
+	kbug(DEBUG_TRANSPORT, "attach\n");
 	_stp_attached = 1;
 	_stp_pid = current->pid;
 		utt_set_overwrite(0);
@@ -210,7 +211,7 @@ static void _stp_work_queue (void *data)
  */
 void _stp_transport_close()
 {
-	kbug("%d: ************** transport_close *************\n", current->pid);
+	kbug(DEBUG_TRANSPORT, "%d: ************** transport_close *************\n", current->pid);
 	_stp_cleanup_and_exit(1);
 	destroy_workqueue(_stp_wq);
 	_stp_unregister_ctl_channel();
@@ -219,7 +220,7 @@ void _stp_transport_close()
 	_stp_kill_time();
 	_stp_print_cleanup(); 	/* free print buffers */
 	_stp_mem_debug_done();
-	kbug("---- CLOSED ----\n");
+	kbug(DEBUG_TRANSPORT, "---- CLOSED ----\n");
 }
 
 
@@ -248,7 +249,7 @@ int _stp_transport_init(void)
 {
 	int ret;
 
-	kbug("transport_init\n");
+	kbug(DEBUG_TRANSPORT, "transport_init\n");
 	_stp_init_pid = current->pid;
 	_stp_uid = current->uid;
 	_stp_gid = current->gid;
@@ -263,7 +264,7 @@ int _stp_transport_init(void)
 		unsigned size = _stp_bufsize * 1024 * 1024;
 		_stp_subbuf_size = ((size >> 2) + 1) * 65536;
 		_stp_nsubbufs = size / _stp_subbuf_size;
-		kbug("Using %d subbufs of size %d\n", _stp_nsubbufs, _stp_subbuf_size);
+		kbug(DEBUG_TRANSPORT, "Using %d subbufs of size %d\n", _stp_nsubbufs, _stp_subbuf_size);
 	}
 
 	/* initialize timer code */
@@ -388,12 +389,12 @@ static struct dentry *_stp_get_root_dir(const char *name) {
 		_stp_lock_inode(sb->s_root->d_inode);
 		root = lookup_one_len(name, sb->s_root, strlen(name));
 		_stp_unlock_inode(sb->s_root->d_inode);
-		kbug("root=%p\n", root);
+		kbug(DEBUG_TRANSPORT, "root=%p\n", root);
 		if (!IS_ERR(root))
 			dput(root);
 		else {
 			root = NULL;
-			kbug("Could not create or find transport directory.\n");
+			kbug(DEBUG_TRANSPORT, "Could not create or find transport directory.\n");
 		}
 	}
 	_stp_unlock_debugfs();
diff --git a/runtime/transport/transport_msgs.h b/runtime/transport/transport_msgs.h
index b2187cd5..55de2d4a 100644
--- a/runtime/transport/transport_msgs.h
+++ b/runtime/transport/transport_msgs.h
@@ -36,7 +36,30 @@ enum
 	STP_SUBBUFS_CONSUMED,
 	STP_REALTIME_DATA,
 #endif
+
+	STP_MAX_CMD
+};
+
+#ifdef DEBUG_TRANSPORT
+static const char *_stp_command_name[] = {
+	"STP_START",
+	"STP_EXIT",
+	"STP_OOB_DATA",
+	"STP_SYSTEM",
+	"STP_SYMBOLS",
+	"STP_MODULE",
+	"STP_TRANSPORT",
+	"STP_CONNECT",
+	"STP_DISCONNECT",
+	"STP_BULK",
+	"STP_READY",
+#ifdef STP_OLD_TRANSPORT
+	"STP_BUF_INFO",
+	"STP_SUBBUFS_CONSUMED",
+	"STP_REALTIME_DATA",
+#endif
 };
+#endif /* DEBUG_TRANSPORT */
 
 /* control channel messages */