summaryrefslogtreecommitdiffstats
path: root/doc
diff options
context:
space:
mode:
Diffstat (limited to 'doc')
-rw-r--r--doc/Makefile.am13
-rw-r--r--doc/action-call.dot33
-rw-r--r--doc/action_state.dot34
-rw-r--r--doc/batch_state.dot28
-rw-r--r--doc/build_from_repo.html29
-rw-r--r--doc/debug.html2
-rw-r--r--doc/design.tex887
-rw-r--r--doc/dev_oplugins.html179
-rw-r--r--doc/highperf.txt4
-rw-r--r--doc/imfile.html19
-rw-r--r--doc/impstats.html64
-rw-r--r--doc/imptcp.html20
-rw-r--r--doc/imtcp.html85
-rw-r--r--doc/imudp.html58
-rw-r--r--doc/imuxsock.html92
-rw-r--r--doc/index.html2
-rw-r--r--doc/manual.html24
-rw-r--r--doc/messageparser.html222
-rw-r--r--doc/mmsnmptrapd.html95
-rw-r--r--doc/module_workflow.pngbin0 -> 14749 bytes
-rw-r--r--doc/msgflow.txt56
-rw-r--r--doc/multi_ruleset.html33
-rw-r--r--doc/omhdfs.html69
-rw-r--r--doc/ommysql.html12
-rw-r--r--doc/omruleset.html140
-rw-r--r--doc/omstdout.html42
-rw-r--r--doc/omudpspoof.html92
-rw-r--r--doc/pmlastmsg.html62
-rw-r--r--doc/property_replacer.html6
-rw-r--r--doc/queue_msg_state.dot25
-rw-r--r--doc/queue_msg_state.jpegbin0 -> 12499 bytes
-rw-r--r--doc/queues.html27
-rw-r--r--doc/rfc5424layers.pngbin0 -> 10605 bytes
-rw-r--r--doc/rsconf1_abortonuncleanconfig.html37
-rw-r--r--doc/rsconf1_escape8bitcharsonreceive.html44
-rw-r--r--doc/rsconf1_generateconfiggraph.html8
-rw-r--r--doc/rsconf1_omfileforcechown.html5
-rw-r--r--doc/rsconf1_rulesetcreatemainqueue.html83
-rw-r--r--doc/rsconf1_rulesetparser.html123
-rw-r--r--doc/rsyslog_conf.html11
-rw-r--r--doc/rsyslog_conf_global.html22
-rw-r--r--doc/rsyslog_conf_modules.html113
-rw-r--r--doc/rsyslog_conf_templates.html46
-rw-r--r--doc/rsyslog_ng_comparison.html9
-rw-r--r--doc/rsyslog_pgsql.html4
-rw-r--r--doc/rsyslog_queue_pointers.jpegbin0 -> 9226 bytes
-rw-r--r--doc/rsyslog_queue_pointers2.jpegbin0 -> 20459 bytes
-rw-r--r--doc/rsyslog_secure_tls.html2
-rw-r--r--doc/rsyslog_tls.html18
-rw-r--r--doc/src/classes.diabin4575 -> 6273 bytes
-rw-r--r--doc/src/module_workflow.diabin0 -> 1700 bytes
-rw-r--r--doc/src/rfc5424layers.diabin0 -> 1205 bytes
-rw-r--r--doc/src/rsyslog_queue_pointers.diabin0 -> 1657 bytes
-rw-r--r--doc/src/rsyslog_queue_pointers2.diabin0 -> 2899 bytes
-rw-r--r--doc/src/tls.diabin4656 -> 5201 bytes
-rw-r--r--doc/status.html54
-rw-r--r--doc/syslog_parsing.html18
-rw-r--r--doc/tls_cert_server.html9
-rw-r--r--doc/troubleshoot.html54
-rw-r--r--doc/v5compatibility.html30
60 files changed, 2983 insertions, 161 deletions
diff --git a/doc/Makefile.am b/doc/Makefile.am
index 72954e9c..91d92afd 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -31,6 +31,9 @@ html_files = \
version_naming.html \
contributors.html \
dev_queue.html \
+ omstdout.html \
+ omudpspoof.html \
+ omruleset.html \
omsnmp.html \
ommysql.html \
omoracle.html \
@@ -38,11 +41,14 @@ html_files = \
imfile.html \
imtcp.html \
imptcp.html \
+ impstats.html \
imgssapi.html \
imrelp.html \
imsolaris.html \
imuxsock.html \
imklog.html \
+ pmlastmsg.html \
+ mmsnmptrapd.html \
queues.html \
src/queueWorkerLogic.dia \
queueWorkerLogic.jpg \
@@ -65,6 +71,7 @@ html_files = \
rsconf1_actionresumeinterval.html \
rsconf1_allowedsender.html \
rsconf1_controlcharacterescapeprefix.html \
+ rsconf1_escape8bitcharsonreceive.html \
rsconf1_debugprintcfsyslinehandlerlist.html \
rsconf1_debugprintmodulelist.html \
rsconf1_debugprinttemplatelist.html \
@@ -90,9 +97,12 @@ html_files = \
rsconf1_moddir.html \
rsconf1_repeatedmsgreduction.html \
rsconf1_resetconfigvariables.html \
+ rsconf1_rulesetcreatemainqueue.html \
rsconf1_umask.html \
+ rsconf1_rulesetparser.html \
v3compatibility.html \
v4compatibility.html \
+ v5compatibility.html \
im3195.html \
netstream.html \
ns_gtls.html \
@@ -120,6 +130,7 @@ html_files = \
grfx_files = \
rsyslog_confgraph_complex.png\
rsyslog_confgraph_std.png \
+ module_workflow.png \
direct_queue0.png \
direct_queue1.png \
direct_queue2.png \
@@ -130,6 +141,8 @@ grfx_files = \
dataflow.png \
queue_analogy_tv.png \
gssapi.png \
+ rfc5424layers.png \
+ src/rfc5424layers.dia \
rsyslog-vers.png
EXTRA_DIST = $(html_files) $(grfx_files)
diff --git a/doc/action-call.dot b/doc/action-call.dot
new file mode 100644
index 00000000..86c6834d
--- /dev/null
+++ b/doc/action-call.dot
@@ -0,0 +1,33 @@
+// This file is part of rsyslog.
+//
+// rsyslog action call state diagram
+//
+// see http://www.graphviz.org for how to obtain the graphviz processor
+// which is used to build the actual graph.
+//
+// generate the graph with
+// $ dot action-call.dot -Tpng >action-call.png
+
+digraph G {
+ label="\n\nrsyslog message states during action processing\nhttp://www.rsyslog.com";
+ //fontsize=20;
+
+ ok [label="ready for processing" color="green"];
+ mpf [label="message permanent failure" color="red"];
+ tf [label="temporary failure"]
+ cPen [label="commit pending"];
+ com [label="committed" color="red"];
+
+ tf -> tf [label="retry fails, i < n"];
+ tf -> mpf [label="retry fails, i = n"];
+ tf -> ok [label="retry succeeds"];
+ ok -> com [label="doAction RS_RET_OK"];
+ ok -> cPen [label="doAction COMMIT_PENDING"];
+ ok -> tf [label="doAction RS_RET_SUSPENDED"];
+ ok -> mpf [label="doAction RS_RET_DISABLED"];
+ cPen -> com [label="endTransaction RS_RET_OK"];
+ cPen -> tf [label="endTransaction _SUSPENDED"];
+
+ //{rank=same; tf cPen}
+ {rank=same; com mpf}
+}
diff --git a/doc/action_state.dot b/doc/action_state.dot
new file mode 100644
index 00000000..2f36d8da
--- /dev/null
+++ b/doc/action_state.dot
@@ -0,0 +1,34 @@
+// This file is part of rsyslog.
+//
+// rsyslog message state diagram
+//
+// see http://www.graphviz.org for how to obtain the graphviz processor
+// which is used to build the actual graph.
+//
+// generate the graph with
+// $ dot file.dot -Tpng >file.png
+
+digraph msgState {
+ compound=true; nodesep=1.0
+ //label="\n\nrsyslog action transaction states\nhttp://www.rsyslog.com";
+ //fontsize=20;
+
+ rdy [label="ready" group="main"];
+ itx [label="in Tx" group="main"];
+ comm [label="commit"]
+ rtry [label="retry"]
+ susp [label="suspended"]
+
+ rdy -> itx [label="transaction begins"]
+ rdy -> rtry [label="begin tx\nerror"]
+ itx -> itx [label="success"]
+ itx -> comm [label="commit\n(caller or auto)"]
+ itx -> rtry [label="error"]
+ comm -> rdy [label="success"]
+ comm -> rtry [label="error"]
+ rtry -> rdy [label="recovered"]
+ rtry -> susp [label="could not\nrecover"]
+ susp -> rtry [label="timeout expired"]
+
+ {rank=same; comm rtry}
+}
diff --git a/doc/batch_state.dot b/doc/batch_state.dot
new file mode 100644
index 00000000..0dd48b47
--- /dev/null
+++ b/doc/batch_state.dot
@@ -0,0 +1,28 @@
+// This file is part of rsyslog.
+//
+// rsyslog batch state diagram
+//
+// see http://www.graphviz.org for how to obtain the graphviz processor
+// which is used to build the actual graph.
+//
+// generate the graph with
+// $ dot file.dot -Tpng >file.png
+
+digraph msgState {
+ compound=true; nodesep=1.0
+ //label="\n\nrsyslog batch states\nhttp://www.rsyslog.com";
+ rankdir=LR
+
+ rdy [label="ready"];
+ bad [label="message-caused\nfailure"];
+ sub [label="submitted"]
+ disc [label="discarded" color="red"]
+
+ rdy -> sub [label="submitted to action"]
+ rdy -> bad [label="permanent fail"]
+ rdy -> disc [label="action requests discarding"]
+ sub -> rdy [label="next action or\naction-caused failure"]
+ bad -> rdy [label="next action"]
+
+ //{rank=same; comm rtry }
+}
diff --git a/doc/build_from_repo.html b/doc/build_from_repo.html
index 8d3b20fe..a06863e1 100644
--- a/doc/build_from_repo.html
+++ b/doc/build_from_repo.html
@@ -43,12 +43,37 @@ you downloaded an official distribution tarball (see the
<a href="install.html">rsyslog install guide</a>, starting at step 2,
for further details about that).
+<h2>Special Compile-Time Options</h2>
+<p>On some platforms, compile-time issues occur, like the one shown below:
+<p><pre><code>
+make[2]: Entering directory `/home/az/RSyslog/rsyslog-5.5.0/tools'
+ CCLD rsyslogd
+rsyslogd-omfile.o: In function `getClockFileAccess':
+/home/az/RSyslog/rsyslog-5.5.0/tools/omfile.c:91: undefined reference to `__sync_fetch_and_add_8'
+/home/az/RSyslog/rsyslog-5.5.0/tools/omfile.c:91: undefined reference to `__sync_fetch_and_add_8'
+/home/az/RSyslog/rsyslog-5.5.0/tools/omfile.c:91: undefined reference to `__sync_fetch_and_add_8'
+</code></pre>
+<p>Note that the exact error messages can be different. These types of errors boil down to
+atomic instruction support in GCC, which depends somewhat on the machine architecture it
+compiles code for. Very old machines (like the original i386) do not provide any support
+for these instructions.
+<p>The availability of atomic instructions is vital for rsyslog - it can not be built without them.
+Consequently, there is a configure check included for them. But under some circumstances,
+GCC seems to report they are available, but does not provide implementations for
+all of them (at least this is my observation...). The simple cure is to make sure that
+GCC generates code for a modern-enough architecture. This, for example, can be done as
+follows:
+<p><pre><code>
+./configure CFLAGS="-march=i586 -mcpu=i686" --enable-imfile ... (whatever you need)
+</code></pre>
+<p>These settings should resolve the issue.
+
<p>[<a href="manual.html">manual index</a>]
[<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
<p><font size="2">This documentation is part of the
<a href="http://www.rsyslog.com/">rsyslog</a> project.<br>
-Copyright &copy; 2008 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
+Copyright &copy; 2008, 2009 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
<a href="http://www.adiscon.com/">Adiscon</a>. Released under the GNU GPL
-version 1.2 or higher.</font></p>
+version 3 or higher.</font></p>
</body>
</html>
diff --git a/doc/debug.html b/doc/debug.html
index 46759986..6aeb7975 100644
--- a/doc/debug.html
+++ b/doc/debug.html
@@ -138,7 +138,7 @@ instance of rsyslogd can be aborted by pressing ctl-c.
<p>[<a href="manual.html">manual index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
<p><font size="2">This documentation is part of the
<a href="http://www.rsyslog.com/">rsyslog</a> project.<br>
-Copyright &copy; 2008, 2009 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
+Copyright &copy; 2008-2010 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
<a href="http://www.adiscon.com/">Adiscon</a>.
Released under the GNU GPL version 3 or higher.</font></p>
</body>
diff --git a/doc/design.tex b/doc/design.tex
new file mode 100644
index 00000000..a3ec8f45
--- /dev/null
+++ b/doc/design.tex
@@ -0,0 +1,887 @@
+\documentclass[a4paper,10pt]{article}
+\usepackage{amsmath}
+\usepackage{amsfonts}
+\usepackage{amssymb}
+\usepackage{graphicx}
+\usepackage{listings}
+\usepackage{algorithm,algorithmic}
+\usepackage{float}
+
+\pagestyle{headings}
+
+\newcommand{\IN}{\mathbb{N}}
+\newcommand{\MM}{\mathcal{M}}
+\newcommand{\QQ}{\mathcal{Q}}
+\newcommand{\AAA}{\mathcal{A}}
+\title{Rsyslog Design and Internals}
+\author{Rainer Gerhards\\
+rgerhards@adiscon.com}
+
+\begin{document}
+
+\maketitle
+
+\begin{abstract}
+This paper describes rsyslog design and internals. It is created to facilitate a discussion about the implementation of "batched queue processing". As such, it does not describe the full design of rsyslog but rather those elements that are relevant to queues. However, the document may be expanded in the future. This is work in progress and should be considered with care. It is NOT updated during all phases of development.
+\end{abstract}
+
+\tableofcontents
+
+\section{Preliminaries}
+\subsection{On the Use of English}
+\begin{quotation}
+\begin{flushright}
+I ventured to write this book in English because ... \\
+it will be more easily read in poor English, \\
+than in good German by 90\% of my intended readers. \\
+--- HANS J. STETTER, Analysis of Discretization Methods for \\
+Ordinary Differential Equations (1973)
+\end{flushright}
+\end{quotation}
+
+There is not much I could add to Mr. Stetter's thought, except, maybe, that the number to quote probably tends more to 99\% in this case than to the 90\% Mr. Stetter notes. So please pardon those errors in language use that I have not yet been able to fix or even see. Suggestions for corrections and improvements are always welcome.
+\subsection{Notational Conventions}
+In general, in rsyslog there exist single objects $o$, which are used to build larger sets $O$, which form a superset $\mathcal{O}$ of all those objects that exist at a given time inside a running instance of rsyslog. As seen above, single objects are always described by lower case letters ($o$), larger sets by upper case letters ($O$) and the ``all-sets'' in calligraphic letters ($\mathcal{O}$). Often, objects $O_i, i \in \IN, i \le |\mathcal{O}|$ partition $\mathcal{O}$, but this is not necessarily the case.
+
+\subsection{Definitions}
+\subsubsection{Sudden Fatal Failure}
+A sudden fatal failure is one that occurs at some instant and causes complete loss of processing capabilities. The two major cases are a sudden power loss or a ``kill -9'' of the process. There are more exotic cases, too, like disasters.
+
+One may argue that it is possible to protect against many sudden fatal failure cases. For example, using an uninterruptible power supply (UPS) will prevent a sudden power loss. While this is true in most cases, it does not hold if looked at very closely: in the case of the UPS, for example, a failure in the UPS itself may cause a sudden power loss, which can not be mitigated. Well, actually there can be several layers of mitigation, but always one more potential failure scenario remains. So it is not possible to totally solve the issue.
+
+The concept of ``sudden fatal failure'' now covers all these residual risks that result in termination of rsyslogd without the ability to execute any code before this happens. This is a very important concept in regard to audit-gradeness.
+
+\subsubsection{Audit Grade}
+In the context of this document, ``audit grade'' means that a subsystem never loses a message that it has taken responsibility for, not even in cases of sudden fatal failures. The only limit in this restriction is that a subsystem does not guarantee message survival if the subsystem at large is being destroyed (e.g. during a disaster) or some of its components are not of audit grade. This draws a fine limitation on the audit grade of a subsystem.
+
+For example, the rsyslog queue subsystem receives messages and acknowledges them to the submitter (e.g. an input), when they have been enqueued in the storage system. If the queue system is configured to provide audit-grade operation\footnote{Audit-grade queue operation is considerably slower than regular operations, as such this mode is not enabled by default. Most installations will never need a completely audit-grade queue}, the queue relies on the storage subsystem to work properly. If, for example, a disk read error occurs, the message may no longer be readable from the disk and as such is lost. The root cause here is that the disk subsystem was not of audit grade, because it otherwise would not have lost the message. So in this case the queue code is of audit grade, but one of its components, the disk subsystem, was not. So the overall system is not of audit grade.
+
+To simplify talking about the audit-gradeness of several subsystems, we assume that all of their subsystems are also of audit grade. In an actual deployment, however, this means that the system designer must carefully select audit-grade subsystems. Overlooking a single non-audit-grade component will make the whole system not of audit-grade quality.
+
+Please note that it can be rather tricky to ensure a complete system is of audit grade. A border case is main memory integrity. Even with error-correcting memory, situations may arise where a memory error occurs (probably due to a very unlikely series of well-hitting cosmic rays) that is unrecoverable. At this point, system integrity is at risk. The only real solution is to immediately shut down the system and restart it (without giving any process a chance to execute). Note, however, that in an extreme view, an operating system routine that does so can also be considered dangerous, as memory in use by this routine might be affected by the malfunction. We could extend this scenario and further complicate it, but that goes beyond the scope of this paper. The example was primarily meant to show how subtle audit-grade reliability is.
+
+In rsyslog, we currently use a slightly \marginpar{duplication\\permitted}relaxed consistency condition for message integrity inside an audit-grade subsystem. While we do not accept message loss, we permit slight message \emph{duplication}, but only in exceptional cases. This is permitted because, with proper message generation, the duplication problem can be easily fixed at the end-to-end layer. For example, the original sender can include a UUID, which can be used to sort out duplicates at the final destination. Insisting on not allowing duplication complicates matters and is often impossible with today's logging protocols. So, for the time being, we aim at this relaxed criterion, which is hard enough to achieve. After we have achieved that goal, we may further try to solve the duplication problem. Some hooks already exist. But we do not guarantee such an effort will be made any time soon.
+
+\section{Overall Design}
+From a high-level perspective, rsyslogd is ``just'' a high-performance message router. It accepts messages from various sources, applies user-configured filters to them, and routes potentially transformed messages to destinations based on these filters.
+\section{Objects}
+\subsection{Plugins}
+Plugins provide code potentially written by a third party to extend rsyslog.
+
+Conceptually, a plugin is a tuple of callable functions $(\phi_1, \phi_2, \ldots)$ which implement an interface. There are three different types of plugins: input, output and library. The plugin type denotes the primary interface implemented by the plugin. Additional interfaces may be implemented\footnote{This is not yet done in plugins, but is possible and assumed to be done at a later point in time}.
+
+In the context of this paper, the output plugin interface is most important. It implements three entry points:
+
+\paragraph{doAction()}
+is used to submit messages to the output plugin. The entry point may or may not commit the messages to their ultimate destination.
+
+\paragraph{beginTransaction()}
+is used to inform the plugin that a new transaction begins. It must prepare for processing.
+
+\paragraph{endTransaction()}
+is used to indicate that the upper layer \emph{needs} to close the transaction. If there is any uncommitted data left, it must be committed or rolled back.
+
+Every instance of an output plugin is guaranteed \emph{not} to be called concurrently by multiple threads. Further, no context switch will happen between calls to $doAction()$ and $endTransaction()$.
+
+\subsection{State Sets}
+Several objects have associated state based on a specific state set. These state sets are described together with the objects.
+
+As a general rule, individual state is associated with all instances $o$ of a class of objects. This state is called the object's \marginpar{state component} \emph{state component} $s$. If we want to obtain an object's state, we write $S(o)$. Please note that $S(o)$ is only defined for those objects that have a state component.
+
+\subsection{Messages}
+A message $m$ represents a single syslog message inside the system. It is a tuple of attributes. Some of these attributes directly originate from the message content, some others are meta-information taken from the context. For example, there is a meta-attribute ``time of reception'' which conveys when the message was received by rsyslog's input subsystem. We do not list attributes here, as there are many and it is not of importance which exactly they are.
+
+The set $\MM$ is composed of all messages that exist at a given time inside rsyslog.
+
+\subsection{Queue}
+A queue
+$$Q = (C, \Phi, M)$$
+is a triplet of a set of configuration parameters $C$, a set of callbacks $\Phi$ and a set of messages $M \subseteq \MM$.
+
+If we need to obtain the set of messages from a queue, we write $M(Q)$. The elements of the set of configuration parameters are written as $C_{param}$ where $param$ is an abbreviation of the parameter's meaning. To obtain a specific parameter from a queue, we write $C_{param}(Q)$. The most important elements of $C$ are:
+
+\paragraph{$C_{type}$} which denotes the queue implementation type. Most importantly, this selects from a set of queue drivers (for example disk-only or in-memory driver), which affects the basic operation of the queue instance.
+
+\paragraph{$C_{mMsg}$} which denotes the upper bound on the cardinality of $M$.
+
+\paragraph{$C_{mBatch}$} which denotes the upper bound of the cardinality of message batches created for this queue.
+
+Be $\QQ = \{Q_m, Q_1, Q_2, \ldots, Q_{|\AAA|}\}$ the set of all queues that exist inside rsyslog after the configuration file has been processed, with $|\QQ| = |\AAA| + 1$.
+
+Then
+$$M_0 = \MM \setminus \bigcup_{Q \in \QQ} M(Q)$$
+\marginpar{at-risk-set}is the set of non-queued messages. The messages have either never been enqueued or have been dequeued but not finally been processed. This set represents the messages that may potentially be lost during an unclean shutdown of rsyslogd. This is why I call this set the ``\emph{at-risk-set}''.
+
+
+\subsection{Batches}
+A batch represents multiple processable messages. It is a unit of processing inside rsyslog's output system. Batches are used to dequeue a number of messages from a queue and then submit them to the lower action layer. Batches are natural \emph{transaction boundaries}, in the sense that multiple output transactions may be done on the messages inside a batch, but each transaction must end at the end of the batch. A batch is always associated to a specific queue $Q$.
+
+A batch
+$$B = (b_1, b_2, \ldots, b_n )$$
+is a $n$-tuple of \marginpar{processable\\message}processable messages
+$$b = (m, s)$$
+which are an ordered pair of a message $m$ and an associated processing state $s$. To denote the $n$-th message inside the batch, we write $m(b_n)$, to denote the status component of the $n$-th message, we write $S(b_n)$.
+
+\begin{figure}
+\begin{center}
+\includegraphics[scale=0.4]{batch_state.jpeg}
+\end{center}
+\caption{batch message processing states}
+\label{fig_batchmsg_states}
+\end{figure}
+
+The state set for the processing states is defined as follows:
+$$
+S_B = \{ rdy, bad, sub, disc \}
+$$
+
+With the semantics of the various states being the following:
+
+\begin{center}
+\begin{tabular}{|l|l|} \hline
+ State & Semantics \\\hline
+ rdy & ready for processing\\
+ bad & this message triggered an unrecoverable failure in action\\
+ & processing and must not be resubmitted to this action\\
+ sub & message submitted for processing, result yet unknown \\
+ disc & action successfully processed, but must not be submitted \\
+ & to any further action in action unit \\\hline
+\end{tabular}
+\end{center}
+The associated state diagram is shown in figure \ref{fig_batchmsg_states} on page \pageref{fig_batchmsg_states}.
+
+Batch sizes vary. The actual cardinality is a function of the cardinality of $M(Q)$ at the time of batch creation and the queue configuration:
+
+$$1 \leq |B| \leq \min(C_{mBatch}(Q), |M(Q)|)$$
+
+\subsection{Action Unit}
+An action unit
+$$u = (f, a_1, \ldots, a_n), a_i \in \AAA \text{ for } i \in \IN, i \le n$$
+is a tuple consisting of a filter function $f$ and $n \in \IN$ actions. \emph{Does rsyslog still support nonsense action units with $n=0$? - check!}
+
+\subsection{Action}
+An action
+$$a = (a_C, a_\psi)$$
+is an ordered pair of a tuple of configuration attributes $a_C$, and a tuple of processing functions $a_\psi$. Be the set $\AAA$ composed of all actions that exist in rsyslog after the configuration file has been processed.
+
+
+\section{Processing}
+\subsection{Object States}
+Various objects keep state. Some of these objects, like messages, batches and actions seem to share state. However, thinking about shared state leads to very complex setup. As such, state is modelled for each object $o$ individually. Instead, the state function $S_O(o)$ can be used to obtain an individual object's state. That state can be used to modify the state diagrams of the other objects with which relationships exist.
+
+\subsubsection{Actions}
+Actions are provided by output plugins. An action enables the engine to write messages to some destination. It is important to note that ``destination'' is a very broad abstraction. A destination may be a file inside a local or remote file system, a database table or a remote syslog server in another network.
+
+Actions are transactional in the following sense: more than one message can be submitted to an action. The action does not necessarily process the submitted messages unless the caller ends the transaction. However, the action itself may also end the transaction and notify the caller. This is \emph{not} considered an error condition and \emph{must} be handled gracefully by the caller. If a transaction aborts, the caller \emph{must} assume that none of the elements submitted since the begin of transaction have been processed. The action will try to back out anything that was already processed at the time the transaction failed. However, not all outputs work on actually transactional destinations. As such, an action is permitted not to back out incomplete interim results. As such, after a transaction abort, some message duplication may occur. We call this the \emph{relaxed integrity condition} for actions.
+
+An output transaction is started by calling \emph{beginTransaction()} either explicitly or implicitly by a call to \emph{doAction()} without calling \emph{beginTransaction()} before. Then, one or more calls to \emph{doAction()} follow. When the caller intends to finish the transaction, it calls \emph{endTransaction()}. However, the transaction may also be terminated from the action itself in response to a \emph{doAction()} call.
+
+Mathematically, an action transaction builds a totally ordered set of uncommitted messages $M_u$. The order relation is defined over the sequence in which messages are being provided to \emph{doAction()}. At any time a commit is attempted, the full set $M_u$ is committed and may either succeed completely or not at all (in the sense of the relaxed integrity condition described above).
+
+A commit is attempted when
+\begin{enumerate}
+\item the caller decides to call \emph{endTransaction()}
+\item or earlier if the action decides it needs to commit now (e.g. because of buffers filling up).
+\end{enumerate}
+
+In the second case, the action may decide to commit all messages but the current one or all of them (this depends on the action logic). So if the action decides to commit a transaction before the caller calls \emph{endTransaction()}, a set of committed messages $M_c$ is built and $M_u$ is modified. Let $n$ denote the $n$-th iterated \emph{doAction()} call and $m_n$ the current message of this call; then the sets are built as follows:
+
+\begin{algorithm}
+%\caption{}
+\begin{algorithmic}
+\IF{action commits $m_n$}
+ \STATE $M_c = M_u \cup \{ m_n\}$
+ \STATE $M_u = \emptyset$
+\ELSE
+ \STATE $M_c = M_u$
+ \STATE $M_u = \{ m_n\}$
+\ENDIF
+\end{algorithmic}
+\end{algorithm}
+
+In other words, if anything is committed early, it is always the full set $M_u$, with or without the current message. The caller needs to know which messages are already committed. As \emph{doAction()} finishes one transaction and starts a new one in a single call, we can not use action state to let the caller know this happened. So we use our above finding and just convey back if the transaction is still continuing or the current message or all others before it were committed. The caller must then act accordingly. Please note that when an error happens, the whole transaction must still be considered failed. As such, ``partial commit'' states need not be mixed with failure states.
+
+Please note that the above method leaves a small potential issue unaddressed: if the action does an early commit of $M_u \setminus \{m_n\}$ and an error happens when adding $m_n$ to the new $M_u$ (like running out of resources), the action would need to convey both the successful transaction as well as the failure state. This is not possible with the current interface. We could use callbacks to provide such notification, but this complicates the code. So, if that situation arises, the action must temporarily buffer the error condition and convey it as part of either the next \emph{doAction()} call or during \emph{endTransaction()} processing. This can be done, for example, by advancing its internal state accordingly.
+
+The state set for actions is defined as follows:
+$$
+S_A = \{ rdy, itx, comm, rtry, susp, died \}
+$$
+
+With the semantics of the various states being the following:
+
+\begin{center}
+\begin{tabular}{|l|l|} \hline
+ State & Semantics \\\hline
+ rdy & ready, waiting for transaction begin\\
+ itx & in transaction, accept more data \\
+ comm & transaction finished \\
+ rtry & action failed but may be able to recover \\
+ susp & action currently defunctional until timeout expires \\
+ died & unrecoverable error condition occurred, no longer usable \\\hline
+\end{tabular}
+\end{center}
+
+In the associated state diagram in figure \ref{fig_action_states}, we do not include the \emph{died} state, because it is entered whenever a totally unrecoverable error state may occur. This is a very exceptional incident (which most output plugins do not even support), so we have kept the diagram simple.
+
+\begin{figure}
+\begin{center}
+\includegraphics[scale=0.5]{action_state.jpeg}
+\end{center}
+\caption{Action State Diagram}
+\label{fig_action_states}
+\end{figure}
+
+\emph{Note well} that the state diagram describes the action state. It does \emph{not} describe the transaction state. While action- and transaction state are closely related to each other, they are different entities.
+
+The return code of \emph{doAction()} and \emph{endTransaction()} is used to convey the transaction state. As such, it is a function of the action's current state after processing the request. The mapping is as shown below:
+
+\begin{center}
+\begin{tabular}{|l|l|} \hline
+ State & Return Code (RS\_RET\_\ldots)\\\hline
+ rdy & OK \\
+ itx & COMMITTED (if there was an auto-commit without $m_n$)\\
+ & DEFER\_COMMIT (if there was no auto-commit)\\
+ comm & internal state, not to be exposed to upper layer \\
+ rtry & SUSPENDED \emph{(new code needed)} \\
+ susp & SUSPENDED \\
+ died & DISABLED \\\hline
+\end{tabular}
+\end{center}
+
+For the rest of this document, let's assume there is a function \emph{getReturnCode()} that implements this mapping.
+
+It is important to think about how retries are handled. There is a user-configured per-action upper number of retries $C_r$ and retry interval $C_i$. In \emph{rsyslog v3}, there is no concept of output transactions. As such, only single messages are processed. When a temporary action failure occurs, the action is re-tried $C_r$ times, where the action processing thread is waiting in a \emph{sleep()} $C_i$ operating system API call\footnote{a suitable API is used, not \emph{sleep()} itself}. If the action succeeds during the retry processing, everything continues as usual. If it does not succeed, two things happen:
+\begin{itemize}
+\item the message is flagged as ``action permanent failure'' (which may trigger backup processing)
+\item the action is actually suspended for $C_i$ seconds
+\end{itemize}
+If then a new message is sent to the action, and $C_i$ seconds have not yet elapsed, the action is flagged as having failed without being re-tried again\footnote{During the analysis for this paper, it was seen that actually $C_r$ retries are attempted in v3, but each of them will never actually re-try the action. This is a software bug, which does not cause any harm and thus will not be fixed in v3. The new implementation in v4 will obviously not inherit this problem}. This is done in an effort to reduce resource utilization and prevent the system from slowing down e.g. by too-many retries to a remote server that went offline.
+
+With transactional output mode in \emph{rsyslog v4}, the logic above can no longer work. First of all, retrying single actions does not help, because all of the current transaction needs to be resubmitted. As such, the upper layers need to be notified of failure. Then, they need to resubmit the batch. In that design, the lower layer needs to return immediately after detecting the failure. Recovery handling is now to be done when the next transaction is started. However, we must make sure that we do not do excessive retries. So retry processing is only to be carried out if it was not tried less than $C_i$ seconds ago.
+
+The required functionality can be implemented by a \emph{prepareAction} function that readies the action for processing if there is need to do so. That function is then called in all entry points before anything else is done. Then, actual processing is carried out and the resulting action state is used to generate the return code for the upper-layer caller. Find below a rough pseudocode to do so:
+
+\lstset{language=python}
+\begin{lstlisting}
+def prepareAction():
+ if state == rtry:
+ try recovery (adjust state accordingly)
+ if state == rdy:
+ beginTransaction() [output plugin]
+
+def processMessage(message):
+ prepareAction()
+ if state == itx
+ doAction(message) [output plugin]
+ return getReturnCode()
+
+def doEndTransaction():
+ prepareAction()
+ if state == itx
+ endTransaction(); [output plugin]
+ return getReturnCode()
+\end{lstlisting}
+
+\subsection{Output Subsystem Layers}
+The rsyslog engine is organized in layers, where each layer is represented by the dominating object:
+
+\begin{figure}
+\includegraphics[scale=0.75]{rsyslog_output_layers.jpeg}
+\label{rsyslog output layers}
+\end{figure}
+
+If looking at the data flow, a queue dequeues batches of messages, which are then run through a generic action system and put into output plugins. Note that on the batch layer, only batches are supported as units of work, whereas the action layer is message-oriented but supports transactions of multiple messages. This is done by indicating when a transaction necessarily needs to end (that point being the end of batch from the batch layer).
+
+The plugins can be written by third parties and are roughly comparable to minidrivers. The generic action system provides all complexity of action processing whereas the output plugin provides a limited set of callbacks that enable the generic framework to talk to the actual destination system. As such, writing outputs is a very simple task. However, rsyslog does not limit the creation of very complex outputs, which may be able to offer superior performance for some destinations.
+
+\subsection{Output Failure}
+\subsubsection{Cases}
+When an output action is called, it may encounter a failure condition. In general, there are two different cases:
+\begin{enumerate}
+\item action caused failures
+\item message-content caused failures
+\end{enumerate}
+
+Failures rooted in the action are things like broken network connections, file systems run out of space or database servers that are down. Most importantly, the failure is not related to message content. As such, it is appropriate to retry the action with the same message until it finally succeeds (assuming that someone restores the system in question to proper operation). We can not expect that the problem is cleared just by discarding the current message and re-trying with the next one.
+
+In my view, action caused failures are the vast majority of all failures. For rsyslog versions 3 and below, all rsyslog-provided plugins consider failures to be action-caused and thus potentially recoverable by simple retry, with the only exception being fatal error conditions that render the whole action unusable.
+
+David Lang pointed out, that there may also exist error conditions that are not caused by the action (or the subsystem it talks to) itself, but rather by message data. He provided the following samples where message content can cause permanent issues with action execution:
+
+\begin{itemize}
+\item unicode text causing grief
+\item dynafile hits a read-only file
+\item basically data-driven things that trigger bugs in the message delivery
+mechanism in some form.
+\end{itemize}
+
+As David Lang said ``In an ideal world these would never happen, but for most output types I can think of some form of corrupt input that could cause that message to fail.''.
+So this class of failure conditions actually exists. No matter how often the action retry mechanism is called, it will never succeed (one may argue that the read-only dynafile is fixable, but we could replace that sample with an invalidly generated filename). The proper cure for these failures is to find the offending message and discard it.
+
+In conclusion, actions need to return different error states for these two different types of failures. Traditionally, RS\_RET\_SUSPENDED is returned when an action specific failure is hit. Most existing plugins also do this if a message-related failure occurred, simply because they did not yet know that this situation exists. However, plugins also return different error codes, and at least these can be treated to mean message-permanent failures. To support this, a change to plugins is still required, because many simply return SUSPENDED state if anything went wrong (replacing the real error condition with SUSPENDED). A dedicated PROBABLE\_INVALID\_MSG return state is probably useful so that an output plugin can convey back that it considers the message to be bad. On the other hand, this implies that the plugin must try to detect those, which means that the developer must think about all potential message-caused problems. That approach can be considered unreliable and as such it may be better not to provide such a dedicated state.
+
+\subsubsection{Handling of Failures}
+Because there are two different failure cases, different handling is needed for them. The action-based failure cases can and must be handled on the action level. As transactions abort when a failure occurs, support from the upper ``batch layer'' is necessary in order to handle resending batches of messages.
+
+For message-caused failure cases, the offending message must be found and then be discarded. A complexity here is that while a failure-causing message is being searched for, an action-based failure might occur. In that case, first the action-based failure condition must be solved, before the search for the problem message can continue.
+
+One approach might be that when the action-layer conveys back an action-caused failure (SUSPENDED), the batch layer knows that it simply needs to restart the full transaction (but not start an ``invalid message search''). If a message-based error condition is conveyed back, the batch system can not restart the full batch. Instead, it needs to enter search mode, where it creates partitions of the original batch, and calls itself recursively (at least in theory) on each of the subsets.
+
+Then, the same handling applies until either a failing message has been found or all messages have been successfully processed. Note that in the recursive step, action-based failures are recovered by full batch resubmits. This solves the above-mentioned complexity in a consistent way.
+
+If a binary-search-like method is used to detect failing records\footnote{This was originally suggested by David Lang.}, recursion may not really be an issue, as the recursion depth is limited to $\log_2 |B|$ where $B$ is the message batch.
+
+A message-caused failure can be rooted in one or more messages. One important question is if it is expected that the failure is caused by a single or multiple messages. Both are possible, so it is a question of probability. If we assume that it is more probable that a single message causes the problems, it is useful to immediately return back to full batch submission of transactions once a problem-causing message has been identified. But then, if there are multiple problem-causing messages inside the batch, we may need many more iterations.
+
+If, on the other hand, we assume that it is more probable that multiple messages cause problems, it may make sense to keep resubmitting only subsets of the batch. However, then the performance is suboptimal if actually only one message was problematic. A solution might be to pick a compromise, e.g. first assume that a single message is problematic, but assume the opposite as soon as a second message with problems has been found.
+
+A potential algorithm for processing $n \le |B|$ messages from batch $B$ is described below. In the pseudocode, a ``processable'' message is one that neither is already committed nor had a permanent failure with this action. The term ``mpf'' means ``message permanent failure'' for this action (this will later be described in a batch state set).
+
+\begin{small}
+\lstset{language=python}
+\begin{lstlisting}
+def submitBatch(B, n):
+ foreach processable message in
+ (first [at most] n messages of batch):
+ call processMessage
+ if action-caused failure:
+ retry full batch
+ if action-caused permanent failure:
+ mark all n messages as mpf
+ return
+ if auto-commit:
+ mark committed messages in batch as committed
+ if message-caused failure:
+ if n == 1:
+ mark message as mpf
+ return
+ else:
+ call submitBatch(B, n/2)
+ call submitBatch(B, n/2)
+\end{lstlisting}
+\end{small}
+
+After submitBatch() has completed, all messages are either committed or in mpf state.
+
+Note that an action-caused permanent failure occurs if an action-caused failure can not be resolved with the operator-configured number of retries. It will never occur if the user configured infinite retries. While an action is suspended, all calls will result in an action-caused permanent failure. Please keep in mind that these will be resubmitted to any backup actions inside the action unit, so the action's ability to cause permanent failure states is vital for a number of use cases (backup syslog server, to name just one).
+
+Batch processing inside an action unit thus can follow this structure:
+
+\begin{algorithm}
+\caption{processBatch(B)}
+\begin{algorithmic}
+\FORALL{action $a$ in action unit}
+ \IF{execute action only on messages that failed before}
+ \STATE $n = |\text{messages in batch in mpf state}|$
+ \STATE change mpf state back to ready
+ \ELSE
+ \STATE $n = |B \setminus \text{msgs with state discard}|$
+ \STATE change all message states $\ne$ discard to ready
+ \ENDIF
+ \IF{$n >0$ }
+ \STATE call submitBatch(B, n) for action $a$
+ \ENDIF
+\ENDFOR
+\end{algorithmic}
+\end{algorithm}
+
+\paragraph{Why is it Important to differentiate the failure cases?}
+This text originates from the mailing list and must be merged in. I provide it in the form it is, so it will not be forgotten (plus, it conveys the information).
+
+One may think that it is not necessary to differentiate between action-caused and message-caused failures. However, not doing so introduces subtle issues, because
+then you either
+
+A) do not need the batch logic at all (because the action is configured for
+infinite retries)
+
+Or
+
+B) you lose many messages if the action is not configured for infinite
+retries and you have a longer-duration outage e.g. on a database server.
+Let's say it is offline for a couple of hours, then you lose almost
+everything in that period
+
+To prevent this, you need two different retry methods.
+
+One may argue that it is hard to differentiate between the two failure cases. This is correct. But I think it mostly depends on the quality of the output module.
+
+First of all, ``mostly'' implies that there may be some other cases, where it
+really is impossible to differentiate between the two. In that case, I would
+treat the issue as an action-caused failure. There are two reasons for this:
+
+1) rsyslog v3 currently does this always and not even a single person
+complained about that so far. This is an empirical argument, and it does not
+mean it caused no problems. But it carries the connotation that this seems not
+to be too bad.
+
+2) If we would treat it as message-caused failure, we would no longer be able
+to handle extended outages of destination systems, which I consider a vitally
+important feature.
+
+When weighing the two, I know of lots of people who rely on 2), in sharp
+contrast to knowing no one having problems with 1). So my conclusion is that it is
+less problematic to define an otherwise undefinable failure reason to be
+action-caused. Even more so as I assume this problem only exists in the
+minority of cases.
+
+Now back to the quality of the output module: thinking about databases, their
+API is usually very good at conveying back if there was a SQL error or a
+connection abort. So while a SQL error may also be an indication of a
+configuration problem, I would strongly tend to treat it as being
+message-caused. This is under the assumption that any reasonable responsive
+admin will hopefully test his configuration at least once before turning it
+into production. And config SQL errors should manifest immediately, so I
+expect these to be fixed before a configuration runs in production. So it is
+the duty of the output module to interpret the return code it received from
+the API call and decide whether the failure is more likely action-caused or
+message-caused. For database outputs, I would assume that it is always easy
+to classify failures that must be action-caused, especially in the
+dominating cases of failed network connections or failed servers.
+
+For other outputs it may not be as easy. But, for example, all stream network
+outputs can detect a broken connection, so this also is a sure fit.
+
+For dynafiles, it really depends on how hard the output module tries to differentiate
+between the two failure cases. But I think you can go to great lengths here, too.
+Especially if you do not only look at the create() return code, but, iff a
+failure occurs, you do more API calls to find out the cause.
+
+So I think the remaining problem is small enough not to cause too many issues
+(and if so, they are unavoidable in any case). In conclusion, the two failure states are not only necessary, but can also be detected with sufficient certainty.
+
+\subsection{Random Topics}
+I have begun to gather material from the mailing list in this section, because I feel it may be useful for others as well. Right now, the information is well hidden in the mailing list archives and there may be value in combining it all in one place.
+
+Due to the nature of this material, there is no specific organization between the subchapters and also formatting and language doesn't deny its rooting in the mailing list.
+
+\subsection{Reliability of Message Dequeueing}
+A batch is actually dequeued when it is taken off a queue. So if at that point we
+have a system power failure (for whatever reason), the messages are lost.
+While the rsyslog engine intends to be very reliable, it is not a complete
+transactional system. A slight risk remains. For this, you need to understand
+what happens when the batch is processed. I assume that we have no sudden,
+untrappable process termination. Then, if a batch cannot be processed, it is
+returned back to the top of queue. This is not yet implemented, but is how
+single messages (which you can think of as an abstraction of a batch in the
+current code) are handled. If, for example, the engine shuts down, but an
+action takes longer than the configured shutdown timeout, the action is
+cancelled and the queue engine reclaims the unprocessed messages. They go
+into a special area inside the .qi file and are placed on top of the queue
+once the engine restarts.
+
+The only case where this does not work is sudden process termination. I see two
+cases:
+
+a) a fatal software bug
+We cannot really address this. Even if the messages were remaining in the
+queue until finally processed, a software bug (maybe an invalid pointer) may
+affect the queue structures at large, possibly even at the risk of total loss
+of all data inside that queue. So this is an inevitable risk.
+
+b) sudden power fail
+... which can and should be mitigated at another level
+
+One may argue that there also is
+
+c) admin error
+e.g, kill -9 rsyslogd
+Here a fully transactional queue will probably help.
+
+However, I do not think that the risk involved justifies a far more complex
+fully transactional implementation of the queue object. Some risk always
+remains (what in the disaster case, even with a fully transactional queue?).
+
+And it is so complex to let the messages stay in queue because it is complex
+to work with such messages and disk queues. It would also cost a lot of
+performance, especially when done reliably (need to sync). We would then need
+to touch each element at least four times, twice as much as currently. Also,
+the hybrid disk/memory queues become very, very complex. There are more
+complexities around this, I just wanted to tell the most obvious.
+
+So, all in all, the idea is that messages are dequeued, processed and put
+back to the queue (think: ungetc()) when something goes wrong. Reasonable
+(but not more) effort is made to prevent message loss while the messages are
+in unprocessed state outside of the queue.
+
+\paragraph{More reliable can actually be less reliable}
+On the rsyslog mailing list, we had a discussion about how reliable rsyslog should be. It circles about a small potential window of message loss in the case of sudden fatal failure. Rsyslog can be configured to put all messages into a disk queue (instead of main memory), so these messages survive such a powerfail condition. However, messages dequeued and scheduled for processing during the power outage may be lost.
+
+I now consider a case where we have bursty UDP traffic and rsyslog is configured to use a disk-only queue (which obviously is much slower than an in-memory queue). Looking at processing speeds, the max burst rate is limited by using an ultra-reliable queue. To avoid losing UDP messages, a second instance could be run that uses an in-memory queue and forwards received messages to the one in ultra-reliable mode (that is with the disk-only queue). So that second instance queues in memory until the (slower) reliable rsyslogd can now accept the message and put it into the reliable queue. Let's say that you have a burst of $r$ messages and that from this burst only $r/2$ can be enqueued (because the ultra reliable queue is so slow). So you lose $r/2$ messages.
+
+Now consider the case that you run rsyslog with just a reliable queue, one that is kept in memory but not able to cover the power failure scenario. Obviously, all messages in that queue are lost when power fails (or almost all to be precise). However, that system has a much broader bandwidth. So with it, there would never have been $r$ messages inside the queue, because that system has a much higher sustained message rate (and thus the burst causes much less of trouble). Let's say the system is just twice as fast in this setup (I guess it usually would be *much* faster). Then, it would be able to process all $r$ records.
+
+In that scenario, the ultra-reliable system loses $r/2$ messages, whereas the somewhat more "unreliable" system loses none - by virtue of being able to process messages as they arrive.
+
+Now extend that picture to messages residing inside the OS buffers or even those that are still queued in their sources because a stream transport blocked sending them.
+
+I know that each detail of this picture can be argued at length about.
+
+However, my opinion is that there is no "ultra-reliable" system in life, only various probabilities in losing messages. These probabilities often depend on each other, what makes calculating them very hard to impossible. Still, the probability of message loss in the system at large is just the product of the probabilities in each of its components. And reliability is just the inverse of that probability.
+
+This is where *I* conclude that it can make sense to permit a system to lose some messages under certain circumstances, if that influences the overall probability calculation towards the desired end result. In that sense, I tend to think that a fast, memory-queuing rsyslogd instance can be much more reliable compared to one that is configured as being ultra-reliable, where the rest of the system at large is badly influenced by this (the scenario above).
+
+However, I also know that for regulatory requirements, you often seem to need to prove that a system may not lose messages once it has received them, even at the cost of an overall increased probability of message loss.
+
+My view of reliability is much the same as my view of security: there is no such thing as "being totally secure", you can just reduce the probability that something bad happens. The worst thing in security is someone who thinks he is "totally secure" and as such is no longer actively looking at potential issues.
+
+The same I see for reliability. There is no thing like "being totally reliable" and it is a really bad idea to think you could ever be. Knowing this, one may begin to think about how to decrease the overall probability of message loss AND think about what rate is acceptable (and what to do with these cases, e.g. "how can they hurt").
+
+\paragraph{Different Use Cases}
+As David Lang pointed out, there exist different use cases for different levels of reliability. Most importantly, there exist use cases that do not demand very high throughput but rather ultra-reliability of the queue system. Here, ultra-reliability is just another word for the queue being of ``audit-grade''. Even if the queue provides audit-grade, the overall system is only then of audit-grade when all other components - most notably the transport protocols spoken by the inputs and outputs - are also of audit-grade. Most importantly, this means that an audit-grade system purely based on the IETF syslog protocol series can not be built.
+
+Used together with truly reliable protocols \emph{and} senders that block processing until a final acknowledgement has been received, an audit-grade system can potentially be built based on rsyslog. To do so, an audit-grade queue subsystem is required, which is not present in releases less than 4.1.? (most importantly, v2 and v3 do not provide this capability).
+
+\subsection{Audit-Grade Queue Operations}
+\subsubsection{Prerequisites}
+Audit-grade queue operations require certain prerequisites:
+\begin{itemize}
+\item rsyslog engine is of version 4.1.? or greater
+\item disk-only queue type
+\item checkpoint interval set to 1
+\item queue is configured to not permit losing any messages\footnote{The queue has several settings that can be used to fine-tune situations in which it may discard messages intentionally. All of these must be turned off. Most importantly, that means the producer is blocked for an infinite time if the queue is full.}
+\item queue consumer must also be of audit-grade
+\end{itemize}
+Only when these prerequisites are met, queue operation can be considered to be audit-grade. Note that when message loss in case of sudden fatal failure and similar incidents is acceptable, neither disk-only queues nor a checkpoint interval of 1 is necessary. Such a configuration can also be built with rsyslog v3, which is up to that level.
+
+Note that in the sections below we describe the implementation in broader terms. Most importantly, we do not restrict ourselves to disk-only queue storage drivers. This is important, because it simplifies design and opens the capability to introduce new, possibly faster-performing, queue storage drivers in the future.
+
+But it is important to keep in mind that a concrete queue is only of audit-grade if it matches all the prerequisites given here, most importantly with the right configuration.
+
+\subsubsection{Implementation Alternatives}
+Messages, or more precisely objects\footnote{While rsyslog deals with messages, the queue is designed to handle any type of thing that is represented as an rsyslog object. This is considered useful as queues may at some time contain other things than just messages, so we keep it generic.}, are enqueued by the queue producer (either an input module or the main message queue's consumer). The enqueue operation is completed only when the message has been successfully accepted by the queue storage driver. Then and only then the producer is permitted to remove the object from its own storage system. A rough sketch is given in algorithm \ref{alg_q_enq}.
+
+\begin{algorithm}
+\caption{enqueueObject($o$)}
+\begin{algorithmic}
+\label{alg_q_enq}
+\STATE lock queue mutex
+\WHILE{queue is not ready for enqueue}
+ \STATE wait on queue to become ready
+\ENDWHILE
+\STATE call queue store driver to add $o$
+\STATE unlock queue mutex
+\end{algorithmic}
+\end{algorithm}
+
+The dequeue-operation is more complex. We must ensure that each object stays in the queue until it is finally processed. Hereby, an object is finally processed, when processing of it has been completed. Remember that to enhance performance, objects are dequeued in batches of many. So at any given time, multiple messages may be processed, but not necessarily have finally completed doing so. If another worker thread then tries to obtain a new batch for processing, those ``in-process'' messages must not be handed out a second time. Also, if a sudden fatal failure occurs during processing, queue operation must restart at the point of last commit. This means that all ``in-process'' messages need to be changed back to ``not processed'' state and be restarted again. In those cases, (acceptable) slight message duplication can occur.
+
+In our design, we differentiate between ``logical'' and ``physical'' dequeuing of batches. If a batch is generated for processing, it is logically dequeued --- in the sense that no other batch generating request will be able to receive another copy of these messages. If no exceptional situation happens, those messages will be processed and thus can be considered consumed under normal circumstances.
+
+However, actual deletion from the physical queue storage happens only after the batch is fully processed. At this point, all objects have been acknowledged by their destinations, which now have the responsibility for the object's survival. Consequently, we can delete them from the queue store. This process is considered the ``physical'' dequeue of the object.
+
+In order to find some simpler terms, we will call the logical dequeue operation just ``dequeue'' and the physical dequeue operation ``delete''. This is consistent with all previous work on rsyslog and thus probably leads to the least surprise when reading older source code and documentation.
+
+A first idea for a deletion is given in algorithm \ref{alg_pdeq_batch_1} (remember that $O(b)$ contains all objects within the given batch $b$, this is \emph{not} $O$-notation and should probably in the future be replaced by something else).
+
+\begin{algorithm}
+\caption{deleteBatch($b$), first approach}
+\begin{algorithmic}
+\label{alg_pdeq_batch_1}
+\STATE lock queue mutex
+\FORALL{$o \in O(b)$}
+ \STATE find $o$ in queue storage
+ \STATE remove $o$ and keep queue structures intact
+\ENDFOR
+\STATE unlock queue mutex
+\end{algorithmic}
+\end{algorithm}
+
+This algorithm is simple, but requires searching the queue store for the object to be deleted -- a potentially lengthy operation. However, we can improve the searching process if we know more about the inner structure of batch objects. It seems appropriate to dequeue objects in queue-sequential order. A drawback of doing so is that we must prevent other worker threads from trying to dequeue concurrently. This is not really a drawback. We need to guard dequeue operations by a mutex in any case, because otherwise internal structures can not be kept consistent. Practical experience and testing have shown that many small dequeue operations cause a lot of locking contention and as such badly affect performance. So it actually is a welcome enhancement to acquire the queue lock only once for the whole batch dequeue operation. As dequeuing is a comparatively fast operation, the lock is not held for extended periods of time.
+
+A first approach to this functionality is shown in algorithm \ref{alg_ldeq_batch_1}. Note that $C_{mBatch}$ is the configured maximum number of elements inside a batch, $i$ is an index to address the objects inside the batch.
+
+\begin{figure}[h]
+\begin{center}
+\includegraphics[scale=0.6]{rsyslog_queue_pointers.jpeg}
+\end{center}
+\caption{\textbf{Queue Store Pointers}: boxes represent queue entries, colored boxes entries with objects. Objects in green are unprocessed, in blue are dequeued but not deleted and those in gray have already been deleted. White indicates not yet used entries. Gray objects may be overwritten at any time. Their entries are actually free, we have used the gray color primarily to indicate there once existed objects. Each queue pointer points to the next entry to process.}
+\label{fig_queue_ptr}
+\end{figure}
+
+\begin{algorithm}
+\caption{dequeueBatch($b$)}
+\begin{algorithmic}
+\label{alg_ldeq_batch_1}
+\STATE lock queue mutex
+\STATE $0 \to i$
+\WHILE{queue non-empty and $i < C_{mBatch}$}
+ \STATE obtain next obj $o$ from queue store
+ \STATE advance logical dequeue position
+ \STATE put $o$ into batch
+ \STATE $i + 1 \to i$
+\ENDWHILE
+\STATE unlock queue mutex
+\end{algorithmic}
+\end{algorithm}
+
+A key concept is somewhat hidden in \marginpar{queue pointers} \emph{advance logical dequeue position}. Each queue store is purely sequential, with objects being enqueued at one ``end'' of the store and dequeued at the other. Of course, each queue store has only finite capacity, but we ignore this to explain the overall picture. A queue can be implemented by two pointers: one that points to the tail of the queue, where new messages are enqueued and one that points to the head of it, where new messages are dequeued. The idea is now to duplicate the dequeue pointer and split it into one for (logical) dequeue and one for deletion. Figure \ref{fig_queue_ptr} shows this three-pointer approach. Now, we can simply advance either the dequeue or deletion pointer, depending on operation, and do not need to find the first dequeue position inside the queue store. The dequeue pointer always points at it. This mode can be implemented with all currently existing queue storage drivers (but the sequential disk driver may need to use a second file handle or stream object instead of two pointers).
+
+This makes an efficient implementation of algorithm \ref{alg_ldeq_batch_1} possible: when it logically dequeues, it just needs to advance the dequeue pointer. So the algorithm executes in $O(n)$ time where $n$ specifies the number of elements to dequeue with an upper bound of $C_{mBatch}$.
+
+\begin{figure}[h]
+\begin{center}
+\includegraphics[scale=0.6]{rsyslog_queue_pointers2.jpeg}
+\end{center}
+\caption{\textbf{Physically Dequeueing Messages}: In this sample, we have two batches. With multiple workers, they may be deleted in any order.}
+\label{fig_queue_ptr_deq}
+\end{figure}
+
+Furthermore, we can also improve algorithm \ref{alg_pdeq_batch_1}: Consider that each batch is logically dequeued as an atomic operation. That means all batch objects form a sequential subset of the queue. Figure \ref{fig_queue_ptr_deq} shows the situation when two batches have been dequeued. So the costly ``find'' operation now needs to be carried out only once at the beginning of the batch. As all other objects are sequential, once we have found the batch begin inside the queue, we can simply delete the $|b|$ elements in queue-sequential order after it. So the cost of the find operation can be reduced from $O(|b|)$ to $O(1)$.
+
+We can even reduce the remaining cost of the find operation. If the batch to be deleted is right at the queue's head (as is ``B1'' in the figure), the ``find'' immediately terminates with the first element and incurs no cost at all. The situation is different if the batch is not at the queue head, ``B2'' is an example for that (assuming that ``B1'' has not yet been deleted). We would now still need to search over the objects that are not part of the batch and can then finally get to the object at the head of the batch in question. For queue storage drivers that support random access to queue elements, storing a simple pointer to the batches' queue head element further improves the situation and enables $O(1)$ access to the queue element. This is indicated by the dotted lines in figure \ref{fig_queue_ptr_deq}. Once the head of the batch has been found, two things can happen (depending on the capabilities of the queue storage driver):
+
+\begin{enumerate}
+\item the head element can be flagged as ``this and next $n$ elements are deleted''
+\item all elements are actually deleted
+\end{enumerate}
+
+Note that a mixed form is also possible (and probably useful for our \emph{singly} linked list storage driver): there, some $n'$ elements are actually deleted and the head element is flagged as ``this and next $n - n'$ elements are deleted''. Note that in the linked-list case, all but the first elements can be deleted with ease\footnote{It can be considered to change from a singly-linked list to a doubly-linked list, if the benefit outweighs the extra effort required.}, so probably just the head would stay inside the queue. Note that removing elements off the queue, where possible, is useful because it frees resources. On a busy system, freeing messages as soon as possible can prevent message loss (in non-audit-grade setup) or system slowdown. So it should be done when possible.
+
+If we have a purely sequential queue storage driver (currently the sequential disk driver), finding and updating the head element is not an option. Even in this case, we can observe that the batch at the actual deletion pointer will eventually be submitted for deletion. So a route to take is to create a list of elements that can be deleted as soon as the physical dequeue pointer reaches any of these elements. We call this the \marginpar{to-delete list}``to-delete list''. To facilitate processing, this list must be ordered in sequence of dequeueing. This information may not be available from the storage subsystem itself, but it can easily be generated. To do so, a strictly monotonically increasing counter is kept with each logical dequeue operation and stored as part of the batch\footnote{As this must be done via the usual computer-implemented modular arithmetic, we must be careful that we do not see repetition of values because of overflows. Each day has $60 \cdot 60 \cdot 24 = 86,400$ seconds (ignoring the subtleties of UTC). Now let's assume that we have a moderately-busy system with 1,000 messages per second. We further assume, to be on the safe side, that each message is processed inside its own batch. So we have $86,400,000$ batches per day. If we now use a typical $32$-bit integer for generating the batch IDs, the unique range will be used up after
+$$\frac{2^{32}}{86400000} \approx 49.7 \text{ days}$$
+of uninterrupted rsyslog operation. While this may still sound somewhat safe, it goes down to approximately one day if messages are submitted at a rate of 50,000 messages per second (which is high, but not unheard of). So it is strongly advised to use 64 bits, which we consider to be safe, because for our 1,000 messages per second the range would be exhausted only after
+$$\frac{2^{64}}{86400000} \approx 2.135 \cdot 10^{11} \text{ days}$$
+which equals approximately $584,500,000$ \emph{years}. So even at a rate of one million messages per second, the range would be sufficient for over 500,000 years of continuous operation -- that should be more than sufficient.}
+An example: let us assume that ``B2'' was submitted for deletion first. Then, the head of ``B2'' is not at the queue's delete pointer. As such, no action can be carried out immediately. So the batch head pointer is stored into a ``to be deleted'' list. Processing continues. Some time later, batch ``B1'' is submitted for deletion. Now, the head pointer is at the head of the delete list, as such all batch elements are dequeued. Then, the ``to be deleted'' list is checked, and ``B2'' is found in it. Now, ``B2'' is at the head of the (new) deletion pointer and can also be removed. So, ultimately, all messages are physically dequeued. This is more formally described in algorithm \ref{alg_phys_deq_seq_store}. In that pseudocode, we made a simplification by always putting the to be deleted batch in the ``to-delete'' list, which then enables us to use somewhat more generic code to carry out the work.
+
+Note that there is a price to pay for deletions via the ``to-delete'' list: if a sudden fatal failure happens during processing, the set of duplicate messages is increased. For example, if a fatal failure happens after ``B2'' has been fully processed and scheduled for deletion, but \emph{before ``B1'' is also submitted for deletion}, ``B2'' will be reprocessed after recovery. This would not happen if ``B2'' would have been removed from the queue.
+
+\begin{algorithm}
+\caption{deleteBatch($b$)}
+\begin{algorithmic}
+\label{alg_phys_deq_seq_store}
+\REQUIRE queue mutex is locked by caller
+\STATE enqueue $b.head, |b|$ in ``to-delete'' list $D$
+\COMMENT{``to-delete'' list must be in order of logical dequeue}
+\WHILE{$D.head = Q.deletePtr$}
+ \FOR{$|b|$ elements}
+ \STATE delete element at queue head
+ \STATE move $Q.deletePtr$
+ \ENDFOR
+ \STATE remove head of ``to-delete'' list
+\ENDWHILE
+\end{algorithmic}
+\end{algorithm}
+
+\paragraph{Wrap-Up of Queue Delete Operations}
+When evaluating which route to take, the ``to-delete'' list approach looks elegant for all cases. The negative side effect of potentially increased message duplication currently does not even exist: today, the sequential disk queue storage driver permits only a single worker thread and thus there always will be only one thread at a time. Even if we remove that limitation, message duplication could not be avoided, as stated in the algorithm description above. What remains are the other queue storage drivers. However, they operate in-memory, so message duplication will not happen simply because all messages will be lost on sudden fatal failure. The advantage of limited message duplication only exists in the so-far hypothetical case of a random-access, audit-grade disk queue storage driver. Thus, the decision could be postponed unless that happens (if it ever does).
+
+From a code complexity point of view, the ``to-delete'' list approach is definitely advantageous. Not only because of the reduced number of algorithms required. We also do not need to maintain unique batch IDs and all the logic associated with them.
+
+The other aspect to look at is memory consumption. Assuming that we delete the actual objects, just not their containers inside the queue, extra memory consumption is not really that much worse. More importantly, currently only the linked-list queue storage driver can benefit at all, because it is the only driver capable of deleting queue entries in mid-queue. All others, including the array memory driver, do not have this capability.
+
+From a performance point of view, the ``to delete'' list approach looks approximately as good as the others, with some mild better performance for some storage drivers for a non-``to delete'' list approach. This can be mitigated, especially if the potentially somewhat-costly maintenance of the ``to-delete'' list is slightly optimized and the algorithm actually checks if the to be deleted batch is right at the queue's delete pointer position. The improved code simplicity, together with current CPU's code caching, may even result in an otherwise not expected speedup.
+
+In conclusion, we will implement the ``to-delete'' list approach on the queue layer (above the queue storage drivers). However, we will leave the window open to permit overwriting it with queue storage driver specific functionality. How to do this will not be specified now, as there is currently no need and we do not even know if there ever will be. However, we retain the discussion on the various modes as well as the relevant algorithmic discussions and data structures inside this paper so that it is readily available should the need arise. We also think this is important so that everybody later knows that the decision was made based on good argument and not by accident (we consider this useful in another design enhancement attempt).
+
+\paragraph{Processing Sequence} Looking at the processing sequence, we notice that objects are always dequeued, then processed and then deleted. Then, the whole process starts again. In particular, this means that after the previous batch has been deleted, the next batch will be dequeued. Now consider that we need to have exclusive access to the queue for both of these operations. As such it seems natural to combine this into a single step, further reducing potential locking contention.
+
+Note that a side-effect of this approach is that messages can be deleted only when a new batch is dequeued. With the current design, this means that at least one message must reside inside the queue. Otherwise, the last batch will not be deleted. However, this is something that can (and must!) be solved on the queue worker layer, in that it deletes a batch when the queue is empty.
+
+This leads us to the implementation of dequeueBatch() and deleteBatch() shown in algorithms \ref{alg_deq_batch_final} and \ref{alg_del_batch_final}. Note that $l$ is a flag variable that indicates if the queue is already locked.
+
+\begin{algorithm}
+\caption{dequeueBatch($b$): final version}
+\begin{algorithmic}
+\label{alg_deq_batch_final}
+\STATE lock queue mutex
+\STATE call deleteBatch(b, 1)
+\STATE $0 \to i$
+\WHILE{queue non-empty and $i < C_{mBatch}$}
+ \STATE obtain next obj $o$ from queue store
+ \STATE advance dequeue position
+ \STATE put $o$ into batch
+ \STATE $i + 1 \to i$
+\ENDWHILE
+\STATE commit queue changes to storage system (if needed, e.g. fsync())
+\STATE unlock queue mutex
+\end{algorithmic}
+\end{algorithm}
+
+
+\begin{algorithm}
+\caption{deleteBatch($b, l$): final version}
+\begin{algorithmic}
+\label{alg_del_batch_final}
+\IF{queue not yet locked (test via $l$)}
+ \STATE lock queue mutex
+\ENDIF
+\FORALL{objects $o$ in $b$}
+ \STATE destruct $o$
+\ENDFOR
+\STATE enqueue $b.head, |b|$ in ``to-delete'' list $D$
+\COMMENT{``to-delete'' list must be in order of logical dequeue}
+\WHILE{$D.head = Q.deletePtr$}
+ \FOR{$|b|$ elements}
+ \STATE delete element at queue head
+ \STATE move $Q.deletePtr$
+ \ENDFOR
+ \STATE remove head of ``to-delete'' list
+\ENDWHILE
+\STATE commit queue changes to storage system (if needed, e.g. fsync())
+\IF{queue not yet locked (test via $l$)}
+ \STATE unlock queue mutex
+\ENDIF
+\end{algorithmic}
+\end{algorithm}
+
+\subsubsection{Queue Stores}
+Currently, rsyslog supports three different types of queue store drivers:
+
+\begin{itemize}
+\item memory array
+\item memory linked list
+\item disk sequential file
+\end{itemize}
+
+They all provide an abstracted sequential queue store as shown in figure \ref{fig_queue_ptr} on page \pageref{fig_queue_ptr}.
+
+Obviously, some differences exist. Most importantly, the disk sequential file driver does \emph{not} support more than one queue worker thread (in order to prevent excessive disk activity and the subtle issues with rewriting parts of sequential files). So if this driver is used, the queue automatically limits itself to a maximum of one worker thread (even if user configuration settings request more).
+
+Different queue store drivers have different properties:
+
+\begin{tabular}{|l||l|l|l|}\hline
+ & array & linked list & sequential file \\ \hline
+pointer type & integer index & memory address & file number and \\
+ & & & offset within file \\ \hline
+physical access & random & random & sequential \\ \hline
+remove middle & no & yes & no \\
+elements & & & \\ \hline
+access to $n$-th& $O(1)$, index:& $O(n)$, follow & not supported \\
+element & $n \mod C_{mMsg}$ & pointer links & \\ \hline
+speed & fastest & fast & slow \\\hline
+mem overhead & large & some & almost none \\\hline
+reliability & reliable & reliable & audit-grade\footnote{if configured correctly}\\
+\hline
+\end{tabular}
+
+\subsubsection{Implementation}
+The actual implementation will be based on algorithms \ref{alg_deq_batch_final} and \ref{alg_del_batch_final}. The rsyslog v3 queue storage driver will be extended by one additional method, which permits non-destructive dequeueing of elements. As such, the driver now has the $qAdd()$, $qDeq()$, and $qDel()$ entry points (together with the usual construction and destruction entry points). The queue drivers must support the three pointers for enqueue, dequeue and delete. The ``to-delete'' list will be maintained on the upper queue layer (and not the queue driver layer). This functionality will be optimized so that if a batch to delete is right at the queue's delete pointer, it will immediately be deleted and not be sent to the ``to-delete'' list. This is especially important with the sequential disk driver, as the condition here always is true (and thus the driver can pretend this in the relevant API without even comparing any pointers -- which would otherwise be quite complicated in this driver).
+
+The full list of the queue store driver interface is:
+
+\paragraph{qConstruct} Initializes the queue store.
+
+\paragraph{qDestruct} Destructs the queue store, including all messages that may still be present in it.
+
+\paragraph{qAdd} Enqueue a new object into the queue. Note that this entry point must only be called when the queue is non-full.
+
+\paragraph{qDeq} Non-destructive dequeue of the object at queue head. Dequeue pointer is advanced.
+
+\paragraph{qDel} Delete the object at queue head. Delete pointer is advanced.
+
+Disk queue store drivers may support additional internal functions. However, they should not be exposed to the rest of the queue subsystem.
+
+\begin{figure}
+\begin{center}
+\includegraphics[scale=0.4]{queue_msg_state.jpeg}
+\end{center}
+\caption{Logical Message States during Queue Processing}
+\label{fig_queue_msg_state}
+\end{figure}
+
+Figure \ref{fig_queue_msg_state} shows a logical message state diagram during queue processing. There is no actual state variable, but rather the processing flow demands these states. Note that the state transition from ``dequeued'' to ``queued'' only happens after a fatal failure and a successful system recovery. So this is a rather exceptional case.
+
+Another subtle issue is that we now need two different queue size counters: one for seeing when the queue is physically full and one for detecting when there are no more messages to be dequeued.
+
+As a simplification, support for ungetting objects can be removed (as objects never leave the queue), what also means that cancel-processing is probably less complex.
+
+\paragraph{Sequential Disk Queue Store Driver}
+The enqueue, dequeue and delete pointers must be implemented via three stream objects. Most importantly, the dequeue stream must be configured not to delete files when it closes them. A side-effect of this implementation is that data is actually read twice, once to actually obtain it and a second time to delete it. This could only be avoided by an overall redesign on how the disk queue works.
+
+\subsubsection{Checkmarks}
+The following things need to be verified in the actual implementation.
+
+\paragraph{Queue Full}
+Is it possible to set an infinite timeout on queue full condition during enqueue? If not, we must provide it.
+
+\paragraph{Terminating the Queue}
+If we cancel a worker, we need to start from the physical dequeue pointer and pull everything that is not scheduled for deletion - NOT from the logical dequeue pointer.
+
+\paragraph{Failed Messages}
+If a message fails on a detached action queue, no backup processing is available (because we detect the failure at a point where the message is already considered processed from the main queue's point of view). We need to address this and have two options:
+
+
+I see two approaches at handling this:
+
+a) we enable an action to configure a backup file that shall receive all
+message permanent failures. This is simple (not only to implement but to
+configure and understand)
+
+b) we push the failed message back to the main queue, but with an indication
+that it failed in an action. This is harder to implement and most importantly
+harder to understand/configure, but more flexible
+
+\section{Network Stream Subsystem}
+The idea of network streams was introduced when we implemented RFC5425 (syslog over TLS) in 2008. The core idea is to encapsulate all stream-oriented network data transfer into a single transport layer and make the upper layers independent of actual transport being used. This is in line with the traditional layer approaches in communication systems.
+
+Under this system, the upper layer provides plugins to send and receive streams of syslog data. Framing is provided by the upper layer. The upper layer itself is integrated in input and output plugins, which then are used to provide application-level syslog message objects to and from the rsyslog core. To these upper layers, the netstream layer provides reliable and sequenced message delivery with much of the same semantics as a usual TCP stream.
+
+\begin{figure}
+\begin{center}
+\includegraphics[scale=0.4]{tls.jpeg}
+\end{center}
+\caption{Objects at the Network Stream Layer}
+\label{fig_netstream_objects}
+\end{figure}
+
+At the netstream layer, we have a small set of generic classes, which are used for setup of the drivers and driver parameters. This is a very thin layer, mostly a wrapper. Once an actual lower-level netstream driver has been loaded, all parameters are passed through to it.
+
+Please note that both in theory and practice netstream drivers may call back into different netstream drivers. For example, the GnuTLS RFC5425 driver loads and calls back into the plain tcp driver, simply because that driver provides part of the required functionality and there is no point in re-implementing it for GnuTLS.
+
+The netstream driver layer does not only provide read and write calls but supports i/o multiplexing. To do so, it offers an interface that follows select() semantics. That permits an upper-layer component to request being blocked until some data arrives. Note that due to the subtleties in TLS processing, the upper layer may be awoken while there is no upper-layer work to do. This will properly be indicated by the netstream subsystem, is not an error and must be accepted and properly handled by the upper layer.
+
+Using the netstream layer, we do not need to modify the input and output plugins while at the same time we can add additional transport providers. One weak spot in this design is the current configuration process. With the current system, we need to provide one configuration statement per driver property and we need to hardcode this. So if a new driver would require new properties, we still would need to modify the upper layers. This is unfortunate, but the current config system does not provide for any better way to handle the situation. Once we are able to create a new config system, we will address this by providing the ability to pass a string of parameters onto the driver, which will then have the ability to parse its content. So once we do this, we need to modify the driver interface, but the end result would be a simplification.
+
+So far, only drivers for GnuTLS and plain tcp are provided. However, during the design of the layer we also looked at openssl and Mozilla Network Security Services as well as kept an eye on the needs of Kerberos. In theory, it should not be a major problem to write drivers for these systems (but it most probably still is a lot of work to do).
+
+A final note on Kerberos: in order to keep compatible with previous protocol handling and due to constraints in testing environment and knowledge, we still support Kerberos not via the netstream layer but via special extension into the input and output modules. That, too, is unfortunate, but given the current resources at hand, there is no alternative to handling in that way. We would be very interested in moving over Kerberos to a netstream driver and any volunteer would be very welcome.
+
+\section{Future Development}
+This section covers topics that can not currently be developed, but where important thoughts came up in discussions. For obvious reasons, the section has brainstorming character.
+
+\subsection{Lock-Free Queuing}
+On a very busy system, lock contention can limit performance. We should investigate ways to apply lock-free algorithms inside rsyslog. It is believed that at least for some scenarios, lock-free algorithms can be applied with great benefit. To do so, we should introduce new queue modes, which will use very different semantics from what is described so far for the queue engine. Most importantly, in lock-free mode we will have limits on the number of producers and we will most probably not be able to guarantee audit-grade processing. The latter is not a problem, because there are ample use cases that do not require audit-gradeness.
+
+\subsection{Audit-Grade High Performance Queue Storage Driver}
+An audit grade driver must ensure that no message is lost, but should also be able to handle large workloads. The sequential disk driver does not support the latter.
+
+An additional disk driver is envisioned with the properties like the linked list driver, but a reliable on-disk store. In particular, random access to queue elements is desired, which requires an addressing capability.
+
+A potential implementation requires a pre-formatted file. That file is organized in pages of $n$ bytes (e.g. 1K). The page index is used to address a queue item. If an item fits into 1K, it uses one page. If it is larger than 1K, consecutive pages are used to store the element. A page header must be present to indicate how many pages a single element is made up of.
+
+It may be noted that we could even improve performance by keeping part of the data in-memory. For audit-gradeness, it is required that upon enqueue the message is written to disk and only after final processing it needs to be removed. However, it is not forbidden to keep the same message in main memory. That way, the logical dequeue operation could be done on the in-memory representation. Only the physical dequeue would need to write to disk again. As such, we save one disk read out of three writes and one read otherwise required (so one can roughly say that we save one third of disk operations).
+
+Note that due to potential multi-page messages we can not directly address individual elements, but we can reliably and quickly address elements whose address we know (learned, for example, during logical dequeue). This is similar to the organization of the in-memory linked list. Actually, such a store \emph{is} a linked list implementation, just that memory is allocated on disk instead of in main memory.
+
+To further improve speed, object representation could be zipped before being written to a page.
+
+File Layout
+Page 0: control structures (most importantly queue pointers) (can make sense to store in a separate file, which could be moved to a dedicated disk subsystem - can potentially greatly reduce disk seek times).
+Page 1 to n: actual object storage
+
+Algorithms \ref{alg_AuditGradeStoreEnqueue} and \ref{alg_AuditGradeStoreDelete} show how records are enqueued and deleted. Note that the delete part does not even need to read back the record. If we keep at least some records in-memory, the performance cost of ultra-reliable mode can actually be comparatively low. Note that we may not even really need to commit data to the storage system in ``AuditGradeStoreDelete()'', because if a fatal failure occurs at this point, at worst message duplication may happen, what we have considered to be acceptable.
+
+\begin{algorithm}
+\caption{AuditGradeStoreEnqueue($o$)}
+\begin{algorithmic}
+\label{alg_AuditGradeStoreEnqueue}
+\REQUIRE queue mutex is locked by caller
+\STATE write $o$ to current enqueue location
+\STATE update \& write queue structures [page 0]
+\STATE sync all files touched
+\STATE store $o$ in an in-memory structure (or a cache)
+\end{algorithmic}
+\end{algorithm}
+
+\begin{algorithm}
+\caption{AuditGradeStoreDelete($o$)}
+\begin{algorithmic}
+\label{alg_AuditGradeStoreDelete}
+\REQUIRE queue mutex is locked by caller
+\STATE update queue dequeue pointer \& write queue structures [page 0]
+\STATE sync all files touched
+\end{algorithmic}
+\end{algorithm}
+
+
+\end{document}
diff --git a/doc/dev_oplugins.html b/doc/dev_oplugins.html
index cc2f7f38..63c186a3 100644
--- a/doc/dev_oplugins.html
+++ b/doc/dev_oplugins.html
@@ -144,19 +144,172 @@ array-passing capability not blindly be used.</b> In such cases, we can not guar
plugin from segfaulting and if the plugin (as currently always) is run within
rsyslog's process space, that results in a segfault for rsyslog. So do not do this.
<h3>Batching of Messages</h3>
-<p>With the current plugin interface, each message is passed via a separate call to the plugin.
-This is annoying and costs performance in some uses cases (primarily for database outputs).
-However, that's the way it (currently) is, no easy way around it. There are some ideas
-to implement batching capabilities inside the rsyslog core, but without that the only
-resort is to do it inside your plugin yourself. You are not prohibited from doing so.
-There are some consequences, though: most importantly, the rsyslog core is no longer
-intersted in messages that it passed to a plugin. As such, it will not try to make sure
-the message is not lost before it was ultimately processed (because rsyslog, due to
-doAction() returning successfully, thinks the message *was* ultimately processed).
-<p>When the rsyslog core receives batching capabilities, this will be implemented in
-a way that is fully compatible to the existing plugin interface. While we have not yet
-thought about the implementation, that will probably mean that some new interfaces
-or options be used to turn on batching capabilities.
+<p>Starting with rsyslog 4.3.x, batching of output messages is supported. Previously, only
+a single-message interface was supported.
+<p>With the <b>single message</b> plugin interface, each message is passed via a separate call to the plugin.
+Most importantly, the rsyslog engine assumes that each call to the plugin is a complete transaction
+and as such assumes that messages are properly committed after the plugin returns to the engine.
+<p>With the <b>batching</b> interface, rsyslog employs something along the lines of
+&quot;transactions&quot;. Obviously, the rsyslog core can not make non-transactional outputs
+to be fully transactional. But what it can do is let the output tell the core which
+messages have been committed by the output and which have not yet been. The core can then take care
+of those uncommitted messages when problems occur. For example, if a plugin has received
+50 messages but not yet told the core that it committed them, and then returns an error state, the
+core assumes that all these 50 messages were <b>not</b> written to the output. The core then
+requeues all 50 messages and does the usual retry processing. Once the output plugin tells the
+core that it is ready again to accept messages, the rsyslog core will provide it with these 50
+not yet committed messages again (actually, at this point, the rsyslog core no longer knows that
+it is re-submitting the messages). If, on the contrary, the plugin had told rsyslog that 40 of these 50
+messages were committed (before it failed), then only 10 would have been requeued and resubmitted.
+<p>In order to provide an efficient implementation, there are some (mild) constraints in that
+transactional model: first of all, rsyslog itself specifies the ultimate transaction boundaries.
+That is, it tells the plugin when a transaction begins and when it must finish. The plugin
+is free to commit messages in between, but it <b>must</b> commit all work done when the core
+tells it that the transaction ends. All messages passed in between a begin and end transaction
+notification are called a batch of messages. They are passed in one by one, just as without
+transaction support. Note that batch sizes are variable within the range of 1 to a user configured
+maximum limit. Most importantly, that means that plugins may receive batches of single messages,
+so they are required to commit each message individually. If the plugin tries to be &quot;smarter&quot;
+than the rsyslog engine and does not commit messages in those cases (for example), the plugin
+puts message stream integrity at risk: once rsyslog has notified the plugin of transaction end,
+it discards all messages as it considers them committed and safe. If now something goes wrong,
+the rsyslog core does not try to recover lost messages (and keep in mind that &quot;goes wrong&quot;
+includes such uncontrollable things like connection loss to a database server). So it is
+highly recommended to fully abide to the plugin interface details, even though you may
+think you can do it better. The second reason for that is that the core engine will
+have configuration settings that enable the user to tune commit rate to their use-case
+specific needs. And, as a relief: why would rsyslog ever decide to use batches of one?
+There is a trivial case and that is when we have very low activity so that no queue of
+messages builds up, in which case it makes sense to commit work as it arrives.
+(As a side-note, there are some valid cases where a timeout-based commit feature makes sense.
+This is also under evaluation and, once decided, the core will offer an interface plus a way
+to preserve message stream integrity for properly-crafted plugins).
+<p>The second restriction is that if a plugin makes commits in between (what is perfectly
+legal) those commits must be in-order. So if a commit is made for message ten out of 50,
+this means that messages one to nine are also committed. It would be possible to remove
+this restriction, but we have decided to deliberately introduce it to simplify things.
+<h3>Output Plugin Transaction Interface</h3>
+<p>In order to keep compatible with existing output plugins (and because it introduces
+no complexity), the transactional plugin interface is built on the traditional
+non-transactional one. Well... actually the traditional interface was transactional
+since its introduction, in the sense that each message was processed in its own
+transaction.
+<p>So the current <code>doAction()</code> entry point can be considered to have this
+structure (from the transactional interface point of view):
+<p><pre><code>
+doAction()
+ {
+ beginTransaction()
+ ProcessMessage()
+ endTransaction()
+ }
+ </code></pre>
+<p>For the <b>transactional interface</b>, we now move these implicit <code>beginTransaction()</code>
+and <code>endTransaction()</code> calls out of the message processing body, resulting in such
+a structure:
+<p><pre><code>
+beginTransaction()
+ {
+ /* prepare for transaction */
+ }
+
+doAction()
+ {
+ ProcessMessage()
+ /* maybe do partial commits */
+ }
+
+endTransaction()
+ {
+ /* commit (rest of) batch */
+ }
+</code></pre>
+<p>And this calling structure actually is the transactional interface! It is as simple as this.
+For the new interface, the core calls a <code>beginTransaction()</code> entry point inside the
+plugin at the start of the batch. Similarly, the core calls <code>endTransaction()</code> at the
+end of the batch. The plugin must implement these entry points according to its needs.
+<p>But how does the core know when to use the old or the new calling interface? This is rather
+easy: when loading a plugin, the core queries the plugin for the <code>beginTransaction()</code>
+and <code>endTransaction()</code> entry points. If the plugin supports these, the new interface is
+used. If the plugin does not support them, the old interface is used and rsyslog implies that
+a commit is done after each message. Note that there is no special "downlevel" handling
+necessary to support this. In the case of the non-transactional interface, rsyslog considers
+each completed call to <code>doAction</code> as partial commit up to the current message.
+So implementation inside the core is very straightforward.
+<p>Actually, <b>we recommend that the transactional entry points only be defined by those
+plugins that actually need them</b>. All others should not define them in which case
+the default commit behaviour inside rsyslog will apply (thus removing complexity from the
+plugin).
+<p>In order to support partial commits, special return codes must be defined for
+<code>doAction</code>. All those return codes mean that processing completed successfully.
+But they convey additional information about the commit status as follows:
+<p>
+<table border="0">
+<tr>
+<td valign="top"><i>RS_RET_OK</i></td>
+<td>The record and all previous ones inside the batch have been committed.
+<i>Note:</i> this definition is what makes integrating plugins without the
+transaction begin/end calls so easy - this is the traditional "success" return
+state and if every call returns it, there is no need for actually calling
+<code>endTransaction()</code>, because there is no transaction open.</td>
+</tr>
+<tr>
+<td valign="top"><i>RS_RET_DEFER_COMMIT</i></td>
+<td>The record has been processed, but is not yet committed. This is the
+expected state for transaction-aware plugins.</td>
+</tr>
+<tr>
+<td valign="top"><i>RS_RET_PREVIOUS_COMMITTED</i></td>
+<td>The <b>previous</b> record inside the batch has been committed, but the
+current one not yet. This state is introduced to support sources that fill up
+buffers and commit once a buffer is completely filled. That may occur halfway
+in the next record, so it may be important to be able to tell the
+engine that everything up to the previous record is committed.</td>
+</tr>
+</table>
+<p>Note that the typical <b>calling cycle</b> is <code>beginTransaction()</code>,
+followed by <i>n</i> times
+<code>doAction()</code> followed by <code>endTransaction()</code>. However, if either
+<code>beginTransaction()</code> or <code>doAction()</code> return back an error state
+(including RS_RET_SUSPENDED), then the transaction is considered aborted. In result, the
+remaining calls in this cycle (e.g. <code>endTransaction()</code>) are never made and a
+new cycle (starting with <code>beginTransaction()</code>) is begun when processing resumes.
+So an output plugin must expect and handle those partial cycles gracefully.
+<p><b>The question remains how can a plugin know if the core supports batching?</b>
+First of all, even if the engine would not know it, the plugin would return with RS_RET_DEFER_COMMIT,
+which would then be treated as an error by the engine. This would effectively disable the
+output, but cause no further harm (but may be harm enough in itself).
+<p>The real solution is to enable the plugin to query the rsyslog core if this feature is
+supported or not. At the time of the introduction of batching, no such query-interface
+exists. So we introduce it with that release. What this means is that if a rsyslog core can
+not provide this query interface, it is a core that was built before batching support
+was available. So the absence of a query interface indicates that the transactional
+interface is not available. One might now be tempted to think there is no need to do
+the actual check, but it is recommended to ask the rsyslog engine explicitly if
+the transactional interface is present and will be honored. This enables us to
+create versions in the future which have, for whatever reason we do not yet know, no
+support for this interface.
+<p>The logic to do these checks is contained in the <code>INITChkCoreFeature</code> macro,
+which can be used as follows:
+<p><pre><code>
+INITChkCoreFeature(bCoreSupportsBatching, CORE_FEATURE_BATCHING);
+</code></pre>
+<p>Here, bCoreSupportsBatching is a plugin-defined integer which after execution is
+1 if batches (and thus the transactional interface) are supported and 0 otherwise.
+CORE_FEATURE_BATCHING is the feature we are interested in. Future versions of rsyslog
+may contain additional feature-test-macros (you can see all of them in
+./runtime/rsyslog.h).
+<p>Note that the ompgsql output plugin supports transactional mode in a hybrid way and
+thus can be considered good example code.
+
+<h2>Open Issues</h2>
+<ul>
+<li>Processing errors handling
+<li>reliable re-queue during error handling and queue termination
+</ul>
+
+
+
<h3>Licensing</h3>
<p>From the rsyslog point of view, plugins constitute separate projects. As such,
we think plugins are not required to be compatible with GPLv3. However, this is
diff --git a/doc/highperf.txt b/doc/highperf.txt
new file mode 100644
index 00000000..5f9481e1
--- /dev/null
+++ b/doc/highperf.txt
@@ -0,0 +1,4 @@
+links to high performance papers:
+
+- http://www.kegel.com/c10k.html
+- http://pl.atyp.us/content/tech/servers.html (**)
diff --git a/doc/imfile.html b/doc/imfile.html
index eb1a72a9..7961729b 100644
--- a/doc/imfile.html
+++ b/doc/imfile.html
@@ -88,8 +88,8 @@ level may be needed. Even if you need quick response, 1 seconds should
be well enough. Please note that imfile keeps reading files as long as
there is any data in them. So a "polling sleep" will only happen when
nothing is left to be processed.</li>
-<li><b>$InputFilePersistStateInterval </b> [lines]</b><br>
-Available in 4.7.3+<br>
+<li><b>$InputFilePersistStateInterval</b> [lines]<br>
+Available in 4.7.3+, 5.6.2+<br>
Specifies how often the state file shall be written when processing the input
file. The default value is 0, which means a new state file is only written when
the monitored files is being closed (end of rsyslogd execution). Any other
@@ -98,6 +98,21 @@ been processed. This setting can be used to guard against message duplication du
to fatal errors (like power fail). Note that this setting affects imfile
performance, especially when set to a low value. Frequently writing the state
file is very time consuming.
+<li><b>$InputFileReadMode</b> [mode]<br>
+Available in 5.7.5+
+<li><b>$InputFileMaxLinesAtOnce</b> [number]<br>
+Available in 5.9.0+
+<br>
+This is useful if multiple files need to be monitored. If set to 0, each file
+will be fully processed and then processing switches to the next file
+(this was the default in previous versions). If it is set, a maximum of
+[number] lines is processed in sequence for each file, and then the file is
+switched. This provides a kind of multiplexing the load of multiple files and
+probably leads to a more natural distribution of events when multiple busy files
+are monitored. The default is 10240.
+<li>$InputFileBindRuleset &lt;ruleset&gt;<br>
+Available in 5.7.5+, 6.1.5+<br>
+Binds the listener to a specific <a href="multi_ruleset.html">ruleset</a>.</li>
</ul>
<b>Caveats/Known Bugs:</b>
<p>So far, only 100 files can be monitored. If more are needed,
diff --git a/doc/impstats.html b/doc/impstats.html
new file mode 100644
index 00000000..260c1aa4
--- /dev/null
+++ b/doc/impstats.html
@@ -0,0 +1,64 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html><head>
+<title>Periodic Statistics of Internal Counters (impstats)</title>
+</head>
+<body>
+<a href="rsyslog_conf_modules.html">back</a>
+
+<h1>Input Module to Generate Periodic Statistics of Internal Counters</h1>
+<p><b>Module Name:&nbsp;&nbsp;&nbsp; impstats</b></p>
+<p><b>Available since: </b>5.7.0+, 6.1.1+
+<p><b>Author: </b>Rainer Gerhards &lt;rgerhards@adiscon.com&gt;</p>
+<p><b>Description</b>:</p>
+<p>This module provides periodic output of rsyslog internal counters.
+Note that the whole statistics system is currently under development. So
+availability and format of counters may change and is not yet stable (so be
+prepared to change your trending scripts when you upgrade to a newer rsyslog version).
+<p>The set of available counters will be output as a set of syslog messages. This
+output is periodic, with the interval being configurable (default is 5 minutes).
+Be sure that your configuration records the counter messages (default is syslog.info).
+<p>Note that loading this module has impact on rsyslog performance. Depending on
+settings, this impact may be noticeable (for high-load environments).
+<p>The rsyslog website has an updated overview of available
+<a href="http://rsyslog.com/rsyslog-statistic-counter/">rsyslog statistic counters</a>.
+</p>
+<p><b>Configuration Directives</b>:</p>
+<ul>
+<li>$PStatInterval &lt;Seconds&gt;<br>
+Sets the interval, in <b>seconds</b> at which messages are generated. Please note that the
+actual interval may be a bit longer. We do not try to be precise and so the interval is
+actually a sleep period which is entered after generating all messages. So the actual
+interval is what is configured here plus the actual time required to generate messages.
+In general, the difference should not really matter.
+<li>$PStatFacility &lt;numerical facility&gt;<br>
+The numerical syslog facility code to be used for generated messages. Default
+is 5 (syslog). This is useful for filtering messages.</li>
+<li>$PStatSeverity &lt;numerical severity&gt;<br>
+The numerical syslog severity code to be used for generated messages. Default
+is 6 (info). This is useful for filtering messages.</li>
+</ul>
+<b>Caveats/Known Bugs:</b>
+<ul>
+<li>This module MUST be loaded right at the top of rsyslog.conf, otherwise
+stats may not get turned on in all places.</li>
+<li>experimental code</li>
+</ul>
+<p><b>Sample:</b></p>
+<p>This activates the module and records messages to /var/log/rsyslog-stats in 10 minute intervals:<br>
+</p>
+<textarea rows="8" cols="60">$ModLoad impstats
+$PStatInterval 600
+$PStatSeverity 7
+
+syslog.debug /var/log/rsyslog-stats
+</textarea>
+<p>[<a href="rsyslog_conf.html">rsyslog.conf overview</a>]
+[<a href="manual.html">manual index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
+<p><font size="2">This documentation is part of the
+<a href="http://www.rsyslog.com/">rsyslog</a>
+project.<br>
+Copyright &copy; 2010 by <a href="http://www.gerhards.net/rainer">Rainer
+Gerhards</a> and
+<a href="http://www.adiscon.com/">Adiscon</a>.
+Released under the GNU GPL version 3 or higher.</font></p>
+</body></html>
diff --git a/doc/imptcp.html b/doc/imptcp.html
index d4228185..386e691a 100644
--- a/doc/imptcp.html
+++ b/doc/imptcp.html
@@ -45,7 +45,25 @@ can be found at the <a href="http://www.rsyslog.com/Article321.phtml">Cisco tcp
page.
<li>$InputPTCPServerNotifyOnConnectionClose [on/<b>off</b>]<br>
instructs imptcp to emit a message if the remote peer closes a connection.<br>
-<li>$InputPTCPServerRun &lt;port&gt;<br>
+<li><b>$InputPTCPServerKeepAlive</b> &lt;on/<b>off</b>&gt;<br>
+enable or disable keep-alive packets at the tcp socket layer. The default is
+to disable them.</li>
+<li><b>$InputPTCPServerKeepAlive_probes</b> &lt;number&gt;<br>
+The number of unacknowledged probes to send before considering the connection dead and notifying the application layer.
+The default, 0, means that the operating system defaults are used. This has only
+effect if keep-alive is enabled. The functionality may not be available on
+all platforms.
+<li><b>$InputPTCPServerKeepAlive_intvl</b> &lt;number&gt;<br>
+The interval between subsequent keepalive probes, regardless of what the connection has exchanged in the meantime.
+The default, 0, means that the operating system defaults are used. This has only
+effect if keep-alive is enabled. The functionality may not be available on
+all platforms.
+<li><b>$InputPTCPServerKeepAlive_time</b> &lt;number&gt;<br>
+The interval between the last data packet sent (simple ACKs are not considered data) and the first keepalive probe; after the connection is marked to need keepalive, this counter is not used any further.
+The default, 0, means that the operating system defaults are used. This has only
+effect if keep-alive is enabled. The functionality may not be available on
+all platforms.
+<li><b>$InputPTCPServerRun</b> &lt;port&gt;<br>
Starts a TCP server on selected port</li>
<li>$InputPTCPServerInputName &lt;name&gt;<br>
Sets a name for the inputname property. If no name is set "imptcp" is used by default. Setting a
diff --git a/doc/imtcp.html b/doc/imtcp.html
index 0ccdecc7..7653f601 100644
--- a/doc/imtcp.html
+++ b/doc/imtcp.html
@@ -1,27 +1,29 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html><head>
-<meta http-equiv="Content-Language" content="en"><title>TCP Syslog Input Module</title></head>
+<html>
+<head>
+<meta http-equiv="Content-Language" content="en">
+<title>TCP Syslog Input Module</title>
+</head>
+
<body>
-<a href="rsyslog_conf_modules.html">back</a>
+<a href="rsyslog_conf_modules.html">back to rsyslog module overview</a>
<h1>TCP Syslog Input Module</h1>
<p><b>Module Name:&nbsp;&nbsp;&nbsp; imtcp</b></p>
-<p><b>Author: </b>Rainer Gerhards
-&lt;rgerhards@adiscon.com&gt;</p>
+<p><b>Author: </b>Rainer Gerhards &lt;rgerhards@adiscon.com&gt;</p>
+<p><b>Multi-Ruleset Support: </b>since 4.5.0 and 5.1.1
<p><b>Description</b>:</p>
<p>Provides the ability to receive syslog messages via TCP.
-Encryption can be provided by using <a href="rsyslog_stunnel.html">stunnel</a>
-(an alternative is the use
-the&nbsp;<a href="imgssapi.html">imgssapi</a>
-modul).</p>
-<p>Multiple receivers may be configured by
-specifying
+Encryption is natively provided by selecting the appropriate network stream driver and
+can also be provided by using <a href="rsyslog_stunnel.html">stunnel</a>
+(an alternative is the use of the <a href="imgssapi.html">imgssapi</a> module).</p>
+<p>Multiple receivers may be configured by specifying
$InputTCPServerRun multiple times. This is available since version 4.3.1, earlier
versions do NOT support it.
</p>
<p><b>Configuration Directives</b>:</p>
<ul>
-<li>$InputTCPServerAddtlFrameDelimiter &lt;Delimiter&gt;<br>
+<li><b>$InputTCPServerAddtlFrameDelimiter &lt;Delimiter&gt;</b><br>
This directive permits to specify an additional frame delimiter for plain tcp syslog.
The industry-standard specifies using the LF character as frame delimiter. Some vendors,
notable Juniper in their NetScreen products, use an invalid frame delimiter, in Juniper's
@@ -41,28 +43,49 @@ very limited interest in fixing this issue. This directive <b>can not</b> fix th
That would require much more code changes, which I was unable to do so far. Full details
can be found at the <a href="http://www.rsyslog.com/Article321.phtml">Cisco tcp syslog anomaly</a>
page.
-<li>$InputTCPServerNotifyOnConnectionClose [on/<b>off</b>] (available since 4.5.5)<br>
+<li><b>$InputTCPServerDisableLFDelimiter</b> &lt;on/<b>off</b>&gt; (available since 5.5.3)<br>
+Industry-standard plain text tcp syslog uses the LF to delimit syslog frames. However,
+some users brought up the case that it may be useful to define a different delimiter and
+totally disable LF as a delimiter (the use case named was multi-line messages). This mode
+is non-standard and will probably come with a lot of problems. However, as there is need
+for it and it is relatively easy to support, we do so. Be sure to turn this setting to
+"on" only if you exactly know what you are doing. You may run into all sorts of troubles,
+so be prepared to wrangle with that!
+<li><b>$InputTCPServerNotifyOnConnectionClose</b> [on/<b>off</b>] (available since 4.5.5)<br>
instructs imtcp to emit a message if the remote peer closes a connection.<br>
<b>Important:</b> This directive is global to all listeners and must be given right
after loading imtcp, otherwise it may have no effect.</li>
-<li>$InputTCPServerRun &lt;port&gt;<br>
+<li><b>$InputTCPServerKeepAlive</b> &lt;on/<b>off</b>&gt;<br>
+enable or disable keep-alive packets at the tcp socket layer. The default is
+to disable them.</li>
+<li><b>$InputTCPServerRun</b> &lt;port&gt;<br>
Starts a TCP server on selected port</li>
-<li>$InputTCPMaxListeners &lt;number&gt;<br>
+<li><b>$InputTCPFlowControl</b> &lt;<b>on</b>/off&gt;<br>
+This setting specifies whether some message flow control shall be exercised on the
+related TCP input. If set to on, messages are handled as "light delayable", which means
+the sender is throttled a bit when the queue becomes near-full. This is done in order
+to preserve some queue space for inputs that can not throttle (like UDP), but it
+may have some undesired effect in some configurations. Still, we consider this as
+a useful setting and thus it is the default. To turn the handling off, simply
+configure that explicitly.
+</li>
+<li><b>$InputTCPMaxListeners</b> &lt;number&gt;<br>
Sets the maximum number of listeners (server ports) supported. Default is 20. This must be set before the first $InputTCPServerRun directive.</li>
-<li>$InputTCPMaxSessions &lt;number&gt;<br>
-Sets the maximum number of sessions supported. Default is 200. This must be set before the first $InputTCPServerRun directive</li>
-<li>$InputTCPServerStreamDriverMode &lt;number&gt;<br>
+<li><b>$InputTCPMaxSessions</b> &lt;number&gt;<br> Sets the maximum number of sessions supported. Default is 200. This must be set before the first $InputTCPServerRun directive</li>
+<li><b>$InputTCPServerStreamDriverMode</b> &lt;number&gt;<br>
Sets the driver mode for the currently selected <a href="netstream.html">network stream driver</a>. &lt;number&gt; is driver specifc.</li>
-<li>$InputTCPServerInputName &lt;name&gt;<br>
+<li><b>$InputTCPServerInputName</b> &lt;name&gt;<br>
Sets a name for the inputname property. If no name is set "imtcp" is used by default. Setting a
name is not strictly necessary, but can be useful to apply filtering based on which input
the message was received from.
-<li>$InputTCPServerStreamDriverAuthMode &lt;mode-string&gt;<br>
+<li><b>$InputTCPServerStreamDriverAuthMode</b> &lt;mode-string&gt;<br>
Sets the authentication mode for the currently selected <a href="netstream.html">network stream driver</a>. &lt;mode-string&gt; is driver specifc.</li>
-<li>$InputTCPServerStreamDriverPermittedPeer &lt;id-string&gt;<br>
+<li><b>$InputTCPServerStreamDriverPermittedPeer</b> &lt;id-string&gt;<br>
Sets permitted peer IDs. Only these peers are able to connect to the
listener. &lt;id-string&gt; semantics depend on the currently selected
AuthMode and&nbsp; <a href="netstream.html">network stream driver</a>. PermittedPeers may not be set in anonymous modes.</li>
+<li><b>$InputTCPServerBindRuleset</b> &lt;ruleset&gt;<br>
+Binds the listener to a specific <a href="multi_ruleset.html">ruleset</a>.</li>
</ul>
<b>Caveats/Known Bugs:</b>
<ul>
@@ -70,20 +93,22 @@ AuthMode and&nbsp; <a href="netstream.html">network stream driver</a>. Permitted
<li>can not be loaded together with <a href="imgssapi.html">imgssapi</a>
(which includes the functionality of imtcp)</li>
</ul>
-<p><b>Sample:</b></p>
-<p>This sets up a TCP server on port 514:<br>
+<p><b>Example:</b></p>
+<p>This sets up a TCP server on port 514 and permits it to accept up to 500 connections:<br>
</p>
-<textarea rows="15" cols="60">$ModLoad imtcp #
-needs to be done just once
+<textarea rows="15" cols="60">$ModLoad imtcp # needs to be done just once
+$InputTCPMaxSessions 500
$InputTCPServerRun 514
</textarea>
+<p>Note that the parameters (here: max sessions) need to be set <b>before</b> the listener
+is activated. Otherwise, the parameters will not apply.
+</p>
<p>[<a href="rsyslog_conf.html">rsyslog.conf overview</a>]
[<a href="manual.html">manual index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
-<p><font size="2">This documentation is part of the
-<a href="http://www.rsyslog.com/">rsyslog</a>
+<p><font size="2">This documentation is part of the <a href="http://www.rsyslog.com/">rsyslog</a>
project.<br>
-Copyright © 2008 by <a href="http://www.gerhards.net/rainer">Rainer
-Gerhards</a> and
+Copyright &copy; 2008,2009 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
<a href="http://www.adiscon.com/">Adiscon</a>.
Released under the GNU GPL version 3 or higher.</font></p>
-</body></html>
+</body>
+</html>
diff --git a/doc/imudp.html b/doc/imudp.html
new file mode 100644
index 00000000..f0e86307
--- /dev/null
+++ b/doc/imudp.html
@@ -0,0 +1,58 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+<meta http-equiv="Content-Language" content="en">
+<title>UDP Syslog Input Module</title>
+</head>
+
+<body>
+<a href="rsyslog_conf_modules.html">back to rsyslog module overview</a>
+
+<h1>UDP Syslog Input Module</h1>
+<p><b>Module Name:&nbsp;&nbsp;&nbsp; imudp</b></p>
+<p><b>Author: </b>Rainer Gerhards &lt;rgerhards@adiscon.com&gt;</p>
+<p><b>Multi-Ruleset Support: </b>since 5.3.2
+<p><b>Description</b>:</p>
+<p>Provides the ability to receive syslog messages via UDP.
+<p>Multiple receivers may be configured by specifying
+$UDPServerRun multiple times.
+</p>
+<p><b>Configuration Directives</b>:</p>
+<ul>
+<li>$UDPServerAddress &lt;IP&gt;<br>
+local IP address (or name) the UDP listener should bind to</li>
+<li>$UDPServerRun &lt;port&gt;<br>
+former -r&lt;port&gt; option, default 514, start UDP server on this
+port, "*" means all addresses</li>
+<li>$UDPServerTimeRequery &lt;nbr-of-times&gt;<br>
+this is a performance
+optimization. Getting the system time is very costly. With this setting, imudp can
+be instructed to obtain the precise time only once every n-times. This logic is
+only activated if messages come in at a very fast rate, so doing less frequent
+time calls should usually be acceptable. The default value is two, because we have
+seen that even without optimization the kernel often returns twice the identical time.
+You can set this value as high as you like, but do so at your own risk. The higher
+the value, the less precise the timestamp.
+<li>$InputUDPServerBindRuleset &lt;ruleset&gt;<br>
+Binds the listener to a specific <a href="multi_ruleset.html">ruleset</a>.</li>
+</ul>
+<b>Caveats/Known Bugs:</b>
+<ul>
+<li>currently none known</li>
+</ul>
+<p><b>Sample:</b></p>
+<p>This sets up a UDP server on port 514:<br>
+</p>
+<textarea rows="15" cols="60">$ModLoad imudp # needs to be done just once
+$UDPServerRun 514
+</textarea>
+<p>[<a href="rsyslog_conf.html">rsyslog.conf overview</a>]
+[<a href="manual.html">manual index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
+<p><font size="2">This documentation is part of the
+<a href="http://www.rsyslog.com/">rsyslog</a>
+project.<br>
+Copyright &copy; 2009 by <a href="http://www.gerhards.net/rainer">Rainer
+Gerhards</a> and
+<a href="http://www.adiscon.com/">Adiscon</a>.
+Released under the GNU GPL version 3 or higher.</font></p>
+</body></html>
diff --git a/doc/imuxsock.html b/doc/imuxsock.html
index 381374d2..f80bc598 100644
--- a/doc/imuxsock.html
+++ b/doc/imuxsock.html
@@ -25,6 +25,21 @@ the past four years. Alternate behaviour may be desirable if
gateway-like processes send messages via the local log slot - in this
case, it can be enabled via the
$InputUnixListenSocketIgnoreMsgTimestamp and $SystemLogSocketIgnoreMsgTimestamp config directives</p>
+<p><b>There is input rate limiting available,</b> (since 5.7.1) to guard you against
+the problems of a wild running logging process.
+If more than $SystemLogRateLimitInterval * $SystemLogRateLimitBurst log messages are emitted
+from the same process, those messages with $SystemLogRateLimitSeverity or lower will be
+dropped. It is not possible to recover anything about these messages, but imuxsock will
+tell you how many it has dropped once the interval has expired AND the next message
+is logged. Rate-limiting depends on SCM_CREDENTIALS. If the platform does not support
+this socket option, rate limiting is turned off. If multiple sockets are configured,
+rate limiting works independently on each of them (that should be what you usually expect).
+The same functionality is available for additional log sockets, in which case the
+config statements just use
+the prefix $IMUXSockRateLimit... but otherwise works exactly the same.
+When working with severities, please keep in mind that higher severity numbers mean lower
+severity and configure things accordingly.
+To turn off rate limiting, set the interval to zero.
<p><b>Unix log sockets can be flow-controlled.</b> That is, if processing queues fill up,
the unix socket reader is blocked for a short while. This may be useful to prevent overruning
the queues (which may cause exessive disk-io where it actually would not be needed). However,
@@ -34,18 +49,63 @@ are places as quickly as possible into the processing queues. If you would like
flow control, you need to enable it via the $SystemLogSocketFlowControl and
$InputUnixListenSocketFlowControl config directives. Just make sure you thought about
the implications. Note that for many systems, turning on flow control does not hurt.
+<p>Starting with rsyslog 5.9.4,
+<b><a href="http://www.rsyslog.com/what-are-trusted-properties/">trusted syslog properties</a>
+are available</b>. These require a recent enough Linux Kernel and access to the /proc file
+system. In other words, this may not work on all platforms and may not work fully when
+privileges are dropped (depending on how they are dropped). Note that trusted properties
+can be very useful, but also typically cause the message to grow rather large. Also, the
+format of log messages is obviously changed by adding the trusted properties at the end.
+For these reasons, the feature is <b>not enabled by default</b>. If you want to use it,
+you must turn it on (via $SystemLogSocketAnnotate and $InputUnixListenSocketAnnotate).
<p><b>Configuration Directives</b>:</p>
<ul>
<li><b>$InputUnixListenSocketIgnoreMsgTimestamp</b> [<b>on</b>/off]
<br>Ignore timestamps included in the message. Applies to the next socket being added.</li>
<li><b>$InputUnixListenSocketFlowControl</b> [on/<b>off</b>] - specifies if flow control should be applied
to the next socket.</li>
+<li><b>$IMUXSockRateLimitInterval</b> [number] - specifies the rate-limiting
+interval in seconds. Default value is 5 seconds. Set it to 0 to turn rate limiting off.
+</li>
+<li><b>$IMUXSockRateLimitBurst</b> [number] - specifies the rate-limiting
+burst in number of messages. Default is 200.
+</li>
+<li><b>$IMUXSockRateLimitSeverity</b> [numerical severity] - specifies the severity of
+messages that shall be rate-limited.
+</li>
+<li><b>$InputUnixListenSocketUsePIDFromSystem</b> [on/<b>off</b>] - specifies if the pid being logged shall
+be obtained from the log socket itself. If so, the TAG part of the message is rewritten.
+It is recommended to turn this option on, but the default is "off" to keep compatible
+with earlier versions of rsyslog. This option was introduced in 5.7.0.</li>
+<li><b>$InputUnixListenSocketUseSysTimeStamp</b> [<b>on</b>/off] instructs imuxsock
+to obtain message time from the system (via control messages) instead of using time
+recorded inside the message. This may be most useful in combination with systemd. Note:
+this option was introduced with version 5.9.1. Due to the usefulness of it, we
+decided to enable it by default. As such, 5.9.1 and above behave slightly different
+than previous versions. However, we do not see how this could negatively affect
+existing environments.<br>
<li><b>$SystemLogSocketIgnoreMsgTimestamp</b> [<b>on</b>/off]<br>
Ignore timestamps included in the messages, applies to messages received via the system log socket.</li>
-<li><b>$OmitLocalLogging</b> (imuxsock) [on/<b>off</b>] -- former -o option</li>
+<li><b>$OmitLocalLogging</b> (imuxsock) [on/<b>off</b>] -- former -o option;
+do NOT listen for the local log socket. This is most useful if you run multiple
+instances of rsyslogd where only one shall handle the system log socket.</li>
<li><b>$SystemLogSocketName</b> &lt;name-of-socket&gt; -- former -p option</li>
<li><b>$SystemLogFlowControl</b> [on/<b>off</b>] - specifies if flow control should be applied
to the system log socket.</li>
+<li><b>$SystemLogUsePIDFromSystem</b> [on/<b>off</b>] - specifies if the pid being logged shall
+be obtained from the log socket itself. If so, the TAG part of the message is rewritten.
+It is recommended to turn this option on, but the default is "off" to keep compatible
+with earlier versions of rsyslog. This option was introduced in 5.7.0.</li>
+<li><b>$SystemLogRateLimitInterval</b> [number] - specifies the rate-limiting
+interval in seconds. Default value is 5 seconds. Set it to 0 to turn rate limiting off.
+</li>
+<li><b>$SystemLogRateLimitBurst</b> [number] - specifies the rate-limiting
+burst in number of messages. Default is 200.
+</li>
+<li><b>$SystemLogRateLimitSeverity</b> [numerical severity] - specifies the severity of
+messages that shall be rate-limited.
+</li>
+<li><b>$SystemLogUseSysTimeStamp</b> [<b>on</b>/off] the same as $InputUnixListenSocketUseSysTimeStamp, but for the system log socket.
<li><b>$InputUnixListenSocketCreatePath</b> [on/<b>off</b>] - create directories in the socket path
if they do not already exist. They are created with 0755 permissions with the owner being the process under
which rsyslogd runs. The default is not to create directories. Keep in mind, though, that rsyslogd always
@@ -64,10 +124,18 @@ shall be used inside messages taken from the <b>next</b> $AddUnixListenSocket so
the hostname must be specified before the $AddUnixListenSocket configuration directive, and it
will only affect the next one and then automatically be reset. This functionality is provided so
that the local hostname can be overridden in cases where that is desired.</li>
+<li><b>$InputUnixListenSocketAnnotate</b> &lt;on/<b>off</b>&gt; turn on annotation/trusted
+properties for the non-system log socket in question.</li>
+<li><b>$SystemLogSocketAnnotate</b> &lt;on/<b>off</b>&gt; turn on annotation/trusted
+properties for the system log socket.</li>
</ul>
+
<b>Caveats/Known Bugs:</b><br>
-<br>
-This documentation is sparse and incomplete.
+<ul>
+<li>There is a compile-time limit of 50 concurrent sockets. If you need more, you need to
+change the array size in imuxsock.c.
+<li>This documentation is sparse and incomplete.
+</ul>
<p><b>Sample:</b></p>
<p>The following sample is the minimum setup required to accept syslog messages from applications running
on the local system.<br>
@@ -95,13 +163,23 @@ the $InputUnixListenSocketCreatePath and the $InputUnixListenSocketHostName.</p>
$InputUnixListenSocketCreatePath on # turn on for *next* socket
$InputUnixListenSocketHostName /var/run/sshd/dev/log
</textarea>
+<p>The following sample is used to turn off input rate limiting on the system log
+socket.
+<textarea rows="4" cols="70">$ModLoad imuxsock # needs to be done just once
+
+$SystemLogRateLimitInterval 0 # turn off rate limiting
+</textarea>
+<p>The following sample is used to activate message annotation and thus trusted properties
+on the system log socket.
+<textarea rows="4" cols="70">$ModLoad imuxsock # needs to be done just once
+
+$SystemLogSocketAnnotate on
+</textarea>
<p>[<a href="rsyslog_conf.html">rsyslog.conf overview</a>]
[<a href="manual.html">manual index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
<p><font size="2">This documentation is part of the
-<a href="http://www.rsyslog.com/">rsyslog</a>
-project.<br>
-Copyright &copy; 2008-2010 by <a href="http://www.gerhards.net/rainer">Rainer
-Gerhards</a> and
+<a href="http://www.rsyslog.com/">rsyslog</a> project.<br>
+Copyright &copy; 2008-2011 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
<a href="http://www.adiscon.com/">Adiscon</a>.
Released under the GNU GPL version 3 or higher.</font></p>
</body></html>
diff --git a/doc/index.html b/doc/index.html
index b3b336a7..d753e2ed 100644
--- a/doc/index.html
+++ b/doc/index.html
@@ -13,7 +13,7 @@ installed documentation which exactly matches the version you have installed.
It is highly suggested to at least briefly look over these files.
<li>The <a href="http://www.rsyslog.com">rsyslog web site</a> which offers
probably every information you'll ever need (ok, just kidding...).
-<li>The <a href="http://www.rsyslog.com/doc-status.html">project status page</a> provides
+<li>The <a href="http://www.rsyslog.com/status">project status page</a> provides
information on current releases
<li>and the <a href="troubleshoot.html">troubleshooting guide</a> hopefully helps if
things do not immediately work out
diff --git a/doc/manual.html b/doc/manual.html
index 27d7a24b..fdeb2980 100644
--- a/doc/manual.html
+++ b/doc/manual.html
@@ -19,18 +19,20 @@ rsyslog support</a> available directly from the source!</p>
<p><b>Please visit the <a href="http://www.rsyslog.com/sponsors">rsyslog sponsor's page</a>
to honor the project sponsors or become one yourself!</b> We are very grateful for any help towards the
project goals.</p>
-<p><b>This documentation is for version 4.8.0 (v4-stable branch) of rsyslog.</b>
-Visit the <i> <a href="http://www.rsyslog.com/status">rsyslog status page</a></i></b> to obtain current
-version information and project status.
+<p><b>This documentation is for version 5.9.4 (stable branch) of rsyslog.</b>
+Visit the <i><a href="http://www.rsyslog.com/status">rsyslog status page</a></i>
+to obtain current version information and project status.
</p><p><b>If you like rsyslog, you might
want to lend us a helping hand. </b>It doesn't require a lot of
time - even a single mouse click helps. Learn <a href="how2help.html">how to help the rsyslog project</a>.
Due to popular demand, there is now a <a href="rsyslog_ng_comparison.html">side-by-side comparison
between rsyslog and syslog-ng</a>.</p>
<p>If you are upgrading from rsyslog v2 or stock sysklogd,
-<a href="v3compatibility.html">be sure to read the rsyslog v3 compatibility document</a>,
+<a href="v3compatibility.html">be sure to read the rsyslog v3 compatibility notes</a>,
and if you are upgrading from v3, read the
-<a href="v4compatibility.html">rsyslog v4 compatibility document</a>.
+<a href="v4compatibility.html">rsyslog v4 compatibility notes</a> and
+if you upgrade from v4, read the
+<a href="v5compatibility.html">rsyslog v5 compatibility notes</a>.
<p>Rsyslog will work even
if you do not read the doc, but doing so will definitely improve your experience.</p>
<p><b>Follow the links below for the</b></p>
@@ -39,12 +41,11 @@ if you do not read the doc, but doing so will definitely improve your experience
<li><a href="rsyslog_conf.html">configuration file syntax (rsyslog.conf)</a></li>
<li><a href="http://www.rsyslog.com/tool-regex">a regular expression checker/generator tool for rsyslog</a></li>
<li> <a href="property_replacer.html">property replacer, an important core component</a></li>
-<li>a commented <a href="sample.conf.html">sample rsyslog.conf</a> </li>
<li><a href="bugs.html">rsyslog bug list</a></li>
-<li><a href="rsyslog_packages.html"> rsyslog packages</a></li>
-<li><a href="generic_design.html">backgrounder on
-generic syslog application design</a>
+<li><a href="messageparser.html">understanding rsyslog message parsers</a></li>
+<li><a href="generic_design.html">backgrounder on generic syslog application design</a></li>
<li><a href="modules.html">description of rsyslog modules</a></li>
+<li><a href="rsyslog_packages.html">rsyslog packages</a></li>
<li><a href="http://cookbook.rsyslog.com">the rsyslog "cookbook"</a> - a set of configurations ready to use</li>
</ul>
<p><b>We have some in-depth papers on</b></p>
@@ -69,6 +70,7 @@ syslog sender over NAT</a> (online only)</li>
<li><a href="debug.html">debug support in rsyslog</a></li>
<li>Developer Documentation
<ul>
+ <li><a href="build_from_repo.html">building rsyslog from the source repository</a></li>
<li><a href="dev_oplugins.html">writing rsyslog output plugins</a></li>
<li><a href="dev_queue.html">the rsyslog message queue object (developer's view)</a></li>
</ul></li>
@@ -108,5 +110,7 @@ any restriction as long as your license is GPLv3 compatible. If your license is
you may even be still permitted to use rsyslog source code. However, then you need to look at the way
<a href="licensing.html">rsyslog is licensed</a>.</p>
<p>Feedback is always welcome, but if you have a support question, please do not
-mail Rainer directly (<a href="free_support.html">why not?</a>).
+mail Rainer directly (<a href="free_support.html">why not?</a>) - use the
+<a href="http://lists.adiscon.net/mailman/listinfo/rsyslog">rsyslog mailing list</a>
+or <a href="http://kb.monitorware.com/rsyslog-f40.html">rsyslog forum</a> instead.
</body></html>
diff --git a/doc/messageparser.html b/doc/messageparser.html
new file mode 100644
index 00000000..370db59f
--- /dev/null
+++ b/doc/messageparser.html
@@ -0,0 +1,222 @@
+<html>
+<head>
+<title>Message parsers in rsyslog</title>
+</head>
+<body>
+<a href="manual.html">rsyslog documentation</a>
+
+<h1>Message parsers in rsyslog</h1>
+<p><small><i>Written by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a>
+(2009-11-06)</i></small></p>
+<h2>Intro</h2>
+<p>Message parsers are a feature of rsyslog 5.3.4 and above. In this article, I describe what
+message parsers are, what they can do and how they relate to the relevant standards. I will
+also describe what you can not do with them. Finally, I give some advice on implementing your
+own custom parser.
+
+<h2>What are message parsers?</h2>
+<p>Well, the quick answer is that message parsers are the component of rsyslog that
+parses the syslog message after it has been received. Prior to rsyslog 5.3.4, message parsers
+were built into the rsyslog core itself and could not be modified (other than by modifying
+the rsyslog code).
+<p>In 5.3.4, we changed that: message parsers are now loadable modules (just
+like input and output modules). That means that new message parsers can be added without
+modifying the rsyslog core, even without contributing something back to the
+project.
+<p>But that doesn't answer what a message parser really is. What does it mean to &quot;parse a
+message&quot; and, maybe more importantly, what is a message? To answer these questions correctly,
+we need to dig down into the relevant standards.
+<a href="http://tools.ietf.org/html/rfc5424">RFC5424</a> specifies a layered architecture
+for the syslog protocol:
+<p align="center"><img src="rfc5424layers.png" alt="RFC5424 syslog protocol layers">
+<p>Important for us is the distinction between the syslog transport and the upper layers.
+The transport layer specifies how a stream of messages is assembled at the sender side and
+how this stream of messages is disassembled into the individual messages at the receiver
+side. In networking terminology, this is called &quot;framing&quot;. The core idea is that
+each message is put into a so-called "frame", which then is transmitted over the communications
+link.
+<p>The framing used is depending on the protocol. For example, in UDP the "frame"-equivalent is
+a packet that is being sent (this also means that no two messages can travel within a single
+UDP packet). In "plain tcp syslog", the industry standard, LF is used as a frame delimiter
+(which also means that no multi-line message can properly be transmitted, a "design" flaw
+in plain tcp syslog). In <a href="http://tools.ietf.org/html/rfc5425">RFC5425</a> there is
+a header in front of each frame that contains the size of the message. With this framing,
+any message content can properly be transferred.
+<p>And now comes the important part: <b>message parsers do NOT operate at the transport
+layer</b>, they operate, as their name implies, on messages. So we can not use message
+parsers to change the underlying framing. For example, if a sender splits (for whatever
+reason) a single message into two and encapsulates these into two frames, there is no way
+a message parser could undo that.
+<p>A typical example may be a multi-line message: let's assume some originator has generated
+a message for the format "A\nB" (where \n means LF). If that message is being transmitted
+via plain tcp syslog, the frame delimiter is LF. So the sender will delimit the frame with
+LF, but otherwise send the message unmodified onto the wire (because that is how things are
+-unfortunately- done in plain tcp syslog...). So the wire will see "A\nB\n". When this
+arrives at the receiver, the transport layer will undo the framing. When it sees the LF
+after A, it thinks it finds a valid frame delimiter (in fact, this is the correct view!). So
+the receiver will extract one complete message A and one complete message B, not knowing
+that they once were both part of a large multi-line message. These two messages are then
+passed to the upper layers, where the message parsers receive them and extract information.
+However, the message parsers never know (or even have a chance to see) that A and B
+belonged together. Even further, in rsyslog there is no guarantee that A will be parsed
+before B - concurrent operations may cause the reverse order (and do so very validly).
+<p>The important lesson is: <b>message parsers can not be used to fix a broken framing</b>.
+You need a full protocol implementation to do that, which is the domain of input and
+output modules.
+<p>I have now told you what you can not do with message parsers. But what are they good for?
+Thankfully, broken framing is not the primary problem of the syslog world. A wealth of different
+formats is. Unfortunately, many real-world implementations violate the relevant standards
+in one way or another. That makes it often very hard to extract meaningful information from
+a message or to process messages from different sources by the same rules. In my article
+<a href="syslog_parsing.html">syslog parsing in rsyslog</a> I have elaborated on all
+the real-world evil that you can usually see. So I won't repeat that here. But in short, the
+real problem is not the framing, but how to make malformed messages well-looking.
+<p><b>This is what message parsers permit you to do: take a (well-known) malformed message, parse
+it according to its semantics and generate perfectly valid internal message representations
+from it.</b> So as long as messages are consistently in the same wrong format (and they usually
+are!), a message parser can look at that format, parse it, and make the message processable just
+as if it were well-formed in the first place. Plus, one can abuse the interface to do some other
+"interesting" tricks, but that would take us too far.
+<p>While this functionality may not sound exciting, it actually solves a very big issue (that you
+only really understand if you have managed a system with various different syslog sources).
+Note that we were often able to process malformed messages in the past with the help of the
+property replacer and regular expressions. While this is nice, it has a performance hit. A
+message parser is C code, compiled to native machine code, and thus typically much faster than
+any regular expression based method (depending, of course, on the quality of the implementation...).
+
+<h2>How are message parsers used?</h2>
+<p>In a simplified view, rsyslog
+<ol>
+<li>first receives messages (via the input module),
+<li><i>then parses them (at the message level!)</i> and
+<li>then processes them (operating on the internal message representation).
+</ol>
+Message parsers are utilized in the second step (written in italics).
+Thus, they take the raw message (NOT frame!) received from the remote system and create
+the internal structure out of it that the other parts of rsyslog need in order to perform
+their processing. Parsing is vital, because an unparsed message can not be processed in the
+third stage, the actual application-level processing (like forwarding or writing to files).
+<h3>Parser Chains and how they Operate</h3>
+Rsyslog chains parsers together to provide flexibility.
+A <b>parser chain</b>
+contains all parsers that can potentially be used to parse a message.
+It is assumed that there is some
+way a parser can detect if the message it is being presented is supported by it. If so, the parser
+will tell the rsyslog engine and parse the message. The rsyslog engine now calls each parser
+inside the chain (in sequence!) until the first parser is able to parse the message. After one
+parser has been found, the message is considered parsed and no other parsers are called on that
+message.
+<p>Side-note: this method implies there are some "not-so-dirty" tricks available to modify
+the message by a parser module that declares itself as "unable to parse" but still does
+some message modification. This was not a primary design goal, but may be utilized, and the
+interface probably extended, to support generic filter modules. These would need to go
+to the root of the parser chain. As mentioned, the current system already supports this.
+<p>The position inside the parser chain can be thought of as a priority: parsers sitting
+earlier in the chain take precedence over those sitting later in it. So more specific
+parsers should go earlier in the chain. A good example of how this works is the default parser
+set provided by rsyslog: rsyslog.rfc5424 and rsyslog.rfc3164, each one parses according to the
+rfc that has named it. RFC5424 was designed to be distinguishable from RFC3164 message by the
+sequence "1 " immediately after the so-called PRI-part (don't worry about these words, it is
+sufficient if you understand there is a well-defined sequence used to identify RFC5424
+messages). In contrast, RFC3164 actually permits everything as a valid message. Thus the
+RFC3164 parser will always parse a message, sometimes with quite unexpected outcome (there is
+a lot of guesswork involved in that parser, which unfortunately is unavoidable due to
+existing technology limits). So the default parser chain is to try the RFC5424 parser first
+and after it the RFC3164 parser. If we have a 5424-formatted message, that parser will
+identify and parse it and the rsyslog engine will stop processing. But if we receive a
+legacy syslog message, the RFC5424 parser will detect that it can not parse it, return this status
+to the engine which then calls the next parser inside the chain. That usually happens to be
+the RFC3164 parser, which will always process the message. But there could also be any other
+parser inside the chain, and then each one would be called until one that is able to parse
+is found.
+<p>If we reversed the parser order, RFC5424 messages would be incorrectly parsed. Why? Because the
+RFC3164 parser will always parse every message, so if it were asked first, it would parse
+(and misinterpret) the 5424-formatted message, report that it did so and the rsyslog engine would
+never call the 5424 parser. So the order of parsers inside the chain is very important.
+<p>What happens if no parser in the chain could parse a message? Well, then we could not
+obtain the in-memory representation that is needed to further process the message. In that
+case, rsyslog has no other choice than to discard the message. If it does so, it will emit
+a warning message, but only in the first 1,000 incidents. This limit is a safety measure
+against message-loops, which otherwise could quickly result from a parser chain
+misconfiguration. <b>If you do not tolerate loss of unparsable messages, you must ensure
+that each message can be parsed.</b> You can easily achieve this by always using the
+"rsyslog.rfc3164" parser as the <i>last</i> parser inside parser chains. That may result
+in invalid parsing, but you will have a chance to see the invalid message (in debug mode,
+a warning message will be written to the debug log each time a message is dropped due to
+inability to parse it).
+<h3>Where are parser chains used?</h3>
+<p>We now know what parser chains are and how they operate. The question is now how many
+parser chains can be active and how it is decided which parser chain is used on which message.
+This is controlled via <a href="multi_ruleset.html">rsyslog's rulesets</a>. In short, multiple
+rulesets can be defined and there always exists at least one ruleset (for specifics, follow
+the <a href="multi_ruleset.html">link</a>). A parser chain is bound to a specific ruleset.
+This is done by virtue of defining parsers via the
+<a href="rsconf1_rulesetparser.html">$RulesetParser</a> configuration directive (for specifics,
+see there). If no such directive is specified, the default parser chain is used. As of this
+writing, the default parser chain always consists of "rsyslog.rfc5424", "rsyslog.rfc3164", in
+that order. As soon as a parser is configured, the default list is cleared and the new parser
+is added to the end of the (initially empty) ruleset's parser chain.
+<p>The important point to know is that parser chains are defined on a per-ruleset basis.
+<h3>Can I use different parser chains for different devices?</h3>
+<p>The correct answer is: generally yes, but it depends. First of all, remember that input
+modules (and specific listeners) may be bound to specific rulesets. As parser chains "reside"
+in rulesets, binding to a ruleset also binds to the parser chain that is bound to that ruleset.
+As a number one prerequisite, the input module must support binding to different rulesets. Not
+all do, but their number is growing. For example, the important
+<a href="imudp.html">imudp</a> and <a href="imtcp.html">imtcp</a> input modules support
+that functionality. Those that do not (for example <a href="im3195.html">im3195</a>) can only
+utilize the default ruleset and thus the parser chain defined in that ruleset.
+<p>If you do not know if the input module in question supports ruleset binding, check
+its documentation page. Those that support it have the required directives.
+<p>Note that it is currently under evaluation if rsyslog will support binding parser chains
+to specific inputs directly, without depending on the ruleset. There are some concerns that
+this may not be necessary but adds considerable complexity to the configuration. So this may
+or may not be possible in the future. In any case, if we decide to add it, input modules
+need to support it, so this functionality would require some time to implement.
+<p>The cookbook recipe for using different parsers for different devices is given
+as an actual in-depth example in the <a href="rsconf1_rulesetparser.html">$RulesetParser</a>
+configuration directive doc page. In short, it is accomplished by defining specific rulesets
+for the required parser chains, defining different listener ports for each of the devices
+with different format and binding these listeners to the correct ruleset (and thus parser
+chains). Using that approach, a variety of different message formats can be supported
+via a single rsyslog instance.
+
+<h2>Which message parsers are available</h2>
+<p>As of this writing, there exist only two message parsers, one for RFC5424 format and one for
+legacy syslog (loosely described in
+<a href="http://tools.ietf.org/html/rfc3164">RFC3164</a>). These parsers are built-in and
+need not be explicitly loaded. However, message parsers can be added with relative ease
+by anyone knowing to code in C. Then, they can be loaded via $ModLoad just like any
+other loadable module. It is expected that the rsyslog project will be contributed additional
+message parsers over time, so that at some point there hopefully is a rich choice of them
+(I intend to add a browsable repository as soon as new parsers pop up).
+<h3>How to write a message parser?</h3>
+<p>As a prerequisite, you need to know the exact format that the device is sending. Then, you need
+moderate C coding skills, and a little bit of rsyslog internals. I guess the rsyslog specific part
+should not be that hard, as almost all information can be gained from the existing parsers. They
+are rather simple in structure and can be found under the "./tools" directory. They are named
+pmrfc3164.c and pmrfc5424.c. You need to follow the usual loadable module guidelines.
+It is my expectation that writing a parser should typically not take longer than a single
+day, with maybe a day more to get acquainted with rsyslog. Of course, I am not sure if the number
+is actually right.
+<p>If you can not program or have no time to do it, Adiscon can also write a message parser
+for you as
+part of the <a href="http://www.rsyslog.com/professional-services">rsyslog professional services
+offering</a>.
+<h2>Conclusion</h2>
+<p>Malformed syslog messages are a pain and unfortunately often seen in practice. Message parsers
+provide a fast and efficient solution for this problem. Different parsers can be defined for
+different devices, and they all convert message information into rsyslog's well-defined
+internal format. Message parsers were first introduced in rsyslog 5.3.4 and also offer
+some interesting ideas that may be explored in the future - up to full message normalization
+capabilities. It is strongly recommended that anyone with a heterogenous environment take
+a look at message parser capabilities.
+
+<p>[<a href="rsyslog_conf.html">rsyslog.conf overview</a>] [<a href="manual.html">manual
+index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
+<p><font size="2">This documentation is part of the
+<a href="http://www.rsyslog.com/">rsyslog</a> project.<br>
+Copyright &copy; 2009 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
+<a href="http://www.adiscon.com/">Adiscon</a>. Released under the GNU GPL version 3 or higher.</font></p>
+</body>
+</html>
diff --git a/doc/mmsnmptrapd.html b/doc/mmsnmptrapd.html
new file mode 100644
index 00000000..699049d3
--- /dev/null
+++ b/doc/mmsnmptrapd.html
@@ -0,0 +1,95 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+<meta http-equiv="Content-Language" content="en">
+<title>mmsnmptrapd message modification module</title>
+</head>
+
+<body>
+<a href="rsyslog_conf_modules.html">back to rsyslog module overview</a>
+
+<h1>mmsnmptrapd message modification module</h1>
+<p><b>Module Name:&nbsp;&nbsp;&nbsp; mmsnmptrapd</b></p>
+<p><b>Author: </b>Rainer Gerhards &lt;rgerhards@adiscon.com&gt; (custom-created)</p>
+<p><b>Multi-Ruleset Support: </b>since 5.8.1
+<p><b>Description</b>:</p>
+<p>This module uses a specific configuration of snmptrapd's tag values to
+obtain information of the original source system and the severity present inside the
+original SNMP trap. It then replaces these fields inside the syslog message.
+<p>Let's look at an example. Essentially, SNMPTT will invoke something like this:
+<pre>logger -t snmptrapd/warning/realhost Host 003c.abcd.ffff in vlan 17 is flapping between port Gi4/1 and port Gi3/2
+</pre>
+<p>
+This message modification module will change the tag (removing the additional information),
+hostname and severity (not shown in example), so the log entry will look as follows:
+<pre>
+2011-04-21T16:43:09.101633+02:00 realhost snmptrapd: Host 003c.abcd.ffff in vlan 17 is flapping between port Gi4/1 and port Gi3/2
+</pre>
+The following logic is applied to all messages being processed:
+<ol>
+<li>The module checks incoming syslog entries. If their TAG field starts with "snmptrapd/"
+(configurable), they are modified, otherwise not. If they are modified, this happens as follows:
+<li>It will derive the hostname from the tag field which has format snmptrapd/severity/hostname
+<li>It will derive the severity from the tag field which has format
+snmptrapd/severity/hostname. A configurable mapping table will be used to derive a new
+severity value from that severity string. If no mapping has been defined, the original
+severity is not changed.
+<li>It replaces the "FromHost" value with the derived value from step 2
+<li>It replaces the "Severity" value with the derived value from step 3
+</ol>
+<p>Note that the placement of this module inside the configuration is important. All actions
+before this module is called will work on the unmodified message. All actions after its call
+will work on the modified message. Please also note that there is some extra power in case it
+is required: as this module is implemented via the output module interface, a filter
+can be used (actually must be used) in order to tell when it is called. Usually, the catch-all
+filter (*.*) is used, but more specific filters are fully supported. So it is possible to define
+different parameters for this module depending on different filters. It is also possible to
+just run messages from one remote system through this module, with the help of filters or
+multiple rulesets and ruleset bindings. In short words, all capabilities rsyslog offers
+to control output modules are also available to mmsnmptrapd.
+<p><b>Configuration Directives</b>:</p>
+<ul>
+<li><b>$mmsnmptrapdTag</b> [tagname]<br>
+tells the module which start string inside the tag to look for. The default is
+"snmptrapd". Note that a slash is automatically added to this tag when it comes to
+matching incoming messages. It MUST not be given, except if two slashes are required
+for whatever reasons (so "tag/" results in a check for "tag//" at the start of
+the tag field).
+<li><b>$mmsnmptrapdSeverityMapping</b> [severitymap]<br>
+This specifies the severity mapping table. It needs to be specified as a list. Note that
+due to the current config system <b>no whitespace</b> is supported inside the list, so be
+sure not to use any whitespace inside it.<br>
+The list is constructed of Severity-Name/Severity-Value pairs, delimited by comma.
+Severity-Name is a case-sensitive string (e.g. "warning") and Severity-Value is the
+associated numerical value (e.g. 4).
+Possible values are in the range 0..7 and are defined in RFC5424, table 2. The
+given sample would be specified as "warning/4".<br>
+If multiple instances of mmsnmptrapd are used, each instance uses the most recently
+defined $mmsnmptrapdSeverityMapping before itself.
+</ul>
+<b>Caveats/Known Bugs:</b>
+<ul>
+<li>currently none known</li>
+</ul>
+<p><b>Example:</b></p>
+<p>This enables rewriting of messages from snmptrapd and configures error and warning
+severities. The default tag is used.<br>
+</p>
+<textarea rows="10" cols="80">$ModLoad mmsnmptrapd # needs to be done just once
+# ... other module loads and listener setup ...
+*.* /path/to/file/with/orignalMessage # this file receives *un*modified messages
+$mmsnmptrapdSeverityMapping warning/4,error/3
+*.* :mmsnmptrapd: # *now* message is modified
+*.* /path/to/file/with/modifiedMessage # this file receives modified messages
+# ... rest of config ...
+</textarea>
+</p>
+<p>[<a href="rsyslog_conf.html">rsyslog.conf overview</a>]
+[<a href="manual.html">manual index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
+<p><font size="2">This documentation is part of the <a href="http://www.rsyslog.com/">rsyslog</a>
+project.<br>
+Copyright &copy; 2011 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
+<a href="http://www.adiscon.com/">Adiscon</a>.
+Released under the GNU GPL version 3 or higher.</font></p>
+</body>
+</html>
diff --git a/doc/module_workflow.png b/doc/module_workflow.png
new file mode 100644
index 00000000..e1a72e96
--- /dev/null
+++ b/doc/module_workflow.png
Binary files differ
diff --git a/doc/msgflow.txt b/doc/msgflow.txt
new file mode 100644
index 00000000..ebee18f8
--- /dev/null
+++ b/doc/msgflow.txt
@@ -0,0 +1,56 @@
+flow of messages (in terms of functions) after they have
+been pulled off the main queue.
+
+Functions are listed in the order they are (usually) called.
+If there are branches in the processing flow, this is explicitly
+stated.
+
+as of: 2010-06-08, master branch (v5)
+
+syslogd.c/msgConsumer
+syslogd.c/msgConsumeOne
+ if ACLcheck needed:
+ net.cvthname,
+ net.isAllowedSender2
+ MsgSetRcvFromStr
+ MsgSetRcvFromIPStr
+ if NEEDS_PARSING:
+ parser.ParseMsg
+ruleset.ProcessBatch (loops through ruleset)
+ruleset.c/processMsgDoRules (for each rule in ruleset)
+rule.c/processMsg
+1:rule.c/shouldProcessThisMessage
+ (evaluates filters, optimize via ALL-Filter)
+if to be processed, loop through associated actions ->
+2:rule.c/processMsgsDoAction
+action.c/actionCallAction (LOCKs action object!)
+action.c/doActionCallAction (does duplicate message reduction)
+action.c/actionWriteToAction
+ limits based on iExecEveryNthOccur
+ generates "message repeated..." string if necessary
+ limits based on iSecsExecOnceInterval
+! **qqueueEnqObj**
+ This means, we are done processing the action at this
+ stage. The queue may run async, but usually does not
+ do so (in default settings).
+
+
+Now looking at processing of the action queue. If the queue is
+in direct mode, remember that the action object is still
+locked (this may also be a potential bug in non-direct mode, as
+it looks like we need this prerequisite!).
+
+action.c/processBatchMain (queue Consumer, LOCKs mutActExec)
+action.c/processAction
+ (calls finishBatch at the end, but not so important
+ for our analysis)
+action.c/submitBatch (recursive submit/retry loop for messages)
+action.c/tryDoAction (submits a [potentially partial] batch)
+action.c/actionProcessMessage
+ (action.c/actionPrepare (utility to set status/TX mode))
+action.c/actionCallDoAction
+1: action.c/prepareDoActionParams
+1: template.c/tplToString-tplToArray
+ string buffer is cached in action object
+2:<output Module>/doAction
+
diff --git a/doc/multi_ruleset.html b/doc/multi_ruleset.html
index 8d8c614f..da65b4ba 100644
--- a/doc/multi_ruleset.html
+++ b/doc/multi_ruleset.html
@@ -23,7 +23,7 @@ write it to a file or forward it to a remote logging server.
<p>A traditional configuration file is made up of one or more of these rules. When a new
message arrives, its processing starts with the first rule (in order of appearance in
rsyslog.conf) and continues for each rule until either all rules have been processed or
-a so-called &quote;discard&quot; action happens, in which case processing stops and the
+a so-called &quot;discard&quot; action happens, in which case processing stops and the
message is thrown away (what also happens after the last rule has been processed).
<p>The <b>multi-ruleset</b> support now permits to specify more than one such rule sequence.
@@ -66,11 +66,18 @@ to seperate the messages by any other method.
<pre>$InputTCPServerBindRuleset &lt;name&gt;
</pre>
-directive. Note that &quot;name&quote; must be the name of a ruleset that is already defined
+directive. Note that &quot;name&quot; must be the name of a ruleset that is already defined
at the time the bind directive is given. There are many ways to make sure this happens, but
I personally think that it is best to define all rule sets at the top of rsyslog.conf and
define the inputs at the bottom. This kind of reverses the traditional recommended ordering, but
seems to be a really useful and straightforward way of doing things.
+<h2>Why are rulesets important for different parser configurations?</h2>
+<p>Custom message parsers, used to handle different (and potentially otherwise-invalid)
+message formats, can be bound to rulesets. So multiple rulesets can be a very useful
+way to handle devices sending messages in different malformed formats in a consistent
+way. Unfortunately, this is not uncommon in the syslog world. An in-depth explanation
+with configuration sample can be found at the
+<a href="rsconf1_rulesetparser.html">$RulesetParser</a> configuration directive.
<h2>Can I use a different Ruleset as the default?</h2>
<p>This is possible by using the
@@ -249,11 +256,12 @@ $InputTCPServerBindRuleset remote10516
$InputTCPServerRun 10516
</pre>
-<p>Note that the &quot;mail.*&quot; rule inside the &quot;remote10516&quote; ruleset does
+<p>Note that the &quot;mail.*&quot; rule inside the &quot;remote10516&quot; ruleset does
not affect processing inside any other rule set, including the default rule set.
<h2>Performance</h2>
+<h3>Fewer Filters</h3>
<p>No rule processing can be faster than not processing a rule at all. As such, it is useful
for a high performance system to identify disjunct actions and try to split these off to
different rule sets. In the example section, we had a case where three different tcp listeners
@@ -263,6 +271,25 @@ is no need to check the reception service - instead messages are automatically p
right rule set and can be processed by very simple rules (maybe even with
&quot;*.*&quot;-filters, the fastest ones available).
+<h3>Partitioning of Input Data</h3>
+<p>Starting with rsyslog 5.3.4, rulesets permit higher concurrency. They offer the ability
+to run on their own &quot;main&quot; queue. What that means is that a separate queue is associated
+with a specific rule set. That means that inputs bound to that ruleset do no longer need
+to compete with each other when they enqueue a data element into the queue. Instead, enqueue
+operations can be completed in parallel.
+<p>An example: let us assume we have three TCP listeners. Without rulesets, each of them
+needs to insert messages into the main message queue. So if each of them wants to
+submit a newly arrived message into the queue at the same time, only one can do so
+while the others need to wait. With multiple rulesets, its own queue can be created for each
+ruleset. If now each listener is bound to its own ruleset, concurrent message submission is
+possible. On a machine with a sufficiently large number of cores, this can result in
+dramatic performance improvement.
+<p>It is highly advised that high-performance systems define a dedicated ruleset, with a
+dedicated queue for each of the inputs.
+<p>By default, rulesets do <b>not</b> have their own queue. It must be activated via the
+<a href="rsconf1_rulesetcreatemainqueue.html">$RulesetCreateMainQueue</a> directive.
+
+<h3>Future Enhancements</h3>
<p>In the long term, multiple rule sets will probably lay the foundation for even better
optimizations. So it is not a bad idea to get aquainted with them.
diff --git a/doc/omhdfs.html b/doc/omhdfs.html
new file mode 100644
index 00000000..ef7e60c5
--- /dev/null
+++ b/doc/omhdfs.html
@@ -0,0 +1,69 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html><head><title>rsyslog output module for HDFS (omhdfs)</title>
+<a href="features.html">back</a>
+</head>
+<body>
+<h1>HDFS Output Module (omhdfs)</h1>
+<p><b>Module Name:&nbsp;&nbsp;&nbsp; omhdfs</b></p>
+<p><b>Available since:&nbsp;&nbsp;&nbsp;</b> 5.7.1</p>
+<p><b>Author: </b>Rainer Gerhards &lt;rgerhards@adiscon.com&gt;</p>
+<p><b>Description</b>:</p>
+<p>This module supports writing messages into files on Hadoop's HDFS
+file system.
+<p><b>Configuration Directives</b>:</p>
+<ul>
+<li><b>$OMHDFSFileName</b> [name]<br>
+The name of the file to which the output data shall be written.
+</li>
+<li><b>$OMHDFSHost</b> [name]<br>
+Name or IP address of the HDFS host to connect to.
+</li>
+<li><b>$OMHDFSPort</b> [name]<br>
+Port on which to connect to the HDFS host.
+</li>
+<li><b>$OMHDFSDefaultTemplate</b> [name]<br>
+Default template to be used when none is specified. This saves the work of
+specifying the same template over and over again. Of course, the default
+template can be overridden via the usual method.
+</li>
+</ul>
+<b>Caveats/Known Bugs:</b>
+<p>Building omhdfs is a challenge because we could not yet find out how
+to integrate Java properly into the autotools build process. The issue is
+that HDFS is written in Java and libhdfs uses JNI to talk to it. That requires
+that various system-specific environment options and paths be set correctly. At
+this point, we leave this to the user. If someone knows how to do it better,
+please drop us a line!
+<ul>
+<li>In order to build, you need to set these environment variables BEFORE running
+./configure:
+<ul>
+<li>JAVA_INCLUDES - must have all include paths that are needed to build
+JNI C programs, including the -I options necessary for gcc. An example is<br>
+# export JAVA_INCLUDES="-I/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/include -I/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/include/linux"
+<li>JAVA_LIBS - must have all library paths that are needed to build
+JNI C programs, including the -l/-L options necessary for gcc. An example is<br>
+# export JAVA_LIBS="-L/usr/java/jdk1.6.0_21/jre/lib/amd64 -L/usr/java/jdk1.6.0_21/jre/lib/amd64/server -ljava -ljvm -lverify"
+</ul>
+
+<li>As of HDFS architecture, you must make sure that all relevant environment
+variables (the usual Java stuff and HADOOP's home directory) are properly set.
+<li>As it looks, libhdfs makes Java throw exceptions to stdout. There is no
+known work-around for this (and it usually should not cause any trouble).
+</ul>
+<p><b>Sample:</b></p>
+<p>
+</p>
+<textarea rows="4" cols="80">$ModLoad omhdfs
+
+$OMHDFSFileName /var/log/logfile
+*.* :omhdfs:
+</textarea>
+[<a href="manual.html">manual index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
+<p><font size="2">This documentation is part of the <a href="http://www.rsyslog.com/">rsyslog</a>
+project.<br>
+Copyright &copy; 2010 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
+<a href="http://www.adiscon.com/">Adiscon</a>.
+Released under the GNU GPL version 3 or higher.</font></p>
+
+</body></html>
diff --git a/doc/ommysql.html b/doc/ommysql.html
index 9b35b402..daef9cab 100644
--- a/doc/ommysql.html
+++ b/doc/ommysql.html
@@ -24,6 +24,18 @@ directive configuration system.
a non-standard port for the MySQL server. The default is 0, which means the
system default port is used. There is no need to specify this directive unless
you know the server is running on a non-standard listen port.
+<li><b>$OmMySQLConfigFile &lt;file name&gt;</b><br>Permits the selection
+of an optional MySQL Client Library configuration file (my.cnf) for extended
+configuration functionality. The use of this configuration directive is necessary
+only if you have a non-standard environment or if fine-grained control over the
+database connection is desired.</li>
+<li><b>$OmMySQLConfigSection &lt;string&gt;</b><br>Permits the selection of the
+section within the configuration file specified by the <b>$OmMySQLConfigFile</b> directive.
+<br>This will likely only be used where the database administrator provides a single
+configuration file with multiple profiles.
+<br>This configuration directive is ignored unless <b>$OmMySQLConfigFile</b> is also used
+in the rsyslog configuration file.
+<br>If omitted, the MySQL Client Library default of &quot;client&quot; will be used.</li>
<li>Action parameters:
<br><b>:ommysql:database-server,database-name,database-userid,database-password</b>
<br>All parameters should be filled in for a successful connect.
diff --git a/doc/omruleset.html b/doc/omruleset.html
new file mode 100644
index 00000000..41d6ccfc
--- /dev/null
+++ b/doc/omruleset.html
@@ -0,0 +1,140 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html><head>
+<meta http-equiv="Content-Language" content="en">
+<title>ruleset output module (omruleset)</title>
+</head>
+<body>
+<a href="rsyslog_conf_modules.html">rsyslog module reference</a>
+
+<h1>ruleset output/including module (omruleset)</h1>
+<p><b>Module Name:&nbsp;&nbsp;&nbsp; omruleset</b></p>
+<p><b>Author: </b>Rainer Gerhards &lt;rgerhards@adiscon.com&gt;</p>
+<p><b>Available Since</b>: 5.3.4</p>
+<p><b>Description</b>:</p>
+<p>This is a very special &quot;output&quot; module. It permits to pass a message object
+to another rule set. While this is a very simple action, it enables very
+complex configurations, e.g. it supports high-speed "and" conditions, sending
+data to the same file in a non-racy way, include-ruleset functionality as well as
+some high-performance optimizations (in case the rule sets have the necessary
+queue definitions).
+<p>While it leads to a lot of power, this output module offers seemingly easy functionality.
+The complexity (and capabilities) arise from how everything can be combined.
+<p>With this module, a message can be sent to another ruleset for processing. This is somewhat
+similar to a &quot;#include&quot; in the C programming language. However, one needs to keep
+in mind that a ruleset can contain its own queue and that a queue can run in various modes.
+<p>Note that if no queue is defined in the ruleset, the message is enqueued into the main message
+queue. This most often is not optimal and means that message processing may be severely deferred.
+Also note that when the ruleset's target queue is full and no free space can be acquired
+within the usual timeout, the message object may actually be lost. This is an extreme scenario,
+but users building an audit-grade system need to know this restriction. For regular installations,
+it should not really be relevant.
+<p><b>At minimum, be sure you understand the
+<a href="rsconf1_rulesetcreatemainqueue.html">$RulesetCreateMainQueue</a>
+directive as well as the importance of statement order in rsyslog.conf before using omruleset!</b>
+<p><b>Recommended Use:</b>
+<ul>
+<li>create rulesets specifically for omruleset
+<li>create these rulesets with their own main queue
+<li> decent queueing parameters (sizes, threads, etc) should be used
+for the ruleset main queue. If in doubt, use the same parameters as for the
+overall main queue.
+<li>if you use multiple levels of ruleset nesting, double check for endless loops - the rsyslog
+engine does not detect these
+</ul>
+
+<p><b>Configuration Directives</b>:</p>
+<ul>
+<li><b>$ActionOmrulesetRulesetName</b> ruleset-to-submit-to<br>
+This directive specifies the name of the ruleset that the message
+provided to omruleset should be submitted to. This ruleset must already have
+been defined. Note that the directive is automatically reset after each
+:omruleset: action and there is no default. This is done to prevent accidental
+loops in ruleset definition, which can happen very quickly.
+The :omruleset: action will NOT be honored if no ruleset name has been defined. As usual,
+the ruleset name must be specified in front of the action that it modifies.
+</ul>
+<p><b>Examples:</b></p>
+<p>This example creates a ruleset for a write-to-file action. The idea here
+is that the same file is written based on multiple filters, problems occur if the file is used
+together with a buffer. That is because file buffers are action-specific, and so some partial
+buffers would be written. With omruleset, we create a single action inside its own ruleset and
+then pass all messages to it whenever we need to do so. Of course, such a simple situation could
+also be solved by a more complex filter, but the method used here can also be utilized in
+more complex scenarios (e.g. with multiple listeners). The example tries to keep it simple.
+Note that we create a ruleset-specific main queue (for simplicity with the default main queue
+parameters) in order to avoid re-queueing messages back into the main queue.
+</p>
+<textarea rows="30" cols="80">$ModLoad omruleset
+
+# define ruleset for commonly written file
+$RuleSet CommonAction
+$RulesetCreateMainQueue on
+*.* /path/to/file.log
+
+#switch back to default ruleset
+$ruleset RSYSLOG_DefaultRuleset
+
+# begin first action
+# note that we must first specify which ruleset to use for omruleset:
+$ActionOmrulesetRulesetName CommonAction
+mail.info :omruleset:
+#end first action
+
+# begin second action
+# note that we must first specify which ruleset to use for omruleset:
+$ActionOmrulesetRulesetName CommonAction
+:FROMHOST, isequal, "myhost.example.com" :omruleset:
+#end second action
+
+# of course, we can have "regular" actions alongside :omruleset: actions
+*.* /path/to/general-message-file.log
+</textarea>
+<p>The next example is used to create a high-performance nested &quot;and&quot; filter condition. Here,
+it is first checked if the message contains a string &quot;error&quot;. If so, the message is forwarded
+to another ruleset which then applies some filters. The advantage of this is that we can use
+high-performance filters where we otherwise would need to use the (much slower) expression-based
+filters. Also, this enables pipeline processing, in that second ruleset is executed in
+parallel to the first one.</p>
+<textarea rows="30" cols="80">$ModLoad omruleset
+
+# define "second" ruleset
+$RuleSet nested
+$RulesetCreateMainQueue on # again, we use our own queue
+mail.* /path/to/mailerr.log
+kernel.* /path/to/kernelerr.log
+auth.* /path/to/autherr.log
+
+#switch back to default ruleset
+$ruleset RSYSLOG_DefaultRuleset
+
+# begin first action - here we filter on "error"
+# note that we must first specify which ruleset to use for omruleset:
+$ActionOmrulesetRulesetName nested
+:msg, contains, "error" :omruleset:
+#end first action
+
+# begin second action - as an example we can do anything else in
+# this processing. Note that these actions are processed concurrently
+# to the ruleset "nested"
+:FROMHOST, isequal, "myhost.example.com" /path/to/host.log
+#end second action
+
+# of course, we can have "regular" actions alongside :omruleset: actions
+*.* /path/to/general-message-file.log
+</textarea>
+<p><b>Caveats/Known Bugs:</b>
+<p>The current configuration file language is not really adequate for a complex construct
+like omruleset. Unfortunately, more important work is currently preventing me from redoing the
+config language. So use extreme care when nesting rulesets and be sure to test-run your
+config before putting it into production, ensuring you have a sufficiently large sample
+of the traffic run over it. If problems arise, the
+<a href="troubleshoot.html">rsyslog debug log</a> is your friend.
+<p>[<a href="rsyslog_conf.html">rsyslog.conf overview</a>]
+[<a href="manual.html">manual index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
+<p><font size="2">This documentation is part of the
+<a href="http://www.rsyslog.com/">rsyslog</a>
+project.<br>
+Copyright &copy; 2009 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
+<a href="http://www.adiscon.com/">Adiscon</a>.
+Released under the GNU GPL version 3 or higher.</font></p>
+</body></html>
diff --git a/doc/omstdout.html b/doc/omstdout.html
new file mode 100644
index 00000000..0bd10cfb
--- /dev/null
+++ b/doc/omstdout.html
@@ -0,0 +1,42 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html><head>
+<meta http-equiv="Content-Language" content="en">
+<title>stdout output module (omstdout)</title>
+</head>
+<body>
+<a href="rsyslog_conf_modules.html">rsyslog module reference</a>
+
+<h1>stdout output module (omstdout)</h1>
+<p><b>Module Name:&nbsp;&nbsp;&nbsp; omstdout</b></p>
+<p><b>Author: </b>Rainer Gerhards
+&lt;rgerhards@adiscon.com&gt;</p>
+<p><b>Available Since</b>: 4.1.6</p>
+<p><b>Description</b>:</p>
+<p>This module writes any messages that are passed to it to stdout.
+It was developed for the rsyslog test suite. However, there may
+be other (limited) uses. Please note that we do not put too much
+effort into the quality of this module as we do not expect it to
+be used in real deployments. If you do, please drop us a note so
+that we can enhance its priority!
+<p><b>Configuration Directives</b>:</p>
+<ul>
+<li><b>$ActionOMStdoutArrayInterface</b> [on|<b>off</b>]<br>
+This setting instructs omstdout to use the alternate
+array based method of parameter passing. If used, the values
+will be output with commas between the values but no other padding bytes.
+This is a test aid for the alternate calling interface.
+<li><b>$ActionOMStdoutEnsureLFEnding</b> [<b>on</b>|off]<br>
+Makes sure that each message is written with a terminating LF. This is needed for
+the automated tests. If the message contains a trailing LF, none is added.
+</ul>
+<b>Caveats/Known Bugs:</b>
+<p>Currently none known.
+<p>[<a href="rsyslog_conf.html">rsyslog.conf overview</a>]
+[<a href="manual.html">manual index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
+<p><font size="2">This documentation is part of the
+<a href="http://www.rsyslog.com/">rsyslog</a>
+project.<br>
+Copyright &copy; 2009 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
+<a href="http://www.adiscon.com/">Adiscon</a>.
+Released under the GNU GPL version 3 or higher.</font></p>
+</body></html>
diff --git a/doc/omudpspoof.html b/doc/omudpspoof.html
new file mode 100644
index 00000000..16cb9b13
--- /dev/null
+++ b/doc/omudpspoof.html
@@ -0,0 +1,92 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html><head>
+<title>UDP spoofing output module (omudpspoof)</title>
+</head>
+<body>
+<a href="rsyslog_conf_modules.html">rsyslog module reference</a>
+
+<h1>UDP spoofing output module (omudpspoof)</h1>
+<p><b>Module Name:&nbsp;&nbsp;&nbsp; omudpspoof</b></p>
+<p><b>Author: </b>David Lang &lt;david@lang.hm&gt; and Rainer Gerhards
+&lt;rgerhards@adiscon.com&gt;</p>
+<p><b>Available Since</b>: 5.1.3</p>
+<p><b>Description</b>:</p>
+<p>This module is similar to the regular UDP forwarder, but permits
+spoofing of the sender address. Also, it can cycle through a number of
+source ports.
+<p><b>Configuration Directives</b>:</p>
+<ul>
+<li><b>$ActionOMUDPSpoofSourceNameTemplate</b> &lt;templatename&gt;<br>
+This is the name of the template that contains a
+numerical IP address that is to be used as the source system IP address.
+While it may often be a constant value, it can be generated as usual via the
+property replacer, as long as it is a valid IPv4 address. If not specified, the
+built-in default template RSYSLOG_omudpspoofDfltSourceTpl is used. This template is defined
+as follows:<br>
+$template RSYSLOG_omudpspoofDfltSourceTpl,"%fromhost-ip%"<br>
+So in essence, the default template spoofs the address of the system the message
+was received from. This is considered the most important use case.
+<li><b>$ActionOMUDPSpoofTargetHost</b> &lt;hostname&gt;<br>
+Host that the messages shall be sent to.
+<li><b>$ActionOMUDPSpoofTargetPort</b> &lt;port&gt;<br>
+Remote port that the messages shall be sent to.
+<li><b>$ActionOMUDPSpoofDefaultTemplate</b> &lt;templatename&gt;<br>
+This setting instructs omudpspoof to use a template different from the
+default template for all of its actions that do not have a template specified
+explicitely.
+<li><b>$ActionOMUDPSpoofSourcePortStart</b> &lt;number&gt;<br>
+Specifies the start value for cycling the source ports. Must be less than or
+equal to the end value. Default is 32000.
+<li><b>$ActionOMUDPSpoofSourcePortEnd</b> &lt;number&gt;<br>
+Specifies the ending value for cycling the source ports. Must be greater than or
+equal to the start value. Default is 42000.
+</ul>
+<b>Caveats/Known Bugs:</b>
+<ul>
+<li><b>IPv6</b> is currently not supported. If you need this capability, please let us
+know via the rsyslog mailing list.
+</ul>
+<p><b>Sample:</b></p>
+<p>The following sample forwards all syslog messages in standard form to the
+remote server server.example.com. The original sender's address is used. We do not
+care about the source port. This example is considered the typical use case for
+omudpspoof.
+</p>
+<textarea rows="5" cols="80">$ModLoad omudpspoof
+$ActionOMUDPSpoofTargetHost server.example.com
+*.*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; :omudpspoof:
+</textarea>
+
+<p>The following sample forwards all syslog messages in unmodified form to the
+remote server server.example.com. The sender address 192.0.2.1 with fixed
+source port 514 is used.
+</p>
+<textarea rows="8" cols="80">$ModLoad omudpspoof
+$template spoofaddr,"192.0.2.1"
+$template spooftemplate,"%rawmsg%"
+$ActionOMUDPSpoofSourceNameTemplate spoofaddr
+$ActionOMUDPSpoofTargetHost server.example.com
+$ActionOMUDPSpoofSourcePortStart 514
+$ActionOMUDPSpoofSourcePortEnd 514
+*.*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; :omudpspoof:;spooftemplate
+</textarea>
+<p>The following sample is similar to the previous, but uses as many defaults as possible.
+In that sample, a source port in the range 32000..42000 is used. The message is formatted
+according to rsyslog's canned default forwarding format. Note that if any parameters
+have been changed, the previously set defaults will be used!
+</p>
+<textarea rows="5" cols="80">$ModLoad omudpspoof
+$template spoofaddr,"192.0.2.1"
+$ActionOMUDPSpoofSourceNameTemplate spoofaddr
+$ActionOMUDPSpoofTargetHost server.example.com
+*.*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; :omudpspoof:
+</textarea>
+<p>[<a href="rsyslog_conf.html">rsyslog.conf overview</a>]
+[<a href="manual.html">manual index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
+<p><font size="2">This documentation is part of the
+<a href="http://www.rsyslog.com/">rsyslog</a>
+project.<br>
+Copyright &copy; 2009 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
+<a href="http://www.adiscon.com/">Adiscon</a>.
+Released under the GNU GPL version 3 or higher.</font></p>
+</body></html>
diff --git a/doc/pmlastmsg.html b/doc/pmlastmsg.html
new file mode 100644
index 00000000..2abeac6a
--- /dev/null
+++ b/doc/pmlastmsg.html
@@ -0,0 +1,62 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html><head>
+<meta http-equiv="Content-Language" content="en">
+<title>parser module for &quot;last message repeated n times&quot; (pmlastmsg)</title>
+</head>
+<body>
+<a href="rsyslog_conf_modules.html">rsyslog module reference</a>
+
+<h1>parser module for &quot;last message repeated n times&quot; (pmlastmsg)</h1>
+<p><b>Module Name:&nbsp;&nbsp;&nbsp; pmlastmsg</b></p>
+<p><b>Module Type:&nbsp;&nbsp;&nbsp; parser module</b></p>
+<p><b>Author: </b>Rainer Gerhards &lt;rgerhards@adiscon.com&gt;</p>
+<p><b>Available Since</b>: 5.5.6</p>
+<p><b>Description</b>:</p>
+<p>Some syslogds are known to emit severely malformed messages with content
+"last message repeated n times". These messages can mess up message reception, as
+they lead to wrong interpretation with the standard RFC3164 parser. Rather than
+trying to fix this issue in pmrfc3164, we have created a new parser module
+specifically for these messages. The reason is that some processing overhead is
+involved in processing these messages (they must be recognized) and we would
+not like to place this toll on every user but only on those actually in need
+of the feature. Note that the performance toll is not large -- but if you expect
+a very high message rate with tens of thousands of messages per second, you will notice
+a difference.
+<p>This module should be loaded first inside <a href="messageparser.html">rsyslog's
+parser chain</a>. It processes all those messages that contain a PRI, then none or
+some spaces and then the exact text (case-insensitive) "last message repeated n times"
+where n must be an integer. All other messages are left untouched.
+
+<p><b>Configuration Directives</b>:</p>
+<p>There do not currently exist any configuration directives for this module.
+<p><b>Examples:</b></p>
+<p>This example is the typical use case, where some systems emit malformed
+"repeated msg" messages. Other than that, the default RFC5424 and RFC3164 parsers
+should be used. Note that when a parser is specified, the default parser chain
+is removed, so we need to specify all three parsers. We use this together with the
+default ruleset.
+</p>
+<textarea rows="15" cols="80">$ModLoad pmlastmsg # this parser is NOT a built-in module
+
+# note that parsers are tried in the
+# order they appear in rsyslog.conf, so put pmlastmsg first
+$RulesetParser rsyslog.lastline
+# as we have removed the default parser chain, we
+# need to add the default parsers as well.
+$RulesetParser rsyslog.rfc5424
+$RulesetParser rsyslog.rfc3164
+
+# now come the typical rules, like...
+*.* /path/to/file.log
+</textarea>
+<p><b>Caveats/Known Bugs:</b>
+<p>currently none
+<p>[<a href="rsyslog_conf.html">rsyslog.conf overview</a>]
+[<a href="manual.html">manual index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
+<p><font size="2">This documentation is part of the
+<a href="http://www.rsyslog.com/">rsyslog</a>
+project.<br>
+Copyright &copy; 2010 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
+<a href="http://www.adiscon.com/">Adiscon</a>.
+Released under the GNU GPL version 3 or higher.</font></p>
+</body></html>
diff --git a/doc/property_replacer.html b/doc/property_replacer.html
index 6ba149d7..f0153f2a 100644
--- a/doc/property_replacer.html
+++ b/doc/property_replacer.html
@@ -156,6 +156,12 @@ than messages generated somewhere.
</td>
</tr>
<tr>
+<td><b>$bom</b></td>
+<td>The UTF-8 encoded Unicode byte-order mark (BOM). This may be useful
+in templates for RFC5424 support, when the character set is known to be
+Unicode.</td>
+</tr>
+<tr>
<td><b>$now</b></td>
<td>The current date stamp in the format YYYY-MM-DD</td>
</tr>
diff --git a/doc/queue_msg_state.dot b/doc/queue_msg_state.dot
new file mode 100644
index 00000000..bfef2657
--- /dev/null
+++ b/doc/queue_msg_state.dot
@@ -0,0 +1,25 @@
+// This file is part of rsyslog.
+//
+// rsyslog message state in queue processing
+//
+// see http://www.graphviz.org for how to obtain the graphviz processor
+// which is used to build the actual graph.
+//
+// generate the graph with
+// $ dot file.dot -Tpng >file.png
+
+digraph msgState {
+ rankdir=LR
+
+ prod [label="producer" style="dotted" shape="box"]
+ que [label="queued"]
+ deq [label="dequeued"]
+ del [label="deleted"]
+
+ prod -> que [label="qEnq()" style="dotted"]
+ que -> deq [label="qDeq()"]
+ deq -> del [label="qDel()"]
+ deq -> que [label="fatal failure\n& restart"]
+
+ //{rank=same; del apf pdn }
+}
diff --git a/doc/queue_msg_state.jpeg b/doc/queue_msg_state.jpeg
new file mode 100644
index 00000000..a215f000
--- /dev/null
+++ b/doc/queue_msg_state.jpeg
Binary files differ
diff --git a/doc/queues.html b/doc/queues.html
index 45ce1bd1..75b70fbf 100644
--- a/doc/queues.html
+++ b/doc/queues.html
@@ -336,6 +336,33 @@ in this regard - it was just not requested so far. So if you need more
fine-grained control, let us know and we'll probably implement it.
There are two configuration directives, both should be used together or
results are unpredictable:" <i>$&lt;object&gt;QueueDequeueTimeBegin &lt;hour&gt;</i>" and&nbsp;"<i>$&lt;object&gt;QueueDequeueTimeEnd &lt;hour&gt;</i>". The hour parameter must be specified in 24-hour format (so 10pm is 22). A use case for this parameter can be found in the <a href="http://wiki.rsyslog.com/index.php/OffPeakHours">rsyslog wiki</a>. </p>
+<h2>Performance</h2>
+<p>The locking involved with maintaining the queue has a potentially large
+performance impact. How large this is, and if it exists at all, depends much on
+the configuration and actual use case. However, the queue is able to work on
+so-called &quot;batches&quot; when dequeueing data elements. With batches,
+multiple data elements are dequeued at once (with a single locking call).
+The queue dequeues all available elements up to a configured upper
+limit (<i>$&lt;object&gt;QueueDequeueBatchSize &lt;number&gt;</i>). It is important
+to note that the actual upper limit is dictated by availability. The queue engine
+will never wait for a batch to fill. So even if a high upper limit is configured,
+batches may consist of fewer elements, even just one, if there are no more elements
+waiting in the queue.
+<p>Batching
+can improve performance considerably. Note, however, that it affects the
+order in which messages are passed to the queue worker threads, as each worker
+now receives a batch of messages. Also, the larger the batch size and the higher
+the maximum number of permitted worker threads, the more main memory is needed.
+For a busy server, large batch sizes (around 1,000 or even more elements) may be useful.
+Please note that with batching, the main memory must hold BatchSize * NumOfWorkers
+objects in memory (worst-case scenario), even if running in disk-only mode. So if you
+use the default 5 workers at the main message queue and set the batch size to 1,000, you need
+to be prepared that the main message queue holds up to 5,000 messages in main memory
+<b>in addition</b> to the configured queue size limits!
+<p>The queue object's default maximum batch size
+is eight, but there exist different defaults for the actual parts of
+rsyslog processing that utilize queues. So you need to check these objects'
+defaults.
<h2>Terminating Queues</h2>
<p>Terminating a process sounds easy, but can be complex.
Terminating a running queue is in fact the most complex operation a queue
diff --git a/doc/rfc5424layers.png b/doc/rfc5424layers.png
new file mode 100644
index 00000000..70192cc0
--- /dev/null
+++ b/doc/rfc5424layers.png
Binary files differ
diff --git a/doc/rsconf1_abortonuncleanconfig.html b/doc/rsconf1_abortonuncleanconfig.html
new file mode 100644
index 00000000..77526c07
--- /dev/null
+++ b/doc/rsconf1_abortonuncleanconfig.html
@@ -0,0 +1,37 @@
+<html>
+<head>
+<title>rsyslog.conf file</title>
+</head>
+<body>
+<a href="rsyslog_conf_global.html">rsyslog.conf configuration directive</a>
+
+<h2>$AbortOnUncleanConfig</h2>
+<p><b>Type:</b> global configuration directive</p>
+<p><b>Parameter Values:</b> boolean (on/off, yes/no)</p>
+<p><b>Available since:</b> 5.3.1+</p>
+<p><b>Default:</b> off</p>
+<p><b>Description:</b></p>
+<p>This directive permits to prevent rsyslog from running when the configuration file
+is not clean. "Not Clean" means there are errors or some other annoyances that rsyslogd
+reports on startup. This is a user-requested feature to have a strict startup mode. Note
+that with the current code base it is not always possible to differentiate between a
+real error and a warning-like condition. As such, the startup will also be prevented if
+warnings are present. I consider this a good thing in being &quot;strict&quot;, but I admit
+there also currently is no other way of doing it.
+<p><b>Caveats:</b></p>
+Note that the consequences of a failed rsyslogd startup can be much more serious than a
+startup with only partial configuration. For example, log data may be lost or systems that
+depend on the log server in question will not be able to send logs, what in the ultimate
+result could result in a system hang on those systems. Also, the local system may hang when
+the local log socket has become full and is not read. There exist many such scenarios.
+As such, it is strongly recommended not to turn on this directive.
+
+<p>[<a href="rsyslog_conf.html">rsyslog.conf overview</a>] [<a href="manual.html">manual
+index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
+<p><font size="2">This documentation is part of the
+<a href="http://www.rsyslog.com/">rsyslog</a> project.<br>
+Copyright &copy; 2009 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
+<a href="http://www.adiscon.com/">Adiscon</a>. Released under the GNU GPL
+version 2 or higher.</font></p>
+</body>
+</html>
diff --git a/doc/rsconf1_escape8bitcharsonreceive.html b/doc/rsconf1_escape8bitcharsonreceive.html
new file mode 100644
index 00000000..408851c1
--- /dev/null
+++ b/doc/rsconf1_escape8bitcharsonreceive.html
@@ -0,0 +1,44 @@
+<html>
+<head>
+<title>rsyslog.conf file</title>
+</head>
+<body>
+<a href="rsyslog_conf_global.html">back</a>
+
+<h2>$Escape8BitCharactersOnReceive</h2>
+<p><b>Type:</b> global configuration directive</p>
+<p><b>Default:</b> off</p>
+<p><b>Available Since:</b> 5.5.2</p>
+<p><b>Description:</b></p>
+<p>This directive instructs rsyslogd to replace non US-ASCII characters (those that
+have the 8th bit set) during reception of the message.
+This may be useful for some systems.
+Please note that this escaping breaks Unicode and many other encodings. Most importantly,
+it can be assumed that Asian and European characters will be rendered hardly readable by
+this setting. However, it may still be useful when the logs themselves are primarily
+in English and only occasionally contain local script.
+If this option is turned on, all control-characters are converted to a 3-digit octal number and are prefixed with the $ControlCharacterEscapePrefix character (being '#' by default).
+<p><b>Warning:</b></p>
+<ul>
+ <li>turning on this option most probably destroys non-western character sets
+ (like Japanese, Chinese and Korean) as well as European character sets.</li>
+ <li>turning on this option destroys digital signatures if such exists inside
+ the message</li>
+ <li>if turned on, the drop-cc, space-cc and escape-cc
+ <a href="property_replacer.html">property replacer</a> options do not work
+ as expected because control characters are already removed upon message
+ reception. If you intend to use these property replacer options, you must
+ turn off $Escape8BitCharactersOnReceive.</li>
+</ul>
+<p><b>Sample:</b></p>
+<p><code><b>$Escape8BitCharactersOnReceive on</b></code></p>
+
+<p>[<a href="rsyslog_conf.html">rsyslog.conf overview</a>] [<a href="manual.html">manual
+index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
+<p><font size="2">This documentation is part of the
+<a href="http://www.rsyslog.com/">rsyslog</a> project.<br>
+Copyright &copy; 2010 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
+<a href="http://www.adiscon.com/">Adiscon</a>. Released under the GNU GPL
+version 3 or higher.</font></p>
+</body>
+</html>
diff --git a/doc/rsconf1_generateconfiggraph.html b/doc/rsconf1_generateconfiggraph.html
index 0b18463a..3f0fd666 100644
--- a/doc/rsconf1_generateconfiggraph.html
+++ b/doc/rsconf1_generateconfiggraph.html
@@ -8,8 +8,14 @@
<h2>$GenerateConfigGraph</h2>
<p><b>Type:</b> global configuration directive</p>
<p><b>Default:</b> </p>
-<p><b>Available Since:</b> 4.3.1</p>
+<p><b>Available Since:</b> 4.3.1 <b>CURRENTLY NOT AVAILABLE</b></p>
<p><b>Description:</b></p>
+<p><b>This directive is currently not supported. We had to disable it when we improved the
+rule engine. It is considerable effort to re-enable it. On the other hand, we are about
+to add a new config system, which will make yet another config graph method necessary.
+As such we have decided to currently disable this functionality and re-introduce it when
+the new config system has been instantiated.
+</b></p>
<p>This directive permits to create (hopefully) good-looking visualizations of rsyslogd's
configuration. It does not affect rsyslog operation. If the directive is specified multiple
times, all but the last are ignored. If it is specified, a graph is created. This happens
diff --git a/doc/rsconf1_omfileforcechown.html b/doc/rsconf1_omfileforcechown.html
index 7415a6f6..a680810b 100644
--- a/doc/rsconf1_omfileforcechown.html
+++ b/doc/rsconf1_omfileforcechown.html
@@ -8,7 +8,10 @@
<h2>$omfileForceChown</h2>
<p><b>Type:</b> global configuration directive</p>
<p><b>Parameter Values:</b> boolean (on/off, yes/no)</p>
-<p><b>Available since:</b> 4.7.0+, 5.3.0+</p>
+<p><b>Available:</b> 4.7.0+, 5.3.0-5.8.x, <b>NOT</b> available in 5.9.x or higher</p>
+<p><b>Note: this directive has been removed and is no longer available. The
+documentation is currently being retained for historical reasons.</b> Expect
+it to go away at some later stage as well.</p>
<p><b>Default:</b> off</p>
<p><b>Description:</b></p>
<p>Forces rsyslogd to change the ownership for output files that already exist. Please note
diff --git a/doc/rsconf1_rulesetcreatemainqueue.html b/doc/rsconf1_rulesetcreatemainqueue.html
new file mode 100644
index 00000000..5c1e0dec
--- /dev/null
+++ b/doc/rsconf1_rulesetcreatemainqueue.html
@@ -0,0 +1,83 @@
+<html>
+<head>
+<title>RulesetCreateMainQueue - rsyslog.conf file</title>
+</head>
+<body>
+<a href="rsyslog_conf_global.html">rsyslog.conf configuration directive</a>
+
+<h2>$RulesetCreateMainQueue</h2>
+<p><b>Type:</b> ruleset-specific configuration directive</p>
+<p><b>Parameter Values:</b> boolean (on/off, yes/no)</p>
+<p><b>Available since:</b> 5.3.5+</p>
+<p><b>Default:</b> off</p>
+<p><b>Description:</b></p>
+<p>
+Rulesets may use their own &quot;main&quot; message queue for message submission. Specifying
+this directive, <b>inside a ruleset definition</b>, turns this on. This is both a performance
+enhancement and also permits different rulesets (and thus different inputs within the same
+rsyslogd instance) to use different types of main message queues.
+<p>The ruleset queue is created with the parameters that are specified for the main message
+queue at the time the directive is given. If different queue configurations are desired,
+different main message queue directives must be used in front of the $RulesetCreateMainQueue
+directive. Note that this directive may only be given once per ruleset. If multiple statements
+are specified, only the first is used and for the others error messages are emitted.
+<p>Note that the final set of ruleset configuration directives specifies the parameters for
+the default main message queue.
+<p>To learn more about this feature, please be sure to read about
+<a href="multi_ruleset.html">multi-ruleset support in rsyslog</a>.
+<p><b>Caveats:</b></p>
+The configuration statement &quot;$RulesetCreateMainQueue off&quot; has no effect at all.
+The capability to specify this is an artifact of the current (ugly!) configuration
+language.
+
+<p><b>Example:</b></p>
+<p>This example sets up a tcp server with three listeners. Each of these
+three listeners is bound to a specific ruleset. As a performance optimization,
+the rulesets all receive their own private queue. The result is that received messages
+can be independently processed. With only a single main message queue, we would have
+some lock contention between the messages. This does not happen here. Note that in this
+example, we use different processing. Of course, all messages could also have been
+processed in the same way ($IncludeConfig may be useful in that case!).
+</p>
+<textarea rows="30" cols="60">$ModLoad imtcp
+# at first, this is a copy of the unmodified rsyslog.conf
+#define rulesets first
+$RuleSet remote10514
+$RulesetCreateMainQueue on # create ruleset-specific queue
+*.* /var/log/remote10514
+
+$RuleSet remote10515
+$RulesetCreateMainQueue on # create ruleset-specific queue
+*.* /var/log/remote10515
+
+$RuleSet remote10516
+$RulesetCreateMainQueue on # create ruleset-specific queue
+mail.* /var/log/mail10516
+&amp; ~
+# note that the discard-action will prevent this message from
+# being written to the remote10516 file - as usual...
+*.* /var/log/remote10516
+
+# and now define listeners bound to the relevant ruleset
+$InputTCPServerBindRuleset remote10514
+$InputTCPServerRun 10514
+
+$InputTCPServerBindRuleset remote10515
+$InputTCPServerRun 10515
+
+$InputTCPServerBindRuleset remote10516
+$InputTCPServerRun 10516
+</textarea>
+<p>Note the positions of the directives. With the current config language,
+position is very important. This is ugly, but unfortunately the way it currently
+works.
+</p>
+
+<p>[<a href="rsyslog_conf.html">rsyslog.conf overview</a>] [<a href="manual.html">manual
+index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
+<p><font size="2">This documentation is part of the
+<a href="http://www.rsyslog.com/">rsyslog</a> project.<br>
+Copyright &copy; 2009 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
+<a href="http://www.adiscon.com/">Adiscon</a>. Released under the GNU GPL version 2 or higher.</font></p>
+</body>
+</html>
diff --git a/doc/rsconf1_rulesetparser.html b/doc/rsconf1_rulesetparser.html
new file mode 100644
index 00000000..ef29c2a8
--- /dev/null
+++ b/doc/rsconf1_rulesetparser.html
@@ -0,0 +1,123 @@
+<html>
+<head>
+<title>RulesetParser - rsyslog.conf file</title>
+</head>
+<body>
+<a href="rsyslog_conf_global.html">rsyslog.conf configuration directive</a>
+
+<h2>$RulesetParser</h2>
+<p><b>Type:</b> ruleset-specific configuration directive</p>
+<p><b>Parameter Values:</b> string</p>
+<p><b>Available since:</b> 5.3.4+</p>
+<p><b>Default:</b> rsyslog.rfc5424 followed by rsyslog.rfc3164</p>
+<p><b>Description:</b></p>
+<p>
+This directive permits to specify which
+<a href="messageparser.html">message parsers</a> should be used for the ruleset
+in question. If no ruleset is explicitly specified, the default ruleset is used. Message
+parsers are contained in (loadable) parser modules with the most common cases
+(RFC3164 and RFC5424) being built into rsyslogd.
+<p>When this directive is specified the first time for a ruleset, it will not only add the
+parser to the ruleset's parser chain, it will also wipe out the default parser chain.
+So if you need to have
+them in addition to the custom parser, you need to specify those as well.
+<p>Order of directives is important. Parsers are tried one after another, in the order
+they are specified inside the config. As soon as a parser is able to parse the message,
+it will do so and no other parsers will be executed. If no matching parser can be found,
+the message will be discarded and a warning message be issued (but only for the first
+1,000 instances of this problem, to prevent message generation loops).
+<p>Note that the rfc3164 parser will <b>always</b> be able to parse a message - it may
+just not be the format that you like. This has two important implications: 1) always place
+that parser at the END of the parser list, or the other parsers after it will never
+be tried and 2) if you would like to make sure no message is lost, placing the rfc3164
+parser at the end of the parser list ensures that.
+<p>Multiple parser modules are very useful if you have various devices that emit
+messages that are malformed in various ways. The route to take then is
+<ul>
+<li>make sure you find a custom parser for that device; if there is none, you
+may consider writing one yourself (it is not that hard) or getting one written
+as part of
+<a href="http://www.rsyslog.com/professional-services">Adiscon's professional services
+for rsyslog</a>.
+<li>load your custom parsers via $ModLoad
+<li>create a ruleset for each malformed format; assign the custom parser to it
+<li>create a specific listening port for all devices that emit the same
+malformed format
+<li>bind the listener to the ruleset with the required parser
+</ul>
+<p>Note that it may be cumbersome to add all rules to all rulesets. To avoid this,
+you can either use $Include or <a href="omruleset.html">omruleset</a>
+(which probably provides the best solution).
+<p>More information about rulesets in general can be found in
+<a href="multi_ruleset.html">multi-ruleset support in rsyslog</a>.
+<p><b>Caveats:</b></p>
+<p>currently none known</p>
+
+<p><b>Example:</b></p>
+<p>This example assumes there are two devices emitting malformed messages via UDP.
+We have two custom parsers for them, named &quot;device1.parser&quot; and
+&quot;device2.parser&quot;. In addition to that, we have a number of other
+devices sending well-formed messages, also via UDP.
+<p>The solution is to listen for data from the two devices on two special
+ports (10514 and 10515 in this example), create a ruleset for each and
+assign the custom parsers to them. The rest of the messages are received via
+port 514 using the regular parsers. Processing shall be equal for all messages.
+So we simply forward the malformed messages to the regular queue once they are parsed (keep
+in mind that a message is never again parsed once any parser properly processed it).
+</p>
+<textarea rows="40" cols="80">$ModLoad imudp
+$ModLoad pmdevice1 # load parser "device1.parser" for device 1
+$ModLoad pmdevice2 # load parser "device2.parser" for device 2
+
+# define ruleset for the first device sending malformed data
+$Ruleset maldev1
+$RulesetCreateMainQueue on # create ruleset-specific queue
+$RulesetParser "device1.parser" # note: this deactivates the default parsers
+# forward all messages to default ruleset:
+$ActionOmrulesetRulesetName RSYSLOG_DefaultRuleset
+*.* :omruleset:
+
+# define ruleset for the second device sending malformed data
+$Ruleset maldev2
+$RulesetCreateMainQueue on # create ruleset-specific queue
+$RulesetParser "device2.parser" # note: this deactivates the default parsers
+# forward all messages to default ruleset:
+$ActionOmrulesetRulesetName RSYSLOG_DefaultRuleset
+*.* :omruleset:
+
+# switch back to default ruleset
+$Ruleset RSYSLOG_DefaultRuleset
+*.* /path/to/file
+auth.info @authlogger.example.net
+# whatever else you usually do...
+
+
+# now define the inputs and bind them to the rulesets
+# first the default listener (utilizing the default ruleset)
+$UDPServerRun 514
+
+# now the one with the parser for device type 1:
+$InputUDPServerBindRuleset maldev1
+$UDPServerRun 10514
+
+# and finally the one for device type 2:
+$InputUDPServerBindRuleset maldev2
+$UDPServerRun 10515
+</textarea>
+
+<p>For an example of how multiple parsers can be chained (and an actual use case), please see
+the example section on the <a href="pmlastmsg.html">pmlastmsg</a> parser
+module.
+<p>Note the positions of the directives. With the current config language,
+<b>sequence of statements is very important</b>. This is ugly, but unfortunately
+the way it currently works.
+</p>
+
+<p>[<a href="rsyslog_conf.html">rsyslog.conf overview</a>] [<a href="manual.html">manual
+index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
+<p><font size="2">This documentation is part of the
+<a href="http://www.rsyslog.com/">rsyslog</a> project.<br>
+Copyright &copy; 2009 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
+<a href="http://www.adiscon.com/">Adiscon</a>. Released under the GNU GPL version 2 or higher.</font></p>
+</body>
+</html>
diff --git a/doc/rsyslog_conf.html b/doc/rsyslog_conf.html
index 6990c6bd..703e7a6e 100644
--- a/doc/rsyslog_conf.html
+++ b/doc/rsyslog_conf.html
@@ -2,13 +2,16 @@
<html><head><title>rsyslog.conf file</title></head>
<body>
<h1>rsyslog.conf configuration file</h1>
-<p><b>This document is currently being enhanced. Please
-pardon its current appearance.</b></p>
<p><b>Rsyslogd is configured via the rsyslog.conf file</b>,
typically found in /etc. By default, rsyslogd reads the file
/etc/rsyslog.conf. This may be changed by a command line option.</p>
<p><a href="http://wiki.rsyslog.com/index.php/Configuration_Samples">
-Configuration file examples can be found in the rsyslog wiki</a>.</p>
+Configuration file examples can be found in the rsyslog wiki</a>. Also
+keep the
+<a href="http://www.rsyslog.com/config-snippets/">rsyslog config snippets</a>
+in mind. These are ready-to-use
+real building blocks for rsyslog configuration.
+</p>
<p>There is also one sample file provided together with the
documentation set. If you do not like to read, be sure to have at least
a quick look at
@@ -74,7 +77,7 @@ such features is available in rsyslogd, only.</p>
[<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
<p><font size="2">This documentation is part of the
<a href="http://www.rsyslog.com/">rsyslog</a> project.<br>
-Copyright &copy; 2008,2009 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
+Copyright &copy; 2008-2011 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
<a href="http://www.adiscon.com/">Adiscon</a>. Released under the GNU GPL
version 3 or higher.</font></p>
</body>
diff --git a/doc/rsyslog_conf_global.html b/doc/rsyslog_conf_global.html
index 4939d4c5..b254f366 100644
--- a/doc/rsyslog_conf_global.html
+++ b/doc/rsyslog_conf_global.html
@@ -17,6 +17,8 @@ appear as implementation progresses.
many parameter settings modify queue parameters. If in doubt, use the
default, it is usually well-chosen and applicable in most cases.</p>
<ul>
+<li><a href="rsconf1_abortonuncleanconfig.html">$AbortOnUncleanConfig</a> - abort startup if there is
+any issue with the config file</li>
<li><a href="rsconf1_actionexeconlywhenpreviousissuspended.html">$ActionExecOnlyWhenPreviousIsSuspended</a></li>
<li>$ActionName &lt;a_single_word&gt; - used primarily for documentation, e.g. when
generating a configuration graph. Available sice 4.3.1.
@@ -58,6 +60,7 @@ default template for UDP and plain TCP forwarding action</li>
<li>$ActionGSSForwardDefaultTemplate [templateName] - sets a
new default template for GSS-API forwarding action</li>
<li>$ActionQueueCheckpointInterval &lt;number&gt;</li>
+<li>$ActionQueueDequeueBatchSize &lt;number&gt; [default 16]</li>
<li>$ActionQueueDequeueSlowdown &lt;number&gt; [number
is timeout in <i> micro</i>seconds (1000000us is 1sec!),
default 0 (no delay). Simple rate-limiting!]</li>
@@ -90,7 +93,7 @@ default 60000 (1 minute)]</li>
<li>$ActionQueueWorkerThreadMinumumMessages &lt;number&gt;, default 100</li>
<li><a href="rsconf1_actionresumeinterval.html">$ActionResumeInterval</a></li>
<li>$ActionResumeRetryCount &lt;number&gt; [default 0, -1 means eternal]</li>
-<li>$ActionSendResendLastMsgOnReconn &lt;[on/<b>off</b>]&gt; specifies if the last message is to be resend when a connecition broken and has been reconnedcted. May increase reliability, but comes at the risk of message duplication.
+<li>$ActionSendResendLastMsgOnReconnect &lt;[on/<b>off</b>]&gt; specifies if the last message is to be resent when a connection breaks and has been reconnected. May increase reliability, but comes at the risk of message duplication.
<li>$ActionSendStreamDriver &lt;driver basename&gt; just like $DefaultNetstreamDriver, but for the specific action</li>
<li>$ActionSendStreamDriverMode &lt;mode&gt;, default 0, mode to use with the stream driver (driver-specific)</li>
<li>$ActionSendStreamDriverAuthMode &lt;mode&gt;,&nbsp; authentication mode to use with the stream driver. Note that this directive requires TLS
@@ -109,6 +112,14 @@ that it should be not be much more often than once per second).</li>
<li><b>$ActionSendUDPRebindInterval</b> nbr</a>- [available since 4.3.2] - instructs the UDP send
action to rebind the send socket every nbr of messages sent. Zero, the default, means
that no rebind is done. This directive is useful for use with load-balancers.</li>
+<li><b>$ActionWriteAllMarkMessages</b> [on/<b>off</b>]- [available since 5.1.5] - normally, mark messages
+are written to actions only if the action was not recently executed (by default, recently means within the
+past 20 minutes). If this setting is switched to &quot;on&quot;, mark messages are always sent to actions,
+no matter how recently they have been executed. In this mode, mark messages can be used as a kind of
+heartbeat. Note that this option auto-resets to &quot;off&quot;, so if you intend to use it with multiple
+actions, it must be specified in front of <b>all</b> selector lines that should provide this
+functionality.
+</li>
<li><a href="rsconf1_allowedsender.html">$AllowedSender</a></li>
<li><a href="rsconf1_controlcharacterescapeprefix.html">$ControlCharacterEscapePrefix</a></li>
<li><a href="rsconf1_debugprintcfsyslinehandlerlist.html">$DebugPrintCFSyslineHandlerList</a></li>
@@ -129,6 +140,7 @@ our paper on <a href="multi_ruleset.html">using multiple rule sets in rsyslog</a
<li><a href="rsconf1_dropmsgswithmaliciousdnsptrrecords.html">$DropMsgsWithMaliciousDnsPTRRecords</a></li>
<li><a href="rsconf1_droptrailinglfonreception.html">$DropTrailingLFOnReception</a></li>
<li><a href="rsconf1_dynafilecachesize.html">$DynaFileCacheSize</a></li>
+<li><a href="rsconf1_escape8bitcharsonreceive.html">$Escape8BitCharactersOnReceive</a></li>
<li><a href="rsconf1_escapecontrolcharactersonreceive.html">$EscapeControlCharactersOnReceive</a></li>
<li><b>$EscapeControlCharactersOnReceive</b> [<b>on</b>|off] - escape USASCII HT character</li>
<li>$SpaceLFOnReceive [on/<b>off</b>] - instructs rsyslogd to replace LF with spaces during message reception (sysklogd compatibility aid)</li>
@@ -161,6 +173,8 @@ a bug (but one may argue if the design should be changed ;)). Available since
rsyslog emits message on startup and shutdown as well as when it is HUPed.
This information might be needed by some log analyzers. If set to off, no such
status messages are logged, what may be useful for other scenarios.
+[available since 4.7.0 and 5.3.0]
+<li><b>$MainMsgQueueDequeueBatchSize</b> &lt;number&gt; [default 32]</li>
<li>$MainMsgQueueDequeueSlowdown &lt;number&gt; [number
is timeout in <i> micro</i>seconds (1000000us is 1sec!),
default 0 (no delay). Simple rate-limiting!]</li>
@@ -252,10 +266,14 @@ large enough for the whole message. (Introduced with 4.1.5). Once set, it affect
<li><a href="rsconf1_resetconfigvariables.html">$ResetConfigVariables</a></li>
<li><b>$Ruleset</b> <i>name</i> - starts a new ruleset or switches back to one already defined.
All following actions belong to that new rule set.
-the <i>name</i> does not yet exist, it is created. To swith back to rsyslog's
+the <i>name</i> does not yet exist, it is created. To switch back to rsyslog's
default ruleset, specify &quot;RSYSLOG_DefaultRuleset&quot;) as the name.
All following actions belong to that new rule set. It is advised to also read
our paper on <a href="multi_ruleset.html">using multiple rule sets in rsyslog</a>.</li>
+<li><b><a href="rsconf1_rulesetcreatemainqueue.html">$RulesetCreateMainQueue</a></b> on - creates
+a ruleset-specific main queue.
+<li><b><a href="rsconf1_rulesetparser.html">$RulesetParser</a></b> - enables to set
+a specific (list of) message parsers to be used with the ruleset.
<li><b>$OptimizeForUniprocessor</b> [on/<b>off</b>] - turns on optimizatons which lead to better
performance on uniprocessors. If you run on multicore-machiens, turning this off lessens CPU load. The
default may change as uniprocessor systems become less common. [available since 4.1.0]</li>
diff --git a/doc/rsyslog_conf_modules.html b/doc/rsyslog_conf_modules.html
index d9a59d8c..9fa35ccd 100644
--- a/doc/rsyslog_conf_modules.html
+++ b/doc/rsyslog_conf_modules.html
@@ -19,8 +19,17 @@ modules solve your need, you may consider writing one or have one written
for you by
<a href="http://www.rsyslog.com/professional-services">Adiscon's professional services for rsyslog</a>
</b>(this often is a very cost-effective and efficient way of getting what you need).
+<p>There exist different classes of loadable modules:
+<ul>
+<li><a href="rsyslog_conf_modules.html#im">Input Modules</a>
+<li><a href="rsyslog_conf_modules.html#om">Output Modules</a>
+<li><a href="rsyslog_conf_modules.html#pm">Parser Modules</a>
+<li><a href="rsyslog_conf_modules.html#mm">Message Modification Modules</a>
+<li><a href="rsyslog_conf_modules.html#sm">String Generator Modules</a>
+<li><a href="rsyslog_conf_modules.html#lm">Library Modules</a>
+</ul>
-<h2>Input Modules</h2>
+<a name="im"></a><h2>Input Modules</h2>
<p>Input modules are used to gather messages from various sources. They interface
to message generators.
<ul>
@@ -35,15 +44,17 @@ to message generators.
<li><a href="imuxsock.html">imuxsock</a> - unix sockets, including the system log socket</li>
<li><a href="imsolaris.html">imsolaris</a> - input for the Sun Solaris system log source</li>
<li><a href="im3195.html">im3195</a> - accepts syslog messages via RFC 3195</li>
+<li><a href="impstats.html">impstats</a> - provides periodic statistics of rsyslog internal counters</li>
</ul>
-<h2>Output Modules</h2>
+<a name="om"></a><h2>Output Modules</h2>
<p>Output modules process messages. With them, message formats can be transformed
and messages be transmitted to various different targets.
<ul>
<li><a href="omsnmp.html">omsnmp</a> - SNMP trap output module</li>
<li><a href="omstdout.html">omtdout</a> - stdout output module (mainly a test tool)</li>
<li><a href="omrelp.html">omrelp</a> - RELP output module</li>
+<li><a href="omruleset.html">omruleset</a> - forward message to another ruleset</li>
<li>omgssapi - output module for GSS-enabled syslog</li>
<li><a href="ommysql.html">ommysql</a> - output module for MySQL</li>
<li>ompgsql - output module for PostgreSQL</li>
@@ -54,21 +65,115 @@ SQLLite, Ingres, Oracle, mSQL)</li>
permits rsyslog to alert folks by mail if something important happens</li>
<li><a href="omprog.html">omprog</a> - permits sending messages to a program for custom processing</li>
<li><a href="omoracle.html">omoracle</a> - output module for Oracle (native OCI interface)</li>
+<li><a href="omudpspoof.html">omudpspoof</a> - output module sending UDP syslog messages with a spoofed address</li>
<li><a href="omuxsock.html">omuxsock</a> - output module Unix domain sockets</li>
+<li><a href="omhdfs.html">omhdfs</a> - output module for Hadoop's HDFS file system</li>
+</ul>
+
+<a name="pm"></a><h2>Parser Modules</h2>
+<p>Parser modules are used to parse message content, once the message has been
+received. They can be used to process custom message formats or invalidly formatted
+messages. For details, please see the <a href="messageparser.html">rsyslog
+message parser documentation</a>.
+<p>The following modules are currently provided as part of rsyslog:
+<ul>
+<li>pmrfc5424[builtin] - rsyslog.rfc5424 -
+parses RFC5424-formatted messages (the new syslog standard)
+<li>pmrfc3164[builtin] - rsyslog.rfc3164 -
+the traditional/legacy syslog parser
+<li>pmrfc3164sd - rsyslog.rfc3164sd -
+a contributed module supporting RFC5424 structured data inside
+RFC3164 messages (not supported by the rsyslog team)
+<li><a href="pmlastmsg.html">pmlastmsg</a> - rsyslog.lastmsg -
+a parser module that handles the typically malformed "last messages
+repeated n times" messages emitted by some syslogds.
</ul>
-<h2>Library Modules</h2>
+<a name="mm"></a><h2>Message Modification Modules</h2>
+<p>Message modification modules are used to change the content of messages being processed.
+They can be implemented using either the output module or the parser module interface.
+From the rsyslog core's point of view, they actually are output or parser modules, it is their
+implementation that makes them special.
+<p>Currently, there do not exist any such modules, but could be written with
+the methods the engine provides. They could be used, for example, to:
+<ul>
+<li>anonymize message content
+<li>add dynamically computed content to message (fields)
+</ul>
+<p>Message modification modules are usually written for one specific task and thus
+usually are not generic enough to be reused. However, existing module's code is
+probably an excellent starting base for writing a new module. Currently, the following
+modules exist inside the source tree:
+<ul>
+<li><a href="mmsnmptrapd.html">mmsnmptrapd</a> - uses information provided by snmptrapd inside
+the tag to correct the original sender system and priority of messages. Implemented via
+the output module interface.
+</ul>
+
+<a name="sm"></a><h2>String Generator Modules</h2>
+<p>String generator modules are used, as the name implies, to generate strings based
+on the message content. They are currently tightly coupled with the template system.
+Their primary use is to speed up template processing by providing a native C
+interface to template generation. These modules exist since 5.5.6. To get an idea
+of the potential speedup, the default file format, when generated by a string generator,
+provides a roughly 5% speedup. For more complex strings, especially those that include
+multiple regular expressions, the speedup may be considerably higher.
+<p>String generator modules are written to a quite simple interface. However, a word of
+caution is due: they access the rsyslog message object via a low-level interface.
+That interface is not guaranteed yet to stay stable. So it may be necessary to
+modify string generator modules if the interface changes. Obviously, we will not do that
+without good reason, but it may happen.
+<p>Rsyslog comes with a set of core, built-in string generators, which are used
+to provide those default templates that we consider to be time-critical:
+<ul>
+<li>smfile - the default rsyslog file format
+<li>smfwd - the default rsyslog (network) forwarding format
+<li>smtradfile - the traditional syslog file format
+<li>smtradfwd - the traditional syslog (network) forwarding format
+</ul>
+<p>Note that when you replace these defaults by some custom strings, you will
+lose some performance (around 5%). For typical systems, this is not really relevant.
+But for high-performance systems, it may be very relevant. To solve that issue, create
+a new string generator module for your custom format, starting out from one of the
+default generators provided. If you can not do this yourself, you may want to
+contact <a href="mailto:info%40adiscon.com">Adiscon</a> as we offer custom development
+of string generators at a very low price.
+<p>Note that string generator modules can be dynamically loaded. However, the default
+ones provided are so important that they are built right into the executable. But this
+does not need to be done that way (and it is straightforward to do it dynamically).
+
+
+<a name="lm"></a><h2>Library Modules</h2>
<p>Library modules provide dynamically loadable functionality for parts of rsyslog,
most often for other loadable modules. They can not be user-configured and are loaded
automatically by some components. They are just mentioned so that error messages that
point to library moduls can be understood. No module list is provided.
+<h2>Where are the modules integrated into the Message Flow?</h2>
+<p>Depending on their module type, modules may access and/or modify messages at
+various stages during rsyslog's processing. Note that only the "core type" (e.g. input,
+output) but not any type derived from it (message modification module) specifies when
+a module is called.
+<p>The simplified workflow is as follows:
+<p align="center">
+<img src="module_workflow.png" alt="rsyslog: loadable modules and message flow">
+<p>As can be seen, messages are received by input modules, then passed to one or many
+parser modules, which generate the in-memory representation of the message and may
+also modify the message itself. Then, the internal representation is passed to
+output modules, which may output a message and (with the interfaces newly introduced
+in v5) may also modify message object content.
+<p>String generator modules are not included inside this picture, because they are
+not a required part of the workflow. If used, they operate "in front of" the
+output modules, because they are called during template generation.
+<p>Note that the actual flow is much more complex and depends a lot on queue and
+filter settings. The graphic above is a high-level message flow diagram.
+
<p>[<a href="manual.html">manual index</a>]
[<a href="rsyslog_conf.html">rsyslog.conf</a>]
[<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
<p><font size="2">This documentation is part of the
<a href="http://www.rsyslog.com/">rsyslog</a> project.<br>
-Copyright &copy; 2008, 2009 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
+Copyright &copy; 2008-2010 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
<a href="http://www.adiscon.com/">Adiscon</a>. Released under the GNU GPL
version 3 or higher.</font></p>
</body>
diff --git a/doc/rsyslog_conf_templates.html b/doc/rsyslog_conf_templates.html
index 06cc805e..bd0b3253 100644
--- a/doc/rsyslog_conf_templates.html
+++ b/doc/rsyslog_conf_templates.html
@@ -16,6 +16,35 @@ compatible with the stock syslogd formats are hardcoded into rsyslogd.
So if no template is specified, we use one of these hardcoded
templates. Search for "template_" in syslogd.c and you will find the
hardcoded ones.</p>
+<p>Starting with 5.5.6, there are actually two different types of templates:
+<ul>
+<li>string based
+<li>string-generator module based
+</ul>
+<p><a href="rsyslog_conf_modules.html#sm">String-generator module</a> based templates
+have been introduced in 5.5.6. They permit a string generator, actually a C "program",
+to generate a format. Obviously, more work is required to code such a generator,
+but the reward is speed improvement. If you do not need the ultimate throughput, you
+can forget about string generators (so most people never need to know what they are).
+You may just be interested in learning that for the most important default formats,
+rsyslog already contains highly optimized string generators and these are called
+without any need to configure anything. But if you have written (or purchased) a
+string generator module, you need to know how to call it. Each such module has a name,
+which you need to know (look it up in the module doc or ask the developer). Let's assume
+that "mystrgen" is the module name. Then you can define a template for that strgen
+in the following way:
+<blockquote><code>$template MyTemplateName,=mystrgen</code></blockquote>
+(Of course, you must have first loaded the module via $ModLoad).
+<p>The important part is the equal sign: it tells the rsyslog config parser that
+no string follows but a strgen module name.
+<p>There are no additional parameters but the module name supported. This is because
+there is no way to customize anything inside such a "template" other than by
+modifying the code of the string generator.
+
+<p>So for most use cases, string-generator module based templates are <b>not</b>
+the route to take. Usually, you use <b>string based templates</b> instead.
+This is what the rest of the documentation now talks about.
+
<p>A template consists of a template directive, a name, the
actual template text and optional options. A sample is:</p>
<blockquote><code>$template MyTemplateName,"\7Text
@@ -140,6 +169,23 @@ out, but this may happen.</li>
is meant to be written to a log file. Do <b>not</b> use for production or remote
forwarding.</li>
</ul>
+<h3>String-based Template Samples</h3>
+<p>This section provides some samples of what the default formats would
+look like as text-based templates. Hopefully, their description is self-explanatory.
+Note that each $Template statement is on a <b>single</b> line, but probably broken
+across several lines for display purposes by your browser. Templates are separated by
+empty lines.
+<p><code>
+$template FileFormat,"%TIMESTAMP:::date-rfc3339% %HOSTNAME% %syslogtag%%msg:::sp-if-no-1st-sp%%msg:::drop-last-lf%\n"
+<br><br>
+$template TraditionalFileFormat,"%TIMESTAMP% %HOSTNAME% %syslogtag%%msg:::sp-if-no-1st-sp%%msg:::drop-last-lf%\n"
+<br><br>
+$template ForwardFormat,"<%PRI%>%TIMESTAMP:::date-rfc3339% %HOSTNAME% %syslogtag:1:32%%msg:::sp-if-no-1st-sp%%msg%"
+<br><br>
+$template TraditionalForwardFormat,"<%PRI%>%TIMESTAMP% %HOSTNAME% %syslogtag:1:32%%msg:::sp-if-no-1st-sp%%msg%"
+<br><br>
+$template StdSQLFormat,"insert into SystemEvents (Message, Facility, FromHost, Priority, DeviceReportedTime, ReceivedAt, InfoUnitID, SysLogTag) values ('%msg%', %syslogfacility%, '%HOSTNAME%', %syslogpriority%, '%timereported:::date-mysql%', '%timegenerated:::date-mysql%', %iut%, '%syslogtag%')",SQL
+</code></p>
<p>[<a href="manual.html">manual index</a>]
[<a href="rsyslog_conf.html">rsyslog.conf</a>]
diff --git a/doc/rsyslog_ng_comparison.html b/doc/rsyslog_ng_comparison.html
index 8e121a8d..7d12a4a7 100644
--- a/doc/rsyslog_ng_comparison.html
+++ b/doc/rsyslog_ng_comparison.html
@@ -5,6 +5,10 @@
<h1>rsyslog vs. syslog-ng</h1>
<p><small><i>Written by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a>
(2008-05-06)</i></small></p>
+<p><i>Warning</i>: this comparison is a little outdated, take it with a grain
+of salt and be sure to check the links at the bottom (both syslog-ng as well as
+rsyslog features are missing, but our priority is on creating great software not
+continuously updating this comparison ;)).
<p>We have often been asked about a comparison sheet between
rsyslog and syslog-ng. Unfortunately, I do not know much about
syslog-ng, I did not even use it once. Also, there seems to be no
@@ -81,9 +85,10 @@ optional input</td>
</tr>
<tr>
<td valign="top">Windows Event Log</td>
-<td valign="top">via <a href="http://www.eventreporter.com">EventReporter</a>
+<td valign="top">via a Windows event logging software such as
+<a href="http://www.eventreporter.com">EventReporter</a>
or <a href="http://www.mwagent.com">MonitorWare Agent</a>
-(both commercial software)</td>
+(both commercial software, both fund rsyslog development)</td>
<td valign="top">via separate Windows agent, paid
edition only</td>
</tr>
diff --git a/doc/rsyslog_pgsql.html b/doc/rsyslog_pgsql.html
index dcb9dc3a..21516ec8 100644
--- a/doc/rsyslog_pgsql.html
+++ b/doc/rsyslog_pgsql.html
@@ -30,7 +30,7 @@
-->
</STYLE>
</HEAD>
-<BODY LANG="de-DE" DIR="LTR">
+<BODY>
<H1 CLASS="western"><SPAN LANG="en-US">Writing </SPAN>syslog messages
to MySQL, PostgreSQL or any other supported Database</H1>
<P CLASS="western"><FONT SIZE=2><I>Written by </I></FONT><A HREF="http://www.adiscon.com/en/people/rainer-gerhards.php"><FONT SIZE=2><I>Rainer
@@ -333,4 +333,4 @@ Gerhards</A>, Marc Schiffbauer and <A HREF="http://www.adiscon.com/en/">Adiscon<
<P CLASS="western"><BR><BR>
</P>
</BODY>
-</HTML> \ No newline at end of file
+</HTML>
diff --git a/doc/rsyslog_queue_pointers.jpeg b/doc/rsyslog_queue_pointers.jpeg
new file mode 100644
index 00000000..809dd446
--- /dev/null
+++ b/doc/rsyslog_queue_pointers.jpeg
Binary files differ
diff --git a/doc/rsyslog_queue_pointers2.jpeg b/doc/rsyslog_queue_pointers2.jpeg
new file mode 100644
index 00000000..2ad60113
--- /dev/null
+++ b/doc/rsyslog_queue_pointers2.jpeg
Binary files differ
diff --git a/doc/rsyslog_secure_tls.html b/doc/rsyslog_secure_tls.html
index be2811f4..b15e5a4e 100644
--- a/doc/rsyslog_secure_tls.html
+++ b/doc/rsyslog_secure_tls.html
@@ -51,7 +51,7 @@ google_ad_height = 125;
src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
</script>
</span>
-I private keys have become known to third parties, the system does not provide
+If private keys have become known to third parties, the system does not provide
any security at all. Also, our solution bases on X.509 certificates and a (very
limited) chain of trust. We have one instance (the CA) that issues all machine
certificates. The machine certificate identifies a particular machine. While in
diff --git a/doc/rsyslog_tls.html b/doc/rsyslog_tls.html
index bb312c77..286660d2 100644
--- a/doc/rsyslog_tls.html
+++ b/doc/rsyslog_tls.html
@@ -162,25 +162,11 @@ similar "smart" command on the client. It should show up in the
respective server log file. If you dig out your sniffer, you should see
that the traffic on the wire is actually protected.</p>
<h3>Limitations</h3>
-<p>The current implementation has a number of limitations. These
-are
-being worked on. Most importantly, neither the client nor the server
-are authenticated. So while the message transfer is encrypted, you can
-not be sure which peer you are talking to. Please note that this is a
-limitation found in most real-world SSL syslog systems. Of course, that
-is not an excuse for not yet providing this feature - but it tells you
-that it is acceptable and can be worked around by proper firewalling,
-ACLs and other organizational measures. Mutual authentication will be
-added shortly to rsyslog.</p>
-<p>Secondly, the plain tcp syslog listener
-can currently listen to a single port, in a single mode. So if you use
-a TLS-based listener, you can not run unencrypted syslog on the same
-instance at the same time. A work-around is to run a second rsyslogd
-instance. This limitation, too, is scheduled to be removed soon.</p>
<p>The
RELP transport can currently not be protected by TLS. A work-around is
to use stunnel. TLS support for RELP will be added once plain TCP
-syslog has sufficiently matured.</p>
+syslog has sufficiently matured and there either is some time left to do this
+or we find a sponsor ;).</p>
<h2>Certificates</h2>
<p>In order to be really secure, certificates are needed. This is
a short summary on how to generate the necessary certificates with
diff --git a/doc/src/classes.dia b/doc/src/classes.dia
index 70e91566..8cfcbd0c 100644
--- a/doc/src/classes.dia
+++ b/doc/src/classes.dia
Binary files differ
diff --git a/doc/src/module_workflow.dia b/doc/src/module_workflow.dia
new file mode 100644
index 00000000..178571f4
--- /dev/null
+++ b/doc/src/module_workflow.dia
Binary files differ
diff --git a/doc/src/rfc5424layers.dia b/doc/src/rfc5424layers.dia
new file mode 100644
index 00000000..300b7796
--- /dev/null
+++ b/doc/src/rfc5424layers.dia
Binary files differ
diff --git a/doc/src/rsyslog_queue_pointers.dia b/doc/src/rsyslog_queue_pointers.dia
new file mode 100644
index 00000000..2ad4cacb
--- /dev/null
+++ b/doc/src/rsyslog_queue_pointers.dia
Binary files differ
diff --git a/doc/src/rsyslog_queue_pointers2.dia b/doc/src/rsyslog_queue_pointers2.dia
new file mode 100644
index 00000000..6a35c664
--- /dev/null
+++ b/doc/src/rsyslog_queue_pointers2.dia
Binary files differ
diff --git a/doc/src/tls.dia b/doc/src/tls.dia
index 77e5d185..d7c9811d 100644
--- a/doc/src/tls.dia
+++ b/doc/src/tls.dia
Binary files differ
diff --git a/doc/status.html b/doc/status.html
deleted file mode 100644
index 4e8f1a5f..00000000
--- a/doc/status.html
+++ /dev/null
@@ -1,54 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html><head><title>rsyslog status page</title></head>
-<body>
-<h2>rsyslog status page</h2>
-<p>This page reflects the status as of 2009-05-25.</p>
-<h2>Current Releases</h2>
-
-<p><b>development:</b> 4.3.1 [2009-05-25] -
-<a href="http://www.rsyslog.com/Article372.phtml">change log</a> -
-<a href="http://www.rsyslog.com/Downloads-req-viewdownloaddetails-lid-159.phtml">download</a>
-
-<br><b>beta:</b> 3.21.11 [2009-04-03] -
-<a href="http://www.rsyslog.com/Article358.phtml">change log</a> -
-<a href="http://www.rsyslog.com/Downloads-req-viewdownloaddetails-lid-152.phtml">download</a></p>
-
-<p><b>v3 stable:</b> 3.22.0 [2009-04-21] - <a href="http://www.rsyslog.com/Article368.phtml">change log</a> -
-<a href="http://www.rsyslog.com/Downloads-req-viewdownloaddetails-lid-157.phtml">download</a>
-
-<br><b>v2 stable:</b> 2.0.7 [2009-04-14] - <a href="http://www.rsyslog.com/Article362.phtml">change log</a> -
-<a href="http://www.rsyslog.com/Downloads-req-viewdownloaddetails-lid-154.phtml">download</a>
-<br>v0 and v1 are deprecated and no longer supported. If you absolutely do not like to
-upgrade, you may consider purchasing a
-<a href="professional_support.html">commercial rsyslog support package</a>. Just let us point
-out that it is really not a good idea to still run a v0 version.
-
-<p><a href="v3compatibility.html">If you updgrade from version 2, be sure to read the rsyslog v3
-compatibility document.</a> There are no additional compatibility concerns at this time for
-upgrading from v3 to v4. If some occur, we will post an additional compatiblity document.</p>
-<p>(<a href="version_naming.html">How are versions named?</a>)</p>
-
-<h2>Platforms</h2>
-<p>Thankfully, a number of folks have begin to build packages and
-help port rsyslog to other platforms. As such,
-<a href="http://wiki.rsyslog.com/index.php/Platforms">the
-platform list is now maintained inside the rsyslog wiki</a>.
-Platform maintainers perhaps have posted extra information there. If
-you do platform-specific work, feel free to add information to the wiki.</p>
-<p>Rsyslog is the default syslogd in Fedora 8 and above.</p>
-<h2>Additional information</h2>
-<p><b>Currently supported features are listed on the <a href="features.html">rsyslog features page</a>.</b></p>
-<ul>
-<li>The rsyslog home page is <a href="http://www.rsyslog.com">www.rsyslog.com</a>.</li>
-<li>Mailing list info can be found at <a href="http://lists.adiscon.net/mailman/listinfo/rsyslog">http://lists.adiscon.com/rsyslog</a>.</li>
-<li>The change log can be found at <a href="http://www.rsyslog.com/Topic4.phtml">
-http://www.rsyslog.com/Topic4.phtml</a>. </li>
-<li>Online documentation is available at <a href="http://www.rsyslog.com/doc">http://www.rsyslog.com/doc</a>.</li>
-<li>You may also find <a href="http://rgerhards.blogspot.com/">Rainer's blog</a>
-an interesting read.</li>
-</ul>
-<p>The project was initiated in 2003 and seriouosly begun in 2004 by
-<a href="http://www.gerhards.net/rainer">Rainer Gerhards</a>
-and is currently being maintained by him. See the <a href="history.html">history page</a> for more
-background information.</p>
-</body></html>
diff --git a/doc/syslog_parsing.html b/doc/syslog_parsing.html
index 57da6657..1ccec6f1 100644
--- a/doc/syslog_parsing.html
+++ b/doc/syslog_parsing.html
@@ -176,6 +176,19 @@ $template, MalfromedMsgFormater,"%timegenerated% %fromhost% %rawmsg:::drop-last-
<p>This will make your log much nicer, but not look perfect. Experiment a bit
with the available properties and replacer extraction options to fine-tune it
to your needs.
+<h2>The Ultimate Solution...</h2>
+<p>Is available with rsyslog 5.3.4 and above. Here, we can define so-called custom
+parsers. These are plugin modules, written in C and adapted to a specific message format
+need. The big plus of custom parsers is that they offer excellent performance and unlimited
+possibilities - far better than any work-around could do. Custom parsers can be
+<a href="rsconf1_rulesetparser.html">bound to specific rule sets</a>
+(and thus listening ports) with relative ease. The only con is that they must be written.
+However, if you are lucky, a parser for your device may already exist. If not, you can
+opt to write it yourself, which is not too hard if you know some C. Alternatively,
+Adiscon can program one for you as part of the
+<a href="http://www.rsyslog.com/professional-services">rsyslog professional services offering</a>.
+In any case, you should seriously consider custom parsers as an alternative if you cannot
+reconfigure your device to send a decent message format.
<h2>Wrap-Up</h2>
<p>Syslog message format is not sufficiently standardized. There exists a weak
"standard" format, which is used by a good number of implementations. However, there
@@ -183,14 +196,15 @@ exist many others, including mainstream vendor implementations, which have a
(sometimes horribly) different format. Rsyslog tries to deal with anomalies but
can not guess right in all instances. If possible, the sender should be configured
to submit well-formed messages. If that is not possible, you can work around these
-issues with rsyslog's property replacer and template system.
+issues with rsyslog's property replacer and template system. Or you can use a suitable
+message parser or write one for your needs.
<p>I hope this is a useful guide. You may also have a look at the
<a href="troubleshoot.html">rsyslog troubleshooting guide</a> for further help and places where
to ask questions.
<p>[<a href="manual.html">manual index</a>] [<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
<p><font size="2">This documentation is part of the
<a href="http://www.rsyslog.com/">rsyslog</a> project.<br>
-Copyright &copy; 2008 by <a href="http://www.gerhards.net/rainer">Rainer
+Copyright &copy; 2009 by <a href="http://www.gerhards.net/rainer">Rainer
Gerhards</a> and <a href="http://www.adiscon.com/">Adiscon</a>.
Released under the GNU GPL version 3 or higher.</font></p>
</body></html>
diff --git a/doc/tls_cert_server.html b/doc/tls_cert_server.html
index 9c68db5d..9c024bc9 100644
--- a/doc/tls_cert_server.html
+++ b/doc/tls_cert_server.html
@@ -37,6 +37,15 @@ src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
</script>
</span>
<p><center><img src="tls_cert_100.jpg"></center>
+<p><i><font color="red"><b>Important:</b> Keep in mind that the order of configuration directives
+is very important in rsyslog. As such, the samples given below do only work if the given
+order is preserved.</font> Re-ordering the directives can break configurations and has broken them
+in practice. If you intend to re-order them, please be sure that you fully understand how
+the configuration language works and, most importantly, which statements form a block together.
+Please also note that we understand that the current configuration file format is
+ugly. However, there has been more important work in the way of enhancing it. If you would like
+to contribute some time to improve the config file language, please let us know. Any help
+is appreciated (be it doc or coding work!).</i>
<p>Steps to do:
<ul>
<li>make sure you have a functional CA (<a href="tls_cert_ca.html">Setting up the CA</a>)
diff --git a/doc/troubleshoot.html b/doc/troubleshoot.html
index a8855fd4..0f0c7fca 100644
--- a/doc/troubleshoot.html
+++ b/doc/troubleshoot.html
@@ -102,13 +102,63 @@ comes without any guarantees, include no guarantee on confidentiality
[aka "we don't want to be sued for work we are not even paid for" ;)].
<b>So if you submit debug logs, do so at your sole risk</b>. By submitting them, you accept
this policy.
+<p><b>Segmentation Faults</b>
+<p>Rsyslog has a very rapid development process, complex capabilities and now gradually gets
+more and more exposure. While we are happy about this, it also has some bad effects: some
+deployment scenarios have probably never been tested and it may be impossible to test
+them for the development team because of resources needed. So while we try to avoid this,
+you may see a serious problem during deployments in demanding, non-standard, environments
+(hopefully not with a stable version, but chances are good you'll run into troubles with
+the development versions).
+<p>Active support from the user base is very important to help us track down those things.
+Most often, serious problems are the result of some memory misaddressing. During development,
+we routinely use valgrind, a very good and capable memory debugger. This helps us to create
+pretty clean code. But valgrind can not detect everything, most importantly not code paths
+that are never executed. So of most use for us is information about aborts and abort locations.
+<p>Unfortunately, faults rooted in addressing errors typically show up only later, so the
+actual abort location is in an unrelated spot. To help track down the original spot,
+<a href="http://www.gnu.org/software/hello/manual/libc/Heap-Consistency-Checking.html">libc
+later than 5.4.23 offers support</a> for finding it, and possibly temporary relief from it,
+by means of the MALLOC_CHECK_ environment variable. Setting it to 2 is a useful troubleshooting
+aid for us. It will make the program abort as soon as the check routines detect anything
+suspicious (unfortunately, this may still not be the root cause, but hopefully closer to it).
+Setting it to 0 may even make some problems disappear (but it will NOT fix them!).
+With functionality comes cost, and so exporting MALLOC_CHECK_ without need comes at
+a performance penalty. However, we strongly recommend adding this instrumentation to your
+test environment should you see any serious problems. Chances are good it will help us
+interpret a dump better, and thus be able to craft a fix more quickly.
+<p>In order to get useful information, we need some backtrace of the abort. First, you need
+to make sure that a core file is created. Under Fedora, for example, that means you need
+to have an "ulimit -c unlimited" in place.
+<p>Now let's assume you got a core file (e.g. in /core.1234). So what to do next? Sending a
+core file to us is most often pointless - we need to have the exact same system configuration in
+order to interpret it correctly. Obviously, chances are extremely slim for this to be the case. So we would
+appreciate if you could extract the most important information. This is done as follows:
+<ul>
+<li>$gdb /path/to/rsyslogd
+<li>$info thread
+<li>you'll see a number of threads (in the range 0 to n with n being the highest number). For
+ <b>each</b> of them, do the following (let's assume that i is the thread number):
+ <ul>
+ <li>$ thread i (e.g. thread 0, thread 1, ...)
+ <li>$bt
+ </ul>
+<li>then you can quit gdb with "$q"
+</ul>
+<p>Then please send all information that gdb spit out to the development team. It is best to first
+ask on the forum or mailing list on how to do that. The developers will keep in contact with you
+and, I fear, will probably ask for other things as well ;)
+<p>Note that we strive for highest reliability of the engine even in unusual deployment scenarios.
+Unfortunately, this is hard to achieve, especially with limited resources. So we are depending on
+cooperation from users. This is your chance to make a big contribution to the project without the
+need to program or do anything else except get a problem solved ;)
<p>[<a href="manual.html">manual index</a>]
[<a href="http://www.rsyslog.com/">rsyslog site</a>]</p>
<p><font size="2">This documentation is part of the
<a href="http://www.rsyslog.com/">rsyslog</a> project.<br>
-Copyright &copy; 2008 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
+Copyright &copy; 2008-2010 by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a> and
<a href="http://www.adiscon.com/">Adiscon</a>. Released under the GNU GPL
-version 2 or higher.</font></p>
+version 3 or higher.</font></p>
</body>
</html>
diff --git a/doc/v5compatibility.html b/doc/v5compatibility.html
new file mode 100644
index 00000000..6d60062f
--- /dev/null
+++ b/doc/v5compatibility.html
@@ -0,0 +1,30 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html><head><title>Compatibility notes for rsyslog v5</title>
+</head>
+<body>
+<h1>Compatibility Notes for rsyslog v5</h1>
+<p><small><i>Written by <a href="http://www.gerhards.net/rainer">Rainer Gerhards</a>
+(2009-07-15)</i></small></p>
+<p>The changes introduced in rsyslog v5 are numerous, but not very intrusive.
+This document describes things to keep in mind when moving from v4 to v5. It
+does not list enhancements nor does it talk about compatibility concerns introduced
+by earlier versions (for this, see their respective compatibility documents).
+<h2>HUP processing</h2>
+<p>The $HUPisRestart directive is supported by some early v5 versions, but has been removed
+in 5.1.3 and above. That means that restart-type HUP processing is no longer
+available. This processing was redundant and had a lot of drawbacks.
+For details, please see the
+<a href="v4compatibility.html">rsyslog v4 compatibility notes</a> which elaborate
+on the reasons and the (few) things you may need to change.
+<h2>Queue Worker Thread Shutdown</h2>
+<p>Previous rsyslog versions had the capability to &quot;run&quot; on zero queue workers
+if no work was required. This was done to save a very limited number of resources. However,
+it came at the price of great complexity. In v5, we have decided to let a minimum of one
+worker run all the time. The additional resource consumption is probably not noticeable at
+all; however, this enabled us to do some important code cleanups, resulting in faster
+and more reliable code (complex code is hard to maintain and error-prone). From the
+regular user's point of view, this change should be barely noticeable. I am including the
+note for expert users, who will notice it in rsyslog debug output and other analysis tools.
+So it is no error if each queue in non-direct mode now always runs at least one worker
+thread.
+</body></html>