From eddaca33a81206aab7c6627e5c91d22232445adf Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Wed, 4 Jun 2008 11:11:52 +0200 Subject: enhanced property replacer to support multiple regex matches --- ChangeLog | 1 + doc/property_replacer.html | 11 +++++++++-- runtime/msg.c | 33 ++++++++++++++++++++++++++++----- template.c | 26 ++++++++++++++++++++++---- template.h | 1 + 5 files changed, 61 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index 0c7a4109..df941331 100644 --- a/ChangeLog +++ b/ChangeLog @@ -13,6 +13,7 @@ Version 3.19.5 (rgerhards), 2008-05-30 - implemented in property replacer: if a regular expression does not match, it can now either return "**NO MATCH** (default, as before), a blank property or the full original property text +- enhanced property replacer to support multiple regex matches --------------------------------------------------------------------------- Version 3.19.4 (rgerhards), 2008-05-27 - implemented x509/certvalid gtls auth mode diff --git a/doc/property_replacer.html b/doc/property_replacer.html index b6eaae0f..86d28274 100644 --- a/doc/property_replacer.html +++ b/doc/property_replacer.html @@ -207,13 +207,18 @@ sequence with a regular expression is: "%msg:R:.*Sev:. \(.*\) \[.*--end%"

It is possible to specify some parametes after the "R". These are comma-separated. They are: -

R,<regexp-type>,<submatch>,<nomatch> +

R,<regexp-type>,<submatch>,<nomatch>,<match-number>

regexp-type is either "BRE" for Posix basic regular expressions or "ERE" for extended ones. The string must be given in upper case. The default is "BRE" to be consistent with earlier versions of rsyslog that did not support ERE. The submatch identifies the submatch to be used with the result. A single digit is supported. Match 0 is the full match, -while 1 to 9 are the acutal submatches. +while 1 to 9 are the actual submatches. The match-number identifies which match to +use, if the expression occurs more than once inside the string. Please note +that the first match is number 0, the second 1 and so on. Up to 10 matches +(up to number 9) are supported. Please note that it would be more +natural to have the match-number in front of submatch, but this would break +backward-compatibility. So the match-number must be specified after "nomatch".

nomatch is either "DFLT", "BLANK" or "FIELD" (all upper case!). It tells what to use if no match is found. With "DFLT", the strig "**NO MATCH**" is used. This was the only supported value up to rsyslog 3.19.5. With "BLANK" @@ -224,6 +229,8 @@ to be useful. submatch from the message string and replaces the expression with the full field if no match is found:

%msg:R,ERE,1,FIELD:for (vlan[0-9]*):--end% +

and this takes the first submatch of the second match of said expression: +

%msg:R,ERE,1,FIELD,1:for (vlan[0-9]*):--end%

Also, extraction can be done based on so-called "fields". To do so, place a "F" into FromChar. A field in its current definition is anything that is delimited by a delimiter diff --git a/runtime/msg.c b/runtime/msg.c index a90416ff..f195d3bd 100644 --- a/runtime/msg.c +++ b/runtime/msg.c @@ -1602,6 +1602,7 @@ char *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, char *pBufStart; char *pBuf; int iLen; + short iOffs; #ifdef FEATURE_REGEXP /* Variables necessary for regular expression matching */ @@ -1842,7 +1843,29 @@ char *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, dbgprintf("string to match for regex is: %s\n", pRes); if(objUse(regexp, LM_REGEXP_FILENAME) == RS_RET_OK) { - if (0 != regexp.regexec(&pTpe->data.field.re, pRes, nmatch, pmatch, 0)) { + short iTry = 0; + uchar bFound = 0; + iOffs = 0; + /* first see if we find a match, iterating through the series of + * potential matches over the string. + */ + while(!bFound) { + if(regexp.regexec(&pTpe->data.field.re, pRes + iOffs, nmatch, pmatch, 0) == 0) { + if(pmatch[0].rm_so == -1) { + dbgprintf("oops ... start offset of successful regexec is -1\n"); + break; + } + if(iTry == pTpe->data.field.iMatchToUse) { + bFound = 1; + } else { + iOffs += pmatch[0].rm_eo; + ++iTry; + } + } else { + break; + } + } + if(!bFound) { /* we got no match! */ if(pTpe->data.field.nomatchAction != TPL_REGEX_NOMATCH_USE_WHOLE_FIELD) { if (*pbMustBeFreed == 1) { @@ -1857,7 +1880,7 @@ char *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, } else { /* Match- but did it match the one we wanted? */ /* we got no match! 
*/ - if(pmatch[pTpe->data.field.iMatchToUse].rm_so == -1) { + if(pmatch[pTpe->data.field.iSubMatchToUse].rm_so == -1) { if(pTpe->data.field.nomatchAction != TPL_REGEX_NOMATCH_USE_WHOLE_FIELD) { if (*pbMustBeFreed == 1) { free(pRes); @@ -1873,8 +1896,8 @@ char *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, int iLenBuf; char *pB; - iLenBuf = pmatch[pTpe->data.field.iMatchToUse].rm_eo - - pmatch[pTpe->data.field.iMatchToUse].rm_so; + iLenBuf = pmatch[pTpe->data.field.iSubMatchToUse].rm_eo + - pmatch[pTpe->data.field.iSubMatchToUse].rm_so; pB = (char *) malloc((iLenBuf + 1) * sizeof(char)); if (pB == NULL) { @@ -1885,7 +1908,7 @@ char *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, } /* Lets copy the matched substring to the buffer */ - memcpy(pB, pRes + pmatch[pTpe->data.field.iMatchToUse].rm_so, iLenBuf); + memcpy(pB, pRes + iOffs + pmatch[pTpe->data.field.iSubMatchToUse].rm_so, iLenBuf); pB[iLenBuf] = '\0';/* terminate string, did not happen before */ if (*pbMustBeFreed == 1) diff --git a/template.c b/template.c index 2b336ba9..e85be4be 100644 --- a/template.c +++ b/template.c @@ -534,14 +534,14 @@ static int do_Parameter(unsigned char **pp, struct template *pTpl) } /* now check for submatch ID */ - pTpe->data.field.iMatchToUse = 0; + pTpe->data.field.iSubMatchToUse = 0; if(*p == ',') { /* in this case a number follows, which indicates which match * shall be used. This must be a single digit. 
*/ ++p; /* eat ',' */ if(isdigit((int) *p)) { - pTpe->data.field.iMatchToUse = *p - '0'; + pTpe->data.field.iSubMatchToUse = *p - '0'; ++p; /* eat digit */ } } @@ -561,12 +561,30 @@ static int do_Parameter(unsigned char **pp, struct template *pTpl) && (p[5] == ',' || p[5] == ':')) { pTpe->data.field.nomatchAction = TPL_REGEX_NOMATCH_USE_WHOLE_FIELD; p += 5; /* eat indicator sequence */ + } else if(p[0] == ',') { /* empty, use default */ + pTpe->data.field.nomatchAction = TPL_REGEX_NOMATCH_USE_DFLTSTR; + /* do NOT eat indicator sequence, as this was already eaten - the + * comma itself is already part of the next field. + */ } else { errmsg.LogError(NO_ERRCODE, "error: invalid regular expression type, rest of line %s", (char*) p); } } + /* now check for match ID */ + pTpe->data.field.iMatchToUse = 0; + if(*p == ',') { + /* in this case a number follows, which indicates which match + * shall be used. This must be a single digit. + */ + ++p; /* eat ',' */ + if(isdigit((int) *p)) { + pTpe->data.field.iMatchToUse = *p - '0'; + ++p; /* eat digit */ + } + } + if(*p != ':') { /* There is something more than an R , this is invalid ! 
*/ /* Complain on extra characters */ @@ -574,8 +592,8 @@ static int do_Parameter(unsigned char **pp, struct template *pTpl) (char*) *pp); } else { pTpe->data.field.has_regex = 1; - dbgprintf("we have a regexp and use match #%d\n", - pTpe->data.field.iMatchToUse); + dbgprintf("we have a regexp and use match #%d, submatch #%d\n", + pTpe->data.field.iMatchToUse, pTpe->data.field.iSubMatchToUse); } } else { /* now we fall through the "regular" FromPos code */ diff --git a/template.h b/template.h index dff06583..baf33d4e 100644 --- a/template.h +++ b/template.h @@ -69,6 +69,7 @@ struct templateEntry { regex_t re; /* APR: this is the regular expression */ short has_regex; short iMatchToUse;/* which match should be obtained (10 max) */ + short iSubMatchToUse;/* which submatch should be obtained (10 max) */ enum { TPL_REGEX_BRE = 0, /* posix BRE */ TPL_REGEX_ERE = 1 /* posix ERE */ -- cgit