From 9c76723c5b048afe4009f0528a6201741fec234a Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Fri, 27 Apr 2012 09:42:53 +0200 Subject: added capability to specify substrings for field extraction mode --- ChangeLog | 1 + doc/property_replacer.html | 9 ++++ runtime/msg.c | 112 +++++++++++++++++++++++---------------------- template.c | 37 +++++++++++---- template.h | 1 + 5 files changed, 97 insertions(+), 63 deletions(-) diff --git a/ChangeLog b/ChangeLog index 07f746a7..4617864a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,4 @@ +- added capability to specify substrings for field extraction mode --------------------------------------------------------------------------- Version 5.8.11 [V5-stable] 2012-04-?? - bugfix: imptcp input name could not be set diff --git a/doc/property_replacer.html b/doc/property_replacer.html index f0153f2a..5dbdc4c6 100644 --- a/doc/property_replacer.html +++ b/doc/property_replacer.html @@ -274,6 +274,15 @@ fields in the property is requested. The field number must be placed in the "ToChar" parameter. An example where the 3rd field (delimited by TAB) from the msg property is extracted is as follows: "%msg:F:3%". The same example with semicolon as delimiter is "%msg:F,59:3%".

+

The use of fields does not by itself permit selecting substrings, which is rather +unfortunate. To solve this issue, starting with 6.3.9, fromPos and toPos +can be specified in field extraction mode as well. The syntax is quite ugly, but +it was the only way to integrate this functionality into the already-existing +system. To do so, use ",fromPos" and ",toPos" during field extraction. +Let's assume you want to extract the substring from position 5 to 9 in the previous +example. Then, the syntax is as follows: "%msg:F,59,5:3,9%". As you can see, +"F,59" means field mode with semicolon as the delimiter, and ",5" means starting +at position 5. Then "3,9" means field 3 and string extraction up to position 9.

Please note that the special characters "F" and "R" are case-sensitive. Only upper case works, lower case will return an error. There are no white spaces permitted inside the sequence (that will lead diff --git a/runtime/msg.c b/runtime/msg.c index 7b94228c..6a84cd63 100644 --- a/runtime/msg.c +++ b/runtime/msg.c @@ -2537,7 +2537,7 @@ uchar *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, */ iCurrFld = 1; pFld = pRes; - while(*pFld && iCurrFld < pTpe->data.field.iToPos) { + while(*pFld && iCurrFld < pTpe->data.field.iFieldNr) { /* skip fields until the requested field or end of string is found */ while(*pFld && (uchar) *pFld != pTpe->data.field.field_delim) ++pFld; /* skip to field terminator */ @@ -2551,9 +2551,9 @@ uchar *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, ++iCurrFld; } } - dbgprintf("field requested %d, field found %d\n", pTpe->data.field.iToPos, (int) iCurrFld); + dbgprintf("field requested %d, field found %d\n", pTpe->data.field.iFieldNr, (int) iCurrFld); - if(iCurrFld == pTpe->data.field.iToPos) { + if(iCurrFld == pTpe->data.field.iFieldNr) { /* field found, now extract it */ /* first of all, we need to find the end */ pFldEnd = pFld; @@ -2588,58 +2588,6 @@ uchar *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, *pPropLen = sizeof("**FIELD NOT FOUND**") - 1; return UCHAR_CONSTANT("**FIELD NOT FOUND**"); } - } else if(pTpe->data.field.iFromPos != 0 || pTpe->data.field.iToPos != 0) { - /* we need to obtain a private copy */ - int iFrom, iTo; - uchar *pSb; - iFrom = pTpe->data.field.iFromPos; - iTo = pTpe->data.field.iToPos; - /* need to zero-base to and from (they are 1-based!) */ - if(iFrom > 0) - --iFrom; - if(iTo > 0) - --iTo; - if(bufLen == -1) - bufLen = ustrlen(pRes); - if(iFrom == 0 && iTo >= bufLen) { - /* in this case, the requested string is a superset of what we already have, - * so there is no need to do any processing. 
This is a frequent case for size-limited - * fields like TAG in the default forwarding template (so it is a useful optimization - * to check for this condition ;)). -- rgerhards, 2009-07-09 - */ - ; /*DO NOTHING*/ - } else { - iLen = iTo - iFrom + 1; /* the +1 is for an actual char, NOT \0! */ - pBufStart = pBuf = MALLOC((iLen + 1) * sizeof(char)); - if(pBuf == NULL) { - if(*pbMustBeFreed == 1) - free(pRes); - RET_OUT_OF_MEMORY; - } - pSb = pRes; - if(iFrom) { - /* skip to the start of the substring (can't do pointer arithmetic - * because the whole string might be smaller!!) - */ - while(*pSb && iFrom) { - --iFrom; - ++pSb; - } - } - /* OK, we are at the begin - now let's copy... */ - bufLen = iLen; - while(*pSb && iLen) { - *pBuf++ = *pSb; - ++pSb; - --iLen; - } - *pBuf = '\0'; - bufLen -= iLen; /* subtract remaining length if the string was smaller! */ - if(*pbMustBeFreed == 1) - free(pRes); - pRes = pBufStart; - *pbMustBeFreed = 1; - } #ifdef FEATURE_REGEXP } else { /* Check for regular expressions */ @@ -2765,6 +2713,60 @@ uchar *MsgGetProp(msg_t *pMsg, struct templateEntry *pTpe, #endif /* #ifdef FEATURE_REGEXP */ } + if(pTpe->data.field.iFromPos != 0 || pTpe->data.field.iToPos != 0) { + /* we need to obtain a private copy */ + int iFrom, iTo; + uchar *pSb; + iFrom = pTpe->data.field.iFromPos; + iTo = pTpe->data.field.iToPos; + /* need to zero-base to and from (they are 1-based!) */ + if(iFrom > 0) + --iFrom; + if(iTo > 0) + --iTo; + if(bufLen == -1) + bufLen = ustrlen(pRes); + if(iFrom == 0 && iTo >= bufLen) { + /* in this case, the requested string is a superset of what we already have, + * so there is no need to do any processing. This is a frequent case for size-limited + * fields like TAG in the default forwarding template (so it is a useful optimization + * to check for this condition ;)). -- rgerhards, 2009-07-09 + */ + ; /*DO NOTHING*/ + } else { + iLen = iTo - iFrom + 1; /* the +1 is for an actual char, NOT \0! 
*/ + pBufStart = pBuf = MALLOC((iLen + 1) * sizeof(char)); + if(pBuf == NULL) { + if(*pbMustBeFreed == 1) + free(pRes); + RET_OUT_OF_MEMORY; + } + pSb = pRes; + if(iFrom) { + /* skip to the start of the substring (can't do pointer arithmetic + * because the whole string might be smaller!!) + */ + while(*pSb && iFrom) { + --iFrom; + ++pSb; + } + } + /* OK, we are at the begin - now let's copy... */ + bufLen = iLen; + while(*pSb && iLen) { + *pBuf++ = *pSb; + ++pSb; + --iLen; + } + *pBuf = '\0'; + bufLen -= iLen; /* subtract remaining length if the string was smaller! */ + if(*pbMustBeFreed == 1) + free(pRes); + pRes = pBufStart; + *pbMustBeFreed = 1; + } + } + /* now check if we need to do our "SP if first char is non-space" hack logic */ if(*pRes && pTpe->data.field.options.bSPIffNo1stSP) { /* here, we always destruct the buffer and return a new one */ diff --git a/template.c b/template.c index 2038c6c1..3f6ea3ea 100644 --- a/template.c +++ b/template.c @@ -715,6 +715,13 @@ static int do_Parameter(unsigned char **pp, struct template *pTpl) pTpe->data.field.field_expand = 1; p ++; } + if(*p == ',') { /* real fromPos? */ + ++p; + iNum = 0; + while(isdigit((int)*p)) + iNum = iNum * 10 + *p++ - '0'; + pTpe->data.field.iFromPos = iNum; + } } } } else { @@ -815,10 +822,24 @@ static int do_Parameter(unsigned char **pp, struct template *pTpl) /* fallthrough to "regular" ToPos code */ #endif /* #ifdef FEATURE_REGEXP */ - iNum = 0; - while(isdigit((int)*p)) - iNum = iNum * 10 + *p++ - '0'; - pTpe->data.field.iToPos = iNum; + if(pTpe->data.field.has_fields == 1) { + iNum = 0; + while(isdigit((int)*p)) + iNum = iNum * 10 + *p++ - '0'; + pTpe->data.field.iFieldNr = iNum; + if(*p == ',') { /* get real toPos? 
*/ + ++p; + iNum = 0; + while(isdigit((int)*p)) + iNum = iNum * 10 + *p++ - '0'; + pTpe->data.field.iToPos = iNum; + } + } else { + iNum = 0; + while(isdigit((int)*p)) + iNum = iNum * 10 + *p++ - '0'; + pTpe->data.field.iToPos = iNum; + } /* skip to next known good */ while(*p && *p != '%' && *p != ':') { /* TODO: complain on extra characters */ @@ -830,7 +851,7 @@ static int do_Parameter(unsigned char **pp, struct template *pTpl) #endif /* #ifdef FEATURE_REGEXP */ } - if((pTpe->data.field.has_fields == 0) && (pTpe->data.field.iToPos < pTpe->data.field.iFromPos)) { + if(pTpe->data.field.iToPos < pTpe->data.field.iFromPos) { iNum = pTpe->data.field.iToPos; pTpe->data.field.iToPos = pTpe->data.field.iFromPos; pTpe->data.field.iFromPos = iNum; @@ -1252,9 +1273,9 @@ void tplPrintList(void) } if(pTpe->data.field.has_fields == 1) { dbgprintf("[substring, field #%d only (delemiter %d)] ", - pTpe->data.field.iToPos, pTpe->data.field.field_delim); - } else if(pTpe->data.field.iFromPos != 0 || - pTpe->data.field.iToPos != 0) { + pTpe->data.field.iFieldNr, pTpe->data.field.field_delim); + } + if(pTpe->data.field.iFromPos != 0 || pTpe->data.field.iToPos != 0) { dbgprintf("[substring, from character %d to %d] ", pTpe->data.field.iFromPos, pTpe->data.field.iToPos); diff --git a/template.h b/template.h index f7ac2e08..d839be7e 100644 --- a/template.h +++ b/template.h @@ -67,6 +67,7 @@ struct templateEntry { propid_t propid; /* property to be used */ unsigned iFromPos; /* for partial strings only chars from this position ... */ unsigned iToPos; /* up to that one... */ + unsigned iFieldNr; /* for field extraction: field to extract */ #ifdef FEATURE_REGEXP regex_t re; /* APR: this is the regular expression */ short has_regex; -- cgit