From a86e42028afeba1daca262b590bfd49d9c393b90 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 2 Apr 2009 16:16:57 +0200 Subject: improved performance of regexp-based filters Thanks to Arnaud Cornet for providing the idea and initial patch. --- runtime/stringbuf.c | 30 ++++++++++++++++++++++++++++++ runtime/stringbuf.h | 2 ++ 2 files changed, 32 insertions(+) (limited to 'runtime') diff --git a/runtime/stringbuf.c b/runtime/stringbuf.c index a5dc625a..c0a19ae4 100644 --- a/runtime/stringbuf.c +++ b/runtime/stringbuf.c @@ -724,6 +724,36 @@ finalize_it: RETiRet; } +/* same as above, only not braindead */ +int rsCStrSzStrMatchRegexCache(cstr_t *pCS1, uchar *psz, void **rc) +{ + int ret; + + BEGINfunc + + if(objUse(regexp, LM_REGEXP_FILENAME) == RS_RET_OK) { + regex_t **cache = rc; + if (*cache == NULL) { + *cache = calloc(sizeof(regex_t), 1); + regexp.regcomp(*cache, (char*) rsCStrGetSzStr(pCS1), 0); + } + ret = regexp.regexec(*cache, (char*) psz, 0, NULL, 0); + } else { + ret = 1; /* simulate "not found" */ + } + + ENDfunc + return ret; +} + +/* free a cached compiled regex */ +void rsRegexDestruct(void **rc) { + regex_t **cache = rc; + regexp.regfree(*cache); + free(*cache); + *cache = NULL; +} + /* compare a rsCStr object with a classical sz string. 
This function * is almost identical to rsCStrZsStrCmp(), but it also takes an offset diff --git a/runtime/stringbuf.h b/runtime/stringbuf.h index f3e08439..4b0fb065 100644 --- a/runtime/stringbuf.h +++ b/runtime/stringbuf.h @@ -137,6 +137,8 @@ int rsCStrStartsWithSzStr(cstr_t *pCS1, uchar *psz, size_t iLenSz); int rsCStrCaseInsensitveStartsWithSzStr(cstr_t *pCS1, uchar *psz, size_t iLenSz); int rsCStrSzStrStartsWithCStr(cstr_t *pCS1, uchar *psz, size_t iLenSz); rsRetVal rsCStrSzStrMatchRegex(cstr_t *pCS1, uchar *psz, int iType); +int rsCStrSzStrMatchRegexCache(cstr_t *pCS1, uchar *psz, void **cache); +void rsRegexDestruct(void **rc); rsRetVal rsCStrConvertToNumber(cstr_t *pStr, number_t *pNumber); rsRetVal rsCStrConvertToBool(cstr_t *pStr, number_t *pBool); rsRetVal rsCStrAppendCStr(cstr_t *pThis, cstr_t *pstrAppend); -- cgit From 1d16216aa326296673cc8520a8df351c4d492dfe Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 2 Apr 2009 16:51:53 +0200 Subject: streamlined regex patch - abided by code conventions - fixed a potential segfault when regex library cannot be loaded --- runtime/stringbuf.c | 61 ++++++++++++++++++++++++++++++++--------------------- runtime/stringbuf.h | 4 ++-- 2 files changed, 39 insertions(+), 26 deletions(-) (limited to 'runtime') diff --git a/runtime/stringbuf.c b/runtime/stringbuf.c index c0a19ae4..4a7cc4bd 100644 --- a/runtime/stringbuf.c +++ b/runtime/stringbuf.c @@ -725,33 +725,46 @@ finalize_it: } /* same as above, only not braindead */ -int rsCStrSzStrMatchRegexCache(cstr_t *pCS1, uchar *psz, void **rc) +int rsCStrSzStrMatchRegexCache(cstr_t *pCS1, uchar *psz, void *rc) { - int ret; - - BEGINfunc - - if(objUse(regexp, LM_REGEXP_FILENAME) == RS_RET_OK) { - regex_t **cache = rc; - if (*cache == NULL) { - *cache = calloc(sizeof(regex_t), 1); - regexp.regcomp(*cache, (char*) rsCStrGetSzStr(pCS1), 0); - } - ret = regexp.regexec(*cache, (char*) psz, 0, NULL, 0); - } else { - ret = 1; /* simulate "not found" */ - } - - ENDfunc - 
return ret; + int ret; + regex_t **cache = (regex_t**) rc; + + BEGINfunc + + assert(cache != NULL); + + if(objUse(regexp, LM_REGEXP_FILENAME) == RS_RET_OK) { + if (*cache == NULL) { + *cache = calloc(sizeof(regex_t), 1); + regexp.regcomp(*cache, (char*) rsCStrGetSzStr(pCS1), 0); + } + ret = regexp.regexec(*cache, (char*) psz, 0, NULL, 0); + } else { + ret = 1; /* simulate "not found" */ + } + + ENDfunc + return ret; } -/* free a cached compiled regex */ -void rsRegexDestruct(void **rc) { - regex_t **cache = rc; - regexp.regfree(*cache); - free(*cache); - *cache = NULL; + +/* free a cached compiled regex + * Caller must provide a pointer to a buffer that was created by + * rsCStrSzStrMatchRegexCache() + */ +void rsCStrRegexDestruct(void *rc) +{ + regex_t **cache = rc; + + assert(cache != NULL); + assert(*cache != NULL); + + if(objUse(regexp, LM_REGEXP_FILENAME) == RS_RET_OK) { + regexp.regfree(*cache); + free(*cache); + *cache = NULL; + } } diff --git a/runtime/stringbuf.h b/runtime/stringbuf.h index 4b0fb065..311d7f41 100644 --- a/runtime/stringbuf.h +++ b/runtime/stringbuf.h @@ -137,8 +137,8 @@ int rsCStrStartsWithSzStr(cstr_t *pCS1, uchar *psz, size_t iLenSz); int rsCStrCaseInsensitveStartsWithSzStr(cstr_t *pCS1, uchar *psz, size_t iLenSz); int rsCStrSzStrStartsWithCStr(cstr_t *pCS1, uchar *psz, size_t iLenSz); rsRetVal rsCStrSzStrMatchRegex(cstr_t *pCS1, uchar *psz, int iType); -int rsCStrSzStrMatchRegexCache(cstr_t *pCS1, uchar *psz, void **cache); -void rsRegexDestruct(void **rc); +int rsCStrSzStrMatchRegexCache(cstr_t *pCS1, uchar *psz, void *cache); +void rsCStrRegexDestruct(void *rc); rsRetVal rsCStrConvertToNumber(cstr_t *pStr, number_t *pNumber); rsRetVal rsCStrConvertToBool(cstr_t *pStr, number_t *pBool); rsRetVal rsCStrAppendCStr(cstr_t *pThis, cstr_t *pstrAppend); -- cgit From 4ab540e3ba25a13fd079490ac52438e55dc92672 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 2 Apr 2009 17:54:48 +0200 Subject: fully integrated regex patch Now have 
removed the previous method, as really nobody should call it any longer (and now nobody does ;)). Also did some other cleanup. --- runtime/stringbuf.c | 41 +++++++++++++---------------------------- runtime/stringbuf.h | 3 +-- 2 files changed, 14 insertions(+), 30 deletions(-) (limited to 'runtime') diff --git a/runtime/stringbuf.c b/runtime/stringbuf.c index 4a7cc4bd..35ec44c6 100644 --- a/runtime/stringbuf.c +++ b/runtime/stringbuf.c @@ -703,49 +703,34 @@ int rsCStrCaseInsensitveStartsWithSzStr(cstr_t *pCS1, uchar *psz, size_t iLenSz) * never is a \0 *inside* a property string. * Note that the function returns -1 if regexp functionality is not available. * rgerhards: 2009-03-04: ERE support added, via parameter iType: 0 - BRE, 1 - ERE + * Arnaud Cornet/rgerhards: 2009-04-02: performance improvement by caching compiled regex + * If a caller does not need the cached version, it must still provide memory for it + * and must call rsCStrRegexDestruct() afterwards. */ -rsRetVal rsCStrSzStrMatchRegex(cstr_t *pCS1, uchar *psz, int iType) +rsRetVal rsCStrSzStrMatchRegex(cstr_t *pCS1, uchar *psz, int iType, void *rc) { - regex_t preq; + regex_t **cache = (regex_t**) rc; int ret; DEFiRet; - if(objUse(regexp, LM_REGEXP_FILENAME) == RS_RET_OK) { - regexp.regcomp(&preq, (char*) rsCStrGetSzStr(pCS1), (iType == 1 ? 
REG_EXTENDED : 0) | REG_NOSUB); - ret = regexp.regexec(&preq, (char*) psz, 0, NULL, 0); - regexp.regfree(&preq); - if(ret != 0) - ABORT_FINALIZE(RS_RET_NOT_FOUND); - } else { - ABORT_FINALIZE(RS_RET_NOT_FOUND); - } - -finalize_it: - RETiRet; -} - -/* same as above, only not braindead */ -int rsCStrSzStrMatchRegexCache(cstr_t *pCS1, uchar *psz, void *rc) -{ - int ret; - regex_t **cache = (regex_t**) rc; - - BEGINfunc - + assert(pCS1 != NULL); + assert(psz != NULL); assert(cache != NULL); if(objUse(regexp, LM_REGEXP_FILENAME) == RS_RET_OK) { if (*cache == NULL) { *cache = calloc(sizeof(regex_t), 1); - regexp.regcomp(*cache, (char*) rsCStrGetSzStr(pCS1), 0); + regexp.regcomp(*cache, (char*) rsCStrGetSzStr(pCS1), (iType == 1 ? REG_EXTENDED : 0) | REG_NOSUB); } ret = regexp.regexec(*cache, (char*) psz, 0, NULL, 0); + if(ret != 0) + ABORT_FINALIZE(RS_RET_NOT_FOUND); } else { - ret = 1; /* simulate "not found" */ + ABORT_FINALIZE(RS_RET_NOT_FOUND); } - ENDfunc - return ret; +finalize_it: + RETiRet; } diff --git a/runtime/stringbuf.h b/runtime/stringbuf.h index 311d7f41..684133bb 100644 --- a/runtime/stringbuf.h +++ b/runtime/stringbuf.h @@ -136,8 +136,7 @@ int rsCStrCaseInsensitiveLocateInSzStr(cstr_t *pThis, uchar *sz); int rsCStrStartsWithSzStr(cstr_t *pCS1, uchar *psz, size_t iLenSz); int rsCStrCaseInsensitveStartsWithSzStr(cstr_t *pCS1, uchar *psz, size_t iLenSz); int rsCStrSzStrStartsWithCStr(cstr_t *pCS1, uchar *psz, size_t iLenSz); -rsRetVal rsCStrSzStrMatchRegex(cstr_t *pCS1, uchar *psz, int iType); -int rsCStrSzStrMatchRegexCache(cstr_t *pCS1, uchar *psz, void *cache); +rsRetVal rsCStrSzStrMatchRegex(cstr_t *pCS1, uchar *psz, int iType, void *cache); void rsCStrRegexDestruct(void *rc); rsRetVal rsCStrConvertToNumber(cstr_t *pStr, number_t *pNumber); rsRetVal rsCStrConvertToBool(cstr_t *pStr, number_t *pBool); -- cgit