From 84084ea2a178f782184d75db10f252efdc62fb5f Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Mon, 19 Apr 2010 18:39:55 +0200 Subject: improved quality of imsolaris code including refctoring for a more simple solution --- plugins/imsolaris/imsolaris.c | 146 ++++++++++++++++++++++++++++++++++++------ plugins/imsolaris/imsolaris.h | 1 + plugins/imsolaris/sun_cddl.c | 143 ++++++++++------------------------------- plugins/imsolaris/sun_cddl.h | 4 +- 4 files changed, 163 insertions(+), 131 deletions(-) (limited to 'plugins') diff --git a/plugins/imsolaris/imsolaris.c b/plugins/imsolaris/imsolaris.c index 67aa479d..c8076d93 100644 --- a/plugins/imsolaris/imsolaris.c +++ b/plugins/imsolaris/imsolaris.c @@ -66,6 +66,7 @@ #include "rsyslog.h" #include #include +#include #include #include #include @@ -96,9 +97,6 @@ DEFobjCurrIf(glbl) DEFobjCurrIf(prop) -static int logfd = -1; /* file descriptor to access the system log */ - - /* config settings */ static prop_t *pInputName = NULL; /* our inputName currently is always "imuxsock", and this will hold it */ static char *LogName = NULL; /* the log socket name TODO: make configurable! */ @@ -128,8 +126,6 @@ solaris_readLog(int fd) uchar *pRcv = NULL; /* receive buffer */ char errStr[1024]; - assert(logfd >= 0); - iMaxLine = glbl.GetMaxLine(); /* we optimize performance: if iMaxLine is below 4K (which it is in almost all @@ -152,10 +148,10 @@ solaris_readLog(int fd) ret = getmsg(fd, &ctl, &data, &flags); if(ret < 0) { rs_strerror_r(errno, errStr, sizeof(errStr)); - dbgprintf("imsolaris: getmsg() error on fd %d: %s.\n", fd, errStr); + DBGPRINTF("imsolaris: getmsg() error on fd %d: %s.\n", fd, errStr); } - dbgprintf("imsolaris: getmsg() returns %d\n", ret); - dbgprintf("imsolaris: message from log socket: #%d: %s\n", fd, pRcv); + DBGPRINTF("imsolaris: getmsg() returns %d\n", ret); + DBGPRINTF("imsolaris: message from log socket: #%d: %s\n", fd, pRcv); if (1) {//iRcvd > 0) { CHKiRet(msgConstruct(&pMsg)); //MsgSetFlowControlType(pMsg, eFLOWCTL_FULL_DELAY); @@ -170,7 +166,7 @@ solaris_readLog(int fd) } else if (iRcvd < 0 && errno != EINTR) { int en = errno; rs_strerror_r(en, errStr, sizeof(errStr)); - dbgprintf("imsolaris: stream error: %d = %s.\n", errno, errStr); + DBGPRINTF("imsolaris: stream error: %d = %s.\n", errno, errStr); errmsg.LogError(en, NO_ERRCODE, "imsolaris: stream input error: %s", errStr); } @@ -182,6 +178,117 @@ finalize_it: } +/* we try to recover a failed file by closing and re-opening + * it. We loop until the re-open works, but wait between each + * failure. If the open succeeds, we assume all is well. If it is + * not, we will run into the retry process with the next + * iteration. + * rgerhards, 2010-04-19 + */ +static inline void +tryRecover(void) +{ + int tryNum = 0; + int waitsecs; + int waitusecs; + rsRetVal iRet; + + close(sun_Pfd.fd); + sun_Pfd.fd = -1; + + while(1) { /* loop broken inside */ + iRet = sun_openklog((LogName == NULL) ? PATH_LOG : LogName); + if(iRet == RS_RET_OK) { + if(tryNum > 0) { + errmsg.LogError(0, iRet, "failure on system log socket recovered."); + } + break; + } + /* failure, so sleep a bit. We wait try*10 ms, with a max of 15 seconds */ + if(tryNum == 0) { + errmsg.LogError(0, iRet, "failure on system log socket, trying to recover..."); + waitusecs = tryNum * 10000; + waitsecs = waitusecs / 1000000; + if(waitsecs != 15) { + waitsecs = 15; + waitusecs = 0; + } else { + waitusecs = waitusecs % 1000000; + } + srSleep(waitsecs, waitusecs); + ++tryNum; + } + } +} + + +/* a function to replace the sun logerror() function. + * It generates an error message from the supplied string. The main + * reason for not calling logError directly is that sun_cddl.c does not + * know or has acces to rsyslog objects (namely errmsg) -- and we do not + * want to do this effort. -- rgerhards, 2010-04-19 + */ +void +imsolaris_logerror(int err, char *errStr) +{ + errmsg.LogError(err, RS_RET_ERR_DOOR, "%s", errStr); +} + + +/* once the system is fully initialized, we wait for new messages. + * We may think about replacing this with a read-loop, thus saving + * us the overhead of the poll. + * The timeout variable is the timeout to use for poll. During startup, + * it should be set to 0 (non-blocking) and later to -1 (infinit, blocking). + * This mimics the (strange) behaviour of the original syslogd. + * rgerhards, 2010-04-19 + */ +static inline rsRetVal +getMsgs(int timeout) +{ + DEFiRet; + int nfds; + char errStr[1024]; + + do { + DBGPRINTF("imsolaris: waiting for next message (timeout %d)...\n", timeout); + nfds = poll(&sun_Pfd, 1, timeout); /* wait without timeout */ + + /* v5-TODO: here we must check if we should terminante! */ + + if(nfds == 0) { + if(timeout == 0) { + DBGPRINTF("imsolaris: no more messages, getMsgs() terminates\n"); + FINALIZE; + } else { + continue; + } + } + + if(nfds < 0) { + if(errno != EINTR) { + int en = errno; + rs_strerror_r(en, errStr, sizeof(errStr)); + DBGPRINTF("imsolaris: poll error: %d = %s.\n", errno, errStr); + errmsg.LogError(en, NO_ERRCODE, "imsolaris: poll error: %s", errStr); + } + continue; + } + if(sun_Pfd.revents & POLLIN) { + solaris_readLog(sun_Pfd.fd); + } else if(sun_Pfd.revents & (POLLNVAL|POLLHUP|POLLERR)) { + tryRecover(); + } + + } while(1); + + /* Note: in v4, this code is never reached (our thread will be cancelled) */ + +finalize_it: + RETiRet; +} + + /* This function is called to gather input. */ BEGINrunInput CODESTARTrunInput @@ -190,19 +297,18 @@ CODESTARTrunInput * right into the sleep below. */ - dbgprintf("imsolaris: prepare_sys_poll()\n"); - prepare_sys_poll(); + DBGPRINTF("imsolaris: doing startup poll before openeing door()\n"); + CHKiRet(getMsgs(0)); /* note: sun's syslogd code claims that the door should only - * be opened when the log socket has been polled. So file header + * be opened when the log stream has been polled. So file header * comment of this file for more details. */ sun_open_door(); - dbgprintf("imsolaris: starting regular poll loop\n"); - while(1) { - sun_sys_poll(); - } + DBGPRINTF("imsolaris: starting regular poll loop\n"); + iRet = getMsgs(-1); /* this is the primary poll loop, infinite timeout */ +finalize_it: RETiRet; ENDrunInput @@ -214,8 +320,7 @@ CODESTARTwillRun CHKiRet(prop.SetString(pInputName, UCHAR_CONSTANT("imsolaris"), sizeof("imsolaris") - 1)); CHKiRet(prop.ConstructFinalize(pInputName)); - iRet = sun_openklog((LogName == NULL) ? PATH_LOG : LogName, &logfd); - dbgprintf("imsolaris opened system log socket as fd %d\n", logfd); + iRet = sun_openklog((LogName == NULL) ? PATH_LOG : LogName); if(iRet != RS_RET_OK) { errmsg.LogError(0, iRet, "error opening system log socket"); } @@ -245,7 +350,8 @@ CODESTARTqueryEtryPt CODEqueryEtryPt_STD_IMOD_QUERIES ENDqueryEtryPt -static rsRetVal resetConfigVariables(uchar __attribute__((unused)) *pp, void __attribute__((unused)) *pVal) +static rsRetVal resetConfigVariables(uchar __attribute__((unused)) *pp, + void __attribute__((unused)) *pVal) { return RS_RET_OK; } @@ -259,7 +365,7 @@ CODEmodInit_QueryRegCFSLineHdlr CHKiRet(objUse(glbl, CORE_COMPONENT)); CHKiRet(objUse(prop, CORE_COMPONENT)); - dbgprintf("imsolaris version %s initializing\n", PACKAGE_VERSION); + DBGPRINTF("imsolaris version %s initializing\n", PACKAGE_VERSION); /* register config file handlers */ CHKiRet(omsdRegCFSLineHdlr((uchar *)"resetconfigvariables", 1, eCmdHdlrCustomHandler, diff --git a/plugins/imsolaris/imsolaris.h b/plugins/imsolaris/imsolaris.h index 9637220b..e73380fa 100644 --- a/plugins/imsolaris/imsolaris.h +++ b/plugins/imsolaris/imsolaris.h @@ -1 +1,2 @@ rsRetVal solaris_readLog(int fd); +void imsolaris_logerror(int err, char *errStr); diff --git a/plugins/imsolaris/sun_cddl.c b/plugins/imsolaris/sun_cddl.c index 69636df0..6d49c8bc 100644 --- a/plugins/imsolaris/sun_cddl.c +++ b/plugins/imsolaris/sun_cddl.c @@ -71,7 +71,14 @@ /* Buffer to allocate for error messages: */ #define ERRMSG_LEN 1024 -static struct pollfd Pfd; /* Pollfd for local the log device */ +/* Max number of door server threads for syslogd. Since door is used + * to check the health of syslogd, we don't need large number of + * server threads. + */ +#define MAX_DOOR_SERVER_THR 3 + + +struct pollfd sun_Pfd; /* Pollfd for local log device */ static int DoorFd = -1; static int DoorCreated = 0; @@ -82,14 +89,16 @@ static pthread_mutex_t door_server_cnt_lock = PTHREAD_MUTEX_INITIALIZER; static uint_t door_server_cnt = 0; static pthread_attr_t door_thr_attr; -/* - * the 'server' function that we export via the door. It does +/* the 'server' function that we export via the door. It does * nothing but return. */ /*ARGSUSED*/ static void -server(void __attribute__((unused)) *cookie, char __attribute__((unused)) *argp, size_t __attribute__((unused)) arg_size, - door_desc_t __attribute__((unused)) *dp, __attribute__((unused)) uint_t n) +server( void __attribute__((unused)) *cookie, + char __attribute__((unused)) *argp, + size_t __attribute__((unused)) arg_size, + door_desc_t __attribute__((unused)) *dp, + __attribute__((unused)) uint_t n ) { (void) door_return(NULL, 0, NULL, 0); /* NOTREACHED */ @@ -102,8 +111,7 @@ create_door_thr(void __attribute__((unused)) *arg) (void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); (void) door_return(NULL, 0, NULL, 0); - /* - * If there is an error in door_return(), it will return here and + /* If there is an error in door_return(), it will return here and * the thread will exit. Hence we need to decrement door_server_cnt. */ (void) pthread_mutex_lock(&door_server_cnt_lock); @@ -112,13 +120,6 @@ create_door_thr(void __attribute__((unused)) *arg) return (NULL); } -/* - * Max number of door server threads for syslogd. Since door is used - * to check the health of syslogd, we don't need large number of - * server threads. - */ -#define MAX_DOOR_SERVER_THR 3 - /* * Manage door server thread pool. */ @@ -149,8 +150,7 @@ sun_delete_doorfiles(void) err = errno; (void) snprintf(line, sizeof (line), "unlink() of %s failed - fatal", DoorFileName); - errno = err; - DBGPRINTF("%s", line);//logerror(line); + imsolaris_logerror(err, line); DBGPRINTF("delete_doorfiles: error: %s, " "errno=%d\n", line, err); exit(1); @@ -171,7 +171,7 @@ sun_delete_doorfiles(void) errno = err; (void) strlcat(line, " - fatal", sizeof (line)); - //logerror(line); + imsolaris_logerror(err, line); DBGPRINTF("delete_doorfiles: " "error: %s, errno=%d\n", line, err); @@ -233,8 +233,7 @@ sun_open_door(void) info.di_target, getpid()); DBGPRINTF("open_door: error: " "%s\n", line); - errno = 0; - //logerror(line); + imsolaris_logerror(0, line); exit(1); } } @@ -256,8 +255,7 @@ sun_open_door(void) DBGPRINTF("open_door: error: %s, " "errno=%d\n", line, err); - errno = err; - //logerror(line); + imsolaris_logerror(err, line); sun_delete_doorfiles(); exit(1); } @@ -284,8 +282,7 @@ sun_open_door(void) OLD_DOORFILE); DBGPRINTF("open_door: error: " "%s\n", line); - errno = 0; - //logerror(line); + imsolaris_logerror(0, line); sun_delete_doorfiles(); exit(1); } @@ -306,8 +303,7 @@ sun_open_door(void) "errno=%d\n", line, err); (void) strcat(line, " - fatal"); - errno = err; - //logerror(line); + imsolaris_logerror(err, line); sun_delete_doorfiles(); exit(1); } @@ -333,9 +329,8 @@ sun_open_door(void) DBGPRINTF("open_door: error: %s, " "errno=%d\n", line, err); - errno = err; (void) strcat(line, " - fatal"); - //logerror(line); + imsolaris_logerror(err, line); sun_delete_doorfiles(); exit(1); } @@ -356,8 +351,7 @@ sun_open_door(void) (void) sprintf(line, "door_create() failed - fatal"); DBGPRINTF("open_door: error: %s, errno=%d\n", line, err); - errno = err; - //logerror(line); + imsolaris_logerror(err, line); sun_delete_doorfiles(); exit(1); } @@ -378,8 +372,7 @@ sun_open_door(void) " %d to %s failed - fatal", DoorFd, DoorFileName); DBGPRINTF("open_door: error: %s, errno=%d\n", line, err); - errno = err; - //logerror(line); + imsolaris_logerror(err, line); sun_delete_doorfiles(); exit(1); } @@ -394,15 +387,16 @@ sun_open_door(void) * and return a file descriptor. */ rsRetVal -sun_openklog(char *name, int *fd) +sun_openklog(char *name) { DEFiRet; + int fd; struct strioctl str; char errBuf[1024]; - if((*fd = open(name, O_RDONLY)) < 0) { + if((fd = open(name, O_RDONLY)) < 0) { rs_strerror_r(errno, errBuf, sizeof(errBuf)); - dbgprintf("imsolaris:openklog: cannot open %s: %s\n", + DBGPRINTF("imsolaris:openklog: cannot open %s: %s\n", name, errBuf); ABORT_FINALIZE(RS_RET_ERR_OPEN_KLOG); } @@ -410,87 +404,16 @@ sun_openklog(char *name, int *fd) str.ic_timout = 0; str.ic_len = 0; str.ic_dp = NULL; - if (ioctl(*fd, I_STR, &str) < 0) { + if (ioctl(fd, I_STR, &str) < 0) { rs_strerror_r(errno, errBuf, sizeof(errBuf)); - dbgprintf("imsolaris:openklog: cannot register to log " + DBGPRINTF("imsolaris:openklog: cannot register to log " "console messages: %s\n", errBuf); ABORT_FINALIZE(RS_RET_ERR_AQ_CONLOG); } - Pfd.fd = *fd; - Pfd.events = POLLIN; - dbgprintf("imsolaris/openklog: opened '%s' as fd %d.\n", name, *fd); + sun_Pfd.fd = fd; + sun_Pfd.events = POLLIN; + DBGPRINTF("imsolaris/openklog: opened '%s' as fd %d.\n", name, fd); finalize_it: RETiRet; } - - -/* this thread listens to the local stream log driver for log messages - * generated by this host, formats them, and queues them to the logger - * thread. - */ -/*ARGSUSED*/ -void -sun_sys_poll() -{ - int nfds; - - dbgprintf("imsolaris:sys_poll: sys_thread started\n"); - - for (;;) { - errno = 0; - - dbgprintf("imsolaris:sys_poll waiting for next message...\n"); - nfds = poll(&Pfd, 1, INFTIM); - - if (nfds == 0) - continue; - - if (nfds < 0) { - if (errno != EINTR) - dbgprintf("imsolaris:poll error"); - continue; - } - if (Pfd.revents & POLLIN) { - solaris_readLog(Pfd.fd); - } else { - /* TODO: shutdown, the rsyslog way (in v5!) -- check shutdown flag */ - if (Pfd.revents & (POLLNVAL|POLLHUP|POLLERR)) { - // TODO: trigger retry logic -/* logerror("kernel log driver poll error"); - (void) close(Pfd.fd); - Pfd.fd = -1; - */ - } - } - - } - /*NOTREACHED*/ -} - - -/* Open the log device, and pull up all pending messages. - */ -void -prepare_sys_poll() -{ - int nfds; - - Pfd.events = POLLIN; - - for (;;) { - dbgprintf("imsolaris:prepare_sys_poll waiting for next message...\n"); - nfds = poll(&Pfd, 1, 0); - if (nfds <= 0) { - break; - } - - if (Pfd.revents & POLLIN) { - solaris_readLog(Pfd.fd); - } else if (Pfd.revents & (POLLNVAL|POLLHUP|POLLERR)) { - //logerror("kernel log driver poll error"); - break; - } - } - -} diff --git a/plugins/imsolaris/sun_cddl.h b/plugins/imsolaris/sun_cddl.h index 0139b3d8..42e4b799 100644 --- a/plugins/imsolaris/sun_cddl.h +++ b/plugins/imsolaris/sun_cddl.h @@ -1,5 +1,7 @@ -rsRetVal sun_openklog(char *name, int *); +rsRetVal sun_openklog(char *name); void prepare_sys_poll(void); void sun_sys_poll(void); void sun_open_door(void); void sun_delete_doorfiles(void); + +extern struct pollfd sun_Pfd; /* Pollfd for local log device */ -- cgit