From 85d218f6caded4b830774478827755baec4df80e Mon Sep 17 00:00:00 2001 From: Mark Reynolds Date: May 21 2020 20:17:40 +0000 Subject: Issue 51095 - Check the return code from the time functions Bug Description: On some cloud platforms we see issues with replication time skew. It's not clear if the time functions are failing or just returning the wrong value. We do not check the result code from any of the time functions. Fix Description: Add a wrapper function for clock_gettime() that will check for errors, log a message, and retry a few times. We retry because getting the time is so critical to things like replication and logging. Also added a check for large jump in CSN generation times, and we log a message if detected. relates: https://pagure.io/389-ds-base/issue/51095 Reviewed by: tbordaz, lkrispen, mhonek, and firstyear(Thanks!) --- diff --git a/.gitignore b/.gitignore index 7a92af0..d4edd33 100644 --- a/.gitignore +++ b/.gitignore @@ -234,3 +234,4 @@ src/cockpit/389-console/node_modules/ ldap/servers/slapd/rust-slapi-private.h vendor vendor.tar.gz +/build/ diff --git a/ldap/servers/slapd/csngen.c b/ldap/servers/slapd/csngen.c index 3e26351..e1fa6d4 100644 --- a/ldap/servers/slapd/csngen.c +++ b/ldap/servers/slapd/csngen.c @@ -185,6 +185,12 @@ csngen_new_csn(CSNGen *gen, CSN **csn, PRBool notify) /* check if the time should be adjusted */ delta = cur_time - gen->state.sampled_time; + if (delta > _SEC_PER_DAY || delta < (-1 * _SEC_PER_DAY)) { + /* We had a jump larger than a day */ + slapi_log_err(SLAPI_LOG_REPL, "csngen_new_csn", + "Detected large jump in CSN time. Delta: %d (current time: %ld vs previous time: %ld)\n", + delta, cur_time, gen->state.sampled_time); + } if (delta > 0) { rc = _csngen_adjust_local_time(gen, cur_time); if (rc != CSN_SUCCESS) { diff --git a/ldap/servers/slapd/time.c b/ldap/servers/slapd/time.c index 8048a33..142aa2f 100644 --- a/ldap/servers/slapd/time.c +++ b/ldap/servers/slapd/time.c @@ -61,6 +61,42 @@ poll_current_time() return 0; } +/* + * On some cloud platforms the time functions don't always work as expected. + * We should be checking for errors. Since getting the time is so important + * to the server, if we do get an error we should retry a few times before + * giving up. + */ +static int32_t +slapi_clock_gettime(int64_t clock_id, struct timespec *tp) +{ + int32_t count = 0; + + while (count < 5) { + if (clock_gettime(clock_id, tp) == 0) { + /* Success, we got the time */ + if (count > 0) { + /* This succeeded on a retry, log a follow up message */ + slapi_log_err(SLAPI_LOG_NOTICE, "slapi_clock_gettime", + "Successfully retrieved system time\n"); + } + return 0; + } else { + /* Failed to get the system time */ + slapi_log_err(SLAPI_LOG_ERR, "slapi_clock_gettime", + "Getting system time failed! Errno (%d) (%s) Trying again ...\n", + errno, slapd_system_strerror(errno)); + DS_Sleep(PR_MillisecondsToInterval(100)); + count++; + } + } + slapi_log_err(SLAPI_LOG_CRIT, "slapi_clock_gettime", + "Getting system time failed! Errno (%d) (%s) Giving up.\n", + errno, slapd_system_strerror(errno)); + return -1; + +} + time_t current_time(void) { @@ -69,8 +105,8 @@ current_time(void) * but this should be removed in favour of the * more accurately named slapi_current_utc_time */ - struct timespec now; - clock_gettime(CLOCK_REALTIME, &now); + struct timespec now = {0}; + slapi_clock_gettime(CLOCK_REALTIME, &now); return now.tv_sec; } @@ -83,24 +119,24 @@ slapi_current_time(void) struct timespec slapi_current_rel_time_hr(void) { - struct timespec now; - clock_gettime(CLOCK_MONOTONIC, &now); + struct timespec now = {0}; + slapi_clock_gettime(CLOCK_MONOTONIC, &now); return now; } struct timespec slapi_current_utc_time_hr(void) { - struct timespec ltnow; - clock_gettime(CLOCK_REALTIME, <now); + struct timespec ltnow = {0}; + slapi_clock_gettime(CLOCK_REALTIME, <now); return ltnow; } time_t slapi_current_utc_time(void) { - struct timespec ltnow; - clock_gettime(CLOCK_REALTIME, <now); + struct timespec ltnow = {0}; + slapi_clock_gettime(CLOCK_REALTIME, <now); return ltnow.tv_sec; } @@ -108,9 +144,9 @@ void slapi_timestamp_utc_hr(char *buf, size_t bufsize) { PR_ASSERT(bufsize >= SLAPI_TIMESTAMP_BUFSIZE); - struct timespec ltnow; + struct timespec ltnow = {0}; struct tm utctm; - clock_gettime(CLOCK_REALTIME, <now); + slapi_clock_gettime(CLOCK_REALTIME, <now); gmtime_r(&(ltnow.tv_sec), &utctm); strftime(buf, bufsize, "%Y%m%d%H%M%SZ", &utctm); } @@ -252,7 +288,7 @@ slapi_timespec_expire_at(time_t timeout, struct timespec *expire) expire->tv_sec = 0; expire->tv_nsec = 0; } else { - clock_gettime(CLOCK_MONOTONIC, expire); + slapi_clock_gettime(CLOCK_MONOTONIC, expire); expire->tv_sec += timeout; } } @@ -278,8 +314,8 @@ slapi_timespec_expire_check(struct timespec *expire) if (expire->tv_sec == 0 && expire->tv_nsec == 0) { return TIMER_CONTINUE; } - struct timespec now; - clock_gettime(CLOCK_MONOTONIC, &now); + struct timespec now = {0}; + slapi_clock_gettime(CLOCK_MONOTONIC, &now); if (now.tv_sec > expire->tv_sec || (expire->tv_sec == now.tv_sec && now.tv_sec > expire->tv_nsec)) { return TIMER_EXPIRED;