summary | refs | log | tree | commit | diff | stats
path: root/ldap
diff options
context:
space:
mode:
author Rich Megginson <rmeggins@redhat.com> 2008-08-30 14:00:49 +0000
committer Rich Megginson <rmeggins@redhat.com> 2008-08-30 14:00:49 +0000
commit 8039b885768370d16a3ddc8ef3c96ff81af9d227 (patch)
tree f97abe24679583768b9c8495347ad1803fe6888c /ldap
parent 921dcc35ed8f37b5c8d9c899db603af1adff8758 (diff)
download ds-8039b885768370d16a3ddc8ef3c96ff81af9d227.tar.gz
ds-8039b885768370d16a3ddc8ef3c96ff81af9d227.tar.xz
ds-8039b885768370d16a3ddc8ef3c96ff81af9d227.zip
Bug Description: replica_generate_next_csn opcsn adjustment errors during concurrent MMR load
Reviewed by: nhosoi (Thanks!)
Fix Description: In csngen_adjust_time, even if the time diff <= remote_offset, we still need to keep track of it and use it so that we generate CSNs that have the same timestamp as the remote CSN. We use the local_offset to store that time diff. This sort of fits the semantics of local_offset, as the diff is usually caused by the sampled time update thread running slightly behind, not in sync with the remote server. The code in _csngen_adjust_local_time will take the local_offset into consideration when updating the sampled time.
One thing we have to be careful of is to _not_ reset the sequence number if the new generated time will be the same as the old generated time. If the old time is the same as the new time, we have to preserve the sequence number so that the next CSN generated will be greater than the previous one. The sequence number must be reset if the time was increased. The right thing to do is to set the sequence number to the remote sequence number + 1 in the case where we have advanced the time. If we have not advanced the time, we can't make the sequence number smaller, because we would then issue CSNs less than or equal to CSNs already issued.
In csngen_adjust_time, we have to take care to reset the seqnum properly - if the new timestamp is greater than the old timestamp, we set the sequence number to the remote seqnum + 1, regardless of whether the remote seq is less than or greater than the current seq.
With this patch, running the replication stress test, I do not see any time skew, I do not see any generate_next_csn error messages, and I do not see any duplicate or retrograde CSN issuance.
Platforms tested: RHEL5, Fedora 8, Fedora 9
Flag Day: no
Doc impact: no
Diffstat (limited to 'ldap')
-rw-r--r-- ldap/servers/slapd/csngen.c 88
1 files changed, 87 insertions, 1 deletions
diff --git a/ldap/servers/slapd/csngen.c b/ldap/servers/slapd/csngen.c
index 137b995c..6cf7fd08 100644
--- a/ldap/servers/slapd/csngen.c
+++ b/ldap/servers/slapd/csngen.c
@@ -322,6 +322,15 @@ int csngen_adjust_time (CSNGen *gen, const CSN* csn)
PR_RWLock_Wlock (gen->lock);
+ if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
+ cur_time = CSN_CALC_TSTAMP(gen);
+ slapi_log_error (SLAPI_LOG_REPL, NULL, "csngen_adjust_time: "
+ "gen state before %08lx%04x:%ld:%ld:%ld\n",
+ cur_time, gen->state.seq_num,
+ gen->state.sampled_time,
+ gen->state.local_offset,
+ gen->state.remote_offset);
+ }
/* make sure we have the current time */
csngen_update_time();
cur_time = g_sampled_time;
@@ -341,6 +350,8 @@ int csngen_adjust_time (CSNGen *gen, const CSN* csn)
cur_time = CSN_CALC_TSTAMP(gen);
if (remote_time >= cur_time)
{
+ time_t new_time = 0;
+
if (remote_seqnum > gen->state.seq_num )
{
if (remote_seqnum < CSN_MAX_SEQNUM)
@@ -370,6 +381,31 @@ int csngen_adjust_time (CSNGen *gen, const CSN* csn)
return CSN_LIMIT_EXCEEDED;
}
}
+ else if (remote_offset > 0) { /* still need to account for this */
+ gen->state.local_offset += remote_offset;
+ }
+
+ new_time = CSN_CALC_TSTAMP(gen);
+ /* let's revisit the seq num - if the new time is > the old
+ time, we should reset the seq number to remote + 1 if
+ this won't cause a wrap around */
+ if (new_time > cur_time) {
+ /* just set seq_num regardless of whether the current one
+ is < or > than the remote one - the goal of this function
+ is to make sure we generate CSNs > the remote CSN - if
+ we have increased the time, we can decrease the seqnum
+ and still guarantee that any new CSNs generated will be
+ > any current CSNs we have generated */
+ gen->state.seq_num = remote_seqnum + 1;
+ }
+ if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
+ slapi_log_error (SLAPI_LOG_REPL, NULL, "csngen_adjust_time: "
+ "gen state after %08lx%04x:%ld:%ld:%ld\n",
+ new_time, gen->state.seq_num,
+ gen->state.sampled_time,
+ gen->state.local_offset,
+ gen->state.remote_offset);
+ }
}
else if (gen->state.remote_offset > 0)
{
@@ -613,18 +649,58 @@ _csngen_adjust_local_time (CSNGen *gen, time_t cur_time)
}
else if (time_diff > 0)
{
+ time_t ts_before = CSN_CALC_TSTAMP(gen);
+ time_t ts_after = 0;
+ if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
+ time_t new_time = CSN_CALC_TSTAMP(gen);
+ slapi_log_error (SLAPI_LOG_REPL, NULL, "_csngen_adjust_local_time: "
+ "gen state before %08lx%04x:%ld:%ld:%ld\n",
+ new_time, gen->state.seq_num,
+ gen->state.sampled_time,
+ gen->state.local_offset,
+ gen->state.remote_offset);
+ }
+
gen->state.sampled_time = cur_time;
if (time_diff > gen->state.local_offset)
gen->state.local_offset = 0;
else
gen->state.local_offset = gen->state.local_offset - time_diff;
- gen->state.seq_num = 0;
+ /* only reset the seq_num if the new timestamp part of the CSN
+ is going to be greater than the old one - if they are the
+ same after the above adjustment (which can happen if
+ csngen_adjust_time has to store the offset in the
+ local_offset field) we must not allow the CSN to regress or
+ generate duplicate numbers */
+ ts_after = CSN_CALC_TSTAMP(gen);
+ if (ts_after > ts_before) {
+ gen->state.seq_num = 0; /* only reset if new time > old time */
+ }
+ if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
+ time_t new_time = CSN_CALC_TSTAMP(gen);
+ slapi_log_error (SLAPI_LOG_REPL, NULL, "_csngen_adjust_local_time: "
+ "gen state after %08lx%04x:%ld:%ld:%ld\n",
+ new_time, gen->state.seq_num,
+ gen->state.sampled_time,
+ gen->state.local_offset,
+ gen->state.remote_offset);
+ }
return CSN_SUCCESS;
}
else /* time was turned back */
{
+ if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
+ time_t new_time = CSN_CALC_TSTAMP(gen);
+ slapi_log_error (SLAPI_LOG_REPL, NULL, "_csngen_adjust_local_time: "
+ "gen state back before %08lx%04x:%ld:%ld:%ld\n",
+ new_time, gen->state.seq_num,
+ gen->state.sampled_time,
+ gen->state.local_offset,
+ gen->state.remote_offset);
+ }
+
if (abs (time_diff) > CSN_MAX_TIME_ADJUST)
{
slapi_log_error (SLAPI_LOG_FATAL, NULL, "_csngen_adjust_local_time: "
@@ -637,6 +713,16 @@ _csngen_adjust_local_time (CSNGen *gen, time_t cur_time)
gen->state.local_offset = MAX_VAL (gen->state.local_offset, abs (time_diff));
gen->state.seq_num = 0;
+ if (slapi_is_loglevel_set(SLAPI_LOG_REPL)) {
+ time_t new_time = CSN_CALC_TSTAMP(gen);
+ slapi_log_error (SLAPI_LOG_REPL, NULL, "_csngen_adjust_local_time: "
+ "gen state back after %08lx%04x:%ld:%ld:%ld\n",
+ new_time, gen->state.seq_num,
+ gen->state.sampled_time,
+ gen->state.local_offset,
+ gen->state.remote_offset);
+ }
+
return CSN_SUCCESS;
}
}