summaryrefslogtreecommitdiffstats
path: root/ldap/servers/plugins/replication
diff options
context:
space:
mode:
author: Rich Megginson <rmeggins@redhat.com> 2007-11-19 17:23:50 +0000
committer: Rich Megginson <rmeggins@redhat.com> 2007-11-19 17:23:50 +0000
commit: d2e30358c476ccc738fa29cac6d4170ad12e975c (patch)
tree: e9611d56c1de37999cf1fb3025b6b69e379d019a /ldap/servers/plugins/replication
parent: 7c58411ef851a5c8a10dde57f4d2eeee076f15c1 (diff)
Resolves: bug 388021
Bug Description: MMR breaks from master that has been reinited
Reviewed by: nkinder (Thanks!)
Fix Description: This problem occurs when you have two or more masters, and updates that originated at one master have been sent to the other masters (so that the other masters have a valid min/max CSN for that replica in the RUV). If that master needs to be reinitialized for some reason (crash, etc.), the reinit will erase the changelog. The RUV for that master will now contain CSNs that are not in the changelog. If that master attempts to update another master, it will first look at the RUV from the consumer, which will contain the old CSNs, and it will look for those CSNs in the changelog, fail, and abort the update process, meaning this master can no longer send updates to other servers. The solution is for the master to just use the min CSN in its own RUV as the new starting point, if it has not been purged. In the case of purging, if the CSN is not found, this means the consumer is too far behind and must be reinitialized.
Platforms tested: RHEL5 x86_64
Flag Day: no
Doc impact: no
Diffstat (limited to 'ldap/servers/plugins/replication')
-rw-r--r-- ldap/servers/plugins/replication/cl5_api.c 36
1 files changed, 34 insertions, 2 deletions
diff --git a/ldap/servers/plugins/replication/cl5_api.c b/ldap/servers/plugins/replication/cl5_api.c
index b8ab11d3..034174ab 100644
--- a/ldap/servers/plugins/replication/cl5_api.c
+++ b/ldap/servers/plugins/replication/cl5_api.c
@@ -5707,18 +5707,47 @@ static int _cl5PositionCursorForReplay (ReplicaId consumerRID, const RUV *consum
to any consumers; that is, we can assume that no changes were lost due to
either changelog purging or database reload - bug# 603061 - richm@netscape.com
*/
- if (rc == 0 || (rc == DB_NOTFOUND && !ruv_has_csns(file->purgeRUV)))
+ if ((rc == DB_NOTFOUND) && !ruv_has_csns(file->purgeRUV))
{
+ /* use the supplier min csn for the buffer start csn - we know
+ this csn is in our changelog */
+ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl,
+ "%s: CSN %s not found and no purging, probably a reinit\n",
+ agmt_name, csnStr);
+ if ((RUV_SUCCESS == ruv_get_min_csn(supplierRuv, &startCSN)) &&
+ startCSN)
+ { /* must now free startCSN */
+ csn_as_string(startCSN, PR_FALSE, csnStr);
+ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl,
+ "%s: Will try to use supplier min CSN %s to load changelog\n",
+ agmt_name, csnStr);
+ rc = clcache_load_buffer (clcache, startCSN, DB_SET);
+ }
+ else
+ {
+ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl,
+ "%s: Could not get the min csn from the supplier RUV\n",
+ agmt_name);
+ }
+ }
+
+ if (rc == 0) {
haveChanges = PR_TRUE;
rc = CL5_SUCCESS;
- slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl,
+ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl,
"%s: CSN %s found, position set for replay\n", agmt_name, csnStr);
+ if (startCSN != csns[i]) {
+ csn_free(&startCSN);
+ }
break;
}
else if (rc == DB_NOTFOUND) /* entry not found */
{
/* check whether this csn should be present */
rc = _cl5CheckMissingCSN (startCSN, supplierRuv, file);
+ if (startCSN != csns[i]) {
+ csn_free(&startCSN);
+ }
if (rc == CL5_MISSING_DATA) /* we should have had the change but we don't */
{
slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name_cl,
@@ -5735,6 +5764,9 @@ static int _cl5PositionCursorForReplay (ReplicaId consumerRID, const RUV *consum
}
else
{
+ if (startCSN != csns[i]) {
+ csn_free(&startCSN);
+ }
/* db error */
slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name_cl,