summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNoriko Hosoi <nhosoi@redhat.com>2010-03-03 10:37:18 -0800
committerNoriko Hosoi <nhosoi@redhat.com>2010-03-03 14:22:39 -0800
commitbe57c970629e65df13921d4628dddc30457110cc (patch)
treef0fa4e6d5862dcd8d4416809c6efdbf9764eb3e1
parente8f50642bd3e19ad528b453850304611ab86506d (diff)
downloadds-be57c970629e65df13921d4628dddc30457110cc.tar.gz
ds-be57c970629e65df13921d4628dddc30457110cc.tar.xz
ds-be57c970629e65df13921d4628dddc30457110cc.zip
539618 - Replication bulk import reports Invalid read/write
https://bugzilla.redhat.com/show_bug.cgi?id=539618 Back off this commit: commit 4205086e4f237a52eb9113cd95f9cf87b39e9ed4 Date: Mon Feb 22 08:49:49 2010 -0800 since this change could cause the deadlock between the thread eventually calling prot_free, which acquired the agreement lock, and other threads waiting for the agreement lock, which prevents the protocol stop. Instead of waiting for prot_thread_main done in prot_free, let prot_thread_main check the existence of the protocol field in the agreement. If it's not available, prot_thread_main quits.
-rw-r--r--ldap/servers/plugins/replication/repl5.h2
-rw-r--r--ldap/servers/plugins/replication/repl5_agmt.c14
-rw-r--r--ldap/servers/plugins/replication/repl5_protocol.c49
3 files changed, 27 insertions, 38 deletions
diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h
index 332bfb8b..97ce5569 100644
--- a/ldap/servers/plugins/replication/repl5.h
+++ b/ldap/servers/plugins/replication/repl5.h
@@ -410,7 +410,7 @@ void prot_initialize_replica(Repl_Protocol *rp);
/* stop protocol session in progress */
void prot_stop(Repl_Protocol *rp);
void prot_delete(Repl_Protocol **rpp);
-void prot_free(Repl_Protocol **rpp, int wait_for_done);
+void prot_free(Repl_Protocol **rpp);
PRBool prot_set_active_protocol (Repl_Protocol *rp, PRBool total);
void prot_clear_active_protocol (Repl_Protocol *rp);
Repl_Connection *prot_get_connection(Repl_Protocol *rp);
diff --git a/ldap/servers/plugins/replication/repl5_agmt.c b/ldap/servers/plugins/replication/repl5_agmt.c
index 2571d33c..13db1acd 100644
--- a/ldap/servers/plugins/replication/repl5_agmt.c
+++ b/ldap/servers/plugins/replication/repl5_agmt.c
@@ -558,7 +558,7 @@ agmt_start(Repl_Agmt *ra)
if (ra->protocol != NULL) {
slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name, "replication already started for agreement \"%s\"\n", agmt_get_long_name(ra));
PR_Unlock(ra->lock);
- prot_free(&prot, 0);
+ prot_free(&prot);
return 0;
}
@@ -606,7 +606,7 @@ windows_agmt_start(Repl_Agmt *ra)
if (ra->protocol != NULL) {
slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name, "replication already started for agreement \"%s\"\n", agmt_get_long_name(ra));
PR_Unlock(ra->lock);
- prot_free(&prot, 0);
+ prot_free(&prot);
return 0;
}
@@ -645,7 +645,7 @@ agmt_stop(Repl_Agmt *ra)
PR_Lock(ra->lock);
ra->stop_in_progress = PR_FALSE;
/* we do not reuse the protocol object so free it */
- prot_free(&ra->protocol, 1);
+ prot_free(&ra->protocol);
PR_Unlock(ra->lock);
return return_value;
}
@@ -2261,3 +2261,11 @@ ReplicaId agmt_get_consumerRID(Repl_Agmt *ra)
return ra->consumerRID;
}
+int
+agmt_has_protocol(Repl_Agmt *agmt)
+{
+ if (agmt) {
+ return NULL != agmt->protocol;
+ }
+ return 0;
+}
diff --git a/ldap/servers/plugins/replication/repl5_protocol.c b/ldap/servers/plugins/replication/repl5_protocol.c
index e909ed45..927c450a 100644
--- a/ldap/servers/plugins/replication/repl5_protocol.c
+++ b/ldap/servers/plugins/replication/repl5_protocol.c
@@ -77,7 +77,6 @@ typedef struct repl_protocol
/* States */
#define STATE_FINISHED 503
-#define STATE_DONE 504
#define STATE_BAD_STATE_SHOULD_NEVER_HAPPEN 599
/* Forward declarations */
@@ -174,10 +173,8 @@ prot_get_agreement(Repl_Protocol *rp)
-/*
- */
void
-prot_free(Repl_Protocol **rpp, int wait_for_done)
+prot_free(Repl_Protocol **rpp)
{
Repl_Protocol *rp = NULL;
PRIntervalTime interval;
@@ -185,30 +182,6 @@ prot_free(Repl_Protocol **rpp, int wait_for_done)
if (rpp == NULL || *rpp == NULL) return;
rp = *rpp;
- /*
- * This function has to wait until prot_thread_main exits if
- * prot_start is successfully called and prot_thread_main is
- * running. Otherwise, we may free Repl_Protocol while it's
- * being used.
- *
- * This function is supposed to be called when the protocol is
- * stopped either after prot_stop is called or when protocol
- * hasn't been started.
- *
- * The latter case: prot_free is called with wait_for_done = 0.
- * The former case: prot_free is called with wait_for_done = 1.
- * prot_stop had set STATE_FINISHED to next_state and stopped
- * the current activity. But depending upon the threads'
- * scheduling, prot_thread_main may not have gotten out of the
- * while loop at this moment. To make sure prot_thread_main
- * finished referring Repl_Protocol, we wait for the state set
- * to STATE_DONE.
- */
- interval = PR_MillisecondsToInterval(1000);
- while (wait_for_done && STATE_DONE != rp->state)
- {
- DS_Sleep(interval);
- }
PR_Lock(rp->lock);
if (NULL != rp->prp_incremental)
@@ -247,7 +220,7 @@ prot_delete(Repl_Protocol **rpp)
if (NULL != rp)
{
prot_stop(rp);
- prot_free(rpp, 1);
+ prot_free(rpp);
}
}
@@ -319,11 +292,13 @@ prot_thread_main(void *arg)
{
Repl_Protocol *rp = (Repl_Protocol *)arg;
int done;
+ Repl_Agmt *agmt = NULL;
PR_ASSERT(NULL != rp);
- if (rp->agmt) {
- set_thread_private_agmtname (agmt_get_long_name(rp->agmt));
+ agmt = rp->agmt;
+ if (agmt) {
+ set_thread_private_agmtname (agmt_get_long_name(agmt));
}
done = 0;
@@ -355,7 +330,7 @@ prot_thread_main(void *arg)
dev_debug("prot_thread_main(STATE_PERFORMING_TOTAL_UPDATE): end");
/* update the agreement entry to notify clients that
replica initialization is completed. */
- agmt_replica_init_done (rp->agmt);
+ agmt_replica_init_done (agmt);
break;
case STATE_FINISHED:
@@ -363,9 +338,15 @@ prot_thread_main(void *arg)
done = 1;
break;
}
- rp->state = rp->next_state;
+ if (agmt_has_protocol(agmt))
+ {
+ rp->state = rp->next_state;
+ }
+ else
+ {
+ done = 1;
+ }
}
- rp->state = STATE_DONE;
}