summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Rich Megginson <rmeggins@redhat.com> 2009-03-11 13:41:13 +0000
committer: Rich Megginson <rmeggins@redhat.com> 2009-03-11 13:41:13 +0000
commit: 0e53c2ae9d98fb7eef5d3d311b98fc63a437f894 (patch)
tree: 6b3847cdd09738d051e5746ed7519548dc47d82b
parent: 2e2a3437d25de630eb298bf60f9375f9c8d4497a (diff)
download: ds-0e53c2ae9d98fb7eef5d3d311b98fc63a437f894.tar.gz
ds-0e53c2ae9d98fb7eef5d3d311b98fc63a437f894.tar.xz
ds-0e53c2ae9d98fb7eef5d3d311b98fc63a437f894.zip
Resolves: bug 488866
Bug Description: crash in reliab15 test
Reviewed by: nkinder (Thanks!)
Fix Description: My earlier fix covered the case where the result reader thread disconnects, but there is still a problem if the update sender thread disconnects the connection out from under the reader thread. The result reader thread needs to call conn_connected() to check that the connection is still connected before it attempts to access conn->ld. I also improved the error messages so that I could tell whether the errors were coming from the update sender thread or the result reader thread.
Platforms tested: RHEL5
Flag Day: no
Doc impact: no
-rw-r--r-- ldap/servers/plugins/replication/repl5_connection.c 16
-rw-r--r-- ldap/servers/plugins/replication/repl5_inc_protocol.c 8
2 files changed, 19 insertions, 5 deletions
diff --git a/ldap/servers/plugins/replication/repl5_connection.c b/ldap/servers/plugins/replication/repl5_connection.c
index 82bc76e1..37bec7ea 100644
--- a/ldap/servers/plugins/replication/repl5_connection.c
+++ b/ldap/servers/plugins/replication/repl5_connection.c
@@ -306,6 +306,11 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda
while (1)
{
+ if (!conn_connected(conn)) {
+ rc = -1;
+ return_value = CONN_NOT_CONNECTED;
+ break;
+ }
rc = ldap_result(conn->ld, LDAP_RES_ANY , 1, &local_timeout, &res);
if (0 != rc)
{
@@ -344,11 +349,20 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda
conn->last_ldap_error = LDAP_TIMEOUT;
return_value = CONN_TIMEOUT;
}
+ else if ((-1 == rc) && (CONN_NOT_CONNECTED == return_value))
+ {
+ /* must not access conn->ld if disconnected in another thread */
+ /* the other thread that actually did the conn_disconnect() */
+ /* will set the status and error info */
+ slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name,
+ "%s: Connection disconnected by another thread\n",
+ agmt_get_long_name(conn->agmt));
+ }
else if (-1 == rc)
{
/* Error */
char *s = NULL;
-
+
rc = ldap_get_lderrno(conn->ld, NULL, &s);
conn->last_ldap_errmsg = s;
conn->last_ldap_error = rc;
diff --git a/ldap/servers/plugins/replication/repl5_inc_protocol.c b/ldap/servers/plugins/replication/repl5_inc_protocol.c
index ebf06669..4e733dec 100644
--- a/ldap/servers/plugins/replication/repl5_inc_protocol.c
+++ b/ldap/servers/plugins/replication/repl5_inc_protocol.c
@@ -1798,7 +1798,7 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu
agmt_inc_last_update_changecount (prp->agmt, csn_get_replicaid(entry.op->csn), 1 /*skipped*/);
}
slapi_log_error(finished ? SLAPI_LOG_FATAL : slapi_log_urp, repl_plugin_name,
- "%s: Consumer failed to replay change (uniqueid %s, CSN %s): %s. %s.\n",
+ "%s: Failed to send update operation to consumer (uniqueid %s, CSN %s): %s. %s.\n",
agmt_get_long_name(prp->agmt),
entry.op->target_address.uniqueid, csn_str,
ldap_err2string(error),
@@ -1811,7 +1811,7 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu
return_value = UPDATE_CONNECTION_LOST;
finished = 1;
slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name,
- "%s: Consumer failed to replay change (uniqueid %s, CSN %s): "
+ "%s: Failed to send update operation to consumer (uniqueid %s, CSN %s): "
"%s. Will retry later.\n",
agmt_get_long_name(prp->agmt),
entry.op->target_address.uniqueid, csn_str,
@@ -1822,7 +1822,7 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu
return_value = UPDATE_TIMEOUT;
finished = 1;
slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name,
- "%s: Consumer timed out to replay change (uniqueid %s, CSN %s): "
+ "%s: Timed out sending update operation to consumer (uniqueid %s, CSN %s): "
"%s.\n",
agmt_get_long_name(prp->agmt),
entry.op->target_address.uniqueid, csn_str,
@@ -1837,7 +1837,7 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu
return_value = UPDATE_TRANSIENT_ERROR;
finished = 1;
slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name,
- "%s: Failed to replay change (uniqueid %s, CSN %s): "
+ "%s: Failed to send update operation to consumer (uniqueid %s, CSN %s): "
"Local error. Will retry later.\n",
agmt_get_long_name(prp->agmt),
entry.op->target_address.uniqueid, csn_str);