From 0e53c2ae9d98fb7eef5d3d311b98fc63a437f894 Mon Sep 17 00:00:00 2001
From: Rich Megginson
Date: Wed, 11 Mar 2009 13:41:13 +0000
Subject: Resolves: bug 488866

Bug Description: crash in reliab15 test
Reviewed by: nkinder (Thanks!)
Fix Description: My earlier fix was for the case where the result reader
thread disconnects. But it looks like there is still a problem if the
update sender thread disconnects out from under the reader thread. We need
to use conn_connected() to test to see if the connection is connected
before we attempt to access conn->ld in the result reader thread. I also
improved the error messages so that I could tell if the errors were coming
from the update sender thread or the result reader thread.
Platforms tested: RHEL5
Flag Day: no
Doc impact: no
---
 ldap/servers/plugins/replication/repl5_connection.c | 16 +++++++++++++++-
 ldap/servers/plugins/replication/repl5_inc_protocol.c | 8 ++++----
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/ldap/servers/plugins/replication/repl5_connection.c b/ldap/servers/plugins/replication/repl5_connection.c
index 82bc76e1..37bec7ea 100644
--- a/ldap/servers/plugins/replication/repl5_connection.c
+++ b/ldap/servers/plugins/replication/repl5_connection.c
@@ -306,6 +306,11 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda
     while (1)
     {
+        if (!conn_connected(conn)) {
+            rc = -1;
+            return_value = CONN_NOT_CONNECTED;
+            break;
+        }
         rc = ldap_result(conn->ld, LDAP_RES_ANY , 1, &local_timeout, &res);
         if (0 != rc)
         {
@@ -344,11 +349,20 @@ conn_read_result_ex(Repl_Connection *conn, char **retoidp, struct berval **retda
             conn->last_ldap_error = LDAP_TIMEOUT;
             return_value = CONN_TIMEOUT;
         }
+        else if ((-1 == rc) && (CONN_NOT_CONNECTED == return_value))
+        {
+            /* must not access conn->ld if disconnected in another thread */
+            /* the other thread that actually did the conn_disconnect() */
+            /* will set the status and error info */
+            slapi_log_error(SLAPI_LOG_REPL, repl_plugin_name,
+                            "%s: Connection disconnected by another thread\n",
+                            agmt_get_long_name(conn->agmt));
+        }
         else if (-1 == rc)
         {
             /* Error */
             char *s = NULL;
-
+
             rc = ldap_get_lderrno(conn->ld, NULL, &s);
             conn->last_ldap_errmsg = s;
             conn->last_ldap_error = rc;
diff --git a/ldap/servers/plugins/replication/repl5_inc_protocol.c b/ldap/servers/plugins/replication/repl5_inc_protocol.c
index ebf06669..4e733dec 100644
--- a/ldap/servers/plugins/replication/repl5_inc_protocol.c
+++ b/ldap/servers/plugins/replication/repl5_inc_protocol.c
@@ -1798,7 +1798,7 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu
                 agmt_inc_last_update_changecount (prp->agmt, csn_get_replicaid(entry.op->csn), 1 /*skipped*/);
             }
             slapi_log_error(finished ? SLAPI_LOG_FATAL : slapi_log_urp, repl_plugin_name,
-                "%s: Consumer failed to replay change (uniqueid %s, CSN %s): %s. %s.\n",
+                "%s: Failed to send update operation to consumer (uniqueid %s, CSN %s): %s. %s.\n",
                 agmt_get_long_name(prp->agmt),
                 entry.op->target_address.uniqueid, csn_str,
                 ldap_err2string(error),
@@ -1811,7 +1811,7 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu
             return_value = UPDATE_CONNECTION_LOST;
             finished = 1;
             slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name,
-                "%s: Consumer failed to replay change (uniqueid %s, CSN %s): "
+                "%s: Failed to send update operation to consumer (uniqueid %s, CSN %s): "
                 "%s. Will retry later.\n",
                 agmt_get_long_name(prp->agmt),
                 entry.op->target_address.uniqueid, csn_str,
@@ -1822,7 +1822,7 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu
             return_value = UPDATE_TIMEOUT;
             finished = 1;
             slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name,
-                "%s: Consumer timed out to replay change (uniqueid %s, CSN %s): "
+                "%s: Timed out sending update operation to consumer (uniqueid %s, CSN %s): "
                 "%s.\n",
                 agmt_get_long_name(prp->agmt),
                 entry.op->target_address.uniqueid, csn_str,
@@ -1837,7 +1837,7 @@ send_updates(Private_Repl_Protocol *prp, RUV *remote_update_vector, PRUint32 *nu
             return_value = UPDATE_TRANSIENT_ERROR;
             finished = 1;
             slapi_log_error(SLAPI_LOG_FATAL, repl_plugin_name,
-                "%s: Failed to replay change (uniqueid %s, CSN %s): "
+                "%s: Failed to send update operation to consumer (uniqueid %s, CSN %s): "
                 "Local error. Will retry later.\n",
                 agmt_get_long_name(prp->agmt),
                 entry.op->target_address.uniqueid, csn_str);
--
cgit