From ce408229bc135aa89754861eaae58de379fd45c0 Mon Sep 17 00:00:00 2001 From: Mark Reynolds Date: Fri, 21 Sep 2012 12:38:32 -0400 Subject: [PATCH] Ticket 467 - CLEANALLRUV abort task should be able to ignore down replicas Bug Description: The abort task previously would wait for all the replicas to be online, and receive the extended op task. This made it impossible to abort the cleanallruv task, if you wanted to abort because a server was down. Fix Description: Changed the default behavior to ignore down replicas, and added a new attribute to the abort task entry: replica-certify-all: (yes,no). Default is "no". If set to "yes", then it will wait until all the replicas have received the abort task. Also fixed a crash caused by running the abort task agsint a server that does not have replication setup. The crashed was caused by freeing uninitialized pointers. https://fedorahosted.org/389/ticket/467 Reviewed by: richm(Thanks!) --- ldap/servers/plugins/replication/repl5.h | 1 + ldap/servers/plugins/replication/repl5_replica.c | 4 + .../plugins/replication/repl5_replica_config.c | 68 ++++++++++++++----- ldap/servers/plugins/replication/repl_extop.c | 5 ++ 4 files changed, 60 insertions(+), 18 deletions(-) diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h index 7b6cb8c..61b51b0 100644 --- a/ldap/servers/plugins/replication/repl5.h +++ b/ldap/servers/plugins/replication/repl5.h @@ -642,6 +642,7 @@ typedef struct _cleanruv_data CSN *maxcsn; char *repl_root; Slapi_DN *sdn; + char *certify; } cleanruv_data; /* replutil.c */ diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c index 9e0fb24..2b1d958 100644 --- a/ldap/servers/plugins/replication/repl5_replica.c +++ b/ldap/servers/plugins/replication/repl5_replica.c @@ -1914,6 +1914,7 @@ done: char *ridstr = NULL; char *repl_root; char *token = NULL; + char *certify = NULL; ReplicaId rid; int i; @@ -1935,6 +1936,7 @@ done: } repl_root = ldap_utf8strtok_r(iter, ":", &iter); + certify = ldap_utf8strtok_r(iter, ":", &iter); stop_ruv_cleaning(); maxcsn = replica_get_cleanruv_maxcsn(r, rid); delete_cleaned_rid(r, rid, maxcsn); @@ -1966,6 +1968,7 @@ done: data->payload = payload; data->repl_root = slapi_ch_strdup(repl_root); data->sdn = slapi_sdn_dup(r->repl_root); + data->certify = slapi_ch_strdup(certify); thread = PR_CreateThread(PR_USER_THREAD, replica_abort_task_thread, (void *)data, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD, @@ -1976,6 +1979,7 @@ done: slapi_sdn_free(&data->sdn); ber_bvfree(data->payload); slapi_ch_free_string(&data->repl_root); + slapi_ch_free_string(&data->certify); slapi_ch_free((void **)&data); } } diff --git a/ldap/servers/plugins/replication/repl5_replica_config.c b/ldap/servers/plugins/replication/repl5_replica_config.c index 38fce99..21f63d2 100644 --- a/ldap/servers/plugins/replication/repl5_replica_config.c +++ b/ldap/servers/plugins/replication/repl5_replica_config.c @@ -1262,7 +1262,8 @@ replica_cleanall_ruv_task(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter, PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Invalid replica id (%d) for task - (%s)", rid, slapi_sdn_get_dn(task_dn)); cleanruv_log(task, CLEANALLRUV_ID, "%s", returntext); - rc = LDAP_OPERATIONS_ERROR; + *returncode = LDAP_OPERATIONS_ERROR; + rc = SLAPI_DSE_CALLBACK_ERROR; goto out; } /* @@ -1270,7 +1271,9 @@ replica_cleanall_ruv_task(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter, */ dn = slapi_sdn_new_dn_byval(base_dn); if((r = replica_get_replica_from_dn(dn)) == NULL){ - *returncode = LDAP_OPERATIONS_ERROR ; + PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Could not find replica from dn(%s)",slapi_sdn_get_dn(dn)); + cleanruv_log(task, CLEANALLRUV_ID, "%s", returntext); + *returncode = LDAP_OPERATIONS_ERROR; rc = SLAPI_DSE_CALLBACK_ERROR; goto out; } @@ -1281,9 +1284,7 @@ replica_cleanall_ruv_task(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter, out: if(rc){ cleanruv_log(task, CLEANALLRUV_ID, "Task failed...(%d)", rc); - *returncode = rc; slapi_task_finish(task, *returncode); - rc = SLAPI_DSE_CALLBACK_ERROR; } else { rc = SLAPI_DSE_CALLBACK_OK; } @@ -2182,18 +2183,19 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter Replica *replica; ReplicaId rid; cleanruv_data *data = NULL; - Slapi_DN *sdn; + Slapi_DN *sdn = NULL; Object *r; - CSN *maxcsn; + CSN *maxcsn = NULL; const char *base_dn; const char *rid_str; - char *ridstr; + const char *certify_all; + char *ridstr = NULL; int rc = SLAPI_DSE_CALLBACK_OK; if(get_abort_cleanruv_task_count() >= CLEANRIDSIZ){ /* we are already running the maximum number of tasks */ - cleanruv_log(task, ABORT_CLEANALLRUV_ID, - "Exceeded maximum number of active ABORT CLEANALLRUV tasks(%d)",CLEANRIDSIZ); + PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Exceeded maximum number of active ABORT CLEANALLRUV tasks(%d)",CLEANRIDSIZ); + cleanruv_log(task, ABORT_CLEANALLRUV_ID, "%s", returntext); *returncode = LDAP_OPERATIONS_ERROR; return SLAPI_DSE_CALLBACK_ERROR; } @@ -2204,17 +2206,20 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter * Get our task settings */ if ((rid_str = fetch_attr(e, "replica-id", 0)) == NULL){ - cleanruv_log(task, ABORT_CLEANALLRUV_ID,"Missing required attr \"replica-id\""); + PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Missing required attr \"replica-id\""); + cleanruv_log(task, ABORT_CLEANALLRUV_ID, "%s", returntext); *returncode = LDAP_OBJECT_CLASS_VIOLATION; rc = SLAPI_DSE_CALLBACK_ERROR; goto out; } if ((base_dn = fetch_attr(e, "replica-base-dn", 0)) == NULL){ - cleanruv_log(task, ABORT_CLEANALLRUV_ID,"Missing required attr \"replica-base-dn\""); + PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Missing required attr \"replica-base-dn\""); + cleanruv_log(task, ABORT_CLEANALLRUV_ID, "%s", returntext); *returncode = LDAP_OBJECT_CLASS_VIOLATION; rc = SLAPI_DSE_CALLBACK_ERROR; goto out; } + certify_all = fetch_attr(e, "replica-certify-all", 0); /* * Check the rid */ @@ -2232,15 +2237,31 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter */ sdn = slapi_sdn_new_dn_byval(base_dn); if((r = replica_get_replica_from_dn(sdn)) == NULL){ - cleanruv_log(task, ABORT_CLEANALLRUV_ID,"Failed to find replica from dn(%s)", base_dn); + PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Failed to find replica from dn(%s)", base_dn); + cleanruv_log(task, ABORT_CLEANALLRUV_ID, "%s", returntext); *returncode = LDAP_OPERATIONS_ERROR; rc = SLAPI_DSE_CALLBACK_ERROR; goto out; } /* + * Check verify value + */ + if(certify_all){ + if(strcasecmp(certify_all,"yes") && strcasecmp(certify_all,"no")){ + PR_snprintf(returntext, SLAPI_DSE_RETURNTEXT_SIZE, "Invalid value for \"replica-certify-all\", the value " + "must be \"yes\" or \"no\"."); + cleanruv_log(task, ABORT_CLEANALLRUV_ID, "%s", returntext); + *returncode = LDAP_OPERATIONS_ERROR; + rc = SLAPI_DSE_CALLBACK_ERROR; + goto out; + } + } else { + certify_all = "no"; + } + /* * Create payload */ - ridstr = slapi_ch_smprintf("%d:%s", rid, base_dn); + ridstr = slapi_ch_smprintf("%d:%s:%s", rid, base_dn, certify_all); payload = create_ruv_payload(ridstr); if(payload == NULL){ @@ -2274,6 +2295,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter data->rid = rid; data->repl_root = slapi_ch_strdup(base_dn); data->sdn = NULL; + data->certify = slapi_ch_strdup(certify_all); thread = PR_CreateThread(PR_USER_THREAD, replica_abort_task_thread, (void *)data, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD, @@ -2286,7 +2308,6 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb, Slapi_Entry *e, Slapi_Entry *eAfter } out: - csn_free(&maxcsn); slapi_ch_free_string(&ridstr); slapi_sdn_free(&sdn); @@ -2321,7 +2342,7 @@ replica_abort_task_thread(void *arg) * to timing issues, we need to wait to grab the replica obj until we get here. */ if((data->repl_obj = replica_get_replica_from_dn(data->sdn)) == NULL){ - cleanruv_log(data->task, ABORT_CLEANALLRUV_ID, "Failed to get replica from dn (%s).", slapi_sdn_get_dn(data->sdn)); + cleanruv_log(data->task, ABORT_CLEANALLRUV_ID, "Failed to get replica object from dn (%s).", slapi_sdn_get_dn(data->sdn)); goto done; } if(data->replica == NULL && data->repl_obj){ @@ -2335,6 +2356,10 @@ replica_abort_task_thread(void *arg) */ while(agmt_not_notified && !slapi_is_shutting_down()){ agmt_obj = agmtlist_get_first_agreement_for_replica (data->replica); + if(agmt_obj == NULL){ + agmt_not_notified = 0; + break; + } while (agmt_obj){ agmt = (Repl_Agmt*)object_get_data (agmt_obj); if(!agmt_is_enabled(agmt) || get_agmt_agreement_type(agmt) == REPLICA_TYPE_WINDOWS){ @@ -2342,8 +2367,14 @@ replica_abort_task_thread(void *arg) continue; } if(replica_cleanallruv_send_abort_extop(agmt, data->task, data->payload)){ - agmt_not_notified = 1; - break; + if(strcasecmp(data->certify,"yes") == 0){ + /* we are verifying all the replicas receive the abort task */ + agmt_not_notified = 1; + break; + } else { + /* we do not care if we could not reach a replica, just continue as if we did */ + agmt_not_notified = 0; + } } else { /* success */ agmt_not_notified = 0; @@ -2373,7 +2404,7 @@ replica_abort_task_thread(void *arg) done: if(agmt_not_notified){ /* failure */ - cleanruv_log(data->task, ABORT_CLEANALLRUV_ID,"Abort task failed, will resume the task at the next server startup."); + cleanruv_log(data->task, ABORT_CLEANALLRUV_ID,"Abort task failed, will resume the task at the next server startup."); } else { /* * Clean up the config @@ -2390,6 +2421,7 @@ done: ber_bvfree(data->payload); } slapi_ch_free_string(&data->repl_root); + slapi_ch_free_string(&data->certify); slapi_sdn_free(&data->sdn); slapi_ch_free((void **)&data); } diff --git a/ldap/servers/plugins/replication/repl_extop.c b/ldap/servers/plugins/replication/repl_extop.c index 645e749..1b72dfb 100644 --- a/ldap/servers/plugins/replication/repl_extop.c +++ b/ldap/servers/plugins/replication/repl_extop.c @@ -1455,6 +1455,7 @@ multimaster_extop_abort_cleanruv(Slapi_PBlock *pb) char *extop_oid; char *repl_root; char *payload = NULL; + char *certify_all; char *iter; int rc = 0; @@ -1475,6 +1476,7 @@ multimaster_extop_abort_cleanruv(Slapi_PBlock *pb) } rid = atoi(ldap_utf8strtok_r(payload, ":", &iter)); repl_root = ldap_utf8strtok_r(iter, ":", &iter); + certify_all = ldap_utf8strtok_r(iter, ":", &iter); if(!is_cleaned_rid(rid) || is_task_aborted(rid)){ /* This replica has already been aborted, or was never cleaned, or already finished cleaning */ @@ -1521,6 +1523,7 @@ multimaster_extop_abort_cleanruv(Slapi_PBlock *pb) data->payload = slapi_ch_bvdup(extop_payload); data->rid = rid; data->repl_root = slapi_ch_strdup(repl_root); + data->certify = slapi_ch_strdup(certify_all); /* * Stop the cleaning, and delete the rid */ @@ -1541,6 +1544,8 @@ multimaster_extop_abort_cleanruv(Slapi_PBlock *pb) } slapi_log_error( SLAPI_LOG_REPL, repl_plugin_name, "Abort cleanAllRUV task: unable to create abort " "thread. Aborting task.\n"); + slapi_ch_free_string(&data->repl_root); + slapi_ch_free_string(&data->certify); rc = LDAP_OPERATIONS_ERROR; } -- 1.7.1