From dc13ffb1e0dc00f8e46a793e205c4da13e3678ce Mon Sep 17 00:00:00 2001 From: Thierry Bordaz Date: Fri, 1 Dec 2017 16:23:11 +0100 Subject: [PATCH] Ticket 49463: After cleanALLruv, there is a flow of keep alive DEL Bug Description: When cleanAllRuv is launched, it spawns cleanAllRuv on all replicas. Each replica will clean its changelog and database RUV AND in addition will DEL the keep alive entry of the target ReplicaID. So for the same entry (keep alive) there will be as many DEL as there are replicas. This flow of DEL is useless as only one DEL is enough. In addition, because of https://pagure.io/389-ds-base/issue/49466, replication may loop on each of those DELs. Fix Description: The fix is only to prevent the flow of DEL. It adds a flag ('original_task') in the task payload. The server receiving the task (replica_execute_cleanall_ruv_task) flags the task payload as 'original_task'. Conversely, the propagated cleanAllRuv (multimaster_extop_cleanruv) does not flag the task payload as 'original_task'. Only the original task does the DEL of the keep alive entry. https://pagure.io/389-ds-base/issue/49466 Reviewed by: ? 
Platforms tested: F23 Flag Day: no Doc impact: no --- ldap/servers/plugins/replication/repl5.h | 1 + ldap/servers/plugins/replication/repl5_replica.c | 14 ++++++++++++++ ldap/servers/plugins/replication/repl5_replica_config.c | 14 +++++++++++++- ldap/servers/plugins/replication/repl_extop.c | 2 ++ 4 files changed, 30 insertions(+), 1 deletion(-) diff --git a/ldap/servers/plugins/replication/repl5.h b/ldap/servers/plugins/replication/repl5.h index 4e206a0..8c51a67 100644 --- a/ldap/servers/plugins/replication/repl5.h +++ b/ldap/servers/plugins/replication/repl5.h @@ -822,6 +822,7 @@ typedef struct _cleanruv_data Slapi_DN *sdn; char *certify; char *force; + PRBool original_task; } cleanruv_data; typedef struct _cleanruv_purge_data diff --git a/ldap/servers/plugins/replication/repl5_replica.c b/ldap/servers/plugins/replication/repl5_replica.c index db3a8a0..3e578c3 100644 --- a/ldap/servers/plugins/replication/repl5_replica.c +++ b/ldap/servers/plugins/replication/repl5_replica.c @@ -2190,6 +2190,14 @@ replica_check_for_tasks(Replica *r, Slapi_Entry *e) data->force = slapi_ch_strdup(forcing); data->repl_root = NULL; + /* This is a corner case, a cleanAllRuv task was interrupted by a shutdown or a crash + * Let's assum this replica was the original receiver of the task. 
+ * It will delete the keep alive entry (and the DEL will be replicated) + * As it is a corner case, if it is not the original receiver of the task, + * it will just add one more DEL + */ + data->original_task = PR_TRUE; + thread = PR_CreateThread(PR_USER_THREAD, replica_cleanallruv_thread_ext, (void *)data, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD, PR_UNJOINABLE_THREAD, SLAPD_DEFAULT_THREAD_STACKSIZE); @@ -2284,6 +2292,12 @@ replica_check_for_tasks(Replica *r, Slapi_Entry *e) data->sdn = slapi_sdn_dup(r->repl_root); data->certify = slapi_ch_strdup(certify); + /* This is a corner case, a cleanAllRuv task was interrupted by a shutdown or a crash + * Let's assum this replica was the original receiver of the task. + * This flag has no impact on Abort cleanAllRuv + */ + data->original_task = PR_TRUE; + thread = PR_CreateThread(PR_USER_THREAD, replica_abort_task_thread, (void *)data, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD, PR_UNJOINABLE_THREAD, SLAPD_DEFAULT_THREAD_STACKSIZE); diff --git a/ldap/servers/plugins/replication/repl5_replica_config.c b/ldap/servers/plugins/replication/repl5_replica_config.c index e025f34..ba9ff30 100644 --- a/ldap/servers/plugins/replication/repl5_replica_config.c +++ b/ldap/servers/plugins/replication/repl5_replica_config.c @@ -1573,6 +1573,11 @@ replica_execute_cleanall_ruv_task(Object *r, ReplicaId rid, Slapi_Task *task, co data->repl_root = slapi_ch_strdup(basedn); data->force = slapi_ch_strdup(force_cleaning); + /* It is either a consequence of a direct ADD cleanAllRuv task + * or modify of the replica to add nsds5task: cleanAllRuv + */ + data->original_task = PR_TRUE; + thread = PR_CreateThread(PR_USER_THREAD, replica_cleanallruv_thread, (void *)data, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD, PR_UNJOINABLE_THREAD, SLAPD_DEFAULT_THREAD_STACKSIZE); @@ -1872,7 +1877,13 @@ done: */ delete_cleaned_rid_config(data); check_replicas_are_done_cleaning(data); - remove_keep_alive_entry(data->task, data->rid, data->repl_root); + if (data->original_task) { + 
cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, SLAPI_LOG_INFO, "Original task deletes Keep alive entry (%d).", data->rid); + remove_keep_alive_entry(data->task, data->rid, data->repl_root); + } else { + cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, SLAPI_LOG_INFO, "Propagated task does not delete Keep alive entry (%d).", data->rid); + } + clean_agmts(data); remove_cleaned_rid(data->rid); cleanruv_log(data->task, data->rid, CLEANALLRUV_ID, SLAPI_LOG_INFO, "Successfully cleaned rid(%d).", data->rid); @@ -2955,6 +2966,7 @@ replica_cleanall_ruv_abort(Slapi_PBlock *pb __attribute__((unused)), data->repl_root = slapi_ch_strdup(base_dn); data->sdn = NULL; data->certify = slapi_ch_strdup(certify_all); + data->original_task = PR_TRUE; thread = PR_CreateThread(PR_USER_THREAD, replica_abort_task_thread, (void *)data, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD, diff --git a/ldap/servers/plugins/replication/repl_extop.c b/ldap/servers/plugins/replication/repl_extop.c index c49c6bd..68e2544 100644 --- a/ldap/servers/plugins/replication/repl_extop.c +++ b/ldap/servers/plugins/replication/repl_extop.c @@ -1412,6 +1412,7 @@ multimaster_extop_abort_cleanruv(Slapi_PBlock *pb) data->rid = rid; data->repl_root = slapi_ch_strdup(repl_root); data->certify = slapi_ch_strdup(certify_all); + data->original_task = PR_FALSE; /* * Set the aborted rid and stop the cleaning */ @@ -1555,6 +1556,7 @@ multimaster_extop_cleanruv(Slapi_PBlock *pb) data->payload = slapi_ch_bvdup(extop_payload); data->force = slapi_ch_strdup(force); data->repl_root = slapi_ch_strdup(repl_root); + data->original_task = PR_FALSE; thread = PR_CreateThread(PR_USER_THREAD, replica_cleanallruv_thread_ext, (void *)data, PR_PRIORITY_NORMAL, PR_GLOBAL_THREAD, -- 2.5.5