From faacd5ca7946d23deeff604c82a5e9e707a37ac2 Mon Sep 17 00:00:00 2001 From: Michael Adam Date: Thu, 3 Dec 2009 17:59:49 +0100 Subject: server: add a new control CTDB_CONTROL_TRANS3_COMMIT This is a simplified version of the trans2 commit control: It just rolls out the marshal buffer to all active nodes. It is the main ctdbd part of the re-implementation of the persistent transactions. The client code is changed to take a global lock to start a transaction and store into the marshal buffer instead of writing to the local tdb under a local transaction. The old transaction implementation is going to be removed in a later commit. Michael (This used to be ctdb commit f66428f9d2013080a414404c1ba6117888352fd6) --- ctdb/include/ctdb_private.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'ctdb/include') diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 142bbd5c71..0c611c287d 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -625,6 +625,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0, CTDB_CONTROL_TRANS2_ACTIVE = 116, CTDB_CONTROL_GET_LOG = 117, CTDB_CONTROL_CLEAR_LOG = 118, + CTDB_CONTROL_TRANS3_COMMIT = 119, }; /* @@ -1426,6 +1427,10 @@ int32_t ctdb_control_trans2_commit(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA recdata, bool *async_reply); +int32_t ctdb_control_trans3_commit(struct ctdb_context *ctdb, + struct ctdb_req_control *c, + TDB_DATA recdata, bool *async_reply); + int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb, uint32_t id); int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id); int32_t ctdb_control_transaction_cancel(struct ctdb_context *ctdb); -- cgit From 24d04a3e89da9c8fbaa7b908e280ace26f432289 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Tue, 8 Dec 2009 17:00:55 +0100 Subject: Rename a struct member for clarity (This used to be ctdb commit 6af5e74a21546d723008d69d6752ebebf898c947) --- ctdb/include/ctdb_private.h 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'ctdb/include') diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 0c611c287d..da0c7563e1 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -474,7 +474,7 @@ struct ctdb_db_context { struct tdb_wrap *ltdb; struct ctdb_registered_call *calls; /* list of registered calls */ uint32_t seqnum; - struct timed_event *te; + struct timed_event *seqnum_update; struct ctdb_traverse_local_handle *traverse; bool transaction_active; struct ctdb_vacuum_handle *vacuum_handle; -- cgit From 8dedde81cd7b3e8405932d3eb84d55b184275f54 Mon Sep 17 00:00:00 2001 From: Michael Adam Date: Fri, 11 Dec 2009 14:19:55 +0100 Subject: define CTDB_DB_SEQNUM_KEY - used with the new implementation of transactions. Michael (This used to be ctdb commit 4b1dbcf0853bdc4832d39a477823ae34f216da52) --- ctdb/include/ctdb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'ctdb/include') diff --git a/ctdb/include/ctdb.h b/ctdb/include/ctdb.h index 0270925025..552726a25d 100644 --- a/ctdb/include/ctdb.h +++ b/ctdb/include/ctdb.h @@ -127,6 +127,9 @@ struct ctdb_call_info { /* the key used for transaction locking on persistent databases */ #define CTDB_TRANSACTION_LOCK_KEY "__transaction_lock__" +/* the key used to store persistent db sequence number */ +#define CTDB_DB_SEQNUM_KEY "__db_sequence_number__" + enum control_state {CTDB_CONTROL_WAIT, CTDB_CONTROL_DONE, CTDB_CONTROL_ERROR, CTDB_CONTROL_TIMEOUT}; struct ctdb_client_control_state { -- cgit From 46de365e78c937ebbb7b219b36773a0aab20e7df Mon Sep 17 00:00:00 2001 From: Michael Adam Date: Fri, 11 Dec 2009 15:31:02 +0100 Subject: Add a new control CTDB_GET_DB_SEQNUM - fetch a persistent db's sequence number. 
Michael (This used to be ctdb commit a7e3b5fac6b3f5d74473f26eb86c067b35647996) --- ctdb/include/ctdb_private.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'ctdb/include') diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index da0c7563e1..e4f4aba091 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -626,6 +626,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0, CTDB_CONTROL_GET_LOG = 117, CTDB_CONTROL_CLEAR_LOG = 118, CTDB_CONTROL_TRANS3_COMMIT = 119, + CTDB_CONTROL_GET_DB_SEQNUM = 120, }; /* @@ -1537,4 +1538,8 @@ struct ctdb_log_state *ctdb_fork_with_logging(TALLOC_CTX *mem_ctx, int32_t ctdb_control_process_exists(struct ctdb_context *ctdb, pid_t pid); struct ctdb_client *ctdb_find_client_by_pid(struct ctdb_context *ctdb, pid_t pid); +int32_t ctdb_control_get_db_seqnum(struct ctdb_context *ctdb, + TDB_DATA indata, + TDB_DATA *outdata); + #endif -- cgit From f6ea3e6bcfce636d41bebb5599aa6b948b9bb884 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Wed, 9 Dec 2009 15:11:45 +0100 Subject: Make fetch_locked more scalable This patch improves the handling of the fetch_lock operation on non-persistent databases that ctdb clients have to do very frequently. The normal flow is the following: 1. Client does a local fetch_lock on the database 2. Client checks whether the local node is dmaster. If yes, everything is fine If no, continue here 3. Client unlocks the local record 4. Client issues a "get me the record" call to ctdbd 5. ctdbd goes out and fetches the dmaster role 6. ctdbd tells the client to retry 7. Client starts over again The problem is between step 6 and 7: Before the client has had the chance to retry (i.e. catch the record with a fetch_locked), another node might have come asking ctdbd to migrate away the record again. This is a real problem: I've seen >20 loops of this kind in real workloads. 
This patch does the following: Whenever ctdb receives a record as result of step 5, it puts the key on a "holdback list". As long as a key is on this list, a request to migrate away the dmaster is put on hold. It is the client's duty to issue the "CTDB_CONTROL_GOTIT" control when it has successfully done step 2 after having asked ctdb to fetch the record. This will release the key from the "holdback list" and re-issue all dmaster migration requests. As a safeguard against malicious clients, once a second (default 1000msecs, tunable "HoldbackCleanupInterval" in milliseconds) ctdbd goes over the list of held back keys, deletes them and releases all held back migration requests. (This used to be ctdb commit 5736e17c139c9a8049e235429aeae0c6c9d0e93d) --- ctdb/include/ctdb_private.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'ctdb/include') diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index e4f4aba091..041c0e3f89 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -95,6 +95,7 @@ struct ctdb_tunable { uint32_t traverse_timeout; uint32_t keepalive_interval; uint32_t keepalive_limit; + uint32_t holdback_cleanup_interval; uint32_t max_lacount; uint32_t recover_timeout; uint32_t recover_interval; @@ -397,6 +398,7 @@ struct ctdb_context { uint32_t recovery_mode; TALLOC_CTX *tickle_update_context; TALLOC_CTX *keepalive_ctx; + struct timed_event *holdback_cleanup_te; struct ctdb_tunable tunable; enum ctdb_freeze_mode freeze_mode[NUM_DB_PRIORITIES+1]; struct ctdb_freeze_handle *freeze_handles[NUM_DB_PRIORITIES+1]; @@ -478,6 +480,17 @@ struct ctdb_db_context { struct ctdb_traverse_local_handle *traverse; bool transaction_active; struct ctdb_vacuum_handle *vacuum_handle; + + /* + * The keys to hold back until CTDB_CONTROL_GOTIT is being + * sent by a client having forced a migration to us. 
+ */ + uint8_t **holdback_keys; + + /* + * The CTDB_REQ_CALLs held back according to "holdback_keys" + */ + struct ctdb_req_header **held_back; }; @@ -627,6 +640,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0, CTDB_CONTROL_CLEAR_LOG = 118, CTDB_CONTROL_TRANS3_COMMIT = 119, CTDB_CONTROL_GET_DB_SEQNUM = 120, + CTDB_CONTROL_GOTIT = 121, }; /* @@ -1170,6 +1184,11 @@ struct ctdb_control_wipe_database { uint32_t transaction_id; }; +struct ctdb_control_gotit { + uint32_t db_id; + uint8_t key[1]; +}; + /* state of a in-progress ctdb call in client */ @@ -1238,6 +1257,10 @@ void ctdb_start_keepalive(struct ctdb_context *ctdb); void ctdb_stop_keepalive(struct ctdb_context *ctdb); int32_t ctdb_run_eventscripts(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA data, bool *async_reply); +void ctdb_start_holdback_cleanup(struct ctdb_context *ctdb); +void ctdb_stop_holdback_cleanup(struct ctdb_context *ctdb); +int32_t ctdb_control_gotit(struct ctdb_context *ctdb, TDB_DATA indata); + void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node); void ctdb_call_resend_all(struct ctdb_context *ctdb); -- cgit From 0982299beda2f966e1485213ffe1280fac8b7f2b Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 15 Dec 2009 14:26:28 +1100 Subject: Revert "Make fetch_locked more scalable" This reverts commit 5736e17c139c9a8049e235429aeae0c6c9d0e93d. 
(This used to be ctdb commit 3d2d877d877146ca09a28a3a44f4840eb36fd377) --- ctdb/include/ctdb_private.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'ctdb/include') diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index d49e05143d..b6c4b2fa1a 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -95,7 +95,6 @@ struct ctdb_tunable { uint32_t traverse_timeout; uint32_t keepalive_interval; uint32_t keepalive_limit; - uint32_t holdback_cleanup_interval; uint32_t max_lacount; uint32_t recover_timeout; uint32_t recover_interval; @@ -396,7 +395,6 @@ struct ctdb_context { uint32_t recovery_mode; TALLOC_CTX *tickle_update_context; TALLOC_CTX *keepalive_ctx; - struct timed_event *holdback_cleanup_te; struct ctdb_tunable tunable; enum ctdb_freeze_mode freeze_mode[NUM_DB_PRIORITIES+1]; struct ctdb_freeze_handle *freeze_handles[NUM_DB_PRIORITIES+1]; @@ -478,17 +476,6 @@ struct ctdb_db_context { struct ctdb_traverse_local_handle *traverse; bool transaction_active; struct ctdb_vacuum_handle *vacuum_handle; - - /* - * The keys to hold back until CTDB_CONTROL_GOTIT is being - * sent by a client having forced a migration to us. 
- */ - uint8_t **holdback_keys; - - /* - * The CTDB_REQ_CALLs held back according to "holdback_keys" - */ - struct ctdb_req_header **held_back; }; @@ -638,7 +625,6 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0, CTDB_CONTROL_CLEAR_LOG = 118, CTDB_CONTROL_TRANS3_COMMIT = 119, CTDB_CONTROL_GET_DB_SEQNUM = 120, - CTDB_CONTROL_GOTIT = 121, }; /* @@ -1182,11 +1168,6 @@ struct ctdb_control_wipe_database { uint32_t transaction_id; }; -struct ctdb_control_gotit { - uint32_t db_id; - uint8_t key[1]; -}; - /* state of a in-progress ctdb call in client */ @@ -1255,10 +1236,6 @@ void ctdb_start_keepalive(struct ctdb_context *ctdb); void ctdb_stop_keepalive(struct ctdb_context *ctdb); int32_t ctdb_run_eventscripts(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA data, bool *async_reply); -void ctdb_start_holdback_cleanup(struct ctdb_context *ctdb); -void ctdb_stop_holdback_cleanup(struct ctdb_context *ctdb); -int32_t ctdb_control_gotit(struct ctdb_context *ctdb, TDB_DATA indata); - void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node); void ctdb_call_resend_all(struct ctdb_context *ctdb); -- cgit