From df5dd43e7c6a2c1cb71bf7ca1566c3a29a2fc1e3 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 5 Nov 2007 13:36:11 +1100 Subject: add a new tunable "CheckNodesFile" that when set to 0 will disable the check in the recovery daemon that all nodes are using the same /etc/ctdb/nodes file. Also add some more missing checks that the pnn used is a valid pnn before using it to dereference the ctdb->nodes array. This is useful since it allows us to add more physical nodes to an existing cluster without having to bring down the entire cluster. The procedure to add an additional node to an existing cluster would then be: 1, on all nodes set CheckNodesFile=0 using 'ctdb setvar' 2, on all nodes add CTDB_SET_CheckNodesFile=0 to /etc/sysconfig/ctdb For each node, one at a time : 3, use 'ctdb disable' to stop the hosted services 4, service ctdb stop 5, service ctdb start Once all nodes have been restarted 6, on all nodes remove CTDB_SET_CheckNodesFile=0 from /etc/sysconfig/ctdb 7, on all nodes set CheckNodesFile=1 using 'ctdb setvar' (re-enabling the check) 8, configure and start up the new node During this procedure, only one node at a time is brought down/restarted, and only for a short period. 
(This used to be ctdb commit 462501a32143e943ce350bd904a47c0955414a51) --- ctdb/include/ctdb_private.h | 1 + ctdb/server/ctdb_persistent.c | 13 ++++++++++++- ctdb/server/ctdb_recoverd.c | 26 +++++++++++++++++++++----- ctdb/server/ctdb_server.c | 12 +++++++++++- ctdb/server/ctdb_takeover.c | 6 ++++++ ctdb/server/ctdb_tunables.c | 1 + 6 files changed, 52 insertions(+), 7 deletions(-) (limited to 'ctdb') diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 7b98683e9f..9df80b64af 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -88,6 +88,7 @@ struct ctdb_tunable { uint32_t database_hash_size; uint32_t rerecovery_timeout; uint32_t enable_bans; + uint32_t check_nodes_file; uint32_t deterministic_public_ips; }; diff --git a/ctdb/server/ctdb_persistent.c b/ctdb/server/ctdb_persistent.c index 16e2806546..3f941de606 100644 --- a/ctdb/server/ctdb_persistent.c +++ b/ctdb/server/ctdb_persistent.c @@ -91,9 +91,20 @@ int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb, state->c = c; for (i=0;ivnn_map->size;i++) { - struct ctdb_node *node = ctdb->nodes[ctdb->vnn_map->map[i]]; + uint32_t nodeid; + struct ctdb_node *node; int ret; + + nodeid = ctdb->vnn_map->map[i]; + if ( !ctdb_validate_pnn(ctdb, nodeid) ) { + DEBUG(0, ("Cant update persistent record to non-existent vnn node:%u.\n", nodeid)); + continue; + } + + + node = ctdb->nodes[nodeid]; + /* only send to active nodes */ if (node->flags & NODE_FLAGS_INACTIVE) { continue; diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 9c5fdda954..cd75bb82b8 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -1786,6 +1786,8 @@ again: they are the same as for this node */ for (j=0; jnum; j++) { + uint32_t node_count; + if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { continue; } @@ -1801,20 +1803,34 @@ again: goto again; } - /* if the nodes disagree on how many nodes there are - then this is a good reason to try recovery + 
/* If the nodes disagree on how many nodes there are + then this is a good reason to try recovery. + This check might be disabled, for example when adding nodes + to an existing cluster. */ if (remote_nodemap->num != nodemap->num) { DEBUG(0, (__location__ " Remote node:%u has different node count. %u vs %u of the local node\n", nodemap->nodes[j].pnn, remote_nodemap->num, nodemap->num)); - do_recovery(rec, mem_ctx, pnn, num_active, nodemap, vnnmap, nodemap->nodes[j].pnn); - goto again; + + /* If we have disabled checking the nodes file we just + print a message and continue. + */ + if (ctdb->tunable.check_nodes_file == 0) { + DEBUG(0, ("Node file checking is disabled. Ignoring that node-count is inconsistent\n")); + } else { + do_recovery(rec, mem_ctx, pnn, num_active, nodemap, vnnmap, nodemap->nodes[j].pnn); + goto again; + } } /* if the nodes disagree on which nodes exist and are active, then that is also a good reason to do recovery */ - for (i=0;inum;i++) { + node_count = nodemap->num; + if (remote_nodemap->num < node_count) { + node_count = remote_nodemap->num; + } + for (i=0;inodes[i].pnn != nodemap->nodes[i].pnn) { DEBUG(0, (__location__ " Remote node:%u has different nodemap pnn for %d (%u vs %u).\n", nodemap->nodes[j].pnn, i, diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c index dddf90753b..497ae1f695 100644 --- a/ctdb/server/ctdb_server.c +++ b/ctdb/server/ctdb_server.c @@ -204,7 +204,17 @@ uint32_t ctdb_get_num_active_nodes(struct ctdb_context *ctdb) int i; uint32_t count=0; for (i=0;ivnn_map->size;i++) { - struct ctdb_node *node = ctdb->nodes[ctdb->vnn_map->map[i]]; + uint32_t nodeid; + struct ctdb_node *node; + + nodeid = ctdb->vnn_map->map[i]; + if ( !ctdb_validate_pnn(ctdb, nodeid) ) { + DEBUG(0, ("Ignoring non-existent vnn node:%u when counting number of active nodes.\n", nodeid)); + continue; + } + + + node = ctdb->nodes[nodeid]; if (!(node->flags & NODE_FLAGS_INACTIVE)) { count++; } diff --git a/ctdb/server/ctdb_takeover.c 
b/ctdb/server/ctdb_takeover.c index ec3455e4c0..324ab70370 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -694,6 +694,12 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap) if (tmp_ip->pnn == -1) { continue; } + + if (!ctdb_validate_pnn(ctdb, tmp_ip->pnn)) { + tmp_ip->pnn = -1; + continue; + } + if (nodemap->nodes[tmp_ip->pnn].flags & mask) { tmp_ip->pnn = -1; } diff --git a/ctdb/server/ctdb_tunables.c b/ctdb/server/ctdb_tunables.c index da1f6e6ba7..f738ae3260 100644 --- a/ctdb/server/ctdb_tunables.c +++ b/ctdb/server/ctdb_tunables.c @@ -44,6 +44,7 @@ static const struct { { "DatabaseHashSize", 10000, offsetof(struct ctdb_tunable, database_hash_size) }, { "RerecoveryTimeout", 10, offsetof(struct ctdb_tunable, rerecovery_timeout) }, { "EnableBans", 1, offsetof(struct ctdb_tunable, enable_bans) }, + { "CheckNodesFile", 1, offsetof(struct ctdb_tunable, check_nodes_file) }, { "DeterministicIPs", 0, offsetof(struct ctdb_tunable, deterministic_public_ips) }, }; -- cgit From 3c1f9882a804c7e78704f41b6e2765a1fe938fa0 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 12 Nov 2007 10:23:35 +1100 Subject: revert 773 (This used to be ctdb commit 5a1c8f458ddc9b0ff532afda6007e32db10a71c8) --- ctdb/include/ctdb_private.h | 1 - ctdb/server/ctdb_persistent.c | 13 +------------ ctdb/server/ctdb_recoverd.c | 26 +++++--------------------- ctdb/server/ctdb_server.c | 12 +----------- ctdb/server/ctdb_takeover.c | 6 ------ ctdb/server/ctdb_tunables.c | 1 - 6 files changed, 7 insertions(+), 52 deletions(-) (limited to 'ctdb') diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 9df80b64af..7b98683e9f 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -88,7 +88,6 @@ struct ctdb_tunable { uint32_t database_hash_size; uint32_t rerecovery_timeout; uint32_t enable_bans; - uint32_t check_nodes_file; uint32_t deterministic_public_ips; }; diff --git 
a/ctdb/server/ctdb_persistent.c b/ctdb/server/ctdb_persistent.c index 3f941de606..16e2806546 100644 --- a/ctdb/server/ctdb_persistent.c +++ b/ctdb/server/ctdb_persistent.c @@ -91,20 +91,9 @@ int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb, state->c = c; for (i=0;ivnn_map->size;i++) { - uint32_t nodeid; - struct ctdb_node *node; + struct ctdb_node *node = ctdb->nodes[ctdb->vnn_map->map[i]]; int ret; - - nodeid = ctdb->vnn_map->map[i]; - if ( !ctdb_validate_pnn(ctdb, nodeid) ) { - DEBUG(0, ("Cant update persistent record to non-existent vnn node:%u.\n", nodeid)); - continue; - } - - - node = ctdb->nodes[nodeid]; - /* only send to active nodes */ if (node->flags & NODE_FLAGS_INACTIVE) { continue; diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index cd75bb82b8..9c5fdda954 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -1786,8 +1786,6 @@ again: they are the same as for this node */ for (j=0; jnum; j++) { - uint32_t node_count; - if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { continue; } @@ -1803,34 +1801,20 @@ again: goto again; } - /* If the nodes disagree on how many nodes there are - then this is a good reason to try recovery. - This check might be disabled, for example when adding nodes - to an existing cluster. + /* if the nodes disagree on how many nodes there are + then this is a good reason to try recovery */ if (remote_nodemap->num != nodemap->num) { DEBUG(0, (__location__ " Remote node:%u has different node count. %u vs %u of the local node\n", nodemap->nodes[j].pnn, remote_nodemap->num, nodemap->num)); - - /* If we have disabled checking the nodes file we just - print a message and continue. - */ - if (ctdb->tunable.check_nodes_file == 0) { - DEBUG(0, ("Node file checking is disabled. 
Ignoring that node-count is inconsistent\n")); - } else { - do_recovery(rec, mem_ctx, pnn, num_active, nodemap, vnnmap, nodemap->nodes[j].pnn); - goto again; - } + do_recovery(rec, mem_ctx, pnn, num_active, nodemap, vnnmap, nodemap->nodes[j].pnn); + goto again; } /* if the nodes disagree on which nodes exist and are active, then that is also a good reason to do recovery */ - node_count = nodemap->num; - if (remote_nodemap->num < node_count) { - node_count = remote_nodemap->num; - } - for (i=0;inum;i++) { if (remote_nodemap->nodes[i].pnn != nodemap->nodes[i].pnn) { DEBUG(0, (__location__ " Remote node:%u has different nodemap pnn for %d (%u vs %u).\n", nodemap->nodes[j].pnn, i, diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c index 497ae1f695..dddf90753b 100644 --- a/ctdb/server/ctdb_server.c +++ b/ctdb/server/ctdb_server.c @@ -204,17 +204,7 @@ uint32_t ctdb_get_num_active_nodes(struct ctdb_context *ctdb) int i; uint32_t count=0; for (i=0;ivnn_map->size;i++) { - uint32_t nodeid; - struct ctdb_node *node; - - nodeid = ctdb->vnn_map->map[i]; - if ( !ctdb_validate_pnn(ctdb, nodeid) ) { - DEBUG(0, ("Ignoring non-existent vnn node:%u when counting number of active nodes.\n", nodeid)); - continue; - } - - - node = ctdb->nodes[nodeid]; + struct ctdb_node *node = ctdb->nodes[ctdb->vnn_map->map[i]]; if (!(node->flags & NODE_FLAGS_INACTIVE)) { count++; } diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 324ab70370..ec3455e4c0 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -694,12 +694,6 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap) if (tmp_ip->pnn == -1) { continue; } - - if (!ctdb_validate_pnn(ctdb, tmp_ip->pnn)) { - tmp_ip->pnn = -1; - continue; - } - if (nodemap->nodes[tmp_ip->pnn].flags & mask) { tmp_ip->pnn = -1; } diff --git a/ctdb/server/ctdb_tunables.c b/ctdb/server/ctdb_tunables.c index f738ae3260..da1f6e6ba7 100644 --- a/ctdb/server/ctdb_tunables.c +++ 
b/ctdb/server/ctdb_tunables.c @@ -44,7 +44,6 @@ static const struct { { "DatabaseHashSize", 10000, offsetof(struct ctdb_tunable, database_hash_size) }, { "RerecoveryTimeout", 10, offsetof(struct ctdb_tunable, rerecovery_timeout) }, { "EnableBans", 1, offsetof(struct ctdb_tunable, enable_bans) }, - { "CheckNodesFile", 1, offsetof(struct ctdb_tunable, check_nodes_file) }, { "DeterministicIPs", 0, offsetof(struct ctdb_tunable, deterministic_public_ips) }, }; -- cgit From dfa6829621adddb1fdcca1d3e067ec7fee5e473c Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 14 Nov 2007 16:17:52 +1100 Subject: add CTDB_MANAGES_WINBIND to /etc/sysconfig/ctdb to allow ctdb to be used in environments where samba is used without winbind (This used to be ctdb commit 1ae5af14f90fd81a20b14c02c0c5ad355a609134) --- ctdb/config/ctdb.sysconfig | 3 +++ ctdb/config/events.d/50.samba | 26 ++++++++++++++++++-------- ctdb/web/samba.html | 17 ++++++++++++++++- 3 files changed, 37 insertions(+), 9 deletions(-) (limited to 'ctdb') diff --git a/ctdb/config/ctdb.sysconfig b/ctdb/config/ctdb.sysconfig index d290321b99..bbb6f270b1 100644 --- a/ctdb/config/ctdb.sysconfig +++ b/ctdb/config/ctdb.sysconfig @@ -33,6 +33,9 @@ # default is to not manage Samba # CTDB_MANAGES_SAMBA=yes +# should ctdb manage starting/stopping Winbind service? +# CTDB_MANAGES_WINBIND=yes + # you may wish to raise the file descriptor limit for ctdb # use a ulimit command here. ctdb needs one file descriptor per # connected client (ie. 
one per connected client in Samba) diff --git a/ctdb/config/events.d/50.samba b/ctdb/config/events.d/50.samba index 5f9fa4fafa..88d8e4e438 100755 --- a/ctdb/config/events.d/50.samba +++ b/ctdb/config/events.d/50.samba @@ -32,17 +32,21 @@ case $cmd in # make sure samba is not already started service smb stop > /dev/null 2>&1 - service winbind stop > /dev/null 2>&1 - killall -0 -q smbd winbindd && { - sleep 1 - # make absolutely sure samba is dead - killall -q -9 smbd winbindd + + # restart the winbind service + [ "$CTDB_MANAGES_WINBIND" = "yes" ] && { + service winbind stop > /dev/null 2>&1 + killall -0 -q smbd winbindd && { + sleep 1 + # make absolutely sure samba is dead + killall -q -9 smbd winbindd + } + service winbind start } # start Samba service. Start it reniced, as under very heavy load # the number of smbd processes will mean that it leaves few cycles for # anything else - service winbind start nice service smb start ;; @@ -62,7 +66,11 @@ case $cmd in shutdown) # shutdown Samba when ctdb goes down service smb stop - service winbind stop + + # stop the winbind service + [ "$CTDB_MANAGES_WINBIND" = "yes" ] && { + service winbind stop + } ;; monitor) @@ -89,7 +97,9 @@ case $cmd in ctdb_check_tcp_ports "Samba" $smb_ports # check winbind is OK - ctdb_check_command "winbind" "wbinfo -p" + [ "$CTDB_MANAGES_WINBIND" = "yes" ] && { + ctdb_check_command "winbind" "wbinfo -p" + } ;; esac diff --git a/ctdb/web/samba.html b/ctdb/web/samba.html index 0b5be7f6bc..08fc683ab8 100644 --- a/ctdb/web/samba.html +++ b/ctdb/web/samba.html @@ -64,7 +64,7 @@ A suitable file can be found in the dbench distribution at http://samba.org/ftp/

CTDB_MANAGES_SAMBA

This is a parameter in /etc/sysconfig/ctdb

-When this parameter is set to "yes" CTDB will start/stop/restart the localo samba daemon as the cluster configuration changes.

+When this parameter is set to "yes" CTDB will start/stop/restart the local samba daemon as the cluster configuration changes.

When this parameter is set you should also make sure that samba is NOT started by default by the linux system when it boots:
   chkconfig samba off
@@ -77,5 +77,20 @@ Example:
 
 It is strongly recommended that you set this parameter to "yes" if you intend to use clustered samba.
 
+

CTDB_MANAGES_WINBIND

+This is a parameter in /etc/sysconfig/ctdb

+When this parameter is set to "yes" CTDB will start/stop/restart the local winbind daemon as the cluster configuration changes.

+When this parameter is set you should also make sure that winbind is NOT started by default by the linux system when it boots: +
+  chkconfig winbind off
+
+ +Example: +
+  CTDB_MANAGES_WINBIND="yes"
+
+ +It is strongly recommended that you set this parameter to "yes" if you intend to use clustered samba. + -- cgit From 9f4b0dab03d832407e6d573c10c2645bd25b335d Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 15 Nov 2007 06:56:02 +1100 Subject: only check port 21 when monitoring vsftpd (This used to be ctdb commit 41b0d71aaee186138eddc97d49503841fa26f234) --- ctdb/config/events.d/40.vsftpd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'ctdb') diff --git a/ctdb/config/events.d/40.vsftpd b/ctdb/config/events.d/40.vsftpd index d5f8541bb7..bc420fd42d 100755 --- a/ctdb/config/events.d/40.vsftpd +++ b/ctdb/config/events.d/40.vsftpd @@ -42,7 +42,7 @@ case $cmd in ;; monitor) - ctdb_check_tcp_ports "ftp" 20 21 + ctdb_check_tcp_ports "ftp" 21 ;; esac -- cgit From b09d3de7596054689cb18fdbd642ced63059e075 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Fri, 16 Nov 2007 13:37:27 +1100 Subject: from Christian A when monitoring that all nfs shares are available, allow both ' ' and '\t' characters to separate the exported directory from the options in /etc/exports (This used to be ctdb commit ac6cfe9de0acdcf9461068684fa890504454aae4) --- ctdb/config/events.d/60.nfs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'ctdb') diff --git a/ctdb/config/events.d/60.nfs b/ctdb/config/events.d/60.nfs index 6c04f1e303..ed9d6da63b 100755 --- a/ctdb/config/events.d/60.nfs +++ b/ctdb/config/events.d/60.nfs @@ -69,7 +69,7 @@ case $cmd in ctdb_check_rpc "NFS" 100003 3 # and that its directories are available - nfs_dirs=`grep -v '^#' < /etc/exports | cut -d' ' -f1` + nfs_dirs=$(grep -v '^#' < /etc/exports | awk {'print $1;'}) ctdb_check_directories "nfs" $nfs_dirs # check that lockd responds to rpc requests -- cgit