diff options
author | Alasdair Kergon <agk@redhat.com> | 2004-06-24 08:02:38 +0000 |
---|---|---|
committer | Alasdair Kergon <agk@redhat.com> | 2004-06-24 08:02:38 +0000 |
commit | d3c8211fefbe801bb6db49d6fc9e117946d1538e (patch) | |
tree | 3f81e7c3cbfa490fe6f1b6deef0053d58de09134 /daemons/clvmd/clvmd-cman.c | |
parent | 244a32b3d5e9ba20f1005528ec51b68c86139085 (diff) | |
download | lvm2-d3c8211fefbe801bb6db49d6fc9e117946d1538e.tar.gz lvm2-d3c8211fefbe801bb6db49d6fc9e117946d1538e.tar.xz lvm2-d3c8211fefbe801bb6db49d6fc9e117946d1538e.zip |
Add cluster support.
Diffstat (limited to 'daemons/clvmd/clvmd-cman.c')
-rw-r--r-- | daemons/clvmd/clvmd-cman.c | 499 |
1 files changed, 499 insertions, 0 deletions
diff --git a/daemons/clvmd/clvmd-cman.c b/daemons/clvmd/clvmd-cman.c new file mode 100644 index 00000000..751f4ddf --- /dev/null +++ b/daemons/clvmd/clvmd-cman.c @@ -0,0 +1,499 @@ +/* + * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * CMAN communication layer for clvmd. + */ + +#include <pthread.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/uio.h> +#include <sys/un.h> +#include <sys/time.h> +#include <sys/ioctl.h> +#include <sys/utsname.h> +#include <syslog.h> +#include <netinet/in.h> +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <signal.h> +#include <unistd.h> +#include <fcntl.h> +#include <getopt.h> +#include <errno.h> + +#include "clvmd-comms.h" +#include "clvm.h" +#include "libdlm.h" +#include "log.h" +#include "clvmd.h" +#include "lvm-functions.h" + +#define LOCKSPACE_NAME "clvmd" + +static int cluster_sock; +static int num_nodes; +static struct cl_cluster_node *nodes = NULL; +static int count_nodes; /* size of allocated nodes array */ +static int max_updown_nodes = 50; /* Current size of the allocated array */ +/* Node up/down status, indexed by nodeid */ +static int *node_updown = NULL; +static dlm_lshandle_t *lockspace; + +static void sigusr1_handler(int sig); +static void count_clvmds_running(void); +static void get_members(void); +static int nodeid_from_csid(char *csid); +static int name_from_nodeid(int nodeid, char *name); + +struct lock_wait { + pthread_cond_t cond; + pthread_mutex_t mutex; + struct dlm_lksb lksb; +}; + +int init_cluster() +{ + struct sockaddr_cl saddr; + int port = CLUSTER_PORT_CLVMD; + + /* Open the cluster communication socket */ + cluster_sock = socket(AF_CLUSTER, SOCK_DGRAM, CLPROTO_CLIENT); + if (cluster_sock == -1) { + perror("Can't open cluster socket"); + return -1; + } + + /* Bind to our port number on the cluster. + Writes to this will block if the cluster loses quorum */ + saddr.scl_family = AF_CLUSTER; + saddr.scl_port = port; + + if (bind + (cluster_sock, (struct sockaddr *) &saddr, + sizeof(struct sockaddr_cl))) { + log_error("Can't bind cluster socket: %m"); + return -1; + } + + /* Get the cluster members list */ + get_members(); + count_clvmds_running(); + + /* Create a lockspace for LV & VG locks to live in */ + lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600); + if (!lockspace) { + log_error("Unable to create lockspace for CLVM\n"); + return -1; + } + dlm_ls_pthread_init(lockspace); + return 0; +} + +int get_main_cluster_fd() +{ + return cluster_sock; +} + +int get_num_nodes() +{ + return num_nodes; +} + +/* send_message with the fd check removed */ +int cluster_send_message(void *buf, int msglen, char *csid, const char *errtext) +{ + struct iovec iov[2]; + struct msghdr msg; + struct sockaddr_cl saddr; + int len = 0; + + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_iovlen = 1; + msg.msg_iov = iov; + msg.msg_flags = 0; + iov[0].iov_len = msglen; + iov[0].iov_base = buf; + + saddr.scl_family = AF_CLUSTER; + saddr.scl_port = CLUSTER_PORT_CLVMD; + if (csid) { + msg.msg_name = &saddr; + msg.msg_namelen = sizeof(saddr); + memcpy(&saddr.scl_nodeid, csid, MAX_CSID_LEN); + } else { /* Cluster broadcast */ + + msg.msg_name = NULL; + msg.msg_namelen = 0; + } + + do { + len = sendmsg(cluster_sock, &msg, 0); + if (len < 0 && errno != EAGAIN) + log_error(errtext); + + } while (len == -1 && errno == EAGAIN); + return len; +} + +void get_our_csid(char *csid) +{ + int i; + memset(csid, 0, MAX_CSID_LEN); + + for (i = 0; i < num_nodes; i++) { + if (nodes[i].us) + memcpy(csid, &nodes[i].node_id, MAX_CSID_LEN); + } +} + +/* Call a callback routine for each node that known (down mean not running a clvmd) */ +int cluster_do_node_callback(struct local_client *client, + void (*callback) (struct local_client *, char *, + int)) +{ + int i; + int somedown = 0; + + for (i = 0; i < get_num_nodes(); i++) { + callback(client, (char *)&nodes[i].node_id, node_updown[nodes[i].node_id]); + if (!node_updown[nodes[i].node_id]) + somedown = -1; + } + return somedown; +} + +/* Process OOB message from the cluster socket, + this currently just means that a node has stopped listening on our port */ +static void process_oob_msg(char *buf, int len, int nodeid) +{ + char namebuf[256]; + switch (buf[0]) { + case CLUSTER_OOB_MSG_PORTCLOSED: + name_from_nodeid(nodeid, namebuf); + log_notice("clvmd on node %s has died\n", namebuf); + DEBUGLOG("Got OOB message, removing node %s\n", namebuf); + + node_updown[nodeid] = 0; + break; + + case CLUSTER_OOB_MSG_STATECHANGE: + DEBUGLOG("Got OOB message, Cluster state change\n"); + get_members(); + break; + default: + /* ERROR */ + DEBUGLOG("Got unknown OOB message: %d\n", buf[0]); + } +} + +int cluster_fd_callback(struct local_client *fd, char *buf, int len, char *csid, + struct local_client **new_client) +{ + struct iovec iov[2]; + struct msghdr msg; + struct sockaddr_cl saddr; + + /* We never return a new client */ + *new_client = NULL; + + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_iovlen = 1; + msg.msg_iov = iov; + msg.msg_name = &saddr; + msg.msg_flags = 0; + msg.msg_namelen = sizeof(saddr); + iov[0].iov_len = len; + iov[0].iov_base = buf; + + len = recvmsg(cluster_sock, &msg, MSG_OOB | O_NONBLOCK); + if (len < 0 && errno == EAGAIN) + return len; + + DEBUGLOG("Read on cluster socket, len = %d\n", len); + + /* A real error */ + if (len < 0) { + log_error("read error on cluster socket: %m"); + return 0; + } + + /* EOF - we have left the cluster */ + if (len == 0) + return 0; + + /* Is it OOB? probably a node gone down */ + if (msg.msg_flags & MSG_OOB) { + process_oob_msg(iov[0].iov_base, len, saddr.scl_nodeid); + + /* Tell the upper layer to ignore this message */ + len = -1; + errno = EAGAIN; + } + memcpy(csid, &saddr.scl_nodeid, sizeof(saddr.scl_nodeid)); + return len; +} + +void add_up_node(char *csid) +{ + /* It's up ! */ + int nodeid = nodeid_from_csid(csid); + + if (nodeid >= max_updown_nodes) { + int *new_updown = realloc(node_updown, max_updown_nodes + 10); + + if (new_updown) { + node_updown = new_updown; + max_updown_nodes += 10; + DEBUGLOG("realloced more space for nodes. now %d\n", + max_updown_nodes); + } else { + log_error + ("Realloc failed. Node status for clvmd will be wrong\n"); + return; + } + } + node_updown[nodeid] = 1; + DEBUGLOG("Added new node %d to updown list\n", nodeid); +} + +void cluster_closedown() +{ + unlock_all(); + dlm_release_lockspace(LOCKSPACE_NAME, lockspace, 1); + close(cluster_sock); +} + +static int is_listening(int nodeid) +{ + struct cl_listen_request rq; + int status; + + rq.port = CLUSTER_PORT_CLVMD; + rq.nodeid = nodeid; + + do { + status = ioctl(cluster_sock, SIOCCLUSTER_ISLISTENING, &rq); + if (status < 0 && errno == EBUSY) { /* Don't busywait */ + sleep(1); + errno = EBUSY; /* In case sleep trashes it */ + } + } + while (status < 0 && errno == EBUSY); + + return status; +} + +/* Populate the list of CLVMDs running. + called only at startup time */ +void count_clvmds_running(void) +{ + int i; + + for (i = 0; i < num_nodes; i++) { + node_updown[nodes[i].node_id] = is_listening(nodes[i].node_id); + } +} + +/* Get a list of active cluster members */ +static void get_members() +{ + struct cl_cluster_nodelist nodelist; + + num_nodes = ioctl(cluster_sock, SIOCCLUSTER_GETMEMBERS, 0); + if (num_nodes == -1) { + perror("get nodes"); + } else { + /* Not enough room for new nodes list ? */ + if (num_nodes > count_nodes && nodes) { + free(nodes); + nodes = NULL; + } + + if (nodes == NULL) { + count_nodes = num_nodes + 10; /* Overallocate a little */ + nodes = malloc(count_nodes * sizeof(struct cl_cluster_node)); + if (!nodes) { + perror("Unable to allocate nodes array\n"); + exit(5); + } + } + nodelist.max_members = count_nodes; + nodelist.nodes = nodes; + + num_nodes = ioctl(cluster_sock, SIOCCLUSTER_GETMEMBERS, &nodelist); + if (num_nodes <= 0) { + perror("get node details"); + exit(6); + } + + /* Sanity check struct */ + if (nodes[0].size != sizeof(struct cl_cluster_node)) { + log_error + ("sizeof(cl_cluster_node) does not match size returned from the kernel: aborting\n"); + exit(10); + } + + if (node_updown == NULL) { + node_updown = + (int *) malloc(sizeof(int) * + max(num_nodes, max_updown_nodes)); + memset(node_updown, 0, + sizeof(int) * max(num_nodes, max_updown_nodes)); + } + } +} + +/* Convert a node name to a CSID */ +int csid_from_name(char *csid, char *name) +{ + int i; + + for (i = 0; i < num_nodes; i++) { + if (strcmp(name, nodes[i].name) == 0) { + memcpy(csid, &nodes[i].node_id, MAX_CSID_LEN); + return 0; + } + } + return -1; +} + +/* Convert a CSID to a node name */ +int name_from_csid(char *csid, char *name) +{ + int i; + + for (i = 0; i < num_nodes; i++) { + if (memcmp(csid, &nodes[i].node_id, MAX_CSID_LEN) == 0) { + strcpy(name, nodes[i].name); + return 0; + } + } + /* Who?? */ + strcpy(name, "Unknown"); + return -1; +} + +/* Convert a node ID to a node name */ +int name_from_nodeid(int nodeid, char *name) +{ + int i; + + for (i = 0; i < num_nodes; i++) { + if (nodeid == nodes[i].node_id) { + strcpy(name, nodes[i].name); + return 0; + } + } + /* Who?? */ + strcpy(name, "Unknown"); + return -1; +} + +/* Convert a CSID to a node ID */ +static int nodeid_from_csid(char *csid) +{ + int nodeid; + + memcpy(&nodeid, csid, MAX_CSID_LEN); + + return nodeid; +} + +int is_quorate() +{ + return ioctl(cluster_sock, SIOCCLUSTER_ISQUORATE, 0); +} + +static void sync_ast_routine(void *arg) +{ + struct lock_wait *lwait = arg; + + pthread_mutex_lock(&lwait->mutex); + pthread_cond_signal(&lwait->cond); + pthread_mutex_unlock(&lwait->mutex); +} + +int sync_lock(const char *resource, int mode, int flags, int *lockid) +{ + int status; + struct lock_wait lwait; + + if (!lockid) { + errno = EINVAL; + return -1; + } + + /* Conversions need the lockid in the LKSB */ + if (flags & LKF_CONVERT) + lwait.lksb.sb_lkid = *lockid; + + pthread_cond_init(&lwait.cond, NULL); + pthread_mutex_init(&lwait.mutex, NULL); + pthread_mutex_lock(&lwait.mutex); + + status = dlm_ls_lock(lockspace, + mode, + &lwait.lksb, + flags, + resource, + strlen(resource), + 0, sync_ast_routine, &lwait, NULL, NULL); + if (status) + return status; + + /* Wait for it to complete */ + pthread_cond_wait(&lwait.cond, &lwait.mutex); + pthread_mutex_unlock(&lwait.mutex); + + *lockid = lwait.lksb.sb_lkid; + + errno = lwait.lksb.sb_status; + if (lwait.lksb.sb_status) + return -1; + else + return 0; +} + +int sync_unlock(const char *resource /* UNUSED */, int lockid) +{ + int status; + struct lock_wait lwait; + + pthread_cond_init(&lwait.cond, NULL); + pthread_mutex_init(&lwait.mutex, NULL); + pthread_mutex_lock(&lwait.mutex); + + status = dlm_ls_unlock(lockspace, lockid, 0, &lwait.lksb, &lwait); + + if (status) + return status; + + /* Wait for it to complete */ + pthread_cond_wait(&lwait.cond, &lwait.mutex); + pthread_mutex_unlock(&lwait.mutex); + + errno = lwait.lksb.sb_status; + if (lwait.lksb.sb_status != EUNLOCK) + return -1; + else + return 0; + +} |