/*
 * Copyright (C) 2009-2012 Red Hat, Inc. All rights reserved.
 *
 * This file is part of LVM2.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU Lesser General Public License v.2.1.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * This provides the interface between clvmd and corosync/DLM as the cluster
 * and lock manager.
 */

#include "clvmd-common.h"

#include <pthread.h>

#include "clvm.h"
#include "clvmd-comms.h"
#include "clvmd.h"
#include "lvm-functions.h"

#include "locking.h"

#include <corosync/cpg.h>
#include <corosync/quorum.h>

#ifdef HAVE_COROSYNC_CONFDB_H
#  include <corosync/confdb.h>
#elif defined HAVE_COROSYNC_CMAP_H
#  include <corosync/cmap.h>
#else
#  error "Either HAVE_COROSYNC_CONFDB_H or HAVE_COROSYNC_CMAP_H must be defined."
#endif

#include <libdlm.h>

#include <syslog.h>

/* Name of the DLM lockspace that holds the LV & VG locks */
#define LOCKSPACE_NAME "clvmd"

static void corosync_cpg_deliver_callback (cpg_handle_t handle,
				  const struct cpg_name *groupName,
				  uint32_t nodeid,
				  uint32_t pid,
				  void *msg,
				  size_t msg_len);
static void corosync_cpg_confchg_callback(cpg_handle_t handle,
				 const struct cpg_name *groupName,
				 const struct cpg_address *member_list, size_t member_list_entries,
				 const struct cpg_address *left_list, size_t left_list_entries,
				 const struct cpg_address *joined_list, size_t joined_list_entries);
static void _cluster_closedown(void);

/* Hash list of nodes in the cluster */
static struct dm_hash_table *node_hash;

/* Number of active nodes */
static int num_nodes;
static unsigned int our_nodeid;

static struct local_client *cluster_client;

/* Corosync handles */
static cpg_handle_t cpg_handle;
static quorum_handle_t quorum_handle;

/* DLM Handle */
static dlm_lshandle_t *lockspace;

static struct cpg_name cpg_group_name;

/* Corosync callback structs */
cpg_callbacks_t corosync_cpg_callbacks = {
	.cpg_deliver_fn =            corosync_cpg_deliver_callback,
	.cpg_confchg_fn =            corosync_cpg_confchg_callback,
};

quorum_callbacks_t quorum_callbacks = {
	.quorum_notify_fn = NULL,
};

/* One entry per node seen in a CPG configuration change, keyed by nodeid */
struct node_info
{
	enum {NODE_UNKNOWN, NODE_DOWN, NODE_UP, NODE_CLVMD} state;
	int nodeid;
};


/*
 * Set errno to something approximating the right value and return 0 or -1.
 *
 * Returns 0 for CS_OK (errno is left untouched) and -1 for every other
 * code, with errno set to the closest errno equivalent (EINVAL when there
 * is no better match).
 */
static int cs_to_errno(cs_error_t err)
{
	switch(err)
	{
	case CS_OK:
		return 0;
	case CS_ERR_LIBRARY:
		errno = EINVAL;
		break;
	case CS_ERR_VERSION:
		errno = EINVAL;
		break;
	case CS_ERR_INIT:
		errno = EINVAL;
		break;
	case CS_ERR_TIMEOUT:
		errno = ETIME;
		break;
	case CS_ERR_TRY_AGAIN:
		errno = EAGAIN;
		break;
	case CS_ERR_INVALID_PARAM:
		errno = EINVAL;
		break;
	case CS_ERR_NO_MEMORY:
		errno = ENOMEM;
		break;
	case CS_ERR_BAD_HANDLE:
		errno = EINVAL;
		break;
	case CS_ERR_BUSY:
		errno = EBUSY;
		break;
	case CS_ERR_ACCESS:
		errno = EPERM;
		break;
	case CS_ERR_NOT_EXIST:
		errno = ENOENT;
		break;
	case CS_ERR_NAME_TOO_LONG:
		errno = ENAMETOOLONG;
		break;
	case CS_ERR_EXIST:
		errno = EEXIST;
		break;
	case CS_ERR_NO_SPACE:
		errno = ENOSPC;
		break;
	case CS_ERR_INTERRUPT:
		errno = EINTR;
		break;
	case CS_ERR_NAME_NOT_FOUND:
		errno = ENOENT;
		break;
	case CS_ERR_NO_RESOURCES:
		errno = ENOMEM;
		break;
	case CS_ERR_NOT_SUPPORTED:
		errno = EOPNOTSUPP;
		break;
	case CS_ERR_BAD_OPERATION:
		errno = EINVAL;
		break;
	case CS_ERR_FAILED_OPERATION:
		errno = EIO;
		break;
	case CS_ERR_MESSAGE_ERROR:
		errno = EIO;
		break;
	case CS_ERR_QUEUE_FULL:
		errno = EXFULL;
		break;
	case CS_ERR_QUEUE_NOT_AVAILABLE:
		errno = EINVAL;
		break;
	case CS_ERR_BAD_FLAGS:
		errno = EINVAL;
		break;
	case CS_ERR_TOO_BIG:
		errno = E2BIG;
		break;
	case CS_ERR_NO_SECTIONS:
		errno = ENOMEM;
		break;
	default:
		errno = EINVAL;
		break;
	}
	return -1;
}

/*
 * Format a csid (the first sizeof(int) bytes are read as a node id) as a
 * decimal string for log messages.
 *
 * The result lives in a static buffer, so it is overwritten by the next
 * call and must not be freed.
 */
static char *print_corosync_csid(const char *csid)
{
	static char buf[128];
	int id;

	memcpy(&id, csid, sizeof(int));
	snprintf(buf, sizeof(buf), "%d", id);
	return buf;
}

/*
 * CPG delivery callback.
 *
 * Each message starts with a COROSYNC_CSID_LEN-byte target node id (see
 * _cluster_send_message); 0 means "all nodes".  Messages we sent ourselves
 * and messages addressed to another node are dropped; everything else is
 * handed to process_message() with the prefix removed.
 */
static void corosync_cpg_deliver_callback (cpg_handle_t handle,
				  const struct cpg_name *groupName,
				  uint32_t nodeid,
				  uint32_t pid,
				  void *msg,
				  size_t msg_len)
{
	int target_nodeid;

	/* Too short to carry the target-node prefix: nothing sane to read */
	if (msg_len < COROSYNC_CSID_LEN) {
		DEBUGLOG("Ignoring short message (%zu bytes) from nodeid %u\n",
			 msg_len, nodeid);
		return;
	}

	memcpy(&target_nodeid, msg, COROSYNC_CSID_LEN);

	DEBUGLOG("%u got message from nodeid %d for %d. len %zd\n",
		 our_nodeid, nodeid, target_nodeid, msg_len-4);

	if (nodeid != our_nodeid)
		if (target_nodeid == our_nodeid || target_nodeid == 0)
			process_message(cluster_client, (char *)msg+COROSYNC_CSID_LEN,
					msg_len-COROSYNC_CSID_LEN, (char*)&nodeid);
}

/*
 * Return the node_hash entry for 'nodeid', allocating and inserting a new
 * one when the node has not been seen before.
 *
 * Returns NULL if the entry cannot be allocated or inserted.
 */
static struct node_info *_find_or_add_node(uint32_t nodeid)
{
	struct node_info *ninfo;

	ninfo = dm_hash_lookup_binary(node_hash, (const char *)&nodeid,
				      COROSYNC_CSID_LEN);
	if (ninfo)
		return ninfo;

	ninfo = malloc(sizeof(*ninfo));
	if (!ninfo)
		return NULL;

	ninfo->state = NODE_UNKNOWN;
	ninfo->nodeid = nodeid;

	if (!dm_hash_insert_binary(node_hash, (const char *)&ninfo->nodeid,
				   COROSYNC_CSID_LEN, ninfo)) {
		free(ninfo);
		return NULL;
	}

	return ninfo;
}

/*
 * CPG configuration-change callback.
 *
 * Marks joined and current members as NODE_CLVMD, marks nodes that left as
 * NODE_DOWN, and records the current member count in num_nodes.
 */
static void corosync_cpg_confchg_callback(cpg_handle_t handle,
				 const struct cpg_name *groupName,
				 const struct cpg_address *member_list, size_t member_list_entries,
				 const struct cpg_address *left_list, size_t left_list_entries,
				 const struct cpg_address *joined_list, size_t joined_list_entries)
{
	size_t i;
	struct node_info *ninfo;

	DEBUGLOG("confchg callback. %zd joined, %zd left, %zd members\n",
		 joined_list_entries, left_list_entries, member_list_entries);

	for (i = 0; i < joined_list_entries; i++) {
		ninfo = _find_or_add_node(joined_list[i].nodeid);
		if (!ninfo)
			break;
		ninfo->state = NODE_CLVMD;
	}

	for (i = 0; i < left_list_entries; i++) {
		ninfo = dm_hash_lookup_binary(node_hash,
					      (const char *)&left_list[i].nodeid,
					      COROSYNC_CSID_LEN);
		if (ninfo)
			ninfo->state = NODE_DOWN;
	}

	for (i = 0; i < member_list_entries; i++) {
		/* Node id 0 is the "all nodes" target in this file, never a peer */
		if (member_list[i].nodeid == 0)
			continue;
		ninfo = _find_or_add_node(member_list[i].nodeid);
		if (!ninfo)
			break;
		ninfo->state = NODE_CLVMD;
	}

	num_nodes = member_list_entries;
}

/*
 * Connect to corosync (CPG and quorum), open or create the DLM lockspace,
 * join the "clvmd" process group and record our own node id.
 *
 * Returns 0 on success.  On failure returns -1 with errno set, after
 * releasing everything acquired up to that point.
 */
static int _init_cluster(void)
{
	cs_error_t err;
	int saved_errno;

#ifdef QUORUM_SET	/* corosync/quorum.h */
	uint32_t quorum_type;
#endif

	node_hash = dm_hash_create(100);
	if (!node_hash) {
		syslog(LOG_ERR, "Cannot allocate node hash table");
		DEBUGLOG("Cannot allocate node hash table\n");
		errno = ENOMEM;
		return -1;
	}

	err = cpg_initialize(&cpg_handle, &corosync_cpg_callbacks);
	if (err != CS_OK) {
		syslog(LOG_ERR, "Cannot initialise Corosync CPG service: %d",
		       err);
		DEBUGLOG("Cannot initialise Corosync CPG service: %d", err);
		(void) cs_to_errno(err);
		saved_errno = errno;
		goto bad_hash;
	}

#ifdef QUORUM_SET
	err = quorum_initialize(&quorum_handle, &quorum_callbacks, &quorum_type);
#else
	err = quorum_initialize(&quorum_handle, &quorum_callbacks);
#endif

	if (err != CS_OK) {
		syslog(LOG_ERR, "Cannot initialise Corosync quorum service: %d",
		       err);
		DEBUGLOG("Cannot initialise Corosync quorum service: %d", err);
		(void) cs_to_errno(err);
		saved_errno = errno;
		goto bad_cpg;
	}

#ifdef QUORUM_SET
	/* Only meaningful once quorum_initialize() is known to have succeeded */
	if (quorum_type != QUORUM_SET) {
		syslog(LOG_ERR, "Corosync quorum service is not configured");
		DEBUGLOG("Corosync quorum service is not configured");
		saved_errno = EINVAL;
		goto bad_quorum;
	}
#endif

	/* Create a lockspace for LV & VG locks to live in */
	lockspace = dlm_open_lockspace(LOCKSPACE_NAME);
	if (!lockspace) {
		lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600);
		if (!lockspace) {
			saved_errno = errno;
			syslog(LOG_ERR, "Unable to create DLM lockspace for CLVM: %m");
			goto bad_quorum;
		}
		DEBUGLOG("Created DLM lockspace for CLVMD.\n");
	} else
		DEBUGLOG("Opened existing DLM lockspace for CLVMD.\n");

	dlm_ls_pthread_init(lockspace);
	DEBUGLOG("DLM initialisation complete\n");

	/* Connect to the clvmd group */
	strcpy((char *)cpg_group_name.value, "clvmd");
	cpg_group_name.length = strlen((char *)cpg_group_name.value);
	err = cpg_join(cpg_handle, &cpg_group_name);
	if (err != CS_OK) {
		syslog(LOG_ERR, "Cannot join clvmd process group");
		DEBUGLOG("Cannot join clvmd process group: %d\n", err);
		(void) cs_to_errno(err);
		saved_errno = errno;
		goto bad_lockspace;
	}

	err = cpg_local_get(cpg_handle, &our_nodeid);
	if (err != CS_OK) {
		syslog(LOG_ERR, "Cannot get local node id\n");
		(void) cs_to_errno(err);
		saved_errno = errno;
		goto bad_lockspace;
	}
	DEBUGLOG("Our local node id is %d\n", our_nodeid);

	DEBUGLOG("Connected to Corosync\n");

	return 0;

bad_lockspace:
	dlm_release_lockspace(LOCKSPACE_NAME, lockspace, 1);
bad_quorum:
	quorum_finalize(quorum_handle);
bad_cpg:
	cpg_finalize(cpg_handle);
bad_hash:
	dm_hash_destroy(node_hash);
	node_hash = NULL;
	/* The cleanup calls above may have changed errno */
	errno = saved_errno;
	return -1;
}

/* Release the LV hash, the DLM lockspace and both corosync handles */
static void _cluster_closedown(void)
{
	DEBUGLOG("cluster_closedown\n");
	destroy_lvhash();
	dlm_release_lockspace(LOCKSPACE_NAME, lockspace, 1);
	cpg_finalize(cpg_handle);
	quorum_finalize(quorum_handle);
}

/* Copy our own node id into 'csid' (sizeof(int) bytes are written) */
static void _get_our_csid(char *csid)
{
	memcpy(csid, &our_nodeid, sizeof(int));
}

/*
 * Corosync doesn't really have node names so we just use the node ID in
 * hex instead.
 *
 * Parses 'name' as a hex node id and, if that node is present in
 * node_hash, writes the id into 'csid' (COROSYNC_CSID_LEN bytes) and
 * returns the node id.  Returns -1 if 'name' does not parse or the node is
 * unknown; 'csid' is then left untouched.
 *
 * NOTE(review): the success return value is the node id, as in the
 * original code; confirm callers do not expect 0 on success.
 */
static int _csid_from_name(char *csid, const char *name)
{
	unsigned int nodeid;
	struct node_info *ninfo;

	if (sscanf(name, "%x", &nodeid) == 1) {
		/* Key on the parsed id, not on the caller's output buffer */
		ninfo = dm_hash_lookup_binary(node_hash, (const char *)&nodeid,
					      COROSYNC_CSID_LEN);
		if (ninfo) {
			memcpy(csid, &nodeid, COROSYNC_CSID_LEN);
			return (int) nodeid;
		}
	}
	return -1;
}

/*
 * Write the "name" of the node identified by 'csid' into 'name': the node
 * id in hex, or "UNKNOWN <id>" when the node is not in node_hash.
 *
 * Returns 0 if the node is known, -1 otherwise.  The output is at most
 * 20 bytes including the terminator; the size of 'name' is not passed in,
 * so the caller must provide at least that much.
 */
static int _name_from_csid(const char *csid, char *name)
{
	struct node_info *ninfo;

	ninfo = dm_hash_lookup_binary(node_hash, csid, COROSYNC_CSID_LEN);
	if (!ninfo)
	{
		sprintf(name, "UNKNOWN %s", print_corosync_csid(csid));
		return -1;
	}

	sprintf(name, "%x", ninfo->nodeid);
	return 0;
}

/* Return the member count recorded by the last configuration change */
static int _get_num_nodes(void)
{
	DEBUGLOG("num_nodes = %d\n", num_nodes);
	return num_nodes;
}

/* Node is now known to be running a clvmd */
static void _add_up_node(const char *csid)
{
	struct node_info *ninfo;

	ninfo = dm_hash_lookup_binary(node_hash, csid, COROSYNC_CSID_LEN);
	if (!ninfo) {
		DEBUGLOG("corosync_add_up_node no node_hash entry for csid %s\n",
			 print_corosync_csid(csid));
		return;
	}

	DEBUGLOG("corosync_add_up_node %d\n", ninfo->nodeid);

	ninfo->state = NODE_CLVMD;
}

/*
 * Call a callback for each node, so the caller knows whether it's up or down.
 *
 * Nodes in state NODE_DOWN are skipped.  Returns 0 if every node in the
 * hash is in state NODE_CLVMD, -1 otherwise.
 */
static int _cluster_do_node_callback(struct local_client *master_client,
				     void (*callback)(struct local_client *,
						      const char *csid, int node_up))
{
	struct dm_hash_node *hn;
	struct node_info *ninfo;
	int somedown = 0;

	dm_hash_iterate(hn, node_hash)
	{
		char csid[COROSYNC_CSID_LEN];

		ninfo = dm_hash_get_data(node_hash, hn);
		memcpy(csid, dm_hash_get_key(node_hash, hn), COROSYNC_CSID_LEN);

		DEBUGLOG("down_callback. node %d, state = %d\n", ninfo->nodeid,
			 ninfo->state);

		if (ninfo->state != NODE_DOWN)
			callback(master_client, csid, ninfo->state == NODE_CLVMD);
		if (ninfo->state != NODE_CLVMD)
			somedown = -1;
	}
	return somedown;
}

/*
 * Real locking.
 *
 * Take (or, with LKF_CONVERT in 'flags', convert the lock identified by
 * '*lockid') a DLM lock on 'resource' in the given 'mode', waiting for
 * completion.  On success returns 0 and stores the lock id in '*lockid'.
 * On failure returns non-zero: the value from dlm_ls_lock_wait(), or -1
 * with errno set to the lock status block's sb_status.
 */
static int _lock_resource(const char *resource, int mode, int flags, int *lockid)
{
	struct dlm_lksb lksb;
	int err;

	DEBUGLOG("lock_resource '%s', flags=%d, mode=%d\n", resource, flags, mode);

	/* Don't hand the DLM an indeterminate status block */
	memset(&lksb, 0, sizeof(lksb));

	if (flags & LKF_CONVERT)
		lksb.sb_lkid = *lockid;

	err = dlm_ls_lock_wait(lockspace,
			       mode,
			       &lksb,
			       flags,
			       resource,
			       strlen(resource),
			       0,
			       NULL, NULL, NULL);

	if (err != 0)
	{
		DEBUGLOG("dlm_ls_lock returned %d\n", errno);
		return err;
	}
	if (lksb.sb_status != 0)
	{
		DEBUGLOG("dlm_ls_lock returns lksb.sb_status %d\n", lksb.sb_status);
		errno = lksb.sb_status;
		return -1;
	}

	DEBUGLOG("lock_resource returning %d, lock_id=%x\n", err, lksb.sb_lkid);

	*lockid = lksb.sb_lkid;

	return 0;
}

/*
 * Release the DLM lock 'lockid', waiting for completion.  'resource' is
 * used for logging only.
 *
 * Returns 0 on success; otherwise the value from dlm_ls_unlock_wait(), or
 * -1 with errno set to sb_status when the status is not EUNLOCK.
 */
static int _unlock_resource(const char *resource, int lockid)
{
	struct dlm_lksb lksb;
	int err;

	DEBUGLOG("unlock_resource: %s lockid: %x\n", resource, lockid);

	memset(&lksb, 0, sizeof(lksb));
	lksb.sb_lkid = lockid;

	err = dlm_ls_unlock_wait(lockspace,
				 lockid,
				 0,
				 &lksb);
	if (err != 0)
	{
		DEBUGLOG("Unlock returned %d\n", err);
		return err;
	}
	if (lksb.sb_status != EUNLOCK)
	{
		DEBUGLOG("dlm_ls_unlock_wait returns lksb.sb_status: %d\n", lksb.sb_status);
		errno = lksb.sb_status;
		return -1;
	}

	return 0;
}

/* Return the quorate flag from corosync, or 0 if it cannot be queried */
static int _is_quorate(void)
{
	int quorate;

	if (quorum_getquorate(quorum_handle, &quorate) == CS_OK)
		return quorate;

	return 0;
}

/*
 * Return the file descriptor to poll for CPG activity, or -1 if corosync
 * cannot provide one.
 */
static int _get_main_cluster_fd(void)
{
	int select_fd = -1;

	if (cpg_fd_get(cpg_handle, &select_fd) != CS_OK)
		return -1;

	return select_fd;
}

/*
 * Called when the cluster fd is readable: remember the client so the
 * delivery callback can pass it on, then dispatch one pending CPG event.
 * 'buf', 'len' and 'csid' are unused.  Always returns 1 and sets
 * '*new_client' to NULL.
 */
static int _cluster_fd_callback(struct local_client *fd, char *buf, int len,
				const char *csid,
				struct local_client **new_client)
{
	cluster_client = fd;
	*new_client = NULL;
	cpg_dispatch(cpg_handle, CS_DISPATCH_ONE);
	return 1;
}

/*
 * Multicast 'buf' to the clvmd process group, prefixed with the target
 * node id taken from 'csid' (0, meaning all nodes, when 'csid' is NULL).
 * 'errtext' is unused.
 *
 * Returns 0 on success, or -1 with errno set by cs_to_errno().
 */
static int _cluster_send_message(const void *buf, int msglen, const char *csid,
				 const char *errtext)
{
	struct iovec iov[2];
	cs_error_t err;
	int target_node;

	if (csid)
		memcpy(&target_node, csid, COROSYNC_CSID_LEN);
	else
		target_node = 0;

	iov[0].iov_base = &target_node;
	iov[0].iov_len = sizeof(int);
	iov[1].iov_base = (char *)buf;
	iov[1].iov_len = msglen;

	err = cpg_mcast_joined(cpg_handle, CPG_TYPE_AGREED, iov, 2);
	return cs_to_errno(err);
}

#ifdef HAVE_COROSYNC_CONFDB_H
/*
 * We are not necessarily connected to a Red Hat Cluster system,
 * but if we are, this returns the cluster name from cluster.conf.
 * I've used confdb rather than ccs to reduce the inter-package
 * dependencies as well as to allow people to set a cluster name
 * for themselves even if they are not running on RH cluster.
 *
 * 'buf' always ends up NUL-terminated (when buflen > 0) and holds
 * "Corosync" if no name can be found.  Always returns 0.
 */
static int _get_cluster_name(char *buf, int buflen)
{
	confdb_handle_t handle;
	int result;
	size_t namelen;
	hdb_handle_t cluster_handle;
	confdb_callbacks_t callbacks = {
		.confdb_key_change_notify_fn = NULL,
		.confdb_object_create_change_notify_fn = NULL,
		.confdb_object_delete_change_notify_fn = NULL
	};

	if (buflen <= 0)
		return 0;

	namelen = buflen;

	/* This is a default in case everything else fails */
	strncpy(buf, "Corosync", buflen - 1);
	buf[buflen - 1] = '\0';

	/* Look for a cluster name in confdb */
	result = confdb_initialize (&handle, &callbacks);
	if (result != CS_OK)
		return 0;

	result = confdb_object_find_start(handle, OBJECT_PARENT_HANDLE);
	if (result != CS_OK)
		goto out;

	result = confdb_object_find(handle, OBJECT_PARENT_HANDLE, (void *)"cluster", strlen("cluster"), &cluster_handle);
	if (result != CS_OK)
		goto out;

	result = confdb_key_get(handle, cluster_handle, (void *)"name", strlen("name"), buf, &namelen);
	if (result != CS_OK)
		goto out;

	/* Keep the terminator inside the caller's buffer */
	if (namelen >= (size_t) buflen)
		namelen = buflen - 1;
	buf[namelen] = '\0';

out:
	confdb_finalize(handle);
	return 0;
}

#elif defined HAVE_COROSYNC_CMAP_H

/*
 * Copy the cluster name ("totem.cluster_name" in cmap) into 'buf'.
 *
 * 'buf' always ends up NUL-terminated (when buflen > 0) and holds
 * "Corosync" if no name can be found.  Always returns 0.
 */
static int _get_cluster_name(char *buf, int buflen)
{
	cmap_handle_t cmap_handle = 0;
	int result;
	char *name = NULL;

	if (buflen <= 0)
		return 0;

	/* This is a default in case everything else fails */
	strncpy(buf, "Corosync", buflen - 1);
	buf[buflen - 1] = '\0';

	/* Look for a cluster name in cmap */
	result = cmap_initialize(&cmap_handle);
	if (result != CS_OK)
		return 0;

	result = cmap_get_string(cmap_handle, "totem.cluster_name", &name);
	if (result != CS_OK)
		goto out;

	memset(buf, 0, buflen);
	strncpy(buf, name, buflen - 1);

out:
	free(name);
	cmap_finalize(cmap_handle);
	return 0;
}

#endif

static struct cluster_ops _cluster_corosync_ops = {
	.name                     = "corosync",
	.cluster_init_completed   = NULL,
	.cluster_send_message     = _cluster_send_message,
	.name_from_csid           = _name_from_csid,
	.csid_from_name           = _csid_from_name,
	.get_num_nodes            = _get_num_nodes,
	.cluster_fd_callback      = _cluster_fd_callback,
	.get_main_cluster_fd      = _get_main_cluster_fd,
	.cluster_do_node_callback = _cluster_do_node_callback,
	.is_quorate               = _is_quorate,
	.get_our_csid             = _get_our_csid,
	.add_up_node              = _add_up_node,
	.reread_config            = NULL,
	.cluster_closedown        = _cluster_closedown,
	.get_cluster_name         = _get_cluster_name,
	.sync_lock                = _lock_resource,
	.sync_unlock              = _unlock_resource,
};

/*
 * Initialise the corosync/DLM backend.
 *
 * Returns the corosync cluster operations table, or NULL if the cluster
 * could not be initialised.
 */
struct cluster_ops *init_corosync_cluster(void)
{
	if (!_init_cluster())
		return &_cluster_corosync_ops;

	return NULL;
}