/*
Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
General Public License, version 3 or any later version (LGPLv3 or
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
#include <inttypes.h>
#if defined(GF_LINUX_HOST_OS)
#include <mntent.h>
#else
#include "mntent_compat.h"
#endif
#include <dlfcn.h>
#if (HAVE_LIB_XML)
#include <libxml/encoding.h>
#include <libxml/xmlwriter.h>
#endif
#include "glusterfs.h"
#include "compat.h"
#include "dict.h"
#include "xlator.h"
#include "logging.h"
#include "glusterd-messages.h"
#include "timer.h"
#include "defaults.h"
#include "compat.h"
#include "syncop.h"
#include "run.h"
#include "compat-errno.h"
#include "statedump.h"
#include "syscall.h"
#include "glusterd-mem-types.h"
#include "glusterd.h"
#include "glusterd-op-sm.h"
#include "glusterd-geo-rep.h"
#include "glusterd-sm.h"
#include "glusterd-utils.h"
#include "glusterd-store.h"
#include "glusterd-volgen.h"
#include "glusterd-pmap.h"
#include "glusterfs-acl.h"
#include "glusterd-syncop.h"
#include "glusterd-mgmt.h"
#include "glusterd-locks.h"
#include "glusterd-messages.h"
#include "glusterd-volgen.h"
#include "glusterd-snapshot-utils.h"
#include "glusterd-svc-mgmt.h"
#include "glusterd-svc-helper.h"
#include "glusterd-shd-svc.h"
#include "glusterd-nfs-svc.h"
#include "glusterd-quotad-svc.h"
#include "glusterd-snapd-svc.h"
#include "glusterd-bitd-svc.h"
#include "glusterd-gfproxyd-svc.h"
#include "glusterd-server-quorum.h"
#include "quota-common-utils.h"
#include "common-utils.h"
#include "xdr-generic.h"
#include <sys/resource.h>
#include <inttypes.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <rpc/pmap_clnt.h>
#include <unistd.h>
#include <fnmatch.h>
#include <sys/statvfs.h>
#include <ifaddrs.h>
#ifdef HAVE_BD_XLATOR
#include <lvm2app.h>
#endif
#ifdef GF_SOLARIS_HOST_OS
#include <sys/sockio.h>
#endif
#define NFS_PROGRAM 100003
#define NFSV3_VERSION 3
#define MOUNT_PROGRAM 100005
#define MOUNTV3_VERSION 3
#define MOUNTV1_VERSION 1
#define NLM_PROGRAM 100021
#define NLMV4_VERSION 4
#define NLMV1_VERSION 1
/* Report whether cluster-wide brick multiplexing is turned on.
 *
 * Reads GLUSTERD_BRICK_MULTIPLEX_KEY from glusterd's option dict and
 * parses it as a boolean.  Returns _gf_false when the option is unset
 * or unparsable.
 */
gf_boolean_t
is_brick_mx_enabled(void)
{
    char *val = NULL;
    gf_boolean_t mx_on = _gf_false;
    glusterd_conf_t *conf = NULL;

    conf = THIS->private;

    if (dict_get_strn(conf->opts, GLUSTERD_BRICK_MULTIPLEX_KEY,
                      SLEN(GLUSTERD_BRICK_MULTIPLEX_KEY), &val))
        return _gf_false;

    if (gf_string2boolean(val, &mx_on))
        return _gf_false;

    return mx_on;
}
/* Compute the per-process brick limit used by brick multiplexing.
 *
 * Value written to *mux_limit:
 *   1  when multiplexing is disabled (one brick per process);
 *   0  when GLUSTERD_BRICKMUX_LIMIT_KEY is unset ("no limit");
 *   otherwise the configured integer value.
 *
 * Returns 0 on success, -1 on validation/parse failure.  *mux_limit is
 * written unconditionally at "out", so on the early failure paths it
 * ends up 0.
 */
int
get_mux_limit_per_process(int *mux_limit)
{
    char *value = NULL;
    int ret = -1;
    int max_bricks_per_proc = 0;
    xlator_t *this = NULL;
    glusterd_conf_t *priv = NULL;
    this = THIS;
    GF_VALIDATE_OR_GOTO("glusterd", this, out);
    priv = this->private;
    GF_VALIDATE_OR_GOTO(this->name, priv, out);
    /* Without multiplexing every brick gets its own process. */
    if (!is_brick_mx_enabled()) {
        max_bricks_per_proc = 1;
        ret = 0;
        goto out;
    }
    ret = dict_get_strn(priv->opts, GLUSTERD_BRICKMUX_LIMIT_KEY,
                        SLEN(GLUSTERD_BRICKMUX_LIMIT_KEY), &value);
    if (ret) {
        gf_msg_debug(this->name, 0,
                     "Limit for number of bricks per "
                     "brick process not yet set in dict. Returning "
                     "limit as 0 denoting that multiplexing can "
                     "happen with no limit set.");
        ret = 0;
        goto out;
    }
    ret = gf_string2int(value, &max_bricks_per_proc);
    if (ret)
        goto out;
out:
    /* Publish whatever was computed, even on failure paths. */
    *mux_limit = max_bricks_per_proc;
    gf_msg_debug("glusterd", 0, "Mux limit set to %d bricks per process",
                 *mux_limit);
    return ret;
}
extern struct volopt_map_entry glusterd_volopt_map[];
extern glusterd_all_vol_opts valid_all_vol_opts[];
static glusterd_lock_t lock;
/* Invoke fn() once per brick of @volinfo, forwarding @mod_dict and
 * @data.  Stops at, and returns, the first non-zero result of fn();
 * returns 0 when every call succeeded. */
static int
_brick_for_each(glusterd_volinfo_t *volinfo, dict_t *mod_dict, void *data,
                int (*fn)(glusterd_volinfo_t *, glusterd_brickinfo_t *,
                          dict_t *mod_dict, void *))
{
    glusterd_brickinfo_t *cur = NULL;
    xlator_t *this = THIS;
    int rc = 0;

    cds_list_for_each_entry(cur, &volinfo->bricks, brick_list)
    {
        gf_msg_debug(this->name, 0, "Found a brick - %s:%s", cur->hostname,
                     cur->path);
        rc = fn(volinfo, cur, mod_dict, data);
        if (rc)
            break;
    }
    return rc;
}
/* This is going to be a O(n^2) operation as we have to pick a brick,
make sure it belong to this machine, and compare another brick belonging
to this machine (if exists), is sharing the backend */
/* For every brick of @volinfo hosted on this node (uuid == MY_UUID),
 * set fs_share_count to the number of local bricks of the same volume
 * that sit on the same filesystem (equal statfs_fsid).  As the comment
 * above notes, this is an O(n^2) pass over the brick list. */
static void
gd_set_shared_brick_count(glusterd_volinfo_t *volinfo)
{
    glusterd_brickinfo_t *local = NULL;
    glusterd_brickinfo_t *other = NULL;

    cds_list_for_each_entry(local, &volinfo->bricks, brick_list)
    {
        if (gf_uuid_compare(local->uuid, MY_UUID))
            continue;

        local->fs_share_count = 0;
        cds_list_for_each_entry(other, &volinfo->bricks, brick_list)
        {
            if (gf_uuid_compare(other->uuid, MY_UUID))
                continue;
            if (other->statfs_fsid == local->statfs_fsid)
                local->fs_share_count++;
        }
    }
}
/* Apply fn() to every brick of @volinfo.
 *
 * Plain volumes get a single pass.  Tiered volumes are walked as two
 * synthesized sub-volumes: the hot tier first (with a mod_dict carrying
 * "hot-brick" = "on"), then the cold tier.
 *
 * Returns 0 on success, or the first non-zero value from fn() or from
 * the setup steps.
 */
int
glusterd_volume_brick_for_each(glusterd_volinfo_t *volinfo, void *data,
                               int (*fn)(glusterd_volinfo_t *,
                                         glusterd_brickinfo_t *,
                                         dict_t *mod_dict, void *))
{
    dict_t *mod_dict = NULL;
    glusterd_volinfo_t *dup_volinfo = NULL;
    int ret = 0;

    gd_set_shared_brick_count(volinfo);
    if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
        ret = _brick_for_each(volinfo, NULL, data, fn);
        if (ret)
            goto out;
    } else {
        /* Hot tier pass. */
        ret = glusterd_create_sub_tier_volinfo(volinfo, &dup_volinfo, _gf_true,
                                               volinfo->volname);
        if (ret)
            goto out;
        mod_dict = dict_new();
        if (!mod_dict) {
            ret = -1;
            goto out;
        }
        ret = dict_set_nstrn(mod_dict, "hot-brick", SLEN("hot-brick"), "on",
                             SLEN("on"));
        if (ret)
            goto out;
        ret = _brick_for_each(dup_volinfo, mod_dict, data, fn);
        if (ret)
            goto out;
        /* Fix: the duplicate volinfo owns dicts and brickinfo lists of
         * its own; a bare GF_FREE() here leaked all of them.  Tear it
         * down fully (as the out: path does) before building the
         * cold-tier duplicate. */
        glusterd_volinfo_delete(dup_volinfo);
        dup_volinfo = NULL;
        /* Cold tier pass. */
        ret = glusterd_create_sub_tier_volinfo(volinfo, &dup_volinfo, _gf_false,
                                               volinfo->volname);
        if (ret)
            goto out;
        ret = _brick_for_each(dup_volinfo, NULL, data, fn);
        if (ret)
            goto out;
    }
out:
    if (dup_volinfo)
        glusterd_volinfo_delete(dup_volinfo);
    if (mod_dict)
        dict_unref(mod_dict);
    return ret;
}
/* Copy the current cluster-lock holder's uuid into *uuid (all-zero
 * uuid means the lock is free).  Always returns 0. */
int32_t
glusterd_get_lock_owner(uuid_t *uuid)
{
    gf_uuid_copy(*uuid, lock.owner);
    return 0;
}
/* Record @owner as the holder of the file-static cluster lock.  The
 * caller (glusterd_lock) must already have verified the lock is free.
 * Always returns 0. */
static int32_t
glusterd_set_lock_owner(uuid_t owner)
{
    gf_uuid_copy(lock.owner, owner);
    // TODO: set timestamp
    return 0;
}
/* Clear the cluster-lock owner unconditionally.  The @owner argument
 * is unused; ownership must be verified by the caller beforehand.
 * Always returns 0. */
static int32_t
glusterd_unset_lock_owner(uuid_t owner)
{
    gf_uuid_clear(lock.owner);
    // TODO: set timestamp
    return 0;
}
/* Probe for a usable FUSE device node (/dev/puffs on NetBSD,
 * /dev/fuse elsewhere) by opening it read-write.  Returns _gf_true
 * only when the open and the subsequent close both succeed. */
gf_boolean_t
glusterd_is_fuse_available()
{
    int fd = -1;

#ifdef __NetBSD__
    fd = open("/dev/puffs", O_RDWR);
#else
    fd = open("/dev/fuse", O_RDWR);
#endif

    if (fd < 0)
        return _gf_false;

    return sys_close(fd) ? _gf_false : _gf_true;
}
/* Try to take the cluster-wide glusterd lock on behalf of peer @uuid.
 *
 * Fails with -1 (and an error log naming the current holder) when the
 * lock is already held; otherwise records @uuid as owner and returns 0.
 * The check and the set are two separate steps, so concurrent callers
 * must be serialized externally.  NOTE(review): presumably only ever
 * invoked from glusterd's single op-sm context — confirm.
 */
int32_t
glusterd_lock(uuid_t uuid)
{
    uuid_t owner;
    char new_owner_str[50] = "";
    char owner_str[50] = "";
    int ret = -1;
    xlator_t *this = NULL;
    this = THIS;
    GF_ASSERT(this);
    GF_ASSERT(uuid);
    glusterd_get_lock_owner(&owner);
    /* Non-null owner => somebody already holds the lock. */
    if (!gf_uuid_is_null(owner)) {
        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_LOCK_FAIL,
               "Unable to get lock"
               " for uuid: %s, lock held by: %s",
               uuid_utoa_r(uuid, new_owner_str), uuid_utoa_r(owner, owner_str));
        goto out;
    }
    ret = glusterd_set_lock_owner(uuid);
    if (!ret) {
        gf_msg_debug(this->name, 0,
                     "Cluster lock held by"
                     " %s",
                     uuid_utoa(uuid));
    }
out:
    return ret;
}
/* Release the cluster-wide glusterd lock on behalf of peer @uuid.
 *
 * Fails with non-zero when the lock is not held at all, or when it is
 * held by a different peer than @uuid (gf_uuid_compare's non-zero
 * result doubles as the error return).  Returns 0 once the owner has
 * been cleared.
 */
int32_t
glusterd_unlock(uuid_t uuid)
{
    uuid_t owner;
    char new_owner_str[50] = "";
    char owner_str[50] = "";
    int32_t ret = -1;
    xlator_t *this = NULL;
    this = THIS;
    GF_ASSERT(this);
    GF_ASSERT(uuid);
    glusterd_get_lock_owner(&owner);
    if (gf_uuid_is_null(owner)) {
        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_LOCK_FAIL,
               "Cluster lock not held!");
        goto out;
    }
    /* Only the peer that took the lock may release it. */
    ret = gf_uuid_compare(uuid, owner);
    if (ret) {
        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_LOCK_FAIL,
               "Cluster lock held by %s ,"
               "unlock req from %s!",
               uuid_utoa_r(owner, owner_str), uuid_utoa_r(uuid, new_owner_str));
        goto out;
    }
    ret = glusterd_unset_lock_owner(uuid);
    if (ret) {
        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_UNLOCK_FAIL,
               "Unable to clear cluster "
               "lock");
        goto out;
    }
    ret = 0;
out:
    return ret;
}
/* Copy this glusterd instance's own uuid (MY_UUID) into *uuid.
 * Always returns 0. */
int
glusterd_get_uuid(uuid_t *uuid)
{
    glusterd_conf_t *conf = NULL;

    conf = THIS->private;
    GF_ASSERT(conf);

    gf_uuid_copy(*uuid, MY_UUID);

    return 0;
}
/* Serialize @req (if non-NULL) with @xdrproc into an iobuf and submit
 * it on @rpc as procedure @procnum of program @prog; @cbkfn receives
 * the reply.  A NULL @req sends a body-less request (count == 0).
 *
 * If the caller passed no @iobref, a temporary one is created here and
 * dropped before returning.  Returns 0 once submission was attempted
 * (see the comment above "ret = 0"), -1 only for local failures that
 * occur before the request could reach the wire.
 */
int
glusterd_submit_request(struct rpc_clnt *rpc, void *req, call_frame_t *frame,
                        rpc_clnt_prog_t *prog, int procnum,
                        struct iobref *iobref, xlator_t *this,
                        fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
{
    char new_iobref = 0;
    int ret = -1;
    int count = 0;
    ssize_t req_size = 0;
    struct iobuf *iobuf = NULL;
    struct iovec iov = {
        0,
    };
    GF_ASSERT(rpc);
    GF_ASSERT(this);
    if (req) {
        /* Size the iobuf for the XDR-encoded form of req. */
        req_size = xdr_sizeof(xdrproc, req);
        iobuf = iobuf_get2(this->ctx->iobuf_pool, req_size);
        if (!iobuf) {
            goto out;
        };
        if (!iobref) {
            iobref = iobref_new();
            if (!iobref) {
                goto out;
            }
            new_iobref = 1;
        }
        iobref_add(iobref, iobuf);
        iov.iov_base = iobuf->ptr;
        iov.iov_len = iobuf_pagesize(iobuf);
        /* Create the xdr payload */
        ret = xdr_serialize_generic(iov, req, xdrproc);
        if (ret == -1) {
            goto out;
        }
        /* Shrink the iovec to the actual encoded length. */
        iov.iov_len = ret;
        count = 1;
    }
    /* Send the msg */
    rpc_clnt_submit(rpc, prog, procnum, cbkfn, &iov, count, NULL, 0, iobref,
                    frame, NULL, 0, NULL, 0, NULL);
    /* Unconditionally set ret to 0 here. This is to guard against a double
     * STACK_DESTROY in case of a failure in rpc_clnt_submit AFTER the
     * request is sent over the wire: once in the callback function of the
     * request and once in the error codepath of some of the callers of
     * glusterd_submit_request().
     */
    ret = 0;
out:
    if (new_iobref) {
        iobref_unref(iobref);
    }
    /* NOTE(review): iobuf is NULL when req was NULL or iobuf_get2()
     * failed; iobuf_unref() is assumed to tolerate NULL — confirm. */
    iobuf_unref(iobuf);
    return ret;
}
/* XDR-encode the reply structure @arg into a freshly acquired iobuf
 * and expose it to the caller through @outmsg (iov_len trimmed to the
 * encoded size).
 *
 * Returns the iobuf (owned by the caller, to be iobref'd/unref'd) on
 * success, NULL on allocation or encoding failure.
 */
struct iobuf *
glusterd_serialize_reply(rpcsvc_request_t *req, void *arg, struct iovec *outmsg,
                         xdrproc_t xdrproc)
{
    struct iobuf *iob = NULL;
    ssize_t retlen = -1;
    ssize_t rsp_size = 0;
    /* First, get the io buffer into which the reply in arg will
     * be serialized.
     */
    rsp_size = xdr_sizeof(xdrproc, arg);
    iob = iobuf_get2(req->svc->ctx->iobuf_pool, rsp_size);
    if (!iob) {
        gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
               "Failed to get iobuf");
        goto ret;
    }
    iobuf_to_iovec(iob, outmsg);
    /* Use the given serializer to translate the give C structure in arg
     * to XDR format which will be written into the buffer in outmsg.
     */
    /* retlen is used to received the error since size_t is unsigned and we
     * need -1 for error notification during encoding.
     */
    retlen = xdr_serialize_generic(*outmsg, arg, xdrproc);
    if (retlen == -1) {
        gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_ENCODE_FAIL,
               "Failed to encode message");
        goto ret;
    }
    outmsg->iov_len = retlen;
ret:
    /* retlen == -1 covers both failure paths above (iob may be NULL
     * already on the allocation path; iobuf_unref then sees NULL). */
    if (retlen == -1) {
        iobuf_unref(iob);
        iob = NULL;
    }
    return iob;
}
/* Serialize @arg and submit it as the reply to @req, together with any
 * extra @payload iovecs.  A temporary iobref is created (and dropped on
 * exit) when the caller did not supply one.
 *
 * Note that submission is attempted even when serialization failed
 * (rsp is then the zeroed iovec); the RPC layer's return decides the
 * final result.  Returns 0 on successful submission, -1 otherwise.
 */
int
glusterd_submit_reply(rpcsvc_request_t *req, void *arg, struct iovec *payload,
                      int payloadcount, struct iobref *iobref,
                      xdrproc_t xdrproc)
{
    struct iobuf *iob = NULL;
    int ret = -1;
    struct iovec rsp = {
        0,
    };
    char new_iobref = 0;
    if (!req) {
        GF_ASSERT(req);
        goto out;
    }
    if (!iobref) {
        iobref = iobref_new();
        if (!iobref) {
            gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
                   "out of memory");
            goto out;
        }
        new_iobref = 1;
    }
    iob = glusterd_serialize_reply(req, arg, &rsp, xdrproc);
    if (!iob) {
        gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SERIALIZE_MSG_FAIL,
               "Failed to serialize reply");
    } else {
        iobref_add(iobref, iob);
    }
    ret = rpcsvc_submit_generic(req, &rsp, 1, payload, payloadcount, iobref);
    /* Now that we've done our job of handing the message to the RPC layer
     * we can safely unref the iob in the hope that RPC layer must have
     * ref'ed the iob on receiving into the txlist.
     */
    if (ret == -1) {
        gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_REPLY_SUBMIT_FAIL,
               "Reply submission failed");
        goto out;
    }
    ret = 0;
out:
    if (new_iobref) {
        iobref_unref(iobref);
    }
    if (iob)
        iobuf_unref(iob);
    return ret;
}
gf_boolean_t
glusterd_check_volume_exists(char *volname)
{
glusterd_volinfo_t *volinfo = NULL;
return (glusterd_volinfo_find(volname, &volinfo) == 0);
}
/* Drop one reference from @volinfo under its reflock.  When the count
 * reaches zero the volinfo is destroyed and NULL is returned;
 * otherwise the (still live) volinfo is returned. */
glusterd_volinfo_t *
glusterd_volinfo_unref(glusterd_volinfo_t *volinfo)
{
    int remaining = -1;

    pthread_mutex_lock(&volinfo->reflock);
    remaining = --volinfo->refcnt;
    pthread_mutex_unlock(&volinfo->reflock);

    if (remaining != 0)
        return volinfo;

    glusterd_volinfo_delete(volinfo);
    return NULL;
}
/* Take one reference on @volinfo under its reflock and return it,
 * allowing call-chaining. */
glusterd_volinfo_t *
glusterd_volinfo_ref(glusterd_volinfo_t *volinfo)
{
    pthread_mutex_lock(&volinfo->reflock);
    volinfo->refcnt++;
    pthread_mutex_unlock(&volinfo->reflock);

    return volinfo;
}
/* Allocate and initialize a fresh glusterd_volinfo_t: empty lists, the
 * three dicts (options, gsync slaves, active gsync slaves), the
 * per-volume svc objects and the reflock.  The new volinfo is handed
 * back through *volinfo with refcnt 1 (via glusterd_volinfo_ref).
 *
 * Returns 0 on success, -1 on allocation failure; partially built
 * state is unwound (dicts unref'd, struct freed) before returning.
 */
int32_t
glusterd_volinfo_new(glusterd_volinfo_t **volinfo)
{
    glusterd_volinfo_t *new_volinfo = NULL;
    int32_t ret = -1;
    GF_ASSERT(volinfo);
    new_volinfo = GF_CALLOC(1, sizeof(*new_volinfo),
                            gf_gld_mt_glusterd_volinfo_t);
    if (!new_volinfo)
        goto out;
    LOCK_INIT(&new_volinfo->lock);
    CDS_INIT_LIST_HEAD(&new_volinfo->vol_list);
    CDS_INIT_LIST_HEAD(&new_volinfo->snapvol_list);
    CDS_INIT_LIST_HEAD(&new_volinfo->bricks);
    CDS_INIT_LIST_HEAD(&new_volinfo->snap_volumes);
    new_volinfo->dict = dict_new();
    if (!new_volinfo->dict) {
        GF_FREE(new_volinfo);
        goto out;
    }
    new_volinfo->gsync_slaves = dict_new();
    if (!new_volinfo->gsync_slaves) {
        /* Unwind the pieces allocated so far. */
        dict_unref(new_volinfo->dict);
        GF_FREE(new_volinfo);
        goto out;
    }
    new_volinfo->gsync_active_slaves = dict_new();
    if (!new_volinfo->gsync_active_slaves) {
        dict_unref(new_volinfo->dict);
        dict_unref(new_volinfo->gsync_slaves);
        GF_FREE(new_volinfo);
        goto out;
    }
    /* "N/A" marks a volume that is not a snapshot clone of anything. */
    snprintf(new_volinfo->parent_volname, GD_VOLUME_NAME_MAX, "N/A");
    new_volinfo->snap_max_hard_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
    new_volinfo->xl = THIS;
    glusterd_snapdsvc_build(&new_volinfo->snapd.svc);
    glusterd_tierdsvc_build(&new_volinfo->tierd.svc);
    glusterd_gfproxydsvc_build(&new_volinfo->gfproxyd.svc);
    pthread_mutex_init(&new_volinfo->reflock, NULL);
    /* Publish with refcnt == 1. */
    *volinfo = glusterd_volinfo_ref(new_volinfo);
    ret = 0;
out:
    gf_msg_debug(THIS->name, 0, "Returning %d", ret);
    return ret;
}
/* This function will create a new volinfo and then
* dup the entries from volinfo to the new_volinfo.
*
* @param volinfo volinfo which will be duplicated
* @param dup_volinfo new volinfo which will be created
* @param set_userauth if this true then auth info is also set
*
* @return 0 on success else -1
*/
int32_t
glusterd_volinfo_dup(glusterd_volinfo_t *volinfo,
                     glusterd_volinfo_t **dup_volinfo,
                     gf_boolean_t set_userauth)
{
    int32_t ret = -1;
    xlator_t *this = NULL;
    glusterd_volinfo_t *new_volinfo = NULL;
    this = THIS;
    GF_ASSERT(this);
    GF_VALIDATE_OR_GOTO(this->name, volinfo, out);
    GF_VALIDATE_OR_GOTO(this->name, dup_volinfo, out);
    ret = glusterd_volinfo_new(&new_volinfo);
    if (ret) {
        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL,
               "not able to create the "
               "duplicate volinfo for the volume %s",
               volinfo->volname);
        goto out;
    }
    /* Scalar topology/configuration fields are copied one by one;
     * note that volname, volume_id, status etc. are deliberately NOT
     * copied here — callers such as glusterd_create_sub_tier_volinfo
     * fill those themselves. */
    new_volinfo->type = volinfo->type;
    new_volinfo->replica_count = volinfo->replica_count;
    new_volinfo->arbiter_count = volinfo->arbiter_count;
    new_volinfo->stripe_count = volinfo->stripe_count;
    new_volinfo->disperse_count = volinfo->disperse_count;
    new_volinfo->redundancy_count = volinfo->redundancy_count;
    new_volinfo->dist_leaf_count = volinfo->dist_leaf_count;
    new_volinfo->sub_count = volinfo->sub_count;
    new_volinfo->subvol_count = volinfo->subvol_count;
    new_volinfo->transport_type = volinfo->transport_type;
    new_volinfo->brick_count = volinfo->brick_count;
    new_volinfo->tier_info = volinfo->tier_info;
    new_volinfo->quota_conf_version = volinfo->quota_conf_version;
    new_volinfo->quota_xattr_version = volinfo->quota_xattr_version;
    new_volinfo->snap_max_hard_limit = volinfo->snap_max_hard_limit;
    new_volinfo->quota_conf_cksum = volinfo->quota_conf_cksum;
    /* Deep-copy the three dicts into the (already allocated) targets. */
    dict_copy(volinfo->dict, new_volinfo->dict);
    dict_copy(volinfo->gsync_slaves, new_volinfo->gsync_slaves);
    dict_copy(volinfo->gsync_active_slaves, new_volinfo->gsync_active_slaves);
    gd_update_volume_op_versions(new_volinfo);
    if (set_userauth) {
        glusterd_auth_set_username(new_volinfo, volinfo->auth.username);
        glusterd_auth_set_password(new_volinfo, volinfo->auth.password);
    }
    *dup_volinfo = new_volinfo;
    ret = 0;
out:
    /* On failure, destroy whatever glusterd_volinfo_new built. */
    if (ret && (NULL != new_volinfo)) {
        (void)glusterd_volinfo_delete(new_volinfo);
    }
    return ret;
}
/* This function will duplicate brickinfo
*
* @param brickinfo Source brickinfo
* @param dup_brickinfo Destination brickinfo
*
* @return 0 on success else -1
*/
int32_t
glusterd_brickinfo_dup(glusterd_brickinfo_t *brickinfo,
                       glusterd_brickinfo_t *dup_brickinfo)
{
    int32_t ret = -1;
    xlator_t *this = NULL;
    this = THIS;
    GF_ASSERT(this);
    GF_VALIDATE_OR_GOTO(this->name, brickinfo, out);
    GF_VALIDATE_OR_GOTO(this->name, dup_brickinfo, out);
    /* Fixed-size string fields; safe only because source and
     * destination are the same struct type with identically sized
     * buffers (the source was bounds-checked when filled). */
    strcpy(dup_brickinfo->hostname, brickinfo->hostname);
    strcpy(dup_brickinfo->path, brickinfo->path);
    strcpy(dup_brickinfo->real_path, brickinfo->real_path);
    strcpy(dup_brickinfo->device_path, brickinfo->device_path);
    strcpy(dup_brickinfo->fstype, brickinfo->fstype);
    strcpy(dup_brickinfo->mnt_opts, brickinfo->mnt_opts);
    ret = gf_canonicalize_path(dup_brickinfo->path);
    if (ret) {
        gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_CANONICALIZE_FAIL,
               "Failed to canonicalize "
               "brick path");
        goto out;
    }
    gf_uuid_copy(dup_brickinfo->uuid, brickinfo->uuid);
    dup_brickinfo->port = brickinfo->port;
    dup_brickinfo->rdma_port = brickinfo->rdma_port;
    /* logfile is heap-allocated, so it needs a real copy. */
    if (NULL != brickinfo->logfile) {
        dup_brickinfo->logfile = gf_strdup(brickinfo->logfile);
        if (NULL == dup_brickinfo->logfile) {
            ret = -1;
            goto out;
        }
    }
    strcpy(dup_brickinfo->brick_id, brickinfo->brick_id);
    strcpy(dup_brickinfo->mount_dir, brickinfo->mount_dir);
    dup_brickinfo->status = brickinfo->status;
    dup_brickinfo->snap_status = brickinfo->snap_status;
out:
    return ret;
}
/* Build a standalone volinfo describing one tier (hot or cold) of a
 * tiered volume @volinfo, named @new_volname, with duplicated
 * brickinfos for just that tier's bricks and the tier-specific counts
 * (type, replica/disperse/brick counts) applied.
 *
 * Relies on the brick list ordering: the first hot_brick_count bricks
 * belong to the hot tier, the rest to the cold tier.
 *
 * Returns 0 and sets *dup_volinfo on success; on failure returns
 * non-zero and leaves *dup_volinfo NULL (partial state is deleted).
 */
int32_t
glusterd_create_sub_tier_volinfo(glusterd_volinfo_t *volinfo,
                                 glusterd_volinfo_t **dup_volinfo,
                                 gf_boolean_t is_hot_tier,
                                 const char *new_volname)
{
    glusterd_brickinfo_t *brickinfo = NULL;
    glusterd_brickinfo_t *brickinfo_dup = NULL;
    gd_tier_info_t *tier_info = NULL;
    int i = 0;
    int ret = -1;
    tier_info = &(volinfo->tier_info);
    ret = glusterd_volinfo_dup(volinfo, dup_volinfo, _gf_true);
    if (ret) {
        gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_OP_FAILED,
               "Failed to create volinfo");
        return ret;
    }
    /* Identity fields that glusterd_volinfo_dup() does not copy. */
    gf_uuid_copy((*dup_volinfo)->volume_id, volinfo->volume_id);
    (*dup_volinfo)->is_snap_volume = volinfo->is_snap_volume;
    (*dup_volinfo)->status = volinfo->status;
    (*dup_volinfo)->snapshot = volinfo->snapshot;
    if (snprintf((*dup_volinfo)->volname, sizeof((*dup_volinfo)->volname), "%s",
                 new_volname) >= sizeof((*dup_volinfo)->volname)) {
        ret = -1;
        goto out;
    }
    memcpy(&(*dup_volinfo)->tier_info, &volinfo->tier_info,
           sizeof(volinfo->tier_info));
    /* Copy only the bricks belonging to the requested tier: index
     * i <= hot_brick_count => hot tier, the remainder => cold tier. */
    cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
    {
        i++;
        if (is_hot_tier) {
            if (i > volinfo->tier_info.hot_brick_count)
                break;
        } else {
            if (i <= volinfo->tier_info.hot_brick_count)
                continue;
        }
        ret = glusterd_brickinfo_new(&brickinfo_dup);
        if (ret) {
            gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_NEW_INFO_FAIL,
                   "Failed to create "
                   "new brickinfo");
            goto out;
        }
        glusterd_brickinfo_dup(brickinfo, brickinfo_dup);
        cds_list_add_tail(&brickinfo_dup->brick_list,
                          &((*dup_volinfo)->bricks));
    }
    /* Apply the per-tier topology parameters. */
    if (is_hot_tier) {
        (*dup_volinfo)->type = tier_info->hot_type;
        (*dup_volinfo)->replica_count = tier_info->hot_replica_count;
        (*dup_volinfo)->brick_count = tier_info->hot_brick_count;
        (*dup_volinfo)->dist_leaf_count = glusterd_get_dist_leaf_count(
            *dup_volinfo);
    } else {
        (*dup_volinfo)->type = tier_info->cold_type;
        (*dup_volinfo)->replica_count = tier_info->cold_replica_count;
        (*dup_volinfo)->disperse_count = tier_info->cold_disperse_count;
        (*dup_volinfo)->redundancy_count = tier_info->cold_redundancy_count;
        (*dup_volinfo)->dist_leaf_count = tier_info->cold_dist_leaf_count;
        (*dup_volinfo)->brick_count = tier_info->cold_brick_count;
    }
out:
    if (ret && *dup_volinfo) {
        glusterd_volinfo_delete(*dup_volinfo);
        *dup_volinfo = NULL;
    }
    return ret;
}
/*
* gd_vol_is_geo_rep_active:
* This function checks for any running geo-rep session for
* the volume given.
*
* Return Value:
* _gf_true : If any running geo-rep session.
* _gf_false: If no running geo-rep session.
*/
/* True when @volinfo has at least one active geo-replication session,
 * i.e. its gsync_active_slaves dict exists and is non-empty. */
gf_boolean_t
gd_vol_is_geo_rep_active(glusterd_volinfo_t *volinfo)
{
    GF_ASSERT(volinfo);

    if (!volinfo->gsync_active_slaves)
        return _gf_false;

    return (volinfo->gsync_active_slaves->count > 0) ? _gf_true : _gf_false;
}
/* Release the volume's auth credentials.
 *
 * Fix: the pointers are now reset to NULL after GF_FREE so that a
 * repeated cleanup (or a later glusterd_auth_set_*) cannot double-free
 * or read the stale allocations.
 */
void
glusterd_auth_cleanup(glusterd_volinfo_t *volinfo)
{
    GF_ASSERT(volinfo);

    GF_FREE(volinfo->auth.username);
    volinfo->auth.username = NULL;

    GF_FREE(volinfo->auth.password);
    volinfo->auth.password = NULL;
}
/* Return the volume's stored auth username (may be NULL if never set).
 * The string remains owned by @volinfo — callers must not free it. */
char *
glusterd_auth_get_username(glusterd_volinfo_t *volinfo)
{
    GF_ASSERT(volinfo);
    return volinfo->auth.username;
}
/* Return the volume's stored auth password (may be NULL if never set).
 * The string remains owned by @volinfo — callers must not free it. */
char *
glusterd_auth_get_password(glusterd_volinfo_t *volinfo)
{
    GF_ASSERT(volinfo);
    return volinfo->auth.password;
}
/* Store a private copy of @username in the volinfo's auth info.
 *
 * Fix: the old code returned 0 even when gf_strdup() failed, silently
 * leaving auth.username NULL.  Now returns -1 on allocation failure,
 * 0 on success.  Note: any previously set username is overwritten
 * without being freed — callers use glusterd_auth_cleanup for that.
 */
int32_t
glusterd_auth_set_username(glusterd_volinfo_t *volinfo, char *username)
{
    GF_ASSERT(volinfo);
    GF_ASSERT(username);

    volinfo->auth.username = gf_strdup(username);
    if (!volinfo->auth.username)
        return -1;
    return 0;
}
/* Store a private copy of @password in the volinfo's auth info.
 *
 * Fix: the old code returned 0 even when gf_strdup() failed, silently
 * leaving auth.password NULL.  Now returns -1 on allocation failure,
 * 0 on success.  Note: any previously set password is overwritten
 * without being freed — callers use glusterd_auth_cleanup for that.
 */
int32_t
glusterd_auth_set_password(glusterd_volinfo_t *volinfo, char *password)
{
    GF_ASSERT(volinfo);
    GF_ASSERT(password);

    volinfo->auth.password = gf_strdup(password);
    if (!volinfo->auth.password)
        return -1;
    return 0;
}
/* Unlink @brickinfo from whatever brick list it is on, destroy its
 * store handle, and free its heap members and the struct itself.
 * Always returns 0. */
int32_t
glusterd_brickinfo_delete(glusterd_brickinfo_t *brickinfo)
{
    GF_ASSERT(brickinfo);

    cds_list_del_init(&brickinfo->brick_list);
    (void)gf_store_handle_destroy(brickinfo->shandle);

    GF_FREE(brickinfo->logfile);
    GF_FREE(brickinfo);

    return 0;
}
/* Delete every brickinfo on @volinfo's brick list (safe iteration,
 * since each entry is unlinked and freed).  Returns 0, or the first
 * non-zero status from glusterd_brickinfo_delete. */
int32_t
glusterd_volume_brickinfos_delete(glusterd_volinfo_t *volinfo)
{
    glusterd_brickinfo_t *cur = NULL;
    glusterd_brickinfo_t *next = NULL;
    int32_t rc = 0;

    GF_ASSERT(volinfo);

    cds_list_for_each_entry_safe(cur, next, &volinfo->bricks, brick_list)
    {
        rc = glusterd_brickinfo_delete(cur);
        if (rc)
            break;
    }

    gf_msg_debug(THIS->name, 0, "Returning %d", rc);
    return rc;
}
/* Unlink @volinfo from the global volume list and drop one reference;
 * the volinfo is destroyed if that was the last reference.
 * Always returns 0. */
int
glusterd_volinfo_remove(glusterd_volinfo_t *volinfo)
{
    cds_list_del_init(&volinfo->vol_list);
    glusterd_volinfo_unref(volinfo);
    return 0;
}
/* Fully destroy @volinfo: unlink it from the volume and snap lists,
 * delete all brickinfos, release dicts, store handles, svc connection
 * objects, auth credentials and the reflock, then free the struct.
 *
 * Returns 0 on success; non-zero only if deleting the brickinfos
 * failed (in which case the volinfo itself is NOT freed).
 */
int32_t
glusterd_volinfo_delete(glusterd_volinfo_t *volinfo)
{
    int32_t ret = -1;
    GF_ASSERT(volinfo);
    cds_list_del_init(&volinfo->vol_list);
    cds_list_del_init(&volinfo->snapvol_list);
    ret = glusterd_volume_brickinfos_delete(volinfo);
    if (ret)
        goto out;
    if (volinfo->dict)
        dict_unref(volinfo->dict);
    if (volinfo->gsync_slaves)
        dict_unref(volinfo->gsync_slaves);
    if (volinfo->gsync_active_slaves)
        dict_unref(volinfo->gsync_active_slaves);
    GF_FREE(volinfo->logdir);
    if (volinfo->rebal.dict)
        dict_unref(volinfo->rebal.dict);
    /* Destroy the connection object for per volume svc daemons */
    glusterd_conn_term(&volinfo->snapd.svc.conn);
    glusterd_conn_term(&volinfo->tierd.svc.conn);
    glusterd_conn_term(&volinfo->gfproxyd.svc.conn);
    gf_store_handle_destroy(volinfo->quota_conf_shandle);
    gf_store_handle_destroy(volinfo->shandle);
    gf_store_handle_destroy(volinfo->node_state_shandle);
    gf_store_handle_destroy(volinfo->snapd.handle);
    glusterd_auth_cleanup(volinfo);
    pthread_mutex_destroy(&volinfo->reflock);
    GF_FREE(volinfo);
    ret = 0;
out:
    gf_msg_debug(THIS->name, 0, "Returning %d", ret);
    return ret;
}
/* Allocate a zero-initialized glusterd_brick_proc_t with empty brick
 * lists and brick_count 0, returned through *brickprocess.
 * Returns 0 on success, -1 on validation or allocation failure. */
int32_t
glusterd_brickprocess_new(glusterd_brick_proc_t **brickprocess)
{
    glusterd_brick_proc_t *proc = NULL;
    int32_t rc = -1;

    GF_VALIDATE_OR_GOTO(THIS->name, brickprocess, out);

    proc = GF_CALLOC(1, sizeof(*proc), gf_gld_mt_glusterd_brick_proc_t);
    if (!proc)
        goto out;

    CDS_INIT_LIST_HEAD(&proc->bricks);
    CDS_INIT_LIST_HEAD(&proc->brick_proc_list);
    proc->brick_count = 0;

    *brickprocess = proc;
    rc = 0;
out:
    gf_msg_debug(THIS->name, 0, "Returning %d", rc);
    return rc;
}
/* Allocate a zero-initialized glusterd_brickinfo_t with its list heads
 * and restart mutex initialized, returned through *brickinfo.
 * Returns 0 on success, -1 on allocation failure. */
int32_t
glusterd_brickinfo_new(glusterd_brickinfo_t **brickinfo)
{
    glusterd_brickinfo_t *info = NULL;
    int32_t rc = -1;

    GF_ASSERT(brickinfo);

    info = GF_CALLOC(1, sizeof(*info), gf_gld_mt_glusterd_brickinfo_t);
    if (!info)
        goto out;

    CDS_INIT_LIST_HEAD(&info->brick_list);
    CDS_INIT_LIST_HEAD(&info->mux_bricks);
    pthread_mutex_init(&info->restart_mutex, NULL);

    *brickinfo = info;
    rc = 0;
out:
    gf_msg_debug(THIS->name, 0, "Returning %d", rc);
    return rc;
}
/* Scan every brick ID of @volinfo (format "<volname>-<n>") and return
 * the next unused numeric suffix, i.e. max(n) + 1 (0 for an empty
 * brick list).  Returns a negative value if any brick ID cannot be
 * parsed.
 */
int
glusterd_get_next_available_brickid(glusterd_volinfo_t *volinfo)
{
    glusterd_brickinfo_t *brickinfo = NULL;
    char *token = NULL;
    int brickid = 0;
    int max_brickid = -1;
    int ret = -1;

    cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
    {
        token = strrchr(brickinfo->brick_id, '-');
        /* Fix: strrchr() returns NULL when brick_id has no '-'; the
         * old code incremented and dereferenced that NULL pointer. */
        if (!token) {
            gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ID_GEN_FAILED,
                   "Unable to generate brick ID");
            return -1;
        }
        ret = gf_string2int32(token + 1, &brickid);
        if (ret < 0) {
            gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ID_GEN_FAILED,
                   "Unable to generate brick ID");
            return ret;
        }
        if (brickid > max_brickid)
            max_brickid = brickid;
    }

    return max_brickid + 1;
}
/* Ensure @brickinfo->uuid identifies a known peer: if it already
 * matches this node or an existing peerinfo, succeed immediately;
 * otherwise resolve it from the brick's hostname.
 *
 * Returns 0 on success, non-zero if the hostname cannot be mapped to
 * a peer uuid.
 */
int32_t
glusterd_resolve_brick(glusterd_brickinfo_t *brickinfo)
{
    int32_t ret = -1;
    xlator_t *this = NULL;
    this = THIS;
    GF_ASSERT(this);
    GF_ASSERT(brickinfo);
    /* Already resolved: it is our own uuid or a known peer's. */
    if (!gf_uuid_compare(brickinfo->uuid, MY_UUID) ||
        (glusterd_peerinfo_find_by_uuid(brickinfo->uuid) != NULL)) {
        ret = 0;
        goto out;
    }
    ret = glusterd_hostname_to_uuid(brickinfo->hostname, brickinfo->uuid);
out:
    gf_msg_debug(this->name, 0, "Returning %d", ret);
    return ret;
}
/* Compute the brick's directory relative to its mount point.
 *
 * Only performed when @hostname resolves to this node; for remote
 * bricks the function succeeds without touching @mount_dir.  For a
 * local brick, the filesystem root of @brickpath is found and the
 * remainder ("/<subdir>") is written into @mount_dir.
 *
 * NOTE(review): brick_dir++ assumes brickpath is strictly longer than
 * the mount point (a brick that IS the mount point would step past the
 * string) — confirm callers reject bare mount points first.
 *
 * Returns 0 on success, -1 on failure.
 */
int32_t
glusterd_get_brick_mount_dir(char *brickpath, char *hostname, char *mount_dir)
{
    char *mnt_pt = NULL;
    char *brick_dir = NULL;
    int32_t ret = -1;
    uuid_t brick_uuid = {
        0,
    };
    xlator_t *this = NULL;
    this = THIS;
    GF_ASSERT(this);
    GF_ASSERT(brickpath);
    GF_ASSERT(hostname);
    GF_ASSERT(mount_dir);
    ret = glusterd_hostname_to_uuid(hostname, brick_uuid);
    if (ret) {
        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_TO_UUID_FAIL,
               "Failed to convert hostname %s to uuid", hostname);
        goto out;
    }
    /* Only meaningful for bricks hosted on this node. */
    if (!gf_uuid_compare(brick_uuid, MY_UUID)) {
        ret = glusterd_get_brick_root(brickpath, &mnt_pt);
        if (ret) {
            gf_msg(this->name, GF_LOG_WARNING, 0,
                   GD_MSG_BRICKPATH_ROOT_GET_FAIL,
                   "Could not get the root of the brick path %s", brickpath);
            goto out;
        }
        /* Sanity: the mount point must be a prefix of the brick path. */
        if (strncmp(brickpath, mnt_pt, strlen(mnt_pt))) {
            gf_msg(this->name, GF_LOG_WARNING, 0,
                   GD_MSG_BRKPATH_MNTPNT_MISMATCH, "brick: %s brick mount: %s",
                   brickpath, mnt_pt);
            ret = -1;
            goto out;
        }
        /* Skip past the mount point and its trailing '/'. */
        brick_dir = &brickpath[strlen(mnt_pt)];
        brick_dir++;
        snprintf(mount_dir, VALID_GLUSTERD_PATHMAX, "/%s", brick_dir);
    }
out:
    if (mnt_pt)
        GF_FREE(mnt_pt);
    gf_msg_trace(this->name, 0, "Returning %d", ret);
    return ret;
}
/* Parse a "<hostname>:<path>" brick specification into a newly
 * allocated glusterd_brickinfo_t.
 *
 * @brick                "host:path" string (with an optional "?vg"
 *                       suffix when BD-xlator support is compiled in)
 * @brickinfo            out: the new brickinfo on success
 * @construct_real_path  when true, resolve the host to a peer uuid
 *                       and, for a local brick, fill real_path via
 *                       realpath(3)
 * @op_errstr            optional out: human-readable error message
 *
 * Returns 0 on success, -1 on failure (nothing is published then).
 */
int32_t
glusterd_brickinfo_new_from_brick(char *brick, glusterd_brickinfo_t **brickinfo,
                                  gf_boolean_t construct_real_path,
                                  char **op_errstr)
{
    char *hostname = NULL;
    char *path = NULL;
    char *tmp_host = NULL;
    char *tmp_path = NULL;
#ifdef HAVE_BD_XLATOR
    char *vg = NULL;
#endif
    int32_t ret = -1;
    glusterd_brickinfo_t *new_brickinfo = NULL;
    xlator_t *this = NULL;
    char abspath[PATH_MAX] = "";

    this = THIS;
    GF_ASSERT(this);
    GF_ASSERT(brick);
    GF_ASSERT(brickinfo);

    /* Split the spec on private copies; hostname/path point into them. */
    tmp_host = gf_strdup(brick);
    if (tmp_host && !get_host_name(tmp_host, &hostname))
        goto out;
    tmp_path = gf_strdup(brick);
    if (tmp_path && !get_path_name(tmp_path, &path))
        goto out;

    GF_ASSERT(hostname);
    GF_ASSERT(path);

    ret = glusterd_brickinfo_new(&new_brickinfo);
    if (ret)
        goto out;

#ifdef HAVE_BD_XLATOR
    vg = strchr(path, '?');
    /* ? is used as a delimiter for vg */
    if (vg) {
        if (snprintf(new_brickinfo->vg, PATH_MAX, "%s", vg + 1) >= PATH_MAX) {
            ret = -1;
            goto out;
        }
        *vg = '\0';
    }
    new_brickinfo->caps = CAPS_BD;
#endif
    ret = gf_canonicalize_path(path);
    if (ret)
        goto out;
    ret = snprintf(new_brickinfo->hostname, sizeof(new_brickinfo->hostname),
                   "%s", hostname);
    if (ret < 0 || ret >= sizeof(new_brickinfo->hostname)) {
        ret = -1;
        goto out;
    }
    ret = snprintf(new_brickinfo->path, sizeof(new_brickinfo->path), "%s",
                   path);
    if (ret < 0 || ret >= sizeof(new_brickinfo->path)) {
        ret = -1;
        goto out;
    }

    if (construct_real_path) {
        ret = glusterd_hostname_to_uuid(new_brickinfo->hostname,
                                        new_brickinfo->uuid);
        if (ret) {
            gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_TO_UUID_FAIL,
                   "Failed to convert hostname %s to uuid", hostname);
            if (op_errstr)
                gf_asprintf(op_errstr,
                            "Host %s is not in "
                            "\'Peer in Cluster\' state",
                            new_brickinfo->hostname);
            goto out;
        }
    }

    if (construct_real_path && !gf_uuid_compare(new_brickinfo->uuid, MY_UUID) &&
        new_brickinfo->real_path[0] == '\0') {
        if (!realpath(new_brickinfo->path, abspath)) {
            /* ENOENT indicates that brick path has not been created
             * which is a valid scenario */
            if (errno != ENOENT) {
                gf_msg(this->name, GF_LOG_CRITICAL, errno,
                       GD_MSG_BRICKINFO_CREATE_FAIL,
                       "realpath"
                       " () failed for brick %s. The "
                       "underlying filesystem may be in bad "
                       "state. Error - %s",
                       new_brickinfo->path, strerror(errno));
                ret = -1;
                goto out;
            }
        }
        if (strlen(abspath) >= sizeof(new_brickinfo->real_path)) {
            ret = -1;
            goto out;
        }
        (void)strncpy(new_brickinfo->real_path, abspath,
                      sizeof(new_brickinfo->real_path));
    }

    *brickinfo = new_brickinfo;

    ret = 0;
out:
    /* Fix: the old cleanup freed tmp_host and then tested the freed
     * pointer to decide whether to free tmp_path — reading a freed
     * pointer's value (indeterminate per C) and leaking tmp_path when
     * tmp_host was NULL.  Also release the half-built brickinfo on the
     * error paths instead of leaking it. */
    if (ret && new_brickinfo)
        (void)glusterd_brickinfo_delete(new_brickinfo);
    GF_FREE(tmp_host);
    GF_FREE(tmp_path);
    gf_msg_debug(this->name, 0, "Returning %d", ret);
    return ret;
}
/* Return _gf_true when one of @str1/@str2 is a path-component prefix
 * of the other (or they are equal).  "Path-component" means the longer
 * string must continue with '/' right where the shorter one ends, so
 * "/a/b" is a prefix of "/a/b/c" but not of "/a/bc".  Two empty
 * strings count as prefixes of each other; an empty string is not a
 * prefix of a non-empty one. */
static gf_boolean_t
_is_prefix(char *str1, char *str2)
{
    int len1 = 0;
    int len2 = 0;
    int shorter = 0;
    int idx = 0;
    char *longer = NULL;

    GF_ASSERT(str1);
    GF_ASSERT(str2);

    len1 = strlen(str1);
    len2 = strlen(str2);
    shorter = min(len1, len2);

    /* Exactly one empty string: not prefixes of each other. */
    if ((shorter == 0) && (len1 != len2))
        return _gf_false;

    for (idx = 0; idx < shorter; idx++) {
        if (str1[idx] != str2[idx])
            return _gf_false;
    }

    if (len1 == len2)
        return _gf_true;

    longer = (len1 > len2) ? str1 : str2;
    /* The shared run must end at a path-component boundary. */
    return (longer[shorter] == '/') ? _gf_true : _gf_false;
}
/* Checks if @path is available in the peer identified by @uuid
* 'availability' is determined by querying current state of volumes
* in the cluster. */
gf_boolean_t
glusterd_is_brickpath_available(uuid_t uuid, char *path)
{
    glusterd_brickinfo_t *brickinfo = NULL;
    glusterd_volinfo_t *volinfo = NULL;
    glusterd_conf_t *priv = NULL;
    gf_boolean_t available = _gf_false;
    char tmp_path[PATH_MAX] = "";
    priv = THIS->private;
    /* Seed tmp_path with the raw path (also guards against overlong
     * input); realpath() below overwrites it when resolution works. */
    if (snprintf(tmp_path, PATH_MAX, "%s", path) >= PATH_MAX)
        goto out;
    /* path may not yet exist */
    if (!realpath(path, tmp_path)) {
        if (errno != ENOENT) {
            gf_msg(THIS->name, GF_LOG_CRITICAL, errno,
                   GD_MSG_BRICKINFO_CREATE_FAIL,
                   "realpath"
                   " () failed for brick %s. The "
                   "underlying filesystem may be in bad "
                   "state. Error - %s",
                   path, strerror(errno));
            goto out;
        }
        /* When realpath(3) fails, tmp_path is undefined. */
        (void)snprintf(tmp_path, sizeof(tmp_path), "%s", path);
    }
    /* Reject the path if it is a prefix of — or prefixed by — any
     * existing brick path on the peer identified by @uuid. */
    cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
    {
        cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
        {
            if (gf_uuid_compare(uuid, brickinfo->uuid))
                continue;
            /* A true result here means the paths overlap, i.e. the
             * candidate is NOT available (despite the log wording). */
            if (_is_prefix(brickinfo->real_path, tmp_path)) {
                gf_msg(THIS->name, GF_LOG_CRITICAL, 0,
                       GD_MSG_BRICKINFO_CREATE_FAIL,
                       "_is_prefix call failed for brick %s "
                       "against brick %s",
                       tmp_path, brickinfo->real_path);
                goto out;
            }
        }
    }
    available = _gf_true;
out:
    return available;
}
#ifdef HAVE_BD_XLATOR
/*
* Sets the tag of the format "trusted.glusterfs.volume-id:<uuid>" in
* the brick VG. It is used to avoid using same VG for another brick.
* @volume-id - gfid, @brick - brick info, @msg - Error message returned
* to the caller
*/
/* Tag the brick's VG with "trusted.glusterfs.volume-id:<uuid>" so the
 * same VG cannot be reused by another brick.
 *
 * @volume_id  gfid of the volume
 * @brick      brick whose VG gets tagged
 * @msg        caller-supplied buffer for an error message
 * @msg_size   capacity of @msg
 *
 * Returns 0 on success, -1 on failure (with @msg filled in).
 *
 * Fix: every snprintf here used sizeof(*msg) — i.e. sizeof(char) == 1
 * — so all error messages were truncated to the empty string.  The
 * msg_size parameter is now used as intended.
 */
int
glusterd_bd_set_vg_tag(unsigned char *volume_id, glusterd_brickinfo_t *brick,
                       char *msg, int msg_size)
{
    lvm_t handle = NULL;
    vg_t vg = NULL;
    char *uuid = NULL;
    int ret = -1;

    gf_asprintf(&uuid, "%s:%s", GF_XATTR_VOL_ID_KEY, uuid_utoa(volume_id));
    if (!uuid) {
        snprintf(msg, msg_size,
                 "Could not allocate memory "
                 "for tag");
        return -1;
    }

    handle = lvm_init(NULL);
    if (!handle) {
        snprintf(msg, msg_size, "lvm_init failed");
        goto out;
    }

    vg = lvm_vg_open(handle, brick->vg, "w", 0);
    if (!vg) {
        snprintf(msg, msg_size, "Could not open VG %s", brick->vg);
        goto out;
    }

    if (lvm_vg_add_tag(vg, uuid) < 0) {
        snprintf(msg, msg_size,
                 "Could not set tag %s for "
                 "VG %s",
                 uuid, brick->vg);
        goto out;
    }
    lvm_vg_write(vg);
    ret = 0;
out:
    GF_FREE(uuid);

    if (vg)
        lvm_vg_close(vg);
    if (handle)
        lvm_quit(handle);

    return ret;
}
#endif
/* Validate @brickinfo->path as a usable brick directory for volume
 * @volname and create it (plus its .glusterfs subdirectory) if needed.
 *
 * Checks performed: the path must be (or become) a directory, must not
 * live inside glusterd's working directory (except for the shared-storage
 * volume), and — unless @is_force — must not be a mount point itself nor
 * sit on the root partition (the latter warning can be bypassed with
 * @ignore_partition).  The volume-id xattr is also stamped on the brick.
 *
 * On failure, any directory this call created is removed again and
 * *op_errstr (if not already set) receives a strdup'ed message.
 * Returns 0 on success, -1 on failure. */
int
glusterd_validate_and_create_brickpath(glusterd_brickinfo_t *brickinfo,
                                       uuid_t volume_id, char *volname,
                                       char **op_errstr, gf_boolean_t is_force,
                                       gf_boolean_t ignore_partition)
{
    int ret = -1;
    char parentdir[PATH_MAX] = "";
    struct stat parent_st = {
        0,
    };
    struct stat brick_st = {
        0,
    };
    struct stat root_st = {
        0,
    };
    char msg[2048] = "";
    /* Tracks whether *we* created the directory, so cleanup on failure
     * never removes a pre-existing user directory. */
    gf_boolean_t is_created = _gf_false;
    char glusterfs_dir_path[PATH_MAX] = "";
    int32_t len = 0;

    ret = sys_mkdir(brickinfo->path, 0777);
    if (ret) {
        if (errno != EEXIST) {
            len = snprintf(msg, sizeof(msg),
                           "Failed to create "
                           "brick directory for brick %s:%s. "
                           "Reason : %s ",
                           brickinfo->hostname, brickinfo->path,
                           strerror(errno));
            goto out;
        }
    } else {
        is_created = _gf_true;
    }

    ret = sys_lstat(brickinfo->path, &brick_st);
    if (ret) {
        len = snprintf(msg, sizeof(msg),
                       "lstat failed on %s. "
                       "Reason : %s",
                       brickinfo->path, strerror(errno));
        goto out;
    }

    /* A pre-existing path must already be a directory. */
    if ((!is_created) && (!S_ISDIR(brick_st.st_mode))) {
        len = snprintf(msg, sizeof(msg),
                       "The provided path %s "
                       "which is already present, is not a directory",
                       brickinfo->path);
        ret = -1;
        goto out;
    }

    len = snprintf(parentdir, sizeof(parentdir), "%s/..", brickinfo->path);
    if ((len < 0) || (len >= sizeof(parentdir))) {
        ret = -1;
        goto out;
    }

    ret = sys_lstat("/", &root_st);
    if (ret) {
        len = snprintf(msg, sizeof(msg),
                       "lstat failed on /. "
                       "Reason : %s",
                       strerror(errno));
        goto out;
    }

    ret = sys_lstat(parentdir, &parent_st);
    if (ret) {
        len = snprintf(msg, sizeof(msg),
                       "lstat failed on %s. "
                       "Reason : %s",
                       parentdir, strerror(errno));
        goto out;
    }

    /* Refuse bricks inside glusterd's own working directory, except for
     * the internal shared-storage volume which legitimately lives there. */
    if (strncmp(volname, GLUSTER_SHARED_STORAGE,
                SLEN(GLUSTER_SHARED_STORAGE)) &&
        sizeof(GLUSTERD_DEFAULT_WORKDIR) <= (strlen(brickinfo->path) + 1) &&
        !strncmp(brickinfo->path, GLUSTERD_DEFAULT_WORKDIR,
                 (sizeof(GLUSTERD_DEFAULT_WORKDIR) - 1))) {
        len = snprintf(msg, sizeof(msg),
                       "Brick isn't allowed to be "
                       "created inside glusterd's working directory.");
        ret = -1;
        goto out;
    }

    if (!is_force) {
        /* Different device from the parent ⇒ the path itself is a mount
         * point; a sub-directory should be used instead. */
        if (brick_st.st_dev != parent_st.st_dev) {
            len = snprintf(msg, sizeof(msg),
                           "The brick %s:%s "
                           "is a mount point. Please create a "
                           "sub-directory under the mount point "
                           "and use that as the brick directory. "
                           "Or use 'force' at the end of the "
                           "command if you want to override this "
                           "behavior.",
                           brickinfo->hostname, brickinfo->path);
            ret = -1;
            goto out;
        } else if (parent_st.st_dev == root_st.st_dev) {
            len = snprintf(msg, sizeof(msg),
                           "The brick %s:%s "
                           "is being created in the root "
                           "partition. It is recommended that "
                           "you don't use the system's root "
                           "partition for storage backend. Or "
                           "use 'force' at the end of the "
                           "command if you want to override this "
                           "behavior.",
                           brickinfo->hostname, brickinfo->path);

            /* If --wignore-partition flag is used, ignore warnings
             * related to bricks being on root partition when 'force'
             * is not used */
            if ((len < 0) || (len >= sizeof(msg)) || !ignore_partition) {
                ret = -1;
                goto out;
            }
        }
    }

#ifdef HAVE_BD_XLATOR
    if (brickinfo->vg[0]) {
        ret = glusterd_bd_set_vg_tag(volume_id, brickinfo, msg, sizeof(msg));
        if (ret)
            goto out;
    }
#endif
    /* Stamp the volume-id xattr so the brick cannot silently be reused
     * for a different volume. */
    ret = glusterd_check_and_set_brick_xattr(
        brickinfo->hostname, brickinfo->path, volume_id, op_errstr, is_force);
    if (ret)
        goto out;

    /* create .glusterfs directory */
    len = snprintf(glusterfs_dir_path, sizeof(glusterfs_dir_path), "%s/%s",
                   brickinfo->path, ".glusterfs");
    if ((len < 0) || (len >= sizeof(glusterfs_dir_path))) {
        ret = -1;
        goto out;
    }

    ret = sys_mkdir(glusterfs_dir_path, 0600);
    if (ret && (errno != EEXIST)) {
        len = snprintf(msg, sizeof(msg),
                       "Failed to create "
                       ".glusterfs directory for brick %s:%s. "
                       "Reason : %s ",
                       brickinfo->hostname, brickinfo->path, strerror(errno));
        goto out;
    }

    ret = 0;

out:
    /* A negative len means some snprintf above failed outright. */
    if (len < 0) {
        ret = -1;
    }
    /* Only remove the directory if this call created it. */
    if (ret && is_created) {
        (void)recursive_rmdir(brickinfo->path);
    }
    if (ret && !*op_errstr && msg[0] != '\0')
        *op_errstr = gf_strdup(msg);

    return ret;
}
/* Look up the brick of @volinfo that matches @path and is owned by the
 * peer identified either by @uuid or (when @uuid is NULL) by @hostname.
 * On success stores the match in *brickinfo (if non-NULL) and returns 0;
 * returns -1 when no brick matches or a brick cannot be resolved. */
int32_t
glusterd_volume_brickinfo_get(uuid_t uuid, char *hostname, char *path,
                              glusterd_volinfo_t *volinfo,
                              glusterd_brickinfo_t **brickinfo)
{
    glusterd_brickinfo_t *iter = NULL;
    uuid_t search_uuid = {0};
    int32_t ret = -1;
    xlator_t *this = THIS;

    /* Resolve the peer uuid to search for. */
    if (!uuid) {
        ret = glusterd_hostname_to_uuid(hostname, search_uuid);
        if (ret)
            goto out;
    } else {
        gf_uuid_copy(search_uuid, uuid);
    }

    ret = -1;
    cds_list_for_each_entry(iter, &volinfo->bricks, brick_list)
    {
        /* Bricks with an unresolved owner must resolve before compare. */
        if (gf_uuid_is_null(iter->uuid) && (glusterd_resolve_brick(iter) != 0))
            goto out;
        if (gf_uuid_compare(search_uuid, iter->uuid) != 0)
            continue;
        if (!strcmp(iter->path, path)) {
            gf_msg_debug(this->name, 0, LOGSTR_FOUND_BRICK, iter->hostname,
                         iter->path, volinfo->volname);
            if (brickinfo)
                *brickinfo = iter;
            ret = 0;
            break;
        }
    }

out:
    gf_msg_debug(this->name, 0, "Returning %d", ret);
    return ret;
}
/* Parse the "host:/path" string @brick and locate the corresponding
 * brickinfo within @volinfo.  The temporary brickinfo created for parsing
 * is always released before returning.  Returns 0 on success, -1 on
 * parse failure or when the brick is not part of the volume. */
int32_t
glusterd_volume_brickinfo_get_by_brick(char *brick, glusterd_volinfo_t *volinfo,
                                       glusterd_brickinfo_t **brickinfo,
                                       gf_boolean_t construct_real_path)
{
    int32_t ret = -1;
    glusterd_brickinfo_t *parsed = NULL;

    GF_ASSERT(brick);
    GF_ASSERT(volinfo);

    ret = glusterd_brickinfo_new_from_brick(brick, &parsed,
                                            construct_real_path, NULL);
    if (ret == 0) {
        ret = glusterd_volume_brickinfo_get(NULL, parsed->hostname,
                                            parsed->path, volinfo, brickinfo);
        (void)glusterd_brickinfo_delete(parsed);
    }

    gf_msg_debug("glusterd", 0, "Returning %d", ret);
    return ret;
}
/* Return _gf_true when the brick identified by @hostname/@path in
 * @volinfo is marked decommissioned; _gf_false otherwise, including
 * when no such brick can be found. */
gf_boolean_t
glusterd_is_brick_decommissioned(glusterd_volinfo_t *volinfo, char *hostname,
                                 char *path)
{
    glusterd_brickinfo_t *found = NULL;

    if (glusterd_volume_brickinfo_get(NULL, hostname, path, volinfo, &found))
        return _gf_false;

    return found->decommissioned;
}
/* Find the volume whose volume_id equals @volume_id.  On success store
 * it in *volinfo and return 0; return -1 when @volume_id is NULL or no
 * volume matches. */
int
glusterd_volinfo_find_by_volume_id(uuid_t volume_id,
                                   glusterd_volinfo_t **volinfo)
{
    int32_t ret = -1;
    xlator_t *this = THIS;
    glusterd_volinfo_t *entry = NULL;
    glusterd_conf_t *conf = NULL;

    if (!volume_id)
        return -1;

    conf = this->private;

    cds_list_for_each_entry(entry, &conf->volumes, vol_list)
    {
        if (gf_uuid_compare(volume_id, entry->volume_id) != 0)
            continue;
        *volinfo = entry;
        gf_msg_debug(this->name, 0, "Volume %s found", entry->volname);
        ret = 0;
        break;
    }

    return ret;
}
/* Find the volume named @volname in glusterd's volume list.  On success
 * store it in *volinfo and return 0; return -1 when absent. */
int32_t
glusterd_volinfo_find(const char *volname, glusterd_volinfo_t **volinfo)
{
    glusterd_volinfo_t *entry = NULL;
    int32_t ret = -1;
    xlator_t *this = THIS;
    glusterd_conf_t *conf = NULL;

    GF_ASSERT(volname);
    GF_ASSERT(this);
    conf = this->private;
    GF_ASSERT(conf);

    cds_list_for_each_entry(entry, &conf->volumes, vol_list)
    {
        if (strcmp(entry->volname, volname) != 0)
            continue;
        gf_msg_debug(this->name, 0, "Volume %s found", volname);
        *volinfo = entry;
        ret = 0;
        break;
    }

    gf_msg_debug(this->name, 0, "Returning %d", ret);
    return ret;
}
/* Stop the service whose pid is recorded in @pidfile by sending it @sig.
 * When @force_kill is set, wait one second and escalate to SIGKILL if the
 * process is still running.  Returns 0 on success (including "already
 * stopped"); non-zero when the process could not be killed. */
int32_t
glusterd_service_stop(const char *service, char *pidfile, int sig,
                      gf_boolean_t force_kill)
{
    int32_t ret = -1;
    pid_t pid = -1;
    xlator_t *this = NULL;

    this = THIS;
    GF_ASSERT(this);
    /* Nothing to do if no live process is recorded in the pidfile. */
    if (!gf_is_service_running(pidfile, &pid)) {
        ret = 0;
        gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_ALREADY_STOPPED,
               "%s already stopped", service);
        goto out;
    }
    gf_msg_debug(this->name, 0,
                 "Stopping gluster %s running in pid: "
                 "%d",
                 service, pid);

    ret = kill(pid, sig);
    if (ret) {
        switch (errno) {
            case ESRCH:
                /* Process exited between the liveness check and the kill. */
                gf_msg_debug(this->name, 0, "%s is already stopped", service);
                ret = 0;
                goto out;
            default:
                /* NOTE(review): deliberately does NOT goto out here — the
                 * force_kill escalation below still gets a chance to run. */
                gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_SVC_KILL_FAIL,
                       "Unable to kill %s "
                       "service, reason:%s",
                       service, strerror(errno));
        }
    }
    if (!force_kill)
        goto out;

    /* Give the process a moment to exit before escalating to SIGKILL. */
    sleep(1);
    if (gf_is_service_running(pidfile, &pid)) {
        ret = kill(pid, SIGKILL);
        if (ret) {
            gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_PID_KILL_FAIL,
                   "Unable to kill pid:%d, "
                   "reason:%s",
                   pid, strerror(errno));
            goto out;
        }
    }

    ret = 0;
out:
    return ret;
}
/* Variant of glusterd_service_stop() that reads the pidfile directly
 * (without gf_is_service_running / locking).  Sends @sig to the recorded
 * pid; when @force_kill is set, waits one second and escalates to
 * SIGKILL.  Returns 0 on success or when the process is already gone. */
int32_t
glusterd_service_stop_nolock(const char *service, char *pidfile, int sig,
                             gf_boolean_t force_kill)
{
    int32_t ret = -1;
    pid_t pid = -1;
    xlator_t *this = NULL;
    FILE *file = NULL;

    this = THIS;
    GF_ASSERT(this);

    file = fopen(pidfile, "r+");
    if (!file) {
        /* BUG FIX: previously a missing/unreadable pidfile left pid == -1
         * and execution fell through to kill(-1, sig), which signals every
         * process the caller has permission to signal.  Bail out instead. */
        gf_msg_debug(this->name, 0, "Unable to open pidfile: %s", pidfile);
        goto out;
    }
    ret = fscanf(file, "%d", &pid);
    if (ret <= 0) {
        gf_msg_debug(this->name, 0, "Unable to read pidfile: %s", pidfile);
        /* BUG FIX: fscanf may return 0 here; normalize to -1 so callers
         * never see "success" for an unparseable pidfile. */
        ret = -1;
        goto out;
    }
    /* Defensive: never signal pid 0 or negative process groups. */
    if (pid <= 0) {
        gf_msg_debug(this->name, 0, "Invalid pid in pidfile: %s", pidfile);
        ret = -1;
        goto out;
    }

    if (kill(pid, 0) < 0) {
        ret = 0;
        gf_msg_debug(this->name, 0, "%s process not running: (%d) %s", service,
                     pid, strerror(errno));
        goto out;
    }
    gf_msg_debug(this->name, 0,
                 "Stopping gluster %s service running with "
                 "pid: %d",
                 service, pid);

    ret = kill(pid, sig);
    if (ret) {
        switch (errno) {
            case ESRCH:
                /* Process exited between the liveness probe and the kill. */
                gf_msg_debug(this->name, 0, "%s is already stopped", service);
                ret = 0;
                goto out;
            default:
                /* Fall through to the force_kill escalation below. */
                gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_SVC_KILL_FAIL,
                       "Unable to kill %s "
                       "service, reason:%s",
                       service, strerror(errno));
        }
    }
    if (!force_kill)
        goto out;

    /* Give the process a moment to exit before escalating. */
    sleep(1);
    if (kill(pid, 0) == 0) {
        ret = kill(pid, SIGKILL);
        if (ret) {
            /* Process is already dead, don't fail */
            if (errno == ESRCH) {
                gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_PID_KILL_FAIL,
                       "Unable to find pid:%d, "
                       "must be dead already. Ignoring.",
                       pid);
                ret = 0;
            } else {
                gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_PID_KILL_FAIL,
                       "Unable to kill pid:%d, "
                       "reason:%s",
                       pid, strerror(errno));
                goto out;
            }
        }
    }

    ret = 0;
out:
    if (file)
        fclose(file);
    return ret;
}
/* Derive the on-disk socket path for @sock_filepath by hashing it with
 * XXH64 and writing "GLUSTERD_SOCK_DIR/<hash>.socket" into @sockpath
 * (capacity @len). */
void
glusterd_set_socket_filepath(char *sock_filepath, char *sockpath, size_t len)
{
    char digest[GF_XXH64_DIGEST_LENGTH * 2 + 1] = {0};

    gf_xxh64_wrapper((unsigned char *)sock_filepath, strlen(sock_filepath),
                     GF_XXHSUM64_DEFAULT_SEED, digest);
    snprintf(sockpath, len, "%s/%s.socket", GLUSTERD_SOCK_DIR, digest);
}
/* Compute the brick's UNIX-domain socket path into @sockpath (capacity
 * @len).  The name is built from the volume's pid directory, hostname and
 * slash-stripped export path, then hashed by glusterd_set_socket_filepath. */
void
glusterd_set_brick_socket_filepath(glusterd_volinfo_t *volinfo,
                                   glusterd_brickinfo_t *brickinfo,
                                   char *sockpath, size_t len)
{
    char volume_dir[PATH_MAX] = "";
    xlator_t *this = NULL;
    glusterd_conf_t *priv = NULL;
    int expected_file_len = 0;
    char export_path[PATH_MAX] = "";
    char sock_filepath[PATH_MAX] = "";
    int32_t slen = 0;

    /* BUG FIX: the socket name embeds an XXH64 digest (see
     * glusterd_set_socket_filepath), not a SHA256 digest, so the minimum
     * capacity check must use GF_XXH64_DIGEST_LENGTH. */
    expected_file_len = SLEN(GLUSTERD_SOCK_DIR) + SLEN("/") +
                        GF_XXH64_DIGEST_LENGTH * 2 + SLEN(".socket") + 1;
    GF_ASSERT(len >= expected_file_len);
    this = THIS;
    GF_ASSERT(this);

    priv = this->private;
    GLUSTERD_GET_VOLUME_PID_DIR(volume_dir, volinfo, priv);
    GLUSTERD_REMOVE_SLASH_FROM_PATH(brickinfo->path, export_path);
    slen = snprintf(sock_filepath, PATH_MAX, "%s/run/%s-%s", volume_dir,
                    brickinfo->hostname, export_path);
    if (slen < 0) {
        sock_filepath[0] = 0;
    }
    glusterd_set_socket_filepath(sock_filepath, sockpath, len);
}
/* connection happens only if it is not already connected,
 * reconnections are taken care by rpc-layer
 */
/* Establish glusterd's management RPC connection to a brick over the
 * UNIX-domain socket at @socketpath.  No-op when brickinfo->rpc already
 * exists.  On success brickinfo->rpc owns the new client; the brickid
 * string's ownership passes to the rpc notify machinery.  Returns 0 on
 * success or when already connected, non-zero on failure. */
int32_t
glusterd_brick_connect(glusterd_volinfo_t *volinfo,
                       glusterd_brickinfo_t *brickinfo, char *socketpath)
{
    int ret = 0;
    char volume_id_str[64] = "";
    char *brickid = NULL;
    dict_t *options = NULL;
    struct rpc_clnt *rpc = NULL;

    GF_ASSERT(volinfo);
    GF_ASSERT(brickinfo);
    GF_ASSERT(socketpath);

    if (brickinfo->rpc == NULL) {
        /* Setting frame-timeout to 10mins (600seconds).
         * Unix domain sockets ensures that the connection is reliable.
         * The default timeout of 30mins used for unreliable network
         * connections is too long for unix domain socket connections.
         */
        ret = rpc_transport_unix_options_build(&options, socketpath, 600);
        if (ret)
            goto out;

        uuid_utoa_r(volinfo->volume_id, volume_id_str);
        /* brickid ("volume-id:host:path") identifies this brick in RPC
         * notifications; freed here only if rpc creation fails. */
        ret = gf_asprintf(&brickid, "%s:%s:%s", volume_id_str,
                          brickinfo->hostname, brickinfo->path);
        if (ret < 0)
            goto out;

        ret = glusterd_rpc_create(&rpc, options, glusterd_brick_rpc_notify,
                                  brickid, _gf_false);
        if (ret) {
            GF_FREE(brickid);
            goto out;
        }
        brickinfo->rpc = rpc;
    }
out:

    gf_msg_debug("glusterd", 0, "Returning %d", ret);
    return ret;
}
/* Ensure the volume's run directory (pidfiles/sockets) exists, creating
 * missing path components as needed.  Returns mkdir_p's result. */
static int
_mk_rundir_p(glusterd_volinfo_t *volinfo)
{
    char rundir[PATH_MAX] = "";
    xlator_t *this = THIS;
    glusterd_conf_t *conf = this->private;
    int ret = -1;

    GLUSTERD_GET_VOLUME_PID_DIR(rundir, volinfo, conf);
    ret = mkdir_p(rundir, 0777, _gf_true);
    if (ret)
        gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
               "Failed to create rundir");
    return ret;
}
/* Start (or connect to) the glusterfsd process for @brickinfo of
 * @volinfo.  If the brick is already running, only the management RPC
 * connection is (re)established.  Otherwise any stale socket/RPC state is
 * torn down, a port is assigned, the glusterfsd command line is built
 * (optionally under valgrind) and executed — synchronously when @wait is
 * set (with one retry on EADDRINUSE), asynchronously otherwise — and a
 * brick-process record is created for multiplexing bookkeeping.
 * Returns 0 on success; on failure the brick is marked stopped. */
int32_t
glusterd_volume_start_glusterfs(glusterd_volinfo_t *volinfo,
                                glusterd_brickinfo_t *brickinfo,
                                gf_boolean_t wait)
{
    int32_t ret = -1;
    xlator_t *this = NULL;
    glusterd_conf_t *priv = NULL;
    char pidfile[PATH_MAX + 1] = "";
    char volfile[PATH_MAX] = "";
    runner_t runner = {
        0,
    };
    char exp_path[PATH_MAX] = "";
    char logfile[PATH_MAX] = "";
    int port = 0;
    int rdma_port = 0;
    char *bind_address = NULL;
    char *localtime_logging = NULL;
    char socketpath[PATH_MAX] = "";
    char glusterd_uuid[1024] = "";
    char valgrind_logfile[PATH_MAX] = "";
    char rdma_brick_path[PATH_MAX] = "";
    struct rpc_clnt *rpc = NULL;
    rpc_clnt_connection_t *conn = NULL;
    int pid = -1;
    int32_t len = 0;
    glusterd_brick_proc_t *brick_proc = NULL;

    GF_ASSERT(volinfo);
    GF_ASSERT(brickinfo);

    this = THIS;
    GF_ASSERT(this);
    priv = this->private;
    GF_ASSERT(priv);

    /* A brick with a pending snapshot must not be started. */
    if (brickinfo->snap_status == -1) {
        gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SNAPSHOT_PENDING,
               "Snapshot is pending on %s:%s. "
               "Hence not starting the brick",
               brickinfo->hostname, brickinfo->path);
        ret = 0;
        goto out;
    }

    GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo, priv);
    /* Already running: just (re)establish the management connection. */
    if (gf_is_service_running(pidfile, &pid)) {
        goto connect;
    }

    /*
     * There are all sorts of races in the start/stop code that could leave
     * a UNIX-domain socket or RPC-client object associated with a
     * long-dead incarnation of this brick, while the new incarnation is
     * listening on a new socket at the same path and wondering why we
     * haven't shown up. To avoid the whole mess and be on the safe side,
     * we just blow away anything that might have been left over, and start
     * over again.
     */
    glusterd_set_brick_socket_filepath(volinfo, brickinfo, socketpath,
                                       sizeof(socketpath));
    (void)glusterd_unlink_file(socketpath);
    rpc = brickinfo->rpc;
    if (rpc) {
        brickinfo->rpc = NULL;
        conn = &rpc->conn;
        /* Cancel any pending reconnect timer before dropping the ref. */
        pthread_mutex_lock(&conn->lock);
        if (conn->reconnect) {
            (void)gf_timer_call_cancel(rpc->ctx, conn->reconnect);
            conn->reconnect = NULL;
        }
        pthread_mutex_unlock(&conn->lock);
        rpc_clnt_unref(rpc);
    }

    port = pmap_assign_port(THIS, brickinfo->port, brickinfo->path);
    if (!port) {
        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PORTS_EXHAUSTED,
               "All the ports in the range are exhausted, can't start "
               "brick %s for volume %s",
               brickinfo->path, volinfo->volname);
        ret = -1;
        goto out;
    }

    /* Build the exp_path, before starting the glusterfsd even in
       valgrind mode. Otherwise all the glusterfsd processes start
       writing the valgrind log to the same file.
    */
    GLUSTERD_REMOVE_SLASH_FROM_PATH(brickinfo->path, exp_path);

retry:
    runinit(&runner);

    if (this->ctx->cmd_args.valgrind) {
        /* Run bricks with valgrind */
        if (volinfo->logdir) {
            len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s-%s.log",
                           volinfo->logdir, volinfo->volname, exp_path);
        } else {
            len = snprintf(
                valgrind_logfile, PATH_MAX, "%s/bricks/valgrind-%s-%s.log",
                DEFAULT_LOG_FILE_DIRECTORY, volinfo->volname, exp_path);
        }
        if ((len < 0) || (len >= PATH_MAX)) {
            ret = -1;
            goto out;
        }

        runner_add_args(&runner, "valgrind", "--leak-check=full",
                        "--trace-children=yes", "--track-origins=yes", NULL);
        runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
    }

    /* Snapshot bricks read their volfile from the snaps directory. */
    if (volinfo->is_snap_volume) {
        len = snprintf(volfile, PATH_MAX, "/%s/%s/%s/%s.%s.%s",
                       GLUSTERD_VOL_SNAP_DIR_PREFIX,
                       volinfo->snapshot->snapname, volinfo->volname,
                       volinfo->volname, brickinfo->hostname, exp_path);
    } else {
        len = snprintf(volfile, PATH_MAX, "%s.%s.%s", volinfo->volname,
                       brickinfo->hostname, exp_path);
    }
    if ((len < 0) || (len >= PATH_MAX)) {
        ret = -1;
        goto out;
    }

    if (volinfo->logdir) {
        len = snprintf(logfile, PATH_MAX, "%s/%s.log", volinfo->logdir,
                       exp_path);
    } else {
        len = snprintf(logfile, PATH_MAX, "%s/bricks/%s.log",
                       DEFAULT_LOG_FILE_DIRECTORY, exp_path);
    }
    if ((len < 0) || (len >= PATH_MAX)) {
        ret = -1;
        goto out;
    }
    if (!brickinfo->logfile)
        brickinfo->logfile = gf_strdup(logfile);

    (void)snprintf(glusterd_uuid, 1024, "*-posix.glusterd-uuid=%s",
                   uuid_utoa(MY_UUID));
    runner_add_args(&runner, SBIN_DIR "/glusterfsd", "-s", brickinfo->hostname,
                    "--volfile-id", volfile, "-p", pidfile, "-S", socketpath,
                    "--brick-name", brickinfo->path, "-l", brickinfo->logfile,
                    "--xlator-option", glusterd_uuid, "--process-name", "brick",
                    NULL);

    if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY,
                      SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY),
                      &localtime_logging) == 0) {
        if (strcmp(localtime_logging, "enable") == 0)
            runner_add_arg(&runner, "--localtime-logging");
    }

    runner_add_arg(&runner, "--brick-port");
    if (volinfo->transport_type != GF_TRANSPORT_BOTH_TCP_RDMA) {
        runner_argprintf(&runner, "%d", port);
    } else {
        /* tcp,rdma volumes need a second port for the rdma listener. */
        len = snprintf(rdma_brick_path, sizeof(rdma_brick_path), "%s.rdma",
                       brickinfo->path);
        if ((len < 0) || (len >= sizeof(rdma_brick_path))) {
            ret = -1;
            goto out;
        }
        rdma_port = pmap_assign_port(THIS, brickinfo->rdma_port,
                                     rdma_brick_path);
        if (!rdma_port) {
            gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PORTS_EXHAUSTED,
                   "All rdma ports in the "
                   "range are exhausted, can't start brick %s for "
                   "volume %s",
                   rdma_brick_path, volinfo->volname);
            ret = -1;
            goto out;
        }
        runner_argprintf(&runner, "%d,%d", port, rdma_port);
        runner_add_arg(&runner, "--xlator-option");
        runner_argprintf(&runner, "%s-server.transport.rdma.listen-port=%d",
                         volinfo->volname, rdma_port);
    }

    runner_add_arg(&runner, "--xlator-option");
    runner_argprintf(&runner, "%s-server.listen-port=%d", volinfo->volname,
                     port);

    if (dict_get_strn(this->options, "transport.socket.bind-address",
                      SLEN("transport.socket.bind-address"),
                      &bind_address) == 0) {
        runner_add_arg(&runner, "--xlator-option");
        runner_argprintf(&runner, "transport.socket.bind-address=%s",
                         bind_address);
    }

    if (volinfo->transport_type == GF_TRANSPORT_RDMA)
        runner_argprintf(&runner, "--volfile-server-transport=rdma");
    else if (volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA)
        runner_argprintf(&runner, "--volfile-server-transport=socket,rdma");

    if (volinfo->memory_accounting)
        runner_add_arg(&runner, "--mem-accounting");

    runner_log(&runner, "", 0, "Starting GlusterFS");

    brickinfo->port = port;
    brickinfo->rdma_port = rdma_port;
    brickinfo->status = GF_BRICK_STARTING;
    brickinfo->port_registered = _gf_false;

    if (wait) {
        /* Drop the big lock while the child runs so glusterd stays
         * responsive during the synchronous spawn. */
        synclock_unlock(&priv->big_lock);
        ret = runner_run(&runner);
        synclock_lock(&priv->big_lock);

        if (ret == EADDRINUSE) {
            /* retry after getting a new port */
            gf_msg(this->name, GF_LOG_WARNING, -ret,
                   GD_MSG_SRC_BRICK_PORT_UNAVAIL,
                   "Port %d is used by other process", port);

            port = pmap_registry_alloc(this);
            if (!port) {
                gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_NO_FREE_PORTS,
                       "Couldn't allocate a port");
                ret = -1;
                goto out;
            }
            gf_msg(this->name, GF_LOG_NOTICE, 0, GD_MSG_RETRY_WITH_NEW_PORT,
                   "Retrying to start brick %s with new port %d",
                   brickinfo->path, port);
            goto retry;
        }
    } else {
        ret = runner_run_nowait(&runner);
    }

    if (ret) {
        brickinfo->port = 0;
        brickinfo->rdma_port = 0;
        goto out;
    }

    ret = glusterd_brickprocess_new(&brick_proc);
    if (ret) {
        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICKPROC_NEW_FAILED,
               "Failed to create "
               "new brick process instance");
        goto out;
    }

    brick_proc->port = brickinfo->port;
    cds_list_add_tail(&brick_proc->brick_proc_list, &priv->brick_procs);
    cds_list_add_tail(&brickinfo->mux_bricks, &brick_proc->bricks);
    /* BUG FIX: this assignment appeared twice around the list insertion;
     * once is enough. */
    brickinfo->brick_proc = brick_proc;
    brick_proc->brick_count++;

connect:
    ret = glusterd_brick_connect(volinfo, brickinfo, socketpath);
    if (ret) {
        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_DISCONNECTED,
               "Failed to connect to brick %s:%s on %s", brickinfo->hostname,
               brickinfo->path, socketpath);
        goto out;
    }

out:
    if (ret)
        brickinfo->status = GF_BRICK_STOPPED;
    return ret;
}
/* Remove the brick's UNIX-domain socket file from disk.  Returns the
 * result of glusterd_unlink_file(). */
int32_t
glusterd_brick_unlink_socket_file(glusterd_volinfo_t *volinfo,
                                  glusterd_brickinfo_t *brickinfo)
{
    char socketpath[PATH_MAX] = "";
    xlator_t *this = NULL;

    GF_ASSERT(volinfo);
    GF_ASSERT(brickinfo);

    this = THIS;
    GF_ASSERT(this);

    /* NOTE: the previous version also computed the volume directory via
     * GLUSTERD_GET_VOLUME_DIR into a local buffer that was never used;
     * that dead computation has been dropped. */
    glusterd_set_brick_socket_filepath(volinfo, brickinfo, socketpath,
                                       sizeof(socketpath));

    return glusterd_unlink_file(socketpath);
}
/* Drop glusterd's management RPC connection to @brickinfo.  The handle is
 * detached from the brickinfo before being unref'ed so no other code path
 * can reuse it.  Returns 0 on success, -1 when @brickinfo is NULL. */
int32_t
glusterd_brick_disconnect(glusterd_brickinfo_t *brickinfo)
{
    rpc_clnt_t *rpc = NULL;
    glusterd_conf_t *priv = THIS->private;

    GF_ASSERT(brickinfo);
    if (!brickinfo) {
        gf_msg_callingfn("glusterd", GF_LOG_WARNING, EINVAL,
                         GD_MSG_BRICK_NOT_FOUND, "!brickinfo");
        return -1;
    }

    /* Detach first, then release the reference. */
    rpc = brickinfo->rpc;
    brickinfo->rpc = NULL;
    if (!rpc)
        return 0;

    glusterd_rpc_clnt_unref(priv, rpc);
    return 0;
}
static gf_boolean_t
unsafe_option(dict_t *this, char *key, data_t *value, void *arg)
{
    /*
     * Certain options are safe because they're already being handled other
     * ways, such as being copied down to the bricks (all auth options) or
     * being made irrelevant (event-threads). All others are suspect and
     * must be checked in the next function.
     */
    static const char *safe_patterns[] = {
        "*auth*",
        "*event-threads",
        "*diagnostics.brick-log*",
        NULL,
    };
    int i = 0;

    for (i = 0; safe_patterns[i]; i++) {
        if (fnmatch(safe_patterns[i], key, 0) == 0)
            return _gf_false;
    }

    return _gf_true;
}
/* dict_foreach callback: compare one option of dict1 against the same key
 * in dict2 (passed as the opaque argument).  Returns 0 on match, -1 on
 * mismatch or when dict2 lacks the key. */
static int
opts_mismatch(dict_t *dict1, char *key, data_t *value1, void *dict2)
{
    data_t *value2 = dict_get(dict2, key);
    int32_t cmp_len = 0;

    /*
     * If the option is only present on one, we can either look at the
     * default or assume a mismatch. Looking at the default is pretty
     * hard, because that's part of a structure within each translator and
     * there's no dlopen interface to get at it, so we assume a mismatch.
     * If the user really wants them to match (and for their bricks to be
     * multiplexed, they can always reset the option).
     */
    if (value2 == NULL) {
        gf_log(THIS->name, GF_LOG_DEBUG, "missing option %s", key);
        return -1;
    }

    cmp_len = MIN(value1->len, value2->len);
    if (strncmp(value1->data, value2->data, cmp_len)) {
        gf_log(THIS->name, GF_LOG_DEBUG, "option mismatch, %s, %s != %s", key,
               value1->data, value2->data);
        return -1;
    }

    return 0;
}
/* Unlink @brick_proc from the global brick-process list, detach its
 * bricks list head, and free it.  Caller must ensure no brickinfo still
 * references it.  Always returns 0. */
int
glusterd_brickprocess_delete(glusterd_brick_proc_t *brick_proc)
{
    cds_list_del_init(&brick_proc->brick_proc_list);
    cds_list_del_init(&brick_proc->bricks);

    GF_FREE(brick_proc);

    return 0;
}
/* Detach @brickinfo from its brick-process record.  When the record's
 * brick count drops to zero the record is deleted and, if @last_brick is
 * non-NULL, *last_brick is set to 1 so the caller knows it may terminate
 * the process.  Returns 0 on success; also returns 0 (without doing
 * anything) when there is no brick_proc and the brick is not started,
 * since both signout and stop paths may legitimately race here. */
int
glusterd_brick_process_remove_brick(glusterd_brickinfo_t *brickinfo,
                                    int *last_brick)
{
    int ret = -1;
    xlator_t *this = NULL;
    glusterd_conf_t *priv = NULL;
    glusterd_brick_proc_t *brick_proc = NULL;

    this = THIS;
    GF_VALIDATE_OR_GOTO("glusterd", this, out);
    priv = this->private;
    GF_VALIDATE_OR_GOTO(this->name, priv, out);
    GF_VALIDATE_OR_GOTO(this->name, brickinfo, out);

    brick_proc = brickinfo->brick_proc;
    if (!brick_proc) {
        if (brickinfo->status != GF_BRICK_STARTED) {
            /* this function will be called from gluster_pmap_signout and
             * glusterd_volume_stop_glusterfs. So it is possible to have
             * brick_proc set as null.
             */
            ret = 0;
        }
        goto out;
    }

    /* A tracked brick process must account for at least this brick. */
    GF_VALIDATE_OR_GOTO(this->name, (brick_proc->brick_count > 0), out);

    cds_list_del_init(&brickinfo->mux_bricks);
    brick_proc->brick_count--;

    /* If all bricks have been removed, delete the brick process */
    if (brick_proc->brick_count == 0) {
        if (last_brick != NULL)
            *last_brick = 1;
        ret = glusterd_brickprocess_delete(brick_proc);
        if (ret)
            goto out;
    }

    brickinfo->brick_proc = NULL;
    ret = 0;
out:
    return ret;
}
/* Attach @brickinfo to a brick-process record for multiplexing
 * bookkeeping.  When @parent_brickinfo is given, the brick joins its
 * parent's existing record; otherwise the record serving
 * brickinfo->port is looked up and, if absent, created and added to the
 * global list.  Returns 0 on success. */
int
glusterd_brick_process_add_brick(glusterd_brickinfo_t *brickinfo,
                                 glusterd_brickinfo_t *parent_brickinfo)
{
    int ret = -1;
    xlator_t *this = NULL;
    glusterd_conf_t *priv = NULL;
    glusterd_brick_proc_t *brick_proc = NULL;

    this = THIS;
    GF_VALIDATE_OR_GOTO("glusterd", this, out);
    priv = this->private;
    GF_VALIDATE_OR_GOTO(this->name, priv, out);
    GF_VALIDATE_OR_GOTO(this->name, brickinfo, out);

    if (!parent_brickinfo) {
        /* Find (or create) the record keyed by the brick's port. */
        ret = glusterd_brick_proc_for_port(brickinfo->port, &brick_proc);
        if (ret) {
            ret = glusterd_brickprocess_new(&brick_proc);
            if (ret) {
                gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICKPROC_NEW_FAILED,
                       "Failed to create "
                       "new brick process instance");
                goto out;
            }

            brick_proc->port = brickinfo->port;
            cds_list_add_tail(&brick_proc->brick_proc_list, &priv->brick_procs);
        }
    } else {
        ret = 0;
        brick_proc = parent_brickinfo->brick_proc;
    }

    cds_list_add_tail(&brickinfo->mux_bricks, &brick_proc->bricks);
    brickinfo->brick_proc = brick_proc;
    brick_proc->brick_count++;
out:
    return ret;
}
/* Look up the brick process listening on @port.
 * ret = 0 only when you get a brick process associated with the port;
 * ret = -1 otherwise.  On success *brickprocess points at the record. */
int
glusterd_brick_proc_for_port(int port, glusterd_brick_proc_t **brickprocess)
{
    int ret = -1;
    xlator_t *this = THIS;
    glusterd_conf_t *conf = NULL;
    glusterd_brick_proc_t *proc = NULL;

    GF_VALIDATE_OR_GOTO("glusterd", this, out);
    conf = this->private;
    GF_VALIDATE_OR_GOTO(this->name, conf, out);

    cds_list_for_each_entry(proc, &conf->brick_procs, brick_proc_list)
    {
        if (proc->port != port)
            continue;
        *brickprocess = proc;
        ret = 0;
        break;
    }
out:
    return ret;
}
/* Stop the glusterfsd serving @brickinfo of @volinfo.  Detaches the brick
 * from its brick-process record first; then, if the volume is started,
 * either sends a detach request (brick multiplexing, when other bricks
 * still share the process) or terminates the process outright.  Finally
 * drops the RPC connection, unlinks the pidfile and marks the brick
 * stopped.  When @del_brick is set the brickinfo is also unlinked from
 * the volume and freed.  Returns 0 on success. */
int32_t
glusterd_volume_stop_glusterfs(glusterd_volinfo_t *volinfo,
                               glusterd_brickinfo_t *brickinfo,
                               gf_boolean_t del_brick)
{
    xlator_t *this = NULL;
    glusterd_conf_t *conf = NULL;
    int ret = -1;
    char *op_errstr = NULL;
    char pidfile[PATH_MAX] = "";
    /* Set to 1 by glusterd_brick_process_remove_brick when this was the
     * process's last brick. */
    int last_brick = -1;

    GF_ASSERT(volinfo);
    GF_ASSERT(brickinfo);

    this = THIS;
    GF_ASSERT(this);
    conf = this->private;
    GF_VALIDATE_OR_GOTO(this->name, conf, out);

    ret = 0;

    ret = glusterd_brick_process_remove_brick(brickinfo, &last_brick);
    if (ret) {
        gf_msg_debug(this->name, 0,
                     "Couldn't remove brick from"
                     " brick process");
        goto out;
    }

    if (del_brick)
        cds_list_del_init(&brickinfo->brick_list);

    if (GLUSTERD_STATUS_STARTED == volinfo->status) {
        /*
         * In a post-multiplexing world, even if we're not actually
         * doing any multiplexing, just dropping the RPC connection
         * isn't enough. There might be many such connections during
         * the brick daemon's lifetime, even if we only consider the
         * management RPC port (because tests etc. might be manually
         * attaching and detaching bricks). Therefore, we have to send
         * an actual signal instead.
         */
        if (is_brick_mx_enabled() && last_brick != 1) {
            /* Other bricks still share this process: only detach ours. */
            ret = send_attach_req(this, brickinfo->rpc, brickinfo->path, NULL,
                                  NULL, GLUSTERD_BRICK_TERMINATE);
            if (ret && brickinfo->status == GF_BRICK_STARTED) {
                gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_STOP_FAIL,
                       "Failed to send"
                       " detach request for brick %s",
                       brickinfo->path);
                goto out;
            }
            gf_log(this->name, GF_LOG_INFO,
                   "Detach request for "
                   "brick %s:%s is sent successfully",
                   brickinfo->hostname, brickinfo->path);

        } else {
            gf_msg_debug(this->name, 0,
                         "About to stop glusterfsd"
                         " for brick %s:%s",
                         brickinfo->hostname, brickinfo->path);
            ret = glusterd_brick_terminate(volinfo, brickinfo, NULL, 0,
                                           &op_errstr);
            if (ret && brickinfo->status == GF_BRICK_STARTED) {
                gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_STOP_FAIL,
                       "Failed to kill"
                       " the brick %s",
                       brickinfo->path);
                goto out;
            }

            if (op_errstr) {
                GF_FREE(op_errstr);
            }
            if (is_brick_mx_enabled()) {
                /* In case of brick multiplexing we need to make
                 * sure the port is cleaned up from here as the
                 * RPC connection may not have been originated
                 * for the same brick instance
                 */
                pmap_registry_remove(THIS, brickinfo->port, brickinfo->path,
                                     GF_PMAP_PORT_BRICKSERVER, NULL, _gf_true);
            }
        }

        (void)glusterd_brick_disconnect(brickinfo);
        ret = 0;
    }

    GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo, conf);
    gf_msg_debug(this->name, 0, "Unlinking pidfile %s", pidfile);
    (void)sys_unlink(pidfile);

    brickinfo->status = GF_BRICK_STOPPED;
    brickinfo->start_triggered = _gf_false;
    brickinfo->brick_proc = NULL;
    if (del_brick)
        glusterd_delete_brick(volinfo, brickinfo);
out:
    return ret;
}
/* Free LINE[0..N-1] and then the LINE buffer itself. */
static void
free_lines(char **line, size_t n)
{
    while (n--)
        GF_FREE(line[n]);
    GF_FREE(line);
}
/* Read @filepath into a NULL-terminated, heap-allocated array of line
 * strings.  On success *line_count receives the number of lines and the
 * array (sized down to fit) is returned; the caller owns the array and
 * every line (see free_lines).  Returns NULL on any failure, with
 * everything allocated so far released. */
char **
glusterd_readin_file(const char *filepath, int *line_count)
{
    int ret = -1;
    int n = 8;
    int counter = 0;
    char buffer[PATH_MAX + 256] = "";
    char **lines = NULL;
    FILE *fp = NULL;
    void *p;

    fp = fopen(filepath, "r");
    if (!fp)
        goto out;

    lines = GF_CALLOC(1, n * sizeof(*lines), gf_gld_mt_charptr);
    if (!lines)
        goto out;

    for (counter = 0; fgets(buffer, sizeof(buffer), fp); counter++) {
        if (counter == n - 1) {
            n *= 2;
            p = GF_REALLOC(lines, n * sizeof(char *));
            if (!p) {
                /* BUG FIX: only 'counter' entries have been written so
                 * far; the previous count (n / 2) could free an
                 * uninitialized slot after the first growth, since
                 * GF_REALLOC does not zero the extension. */
                free_lines(lines, counter);
                lines = NULL;
                goto out;
            }
            lines = p;
        }

        lines[counter] = gf_strdup(buffer);
        /* BUG FIX: a failed strdup used to leave a NULL hole in the
         * array, silently truncating the file for callers. */
        if (!lines[counter]) {
            free_lines(lines, counter);
            lines = NULL;
            goto out;
        }
    }

    lines[counter] = NULL;
    /* Reduce allocation to minimal size. */
    p = GF_REALLOC(lines, (counter + 1) * sizeof(char *));
    if (!p) {
        free_lines(lines, counter);
        lines = NULL;
        goto out;
    }
    lines = p;

    *line_count = counter;
    ret = 0;

out:
    if (ret)
        gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_READIN_FILE_FAILED, "%s",
               strerror(errno));
    if (fp)
        fclose(fp);

    return lines;
}
/* qsort(3) comparator for an array of C strings (elements are char *). */
int
glusterd_compare_lines(const void *a, const void *b)
{
    const char *lhs = *(char *const *)a;
    const char *rhs = *(char *const *)b;

    return strcmp(lhs, rhs);
}
/* Read @src_filepath, sort its lines lexicographically and write them to
 * @dest_fd.  Returns 0 on success, -1 on bad arguments, read failure or
 * write failure.  All line buffers are released on every path. */
int
glusterd_sort_and_redirect(const char *src_filepath, int dest_fd)
{
    int ret = -1;
    int line_count = 0;
    int counter = 0;
    char **lines = NULL;

    if (!src_filepath || dest_fd < 0)
        goto out;

    lines = glusterd_readin_file(src_filepath, &line_count);
    if (!lines)
        goto out;

    qsort(lines, line_count, sizeof(*lines), glusterd_compare_lines);

    for (counter = 0; lines[counter]; counter++) {
        ret = sys_write(dest_fd, lines[counter], strlen(lines[counter]));
        if (ret < 0)
            goto out;

        GF_FREE(lines[counter]);
        lines[counter] = NULL;
    }

    ret = 0;
out:
    /* BUG FIX: on a mid-loop write failure the remaining line buffers
     * (from 'counter' to the terminating NULL) were leaked — only the
     * pointer array itself was freed. */
    if (lines) {
        for (; lines[counter]; counter++)
            GF_FREE(lines[counter]);
        GF_FREE(lines);
    }
    return ret;
}
/* Compute a checksum over @filepath and record it in @cksum_path.
 *
 * For regular volume info files (is_quota_conf false) the source file is
 * first sorted into a private temp file so the checksum is stable across
 * line reorderings, and an "info=<cksum>" line is written into the cksum
 * file before the final file checksum is taken.  Quota conf files are
 * checksummed as-is.  The resulting checksum is returned through @cs.
 * Returns 0 on success. */
int
glusterd_volume_compute_cksum(glusterd_volinfo_t *volinfo, char *cksum_path,
                              char *filepath, gf_boolean_t is_quota_conf,
                              uint32_t *cs)
{
    int32_t ret = -1;
    uint32_t cksum = 0;
    int fd = -1;
    /* -1 means "not open"; used by the cleanup path below. */
    int sort_fd = -1;
    char sort_filepath[PATH_MAX] = "";
    char *cksum_path_final = NULL;
    char buf[4096] = "";
    gf_boolean_t unlink_sortfile = _gf_false;
    glusterd_conf_t *priv = NULL;
    xlator_t *this = NULL;
    mode_t orig_umask = 0;

    GF_ASSERT(volinfo);
    this = THIS;
    priv = THIS->private;
    GF_ASSERT(priv);

    fd = open(cksum_path, O_RDWR | O_APPEND | O_CREAT | O_TRUNC, 0600);
    if (-1 == fd) {
        gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
               "Unable to open %s,"
               " errno: %d",
               cksum_path, errno);
        ret = -1;
        goto out;
    }

    if (!is_quota_conf) {
        snprintf(sort_filepath, sizeof(sort_filepath), "/tmp/%s.XXXXXX",
                 volinfo->volname);

        /* Make the temp file private regardless of the process umask. */
        orig_umask = umask(S_IRWXG | S_IRWXO);
        sort_fd = mkstemp(sort_filepath);
        umask(orig_umask);
        if (sort_fd < 0) {
            gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
                   "Could not generate "
                   "temp file, reason: %s for volume: %s",
                   strerror(errno), volinfo->volname);
            goto out;
        } else {
            unlink_sortfile = _gf_true;
        }

        /* sort the info file, result in sort_filepath */
        ret = glusterd_sort_and_redirect(filepath, sort_fd);
        if (ret) {
            gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED,
                   "sorting info file "
                   "failed");
            /* BUG FIX: sort_fd used to leak here; the cleanup path below
             * now closes it. */
            goto out;
        }

        ret = sys_close(sort_fd);
        sort_fd = -1; /* closed (or close failed) — never close twice */
        if (ret)
            goto out;
    }

    cksum_path_final = is_quota_conf ? filepath : sort_filepath;

    ret = get_checksum_for_path(cksum_path_final, &cksum);
    if (ret) {
        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_GET_FAIL,
               "unable to get "
               "checksum for path: %s",
               cksum_path_final);
        goto out;
    }
    if (!is_quota_conf) {
        snprintf(buf, sizeof(buf), "%s=%u\n", "info", cksum);
        ret = sys_write(fd, buf, strlen(buf));
        if (ret <= 0) {
            ret = -1;
            goto out;
        }
    }

    ret = get_checksum_for_file(fd, &cksum);
    if (ret)
        goto out;

    *cs = cksum;

out:
    if (sort_fd != -1)
        sys_close(sort_fd);
    if (fd != -1)
        sys_close(fd);
    if (unlink_sortfile)
        sys_unlink(sort_filepath);
    gf_msg_debug(this->name, 0, "Returning with %d", ret);

    return ret;
}
/* Compute and store the checksum for @volinfo's info file (or, when
 * @is_quota_conf is set, its quota configuration file).  The result is
 * cached on the volinfo (cksum / quota_conf_cksum).  Returns 0 on
 * success. */
int
glusterd_compute_cksum(glusterd_volinfo_t *volinfo, gf_boolean_t is_quota_conf)
{
    int ret = -1;
    uint32_t cs = 0;
    char cksum_path[PATH_MAX] = "";
    char path[PATH_MAX] = "";
    char filepath[PATH_MAX] = "";
    glusterd_conf_t *conf = NULL;
    xlator_t *this = THIS;
    const char *cksum_name = NULL;
    const char *file_name = NULL;
    int32_t len1 = 0;
    int32_t len2 = 0;

    GF_ASSERT(this);
    conf = this->private;
    GF_ASSERT(conf);

    GLUSTERD_GET_VOLUME_DIR(path, volinfo, conf);

    /* Pick the file pair for the requested checksum type. */
    if (is_quota_conf) {
        cksum_name = GLUSTERD_VOL_QUOTA_CKSUM_FILE;
        file_name = GLUSTERD_VOLUME_QUOTA_CONFIG;
    } else {
        cksum_name = GLUSTERD_CKSUM_FILE;
        file_name = GLUSTERD_VOLUME_INFO_FILE;
    }
    len1 = snprintf(cksum_path, sizeof(cksum_path), "%s/%s", path, cksum_name);
    len2 = snprintf(filepath, sizeof(filepath), "%s/%s", path, file_name);
    if ((len1 < 0) || (len2 < 0) || (len1 >= sizeof(cksum_path)) ||
        (len2 >= sizeof(filepath))) {
        goto out;
    }

    ret = glusterd_volume_compute_cksum(volinfo, cksum_path, filepath,
                                        is_quota_conf, &cs);
    if (ret) {
        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_COMPUTE_FAIL,
               "Failed to compute checksum "
               "for volume %s",
               volinfo->volname);
        goto out;
    }

    if (is_quota_conf)
        volinfo->quota_conf_cksum = cs;
    else
        volinfo->cksum = cs;

    ret = 0;
out:
    return ret;
}
/* dict_foreach callback: serialize one key/value pair of the source dict
 * into the target dict (ctx->dict) as numbered "<prefix>.<key_name><N>" /
 * "<prefix>.<val_name><N>" entries, where N is ctx->opt_count (incremented
 * per pair).  Returns the result of the last dict_set_strn. */
int
_add_dict_to_prdict(dict_t *this, char *key, data_t *value, void *data)
{
    glusterd_dict_ctx_t *ctx = NULL;
    char optkey[512] = "";
    int ret = -1;

    ctx = data;
    ret = snprintf(optkey, sizeof(optkey), "%s.%s%d", ctx->prefix,
                   ctx->key_name, ctx->opt_count);
    /* snprintf's return (the key length) doubles as dict_set_strn's
     * key-length argument. */
    ret = dict_set_strn(ctx->dict, optkey, ret, key);
    if (ret)
        gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
               "option add for %s%d %s", ctx->key_name, ctx->opt_count, key);
    /* NOTE(review): a failure above is logged but does not stop the
     * matching value from being added below. */
    ret = snprintf(optkey, sizeof(optkey), "%s.%s%d", ctx->prefix,
                   ctx->val_name, ctx->opt_count);
    ret = dict_set_strn(ctx->dict, optkey, ret, value->data);
    if (ret)
        gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
               "option add for %s%d %s", ctx->val_name, ctx->opt_count,
               value->data);
    ctx->opt_count++;

    return ret;
}
/* Flatten a volume's brick list into @dict as indexed entries:
 * "<idx>-hostname" and "<idx>-path" for every brick, idx starting at 0.
 *
 * Returns 0 on success, or the failing dict_set_strn() status.
 */
int32_t
glusterd_add_bricks_hname_path_to_dict(dict_t *dict,
                                       glusterd_volinfo_t *volinfo)
{
    glusterd_brickinfo_t *brick = NULL;
    char key[64] = "";
    int keylen = 0;
    int idx = 0;
    int ret = 0;

    cds_list_for_each_entry(brick, &volinfo->bricks, brick_list)
    {
        keylen = snprintf(key, sizeof(key), "%d-hostname", idx);
        ret = dict_set_strn(dict, key, keylen, brick->hostname);
        if (ret)
            break;

        keylen = snprintf(key, sizeof(key), "%d-path", idx);
        ret = dict_set_strn(dict, key, keylen, brick->path);
        if (ret)
            break;

        idx++;
    }

    return ret;
}
/* The prefix represents the type of volume to be added.
* It will be "volume" for normal volumes, and snap# like
* snap1, snap2, for snapshot volumes
*/
/* Serialize @volinfo into @dict under "<prefix><count>.*" keys so a peer
 * glusterd can reconstruct the volume: identity and shape counts,
 * versions, checksum, tier info, snapshot details, auth credentials,
 * rebalance state, volume options, geo-rep slaves, per-brick data and
 * op-versions.
 *
 * @volinfo: volume to export (must be non-NULL)
 * @dict:    destination dict (must be non-NULL)
 * @count:   1-based volume index used in every key
 * @prefix:  key prefix, "volume" for regular volumes, "snapN" for
 *           snapshot volumes (see comment above this function)
 *
 * Returns 0 on success, negative on failure.
 */
int32_t
glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
                            int32_t count, char *prefix)
{
    int32_t ret = -1;
    char pfx[512] = "";
    char key[512] = "";
    int keylen;
    glusterd_brickinfo_t *brickinfo = NULL;
    int32_t i = 1;
    char *volume_id_str = NULL;
    char *str = NULL;
    glusterd_dict_ctx_t ctx = {0};
    char *rebalance_id_str = NULL;
    /* rb_id_str is never assigned in this function; the GF_FREE at out:
     * is a no-op kept for symmetry — presumably historical. */
    char *rb_id_str = NULL;
    xlator_t *this = NULL;
    this = THIS;
    GF_ASSERT(this);
    GF_ASSERT(dict);
    GF_ASSERT(volinfo);
    GF_ASSERT(prefix);
    /* --- basic identity and shape --- */
    keylen = snprintf(key, sizeof(key), "%s%d.name", prefix, count);
    ret = dict_set_strn(dict, key, keylen, volinfo->volname);
    if (ret)
        goto out;
    keylen = snprintf(key, sizeof(key), "%s%d.type", prefix, count);
    ret = dict_set_int32n(dict, key, keylen, volinfo->type);
    if (ret)
        goto out;
    keylen = snprintf(key, sizeof(key), "%s%d.brick_count", prefix, count);
    ret = dict_set_int32n(dict, key, keylen, volinfo->brick_count);
    if (ret)
        goto out;
    keylen = snprintf(key, sizeof(key), "%s%d.version", prefix, count);
    ret = dict_set_int32n(dict, key, keylen, volinfo->version);
    if (ret)
        goto out;
    keylen = snprintf(key, sizeof(key), "%s%d.status", prefix, count);
    ret = dict_set_int32n(dict, key, keylen, volinfo->status);
    if (ret)
        goto out;
    keylen = snprintf(key, sizeof(key), "%s%d.sub_count", prefix, count);
    ret = dict_set_int32n(dict, key, keylen, volinfo->sub_count);
    if (ret)
        goto out;
    keylen = snprintf(key, sizeof(key), "%s%d.stripe_count", prefix, count);
    ret = dict_set_int32n(dict, key, keylen, volinfo->stripe_count);
    if (ret)
        goto out;
    keylen = snprintf(key, sizeof(key), "%s%d.replica_count", prefix, count);
    ret = dict_set_int32n(dict, key, keylen, volinfo->replica_count);
    if (ret)
        goto out;
    keylen = snprintf(key, sizeof(key), "%s%d.arbiter_count", prefix, count);
    ret = dict_set_int32n(dict, key, keylen, volinfo->arbiter_count);
    if (ret)
        goto out;
    keylen = snprintf(key, sizeof(key), "%s%d.disperse_count", prefix, count);
    ret = dict_set_int32n(dict, key, keylen, volinfo->disperse_count);
    if (ret)
        goto out;
    keylen = snprintf(key, sizeof(key), "%s%d.redundancy_count", prefix, count);
    ret = dict_set_int32n(dict, key, keylen, volinfo->redundancy_count);
    if (ret)
        goto out;
    keylen = snprintf(key, sizeof(key), "%s%d.dist_count", prefix, count);
    ret = dict_set_int32n(dict, key, keylen, volinfo->dist_leaf_count);
    if (ret)
        goto out;
    /* NOTE: "ckusm" is a historical typo but it is the on-wire key the
     * peer side looks up (see glusterd_compare_friend_volume); do not
     * correct the spelling. */
    snprintf(key, sizeof(key), "%s%d.ckusm", prefix, count);
    ret = dict_set_int64(dict, key, volinfo->cksum);
    if (ret)
        goto out;
    snprintf(key, sizeof(key), "%s%d.transport_type", prefix, count);
    ret = dict_set_uint32(dict, key, volinfo->transport_type);
    if (ret)
        goto out;
    snprintf(key, sizeof(key), "%s%d.stage_deleted", prefix, count);
    ret = dict_set_uint32(dict, key, (uint32_t)volinfo->stage_deleted);
    if (ret)
        goto out;
    /* tiering related variables */
    snprintf(key, sizeof(key), "%s%d.cold_brick_count", prefix, count);
    ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_brick_count);
    if (ret)
        goto out;
    snprintf(key, sizeof(key), "%s%d.cold_type", prefix, count);
    ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_type);
    if (ret)
        goto out;
    snprintf(key, sizeof(key), "%s%d.cold_replica_count", prefix, count);
    ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_replica_count);
    if (ret)
        goto out;
    snprintf(key, sizeof(key), "%s%d.cold_disperse_count", prefix, count);
    ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_disperse_count);
    if (ret)
        goto out;
    snprintf(key, sizeof(key), "%s%d.cold_redundancy_count", prefix, count);
    ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_redundancy_count);
    if (ret)
        goto out;
    snprintf(key, sizeof(key), "%s%d.cold_dist_count", prefix, count);
    ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_dist_leaf_count);
    if (ret)
        goto out;
    snprintf(key, sizeof(key), "%s%d.hot_brick_count", prefix, count);
    ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_brick_count);
    if (ret)
        goto out;
    snprintf(key, sizeof(key), "%s%d.hot_type", prefix, count);
    ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_type);
    if (ret)
        goto out;
    snprintf(key, sizeof(key), "%s%d.hot_replica_count", prefix, count);
    ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_replica_count);
    if (ret)
        goto out;
    /* Snapshot details go under the bare "<prefix><count>" key prefix. */
    snprintf(key, sizeof(key), "%s%d", prefix, count);
    ret = gd_add_vol_snap_details_to_dict(dict, key, volinfo);
    if (ret)
        goto out;
    /* --- volume id (UUID string); ownership moves into the dict on
     * success, so the local pointer is cleared afterwards --- */
    volume_id_str = gf_strdup(uuid_utoa(volinfo->volume_id));
    if (!volume_id_str) {
        ret = -1;
        goto out;
    }
    keylen = snprintf(key, sizeof(key), "%s%d.volume_id", prefix, count);
    ret = dict_set_dynstrn(dict, key, keylen, volume_id_str);
    if (ret)
        goto out;
    volume_id_str = NULL;
    /* --- auth credentials (only exported when present) --- */
    keylen = snprintf(key, sizeof(key), "%s%d.username", prefix, count);
    str = glusterd_auth_get_username(volinfo);
    if (str) {
        /* NOTE(review): gf_strdup() result is not NULL-checked before
         * being handed to dict_set_dynstrn — confirm the dict API
         * tolerates a NULL value on allocation failure. */
        ret = dict_set_dynstrn(dict, key, keylen, gf_strdup(str));
        if (ret)
            goto out;
    }
    keylen = snprintf(key, sizeof(key), "%s%d.password", prefix, count);
    str = glusterd_auth_get_password(volinfo);
    if (str) {
        ret = dict_set_dynstrn(dict, key, keylen, gf_strdup(str));
        if (ret)
            goto out;
    }
    /* --- rebalance state --- */
    keylen = snprintf(key, sizeof(key), "%s%d.rebalance", prefix, count);
    ret = dict_set_int32n(dict, key, keylen, volinfo->rebal.defrag_cmd);
    if (ret)
        goto out;
    rebalance_id_str = gf_strdup(uuid_utoa(volinfo->rebal.rebalance_id));
    if (!rebalance_id_str) {
        ret = -1;
        goto out;
    }
    keylen = snprintf(key, sizeof(key), "%s%d.rebalance-id", prefix, count);
    ret = dict_set_dynstrn(dict, key, keylen, rebalance_id_str);
    if (ret)
        goto out;
    rebalance_id_str = NULL;
    snprintf(key, sizeof(key), "%s%d.rebalance-op", prefix, count);
    ret = dict_set_uint32(dict, key, volinfo->rebal.op);
    if (ret)
        goto out;
    if (volinfo->rebal.dict) {
        snprintf(pfx, sizeof(pfx), "%s%d", prefix, count);
        ctx.dict = dict;
        ctx.prefix = pfx;
        ctx.opt_count = 1;
        ctx.key_name = "rebal-dict-key";
        ctx.val_name = "rebal-dict-value";
        dict_foreach(volinfo->rebal.dict, _add_dict_to_prdict, &ctx);
        /* opt_count was pre-incremented past the last entry. */
        ctx.opt_count--;
        /* NOTE(review): this key hard-codes the "volume" prefix instead
         * of using @prefix, unlike every other key in this function —
         * confirm against the import side before changing. */
        keylen = snprintf(key, sizeof(key), "volume%d.rebal-dict-count", count);
        ret = dict_set_int32n(dict, key, keylen, ctx.opt_count);
        if (ret)
            goto out;
    }
    /* --- volume options ("key"/"value" pairs + opt-count) --- */
    snprintf(pfx, sizeof(pfx), "%s%d", prefix, count);
    ctx.dict = dict;
    ctx.prefix = pfx;
    ctx.opt_count = 1;
    ctx.key_name = "key";
    ctx.val_name = "value";
    GF_ASSERT(volinfo->dict);
    dict_foreach(volinfo->dict, _add_dict_to_prdict, &ctx);
    ctx.opt_count--;
    keylen = snprintf(key, sizeof(key), "%s%d.opt-count", prefix, count);
    ret = dict_set_int32n(dict, key, keylen, ctx.opt_count);
    if (ret)
        goto out;
    /* --- geo-replication slaves --- */
    ctx.dict = dict;
    ctx.prefix = pfx;
    ctx.opt_count = 1;
    ctx.key_name = "slave-num";
    ctx.val_name = "slave-val";
    GF_ASSERT(volinfo->gsync_slaves);
    dict_foreach(volinfo->gsync_slaves, _add_dict_to_prdict, &ctx);
    ctx.opt_count--;
    keylen = snprintf(key, sizeof(key), "%s%d.gsync-count", prefix, count);
    ret = dict_set_int32n(dict, key, keylen, ctx.opt_count);
    if (ret)
        goto out;
    /* --- per-brick data under "<prefix><count>.brick<i>.*", i from 1 --- */
    cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
    {
        keylen = snprintf(key, sizeof(key), "%s%d.brick%d.hostname", prefix,
                          count, i);
        ret = dict_set_strn(dict, key, keylen, brickinfo->hostname);
        if (ret)
            goto out;
        keylen = snprintf(key, sizeof(key), "%s%d.brick%d.path", prefix, count,
                          i);
        ret = dict_set_strn(dict, key, keylen, brickinfo->path);
        if (ret)
            goto out;
        keylen = snprintf(key, sizeof(key), "%s%d.brick%d.decommissioned",
                          prefix, count, i);
        ret = dict_set_int32n(dict, key, keylen, brickinfo->decommissioned);
        if (ret)
            goto out;
        keylen = snprintf(key, sizeof(key), "%s%d.brick%d.brick_id", prefix,
                          count, i);
        ret = dict_set_strn(dict, key, keylen, brickinfo->brick_id);
        if (ret)
            goto out;
        snprintf(key, sizeof(key), "%s%d.brick%d.uuid", prefix, count, i);
        ret = dict_set_dynstr_with_alloc(dict, key, uuid_utoa(brickinfo->uuid));
        if (ret)
            goto out;
        snprintf(key, sizeof(key), "%s%d.brick%d", prefix, count, i);
        ret = gd_add_brick_snap_details_to_dict(dict, key, brickinfo);
        if (ret)
            goto out;
        i++;
    }
    /* Add volume op-versions to dict. This prevents volume inconsistencies
     * in the cluster
     */
    keylen = snprintf(key, sizeof(key), "%s%d.op-version", prefix, count);
    ret = dict_set_int32n(dict, key, keylen, volinfo->op_version);
    if (ret)
        goto out;
    keylen = snprintf(key, sizeof(key), "%s%d.client-op-version", prefix,
                      count);
    ret = dict_set_int32n(dict, key, keylen, volinfo->client_op_version);
    if (ret)
        goto out;
    /*Add volume Capability (BD Xlator) to dict*/
    keylen = snprintf(key, sizeof(key), "%s%d.caps", prefix, count);
    /* NOTE(review): the result of this set is overwritten by the next
     * one without a check — a caps set failure is silently ignored. */
    ret = dict_set_int32n(dict, key, keylen, volinfo->caps);
    keylen = snprintf(key, sizeof(key), "%s%d.quota-xattr-version", prefix,
                      count);
    ret = dict_set_int32n(dict, key, keylen, volinfo->quota_xattr_version);
out:
    /* Free any id strings whose ownership was not transferred to dict. */
    GF_FREE(volume_id_str);
    GF_FREE(rebalance_id_str);
    GF_FREE(rb_id_str);
    gf_msg_debug(this->name, 0, "Returning with %d", ret);
    return ret;
}
/* The prefix represents the type of volume to be added.
* It will be "volume" for normal volumes, and snap# like
* snap1, snap2, for snapshot volumes
*/
/* Export a volume's quota configuration into @load for a peer:
 * every limit gfid and its type as "<prefix><vol_idx>.gfid<i>" /
 * "<prefix><vol_idx>.gfid-type<i>", followed by the gfid count,
 * quota checksum and quota version.
 *
 * Creates the quota-conf store handle if it does not exist yet, then
 * reads the binary quota.conf via the quota_conf_* helpers.
 *
 * Returns 0 on success, -1 on failure (including a corrupt quota.conf).
 */
int
glusterd_vol_add_quota_conf_to_dict(glusterd_volinfo_t *volinfo, dict_t *load,
                                    int vol_idx, char *prefix)
{
    int fd = -1;
    unsigned char buf[16] = "";  /* one raw gfid read per iteration */
    char key[PATH_MAX] = "";
    int gfid_idx = 0;
    int ret = -1;
    xlator_t *this = NULL;
    char type = 0;
    float version = 0.0f;
    this = THIS;
    GF_ASSERT(this);
    GF_ASSERT(prefix);
    ret = glusterd_store_create_quota_conf_sh_on_absence(volinfo);
    if (ret)
        goto out;
    fd = open(volinfo->quota_conf_shandle->path, O_RDONLY);
    if (fd == -1) {
        ret = -1;
        goto out;
    }
    /* The on-disk format is versioned; the version drives how each
     * gfid record is parsed below. */
    ret = quota_conf_read_version(fd, &version);
    if (ret)
        goto out;
    /* Read gfid records until EOF (ret == 0) or a read error (ret < 0),
     * exporting each one as it is read. */
    for (gfid_idx = 0;; gfid_idx++) {
        ret = quota_conf_read_gfid(fd, buf, &type, version);
        if (ret == 0) {
            break;
        } else if (ret < 0) {
            gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_QUOTA_CONF_CORRUPT,
                   "Quota "
                   "configuration store may be corrupt.");
            goto out;
        }
        snprintf(key, sizeof(key) - 1, "%s%d.gfid%d", prefix, vol_idx,
                 gfid_idx);
        ret = dict_set_dynstr_with_alloc(load, key, uuid_utoa(buf));
        if (ret)
            goto out;
        snprintf(key, sizeof(key) - 1, "%s%d.gfid-type%d", prefix, vol_idx,
                 gfid_idx);
        ret = dict_set_int8(load, key, type);
        if (ret)
            goto out;
    }
    /* gfid_idx now holds the number of records exported. */
    ret = snprintf(key, sizeof(key), "%s%d.gfid-count", prefix, vol_idx);
    ret = dict_set_int32n(load, key, ret, gfid_idx);
    if (ret)
        goto out;
    snprintf(key, sizeof(key), "%s%d.quota-cksum", prefix, vol_idx);
    ret = dict_set_uint32(load, key, volinfo->quota_conf_cksum);
    if (ret)
        goto out;
    snprintf(key, sizeof(key), "%s%d.quota-version", prefix, vol_idx);
    ret = dict_set_uint32(load, key, volinfo->quota_conf_version);
    if (ret)
        goto out;
    ret = 0;
out:
    if (fd != -1)
        sys_close(fd);
    return ret;
}
/* Build a freshly allocated dict describing every local volume (plus
 * its quota configuration when quota is enabled) and the cluster-wide
 * options, for export to a peer.
 *
 * On success *peer_data owns the new dict. On failure the partially
 * built dict (if any) is released and *peer_data is left untouched.
 *
 * Returns 0 on success, -1 on failure.
 */
int32_t
glusterd_add_volumes_to_export_dict(dict_t **peer_data)
{
    int32_t ret = -1;
    dict_t *dict = NULL;
    glusterd_conf_t *priv = NULL;
    glusterd_volinfo_t *volinfo = NULL;
    int32_t count = 0;
    glusterd_dict_ctx_t ctx = {0};
    xlator_t *this = NULL;
    this = THIS;
    GF_ASSERT(this);
    priv = this->private;
    GF_ASSERT(priv);
    dict = dict_new();
    if (!dict)
        goto out;
    /* One "volume<count>.*" section per volume, counting from 1. */
    cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
    {
        count++;
        ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume");
        if (ret)
            goto out;
        if (!glusterd_is_volume_quota_enabled(volinfo))
            continue;
        ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count,
                                                  "volume");
        if (ret)
            goto out;
    }
    ret = dict_set_int32n(dict, "count", SLEN("count"), count);
    if (ret)
        goto out;
    /* Append the cluster-wide (global) options. */
    ctx.dict = dict;
    ctx.prefix = "global";
    ctx.opt_count = 1;
    ctx.key_name = "key";
    ctx.val_name = "val";
    dict_foreach(priv->opts, _add_dict_to_prdict, &ctx);
    ctx.opt_count--;
    ret = dict_set_int32n(dict, "global-opt-count", SLEN("global-opt-count"),
                          ctx.opt_count);
    if (ret)
        goto out;
    *peer_data = dict;
out:
    /* Fix: dict is NULL when dict_new() failed above — don't hand a
     * NULL pointer to dict_unref() on that error path. */
    if (ret && dict)
        dict_unref(dict);
    gf_msg_trace(this->name, 0, "Returning %d", ret);
    return ret;
}
/* Compare the peer's view of volume number @count in @peer_data with
 * the local volinfo and decide the friend-handshake verdict in
 * *status:
 *   - GLUSTERD_VOL_COMP_UPDATE_REQ: peer has newer data (or we lack
 *     the volume entirely and it is not mid-delete) — we need updating.
 *   - GLUSTERD_VOL_COMP_SCS:        local data is same or newer.
 *   - GLUSTERD_VOL_COMP_RJT:        same versions but checksums differ
 *     — irreconcilable, reject the peer.
 * Also records "volume<count>.update" (1/0) back into @peer_data.
 *
 * Returns 0 on success (verdict in *status), negative on dict errors.
 */
int32_t
glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
                               int32_t *status, char *hostname)
{
    int32_t ret = -1;
    char key[64] = "";
    int keylen;
    glusterd_volinfo_t *volinfo = NULL;
    char *volname = NULL;
    uint32_t cksum = 0;
    uint32_t quota_cksum = 0;
    uint32_t quota_version = 0;
    uint32_t stage_deleted = 0;
    int32_t version = 0;
    xlator_t *this = NULL;
    GF_ASSERT(peer_data);
    GF_ASSERT(status);
    this = THIS;
    GF_ASSERT(this);
    keylen = snprintf(key, sizeof(key), "volume%d.name", count);
    ret = dict_get_strn(peer_data, key, keylen, &volname);
    if (ret)
        goto out;
    ret = glusterd_volinfo_find(volname, &volinfo);
    if (ret) {
        snprintf(key, sizeof(key), "volume%d.stage_deleted", count);
        ret = dict_get_uint32(peer_data, key, &stage_deleted);
        /* stage_deleted = 1 means the volume is still in the process of
         * deleting a volume, so we shouldn't be trying to create a
         * fresh volume here which would lead to a stale entry
         */
        if (stage_deleted == 0)
            *status = GLUSTERD_VOL_COMP_UPDATE_REQ;
        ret = 0;
        goto out;
    }
    keylen = snprintf(key, sizeof(key), "volume%d.version", count);
    ret = dict_get_int32n(peer_data, key, keylen, &version);
    if (ret)
        goto out;
    if (version > volinfo->version) {
        // Mismatch detected
        ret = 0;
        gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_VOL_VERS_MISMATCH,
               "Version of volume %s differ. local version = %d, "
               "remote version = %d on peer %s",
               volinfo->volname, volinfo->version, version, hostname);
        *status = GLUSTERD_VOL_COMP_UPDATE_REQ;
        goto out;
    } else if (version < volinfo->version) {
        *status = GLUSTERD_VOL_COMP_SCS;
        goto out;
    }
    // Now, versions are same, compare cksums.
    //
    /* "ckusm" is the historical (misspelled) on-wire key written by
     * glusterd_add_volume_to_dict(); it must match the sender. */
    snprintf(key, sizeof(key), "volume%d.ckusm", count);
    ret = dict_get_uint32(peer_data, key, &cksum);
    if (ret)
        goto out;
    if (cksum != volinfo->cksum) {
        ret = 0;
        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_VERS_MISMATCH,
               "Version of Cksums %s differ. local cksum = %u, remote "
               "cksum = %u on peer %s",
               volinfo->volname, volinfo->cksum, cksum, hostname);
        *status = GLUSTERD_VOL_COMP_RJT;
        goto out;
    }
    snprintf(key, sizeof(key), "volume%d.quota-version", count);
    /* Fix: this argument had been garbled in the source text; restore
     * the address-of expression so the peer's value is stored. */
    ret = dict_get_uint32(peer_data, key, &quota_version);
    if (ret) {
        /* Absence is not an error: older peers don't send it. */
        gf_msg_debug(this->name, 0,
                     "quota-version key absent for"
                     " volume %s in peer %s's response",
                     volinfo->volname, hostname);
        ret = 0;
    } else {
        if (quota_version > volinfo->quota_conf_version) {
            // Mismatch detected
            ret = 0;
            gf_msg(this->name, GF_LOG_INFO, 0,
                   GD_MSG_QUOTA_CONFIG_VERS_MISMATCH,
                   "Quota configuration versions of volume %s "
                   "differ. local version = %d, remote version = "
                   "%d on peer %s",
                   volinfo->volname, volinfo->quota_conf_version, quota_version,
                   hostname);
            *status = GLUSTERD_VOL_COMP_UPDATE_REQ;
            goto out;
        } else if (quota_version < volinfo->quota_conf_version) {
            *status = GLUSTERD_VOL_COMP_SCS;
            goto out;
        }
    }
    // Now, versions are same, compare cksums.
    //
    snprintf(key, sizeof(key), "volume%d.quota-cksum", count);
    /* Fix: same garbling as above; restore &quota_cksum. */
    ret = dict_get_uint32(peer_data, key, &quota_cksum);
    if (ret) {
        gf_msg_debug(this->name, 0,
                     "quota checksum absent for "
                     "volume %s in peer %s's response",
                     volinfo->volname, hostname);
        ret = 0;
    } else {
        if (quota_cksum != volinfo->quota_conf_cksum) {
            ret = 0;
            gf_msg(this->name, GF_LOG_ERROR, 0,
                   GD_MSG_QUOTA_CONFIG_CKSUM_MISMATCH,
                   "Cksums of "
                   "quota configuration of volume %s differ. local"
                   " cksum = %u, remote cksum = %u on peer %s",
                   volinfo->volname, volinfo->quota_conf_cksum, quota_cksum,
                   hostname);
            *status = GLUSTERD_VOL_COMP_RJT;
            goto out;
        }
    }
    *status = GLUSTERD_VOL_COMP_SCS;
out:
    keylen = snprintf(key, sizeof(key), "volume%d.update", count);
    if (*status == GLUSTERD_VOL_COMP_UPDATE_REQ) {
        ret = dict_set_int32n(peer_data, key, keylen, 1);
    } else {
        ret = dict_set_int32n(peer_data, key, keylen, 0);
    }
    /* RJT is only ever assigned after glusterd_volinfo_find() succeeded,
     * so volinfo is non-NULL in this branch. */
    if (*status == GLUSTERD_VOL_COMP_RJT) {
        gf_event(EVENT_COMPARE_FRIEND_VOLUME_FAILED, "volume=%s",
                 volinfo->volname);
    }
    gf_msg_debug(this->name, 0, "Returning with ret: %d, status: %d", ret,
                 *status);
    return ret;
}
/* Copy @opt_count sequence-numbered key/value string pairs out of
 * @peer_data — keys "<prefix>.<key_prefix><i>" and
 * "<prefix>.<value_prefix><i>", with i starting at 1 — into @dst_dict
 * as plain key -> value entries.
 *
 * Returns 0 on success, non-zero on a missing key, allocation failure
 * or dict-set failure (the failure reason is logged).
 */
static int32_t
import_prdict_dict(dict_t *peer_data, dict_t *dst_dict, char *key_prefix,
                   char *value_prefix, int opt_count, char *prefix)
{
    char key[512] = "";
    int keylen;
    int32_t ret = 0;
    int i = 0;
    char *opt_key = NULL;
    char *opt_val = NULL;
    char *dup_opt_val = NULL;
    char msg[2048] = "";

    for (i = 1; i <= opt_count; i++) {
        keylen = snprintf(key, sizeof(key), "%s.%s%d", prefix, key_prefix, i);
        ret = dict_get_strn(peer_data, key, keylen, &opt_key);
        if (ret) {
            snprintf(msg, sizeof(msg),
                     "Volume dict key not "
                     "specified");
            goto out;
        }

        keylen = snprintf(key, sizeof(key), "%s.%s%d", prefix, value_prefix,
                          i);
        ret = dict_get_strn(peer_data, key, keylen, &opt_val);
        if (ret) {
            snprintf(msg, sizeof(msg),
                     "Volume dict value not "
                     "specified");
            goto out;
        }

        /* dict_set_dynstr() takes ownership of the copy on success;
         * NOTE(review): on failure the copy looks leaked — confirm
         * against the dict API's ownership contract. */
        dup_opt_val = gf_strdup(opt_val);
        if (!dup_opt_val) {
            ret = -1;
            goto out;
        }
        ret = dict_set_dynstr(dst_dict, opt_key, dup_opt_val);
        if (ret) {
            snprintf(msg, sizeof(msg),
                     "Volume set %s %s "
                     "unsuccessful",
                     opt_key, dup_opt_val);
            goto out;
        }
    }
out:
    if (msg[0])
        gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_IMPORT_PRDICT_DICT, "%s",
               msg);
    gf_msg_debug("glusterd", 0, "Returning with %d", ret);
    return ret;
}
/* Synctask entry point: restart all daemons glusterd manages — bricks,
 * geo-rep gsyncds, rebalance processes, then the snapd, tierd and
 * gfproxyd services.
 *
 * @opaque is unused.
 *
 * Returns the status of the last service restart (see note below).
 */
int
glusterd_spawn_daemons(void *opaque)
{
    glusterd_conf_t *conf = THIS->private;
    int ret = -1;
    /* NOTE(review): big_lock is acquired here with no visible unlock —
     * presumably the synctask framework releases it; confirm at the
     * spawn site. */
    synclock_lock(&conf->big_lock);
    glusterd_restart_bricks();
    glusterd_restart_gsyncds(conf);
    glusterd_restart_rebalance(conf);
    /* NOTE(review): each assignment below overwrites the previous ret,
     * so failures of snapd/tierd restarts are not reported — only the
     * gfproxyd restart status reaches the caller. */
    ret = glusterd_snapdsvc_restart();
    ret = glusterd_tierdsvc_restart();
    ret = glusterd_gfproxydsvc_restart();
    return ret;
}
int32_t
glusterd_import_friend_volume_opts(dict_t *peer_data, int count,
glusterd_volinfo_t *volinfo, char *prefix)
{
char key[512] = "";
int keylen;
int32_t ret = -1;
int opt_count = 0;
char msg[2048] = "";
char volume_prefix[1024] = "";
GF_ASSERT(peer_data);
GF_ASSERT(volinfo);
keylen = snprintf(key, sizeof(key), "%s%d.opt-count", prefix, count);
ret = dict_get_int32n(peer_data, key, keylen, &opt_count);
if (ret) {
snprintf(msg, sizeof(msg),
"Volume option count not "
"specified for %s",
volinfo->volname);
goto out;
}
snprintf(volume_prefix, sizeof(volume_prefix), "%s%d", prefix, count);
ret = import_prdict_dict(peer_data, volinfo->dict, "key", "value",
opt_count, volume_prefix);
if (ret) {
snprintf(msg, sizeof(msg),
"Unable to import options dict "
"specified for %s",
volinfo->volname);
goto out;
}
keylen = snprintf(key, sizeof(key), "%s%d.gsync-count", prefix, count);
|