From b13c483dca20e4015b958f8959328e665a357f60 Mon Sep 17 00:00:00 2001 From: Avra Sengupta Date: Sat, 1 Jun 2013 16:17:57 +0530 Subject: gsyncd: distribute the crawling load * also consume changelog for change detection. * Status fixes * Use new libgfchangelog done API * process (and sync) one changelog at a time Change-Id: I24891615bb762e0741b1819ddfdef8802326cb16 BUG: 847839 Original Author: Csaba Henk Original Author: Aravinda VK Original Author: Venky Shankar Original Author: Amar Tumballi Original Author: Avra Sengupta Signed-off-by: Avra Sengupta Reviewed-on: http://review.gluster.org/5131 Reviewed-by: Vijay Bellur Tested-by: Vijay Bellur --- extras/geo-rep/Makefile.am | 2 + extras/geo-rep/generate-gfid-file.sh | 53 +++++++++++++++ extras/geo-rep/get-gfid.sh | 7 ++ extras/geo-rep/gsync-sync-gfid.c | 106 ++++++++++++++++++++++++++++++ extras/geo-rep/gsync-upgrade.sh | 123 +++++++++++++++++++++++++++++++++++ extras/geo-rep/slave-upgrade.sh | 102 +++++++++++++++++++++++++++++ 6 files changed, 393 insertions(+) create mode 100644 extras/geo-rep/Makefile.am create mode 100644 extras/geo-rep/generate-gfid-file.sh create mode 100755 extras/geo-rep/get-gfid.sh create mode 100644 extras/geo-rep/gsync-sync-gfid.c create mode 100644 extras/geo-rep/gsync-upgrade.sh create mode 100644 extras/geo-rep/slave-upgrade.sh (limited to 'extras/geo-rep') diff --git a/extras/geo-rep/Makefile.am b/extras/geo-rep/Makefile.am new file mode 100644 index 0000000000..fc5f56d54f --- /dev/null +++ b/extras/geo-rep/Makefile.am @@ -0,0 +1,2 @@ +EXTRA_DIST = gsync-sync-gfid.c gsync-upgrade.sh generate-gfid-file.sh \ + get-gfid.sh slave-upgrade.sh diff --git a/extras/geo-rep/generate-gfid-file.sh b/extras/geo-rep/generate-gfid-file.sh new file mode 100644 index 0000000000..c6739fbf14 --- /dev/null +++ b/extras/geo-rep/generate-gfid-file.sh @@ -0,0 +1,53 @@ +#!/bin/bash +#Usage: generate-gfid-file.sh + +function get_gfids() +{ + GET_GFID_CMD=$1 + OUTPUT_FILE=$2 + find . -exec $GET_GFID_CMD {} \; > $OUTPUT_FILE +} + +function mount_client() +{ + local T; # temporary mount + local i; # inode number + + VOLFILE_SERVER=$1; + VOLUME=$2; + GFID_CMD=$3; + OUTPUT=$4; + + T=$(mktemp -d); + + glusterfs -s $VOLFILE_SERVER --volfile-id $VOLUME $T; + + i=$(stat -c '%i' $T); + + [ "x$i" = "x1" ] || fatal "could not mount volume $MASTER on $T"; + + cd $T; + + get_gfids $GFID_CMD $OUTPUT + + cd -; + + umount $T || fatal "could not umount $MASTER from $T"; + + rmdir $T || warn "rmdir of $T failed"; +} + + +function main() +{ + SLAVE=$1 + GET_GFID_CMD=$2 + OUTPUT=$3 + + VOLFILE_SERVER=`echo $SLAVE | sed -e 's/\(.*\):.*/\1/'` + VOLUME_NAME=`echo $SLAVE | sed -e 's/.*:\(.*\)/\1/'` + + mount_client $VOLFILE_SERVER $VOLUME_NAME $GET_GFID_CMD $OUTPUT +} + +main "$@"; diff --git a/extras/geo-rep/get-gfid.sh b/extras/geo-rep/get-gfid.sh new file mode 100755 index 0000000000..a4d609b0bc --- /dev/null +++ b/extras/geo-rep/get-gfid.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +ATTR_STR=`getfattr -h $1 -n glusterfs.gfid.string` +GLFS_PATH=`echo $ATTR_STR | sed -e 's/# file: \(.*\) glusterfs.gfid.string*/\1/g'` +GFID=`echo $ATTR_STR | sed -e 's/.*glusterfs.gfid.string="\(.*\)"/\1/g'` + +echo "$GFID $GLFS_PATH" diff --git a/extras/geo-rep/gsync-sync-gfid.c b/extras/geo-rep/gsync-sync-gfid.c new file mode 100644 index 0000000000..601f4720e2 --- /dev/null +++ b/extras/geo-rep/gsync-sync-gfid.c @@ -0,0 +1,106 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef UUID_CANONICAL_FORM_LEN +#define UUID_CANONICAL_FORM_LEN 36 +#endif + +#ifndef GF_FUSE_AUX_GFID_HEAL +#define GF_FUSE_AUX_GFID_HEAL "glusterfs.gfid.heal" +#endif + +#define GLFS_LINE_MAX (PATH_MAX + (2 * UUID_CANONICAL_FORM_LEN)) + +int +main (int argc, char *argv[]) +{ + char *file = NULL; + char *tmp = NULL; + char *tmp1 = NULL; + char *parent_dir = NULL; + char *gfid = NULL; + char *bname = NULL; + int ret = -1; + int len = 0; + FILE *fp = NULL; + char line[GLFS_LINE_MAX] = {0,}; + char *path = NULL; + void *blob = NULL; + void *tmp_blob = NULL; + + if (argc != 2) { + /* each line in the file has the following format + * uuid-in-canonical-form path-relative-to-gluster-mount. + * Both uuid and relative path are from master mount. + */ + fprintf (stderr, "usage: %s \n", + argv[0]); + goto out; + } + + file = argv[1]; + + fp = fopen (file, "r"); + if (fp == NULL) { + fprintf (stderr, "cannot open %s for reading (%s)\n", + file, strerror (errno)); + goto out; + } + + while (fgets (line, GLFS_LINE_MAX, fp) != NULL) { + tmp = line; + path = gfid = line; + + path += UUID_CANONICAL_FORM_LEN + 1; + + while(isspace (*path)) + path++; + + if ((strlen (line) < GLFS_LINE_MAX) && + (line[strlen (line) - 1] == '\n')) + line[strlen (line) - 1] = '\0'; + + line[UUID_CANONICAL_FORM_LEN] = '\0'; + + tmp = strdup (path); + tmp1 = strdup (path); + parent_dir = dirname (tmp); + bname = basename (tmp1); + + /* gfid + '\0' + bname + '\0' */ + len = UUID_CANONICAL_FORM_LEN + 1 + strlen (bname) + 1; + + blob = calloc (1, len); + + memcpy (blob, gfid, UUID_CANONICAL_FORM_LEN); + + tmp_blob = blob + UUID_CANONICAL_FORM_LEN + 1; + + memcpy (tmp_blob, bname, strlen (bname)); + + ret = setxattr (parent_dir, GF_FUSE_AUX_GFID_HEAL, blob, len, + 0); + if (ret < 0) { + fprintf (stderr, "setxattr on %s/%s failed (%s)\n", + parent_dir, bname, strerror (errno)); + } + memset (line, 0, GLFS_LINE_MAX); + + free (blob); + free (tmp); free (tmp1); + blob = NULL; + } + + ret = 0; +out: + return ret; +} + diff --git a/extras/geo-rep/gsync-upgrade.sh b/extras/geo-rep/gsync-upgrade.sh new file mode 100644 index 0000000000..b179487365 --- /dev/null +++ b/extras/geo-rep/gsync-upgrade.sh @@ -0,0 +1,123 @@ +#!/bin/bash +#usage: gsync-upgrade.sh +# +#: a machine on which gluster cli can fetch slave volume info. +# slave-volfile-server defaults to localhost. +# +#: a file containing paths and their associated gfids +# on master. The paths are relative to master mount point +# (not absolute). An example extract of can be, +# +# +# 22114455-57c5-46e9-a783-c40f83a72b09 /dir +# 25772386-3eb8-4550-a802-c3fdc938ca80 /dir/file +# +# +#: file from which the identity (private key) for public key authentication is read. + +SLAVE_MOUNT='/tmp/glfs_slave' + +function SSH() +{ + HOST=$1 + SSHKEY=$2 + + shift 2 + + ssh -qi $SSHKEY \ + -oPasswordAuthentication=no \ + -oStrictHostKeyChecking=no \ + "$HOST" "$@"; +} + +function get_bricks() +{ + SSHKEY=$3 + + SSH $1 $SSHKEY "gluster volume info $2" | grep -E 'Brick[0-9]+' | sed -e 's/[^:]*:\(.*\)/\1/g' +} + +function cleanup_brick() +{ + HOST=$1 + BRICK=$2 + SSHKEY=$3 + + # TODO: write a C program to receive a list of files and does cleanup on + # them instead of spawning a new setfattr process for each file if + # performance is bad. + SSH -i $SSHKEY $HOST "rm -rf $BRICK/.glusterfs/* && find $BRICK -exec setfattr -x trusted.gfid {} \;" +} + +function cleanup_slave() +{ + SSHKEY=$2 + + VOLFILE_SERVER=`echo $1 | sed -e 's/\(.*\):.*/\1/'` + VOLUME_NAME=`echo $1 | sed -e 's/.*:\(.*\)/\1/'` + + BRICKS=`get_bricks $VOLFILE_SERVER $VOLUME_NAME $SSHKEY` + + for i in $BRICKS; do + HOST=`echo $i | sed -e 's/\(.*\):.*/\1/'` + BRICK=`echo $i | sed -e 's/.*:\(.*\)/\1/'` + cleanup_brick $HOST $BRICK $SSHKEY + done + + SSH -i $SSHKEY $VOLFILE_SERVER "gluster --mode=script volume stop $VOLUME_NAME; gluster volume start $VOLUME_NAME"; + +} + +function mount_client() +{ + local T; # temporary mount + local i; # inode number + GFID_FILE=$3 + SYNC_CMD=$4 + + T=$(mktemp -d); + + glusterfs --aux-gfid-mount -s $1 --volfile-id $2 $T; + + i=$(stat -c '%i' $T); + + [ "x$i" = "x1" ] || fatal "could not mount volume $MASTER on $T"; + + cd $T; + + $SYNC_CMD $GFID_FILE + + cd -; + + umount -l $T || fatal "could not umount $MASTER from $T"; + + rmdir $T || warn "rmdir of $T failed"; +} + +function sync_gfids() +{ + SLAVE=$1 + GFID_FILE=$2 + + SLAVE_VOLFILE_SERVER=`echo $SLAVE | sed -e 's/\(.*\):.*/\1/'` + SLAVE_VOLUME_NAME=`echo $SLAVE | sed -e 's/.*:\(.*\)/\1/'` + + if [ "x$SLAVE_VOLFILE_SERVER" = "x" ]; then + SLAVE_VOLFILE_SERVER="localhost" + fi + + mount_client $SLAVE_VOLFILE_SERVER $SLAVE_VOLUME_NAME $GFID_FILE $3 +} + +function upgrade() +{ + SLAVE=$1 + GFID_FILE=$2 + SYNC_CMD=$3 + SSHKEY=$4 + + cleanup_slave $SLAVE $SSHKEY + sync_gfids $SLAVE $GFID_FILE $SYNC_CMD +} + +upgrade "$@" diff --git a/extras/geo-rep/slave-upgrade.sh b/extras/geo-rep/slave-upgrade.sh new file mode 100644 index 0000000000..6198f408af --- /dev/null +++ b/extras/geo-rep/slave-upgrade.sh @@ -0,0 +1,102 @@ +#!/bin/bash +#usage: slave-upgrade.sh +# +#: a machine on which gluster cli can fetch slave volume info. +# slave-volfile-server defaults to localhost. +# +#: a file containing paths and their associated gfids +# on master. The paths are relative to master mount point +# (not absolute). An example extract of can be, +# +# +# 22114455-57c5-46e9-a783-c40f83a72b09 /dir +# 25772386-3eb8-4550-a802-c3fdc938ca80 /dir/file +# + +function get_bricks() +{ + gluster volume info $1 | grep -E 'Brick[0-9]+' | sed -e 's/[^:]*:\(.*\)/\1/g' +} + +function cleanup_brick() +{ + HOST=$1 + BRICK=$2 + + # TODO: write a C program to receive a list of files and does cleanup on + # them instead of spawning a new setfattr process for each file if + # performance is bad. + ssh $HOST "rm -rf $BRICK/.glusterfs/* && find $BRICK -exec setfattr -x trusted.gfid {} \; 2>/dev/null" +} + +function cleanup_slave() +{ + VOLUME_NAME=`echo $1 | sed -e 's/.*:\(.*\)/\1/'` + + BRICKS=`get_bricks $VOLUME_NAME` + + for i in $BRICKS; do + HOST=`echo $i | sed -e 's/\(.*\):.*/\1/'` + BRICK=`echo $i | sed -e 's/.*:\(.*\)/\1/'` + cleanup_brick $HOST $BRICK + done + + # Now restart the volume + gluster --mode=script volume stop $VOLUME_NAME; + gluster volume start $VOLUME_NAME; +} + +function mount_client() +{ + local T; # temporary mount + local i; # inode number + + VOLUME_NAME=$2; + GFID_FILE=$3 + SYNC_CMD=$4 + + T=$(mktemp -d); + + glusterfs --aux-gfid-mount -s $1 --volfile-id $VOLUME_NAME $T; + + i=$(stat -c '%i' $T); + + cd $T; + + $SYNC_CMD $GFID_FILE + + cd -; + + umount $T || fatal "could not umount $MASTER from $T"; + + rmdir $T || warn "rmdir of $T failed"; +} + +function sync_gfids() +{ + SLAVE=$1 + GFID_FILE=$2 + SYNC_CMD=$3 + + SLAVE_VOLFILE_SERVER=`echo $SLAVE | sed -e 's/\(.*\):.*/\1/'` + SLAVE_VOLUME_NAME=`echo $SLAVE | sed -e 's/.*:\(.*\)/\1/'` + + if [ "x$SLAVE_VOLFILE_SERVER" = "x" ]; then + SLAVE_VOLFILE_SERVER="localhost" + fi + + mount_client $SLAVE_VOLFILE_SERVER $SLAVE_VOLUME_NAME $GFID_FILE $SYNC_CMD +} + +function upgrade() +{ + SLAVE=$1 + GFID_FILE=$2 + SYNC_CMD=$3 + + cleanup_slave $SLAVE + + sync_gfids $SLAVE $GFID_FILE $SYNC_CMD +} + +upgrade "$@" -- cgit