summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Pranith Kumar K <pkarampu@redhat.com> 2014-09-23 12:43:02 +0530
committer: Vijay Bellur <vbellur@redhat.com> 2014-09-23 09:49:10 -0700
commit: 1b27b8231e2d69c3bfd4710ab3f631cd3604e362 (patch)
tree: 4fe5d5e5a43d5ae38c2401b4bff13586fc52db2c
parent: e582cc6577913760d6b8636cef3d7267890ac1f1 (diff)
download: glusterfs-1b27b8231e2d69c3bfd4710ab3f631cd3604e362.tar.gz
glusterfs-1b27b8231e2d69c3bfd4710ab3f631cd3604e362.tar.xz
glusterfs-1b27b8231e2d69c3bfd4710ab3f631cd3604e362.zip
cluster/afr: Don't start heal when lookup succeeds on < 2 children
Backport of http://review.gluster.org/8698

Problem: Self-heal cannot be done unless the lookup succeeds on at least 2 children. Currently, if all the lookups fail, the pending changelog in the xattrs is all zeros, so every child is treated as a source. This leads to crashes when later code paths assume that certain data structures are populated properly.

Fix: Don't proceed with self-heal when the lookup succeeds on fewer than 2 children.

BUG: 1145726
Change-Id: I65465843f0e554c8ccdd8fa930ab42ac123ec023
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/8824
Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.c | 17
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-data.c | 4
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-entry.c | 6
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-metadata.c | 4
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-name.c | 2
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal.h | 4
6 files changed, 29 insertions, 8 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 0158948d72..b104e6b786 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -324,6 +324,12 @@ afr_selfheal_find_direction (xlator_t *this, struct afr_reply *replies,
accused = alloca0 (priv->child_count);
matrix = ALLOC_MATRIX(priv->child_count, int);
+ if (afr_success_count (replies,
+ priv->child_count) < AFR_SH_MIN_PARTICIPANTS) {
+ /* Treat this just like locks not being acquired */
+ return -ENOTCONN;
+ }
+
/* First construct the pending matrix for further analysis */
afr_selfheal_extract_xattr (this, replies, type, dirty, matrix);
@@ -502,6 +508,17 @@ afr_selfheal_unlocked_discover (call_frame_t *frame, inode_t *inode,
priv->child_up);
}
+/*
+ * Count how many of the first @count entries of @replies are marked valid
+ * and carry op_ret == 0.
+ *
+ * @replies: array holding at least @count reply entries.
+ * @count:   number of entries of @replies to examine.
+ *
+ * Returns the number of matching entries, a value in the range [0, count].
+ */
+unsigned int
+afr_success_count (struct afr_reply *replies, unsigned int count)
+{
+        /* Same type as count, so the loop bound is not a signed/unsigned
+         * comparison and the index cannot overflow for large counts. */
+        unsigned int i = 0;
+        unsigned int success = 0;
+
+        for (i = 0; i < count; i++) {
+                if (replies[i].valid && replies[i].op_ret == 0)
+                        success++;
+        }
+
+        return success;
+}
int
afr_selfheal_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 455648b756..bee7682a23 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -508,7 +508,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = afr_selfheal_inodelk (frame, this, fd->inode, this->name, 0, 0,
data_lock);
{
- if (ret < 2) {
+ if (ret < AFR_SH_MIN_PARTICIPANTS) {
ret = -ENOTCONN;
goto unlock;
}
@@ -611,7 +611,7 @@ afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode)
ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain, 0, 0,
locked_on);
{
- if (ret < 2) {
+ if (ret < AFR_SH_MIN_PARTICIPANTS) {
/* Either less than two subvols available, or another
selfheal (from another server) is in progress. Skip
for now in any case there isn't anything to do.
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 0cf65009c5..45ce881e12 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -283,7 +283,7 @@ afr_selfheal_entry_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = afr_selfheal_entrylk (frame, this, fd->inode, this->name,
name, locked_on);
{
- if (ret < 2) {
+ if (ret < AFR_SH_MIN_PARTICIPANTS) {
ret = -ENOTCONN;
goto unlock;
}
@@ -491,7 +491,7 @@ __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = afr_selfheal_entrylk (frame, this, fd->inode, this->name, NULL,
data_lock);
{
- if (ret < 2) {
+ if (ret < AFR_SH_MIN_PARTICIPANTS) {
ret = -ENOTCONN;
goto unlock;
}
@@ -567,7 +567,7 @@ afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode)
ret = afr_selfheal_tryentrylk (frame, this, inode, priv->sh_domain, NULL,
locked_on);
{
- if (ret < 2) {
+ if (ret < AFR_SH_MIN_PARTICIPANTS) {
/* Either less than two subvols available, or another
selfheal (from another server) is in progress. Skip
for now in any case there isn't anything to do.
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index e98728ba54..2c5f3fd652 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -228,7 +228,7 @@ __afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode,
ret = afr_selfheal_inodelk (frame, this, inode, this->name,
LLONG_MAX - 1, 0, data_lock);
{
- if (ret < 2) {
+ if (ret < AFR_SH_MIN_PARTICIPANTS) {
ret = -ENOTCONN;
goto unlock;
}
@@ -274,7 +274,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode)
ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain, 0, 0,
locked_on);
{
- if (ret < 2) {
+ if (ret < AFR_SH_MIN_PARTICIPANTS) {
/* Either less than two subvols available, or another
selfheal (from another server) is in progress. Skip
for now in any case there isn't anything to do.
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
index a3020f4e1a..c5d126185c 100644
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -571,7 +571,7 @@ afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent,
ret = afr_selfheal_entrylk (frame, this, parent, this->name, bname,
locked_on);
{
- if (ret < 2) {
+ if (ret < AFR_SH_MIN_PARTICIPANTS) {
ret = -ENOTCONN;
goto unlock;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 31f12a4e74..7936659e5e 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -12,6 +12,7 @@
#ifndef _AFR_SELFHEAL_H
#define _AFR_SELFHEAL_H
+/* Minimum number of children that must take part (locks acquired, or
+ * lookups succeeded) for a self-heal to proceed; callers return -ENOTCONN
+ * when fewer are available. */
+#define AFR_SH_MIN_PARTICIPANTS 2
/* Perform fop on all UP subvolumes and wait for all callbacks to return */
@@ -181,4 +182,7 @@ afr_selfheal_newentry_mark (call_frame_t *frame, xlator_t *this, inode_t *inode,
inode_t*
afr_inode_link (inode_t *inode, struct iatt *iatt);
+
+/* Return how many of the first `count` entries of `replies` are valid and
+ * have op_ret == 0. */
+unsigned int
+afr_success_count (struct afr_reply *replies, unsigned int count);
#endif /* !_AFR_SELFHEAL_H */