diff options
author | Raghavendra Bhat <raghavendra@redhat.com> | 2019-04-15 14:09:34 -0400 |
---|---|---|
committer | Amar Tumballi <amarts@redhat.com> | 2019-04-17 03:27:24 +0000 |
commit | b922793588ad23a9b12ce65abd29e8f45ac87998 (patch) | |
tree | dd60c0e9a3b99846c94ed0dd2aaad6d7dad66255 /xlators | |
parent | e5ff6cc397e7a23dff4024efb6806cb004a89ee6 (diff) | |
download | glusterfs-b922793588ad23a9b12ce65abd29e8f45ac87998.tar.gz glusterfs-b922793588ad23a9b12ce65abd29e8f45ac87998.tar.xz glusterfs-b922793588ad23a9b12ce65abd29e8f45ac87998.zip |
features/bit-rot-stub: clean the mutex after cancelling the signer thread
When bit-rot feature is disabled, the signer thread from the bit-rot-stub
xlator (the thread which performs the setxattr of the signature on to the
disk) is cancelled. But, if the cancelled signer thread had already held
the mutex (&priv->lock) which it uses to monitor the queue of files to
be signed, then the mutex is never released. This creates problems in
future when the feature is enabled again. Both the new instance of the
signer thread and the regular thread which enqueues the files to be
signed will be blocked on this mutex.
So, as part of cancelling the signer thread, unlock the mutex associated
with it as well using pthread_cleanup_push and pthread_cleanup_pop.
Change-Id: Ib761910caed90b268e69794ddeb108165487af40
updates: bz#1700078
Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h | 4 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/stub/bit-rot-stub.c | 62 |
2 files changed, 59 insertions, 7 deletions
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h index 7f07f2929a..155802bba6 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h @@ -39,6 +39,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED, BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL, BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL, BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED, - BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL); + BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL, + BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD, + BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL); #endif /* !_BITROT_STUB_MESSAGES_H_ */ diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c index 3f48a4bda0..c3f81bcbb7 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c @@ -26,6 +26,15 @@ #define BR_STUB_REQUEST_COOKIE 0x1 +void +br_stub_lock_cleaner(void *arg) +{ + pthread_mutex_t *clean_mutex = arg; + + pthread_mutex_unlock(clean_mutex); + return; +} + void * br_stub_signth(void *); @@ -166,8 +175,11 @@ init(xlator_t *this) ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this, "brssign"); - if (ret != 0) + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED, + "failed to create the new thread for signer"); goto cleanup_lock; + } ret = br_stub_bad_object_container_init(this, priv); if (ret) { @@ -214,11 +226,15 @@ reconfigure(xlator_t *this, dict_t *options) priv = this->private; GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err); - if (priv->do_versioning) { + if (priv->do_versioning && !priv->signth) { ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this, "brssign"); - if (ret != 0) + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, + BRS_MSG_SPAWN_SIGN_THRD_FAILED, + "failed to create the new thread for signer"); goto err; + } ret = br_stub_bad_object_container_init(this, priv); if (ret) { @@ -232,8 +248,11 @@ reconfigure(xlator_t *this, dict_t *options) gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CANCEL_SIGN_THREAD_FAILED, "Could not cancel sign serializer thread"); + } else { + gf_msg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD, + "killed the signer thread"); + priv->signth = 0; } - priv->signth = 0; } if (priv->container.thread) { @@ -902,6 +921,24 @@ br_stub_signth(void *arg) THIS = this; while (1) { + /* + * Disabling bit-rot feature leads to this particular thread + * getting cleaned up by reconfigure via a call to the function + * gf_thread_cleanup_xint (which in turn calls pthread_cancel + * and pthread_join). But, if this thread had held the mutex + * &priv->lock at the time of cancellation, then it leads to + * deadlock in future when bit-rot feature is enabled (which + * again spawns this thread which cant hold the lock as the + * mutex is still held by the previous instance of the thread + * which got killed). Also, the br_stub_handle_object_signature + * function which is called whenever file has to be signed + * also gets blocked as it too attempts to acquire &priv->lock. + * + * So, arrange for the lock to be unlocked as part of the + * cleanup of this thread using pthread_cleanup_push and + * pthread_cleanup_pop. + */ + pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock); pthread_mutex_lock(&priv->lock); { while (list_empty(&priv->squeue)) @@ -912,6 +949,7 @@ br_stub_signth(void *arg) list_del_init(&sigstub->list); } pthread_mutex_unlock(&priv->lock); + pthread_cleanup_pop(0); call_resume(sigstub->stub); @@ -1042,12 +1080,22 @@ br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd, priv = this->private; - if (frame->root->pid != GF_CLIENT_PID_BITD) + if (frame->root->pid != GF_CLIENT_PID_BITD) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID, + "PID %d from where signature request" + "came, does not belong to bit-rot daemon." + "Unwinding the fop", + frame->root->pid); goto dofop; + } ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess); - if (ret) + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL, + "failed to prepare the signature for %s. Unwinding the fop", + uuid_utoa(fd->inode->gfid)); goto dofop; + } if (fakesuccess) { op_ret = op_errno = 0; goto dofop; @@ -1387,6 +1435,8 @@ br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, /* object signature request */ ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign); if (!ret) { + gf_msg_debug(this->name, 0, "got SIGNATURE request on %s", + uuid_utoa(fd->inode->gfid)); br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata); goto done; } |