diff options
author | Csaba Henk <csaba@redhat.com> | 2018-05-03 10:22:18 +0200 |
---|---|---|
committer | Amar Tumballi <amarts@redhat.com> | 2018-05-04 17:42:12 +0000 |
commit | 2ac79ed8048753dfd2494d3a4d3b0e9411673e3a (patch) | |
tree | 080da10738f9dade1fcb787d43f333c641dffe04 | |
parent | 15866ac9773e89cd9e017e7d3bf8aa01a87edfd8 (diff) | |
download | glusterfs-2ac79ed8048753dfd2494d3a4d3b0e9411673e3a.tar.gz glusterfs-2ac79ed8048753dfd2494d3a4d3b0e9411673e3a.tar.xz glusterfs-2ac79ed8048753dfd2494d3a4d3b0e9411673e3a.zip |
fuse: add support for kernel writeback cache
- Added kernel-writeback-cache command line and xlator
option for requesting utilisation of the writeback
cache of the kernel in FUSE_INIT (see [1]).
- Added attr-times-granularity command line and xlator
option via which the granularity we support for the
{a,m,c}time in stat (attr) data can be indicated to the
kernel. This is a means to avoid divergence of the
attr times between kernel and userspace that could
occur with writeback-cache, while still maintaining
the maximum time precision the FUSE server is capable of
(see [2]).
- Added handling of the FATTR_CTIME flag in FUSE_SETATTR,
which indicates the presence of ctime in the setattr payload.
Currently we cannot associate arbitrary ctimes with
files on the backend, so we just touch them to update
their ctimes to the current time. Having ctimes in the setattr
payload is also a side effect of the writeback cache
(see [3] and [4]).
[1]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4d99ff8,
"fuse: Turn writeback cache on"
[2]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e27c9d3,
"fuse: fuse: add time_gran to INIT_OUT"
[3]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1e18bda,
"fuse: add .write_inode"
[4]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=ab9e13f,
"fuse: allow ctime flushing to userspace"
Updates: #435
Change-Id: Id174c8e0c815c4456c35f8c53e41a6a507d91855
Signed-off-by: Csaba Henk <csaba@redhat.com>
-rw-r--r-- | doc/glusterfs.8 | 6 | ||||
-rw-r--r-- | doc/mount.glusterfs.8 | 6 | ||||
-rw-r--r-- | glusterfsd/src/glusterfsd.c | 70 | ||||
-rw-r--r-- | glusterfsd/src/glusterfsd.h | 2 | ||||
-rw-r--r-- | libglusterfs/src/glusterfs.h | 4 | ||||
-rw-r--r-- | libglusterfs/src/xlator.h | 1 | ||||
-rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.c | 50 | ||||
-rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.h | 4 | ||||
-rwxr-xr-x | xlators/mount/fuse/utils/mount.glusterfs.in | 14 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix-inode-fd-ops.c | 17 |
10 files changed, 170 insertions, 4 deletions
diff --git a/doc/glusterfs.8 b/doc/glusterfs.8 index 985f30a586..592dedb675 100644 --- a/doc/glusterfs.8 +++ b/doc/glusterfs.8 @@ -109,6 +109,9 @@ Mount subdirectory instead of the '/' of volume. .PP .TP +\fB\-\-attr\-times\-granularity=NANOSECONDS\fR +Declare supported granularity of file attribute times (default is 0 which kernel handles as unspecified; valid real values are between 1 and 1000000000). +.TP \fB\-\-attribute\-timeout=SECONDS\fR Set attribute timeout to SECONDS for inodes in fuse kernel module (the default is 1). .TP @@ -130,6 +133,9 @@ Set entry timeout to SECONDS in fuse kernel module (the default is 1). \fB\-\-gid\-timeout=SECONDS\fR Set auxiliary group list timeout to SECONDS for fuse translator (the default is 0). .TP +\fB\-\-kernel-writeback-cache=BOOL\fR +Enable fuse in-kernel writeback cache. +.TP \fB\-\-negative\-timeout=SECONDS\fR Set negative timeout to SECONDS in fuse kernel module (the default is 0). .TP diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8 index 6a51fc9aef..0cc7a6f853 100644 --- a/doc/mount.glusterfs.8 +++ b/doc/mount.glusterfs.8 @@ -133,6 +133,12 @@ enable root squashing for the trusted client [default: on] .TP \fBuse\-readdirp=\fRBOOL Use readdirp() mode in fuse kernel module [default: on] +.TP +\fBkernel\-writeback\-cache=\fRBOOL +Enable fuse in-kernel writeback cache [default: off] +.TP +\fBattr\-times\-granularity=\fRNS +Declare supported granularity of file attribute [default: 0] .PP .SH FILES .TP diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c index b5b8e4d30a..3de12bc125 100644 --- a/glusterfsd/src/glusterfsd.c +++ b/glusterfsd/src/glusterfsd.c @@ -256,6 +256,11 @@ static struct argp_option gf_options[] = { OPTION_ARG_OPTIONAL, "disable/enable fuse event-history"}, {"reader-thread-count", ARGP_READER_THREAD_COUNT_KEY, "INTEGER", OPTION_ARG_OPTIONAL, "set fuse reader thread count"}, + {"kernel-writeback-cache", ARGP_KERNEL_WRITEBACK_CACHE_KEY, "BOOL", + OPTION_ARG_OPTIONAL, "enable 
fuse in-kernel writeback cache"}, + {"attr-times-granularity", ARGP_ATTR_TIMES_GRANULARITY_KEY, "NS", + OPTION_ARG_OPTIONAL, "declare supported granularity of file attribute" + " times in nanoseconds"}, {0, 0, 0, 0, "Miscellaneous Options:"}, {0, } }; @@ -617,6 +622,44 @@ set_fuse_mount_options (glusterfs_ctx_t *ctx, dict_t *options) goto err; } } + switch (cmd_args->kernel_writeback_cache) { + case GF_OPTION_ENABLE: + ret = dict_set_static_ptr(options, "kernel-writeback-cache", + "on"); + if (ret < 0) { + gf_msg ("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4, + "failed to set dict value for key " + "kernel-writeback-cache"); + goto err; + } + break; + case GF_OPTION_DISABLE: + ret = dict_set_static_ptr(options, "kernel-writeback-cache", + "off"); + if (ret < 0) { + gf_msg ("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4, + "failed to set dict value for key " + "kernel-writeback-cache"); + goto err; + } + break; + case GF_OPTION_DEFERRED: /* default */ + default: + gf_msg_debug ("glusterfsd", 0, "kernel-writeback-cache mode %d", + cmd_args->kernel_writeback_cache); + break; + } + if (cmd_args->attr_times_granularity) { + ret = dict_set_uint32 (options, "attr-times-granularity", + cmd_args->attr_times_granularity); + if (ret < 0) { + gf_msg ("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4, + "failed to set dict value for key " + "attr-times-granularity"); + goto err; + } + } + ret = 0; err: @@ -1385,6 +1428,32 @@ no_oom_api: break; + case ARGP_KERNEL_WRITEBACK_CACHE_KEY: + if (!arg) + arg = "yes"; + + if (gf_string2boolean (arg, &b) == 0) { + cmd_args->kernel_writeback_cache = b; + + break; + } + + argp_failure (state, -1, 0, + "unknown kernel writeback cache setting \"%s\"", arg); + break; + case ARGP_ATTR_TIMES_GRANULARITY_KEY: + if (gf_string2uint32 (arg, &cmd_args->attr_times_granularity)) { + argp_failure (state, -1, 0, + "unknown attribute times granularity option %s", + arg); + } else if (cmd_args->attr_times_granularity > 1000000000) { + argp_failure 
(state, -1, 0, + "Invalid attribute times granularity value %s. " + "Valid range: [\"0, 1000000000\"]", arg); + } + + break; + } return 0; } @@ -1690,6 +1759,7 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx) cmd_args->fuse_attribute_timeout = -1; cmd_args->fuse_entry_timeout = -1; cmd_args->fopen_keep_cache = GF_OPTION_DEFERRED; + cmd_args->kernel_writeback_cache = GF_OPTION_DEFERRED; if (ctx->mem_acct_enable) cmd_args->mem_acct = 1; diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h index 2a03ec09fa..496a4d9535 100644 --- a/glusterfsd/src/glusterfsd.h +++ b/glusterfsd/src/glusterfsd.h @@ -105,6 +105,8 @@ enum argp_option_keys { ARGP_PRINT_XLATORDIR_KEY = 183, ARGP_PRINT_STATEDUMPDIR_KEY = 184, ARGP_PRINT_LOGDIR_KEY = 185, + ARGP_KERNEL_WRITEBACK_CACHE_KEY = 186, + ARGP_ATTR_TIMES_GRANULARITY_KEY = 187, }; struct _gfd_vol_top_priv { diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 904d4b60d7..90f2762b99 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -532,6 +532,10 @@ struct _cmd_args { char *event_history; int thin_client; uint32_t reader_thread_count; + + /* FUSE writeback cache support */ + int kernel_writeback_cache; + uint32_t attr_times_granularity; }; typedef struct _cmd_args cmd_args_t; diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h index 4f18d1cd2a..d476cf2644 100644 --- a/libglusterfs/src/xlator.h +++ b/libglusterfs/src/xlator.h @@ -33,6 +33,7 @@ #define GF_SET_ATTR_SIZE 0x8 #define GF_SET_ATTR_ATIME 0x10 #define GF_SET_ATTR_MTIME 0x20 +#define GF_SET_ATTR_CTIME 0x40 #define gf_attr_mode_set(mode) ((mode) & GF_SET_ATTR_MODE) #define gf_attr_uid_set(mode) ((mode) & GF_SET_ATTR_UID) diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index f509d84a15..322b4deeff 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -1233,6 +1233,11 @@ fattr_to_gf_set_attr (int32_t valid) if 
(valid & FATTR_MTIME) gf_valid |= GF_SET_ATTR_MTIME; +#if FUSE_KERNEL_MINOR_VERSION >= 23 + if (valid & FATTR_CTIME) + gf_valid |= GF_SET_ATTR_CTIME; +#endif + if (valid & FATTR_SIZE) gf_valid |= GF_SET_ATTR_SIZE; @@ -1271,7 +1276,11 @@ fuse_setattr_resume (fuse_state_t *state) if ((state->valid & (FATTR_MASK)) != FATTR_SIZE) { if (state->fd && !((state->valid & FATTR_ATIME) || - (state->valid & FATTR_MTIME))) { + (state->valid & FATTR_MTIME) +#if FUSE_KERNEL_MINOR_VERSION >= 23 + || (state->valid & FATTR_CTIME) +#endif + )) { /* there is no "futimes" call, so don't send fsetattr if ATIME or MTIME is set @@ -1346,8 +1355,14 @@ fuse_setattr (xlator_t *this, fuse_in_header_t *finh, void *msg, state->attr.ia_size = fsi->size; state->attr.ia_atime = fsi->atime; state->attr.ia_mtime = fsi->mtime; +#if FUSE_KERNEL_MINOR_VERSION >= 23 + state->attr.ia_ctime = fsi->ctime; +#endif state->attr.ia_atime_nsec = fsi->atimensec; state->attr.ia_mtime_nsec = fsi->mtimensec; +#if FUSE_KERNEL_MINOR_VERSION >= 23 + state->attr.ia_ctime_nsec = fsi->ctimensec; +#endif state->attr.ia_prot = ia_prot_from_st_mode (fsi->mode); state->attr.ia_uid = fsi->uid; @@ -4253,14 +4268,23 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg, if (fini->flags & FUSE_ASYNC_DIO) fino.flags |= FUSE_ASYNC_DIO; #endif + + size = sizeof (fino); +#if FUSE_KERNEL_MINOR_VERSION >= 23 /* FUSE 7.23 and newer added attributes to the fuse_init_out struct */ - if (fini->minor > 22) { - size = sizeof (fino); - } else { + if (fini->minor < 23) { /* reduce the size, chop off unused attributes from &fino */ size = FUSE_COMPAT_22_INIT_OUT_SIZE; } + /* Writeback cache support */ + if (fini->minor >= 23) { + if (priv->kernel_writeback_cache) + fino.flags |= FUSE_WRITEBACK_CACHE; + fino.time_gran = priv->attr_times_granularity; + } +#endif + ret = send_fuse_data (this, finh, &fino, size); if (ret == 0) gf_log ("glusterfs-fuse", GF_LOG_INFO, @@ -5770,6 +5794,12 @@ init (xlator_t *this_xl) 
GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit); + /* Writeback cache support */ + GF_OPTION_INIT("kernel-writeback-cache", priv->kernel_writeback_cache, + bool, cleanup_exit); + GF_OPTION_INIT("attr-times-granularity", priv->attr_times_granularity, + int32, cleanup_exit); + /* user has set only background-qlen, not congestion-threshold, use the fuse kernel driver formula to set congestion. ie, 75% */ if (dict_get (this_xl->options, "background-qlen") && @@ -6093,5 +6123,17 @@ struct volume_options options[] = { .max = 64, .description = "Sets fuse reader thread count.", }, + { .key = {"kernel-writeback-cache"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .description = "Enables fuse in-kernel writeback cache.", + }, + { .key = {"attr-times-granularity"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "0", + .min = 0, + .max = 1000000000, + .description = "Supported granularity of file attribute times.", + }, { .key = {NULL} }, }; diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h index 6cf9d2f7cf..b26b5e2110 100644 --- a/xlators/mount/fuse/src/fuse-bridge.h +++ b/xlators/mount/fuse/src/fuse-bridge.h @@ -147,6 +147,10 @@ struct fuse_private { gf_boolean_t mount_finished; gf_boolean_t handle_graph_switch; pthread_cond_t migrate_cond; + + /* Writeback cache support */ + gf_boolean_t kernel_writeback_cache; + int attr_times_granularity; }; typedef struct fuse_private fuse_private_t; diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in index 6890ff0012..9d9069aa1f 100755 --- a/xlators/mount/fuse/utils/mount.glusterfs.in +++ b/xlators/mount/fuse/utils/mount.glusterfs.in @@ -269,6 +269,14 @@ start_glusterfs () cmd_line=$(echo "$cmd_line --xlator-option=$xlator_option"); fi + if [ -n "$kernel_writeback_cache" ]; then + cmd_line=$(echo "$cmd_line --kernel-writeback-cache=$kernel_writeback_cache"); + fi + + if [ -n "$attr_times_granularity" ]; then 
+ cmd_line=$(echo "$cmd_line --attr-times-granularity=$attr_times_granularity"); + fi + if [ -n "$process_name" ]; then cmd_line=$(echo "$cmd_line --process-name fuse.$process_name"); else @@ -520,6 +528,12 @@ with_options() [ $value = "false" ] ; then no_root_squash=1; fi ;; + "kernel-writeback-cache") + kernel_writeback_cache=$value + ;; + "attr-times-granularity") + attr_times_granularity=$value + ;; "context"|"fscontext"|"defcontext"|"rootcontext") # standard SElinux mount options to pass to the kernel [ -z "$fuse_mountopts" ] || fuse_mountopts="$fuse_mountopts," diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c index 496f6a7905..4aa70baf3b 100644 --- a/xlators/storage/posix/src/posix-inode-fd-ops.c +++ b/xlators/storage/posix/src/posix-inode-fd-ops.c @@ -357,6 +357,23 @@ posix_setattr (call_frame_t *frame, xlator_t *this, } } + if (valid & GF_SET_ATTR_CTIME) { + /* + * At the moment we have no means to associate an arbitrary + * ctime with the file, so we ignore the ctime payload + * and update the file ctime to current time (which POSIX + * lets us to do). + */ + op_ret = PATH_SET_TIMESPEC_OR_TIMEVAL (real_path, NULL); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_UTIMES_FAILED, "setattr (utimes) on %s " + "failed", real_path); + goto out; + } + } + if (!valid) { op_ret = sys_lchown (real_path, -1, -1); if (op_ret == -1) { |