From 5bea53fe7ed7dd7afa8e76f7f84f76fdb89d59d9 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Thu, 15 Feb 2018 13:17:53 -0600 Subject: device-mapper-multipath-0.7.4-1.git07e7bd5 Update Source to the latest upstream commit * Previous patches 0001-0006 are included in this commit * Previous patches 0007-0014 are now patches 0015-0022 Add 0001-libmultipath-fix-tur-checker-locking.patch * Fixed spinlock bug. posted upstream Add 0002-multipath-fix-DEF_TIMEOUT-use.patch * Add missing sec to ms conversion. posted upstream Add 0003-multipathd-remove-coalesce_paths-from-ev_add_map.patch * Remove unused code. posted upstream Add 0004-multipathd-remove-unused-configure-parameter.patch * Remove unused code. posted upstream Add 0005-Fix-set_no_path_retry-regression.patch * Fix issue with queueing and path addition. posted upstream Add 0006-multipathd-change-spurious-uevent-msg-priority.patch * Change message priority to Notice. posted upstream Add 0007-multipath-print-sysfs-state-in-fast-list-mode.patch * Show sysfs state correctly in fast list mode (-l). posted upstream Add 0008-libmultipath-move-remove_map-waiter-code-to-multipat.patch * Move code around. posted upstream Add 0009-move-waiter-code-from-libmultipath-to-multipathd.patch * Move code around. posted upstream Add 0010-call-start_waiter_thread-before-setup_multipath.patch * Fix race on multipath device creations. posted upstream Add 0011-libmultipath-add-helper-functions.patch * posted upstream Add 0012-multipathd-RFC-add-new-polling-dmevents-waiter-threa.patch * Add alternate method of getting dmevents, that doesn't require a thread per device. posted upstream Add 0013-libmultipath-condlog-log-to-stderr.patch * change condlog to log to stderr instead of stdout. posted upstream Add 0014-multipathd-fix-compiler-warning-for-uev_pathfail_che.patch * fix indentation issue. posted upstream --- ...RFC-add-new-polling-dmevents-waiter-threa.patch | 643 +++++++++++++++++++++ 1 file changed, 643 insertions(+) create mode 100644 0012-multipathd-RFC-add-new-polling-dmevents-waiter-threa.patch (limited to '0012-multipathd-RFC-add-new-polling-dmevents-waiter-threa.patch') diff --git a/0012-multipathd-RFC-add-new-polling-dmevents-waiter-threa.patch b/0012-multipathd-RFC-add-new-polling-dmevents-waiter-threa.patch new file mode 100644 index 0000000..64db4ad --- /dev/null +++ b/0012-multipathd-RFC-add-new-polling-dmevents-waiter-threa.patch @@ -0,0 +1,643 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Benjamin Marzinski +Date: Tue, 6 Feb 2018 15:36:39 -0600 +Subject: [PATCH] multipathd: RFC add new polling dmevents waiter thread + +The current method of waiting for dmevents on multipath devices involves +creating a seperate thread for each device. This can become very +wasteful when there are large numbers of multipath devices. Also, since +multipathd needs to grab the vecs lock to update the devices, the +additional threads don't actually provide much parallelism. + +The patch adds a new method of updating multipath devices on dmevents, +which uses the new device-mapper event polling interface. This means +that there is only one dmevent waiting thread which will wait for events +on all of the multipath devices. Currently the code to get the event +number from the list of device names and to re-arm the polling interface +is not in libdevmapper, so the patch does that work. Obviously, these +bits need to go into libdevmapper, so that multipathd can use a standard +interface. + +I haven't touched any of the existing event waiting code, since event +polling was only added to device-mapper in version 4.37.0. multipathd +checks this version, and defaults to using the polling code if +device-mapper supports it. This can be overridden by running multipathd +with "-w", to force it to use the old event waiting code. + +Signed-off-by: Benjamin Marzinski +--- + multipathd/Makefile | 3 +- + multipathd/dmevents.c | 396 ++++++++++++++++++++++++++++++++++++++++++++++++++ + multipathd/dmevents.h | 13 ++ + multipathd/main.c | 58 +++++++- + 4 files changed, 461 insertions(+), 9 deletions(-) + create mode 100644 multipathd/dmevents.c + create mode 100644 multipathd/dmevents.h + +diff --git a/multipathd/Makefile b/multipathd/Makefile +index 85f29a7..4c438f0 100644 +--- a/multipathd/Makefile ++++ b/multipathd/Makefile +@@ -22,7 +22,8 @@ ifdef SYSTEMD + endif + endif + +-OBJS = main.o pidfile.o uxlsnr.o uxclnt.o cli.o cli_handlers.o waiter.o ++OBJS = main.o pidfile.o uxlsnr.o uxclnt.o cli.o cli_handlers.o waiter.o \ ++ dmevents.o + + EXEC = multipathd + +diff --git a/multipathd/dmevents.c b/multipathd/dmevents.c +new file mode 100644 +index 0000000..a56c055 +--- /dev/null ++++ b/multipathd/dmevents.c +@@ -0,0 +1,396 @@ ++/* ++ * Copyright (c) 2004, 2005 Christophe Varoqui ++ * Copyright (c) 2005 Kiyoshi Ueda, NEC ++ * Copyright (c) 2005 Edward Goggin, EMC ++ * Copyright (c) 2005, 2018 Benjamin Marzinski, Redhat ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "vector.h" ++#include "structs.h" ++#include "structs_vec.h" ++#include "devmapper.h" ++#include "debug.h" ++#include "dmevents.h" ++ ++#ifndef DM_DEV_ARM_POLL ++#define DM_DEV_ARM_POLL _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD + 1, struct dm_ioctl) ++#endif ++ ++enum event_actions { ++ EVENT_NOTHING, ++ EVENT_REMOVE, ++ EVENT_UPDATE, ++}; ++ ++struct dev_event { ++ char name[WWID_SIZE]; ++ uint32_t evt_nr; ++ enum event_actions action; ++}; ++ ++struct dmevent_waiter { ++ int fd; ++ struct vectors *vecs; ++ vector events; ++ pthread_mutex_t events_lock; ++}; ++ ++static struct dmevent_waiter *waiter; ++ ++int dmevent_poll_supported(void) ++{ ++ unsigned int minv[3] = {4, 37, 0}; ++ unsigned int v[3]; ++ ++ if (dm_drv_version(v)) ++ return 0; ++ ++ if (VERSION_GE(v, minv)) ++ return 1; ++ return 0; ++} ++ ++ ++int alloc_dmevent_waiter(struct vectors *vecs) ++{ ++ if (!vecs) { ++ condlog(0, "can't create waiter structure. invalid vectors"); ++ goto fail; ++ } ++ waiter = (struct dmevent_waiter *)malloc(sizeof(struct dmevent_waiter)); ++ if (!waiter) { ++ condlog(0, "failed to allocate waiter structure"); ++ goto fail; ++ } ++ memset(waiter, 0, sizeof(struct dmevent_waiter)); ++ waiter->events = vector_alloc(); ++ if (!waiter->events) { ++ condlog(0, "failed to allocate waiter events vector"); ++ goto fail_waiter; ++ } ++ waiter->fd = open("/dev/mapper/control", O_RDWR); ++ if (waiter->fd < 0) { ++ condlog(0, "failed to open /dev/mapper/control for waiter"); ++ goto fail_events; ++ } ++ pthread_mutex_init(&waiter->events_lock, NULL); ++ waiter->vecs = vecs; ++ ++ return 0; ++fail_events: ++ vector_free(waiter->events); ++fail_waiter: ++ free(waiter); ++fail: ++ waiter = NULL; ++ return -1; ++} ++ ++void free_dmevent_waiter(void) ++{ ++ struct dev_event *dev_evt; ++ int i; ++ ++ if (!waiter) ++ return; ++ pthread_mutex_destroy(&waiter->events_lock); ++ close(waiter->fd); ++ vector_foreach_slot(waiter->events, dev_evt, i) ++ free(dev_evt); ++ vector_free(waiter->events); ++ free(waiter); ++ waiter = NULL; ++} ++ ++static int arm_dm_event_poll(int fd) ++{ ++ struct dm_ioctl dmi; ++ memset(&dmi, 0, sizeof(dmi)); ++ dmi.version[0] = DM_VERSION_MAJOR; ++ dmi.version[1] = DM_VERSION_MINOR; ++ dmi.version[2] = DM_VERSION_PATCHLEVEL; ++ dmi.flags = 0x4; ++ dmi.data_start = offsetof(struct dm_ioctl, data); ++ dmi.data_size = sizeof(dmi); ++ return ioctl(fd, DM_DEV_ARM_POLL, &dmi); ++} ++ ++/* ++ * As of version 4.37.0 device-mapper stores the event number in the ++ * dm_names structure after the name, when DM_DEVICE_LIST is called ++ */ ++static uint32_t dm_event_nr(struct dm_names *n) ++{ ++ return *(uint32_t *)(((uintptr_t)(strchr(n->name, 0) + 1) + 7) & ~7); ++} ++ ++static int dm_get_events(void) ++{ ++ struct dm_task *dmt; ++ struct dm_names *names; ++ struct dev_event *dev_evt; ++ int i; ++ ++ if (!(dmt = libmp_dm_task_create(DM_DEVICE_LIST))) ++ return -1; ++ ++ dm_task_no_open_count(dmt); ++ ++ if (!dm_task_run(dmt)) ++ goto fail; ++ ++ if (!(names = dm_task_get_names(dmt))) ++ goto fail; ++ ++ pthread_mutex_lock(&waiter->events_lock); ++ vector_foreach_slot(waiter->events, dev_evt, i) ++ dev_evt->action = EVENT_REMOVE; ++ while (names->dev) { ++ uint32_t event_nr; ++ ++ if (!dm_is_mpath(names->name)) ++ goto next; ++ ++ event_nr = dm_event_nr(names); ++ vector_foreach_slot(waiter->events, dev_evt, i) { ++ if (!strcmp(dev_evt->name, names->name)) { ++ if (event_nr != dev_evt->evt_nr) { ++ dev_evt->evt_nr = event_nr; ++ dev_evt->action = EVENT_UPDATE; ++ } else ++ dev_evt->action = EVENT_NOTHING; ++ break; ++ } ++ } ++next: ++ if (!names->next) ++ break; ++ names = (void *)names + names->next; ++ } ++ pthread_mutex_unlock(&waiter->events_lock); ++ dm_task_destroy(dmt); ++ return 0; ++ ++fail: ++ dm_task_destroy(dmt); ++ return -1; ++} ++ ++/* You must call update_multipath() after calling this function, to ++ * deal with any events that came in before the device was added */ ++int watch_dmevents(char *name) ++{ ++ int event_nr; ++ struct dev_event *dev_evt, *old_dev_evt; ++ int i; ++ ++ if (!dm_is_mpath(name)) { ++ condlog(0, "%s: not a multipath device. can't watch events", ++ name); ++ return -1; ++ } ++ ++ if ((event_nr = dm_geteventnr(name)) < 0) ++ return -1; ++ ++ dev_evt = (struct dev_event *)malloc(sizeof(struct dev_event)); ++ if (!dev_evt) { ++ condlog(0, "%s: can't allocate event waiter structure", name); ++ return -1; ++ } ++ ++ strncpy(dev_evt->name, name, WWID_SIZE); ++ dev_evt->name[WWID_SIZE - 1] = 0; ++ dev_evt->evt_nr = event_nr; ++ dev_evt->action = EVENT_NOTHING; ++ ++ pthread_mutex_lock(&waiter->events_lock); ++ vector_foreach_slot(waiter->events, old_dev_evt, i){ ++ if (!strcmp(dev_evt->name, old_dev_evt->name)) { ++ /* caller will be updating this device */ ++ old_dev_evt->evt_nr = event_nr; ++ old_dev_evt->action = EVENT_NOTHING; ++ pthread_mutex_unlock(&waiter->events_lock); ++ condlog(2, "%s: already waiting for events on device", ++ name); ++ free(dev_evt); ++ return 0; ++ } ++ } ++ if (!vector_alloc_slot(waiter->events)) { ++ pthread_mutex_unlock(&waiter->events_lock); ++ free(dev_evt); ++ return -1; ++ } ++ vector_set_slot(waiter->events, dev_evt); ++ pthread_mutex_unlock(&waiter->events_lock); ++ return 0; ++} ++ ++void unwatch_all_dmevents(void) ++{ ++ struct dev_event *dev_evt; ++ int i; ++ ++ pthread_mutex_lock(&waiter->events_lock); ++ vector_foreach_slot(waiter->events, dev_evt, i) ++ free(dev_evt); ++ vector_reset(waiter->events); ++ pthread_mutex_unlock(&waiter->events_lock); ++} ++ ++static void unwatch_dmevents(char *name) ++{ ++ struct dev_event *dev_evt; ++ int i; ++ ++ pthread_mutex_lock(&waiter->events_lock); ++ vector_foreach_slot(waiter->events, dev_evt, i) { ++ if (!strcmp(dev_evt->name, name)) { ++ vector_del_slot(waiter->events, i); ++ free(dev_evt); ++ break; ++ } ++ } ++ pthread_mutex_unlock(&waiter->events_lock); ++} ++ ++/* ++ * returns the reschedule delay ++ * negative means *stop* ++ */ ++ ++/* poll, arm, update, return */ ++static int dmevent_loop (void) ++{ ++ int r, i = 0; ++ struct pollfd pfd; ++ struct dev_event *dev_evt; ++ ++ pfd.fd = waiter->fd; ++ pfd.events = POLLIN; ++ r = poll(&pfd, 1, -1); ++ if (r <= 0) { ++ condlog(0, "failed polling for dm events: %s", strerror(errno)); ++ /* sleep 1s and hope things get better */ ++ return 1; ++ } ++ ++ if (arm_dm_event_poll(waiter->fd) != 0) { ++ condlog(0, "Cannot re-arm event polling: %s", strerror(errno)); ++ /* sleep 1s and hope things get better */ ++ return 1; ++ } ++ ++ if (dm_get_events() != 0) { ++ condlog(0, "failed getting dm events: %s", strerror(errno)); ++ /* sleep 1s and hope things get better */ ++ return 1; ++ } ++ ++ /* ++ * upon event ... ++ */ ++ ++ while (1) { ++ int done = 1; ++ struct dev_event curr_dev; ++ struct multipath *mpp; ++ ++ pthread_mutex_lock(&waiter->events_lock); ++ vector_foreach_slot(waiter->events, dev_evt, i) { ++ if (dev_evt->action != EVENT_NOTHING) { ++ curr_dev = *dev_evt; ++ if (dev_evt->action == EVENT_REMOVE) { ++ vector_del_slot(waiter->events, i); ++ free(dev_evt); ++ } else ++ dev_evt->action = EVENT_NOTHING; ++ done = 0; ++ break; ++ } ++ } ++ pthread_mutex_unlock(&waiter->events_lock); ++ if (done) ++ return 1; ++ ++ condlog(3, "%s: devmap event #%i", curr_dev.name, ++ curr_dev.evt_nr); ++ ++ /* ++ * event might be : ++ * ++ * 1) a table reload, which means our mpp structure is ++ * obsolete : refresh it through update_multipath() ++ * 2) a path failed by DM : mark as such through ++ * update_multipath() ++ * 3) map has gone away : stop the thread. ++ * 4) a path reinstate : nothing to do ++ * 5) a switch group : nothing to do ++ */ ++ pthread_cleanup_push(cleanup_lock, &waiter->vecs->lock); ++ lock(&waiter->vecs->lock); ++ pthread_testcancel(); ++ r = 0; ++ if (curr_dev.action == EVENT_REMOVE) { ++ mpp = find_mp_by_alias(waiter->vecs->mpvec, ++ curr_dev.name); ++ if (mpp) ++ remove_map(mpp, waiter->vecs, 1); ++ } else ++ r = update_multipath(waiter->vecs, curr_dev.name, 1); ++ lock_cleanup_pop(&waiter->vecs->lock); ++ ++ if (r) { ++ condlog(2, "%s: stopped watching dmevents", ++ curr_dev.name); ++ unwatch_dmevents(curr_dev.name); ++ } ++ } ++ condlog(0, "dmevent waiter thread unexpectedly quit"); ++ return -1; /* never reach there */ ++} ++ ++static void rcu_unregister(void *param) ++{ ++ rcu_unregister_thread(); ++} ++ ++void *wait_dmevents (void *unused) ++{ ++ int r; ++ ++ ++ if (!waiter) { ++ condlog(0, "dmevents waiter not intialized"); ++ return NULL; ++ } ++ ++ pthread_cleanup_push(rcu_unregister, NULL); ++ rcu_register_thread(); ++ mlockall(MCL_CURRENT | MCL_FUTURE); ++ ++ while (1) { ++ r = dmevent_loop(); ++ ++ if (r < 0) ++ break; ++ ++ sleep(r); ++ } ++ ++ pthread_cleanup_pop(1); ++ return NULL; ++} +diff --git a/multipathd/dmevents.h b/multipathd/dmevents.h +new file mode 100644 +index 0000000..569e855 +--- /dev/null ++++ b/multipathd/dmevents.h +@@ -0,0 +1,13 @@ ++#ifndef _DMEVENTS_H ++#define _DMEVENTS_H ++ ++#include "structs_vec.h" ++ ++int dmevent_poll_supported(void); ++int alloc_dmevent_waiter(struct vectors *vecs); ++void free_dmevent_waiter(void); ++int watch_dmevents(char *name); ++void unwatch_all_dmevents(void); ++void *wait_dmevents (void *unused); ++ ++#endif /* _DMEVENTS_H */ +diff --git a/multipathd/main.c b/multipathd/main.c +index 2963bde..6dabf2c 100644 +--- a/multipathd/main.c ++++ b/multipathd/main.c +@@ -82,6 +82,7 @@ static int use_watchdog; + #include "cli_handlers.h" + #include "lock.h" + #include "waiter.h" ++#include "dmevents.h" + #include "io_err_stat.h" + #include "wwids.h" + #include "../third-party/valgrind/drd.h" +@@ -108,6 +109,7 @@ int uxsock_timeout; + int verbosity; + int bindings_read_only; + int ignore_new_devs; ++int poll_dmevents = 1; + enum daemon_status running_state = DAEMON_INIT; + pid_t daemon_pid; + pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER; +@@ -288,11 +290,23 @@ switch_pathgroup (struct multipath * mpp) + mpp->alias, mpp->bestpg); + } + ++static int ++wait_for_events(struct multipath *mpp, struct vectors *vecs) ++{ ++ if (poll_dmevents) ++ return watch_dmevents(mpp->alias); ++ else ++ return start_waiter_thread(mpp, vecs); ++} ++ + static void + remove_map_and_stop_waiter(struct multipath *mpp, struct vectors *vecs, + int purge_vec) + { +- stop_waiter_thread(mpp, vecs); ++ /* devices are automatically removed by the dmevent polling code, ++ * so they don't need to be manually removed here */ ++ if (!poll_dmevents) ++ stop_waiter_thread(mpp, vecs); + remove_map(mpp, vecs, purge_vec); + } + +@@ -305,8 +319,12 @@ remove_maps_and_stop_waiters(struct vectors *vecs) + if (!vecs) + return; + +- vector_foreach_slot(vecs->mpvec, mpp, i) +- stop_waiter_thread(mpp, vecs); ++ if (!poll_dmevents) { ++ vector_foreach_slot(vecs->mpvec, mpp, i) ++ stop_waiter_thread(mpp, vecs); ++ } ++ else ++ unwatch_all_dmevents(); + + remove_maps(vecs); + } +@@ -351,7 +369,7 @@ retry: + dm_lib_release(); + + fail: +- if (new_map && (retries < 0 || start_waiter_thread(mpp, vecs))) { ++ if (new_map && (retries < 0 || wait_for_events(mpp, vecs))) { + condlog(0, "%s: failed to create new map", mpp->alias); + remove_map(mpp, vecs, 1); + return 1; +@@ -870,7 +888,7 @@ retry: + + if ((mpp->action == ACT_CREATE || + (mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) && +- start_waiter_thread(mpp, vecs)) ++ wait_for_events(mpp, vecs)) + goto fail_map; + + /* +@@ -2173,7 +2191,7 @@ configure (struct vectors * vecs) + * start dm event waiter threads for these new maps + */ + vector_foreach_slot(vecs->mpvec, mpp, i) { +- if (start_waiter_thread(mpp, vecs)) { ++ if (wait_for_events(mpp, vecs)) { + remove_map(mpp, vecs, 1); + i--; + continue; +@@ -2414,7 +2432,7 @@ set_oom_adj (void) + static int + child (void * param) + { +- pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr; ++ pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr, dmevent_thr; + pthread_attr_t log_attr, misc_attr, uevent_attr; + struct vectors * vecs; + struct multipath * mpp; +@@ -2476,6 +2494,8 @@ child (void * param) + goto failed; + } + ++ if (poll_dmevents) ++ poll_dmevents = dmevent_poll_supported(); + setlogmask(LOG_UPTO(conf->verbosity + 3)); + + envp = getenv("LimitNOFILE"); +@@ -2542,6 +2562,19 @@ child (void * param) + + init_path_check_interval(vecs); + ++ if (poll_dmevents) { ++ if (alloc_dmevent_waiter(vecs)) { ++ condlog(0, "failed to allocate dmevents waiter info"); ++ goto failed; ++ } ++ if ((rc = pthread_create(&dmevent_thr, &misc_attr, ++ wait_dmevents, NULL))) { ++ condlog(0, "failed to create dmevent waiter thread: %d", ++ rc); ++ goto failed; ++ } ++ } ++ + /* + * Start uevent listener early to catch events + */ +@@ -2615,11 +2648,15 @@ child (void * param) + pthread_cancel(uevent_thr); + pthread_cancel(uxlsnr_thr); + pthread_cancel(uevq_thr); ++ if (poll_dmevents) ++ pthread_cancel(dmevent_thr); + + pthread_join(check_thr, NULL); + pthread_join(uevent_thr, NULL); + pthread_join(uxlsnr_thr, NULL); + pthread_join(uevq_thr, NULL); ++ if (poll_dmevents) ++ pthread_join(dmevent_thr, NULL); + + stop_io_err_stat_thread(); + +@@ -2634,6 +2671,8 @@ child (void * param) + + cleanup_checkers(); + cleanup_prio(); ++ if (poll_dmevents) ++ free_dmevent_waiter(); + + dm_lib_release(); + dm_lib_exit(); +@@ -2765,7 +2804,7 @@ main (int argc, char *argv[]) + udev = udev_new(); + libmp_udev_set_sync_support(0); + +- while ((arg = getopt(argc, argv, ":dsv:k::Bn")) != EOF ) { ++ while ((arg = getopt(argc, argv, ":dsv:k::Bniw")) != EOF ) { + switch(arg) { + case 'd': + foreground = 1; +@@ -2799,6 +2838,9 @@ main (int argc, char *argv[]) + case 'n': + ignore_new_devs = 1; + break; ++ case 'w': ++ poll_dmevents = 0; ++ break; + default: + fprintf(stderr, "Invalid argument '-%c'\n", + optopt); +-- +2.7.4 + -- cgit