/* * Copyright (c) 2010 David Teigland * All Rights Reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License V2 * as published by the Free Software Foundation. * * This program is distributed in the hope that it would be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libdlm.h" #define LKM_IVMODE -1 #define MAX_CLIENTS 4 #define MAX_LOCKS 32 #define MAX_RESOURCES 32 static dlm_lshandle_t *dh; static int libdlm_fd; static uint32_t iterations = 0; static int maxn = MAX_LOCKS; static int maxr = MAX_RESOURCES; static int openclose = 0; static int quiet = 0; static int timewarn = 0; static uint64_t our_xid = 0; static uint32_t ast_count = 0; struct client { int fd; char type[32]; }; static int client_size = MAX_CLIENTS; static struct client client[MAX_CLIENTS]; static struct pollfd pollfd[MAX_CLIENTS]; enum { Op_request = 1, Op_convert, Op_unlock, Op_cancel, }; struct lk { int id; int locked_stable; int unlocked_stable; int wait_request; int wait_convert; int wait_unlock; int wait_cancel; int rqmode; int grmode; int lastop; int last_status; int bast; int cancel_busy; time_t wait_start; struct dlm_lksb lksb; }; static struct lk *locks; #define log_debug(fmt, args...) \ do { \ if (!quiet) \ printf(fmt "\n", ##args); \ } while (0) #define log_error(fmt, args...) \ do { \ printf("ERROR " fmt "\n", ##args); \ exit(-1); \ } while (0) static int rand_int(int a, int b) { return a + (int) (((float)(b - a + 1)) * random() / (RAND_MAX+1.0)); } static const char *status_str(int status) { static char sts_str[8]; switch (status) { case 0: return "0 "; case EUNLOCK: return "EUNLOCK"; case ECANCEL: return "ECANCEL"; case EAGAIN: return "EAGAIN "; case EBUSY: return "EBUSY "; case ETIMEDOUT: return "ETIMEDO"; case EDEADLK: return "EDEADLK"; default: snprintf(sts_str, 8, "%8x", status); return sts_str; } } static const char *op_str(int op) { switch (op) { case Op_request: return "request"; case Op_convert: return "convert"; case Op_unlock: return "unlock"; case Op_cancel: return "cancel"; default: return "unknown"; } } static struct lk *get_lock(int i) { if (i < 0) return NULL; if (i >= maxn) return NULL; return &locks[i]; } static void dump(void) { struct lk *lk; int i; for (i = 0; i < maxn; i++) { lk = get_lock(i); log_debug("lk %03u id %08x wait r%d c%d u%d c%d lastop %s gr %d rq %d\n", i, lk->lksb.sb_lkid, lk->wait_request, lk->wait_convert, lk->wait_unlock, lk->wait_cancel, op_str(lk->lastop), lk->grmode, lk->rqmode); } } static void bastfn(void *arg) { struct lk *lk = arg; lk->bast = 1; ast_count++; log_debug("lk %03u bast %04u", lk->id, ast_count); } static void astfn(void *arg) { struct lk *lk = arg; int status = lk->lksb.sb_status; int i = lk->id; ast_count++; log_debug("lk %03u cast %04u %s %08x " "wait r%d c%d u%d c%d " "gr %2d rq %2d last %s", i, ast_count, status_str(status), lk->lksb.sb_lkid, lk->wait_request, lk->wait_convert, lk->wait_unlock, lk->wait_cancel, lk->grmode, lk->rqmode, op_str(lk->lastop)); switch (status) { case 0: case EAGAIN: if (lk->wait_request || lk->wait_convert) { if (lk->wait_request && (status == EAGAIN)) { lk->locked_stable = 0; lk->unlocked_stable = 1; } else { lk->locked_stable = 1; lk->unlocked_stable = 0; } lk->wait_request = 0; lk->wait_convert = 0; if (!status) { lk->grmode = lk->rqmode; lk->rqmode = LKM_IVMODE; } else { lk->rqmode = LKM_IVMODE; } } else { /* wait state error */ log_error("lk %03u cast %04u %s %08x " "wait r%d c%d u%d c%d " "gr %2d rq %2d last %s wait state error 1", i, ast_count, status_str(status), lk->lksb.sb_lkid, lk->wait_request, lk->wait_convert, lk->wait_unlock, lk->wait_cancel, lk->grmode, lk->rqmode, op_str(lk->lastop)); } if (lk->wait_cancel) { lk->wait_cancel = 0; } if (lk->wait_unlock) { /* wait state error */ log_error("lk %03u cast %04u %s %08x " "wait r%d c%d u%d c%d " "gr %2d rq %2d last %s wait state error 2", i, ast_count, status_str(status), lk->lksb.sb_lkid, lk->wait_request, lk->wait_convert, lk->wait_unlock, lk->wait_cancel, lk->grmode, lk->rqmode, op_str(lk->lastop)); } break; case EUNLOCK: if (lk->wait_unlock) { lk->locked_stable = 0; lk->unlocked_stable = 1; lk->wait_unlock = 0; lk->grmode = LKM_IVMODE; lk->bast = 0; } else { /* wait state error */ log_error("lk %03u cast %04u %s %08x " "wait r%d c%d u%d c%d " "gr %2d rq %2d last %s wait state error 3", i, ast_count, status_str(status), lk->lksb.sb_lkid, lk->wait_request, lk->wait_convert, lk->wait_unlock, lk->wait_cancel, lk->grmode, lk->rqmode, op_str(lk->lastop)); } if (lk->wait_request || lk->wait_convert || lk->wait_cancel) { /* wait state error */ log_error("lk %03u cast %04u %s %08x " "wait r%d c%d u%d c%d " "gr %2d rq %2d last %s wait state error 4", i, ast_count, status_str(status), lk->lksb.sb_lkid, lk->wait_request, lk->wait_convert, lk->wait_unlock, lk->wait_cancel, lk->grmode, lk->rqmode, op_str(lk->lastop)); } break; case ECANCEL: if (lk->wait_cancel) { if (lk->grmode > LKM_IVMODE) { lk->locked_stable = 1; lk->unlocked_stable = 0; } else { lk->locked_stable = 0; lk->unlocked_stable = 1; } lk->wait_cancel = 0; lk->rqmode = LKM_IVMODE; } else { /* wait state error */ log_error("lk %03u cast %04u %s %08x " "wait r%d c%d u%d c%d " "gr %2d rq %2d last %s wait state error 5", i, ast_count, status_str(status), lk->lksb.sb_lkid, lk->wait_request, lk->wait_convert, lk->wait_unlock, lk->wait_cancel, lk->grmode, lk->rqmode, op_str(lk->lastop)); } if (lk->wait_request || lk->wait_convert) { lk->wait_request = 0; lk->wait_convert = 0; } if (lk->wait_unlock) { /* wait state error */ log_error("lk %03u cast %04u %s %08x " "wait r%d c%d u%d c%d " "gr %2d rq %2d last %s wait state error 6", i, ast_count, status_str(status), lk->lksb.sb_lkid, lk->wait_request, lk->wait_convert, lk->wait_unlock, lk->wait_cancel, lk->grmode, lk->rqmode, op_str(lk->lastop)); } break; case ETIMEDOUT: log_error("lk %03u cast %04u %s %08x not using timeouts", i, ast_count, status_str(status), lk->lksb.sb_lkid); break; case EDEADLK: if (lk->wait_convert) { lk->locked_stable = 1; lk->unlocked_stable = 0; lk->wait_convert = 0; lk->rqmode = LKM_IVMODE; } else { /* wait state error */ log_error("lk %03u cast %04u %s %08x " "wait r%d c%d u%d c%d " "gr %2d rq %2d last %s wait state error 7", i, ast_count, status_str(status), lk->lksb.sb_lkid, lk->wait_request, lk->wait_convert, lk->wait_unlock, lk->wait_cancel, lk->grmode, lk->rqmode, op_str(lk->lastop)); } if (lk->wait_request || lk->wait_unlock || lk->wait_cancel) { /* wait state error */ log_error("lk %03u cast %04u %s %08x " "wait r%d c%d u%d c%d " "gr %2d rq %2d last %s wait state error 8", i, ast_count, status_str(status), lk->lksb.sb_lkid, lk->wait_request, lk->wait_convert, lk->wait_unlock, lk->wait_cancel, lk->grmode, lk->rqmode, op_str(lk->lastop)); } break; default: log_error("lk %03u cast %04u %d %08x error unexpected status", i, ast_count, status, lk->lksb.sb_lkid); }; } /* FIXME: ran two dlm_load's on four nodes over the weekend, and one instance of dlm_load hit: lk 007 unlock lk 007 cast 3000665 EUNLOCK 022c0912 wait r0 c0 u1 c0 gr 2 rq -1 last unlock lk 007 request mode 2 noqueue 1 lk 007 cast 3000705 EAGAIN 00680909 wait r1 c0 u0 c0 gr -1 rq 2 last request lk 007 request mode 0 noqueue 1 lk 007 cast 3000732 EAGAIN 017b090e wait r1 c0 u0 c0 gr -1 rq 0 last request lk 007 request mode 6 noqueue 0 ERROR lk 007 request errno 22 wait r0 c0 u0 c0 lastop request */ static int do_request(struct lk *lk, int mode, int noqueue) { char name[DLM_RESNAME_MAXLEN]; uint64_t *timeout_arg = NULL; uint32_t flags = 0; int i = lk->id; int rv; if (noqueue) flags |= LKF_NOQUEUE; memset(name, 0, sizeof(name)); snprintf(name, sizeof(name), "test%d", (i % maxr)); log_debug("lk %03u request mode %d noqueue %d", i, mode, noqueue); rv = dlm_ls_lockx(dh, mode, &lk->lksb, flags, name, strlen(name), 0, astfn, (void *)lk, bastfn, &our_xid, timeout_arg); if (rv) { log_error("lk %03u request errno %d wait r%d c%d u%d c%d lastop %s", i, errno, lk->wait_request, lk->wait_convert, lk->wait_unlock, lk->wait_cancel, op_str(lk->lastop)); } return rv; } static int do_convert(struct lk *lk, int mode, int noqueue) { char name[DLM_RESNAME_MAXLEN]; uint64_t *timeout_arg = NULL; uint32_t flags = LKF_CONVERT; int i = lk->id; int rv; if (noqueue) flags |= LKF_NOQUEUE; memset(name, 0, sizeof(name)); snprintf(name, sizeof(name), "test%d", (i % maxr)); log_debug("lk %03u convert mode %d noqueue %d", i, mode, noqueue); rv = dlm_ls_lockx(dh, mode, &lk->lksb, flags, name, strlen(name), 0, astfn, (void *)lk, bastfn, &our_xid, timeout_arg); /* Unfortunately we don't have a way to know when the dlm will be done dealing with a cancel and won't return -EBUSY for ops. (We don't get a callback for the cancel when got a callback for the original op and the cancel did nothing.) */ if (rv == -1 && errno == EBUSY && lk->lastop == Op_cancel) { lk->cancel_busy++; log_debug("lk %03u convert cancel_busy %d", lk->cancel_busy); goto out; } else { lk->cancel_busy = 0; } if (rv) { log_error("lk %03u convert errno %d wait r%d c%d u%d c%d lastop %s", i, errno, lk->wait_request, lk->wait_convert, lk->wait_unlock, lk->wait_cancel, op_str(lk->lastop)); } out: return rv; } static int do_unlock(struct lk *lk) { uint32_t lkid = lk->lksb.sb_lkid; uint32_t flags = 0; int i = lk->id; int rv; log_debug("lk %03u unlock", i); rv = dlm_ls_unlock(dh, lkid, flags, &lk->lksb, lk); /* See comment above. */ if (rv == -1 && errno == EBUSY && lk->lastop == Op_cancel) { lk->cancel_busy++; log_debug("lk %03u unlock cancel_busy %d", lk->cancel_busy); goto out; } else { lk->cancel_busy = 0; } if (rv) { log_error("lk %03u unlock errno %d wait r%d c%d u%d c%d lastop %s", i, errno, lk->wait_request, lk->wait_convert, lk->wait_unlock, lk->wait_cancel, op_str(lk->lastop)); } out: return rv; } /* The op following a cancel can return -EBUSY. I don't think there is any way to know in this case when we can do the next op without getting EBUSY back. See lk->cancel_busy. Without the cancel_busy check we get this error: lk 025 request mode 4 noqueue 0 lk 025 busy locked 0 unlocked 0 wait r1 c0 u0 c0 lk 025 cancel lk 025 busy locked 0 unlocked 0 wait r1 c0 u0 c1 lk 025 cast 0706 0 01060001 wait r1 c0 u0 c1 gr -1 rq 4 last cancel lk 025 bast 0707 lk 025 convert mode 0 noqueue 0 ERROR lk 025 convert errno 16 wait r0 c0 u0 c0 lastop cancel */ static int do_cancel(struct lk *lk) { uint32_t lkid = lk->lksb.sb_lkid; uint32_t flags = LKF_CANCEL; int i = lk->id; int rv; log_debug("lk %03u cancel", i); rv = dlm_ls_unlock(dh, lkid, flags, &lk->lksb, lk); if (rv) { log_error("lk %03u cancel errno %d wait r%d c%d u%d c%d lastop %s", i, errno, lk->wait_request, lk->wait_convert, lk->wait_unlock, lk->wait_cancel, op_str(lk->lastop)); } return rv; } static void loop(void) { struct lk *lk; uint32_t n = 0; int locknum, noqueue, rqmode, rv; while (1) { dlm_dispatch(libdlm_fd); n++; if (iterations && n == iterations) break; if (!(n % 10)) usleep(200000); locknum = rand_int(0, maxn - 1); noqueue = rand_int(0, 1); rqmode = rand_int(0, LKM_EXMODE); lk = get_lock(locknum); if (!lk) continue; if (lk->unlocked_stable) { rv = do_request(lk, rqmode, noqueue); if (!rv) { lk->lastop = Op_request; lk->rqmode = rqmode; lk->wait_start = time(NULL); lk->wait_request = 1; lk->locked_stable = 0; lk->unlocked_stable = 0; } continue; } if (lk->locked_stable && lk->bast) { if (rand_int(0, 1)) { rv = do_convert(lk, LKM_NLMODE, 0); if (!rv) { lk->lastop = Op_convert; lk->rqmode = LKM_NLMODE; lk->bast = 0; lk->wait_start = time(NULL); lk->wait_convert = 1; lk->locked_stable = 0; lk->unlocked_stable = 0; } } else { rv = do_unlock(lk); if (!rv) { lk->lastop = Op_unlock; lk->rqmode = LKM_IVMODE; lk->bast = 0; lk->wait_unlock = 1; lk->locked_stable = 0; lk->unlocked_stable = 0; } } continue; } if (lk->locked_stable) { if (rand_int(0, 1)) { rv = do_convert(lk, rqmode, noqueue); if (!rv) { lk->lastop = Op_convert; lk->rqmode = rqmode; lk->wait_start = time(NULL); lk->wait_convert = 1; lk->locked_stable = 0; lk->unlocked_stable = 0; } } else { rv = do_unlock(lk); if (!rv) { lk->lastop = Op_unlock; lk->rqmode = LKM_IVMODE; lk->wait_unlock = 1; lk->locked_stable = 0; lk->unlocked_stable = 0; } } continue; } if ((lk->wait_request || lk->wait_convert) && !lk->wait_cancel) { if (time(NULL) - lk->wait_start > 2) { rv = do_cancel(lk); if (!rv) { lk->lastop = Op_cancel; lk->wait_cancel = 1; lk->locked_stable = 0; lk->unlocked_stable = 0; } } } log_debug("lk %03u busy locked %d unlocked %d " "wait r%d c%d u%d c%d", lk->id, lk->locked_stable, lk->unlocked_stable, lk->wait_request, lk->wait_convert, lk->wait_unlock, lk->wait_cancel); } } static int client_add(int fd, int *maxi) { int i; for (i = 0; i < client_size; i++) { if (client[i].fd == -1) { client[i].fd = fd; pollfd[i].fd = fd; pollfd[i].events = POLLIN; if (i > *maxi) *maxi = i; log_debug("client %d fd %d added", i, fd); return i; } } log_error("client add failed"); return -1; } static void client_dead(int ci) { log_debug("client %d fd %d dead", ci, client[ci].fd); close(client[ci].fd); client[ci].fd = -1; pollfd[ci].fd = -1; } static void client_init(void) { int i; for (i = 0; i < client_size; i++) client[i].fd = -1; } static void print_usage(void) { printf("Options:\n"); printf("\n"); printf(" -n The number of locks to work with, default %d\n", MAX_LOCKS); printf(" -r The number of resources to work with, default %d\n", MAX_RESOURCES); printf(" -i Iterations in looping stress test, default 0 is no limit\n"); printf(" -o Open/close existing lockspace\n"); printf(" -q Quiet output, only print errors\n"); } static void decode_arguments(int argc, char **argv) { int cont = 1; int optchar; while (cont) { optchar = getopt(argc, argv, "n:r:i:qoh"); switch (optchar) { case 'n': maxn = atoi(optarg); break; case 'r': maxr = atoi(optarg); break; case 'i': iterations = atoi(optarg); break; case 'o': openclose = 1; break; case 'q': quiet = 1; break; case 'h': print_usage(); exit(EXIT_SUCCESS); break; case 'V': printf("%s (built %s %s)\n", argv[0], __DATE__, __TIME__); exit(EXIT_SUCCESS); break; case ':': case '?': fprintf(stderr, "Please use '-h' for usage.\n"); exit(EXIT_FAILURE); break; case EOF: cont = 0; break; default: fprintf(stderr, "unknown option: %c\n", optchar); exit(EXIT_FAILURE); break; }; } } int main(int argc, char *argv[]) { uint32_t major, minor, patch; struct lk *lk; int i, rv, maxi = 0, quit = 0; srandom(time(NULL)); decode_arguments(argc, argv); if (maxn < maxr) { log_error("number of resources must be >= number of locks"); return -1; } if (maxn % maxr) { log_error("number of locks must be multiple of number of resources"); return -1; } log_debug("maxn = %d", maxn); log_debug("maxr = %d", maxr); log_debug("locks per resource = %d", maxn / maxr); client_init(); locks = malloc(maxn * sizeof(struct lk)); if (!locks) { log_error("no mem for %d locks", maxn); return 0; } memset(locks, 0, sizeof(*locks)); lk = locks; for (i = 0; i < maxn; i++) { lk->id = i; lk->grmode = -1; lk->rqmode = -1; lk->unlocked_stable = 1; lk++; } rv = dlm_kernel_version(&major, &minor, &patch); if (rv < 0) { log_error("can't detect dlm in kernel %d", errno); return -1; } log_debug("dlm kernel version: %u.%u.%u", major, minor, patch); dlm_library_version(&major, &minor, &patch); log_debug("dlm library version: %u.%u.%u", major, minor, patch); if (openclose) { log_debug("dlm_open_lockspace..."); dh = dlm_open_lockspace("test"); if (!dh) { log_error("dlm_open_lockspace error %lu %d", (unsigned long)dh, errno); return -ENOTCONN; } } else { log_debug("dlm_new_lockspace..."); dh = dlm_new_lockspace("test", 0600, timewarn ? DLM_LSFL_TIMEWARN : 0); if (!dh) { log_error("dlm_new_lockspace error %lu %d", (unsigned long)dh, errno); return -ENOTCONN; } } rv = dlm_ls_get_fd(dh); if (rv < 0) { log_error("dlm_ls_get_fd error %d %d", rv, errno); dlm_release_lockspace("test", dh, 1); return rv; } libdlm_fd = rv; client_add(libdlm_fd, &maxi); loop(); if (openclose) { log_debug("dlm_close_lockspace"); rv = dlm_close_lockspace(dh); if (rv < 0) log_error("dlm_close_lockspace error %d %d", rv, errno); } else { log_debug("dlm_release_lockspace"); rv = dlm_release_lockspace("test", dh, 1); if (rv < 0) log_error("dlm_release_lockspace error %d %d", rv, errno); } return 0; }