From aa83b1c3b1bc430af1f2e682a18181488465f87b Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 9 Apr 2010 16:25:13 -0500 Subject: dlm_load: handle EBUSY after cancel There is nothing we can do to be certain we won't sometimes get EBUSY after a cancel, so don't treat it as an error. Signed-off-by: David Teigland --- dlm/dlm_load.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'dlm/dlm_load.c') diff --git a/dlm/dlm_load.c b/dlm/dlm_load.c index d54f414..30cf161 100644 --- a/dlm/dlm_load.c +++ b/dlm/dlm_load.c @@ -80,6 +80,7 @@ struct lk { int lastop; int last_status; int bast; + int cancel_busy; time_t wait_start; struct dlm_lksb lksb; }; @@ -428,11 +429,25 @@ static int do_convert(struct lk *lk, int mode, int noqueue) rv = dlm_ls_lockx(dh, mode, &lk->lksb, flags, name, strlen(name), 0, astfn, (void *)lk, bastfn, &our_xid, timeout_arg); + /* Unfortunately we don't have a way to know when the dlm will be + done dealing with a cancel and won't return -EBUSY for ops. + (We don't get a callback for the cancel when we got a callback for + the original op and the cancel did nothing.) */ + + if (rv == -1 && errno == EBUSY && lk->lastop == Op_cancel) { + lk->cancel_busy++; + log_debug("lk %03u convert cancel_busy %d", lk->cancel_busy); + goto out; + } else { + lk->cancel_busy = 0; + } + if (rv) { log_error("lk %03u convert errno %d wait r%d c%d u%d c%d lastop %s", i, errno, lk->wait_request, lk->wait_convert, lk->wait_unlock, lk->wait_cancel, op_str(lk->lastop)); } + out: return rv; } @@ -447,14 +462,39 @@ static int do_unlock(struct lk *lk) rv = dlm_ls_unlock(dh, lkid, flags, &lk->lksb, lk); + /* See comment above. 
*/ + + if (rv == -1 && errno == EBUSY && lk->lastop == Op_cancel) { + lk->cancel_busy++; + log_debug("lk %03u unlock cancel_busy %d", lk->cancel_busy); + goto out; + } else { + lk->cancel_busy = 0; + } + if (rv) { log_error("lk %03u unlock errno %d wait r%d c%d u%d c%d lastop %s", i, errno, lk->wait_request, lk->wait_convert, lk->wait_unlock, lk->wait_cancel, op_str(lk->lastop)); } + out: return rv; } +/* The op following a cancel can return -EBUSY. I don't think there is any + way to know in this case when we can do the next op without getting + EBUSY back. See lk->cancel_busy. Without the cancel_busy check we get + this error: + lk 025 request mode 4 noqueue 0 + lk 025 busy locked 0 unlocked 0 wait r1 c0 u0 c0 + lk 025 cancel + lk 025 busy locked 0 unlocked 0 wait r1 c0 u0 c1 + lk 025 cast 0706 0 01060001 wait r1 c0 u0 c1 gr -1 rq 4 last cancel + lk 025 bast 0707 + lk 025 convert mode 0 noqueue 0 + ERROR lk 025 convert errno 16 wait r0 c0 u0 c0 lastop cancel +*/ + static int do_cancel(struct lk *lk) { uint32_t lkid = lk->lksb.sb_lkid; -- cgit