/*
   Unix SMB/CIFS implementation.
   byte range locking code
   Updated to handle range splits/merges.

   Copyright (C) Andrew Tridgell 1992-2000
   Copyright (C) Jeremy Allison 1992-2000

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/* This module implements a tdb based byte range locking service,
   replacing the fcntl() based byte range locking previously
   used. This allows us to provide the same semantics as NT */

#include "includes.h"
#include "system/filesys.h"
#include "locking/proto.h"
#include "smbd/globals.h"
#include "dbwrap/dbwrap.h"
#include "dbwrap/dbwrap_open.h"
#include "serverid.h"
#include "messages.h"
#include "util_tdb.h"

#undef DBGC_CLASS
#define DBGC_CLASS DBGC_LOCKING

#define ZERO_ZERO 0

/* The open brlock.tdb database. */

static struct db_context *brlock_db;

struct byte_range_lock {
	struct files_struct *fsp;
	unsigned int num_locks;
	bool modified;
	bool have_read_oplocks;
	struct lock_struct *lock_data;
	struct db_record *record;
};

/****************************************************************************
 Debug info at level 10 for lock struct.
****************************************************************************/

static void print_lock_struct(unsigned int i, const struct lock_struct *pls)
{
	DEBUG(10,("[%u]: smblctx = %llu, tid = %u, pid = %s, ",
			i,
			(unsigned long long)pls->context.smblctx,
			(unsigned int)pls->context.tid,
			server_id_str(talloc_tos(), &pls->context.pid) ));

	DEBUG(10,("start = %.0f, size = %.0f, fnum = %llu, %s %s\n",
		(double)pls->start,
		(double)pls->size,
		(unsigned long long)pls->fnum,
		lock_type_name(pls->lock_type),
		lock_flav_name(pls->lock_flav) ));
}

unsigned int brl_num_locks(const struct byte_range_lock *brl)
{
	return brl->num_locks;
}

struct files_struct *brl_fsp(struct byte_range_lock *brl)
{
	return brl->fsp;
}

bool brl_have_read_oplocks(const struct byte_range_lock *brl)
{
	return brl->have_read_oplocks;
}

void brl_set_have_read_oplocks(struct byte_range_lock *brl,
			       bool have_read_oplocks)
{
	DEBUG(10, ("Setting have_read_oplocks to %s\n",
		   have_read_oplocks ? "true" : "false"));
	SMB_ASSERT(brl->record != NULL); /* otherwise we're readonly */
	brl->have_read_oplocks = have_read_oplocks;
	brl->modified = true;
}

/****************************************************************************
 See if two locking contexts are equal.
****************************************************************************/

static bool brl_same_context(const struct lock_context *ctx1,
			     const struct lock_context *ctx2)
{
	return (serverid_equal(&ctx1->pid, &ctx2->pid) &&
		(ctx1->smblctx == ctx2->smblctx) &&
		(ctx1->tid == ctx2->tid));
}

/****************************************************************************
 See if lck1 and lck2 overlap.
****************************************************************************/

static bool brl_overlap(const struct lock_struct *lck1,
			const struct lock_struct *lck2)
{
	/* XXX Remove for Win7 compatibility.
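	   Worked example of the wrap case the check below guards against
	   (hypothetical values): two identical locks with
	   start = 0xFFFFFFFFFFFFFFF0 and size = 0x20 run past the end of
	   the 64 bit offset space, so start + size wraps around to 0x10.
	   The generic range test further down would then see
	   lck1->start >= (lck2->start + lck2->size) and wrongly report no
	   overlap; the equality test below catches this case first.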
*/ /* this extra check is not redundant - it copes with locks that go beyond the end of 64 bit file space */ if (lck1->size != 0 && lck1->start == lck2->start && lck1->size == lck2->size) { return True; } if (lck1->start >= (lck2->start+lck2->size) || lck2->start >= (lck1->start+lck1->size)) { return False; } return True; } /**************************************************************************** See if lock2 can be added when lock1 is in place. ****************************************************************************/ static bool brl_conflict(const struct lock_struct *lck1, const struct lock_struct *lck2) { /* Ignore PENDING locks. */ if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type)) return False; /* Read locks never conflict. */ if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) { return False; } /* A READ lock can stack on top of a WRITE lock if they have the same * context & fnum. */ if (lck1->lock_type == WRITE_LOCK && lck2->lock_type == READ_LOCK && brl_same_context(&lck1->context, &lck2->context) && lck1->fnum == lck2->fnum) { return False; } return brl_overlap(lck1, lck2); } /**************************************************************************** See if lock2 can be added when lock1 is in place - when both locks are POSIX flavour. POSIX locks ignore fnum - they only care about dev/ino which we know already match. ****************************************************************************/ static bool brl_conflict_posix(const struct lock_struct *lck1, const struct lock_struct *lck2) { #if defined(DEVELOPER) SMB_ASSERT(lck1->lock_flav == POSIX_LOCK); SMB_ASSERT(lck2->lock_flav == POSIX_LOCK); #endif /* Ignore PENDING locks. */ if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type)) return False; /* Read locks never conflict. */ if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) { return False; } /* Locks on the same context don't conflict. Ignore fnum. */ if (brl_same_context(&lck1->context, &lck2->context)) { return False; } /* One is read, the other write, or the context is different, do they overlap ? */ return brl_overlap(lck1, lck2); } #if ZERO_ZERO static bool brl_conflict1(const struct lock_struct *lck1, const struct lock_struct *lck2) { if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type)) return False; if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) { return False; } if (brl_same_context(&lck1->context, &lck2->context) && lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) { return False; } if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) { return True; } if (lck1->start >= (lck2->start + lck2->size) || lck2->start >= (lck1->start + lck1->size)) { return False; } return True; } #endif /**************************************************************************** Check to see if this lock conflicts, but ignore our own locks on the same fnum only. This is the read/write lock check code path. This is never used in the POSIX lock case. ****************************************************************************/ static bool brl_conflict_other(const struct lock_struct *lck1, const struct lock_struct *lck2) { if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type)) return False; if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) return False; /* POSIX flavour locks never conflict here - this is only called in the read/write path. 
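	   POSIX-flavour locks are advisory: they must never deny a read or
	   write, so only Windows-flavour locks are considered on this path.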
*/ if (lck1->lock_flav == POSIX_LOCK && lck2->lock_flav == POSIX_LOCK) return False; /* * Incoming WRITE locks conflict with existing READ locks even * if the context is the same. JRA. See LOCKTEST7 in smbtorture. */ if (!(lck2->lock_type == WRITE_LOCK && lck1->lock_type == READ_LOCK)) { if (brl_same_context(&lck1->context, &lck2->context) && lck1->fnum == lck2->fnum) return False; } return brl_overlap(lck1, lck2); } /**************************************************************************** Check if an unlock overlaps a pending lock. ****************************************************************************/ static bool brl_pending_overlap(const struct lock_struct *lock, const struct lock_struct *pend_lock) { if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start)) return True; if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size)) return True; return False; } /**************************************************************************** Amazingly enough, w2k3 "remembers" whether the last lock failure on a fnum is the same as this one and changes its error code. I wonder if any app depends on this ? ****************************************************************************/ static NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *lock, bool blocking_lock) { if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) { /* amazing the little things you learn with a test suite. Locks beyond this offset (as a 64 bit number!) always generate the conflict error code, unless the top bit is set */ if (!blocking_lock) { fsp->last_lock_failure = *lock; } return NT_STATUS_FILE_LOCK_CONFLICT; } if (serverid_equal(&lock->context.pid, &fsp->last_lock_failure.context.pid) && lock->context.tid == fsp->last_lock_failure.context.tid && lock->fnum == fsp->last_lock_failure.fnum && lock->start == fsp->last_lock_failure.start) { return NT_STATUS_FILE_LOCK_CONFLICT; } if (!blocking_lock) { fsp->last_lock_failure = *lock; } return NT_STATUS_LOCK_NOT_GRANTED; } /**************************************************************************** Open up the brlock.tdb database. ****************************************************************************/ void brl_init(bool read_only) { int tdb_flags; if (brlock_db) { return; } tdb_flags = TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST|TDB_INCOMPATIBLE_HASH; if (!lp_clustering()) { /* * We can't use the SEQNUM trick to cache brlock * entries in the clustering case because ctdb seqnum * propagation has a delay. */ tdb_flags |= TDB_SEQNUM; } brlock_db = db_open(NULL, lock_path("brlock.tdb"), lp_open_files_db_hash_size(), tdb_flags, read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644, DBWRAP_LOCK_ORDER_2); if (!brlock_db) { DEBUG(0,("Failed to open byte range locking database %s\n", lock_path("brlock.tdb"))); return; } } /**************************************************************************** Close down the brlock.tdb database. ****************************************************************************/ void brl_shutdown(void) { TALLOC_FREE(brlock_db); } #if ZERO_ZERO /**************************************************************************** Compare two locks for sorting. 
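 Only compiled when ZERO_ZERO is defined (off by default above). Note the
 int arithmetic below can truncate for offsets or sizes beyond 2^31, which
 is assumed tolerable for this optional code path.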
****************************************************************************/ static int lock_compare(const struct lock_struct *lck1, const struct lock_struct *lck2) { if (lck1->start != lck2->start) { return (lck1->start - lck2->start); } if (lck2->size != lck1->size) { return ((int)lck1->size - (int)lck2->size); } return 0; } #endif /**************************************************************************** Lock a range of bytes - Windows lock semantics. ****************************************************************************/ NTSTATUS brl_lock_windows_default(struct byte_range_lock *br_lck, struct lock_struct *plock, bool blocking_lock) { unsigned int i; files_struct *fsp = br_lck->fsp; struct lock_struct *locks = br_lck->lock_data; NTSTATUS status; SMB_ASSERT(plock->lock_type != UNLOCK_LOCK); if ((plock->start + plock->size - 1 < plock->start) && plock->size != 0) { return NT_STATUS_INVALID_LOCK_RANGE; } for (i=0; i < br_lck->num_locks; i++) { /* Do any Windows or POSIX locks conflict ? */ if (brl_conflict(&locks[i], plock)) { /* Remember who blocked us. */ plock->context.smblctx = locks[i].context.smblctx; return brl_lock_failed(fsp,plock,blocking_lock); } #if ZERO_ZERO if (plock->start == 0 && plock->size == 0 && locks[i].size == 0) { break; } #endif } if (!IS_PENDING_LOCK(plock->lock_type)) { contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WINDOWS_BRL); } /* We can get the Windows lock, now see if it needs to be mapped into a lower level POSIX one, and if so can we get it ? */ if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(fsp->conn->params)) { int errno_ret; if (!set_posix_lock_windows_flavour(fsp, plock->start, plock->size, plock->lock_type, &plock->context, locks, br_lck->num_locks, &errno_ret)) { /* We don't know who blocked us. */ plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL; if (errno_ret == EACCES || errno_ret == EAGAIN) { status = NT_STATUS_FILE_LOCK_CONFLICT; goto fail; } else { status = map_nt_error_from_unix(errno); goto fail; } } } /* no conflicts - add it to the list of locks */ locks = talloc_realloc(br_lck, locks, struct lock_struct, (br_lck->num_locks + 1)); if (!locks) { status = NT_STATUS_NO_MEMORY; goto fail; } memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct)); br_lck->num_locks += 1; br_lck->lock_data = locks; br_lck->modified = True; return NT_STATUS_OK; fail: if (!IS_PENDING_LOCK(plock->lock_type)) { contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WINDOWS_BRL); } return status; } /**************************************************************************** Cope with POSIX range splits and merges. ****************************************************************************/ static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr, /* Output array. */ struct lock_struct *ex, /* existing lock. */ struct lock_struct *plock) /* proposed lock. */ { bool lock_types_differ = (ex->lock_type != plock->lock_type); /* We can't merge non-conflicting locks on different context - ignore fnum. */ if (!brl_same_context(&ex->context, &plock->context)) { /* Just copy. */ memcpy(&lck_arr[0], ex, sizeof(struct lock_struct)); return 1; } /* We now know we have the same context. */ /* Did we overlap ? */ /********************************************* +---------+ | ex | +---------+ +-------+ | plock | +-------+ OR.... 
+---------+ | ex | +---------+ **********************************************/ if ( (ex->start > (plock->start + plock->size)) || (plock->start > (ex->start + ex->size))) { /* No overlap with this lock - copy existing. */ memcpy(&lck_arr[0], ex, sizeof(struct lock_struct)); return 1; } /********************************************* +---------------------------+ | ex | +---------------------------+ +---------------------------+ | plock | -> replace with plock. +---------------------------+ OR +---------------+ | ex | +---------------+ +---------------------------+ | plock | -> replace with plock. +---------------------------+ **********************************************/ if ( (ex->start >= plock->start) && (ex->start + ex->size <= plock->start + plock->size) ) { /* Replace - discard existing lock. */ return 0; } /********************************************* Adjacent after. +-------+ | ex | +-------+ +---------------+ | plock | +---------------+ BECOMES.... +---------------+-------+ | plock | ex | - different lock types. +---------------+-------+ OR.... (merge) +-----------------------+ | plock | - same lock type. +-----------------------+ **********************************************/ if (plock->start + plock->size == ex->start) { /* If the lock types are the same, we merge, if different, we add the remainder of the old lock. */ if (lock_types_differ) { /* Add existing. */ memcpy(&lck_arr[0], ex, sizeof(struct lock_struct)); return 1; } else { /* Merge - adjust incoming lock as we may have more * merging to come. */ plock->size += ex->size; return 0; } } /********************************************* Adjacent before. +-------+ | ex | +-------+ +---------------+ | plock | +---------------+ BECOMES.... +-------+---------------+ | ex | plock | - different lock types +-------+---------------+ OR.... (merge) +-----------------------+ | plock | - same lock type. +-----------------------+ **********************************************/ if (ex->start + ex->size == plock->start) { /* If the lock types are the same, we merge, if different, we add the existing lock. */ if (lock_types_differ) { memcpy(&lck_arr[0], ex, sizeof(struct lock_struct)); return 1; } else { /* Merge - adjust incoming lock as we may have more * merging to come. */ plock->start = ex->start; plock->size += ex->size; return 0; } } /********************************************* Overlap after. +-----------------------+ | ex | +-----------------------+ +---------------+ | plock | +---------------+ OR +----------------+ | ex | +----------------+ +---------------+ | plock | +---------------+ BECOMES.... +---------------+-------+ | plock | ex | - different lock types. +---------------+-------+ OR.... (merge) +-----------------------+ | plock | - same lock type. +-----------------------+ **********************************************/ if ( (ex->start >= plock->start) && (ex->start <= plock->start + plock->size) && (ex->start + ex->size > plock->start + plock->size) ) { /* If the lock types are the same, we merge, if different, we add the remainder of the old lock. */ if (lock_types_differ) { /* Add remaining existing. */ memcpy(&lck_arr[0], ex, sizeof(struct lock_struct)); /* Adjust existing start and size. */ lck_arr[0].start = plock->start + plock->size; lck_arr[0].size = (ex->start + ex->size) - (plock->start + plock->size); return 1; } else { /* Merge - adjust incoming lock as we may have more * merging to come. 
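			 * Worked example (hypothetical values): ex covers
			 * [8, 16) and plock covers [4, 10), same lock type.
			 * plock->size grows by (8 + 8) - (4 + 6) = 6, so
			 * plock becomes [4, 16), the union of both ranges.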
*/ plock->size += (ex->start + ex->size) - (plock->start + plock->size); return 0; } } /********************************************* Overlap before. +-----------------------+ | ex | +-----------------------+ +---------------+ | plock | +---------------+ OR +-------------+ | ex | +-------------+ +---------------+ | plock | +---------------+ BECOMES.... +-------+---------------+ | ex | plock | - different lock types +-------+---------------+ OR.... (merge) +-----------------------+ | plock | - same lock type. +-----------------------+ **********************************************/ if ( (ex->start < plock->start) && (ex->start + ex->size >= plock->start) && (ex->start + ex->size <= plock->start + plock->size) ) { /* If the lock types are the same, we merge, if different, we add the truncated old lock. */ if (lock_types_differ) { memcpy(&lck_arr[0], ex, sizeof(struct lock_struct)); /* Adjust existing size. */ lck_arr[0].size = plock->start - ex->start; return 1; } else { /* Merge - adjust incoming lock as we may have more * merging to come. MUST ADJUST plock SIZE FIRST ! */ plock->size += (plock->start - ex->start); plock->start = ex->start; return 0; } } /********************************************* Complete overlap. +---------------------------+ | ex | +---------------------------+ +---------+ | plock | +---------+ BECOMES..... +-------+---------+---------+ | ex | plock | ex | - different lock types. +-------+---------+---------+ OR +---------------------------+ | plock | - same lock type. +---------------------------+ **********************************************/ if ( (ex->start < plock->start) && (ex->start + ex->size > plock->start + plock->size) ) { if (lock_types_differ) { /* We have to split ex into two locks here. */ memcpy(&lck_arr[0], ex, sizeof(struct lock_struct)); memcpy(&lck_arr[1], ex, sizeof(struct lock_struct)); /* Adjust first existing size. */ lck_arr[0].size = plock->start - ex->start; /* Adjust second existing start and size. */ lck_arr[1].start = plock->start + plock->size; lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size); return 2; } else { /* Just eat the existing locks, merge them into plock. */ plock->start = ex->start; plock->size = ex->size; return 0; } } /* Never get here. */ smb_panic("brlock_posix_split_merge"); /* Notreached. */ /* Keep some compilers happy. */ return 0; } /**************************************************************************** Lock a range of bytes - POSIX lock semantics. We must cope with range splits and merges. ****************************************************************************/ static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx, struct byte_range_lock *br_lck, struct lock_struct *plock) { unsigned int i, count, posix_count; struct lock_struct *locks = br_lck->lock_data; struct lock_struct *tp; bool signal_pending_read = False; bool break_oplocks = false; NTSTATUS status; /* No zero-zero locks for POSIX. */ if (plock->start == 0 && plock->size == 0) { return NT_STATUS_INVALID_PARAMETER; } /* Don't allow 64-bit lock wrap. */ if (plock->start + plock->size - 1 < plock->start) { return NT_STATUS_INVALID_PARAMETER; } /* The worst case scenario here is we have to split an existing POSIX lock range into two, and add our lock, so we need at most 2 more entries. 
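	   Example (hypothetical values): an existing lock covering
	   [0, 100) of a different lock type, overlapped in the middle by
	   plock = [40, 50), hits the "complete overlap" case in
	   brlock_posix_split_merge() and is replaced by [0, 40) and
	   [50, 100) - one extra entry - while the new lock itself
	   accounts for the second.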
	*/
	tp = talloc_array(br_lck, struct lock_struct,
			  br_lck->num_locks + 2);
	if (!tp) {
		return NT_STATUS_NO_MEMORY;
	}

	count = posix_count = 0;

	for (i=0; i < br_lck->num_locks; i++) {
		struct lock_struct *curr_lock = &locks[i];

		/* If we have a pending read lock, a lock downgrade should
		   trigger a lock re-evaluation. */
		if (curr_lock->lock_type == PENDING_READ_LOCK &&
				brl_pending_overlap(plock, curr_lock)) {
			signal_pending_read = True;
		}

		if (curr_lock->lock_flav == WINDOWS_LOCK) {
			/* Do any Windows flavour locks conflict ? */
			if (brl_conflict(curr_lock, plock)) {
				/* No games with error messages. */
				TALLOC_FREE(tp);
				/* Remember who blocked us. */
				plock->context.smblctx = curr_lock->context.smblctx;
				return NT_STATUS_FILE_LOCK_CONFLICT;
			}
			/* Just copy the Windows lock into the new array. */
			memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
			count++;
		} else {
			unsigned int tmp_count = 0;

			/* POSIX conflict semantics are different. */
			if (brl_conflict_posix(curr_lock, plock)) {
				/* Can't block ourselves with POSIX locks. */
				/* No games with error messages. */
				TALLOC_FREE(tp);
				/* Remember who blocked us. */
				plock->context.smblctx = curr_lock->context.smblctx;
				return NT_STATUS_FILE_LOCK_CONFLICT;
			}

			/* Work out overlaps. */
			tmp_count += brlock_posix_split_merge(&tp[count],
							      curr_lock,
							      plock);
			posix_count += tmp_count;
			count += tmp_count;
		}
	}

	/*
	 * Break oplocks while we hold a brl. Since lock() and unlock() calls
	 * are not symmetric with POSIX semantics, we cannot guarantee our
	 * contend_level2_oplocks_begin/end calls will be acquired and
	 * released one-for-one as with Windows semantics. Therefore we only
	 * call contend_level2_oplocks_begin if this is the first POSIX brl on
	 * the file.
	 */
	break_oplocks = (!IS_PENDING_LOCK(plock->lock_type) &&
			 posix_count == 0);
	if (break_oplocks) {
		contend_level2_oplocks_begin(br_lck->fsp,
					     LEVEL2_CONTEND_POSIX_BRL);
	}

	/* Try and add the lock in order, sorted by lock start. */
	for (i=0; i < count; i++) {
		struct lock_struct *curr_lock = &tp[i];

		if (curr_lock->start <= plock->start) {
			continue;
		}
		/* First entry starting beyond plock - insert before it. */
		break;
	}

	if (i < count) {
		memmove(&tp[i+1], &tp[i],
			(count - i)*sizeof(struct lock_struct));
	}
	memcpy(&tp[i], plock, sizeof(struct lock_struct));
	count++;

	/* We can get the POSIX lock, now see if it needs to
	   be mapped into a lower level POSIX one, and if so can
	   we get it ? */

	if (!IS_PENDING_LOCK(plock->lock_type) &&
			lp_posix_locking(br_lck->fsp->conn->params)) {
		int errno_ret;

		/* The lower layer just needs to attempt to
		   get the system POSIX lock. We've weeded out
		   any conflicts above. */

		if (!set_posix_lock_posix_flavour(br_lck->fsp,
				plock->start,
				plock->size,
				plock->lock_type,
				&errno_ret)) {

			/* We don't know who blocked us. */
			plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;

			if (errno_ret == EACCES || errno_ret == EAGAIN) {
				TALLOC_FREE(tp);
				status = NT_STATUS_FILE_LOCK_CONFLICT;
				goto fail;
			} else {
				TALLOC_FREE(tp);
				status = map_nt_error_from_unix(errno);
				goto fail;
			}
		}
	}

	/* If we didn't use all the allocated size,
	 * Realloc so we don't leak entries per lock call. */
	if (count < br_lck->num_locks + 2) {
		tp = talloc_realloc(br_lck, tp, struct lock_struct, count);
		if (!tp) {
			status = NT_STATUS_NO_MEMORY;
			goto fail;
		}
	}

	br_lck->num_locks = count;
	TALLOC_FREE(br_lck->lock_data);
	br_lck->lock_data = tp;
	locks = tp;
	br_lck->modified = True;

	/* A successful downgrade from write to read lock can trigger a lock
	   re-evaluation where waiting readers can now proceed. */

	if (signal_pending_read) {
		/* Send unlock messages to any pending read waiters that overlap.
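		   MSG_SMB_UNLOCK grants nothing by itself: it only wakes
		   the waiting smbd, which then retries its blocked lock
		   request against the updated lock data.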
*/ for (i=0; i < br_lck->num_locks; i++) { struct lock_struct *pend_lock = &locks[i]; /* Ignore non-pending locks. */ if (!IS_PENDING_LOCK(pend_lock->lock_type)) { continue; } if (pend_lock->lock_type == PENDING_READ_LOCK && brl_pending_overlap(plock, pend_lock)) { DEBUG(10,("brl_lock_posix: sending unlock message to pid %s\n", procid_str_static(&pend_lock->context.pid ))); messaging_send(msg_ctx, pend_lock->context.pid, MSG_SMB_UNLOCK, &data_blob_null); } } } return NT_STATUS_OK; fail: if (break_oplocks) { contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_POSIX_BRL); } return status; } NTSTATUS smb_vfs_call_brl_lock_windows(struct vfs_handle_struct *handle, struct byte_range_lock *br_lck, struct lock_struct *plock, bool blocking_lock, struct blocking_lock_record *blr) { VFS_FIND(brl_lock_windows); return handle->fns->brl_lock_windows_fn(handle, br_lck, plock, blocking_lock, blr); } /**************************************************************************** Lock a range of bytes. ****************************************************************************/ NTSTATUS brl_lock(struct messaging_context *msg_ctx, struct byte_range_lock *br_lck, uint64_t smblctx, struct server_id pid, br_off start, br_off size, enum brl_type lock_type, enum brl_flavour lock_flav, bool blocking_lock, uint64_t *psmblctx, struct blocking_lock_record *blr) { NTSTATUS ret; struct lock_struct lock; #if !ZERO_ZERO if (start == 0 && size == 0) { DEBUG(0,("client sent 0/0 lock - please report this\n")); } #endif #ifdef DEVELOPER /* Quieten valgrind on test. */ ZERO_STRUCT(lock); #endif lock.context.smblctx = smblctx; lock.context.pid = pid; lock.context.tid = br_lck->fsp->conn->cnum; lock.start = start; lock.size = size; lock.fnum = br_lck->fsp->fnum; lock.lock_type = lock_type; lock.lock_flav = lock_flav; if (lock_flav == WINDOWS_LOCK) { ret = SMB_VFS_BRL_LOCK_WINDOWS(br_lck->fsp->conn, br_lck, &lock, blocking_lock, blr); } else { ret = brl_lock_posix(msg_ctx, br_lck, &lock); } #if ZERO_ZERO /* sort the lock list */ TYPESAFE_QSORT(br_lck->lock_data, (size_t)br_lck->num_locks, lock_compare); #endif /* If we're returning an error, return who blocked us. */ if (!NT_STATUS_IS_OK(ret) && psmblctx) { *psmblctx = lock.context.smblctx; } return ret; } /**************************************************************************** Unlock a range of bytes - Windows semantics. ****************************************************************************/ bool brl_unlock_windows_default(struct messaging_context *msg_ctx, struct byte_range_lock *br_lck, const struct lock_struct *plock) { unsigned int i, j; struct lock_struct *locks = br_lck->lock_data; enum brl_type deleted_lock_type = READ_LOCK; /* shut the compiler up.... */ SMB_ASSERT(plock->lock_type == UNLOCK_LOCK); #if ZERO_ZERO /* Delete write locks by preference... The lock list is sorted in the zero zero case. */ for (i = 0; i < br_lck->num_locks; i++) { struct lock_struct *lock = &locks[i]; if (lock->lock_type == WRITE_LOCK && brl_same_context(&lock->context, &plock->context) && lock->fnum == plock->fnum && lock->lock_flav == WINDOWS_LOCK && lock->start == plock->start && lock->size == plock->size) { /* found it - delete it */ deleted_lock_type = lock->lock_type; break; } } if (i != br_lck->num_locks) { /* We found it - don't search again. 
*/ goto unlock_continue; } #endif for (i = 0; i < br_lck->num_locks; i++) { struct lock_struct *lock = &locks[i]; if (IS_PENDING_LOCK(lock->lock_type)) { continue; } /* Only remove our own locks that match in start, size, and flavour. */ if (brl_same_context(&lock->context, &plock->context) && lock->fnum == plock->fnum && lock->lock_flav == WINDOWS_LOCK && lock->start == plock->start && lock->size == plock->size ) { deleted_lock_type = lock->lock_type; break; } } if (i == br_lck->num_locks) { /* we didn't find it */ return False; } #if ZERO_ZERO unlock_continue: #endif /* Actually delete the lock. */ if (i < br_lck->num_locks - 1) { memmove(&locks[i], &locks[i+1], sizeof(*locks)*((br_lck->num_locks-1) - i)); } br_lck->num_locks -= 1; br_lck->modified = True; /* Unlock the underlying POSIX regions. */ if(lp_posix_locking(br_lck->fsp->conn->params)) { release_posix_lock_windows_flavour(br_lck->fsp, plock->start, plock->size, deleted_lock_type, &plock->context, locks, br_lck->num_locks); } /* Send unlock messages to any pending waiters that overlap. */ for (j=0; j < br_lck->num_locks; j++) { struct lock_struct *pend_lock = &locks[j]; /* Ignore non-pending locks. */ if (!IS_PENDING_LOCK(pend_lock->lock_type)) { continue; } /* We could send specific lock info here... */ if (brl_pending_overlap(plock, pend_lock)) { DEBUG(10,("brl_unlock: sending unlock message to pid %s\n", procid_str_static(&pend_lock->context.pid ))); messaging_send(msg_ctx, pend_lock->context.pid, MSG_SMB_UNLOCK, &data_blob_null); } } contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_WINDOWS_BRL); return True; } /**************************************************************************** Unlock a range of bytes - POSIX semantics. ****************************************************************************/ static bool brl_unlock_posix(struct messaging_context *msg_ctx, struct byte_range_lock *br_lck, struct lock_struct *plock) { unsigned int i, j, count; struct lock_struct *tp; struct lock_struct *locks = br_lck->lock_data; bool overlap_found = False; /* No zero-zero locks for POSIX. */ if (plock->start == 0 && plock->size == 0) { return False; } /* Don't allow 64-bit lock wrap. */ if (plock->start + plock->size < plock->start || plock->start + plock->size < plock->size) { DEBUG(10,("brl_unlock_posix: lock wrap\n")); return False; } /* The worst case scenario here is we have to split an existing POSIX lock range into two, so we need at most 1 more entry. */ tp = talloc_array(br_lck, struct lock_struct, br_lck->num_locks + 1); if (!tp) { DEBUG(10,("brl_unlock_posix: malloc fail\n")); return False; } count = 0; for (i = 0; i < br_lck->num_locks; i++) { struct lock_struct *lock = &locks[i]; unsigned int tmp_count; /* Only remove our own locks - ignore fnum. */ if (IS_PENDING_LOCK(lock->lock_type) || !brl_same_context(&lock->context, &plock->context)) { memcpy(&tp[count], lock, sizeof(struct lock_struct)); count++; continue; } if (lock->lock_flav == WINDOWS_LOCK) { /* Do any Windows flavour locks conflict ? */ if (brl_conflict(lock, plock)) { TALLOC_FREE(tp); return false; } /* Just copy the Windows lock into the new array. */ memcpy(&tp[count], lock, sizeof(struct lock_struct)); count++; continue; } /* Work out overlaps. */ tmp_count = brlock_posix_split_merge(&tp[count], lock, plock); if (tmp_count == 0) { /* plock overlapped the existing lock completely, or replaced it. Don't copy the existing lock. 
*/ overlap_found = true; } else if (tmp_count == 1) { /* Either no overlap, (simple copy of existing lock) or * an overlap of an existing lock. */ /* If the lock changed size, we had an overlap. */ if (tp[count].size != lock->size) { overlap_found = true; } count += tmp_count; } else if (tmp_count == 2) { /* We split a lock range in two. */ overlap_found = true; count += tmp_count; /* Optimisation... */ /* We know we're finished here as we can't overlap any more POSIX locks. Copy the rest of the lock array. */ if (i < br_lck->num_locks - 1) { memcpy(&tp[count], &locks[i+1], sizeof(*locks)*((br_lck->num_locks-1) - i)); count += ((br_lck->num_locks-1) - i); } break; } } if (!overlap_found) { /* Just ignore - no change. */ TALLOC_FREE(tp); DEBUG(10,("brl_unlock_posix: No overlap - unlocked.\n")); return True; } /* Unlock any POSIX regions. */ if(lp_posix_locking(br_lck->fsp->conn->params)) { release_posix_lock_posix_flavour(br_lck->fsp, plock->start, plock->size, &plock->context, tp, count); } /* Realloc so we don't leak entries per unlock call. */ if (count) { tp = talloc_realloc(br_lck, tp, struct lock_struct, count); if (!tp) { DEBUG(10,("brl_unlock_posix: realloc fail\n")); return False; } } else { /* We deleted the last lock. */ TALLOC_FREE(tp); tp = NULL; } contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_POSIX_BRL); br_lck->num_locks = count; TALLOC_FREE(br_lck->lock_data); locks = tp; br_lck->lock_data = tp; br_lck->modified = True; /* Send unlock messages to any pending waiters that overlap. */ for (j=0; j < br_lck->num_locks; j++) { struct lock_struct *pend_lock = &locks[j]; /* Ignore non-pending locks. */ if (!IS_PENDING_LOCK(pend_lock->lock_type)) { continue; } /* We could send specific lock info here... */ if (brl_pending_overlap(plock, pend_lock)) { DEBUG(10,("brl_unlock: sending unlock message to pid %s\n", procid_str_static(&pend_lock->context.pid ))); messaging_send(msg_ctx, pend_lock->context.pid, MSG_SMB_UNLOCK, &data_blob_null); } } return True; } bool smb_vfs_call_brl_unlock_windows(struct vfs_handle_struct *handle, struct messaging_context *msg_ctx, struct byte_range_lock *br_lck, const struct lock_struct *plock) { VFS_FIND(brl_unlock_windows); return handle->fns->brl_unlock_windows_fn(handle, msg_ctx, br_lck, plock); } /**************************************************************************** Unlock a range of bytes. ****************************************************************************/ bool brl_unlock(struct messaging_context *msg_ctx, struct byte_range_lock *br_lck, uint64_t smblctx, struct server_id pid, br_off start, br_off size, enum brl_flavour lock_flav) { struct lock_struct lock; lock.context.smblctx = smblctx; lock.context.pid = pid; lock.context.tid = br_lck->fsp->conn->cnum; lock.start = start; lock.size = size; lock.fnum = br_lck->fsp->fnum; lock.lock_type = UNLOCK_LOCK; lock.lock_flav = lock_flav; if (lock_flav == WINDOWS_LOCK) { return SMB_VFS_BRL_UNLOCK_WINDOWS(br_lck->fsp->conn, msg_ctx, br_lck, &lock); } else { return brl_unlock_posix(msg_ctx, br_lck, &lock); } } /**************************************************************************** Test if we could add a lock if we wanted to. Returns True if the region required is currently unlocked, False if locked. 
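 The answer is only valid at the time of the call: nothing stops another
 process taking a conflicting lock once the record is released, so a True
 result is no guarantee that a later brl_lock() will succeed.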
****************************************************************************/ bool brl_locktest(struct byte_range_lock *br_lck, uint64_t smblctx, struct server_id pid, br_off start, br_off size, enum brl_type lock_type, enum brl_flavour lock_flav) { bool ret = True; unsigned int i; struct lock_struct lock; const struct lock_struct *locks = br_lck->lock_data; files_struct *fsp = br_lck->fsp; lock.context.smblctx = smblctx; lock.context.pid = pid; lock.context.tid = br_lck->fsp->conn->cnum; lock.start = start; lock.size = size; lock.fnum = fsp->fnum; lock.lock_type = lock_type; lock.lock_flav = lock_flav; /* Make sure existing locks don't conflict */ for (i=0; i < br_lck->num_locks; i++) { /* * Our own locks don't conflict. */ if (brl_conflict_other(&locks[i], &lock)) { return False; } } /* * There is no lock held by an SMB daemon, check to * see if there is a POSIX lock from a UNIX or NFS process. * This only conflicts with Windows locks, not POSIX locks. */ if(lp_posix_locking(fsp->conn->params) && (lock_flav == WINDOWS_LOCK)) { ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK); DEBUG(10,("brl_locktest: posix start=%.0f len=%.0f %s for %s file %s\n", (double)start, (double)size, ret ? "locked" : "unlocked", fsp_fnum_dbg(fsp), fsp_str_dbg(fsp))); /* We need to return the inverse of is_posix_locked. */ ret = !ret; } /* no conflicts - we could have added it */ return ret; } /**************************************************************************** Query for existing locks. ****************************************************************************/ NTSTATUS brl_lockquery(struct byte_range_lock *br_lck, uint64_t *psmblctx, struct server_id pid, br_off *pstart, br_off *psize, enum brl_type *plock_type, enum brl_flavour lock_flav) { unsigned int i; struct lock_struct lock; const struct lock_struct *locks = br_lck->lock_data; files_struct *fsp = br_lck->fsp; lock.context.smblctx = *psmblctx; lock.context.pid = pid; lock.context.tid = br_lck->fsp->conn->cnum; lock.start = *pstart; lock.size = *psize; lock.fnum = fsp->fnum; lock.lock_type = *plock_type; lock.lock_flav = lock_flav; /* Make sure existing locks don't conflict */ for (i=0; i < br_lck->num_locks; i++) { const struct lock_struct *exlock = &locks[i]; bool conflict = False; if (exlock->lock_flav == WINDOWS_LOCK) { conflict = brl_conflict(exlock, &lock); } else { conflict = brl_conflict_posix(exlock, &lock); } if (conflict) { *psmblctx = exlock->context.smblctx; *pstart = exlock->start; *psize = exlock->size; *plock_type = exlock->lock_type; return NT_STATUS_LOCK_NOT_GRANTED; } } /* * There is no lock held by an SMB daemon, check to * see if there is a POSIX lock from a UNIX or NFS process. */ if(lp_posix_locking(fsp->conn->params)) { bool ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK); DEBUG(10,("brl_lockquery: posix start=%.0f len=%.0f %s for %s file %s\n", (double)*pstart, (double)*psize, ret ? "locked" : "unlocked", fsp_fnum_dbg(fsp), fsp_str_dbg(fsp))); if (ret) { /* Hmmm. No clue what to set smblctx to - use -1. */ *psmblctx = 0xFFFFFFFFFFFFFFFFLL; return NT_STATUS_LOCK_NOT_GRANTED; } } return NT_STATUS_OK; } bool smb_vfs_call_brl_cancel_windows(struct vfs_handle_struct *handle, struct byte_range_lock *br_lck, struct lock_struct *plock, struct blocking_lock_record *blr) { VFS_FIND(brl_cancel_windows); return handle->fns->brl_cancel_windows_fn(handle, br_lck, plock, blr); } /**************************************************************************** Remove a particular pending lock. 
****************************************************************************/ bool brl_lock_cancel(struct byte_range_lock *br_lck, uint64_t smblctx, struct server_id pid, br_off start, br_off size, enum brl_flavour lock_flav, struct blocking_lock_record *blr) { bool ret; struct lock_struct lock; lock.context.smblctx = smblctx; lock.context.pid = pid; lock.context.tid = br_lck->fsp->conn->cnum; lock.start = start; lock.size = size; lock.fnum = br_lck->fsp->fnum; lock.lock_flav = lock_flav; /* lock.lock_type doesn't matter */ if (lock_flav == WINDOWS_LOCK) { ret = SMB_VFS_BRL_CANCEL_WINDOWS(br_lck->fsp->conn, br_lck, &lock, blr); } else { ret = brl_lock_cancel_default(br_lck, &lock); } return ret; } bool brl_lock_cancel_default(struct byte_range_lock *br_lck, struct lock_struct *plock) { unsigned int i; struct lock_struct *locks = br_lck->lock_data; SMB_ASSERT(plock); for (i = 0; i < br_lck->num_locks; i++) { struct lock_struct *lock = &locks[i]; /* For pending locks we *always* care about the fnum. */ if (brl_same_context(&lock->context, &plock->context) && lock->fnum == plock->fnum && IS_PENDING_LOCK(lock->lock_type) && lock->lock_flav == plock->lock_flav && lock->start == plock->start && lock->size == plock->size) { break; } } if (i == br_lck->num_locks) { /* Didn't find it. */ return False; } if (i < br_lck->num_locks - 1) { /* Found this particular pending lock - delete it */ memmove(&locks[i], &locks[i+1], sizeof(*locks)*((br_lck->num_locks-1) - i)); } br_lck->num_locks -= 1; br_lck->modified = True; return True; } /**************************************************************************** Remove any locks associated with a open file. We return True if this process owns any other Windows locks on this fd and so we should not immediately close the fd. ****************************************************************************/ void brl_close_fnum(struct messaging_context *msg_ctx, struct byte_range_lock *br_lck) { files_struct *fsp = br_lck->fsp; uint32_t tid = fsp->conn->cnum; uint64_t fnum = fsp->fnum; unsigned int i; struct lock_struct *locks = br_lck->lock_data; struct server_id pid = messaging_server_id(fsp->conn->sconn->msg_ctx); struct lock_struct *locks_copy; unsigned int num_locks_copy; /* Copy the current lock array. */ if (br_lck->num_locks) { locks_copy = (struct lock_struct *)talloc_memdup(br_lck, locks, br_lck->num_locks * sizeof(struct lock_struct)); if (!locks_copy) { smb_panic("brl_close_fnum: talloc failed"); } } else { locks_copy = NULL; } num_locks_copy = br_lck->num_locks; for (i=0; i < num_locks_copy; i++) { struct lock_struct *lock = &locks_copy[i]; if (lock->context.tid == tid && serverid_equal(&lock->context.pid, &pid) && (lock->fnum == fnum)) { brl_unlock(msg_ctx, br_lck, lock->context.smblctx, pid, lock->start, lock->size, lock->lock_flav); } } } bool brl_mark_disconnected(struct files_struct *fsp) { uint32_t tid = fsp->conn->cnum; uint64_t smblctx = fsp->op->global->open_persistent_id; uint64_t fnum = fsp->fnum; unsigned int i; struct server_id self = messaging_server_id(fsp->conn->sconn->msg_ctx); struct byte_range_lock *br_lck = NULL; if (!fsp->op->global->durable) { return false; } if (fsp->current_lock_count == 0) { return true; } br_lck = brl_get_locks(talloc_tos(), fsp); if (br_lck == NULL) { return false; } for (i=0; i < br_lck->num_locks; i++) { struct lock_struct *lock = &br_lck->lock_data[i]; /* * as this is a durable handle, we only expect locks * of the current file handle! 
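	 * If any entry does not match we bail out and return false
	 * below; br_lck->modified is never set in that case, so the
	 * destructor discards the partial in-memory changes and
	 * nothing is written back.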
*/ if (lock->context.smblctx != smblctx) { TALLOC_FREE(br_lck); return false; } if (lock->context.tid != tid) { TALLOC_FREE(br_lck); return false; } if (!serverid_equal(&lock->context.pid, &self)) { TALLOC_FREE(br_lck); return false; } if (lock->fnum != fnum) { TALLOC_FREE(br_lck); return false; } server_id_set_disconnected(&lock->context.pid); lock->context.tid = TID_FIELD_INVALID; lock->fnum = FNUM_FIELD_INVALID; } br_lck->modified = true; TALLOC_FREE(br_lck); return true; } bool brl_reconnect_disconnected(struct files_struct *fsp) { uint32_t tid = fsp->conn->cnum; uint64_t smblctx = fsp->op->global->open_persistent_id; uint64_t fnum = fsp->fnum; unsigned int i; struct server_id self = messaging_server_id(fsp->conn->sconn->msg_ctx); struct byte_range_lock *br_lck = NULL; if (!fsp->op->global->durable) { return false; } /* * When reconnecting, we do not want to validate the brlock entries * and thereby remove our own (disconnected) entries but reactivate * them instead. */ fsp->lockdb_clean = true; br_lck = brl_get_locks(talloc_tos(), fsp); if (br_lck == NULL) { return false; } if (br_lck->num_locks == 0) { TALLOC_FREE(br_lck); return true; } for (i=0; i < br_lck->num_locks; i++) { struct lock_struct *lock = &br_lck->lock_data[i]; /* * as this is a durable handle we only expect locks * of the current file handle! */ if (lock->context.smblctx != smblctx) { TALLOC_FREE(br_lck); return false; } if (lock->context.tid != TID_FIELD_INVALID) { TALLOC_FREE(br_lck); return false; } if (!server_id_is_disconnected(&lock->context.pid)) { TALLOC_FREE(br_lck); return false; } if (lock->fnum != FNUM_FIELD_INVALID) { TALLOC_FREE(br_lck); return false; } lock->context.pid = self; lock->context.tid = tid; lock->fnum = fnum; } fsp->current_lock_count = br_lck->num_locks; br_lck->modified = true; TALLOC_FREE(br_lck); return true; } /**************************************************************************** Ensure this set of lock entries is valid. ****************************************************************************/ static bool validate_lock_entries(TALLOC_CTX *mem_ctx, unsigned int *pnum_entries, struct lock_struct **pplocks, bool keep_disconnected) { unsigned int i; unsigned int num_valid_entries = 0; struct lock_struct *locks = *pplocks; TALLOC_CTX *frame; struct server_id *ids; bool *exists; if (*pnum_entries == 0) { return true; } frame = talloc_stackframe(); ids = talloc_array(frame, struct server_id, *pnum_entries); if (ids == NULL) { DEBUG(0, ("validate_lock_entries: " "talloc_array(struct server_id, %u) failed\n", *pnum_entries)); talloc_free(frame); return false; } exists = talloc_array(frame, bool, *pnum_entries); if (exists == NULL) { DEBUG(0, ("validate_lock_entries: " "talloc_array(bool, %u) failed\n", *pnum_entries)); talloc_free(frame); return false; } for (i = 0; i < *pnum_entries; i++) { ids[i] = locks[i].context.pid; } if (!serverids_exist(ids, *pnum_entries, exists)) { DEBUG(3, ("validate_lock_entries: serverids_exists failed\n")); talloc_free(frame); return false; } for (i = 0; i < *pnum_entries; i++) { if (exists[i]) { num_valid_entries++; continue; } if (keep_disconnected && server_id_is_disconnected(&ids[i])) { num_valid_entries++; continue; } /* This process no longer exists - mark this entry as invalid by zeroing it. 
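	   A zeroed entry has context.smblctx == 0 and context.tid == 0,
	   which is what the compaction pass below tests for when copying
	   only the valid entries into the replacement array.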
		*/
		ZERO_STRUCTP(&locks[i]);
	}

	TALLOC_FREE(frame);

	if (num_valid_entries != *pnum_entries) {
		struct lock_struct *new_lock_data = NULL;

		if (num_valid_entries) {
			new_lock_data = talloc_array(
				mem_ctx, struct lock_struct,
				num_valid_entries);
			if (!new_lock_data) {
				DEBUG(3, ("malloc fail\n"));
				return False;
			}

			num_valid_entries = 0;
			for (i = 0; i < *pnum_entries; i++) {
				struct lock_struct *lock_data = &locks[i];
				if (lock_data->context.smblctx &&
						lock_data->context.tid) {
					/* Valid (nonzero) entry - copy it. */
					memcpy(&new_lock_data[num_valid_entries],
						lock_data,
						sizeof(struct lock_struct));
					num_valid_entries++;
				}
			}
		}

		TALLOC_FREE(*pplocks);
		*pplocks = new_lock_data;
		*pnum_entries = num_valid_entries;
	}

	return True;
}

struct brl_forall_cb {
	void (*fn)(struct file_id id, struct server_id pid,
		   enum brl_type lock_type,
		   enum brl_flavour lock_flav,
		   br_off start, br_off size,
		   void *private_data);
	void *private_data;
};

/****************************************************************************
 Traverse the whole database with this function, calling traverse_callback
 on each lock.
****************************************************************************/

static int brl_traverse_fn(struct db_record *rec, void *state)
{
	struct brl_forall_cb *cb = (struct brl_forall_cb *)state;
	struct lock_struct *locks;
	struct file_id *key;
	unsigned int i;
	unsigned int num_locks = 0;
	unsigned int orig_num_locks = 0;
	TDB_DATA dbkey;
	TDB_DATA value;

	dbkey = dbwrap_record_get_key(rec);
	value = dbwrap_record_get_value(rec);

	/* In a traverse function we must make a copy of
	   dbuf before modifying it. */

	locks = (struct lock_struct *)talloc_memdup(
		talloc_tos(), value.dptr, value.dsize);
	if (!locks) {
		return -1; /* Terminate traversal. */
	}

	key = (struct file_id *)dbkey.dptr;
	orig_num_locks = num_locks = value.dsize/sizeof(*locks);

	/* Ensure the lock db is clean of entries from invalid processes. */

	if (!validate_lock_entries(talloc_tos(), &num_locks, &locks, true)) {
		TALLOC_FREE(locks);
		return -1; /* Terminate traversal */
	}

	if (orig_num_locks != num_locks) {
		if (num_locks) {
			TDB_DATA data;
			data.dptr = (uint8_t *)locks;
			data.dsize = num_locks*sizeof(struct lock_struct);
			dbwrap_record_store(rec, data, TDB_REPLACE);
		} else {
			dbwrap_record_delete(rec);
		}
	}

	if (cb->fn) {
		for ( i=0; i<num_locks; i++) {
			cb->fn(*key,
				locks[i].context.pid,
				locks[i].lock_type,
				locks[i].lock_flav,
				locks[i].start,
				locks[i].size,
				cb->private_data);
		}
	}

	TALLOC_FREE(locks);
	return 0;
}

/*******************************************************************
 Call the specified function on each lock in the database.
********************************************************************/

int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
			  enum brl_type lock_type,
			  enum brl_flavour lock_flav,
			  br_off start, br_off size,
			  void *private_data),
	       void *private_data)
{
	struct brl_forall_cb cb;
	NTSTATUS status;
	int count = 0;

	if (!brlock_db) {
		return 0;
	}
	cb.fn = fn;
	cb.private_data = private_data;
	status = dbwrap_traverse(brlock_db, brl_traverse_fn, &cb, &count);

	if (!NT_STATUS_IS_OK(status)) {
		return -1;
	} else {
		return count;
	}
}

/*******************************************************************
 Store a potentially modified set of byte range lock data back into
 the database. Unlock the record.
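 The stored record is a packed array of struct lock_struct, optionally
 followed by one extra byte that is 1 when have_read_oplocks is set; the
 read side detects that byte via dsize % sizeof(struct lock_struct) == 1.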
********************************************************************/ static void byte_range_lock_flush(struct byte_range_lock *br_lck) { size_t data_len; if (!br_lck->modified) { DEBUG(10, ("br_lck not modified\n")); goto done; } data_len = br_lck->num_locks * sizeof(struct lock_struct); if (br_lck->have_read_oplocks) { data_len += 1; } DEBUG(10, ("data_len=%d\n", (int)data_len)); if (data_len == 0) { /* No locks - delete this entry. */ NTSTATUS status = dbwrap_record_delete(br_lck->record); if (!NT_STATUS_IS_OK(status)) { DEBUG(0, ("delete_rec returned %s\n", nt_errstr(status))); smb_panic("Could not delete byte range lock entry"); } } else { TDB_DATA data; NTSTATUS status; data.dsize = data_len; data.dptr = talloc_array(talloc_tos(), uint8_t, data_len); SMB_ASSERT(data.dptr != NULL); memcpy(data.dptr, br_lck->lock_data, br_lck->num_locks * sizeof(struct lock_struct)); if (br_lck->have_read_oplocks) { data.dptr[data_len-1] = 1; } status = dbwrap_record_store(br_lck->record, data, TDB_REPLACE); TALLOC_FREE(data.dptr); if (!NT_STATUS_IS_OK(status)) { DEBUG(0, ("store returned %s\n", nt_errstr(status))); smb_panic("Could not store byte range mode entry"); } } DEBUG(10, ("seqnum=%d\n", dbwrap_get_seqnum(brlock_db))); done: br_lck->modified = false; TALLOC_FREE(br_lck->record); } static int byte_range_lock_destructor(struct byte_range_lock *br_lck) { byte_range_lock_flush(br_lck); return 0; } /******************************************************************* Fetch a set of byte range lock data from the database. Leave the record locked. TALLOC_FREE(brl) will release the lock in the destructor. ********************************************************************/ struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx, files_struct *fsp) { TDB_DATA key, data; struct byte_range_lock *br_lck = talloc(mem_ctx, struct byte_range_lock); if (br_lck == NULL) { return NULL; } br_lck->fsp = fsp; br_lck->num_locks = 0; br_lck->have_read_oplocks = false; br_lck->modified = False; key.dptr = (uint8 *)&fsp->file_id; key.dsize = sizeof(struct file_id); br_lck->record = dbwrap_fetch_locked(brlock_db, br_lck, key); if (br_lck->record == NULL) { DEBUG(3, ("Could not lock byte range lock entry\n")); TALLOC_FREE(br_lck); return NULL; } data = dbwrap_record_get_value(br_lck->record); br_lck->lock_data = NULL; talloc_set_destructor(br_lck, byte_range_lock_destructor); br_lck->num_locks = data.dsize / sizeof(struct lock_struct); if (br_lck->num_locks != 0) { br_lck->lock_data = talloc_array( br_lck, struct lock_struct, br_lck->num_locks); if (br_lck->lock_data == NULL) { DEBUG(0, ("malloc failed\n")); TALLOC_FREE(br_lck); return NULL; } memcpy(br_lck->lock_data, data.dptr, talloc_get_size(br_lck->lock_data)); } DEBUG(10, ("data.dsize=%d\n", (int)data.dsize)); if ((data.dsize % sizeof(struct lock_struct)) == 1) { br_lck->have_read_oplocks = (data.dptr[data.dsize-1] == 1); } if (!fsp->lockdb_clean) { int orig_num_locks = br_lck->num_locks; /* * This is the first time we access the byte range lock * record with this fsp. Go through and ensure all entries * are valid - remove any that don't. * This makes the lockdb self cleaning at low cost. * * Note: Disconnected entries belong to disconnected * durable handles. So at this point, we have a new * handle on the file and the disconnected durable has * already been closed (we are not a durable reconnect). * So we need to clean the disconnected brl entry. 
*/ if (!validate_lock_entries(br_lck, &br_lck->num_locks, &br_lck->lock_data, false)) { TALLOC_FREE(br_lck); return NULL; } /* Ensure invalid locks are cleaned up in the destructor. */ if (orig_num_locks != br_lck->num_locks) { br_lck->modified = True; } /* Mark the lockdb as "clean" as seen from this open file. */ fsp->lockdb_clean = True; } if (DEBUGLEVEL >= 10) { unsigned int i; struct lock_struct *locks = br_lck->lock_data; DEBUG(10,("brl_get_locks_internal: %u current locks on file_id %s\n", br_lck->num_locks, file_id_string_tos(&fsp->file_id))); for( i = 0; i < br_lck->num_locks; i++) { print_lock_struct(i, &locks[i]); } } return br_lck; } struct brl_get_locks_readonly_state { TALLOC_CTX *mem_ctx; struct byte_range_lock **br_lock; }; static void brl_get_locks_readonly_parser(TDB_DATA key, TDB_DATA data, void *private_data) { struct brl_get_locks_readonly_state *state = (struct brl_get_locks_readonly_state *)private_data; struct byte_range_lock *br_lock; br_lock = talloc_pooled_object( state->mem_ctx, struct byte_range_lock, 1, data.dsize); if (br_lock == NULL) { *state->br_lock = NULL; return; } br_lock->lock_data = (struct lock_struct *)talloc_memdup( br_lock, data.dptr, data.dsize); br_lock->num_locks = data.dsize / sizeof(struct lock_struct); if ((data.dsize % sizeof(struct lock_struct)) == 1) { br_lock->have_read_oplocks = (data.dptr[data.dsize-1] == 1); } DEBUG(10, ("Got %d bytes, have_read_oplocks: %s\n", (int)data.dsize, br_lock->have_read_oplocks ? "true" : "false")); *state->br_lock = br_lock; } struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp) { struct byte_range_lock *br_lock = NULL; struct byte_range_lock *rw = NULL; DEBUG(10, ("seqnum=%d, fsp->brlock_seqnum=%d\n", dbwrap_get_seqnum(brlock_db), fsp->brlock_seqnum)); if ((fsp->brlock_rec != NULL) && (dbwrap_get_seqnum(brlock_db) == fsp->brlock_seqnum)) { /* * We have cached the brlock_rec and the database did not * change. */ return fsp->brlock_rec; } if (!fsp->lockdb_clean) { /* * Fetch the record in R/W mode to give validate_lock_entries * a chance to kick in once. */ rw = brl_get_locks(talloc_tos(), fsp); if (rw == NULL) { return NULL; } fsp->lockdb_clean = true; } if (rw != NULL) { size_t lock_data_size; /* * Make a copy of the already retrieved and sanitized rw record */ lock_data_size = rw->num_locks * sizeof(struct lock_struct); br_lock = talloc_pooled_object( fsp, struct byte_range_lock, 1, lock_data_size); if (br_lock == NULL) { goto fail; } br_lock->have_read_oplocks = rw->have_read_oplocks; br_lock->num_locks = rw->num_locks; br_lock->lock_data = (struct lock_struct *)talloc_memdup( br_lock, rw->lock_data, lock_data_size); } else { struct brl_get_locks_readonly_state state; NTSTATUS status; /* * Parse the record fresh from the database */ state.mem_ctx = fsp; state.br_lock = &br_lock; status = dbwrap_parse_record( brlock_db, make_tdb_data((uint8_t *)&fsp->file_id, sizeof(fsp->file_id)), brl_get_locks_readonly_parser, &state); if (!NT_STATUS_IS_OK(status)) { DEBUG(3, ("Could not parse byte range lock record: " "%s\n", nt_errstr(status))); goto fail; } if (br_lock == NULL) { goto fail; } } br_lock->fsp = fsp; br_lock->modified = false; br_lock->record = NULL; if (lp_clustering()) { /* * In the cluster case we can't cache the brlock struct * because dbwrap_get_seqnum does not work reliably over * ctdb. Thus we have to throw away the brlock struct soon. */ talloc_steal(talloc_tos(), br_lock); } else { /* * Cache the brlock struct, invalidated when the dbwrap_seqnum * changes. 
		 * See beginning of this routine.
		 */
		TALLOC_FREE(fsp->brlock_rec);
		fsp->brlock_rec = br_lock;
		fsp->brlock_seqnum = dbwrap_get_seqnum(brlock_db);
	}

fail:
	TALLOC_FREE(rw);
	return br_lock;
}

struct brl_revalidate_state {
	ssize_t array_size;
	uint32 num_pids;
	struct server_id *pids;
};

/*
 * Collect PIDs of all processes with pending entries
 */

static void brl_revalidate_collect(struct file_id id, struct server_id pid,
				   enum brl_type lock_type,
				   enum brl_flavour lock_flav,
				   br_off start, br_off size,
				   void *private_data)
{
	struct brl_revalidate_state *state =
		(struct brl_revalidate_state *)private_data;

	if (!IS_PENDING_LOCK(lock_type)) {
		return;
	}

	add_to_large_array(state, sizeof(pid), (void *)&pid,
			   &state->pids, &state->num_pids,
			   &state->array_size);
}

/*
 * qsort callback to sort the processes
 */

static int compare_procids(const void *p1, const void *p2)
{
	const struct server_id *i1 = (const struct server_id *)p1;
	const struct server_id *i2 = (const struct server_id *)p2;

	if (i1->pid < i2->pid) return -1;
	if (i1->pid > i2->pid) return 1;
	return 0;
}

/*
 * Send a MSG_SMB_UNLOCK message to all processes with pending byte range
 * locks so that they retry. Mainly used in the cluster code after a node has
 * died.
 *
 * Done in two steps to avoid double-sends: First we collect all entries in an
 * array, then qsort that array and only send to non-dupes.
 */

void brl_revalidate(struct messaging_context *msg_ctx,
		    void *private_data,
		    uint32_t msg_type,
		    struct server_id server_id,
		    DATA_BLOB *data)
{
	struct brl_revalidate_state *state;
	uint32 i;
	struct server_id last_pid;

	if (!(state = talloc_zero(NULL, struct brl_revalidate_state))) {
		DEBUG(0, ("talloc failed\n"));
		return;
	}

	brl_forall(brl_revalidate_collect, state);

	if (state->array_size == -1) {
		DEBUG(0, ("talloc failed\n"));
		goto done;
	}

	if (state->num_pids == 0) {
		goto done;
	}

	TYPESAFE_QSORT(state->pids, state->num_pids, compare_procids);

	ZERO_STRUCT(last_pid);

	for (i=0; i<state->num_pids; i++) {
		if (serverid_equal(&last_pid, &state->pids[i])) {
			/*
			 * We've seen that one already
			 */
			continue;
		}

		messaging_send(msg_ctx, state->pids[i], MSG_SMB_UNLOCK,
			       &data_blob_null);
		last_pid = state->pids[i];
	}

done:
	TALLOC_FREE(state);
	return;
}

bool brl_cleanup_disconnected(struct file_id fid, uint64_t open_persistent_id)
{
	bool ret = false;
	TALLOC_CTX *frame = talloc_stackframe();
	TDB_DATA key, val;
	struct db_record *rec;
	struct lock_struct *lock;
	unsigned n, num;
	NTSTATUS status;

	key = make_tdb_data((void*)&fid, sizeof(fid));

	rec = dbwrap_fetch_locked(brlock_db, frame, key);
	if (rec == NULL) {
		DEBUG(5, ("brl_cleanup_disconnected: failed to fetch record "
			  "for file %s\n", file_id_string(frame, &fid)));
		goto done;
	}

	val = dbwrap_record_get_value(rec);
	lock = (struct lock_struct*)val.dptr;
	num = val.dsize / sizeof(struct lock_struct);
	if (lock == NULL) {
		DEBUG(10, ("brl_cleanup_disconnected: no byte range locks for "
			   "file %s\n", file_id_string(frame, &fid)));
		ret = true;
		goto done;
	}

	for (n=0; n<num; n++) {
		struct lock_context *ctx = &lock[n].context;

		if (!server_id_is_disconnected(&ctx->pid)) {
			DEBUG(5, ("brl_cleanup_disconnected: byte range lock "
				  "%s used by server %s, do not cleanup\n",
				  file_id_string(frame, &fid),
				  server_id_str(frame, &ctx->pid)));
			goto done;
		}

		if (ctx->smblctx != open_persistent_id) {
			DEBUG(5, ("brl_cleanup_disconnected: byte range lock "
				  "%s expected smblctx %llu but found %llu"
				  ", do not cleanup\n",
				  file_id_string(frame, &fid),
				  (unsigned long long)open_persistent_id,
				  (unsigned long long)ctx->smblctx));
			goto done;
		}
	}

	status = dbwrap_record_delete(rec);
	if (!NT_STATUS_IS_OK(status)) {
		DEBUG(5, ("brl_cleanup_disconnected: failed to delete record "
			  "for file %s from %s, open %llu: %s\n",
			  file_id_string(frame, &fid), dbwrap_name(brlock_db),
			  (unsigned long long)open_persistent_id,
			  nt_errstr(status)));
		goto done;
	}

	DEBUG(10, ("brl_cleanup_disconnected: "
		   "file %s cleaned up %u entries from open %llu\n",
		   file_id_string(frame, &fid), num,
		   (unsigned long long)open_persistent_id));
	ret = true;
done:
	talloc_free(frame);
	return ret;
}