/* * device-process.c: detailed processing of device information sent * from kernel. * * Copyright (c) 2006 The Regents of the University of Michigan. * All rights reserved. * * Andy Adamson * Fred Isaman * * Copyright (c) 2010 EMC Corporation, Haiying Tang * * Used codes in linux/fs/nfs/blocklayout/blocklayoutdev.c. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include "device-discovery.h" uint32_t *blk_overflow(uint32_t * p, uint32_t * end, size_t nbytes) { uint32_t *q = p + ((nbytes + 3) >> 2); if (q > end || q < p) return NULL; return p; } static int decode_blk_signature(uint32_t **pp, uint32_t * end, struct bl_sig *sig) { int i; uint32_t siglen, *p = *pp; BLK_READBUF(p, end, 4); READ32(sig->si_num_comps); if (sig->si_num_comps == 0) { BL_LOG_ERR("0 components in sig\n"); goto out_err; } if (sig->si_num_comps >= BLOCK_MAX_SIG_COMP) { BL_LOG_ERR("number of sig comps %i >= BLOCK_MAX_SIG_COMP\n", sig->si_num_comps); goto out_err; } for (i = 0; i < sig->si_num_comps; i++) { struct bl_sig_comp *comp = &sig->si_comps[i]; BLK_READBUF(p, end, 12); READ64(comp->bs_offset); READ32(siglen); comp->bs_length = siglen; BLK_READBUF(p, end, siglen); /* Note we rely here on fact that sig is used immediately * for mapping, then thrown away. */ comp->bs_string = (char *)p; p += ((siglen + 3) >> 2); } *pp = p; return 0; out_err: return -EIO; } /* * Read signature from device and compare to sig_comp * return: 0=match, 1=no match, -1=error */ static int read_cmp_blk_sig(struct bl_disk *disk, int fd, struct bl_sig_comp *comp) { const char *dev_name = disk->valid_path->full_path; int ret = -1; ssize_t siglen = comp->bs_length; int64_t bs_offset = comp->bs_offset; char *sig = NULL; sig = (char *)malloc(siglen); if (!sig) { BL_LOG_ERR("%s: Out of memory\n", __func__); goto out; } if (bs_offset < 0) bs_offset += (((int64_t) disk->size) << 9); if (lseek64(fd, bs_offset, SEEK_SET) == -1) { BL_LOG_ERR("File %s lseek error\n", dev_name); goto out; } if (read(fd, sig, siglen) != siglen) { BL_LOG_ERR("File %s read error\n", dev_name); goto out; } ret = memcmp(sig, comp->bs_string, siglen); out: if (sig) free(sig); return ret; } /* * All signatures in sig must be found on disk for verification. * Returns True if sig matches, False otherwise. */ static int verify_sig(struct bl_disk *disk, struct bl_sig *sig) { const char *dev_name = disk->valid_path->full_path; int fd, i, rv; fd = open(dev_name, O_RDONLY | O_LARGEFILE); if (fd < 0) { BL_LOG_ERR("%s: %s could not be opened for read\n", __func__, dev_name); return 0; } rv = 1; for (i = 0; i < sig->si_num_comps; i++) { if (read_cmp_blk_sig(disk, fd, &sig->si_comps[i])) { rv = 0; break; } } if (fd >= 0) close(fd); return rv; } /* * map_sig_to_device() * Given a signature, walk the list of visible disks searching for * a match. Returns True if mapping was done, False otherwise. * * While we're at it, fill in the vol->bv_size. */ static int map_sig_to_device(struct bl_sig *sig, struct bl_volume *vol) { int mapped = 0; struct bl_disk *disk; /* scan disk list to find out match device */ for (disk = visible_disk_list; disk; disk = disk->next) { /* FIXME: should we use better algorithm for disk scan? */ mapped = verify_sig(disk, sig); if (mapped) { vol->param.bv_dev = disk->dev; vol->bv_size = disk->size; break; } } return mapped; } /* We are given an array of XDR encoded array indices, each of which should * refer to a previously decoded device. Translate into a list of pointers * to the appropriate pnfs_blk_volume's. */ static int set_vol_array(uint32_t **pp, uint32_t *end, struct bl_volume *vols, int working) { int i, index; uint32_t *p = *pp; struct bl_volume **array = vols[working].bv_vols; for (i = 0; i < vols[working].bv_vol_n; i++) { BLK_READBUF(p, end, 4); READ32(index); if ((index < 0) || (index >= working)) { BL_LOG_ERR("set_vol_array: Id %i out of range\n", index); goto out_err; } array[i] = &vols[index]; } *pp = p; return 0; out_err: return -EIO; } static uint64_t sum_subvolume_sizes(struct bl_volume *vol) { int i; uint64_t sum = 0; for (i = 0; i < vol->bv_vol_n; i++) sum += vol->bv_vols[i]->bv_size; return sum; } static int decode_blk_volume(uint32_t **pp, uint32_t *end, struct bl_volume *vols, int voln, int *array_cnt) { int status = 0, j; struct bl_sig sig; uint32_t *p = *pp; struct bl_volume *vol = &vols[voln]; uint64_t tmp; BLK_READBUF(p, end, 4); READ32(vol->bv_type); switch (vol->bv_type) { case BLOCK_VOLUME_SIMPLE: *array_cnt = 0; status = decode_blk_signature(&p, end, &sig); if (status) return status; status = map_sig_to_device(&sig, vol); if (!status) { BL_LOG_ERR("Could not find disk for device\n"); return -ENXIO; } BL_LOG_INFO("%s: simple %d\n", __func__, voln); status = 0; break; case BLOCK_VOLUME_SLICE: BLK_READBUF(p, end, 16); READ_SECTOR(vol->param.bv_offset); READ_SECTOR(vol->bv_size); *array_cnt = vol->bv_vol_n = 1; BL_LOG_INFO("%s: slice %d\n", __func__, voln); status = set_vol_array(&p, end, vols, voln); break; case BLOCK_VOLUME_STRIPE: BLK_READBUF(p, end, 8); READ_SECTOR(vol->param.bv_stripe_unit); off_t stripe_unit = vol->param.bv_stripe_unit; /* Check limitations imposed by device-mapper */ if ((stripe_unit & (stripe_unit - 1)) != 0 || stripe_unit < (off_t) (sysconf(_SC_PAGE_SIZE) >> 9)) return -EIO; BLK_READBUF(p, end, 4); READ32(vol->bv_vol_n); if (!vol->bv_vol_n) return -EIO; *array_cnt = vol->bv_vol_n; BL_LOG_INFO("%s: stripe %d nvols=%d unit=%ld\n", __func__, voln, vol->bv_vol_n, (long)stripe_unit); status = set_vol_array(&p, end, vols, voln); if (status) return status; for (j = 1; j < vol->bv_vol_n; j++) { if (vol->bv_vols[j]->bv_size != vol->bv_vols[0]->bv_size) { BL_LOG_ERR("varying subvol size\n"); return -EIO; } } vol->bv_size = vol->bv_vols[0]->bv_size * vol->bv_vol_n; break; case BLOCK_VOLUME_CONCAT: BLK_READBUF(p, end, 4); READ32(vol->bv_vol_n); if (!vol->bv_vol_n) return -EIO; *array_cnt = vol->bv_vol_n; BL_LOG_INFO("%s: concat %d %d\n", __func__, voln, vol->bv_vol_n); status = set_vol_array(&p, end, vols, voln); if (status) return status; vol->bv_size = sum_subvolume_sizes(vol); break; default: BL_LOG_ERR("Unknown volume type %i\n", vol->bv_type); out_err: return -EIO; } *pp = p; return status; } uint64_t process_deviceinfo(const char *dev_addr_buf, unsigned int dev_addr_len, uint32_t *major, uint32_t *minor) { int num_vols, i, status, count; uint32_t *p, *end; struct bl_volume *vols = NULL, **arrays = NULL, **arrays_ptr = NULL; uint64_t dev = 0; p = (uint32_t *) dev_addr_buf; end = (uint32_t *) ((char *)p + dev_addr_len); /* Decode block volume */ BLK_READBUF(p, end, 4); READ32(num_vols); BL_LOG_INFO("%s: %d vols\n", __func__, num_vols); if (num_vols <= 0) goto out_err; vols = (struct bl_volume *)malloc(num_vols * sizeof(struct bl_volume)); if (!vols) { BL_LOG_ERR("%s: Out of memory\n", __func__); goto out_err; } /* Each volume in vols array needs its own array. Save time by * allocating them all in one large hunk. Because each volume * array can only reference previous volumes, and because once * a concat or stripe references a volume, it may never be * referenced again, the volume arrays are guaranteed to fit * in the suprisingly small space allocated. */ arrays_ptr = arrays = (struct bl_volume **)malloc(num_vols * 2 * sizeof(struct bl_volume *)); if (!arrays) { BL_LOG_ERR("%s: Out of memory\n", __func__); goto out_err; } for (i = 0; i < num_vols; i++) { vols[i].bv_vols = arrays_ptr; status = decode_blk_volume(&p, end, vols, i, &count); if (status) goto out_err; arrays_ptr += count; } if (p != end) { BL_LOG_ERR("p is not equal to end!\n"); goto out_err; } dev = dm_device_create(vols, num_vols); if (dev) { *major = MAJOR(dev); *minor = MINOR(dev); } out_err: if (vols) free(vols); if (arrays) free(arrays); return dev; }