From c15227de132f1295f3db6b7df9079956b1020fd8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Sep 2009 13:52:12 +0200 Subject: block: use normal I/O path for discard requests prepare_discard_fn() was being called in a place where memory allocation was effectively impossible. This makes it inappropriate for all but the most trivial translations of Linux's DISCARD operation to the block command set. Additionally adding a payload there makes the ownership of the bio backing unclear as it's now allocated by the device driver and not the submitter as usual. It is replaced with QUEUE_FLAG_DISCARD which is used to indicate whether the queue supports discard operations or not. blkdev_issue_discard now allocates a one-page, sector-length payload which is the right thing for the common ATA and SCSI implementations. The mtd implementation of prepare_discard_fn() is replaced with simply checking for the request being a discard. Largely based on a previous patch from Matthew Wilcox which did the prepare_discard_fn but not the different payload allocation yet. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-barrier.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) (limited to 'block/blk-barrier.c') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 6593ab39cfe..21f5025c394 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -350,6 +350,7 @@ static void blkdev_discard_end_io(struct bio *bio, int err) if (bio->bi_private) complete(bio->bi_private); + __free_page(bio_page(bio)); bio_put(bio); } @@ -372,26 +373,44 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, struct request_queue *q = bdev_get_queue(bdev); int type = flags & DISCARD_FL_BARRIER ? DISCARD_BARRIER : DISCARD_NOBARRIER; + struct bio *bio; + struct page *page; int ret = 0; if (!q) return -ENXIO; - if (!q->prepare_discard_fn) + if (!blk_queue_discard(q)) return -EOPNOTSUPP; while (nr_sects && !ret) { - struct bio *bio = bio_alloc(gfp_mask, 0); - if (!bio) - return -ENOMEM; + unsigned int sector_size = q->limits.logical_block_size; + bio = bio_alloc(gfp_mask, 1); + if (!bio) + goto out; + bio->bi_sector = sector; bio->bi_end_io = blkdev_discard_end_io; bio->bi_bdev = bdev; if (flags & DISCARD_FL_WAIT) bio->bi_private = &wait; - bio->bi_sector = sector; + /* + * Add a zeroed one-sector payload as that's what + * our current implementations need. If we'll ever need + * more the interface will need revisiting. + */ + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + goto out_free_bio; + if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size) + goto out_free_page; + /* + * And override the bio size - the way discard works we + * touch many more blocks on disk than the actual payload + * length. + */ if (nr_sects > queue_max_hw_sectors(q)) { bio->bi_size = queue_max_hw_sectors(q) << 9; nr_sects -= queue_max_hw_sectors(q); @@ -414,5 +433,11 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, bio_put(bio); } return ret; +out_free_page: + __free_page(page); +out_free_bio: + bio_put(bio); +out: + return -ENOMEM; } EXPORT_SYMBOL(blkdev_issue_discard); -- cgit From 67efc9258010da35b27b3854d0880c7e193004ed Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Sep 2009 13:54:20 +0200 Subject: block: allow large discard requests Currently we set the bio size to the byte equivalent of the blocks to be trimmed when submitting the initial DISCARD ioctl. That means it is subject to the max_hw_sectors limitation of the HBA which is much lower than the size of a DISCARD request we can support. Add a separate max_discard_sectors tunable to limit the size for discard requests. We limit the max discard request size in bytes to 32bit as that is the limit for bio->bi_size. This could be much larger if we had a way to pass that information through the block layer. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-barrier.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'block/blk-barrier.c') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 21f5025c394..8873b9b439f 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -385,6 +385,8 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, while (nr_sects && !ret) { unsigned int sector_size = q->limits.logical_block_size; + unsigned int max_discard_sectors = + min(q->limits.max_discard_sectors, UINT_MAX >> 9); bio = bio_alloc(gfp_mask, 1); if (!bio) @@ -411,10 +413,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, * touch many more blocks on disk than the actual payload * length. */ - if (nr_sects > queue_max_hw_sectors(q)) { - bio->bi_size = queue_max_hw_sectors(q) << 9; - nr_sects -= queue_max_hw_sectors(q); - sector += queue_max_hw_sectors(q); + if (nr_sects > max_discard_sectors) { + bio->bi_size = max_discard_sectors << 9; + nr_sects -= max_discard_sectors; + sector += max_discard_sectors; } else { bio->bi_size = nr_sects << 9; nr_sects = 0; -- cgit