From 4363ac7c13a9a4b763c6e8d9fdbfc2468f3b8ca4 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 18 Sep 2012 12:19:27 -0400 Subject: block: Implement support for WRITE SAME The WRITE SAME command supported on some SCSI devices allows the same block to be efficiently replicated throughout a block range. Only a single logical block is transferred from the host and the storage device writes the same data to all blocks described by the I/O. This patch implements support for WRITE SAME in the block layer. The blkdev_issue_write_same() function can be used by filesystems and block drivers to replicate a buffer across a block range. This can be used to efficiently initialize software RAID devices, etc. Signed-off-by: Martin K. Petersen Acked-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-core.c | 14 ++++++++-- block/blk-lib.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++ block/blk-merge.c | 9 +++++++ block/blk-settings.c | 16 ++++++++++++ block/blk-sysfs.c | 13 +++++++++ 5 files changed, 124 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 33eded00c5b1..3b080541098e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1704,6 +1704,11 @@ generic_make_request_checks(struct bio *bio) goto end_io; } + if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) { + err = -EOPNOTSUPP; + goto end_io; + } + /* * Various block parts want %current->io_context and lazy ioc * allocation ends up trading a lot of pain for a small amount of @@ -1809,8 +1814,6 @@ EXPORT_SYMBOL(generic_make_request); */ void submit_bio(int rw, struct bio *bio) { - int count = bio_sectors(bio); - bio->bi_rw |= rw; /* @@ -1818,6 +1821,13 @@ void submit_bio(int rw, struct bio *bio) * go through the normal accounting stuff before submission. */ if (bio_has_data(bio)) { + unsigned int count; + + if (unlikely(rw & REQ_WRITE_SAME)) + count = bdev_logical_block_size(bio->bi_bdev) >> 9; + else + count = bio_sectors(bio); + if (rw & WRITE) { count_vm_events(PGPGOUT, count); } else { diff --git a/block/blk-lib.c b/block/blk-lib.c index 19cc761cacb2..a062543c58ac 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -129,6 +129,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, } EXPORT_SYMBOL(blkdev_issue_discard); +/** + * blkdev_issue_write_same - queue a write same operation + * @bdev: target blockdev + * @sector: start sector + * @nr_sects: number of sectors to write + * @gfp_mask: memory allocation flags (for bio_alloc) + * @page: page containing data to write + * + * Description: + * Issue a write same request for the sectors in question. + */ +int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask, + struct page *page) +{ + DECLARE_COMPLETION_ONSTACK(wait); + struct request_queue *q = bdev_get_queue(bdev); + unsigned int max_write_same_sectors; + struct bio_batch bb; + struct bio *bio; + int ret = 0; + + if (!q) + return -ENXIO; + + max_write_same_sectors = q->limits.max_write_same_sectors; + + if (max_write_same_sectors == 0) + return -EOPNOTSUPP; + + atomic_set(&bb.done, 1); + bb.flags = 1 << BIO_UPTODATE; + bb.wait = &wait; + + while (nr_sects) { + bio = bio_alloc(gfp_mask, 1); + if (!bio) { + ret = -ENOMEM; + break; + } + + bio->bi_sector = sector; + bio->bi_end_io = bio_batch_end_io; + bio->bi_bdev = bdev; + bio->bi_private = &bb; + bio->bi_vcnt = 1; + bio->bi_io_vec->bv_page = page; + bio->bi_io_vec->bv_offset = 0; + bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev); + + if (nr_sects > max_write_same_sectors) { + bio->bi_size = max_write_same_sectors << 9; + nr_sects -= max_write_same_sectors; + sector += max_write_same_sectors; + } else { + bio->bi_size = nr_sects << 9; + nr_sects = 0; + } + + atomic_inc(&bb.done); + submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio); + } + + /* Wait for bios in-flight */ + if (!atomic_dec_and_test(&bb.done)) + wait_for_completion(&wait); + + if (!test_bit(BIO_UPTODATE, &bb.flags)) + ret = -ENOTSUPP; + + return ret; +} +EXPORT_SYMBOL(blkdev_issue_write_same); + /** * blkdev_issue_zeroout - generate number of zero filed write bios * @bdev: blockdev to issue diff --git a/block/blk-merge.c b/block/blk-merge.c index 642b862608a1..936a110de0b9 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -419,6 +419,10 @@ static int attempt_merge(struct request_queue *q, struct request *req, || next->special) return 0; + if (req->cmd_flags & REQ_WRITE_SAME && + !blk_write_same_mergeable(req->bio, next->bio)) + return 0; + /* * If we are allowed to merge, then append bio list * from next to rq and release next. merge_requests_fn @@ -518,6 +522,11 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (bio_integrity(bio) != blk_integrity_rq(rq)) return false; + /* must be using the same buffer */ + if (rq->cmd_flags & REQ_WRITE_SAME && + !blk_write_same_mergeable(rq->bio, bio)) + return false; + return true; } diff --git a/block/blk-settings.c b/block/blk-settings.c index 565a6786032f..779bb7646bcd 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS; + lim->max_write_same_sectors = 0; lim->max_discard_sectors = 0; lim->discard_granularity = 0; lim->discard_alignment = 0; @@ -144,6 +145,7 @@ void blk_set_stacking_limits(struct queue_limits *lim) lim->max_segments = USHRT_MAX; lim->max_hw_sectors = UINT_MAX; lim->max_sectors = UINT_MAX; + lim->max_write_same_sectors = UINT_MAX; } EXPORT_SYMBOL(blk_set_stacking_limits); @@ -285,6 +287,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q, } EXPORT_SYMBOL(blk_queue_max_discard_sectors); +/** + * blk_queue_max_write_same_sectors - set max sectors for a single write same + * @q: the request queue for the device + * @max_write_same_sectors: maximum number of sectors to write per command + **/ +void blk_queue_max_write_same_sectors(struct request_queue *q, + unsigned int max_write_same_sectors) +{ + q->limits.max_write_same_sectors = max_write_same_sectors; +} +EXPORT_SYMBOL(blk_queue_max_write_same_sectors); + /** * blk_queue_max_segments - set max hw segments for a request for this queue * @q: the request queue for the device @@ -510,6 +524,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); + t->max_write_same_sectors = min(t->max_write_same_sectors, + b->max_write_same_sectors); t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index ea51d827a0bb..247dbfd42621 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -180,6 +180,13 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag return queue_var_show(queue_discard_zeroes_data(q), page); } +static ssize_t queue_write_same_max_show(struct request_queue *q, char *page) +{ + return sprintf(page, "%llu\n", + (unsigned long long)q->limits.max_write_same_sectors << 9); +} + + static ssize_t queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) { @@ -385,6 +392,11 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = { .show = queue_discard_zeroes_data_show, }; +static struct queue_sysfs_entry queue_write_same_max_entry = { + .attr = {.name = "write_same_max_bytes", .mode = S_IRUGO }, + .show = queue_write_same_max_show, +}; + static struct queue_sysfs_entry queue_nonrot_entry = { .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, .show = queue_show_nonrot, @@ -432,6 +444,7 @@ static struct attribute *default_attrs[] = { &queue_discard_granularity_entry.attr, &queue_discard_max_entry.attr, &queue_discard_zeroes_data_entry.attr, + &queue_write_same_max_entry.attr, &queue_nonrot_entry.attr, &queue_nomerges_entry.attr, &queue_rq_affinity_entry.attr, -- cgit v1.2.3