From dd22f551ac0ad366f92f601835f6623b83adc331 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 7 Jan 2015 18:05:34 +0200 Subject: block: Change direct_access calling convention In order to support accesses to larger chunks of memory, pass in a 'size' parameter (counted in bytes), and return the amount available at that address. Add a new helper function, bdev_direct_access(), to handle common functionality including partition handling, checking the length requested is positive, checking for the sector being page-aligned, and checking the length of the request does not pass the end of the partition. Signed-off-by: Matthew Wilcox Reviewed-by: Jan Kara Reviewed-by: Boaz Harrosh Signed-off-by: Jens Axboe --- drivers/block/brd.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 3598110d2cef..89e90ec52f28 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -370,25 +370,25 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector, } #ifdef CONFIG_BLK_DEV_XIP -static int brd_direct_access(struct block_device *bdev, sector_t sector, - void **kaddr, unsigned long *pfn) +static long brd_direct_access(struct block_device *bdev, sector_t sector, + void **kaddr, unsigned long *pfn, long size) { struct brd_device *brd = bdev->bd_disk->private_data; struct page *page; if (!brd) return -ENODEV; - if (sector & (PAGE_SECTORS-1)) - return -EINVAL; - if (sector + PAGE_SECTORS > get_capacity(bdev->bd_disk)) - return -ERANGE; page = brd_insert_page(brd, sector); if (!page) return -ENOSPC; *kaddr = page_address(page); *pfn = page_to_pfn(page); - return 0; + /* + * TODO: If size > PAGE_SIZE, we could look to see if the next page in + * the file happens to be mapped to the next page of physical RAM. + */ + return PAGE_SIZE; } #endif -- cgit v1.2.3 From d93ba7a5a97c9f315bacdcdb8de4e5f368e7b396 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 20 Jan 2015 20:06:30 -0500 Subject: block: Add discard flag to blkdev_issue_zeroout() function blkdev_issue_discard() will zero a given block range. This is done by way of explicit writing, thus provisioning or allocating the blocks on disk. There are use cases where the desired behavior is to zero the blocks but unprovision them if possible. The blocks must deterministically contain zeroes when they are subsequently read back. This patch adds a flag to blkdev_issue_zeroout() that provides this variant. If the discard flag is set and a block device guarantees discard_zeroes_data we will use REQ_DISCARD to clear the block range. If the device does not support discard_zeroes_data or if the discard request fails we will fall back to first REQ_WRITE_SAME and then a regular REQ_WRITE. Also update the callers of blkdev_issue_zero() to reflect the new flag and make sb_issue_zeroout() prefer the discard approach. Signed-off-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-lib.c | 30 ++++++++++++++++++++++++++---- block/ioctl.c | 2 +- drivers/block/drbd/drbd_receiver.c | 2 +- include/linux/blkdev.h | 4 ++-- 4 files changed, 30 insertions(+), 8 deletions(-) (limited to 'drivers/block') diff --git a/block/blk-lib.c b/block/blk-lib.c index 8411be3c19d3..715e948f58a4 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -283,23 +283,45 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, * @sector: start sector * @nr_sects: number of sectors to write * @gfp_mask: memory allocation flags (for bio_alloc) + * @discard: whether to discard the block range * * Description: - * Generate and issue number of bios with zerofiled pages. + + * Zero-fill a block range. If the discard flag is set and the block + * device guarantees that subsequent READ operations to the block range + * in question will return zeroes, the blocks will be discarded. Should + * the discard request fail, if the discard flag is not set, or if + * discard_zeroes_data is not supported, this function will resort to + * zeroing the blocks manually, thus provisioning (allocating, + * anchoring) them. If the block device supports the WRITE SAME command + * blkdev_issue_zeroout() will use it to optimize the process of + * clearing the block range. Otherwise the zeroing will be performed + * using regular WRITE calls. */ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask) + sector_t nr_sects, gfp_t gfp_mask, bool discard) { + struct request_queue *q = bdev_get_queue(bdev); + unsigned char bdn[BDEVNAME_SIZE]; + + if (discard && blk_queue_discard(q) && q->limits.discard_zeroes_data) { + + if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, 0)) + return 0; + + bdevname(bdev, bdn); + pr_warn("%s: DISCARD failed. Manually zeroing.\n", bdn); + } + if (bdev_write_same(bdev)) { - unsigned char bdn[BDEVNAME_SIZE]; if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, ZERO_PAGE(0))) return 0; bdevname(bdev, bdn); - pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn); + pr_warn("%s: WRITE SAME failed. Manually zeroing.\n", bdn); } return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask); diff --git a/block/ioctl.c b/block/ioctl.c index 6c7bf903742f..7d8befde2aca 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -198,7 +198,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start, if (start + len > (i_size_read(bdev->bd_inode) >> 9)) return -EINVAL; - return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL); + return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL, false); } static int put_ushort(unsigned long arg, unsigned short val) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d169b4a79267..cee20354ac37 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1388,7 +1388,7 @@ int drbd_submit_peer_request(struct drbd_device *device, list_add_tail(&peer_req->w.list, &device->active_ee); spin_unlock_irq(&device->resource->req_lock); if (blkdev_issue_zeroout(device->ldev->backing_bdev, - sector, data_size >> 9, GFP_NOIO)) + sector, data_size >> 9, GFP_NOIO, false)) peer_req->flags |= EE_WAS_ERROR; drbd_endio_write_sec_final(peer_req); return 0; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e9086be6d9a0..4c4b732d7556 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1162,7 +1162,7 @@ extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct page *page); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask); + sector_t nr_sects, gfp_t gfp_mask, bool discard); static inline int sb_issue_discard(struct super_block *sb, sector_t block, sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) { @@ -1176,7 +1176,7 @@ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, return blkdev_issue_zeroout(sb->s_bdev, block << (sb->s_blocksize_bits - 9), nr_blocks << (sb->s_blocksize_bits - 9), - gfp_mask); + gfp_mask, true); } extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); -- cgit v1.2.3 From ee1b6f7aff94019c09e73837054979063f722046 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 15 Jan 2015 17:32:25 -0800 Subject: block: support different tag allocation policy The libata tag allocation is using a round-robin policy. Next patch will make libata use block generic tag allocation, so let's add a policy to tag allocation. Currently two policies: FIFO (default) and round-robin. Cc: Jens Axboe Cc: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-tag.c | 33 +++++++++++++++++++++++++-------- drivers/block/osdblk.c | 2 +- drivers/scsi/scsi_scan.c | 3 ++- include/linux/blkdev.h | 8 ++++++-- include/scsi/scsi_host.h | 3 +++ include/scsi/scsi_tcq.h | 3 ++- 6 files changed, 39 insertions(+), 13 deletions(-) (limited to 'drivers/block') diff --git a/block/blk-tag.c b/block/blk-tag.c index a185b86741e5..f0344e6939d5 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -119,7 +119,7 @@ fail: } static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, - int depth) + int depth, int alloc_policy) { struct blk_queue_tag *tags; @@ -131,6 +131,8 @@ static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q, goto fail; atomic_set(&tags->refcnt, 1); + tags->alloc_policy = alloc_policy; + tags->next_tag = 0; return tags; fail: kfree(tags); @@ -140,10 +142,11 @@ fail: /** * blk_init_tags - initialize the tag info for an external tag map * @depth: the maximum queue depth supported + * @alloc_policy: tag allocation policy **/ -struct blk_queue_tag *blk_init_tags(int depth) +struct blk_queue_tag *blk_init_tags(int depth, int alloc_policy) { - return __blk_queue_init_tags(NULL, depth); + return __blk_queue_init_tags(NULL, depth, alloc_policy); } EXPORT_SYMBOL(blk_init_tags); @@ -152,19 +155,20 @@ EXPORT_SYMBOL(blk_init_tags); * @q: the request queue for the device * @depth: the maximum queue depth supported * @tags: the tag to use + * @alloc_policy: tag allocation policy * * Queue lock must be held here if the function is called to resize an * existing map. **/ int blk_queue_init_tags(struct request_queue *q, int depth, - struct blk_queue_tag *tags) + struct blk_queue_tag *tags, int alloc_policy) { int rc; BUG_ON(tags && q->queue_tags && tags != q->queue_tags); if (!tags && !q->queue_tags) { - tags = __blk_queue_init_tags(q, depth); + tags = __blk_queue_init_tags(q, depth, alloc_policy); if (!tags) return -ENOMEM; @@ -344,9 +348,21 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq) } do { - tag = find_first_zero_bit(bqt->tag_map, max_depth); - if (tag >= max_depth) - return 1; + if (bqt->alloc_policy == BLK_TAG_ALLOC_FIFO) { + tag = find_first_zero_bit(bqt->tag_map, max_depth); + if (tag >= max_depth) + return 1; + } else { + int start = bqt->next_tag; + int size = min_t(int, bqt->max_depth, max_depth + start); + tag = find_next_zero_bit(bqt->tag_map, size, start); + if (tag >= size && start + size > bqt->max_depth) { + size = start + size - bqt->max_depth; + tag = find_first_zero_bit(bqt->tag_map, size); + } + if (tag >= size) + return 1; + } } while (test_and_set_bit_lock(tag, bqt->tag_map)); /* @@ -354,6 +370,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq) * See blk_queue_end_tag for details. */ + bqt->next_tag = (tag + 1) % bqt->max_depth; rq->cmd_flags |= REQ_QUEUED; rq->tag = tag; bqt->tag_index[tag] = rq; diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 79aa179305b5..e22942596207 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -423,7 +423,7 @@ static int osdblk_init_disk(struct osdblk_device *osdev) } /* switch queue to TCQ mode; allocate tag map */ - rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL); + rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL, BLK_TAG_ALLOC_FIFO); if (rc) { blk_cleanup_queue(q); put_disk(disk); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 983aed10ff2f..921a8c897eb2 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -290,7 +290,8 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, if (!shost_use_blk_mq(sdev->host) && (shost->bqt || shost->hostt->use_blk_tags)) { blk_queue_init_tags(sdev->request_queue, - sdev->host->cmd_per_lun, shost->bqt); + sdev->host->cmd_per_lun, shost->bqt, + shost->hostt->tag_alloc_policy); } scsi_change_queue_depth(sdev, sdev->host->cmd_per_lun); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4c4b732d7556..6f388fd1c11c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -272,7 +272,11 @@ struct blk_queue_tag { int max_depth; /* what we will send to device */ int real_max_depth; /* what the array can hold */ atomic_t refcnt; /* map can be shared */ + int alloc_policy; /* tag allocation policy */ + int next_tag; /* next tag */ }; +#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */ +#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */ #define BLK_SCSI_MAX_CMDS (256) #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) @@ -1139,11 +1143,11 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) extern int blk_queue_start_tag(struct request_queue *, struct request *); extern struct request *blk_queue_find_tag(struct request_queue *, int); extern void blk_queue_end_tag(struct request_queue *, struct request *); -extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *); +extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *, int); extern void blk_queue_free_tags(struct request_queue *); extern int blk_queue_resize_tags(struct request_queue *, int); extern void blk_queue_invalidate_tags(struct request_queue *); -extern struct blk_queue_tag *blk_init_tags(int); +extern struct blk_queue_tag *blk_init_tags(int, int); extern void blk_free_tags(struct blk_queue_tag *); static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 019e66858ce6..e113c757d555 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -402,6 +402,9 @@ struct scsi_host_template { */ unsigned char present; + /* If use block layer to manage tags, this is tag allocation policy */ + int tag_alloc_policy; + /* * Let the block layer assigns tags to all commands. */ diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h index 9708b28bd2aa..b27977e8aaed 100644 --- a/include/scsi/scsi_tcq.h +++ b/include/scsi/scsi_tcq.h @@ -66,7 +66,8 @@ static inline int scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth) * devices on the shared host (for libata) */ if (!shost->bqt) { - shost->bqt = blk_init_tags(depth); + shost->bqt = blk_init_tags(depth, + shost->hostt->tag_alloc_policy); if (!shost->bqt) return -ENOMEM; } -- cgit v1.2.3