From 6958f145459ca7ad9715024de97445addacb8510 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: kill QUEUE_ORDERED_BY_TAG Nobody is making meaningful use of ORDERED_BY_TAG now and queue draining for barrier requests will be removed soon which will render the advantage of tag ordering moot. Kill ORDERED_BY_TAG. The following users are affected. * brd: converted to ORDERED_DRAIN. * virtio_blk: ORDERED_TAG path was already marked deprecated. Removed. * xen-blkfront: ORDERED_TAG case dropped. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Cc: Nick Piggin Cc: Michael S. Tsirkin Cc: Jeremy Fitzhardinge Cc: Chris Wright Signed-off-by: Jens Axboe --- block/blk-barrier.c | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) (limited to 'block') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index f0faefca032f..c807e9ca3a68 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -26,10 +26,7 @@ int blk_queue_ordered(struct request_queue *q, unsigned ordered) if (ordered != QUEUE_ORDERED_NONE && ordered != QUEUE_ORDERED_DRAIN && ordered != QUEUE_ORDERED_DRAIN_FLUSH && - ordered != QUEUE_ORDERED_DRAIN_FUA && - ordered != QUEUE_ORDERED_TAG && - ordered != QUEUE_ORDERED_TAG_FLUSH && - ordered != QUEUE_ORDERED_TAG_FUA) { + ordered != QUEUE_ORDERED_DRAIN_FUA) { printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); return -EINVAL; } @@ -155,21 +152,9 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) * For an empty barrier, there's no actual BAR request, which * in turn makes POSTFLUSH unnecessary. Mask them off. */ - if (!blk_rq_sectors(rq)) { + if (!blk_rq_sectors(rq)) q->ordered &= ~(QUEUE_ORDERED_DO_BAR | QUEUE_ORDERED_DO_POSTFLUSH); - /* - * Empty barrier on a write-through device w/ ordered - * tag has no command to issue and without any command - * to issue, ordering by tag can't be used. Drain - * instead. - */ - if ((q->ordered & QUEUE_ORDERED_BY_TAG) && - !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) { - q->ordered &= ~QUEUE_ORDERED_BY_TAG; - q->ordered |= QUEUE_ORDERED_BY_DRAIN; - } - } /* stash away the original request */ blk_dequeue_request(rq); @@ -210,7 +195,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) } else skip |= QUEUE_ORDSEQ_PREFLUSH; - if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q)) + if (queue_in_flight(q)) rq = NULL; else skip |= QUEUE_ORDSEQ_DRAIN; @@ -257,16 +242,10 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) rq != &q->pre_flush_rq && rq != &q->post_flush_rq) return true; - if (q->ordered & QUEUE_ORDERED_BY_TAG) { - /* Ordered by tag. Blocking the next barrier is enough. */ - if (is_barrier && rq != &q->bar_rq) - *rqp = NULL; - } else { - /* Ordered by draining. Wait for turn. */ - WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); - if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) - *rqp = NULL; - } + /* Ordered by draining. Wait for turn. */ + WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); + if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) + *rqp = NULL; return true; } -- cgit v1.2.3 From 4913efe456c987057e5d36a3f0a55422a9072cae Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: deprecate barrier and replace blk_queue_ordered() with blk_queue_flush() Barrier is deemed too heavy and will soon be replaced by FLUSH/FUA requests. Deprecate barrier. 
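On the driver side the conversion boils down to advertising the device's cache characteristics through the new helper described below; a minimal sketch, assuming a hypothetical device whose queue q has a volatile write cache and also handles FUA writes (the sd conversion further down is the real instance of this case):

        /* device has a flushable write cache and handles FUA writes */
        blk_queue_flush(q, REQ_FLUSH | REQ_FUA);

        /* write-through device: nothing to advertise */
        blk_queue_flush(q, 0);

The per-driver hunks below show the actual conversions.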
All REQ_HARDBARRIERs are failed with -EOPNOTSUPP and blk_queue_ordered() is replaced with simpler blk_queue_flush(). blk_queue_flush() takes combinations of REQ_FLUSH and FUA. If a device has write cache and can flush it, it should set REQ_FLUSH. If the device can handle FUA writes, it should also set REQ_FUA. All blk_queue_ordered() users are converted. * ORDERED_DRAIN is mapped to 0 which is the default value. * ORDERED_DRAIN_FLUSH is mapped to REQ_FLUSH. * ORDERED_DRAIN_FLUSH_FUA is mapped to REQ_FLUSH | REQ_FUA. Signed-off-by: Tejun Heo Acked-by: Boaz Harrosh Cc: Christoph Hellwig Cc: Nick Piggin Cc: Michael S. Tsirkin Cc: Jeremy Fitzhardinge Cc: Chris Wright Cc: FUJITA Tomonori Cc: Geert Uytterhoeven Cc: David S. Miller Cc: Alasdair G Kergon Cc: Pierre Ossman Cc: Stefan Weinhuber Signed-off-by: Jens Axboe --- block/blk-barrier.c | 29 ----------------------------- block/blk-core.c | 6 ++++-- block/blk-settings.c | 20 ++++++++++++++++++++ drivers/block/brd.c | 1 - drivers/block/loop.c | 2 +- drivers/block/osdblk.c | 2 +- drivers/block/ps3disk.c | 2 +- drivers/block/virtio_blk.c | 25 +++++++++---------------- drivers/block/xen-blkfront.c | 43 ++++++++++++------------------------------- drivers/ide/ide-disk.c | 13 ++++++------- drivers/md/dm.c | 2 +- drivers/mmc/card/queue.c | 1 - drivers/s390/block/dasd.c | 1 - drivers/scsi/sd.c | 16 ++++++++-------- include/linux/blkdev.h | 6 ++++-- 15 files changed, 67 insertions(+), 102 deletions(-) (limited to 'block') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index c807e9ca3a68..ed0aba5463ab 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -9,35 +9,6 @@ #include "blk.h" -/** - * blk_queue_ordered - does this queue support ordered writes - * @q: the request queue - * @ordered: one of QUEUE_ORDERED_* - * - * Description: - * For journalled file systems, doing ordered writes on a commit - * block instead of explicitly doing wait_on_buffer (which is bad - * for performance) can be a big win. Block drivers supporting this - * feature should call this function and indicate so. 
- * - **/ -int blk_queue_ordered(struct request_queue *q, unsigned ordered) -{ - if (ordered != QUEUE_ORDERED_NONE && - ordered != QUEUE_ORDERED_DRAIN && - ordered != QUEUE_ORDERED_DRAIN_FLUSH && - ordered != QUEUE_ORDERED_DRAIN_FUA) { - printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); - return -EINVAL; - } - - q->ordered = ordered; - q->next_ordered = ordered; - - return 0; -} -EXPORT_SYMBOL(blk_queue_ordered); - /* * Cache flushing for ordered writes handling */ diff --git a/block/blk-core.c b/block/blk-core.c index ee1a1e7e63cc..f06354183b29 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1203,11 +1203,13 @@ static int __make_request(struct request_queue *q, struct bio *bio) const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; int rw_flags; - if ((bio->bi_rw & REQ_HARDBARRIER) && - (q->next_ordered == QUEUE_ORDERED_NONE)) { + /* REQ_HARDBARRIER is no more */ + if (WARN_ONCE(bio->bi_rw & REQ_HARDBARRIER, + "block: HARDBARRIER is deprecated, use FLUSH/FUA instead\n")) { bio_endio(bio, -EOPNOTSUPP); return 0; } + /* * low level driver can indicate that it wants pages above a * certain limit bounced to low memory (ie for highmem, or even diff --git a/block/blk-settings.c b/block/blk-settings.c index a234f4bf1d6f..9b18afcfe925 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -794,6 +794,26 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask) } EXPORT_SYMBOL(blk_queue_update_dma_alignment); +/** + * blk_queue_flush - configure queue's cache flush capability + * @q: the request queue for the device + * @flush: 0, REQ_FLUSH or REQ_FLUSH | REQ_FUA + * + * Tell block layer cache flush capability of @q. If it supports + * flushing, REQ_FLUSH should be set. If it supports bypassing + * write cache for individual writes, REQ_FUA should be set. 
+ */ +void blk_queue_flush(struct request_queue *q, unsigned int flush) +{ + WARN_ON_ONCE(flush & ~(REQ_FLUSH | REQ_FUA)); + + if (WARN_ON_ONCE(!(flush & REQ_FLUSH) && (flush & REQ_FUA))) + flush &= ~REQ_FUA; + + q->flush_flags = flush & (REQ_FLUSH | REQ_FUA); +} +EXPORT_SYMBOL_GPL(blk_queue_flush); + static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 47a41272d26b..fa33f97722ba 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -482,7 +482,6 @@ static struct brd_device *brd_alloc(int i) if (!brd->brd_queue) goto out_free_dev; blk_queue_make_request(brd->brd_queue, brd_make_request); - blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_DRAIN); blk_queue_max_hw_sectors(brd->brd_queue, 1024); blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c3a4a2e176da..953d1e12f4d4 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -832,7 +832,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->lo_queue->unplug_fn = loop_unplug; if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) - blk_queue_ordered(lo->lo_queue, QUEUE_ORDERED_DRAIN_FLUSH); + blk_queue_flush(lo->lo_queue, REQ_FLUSH); set_capacity(lo->lo_disk, size); bd_set_size(bdev, size << 9); diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 2284b4f05c62..72d62462433d 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -439,7 +439,7 @@ static int osdblk_init_disk(struct osdblk_device *osdev) blk_queue_stack_limits(q, osd_request_queue(osdev->osd)); blk_queue_prep_rq(q, blk_queue_start_tag); - blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); + blk_queue_flush(q, REQ_FLUSH); disk->queue = q; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index e9da874d0419..4911f9e57bc7 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -468,7 +468,7 @@ static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev) blk_queue_dma_alignment(queue, dev->blk_size-1); blk_queue_logical_block_size(queue, dev->blk_size); - blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH); + blk_queue_flush(queue, REQ_FLUSH); blk_queue_max_segments(queue, -1); blk_queue_max_segment_size(queue, dev->bounce_size); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 79652809eee8..d10b635b3946 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -388,22 +388,15 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) vblk->disk->driverfs_dev = &vdev->dev; index++; - if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) { - /* - * If the FLUSH feature is supported we do have support for - * flushing a volatile write cache on the host. Use that - * to implement write barrier support. - */ - blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); - } else { - /* - * If the FLUSH feature is not supported we must assume that - * the host does not perform any kind of volatile write - * caching. We still need to drain the queue to provider - * proper barrier semantics. - */ - blk_queue_ordered(q, QUEUE_ORDERED_DRAIN); - } + /* + * If the FLUSH feature is supported we do have support for + * flushing a volatile write cache on the host. Use that to + * implement write barrier support; otherwise, we must assume + * that the host does not perform any kind of volatile write + * caching. 
+ */ + if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) + blk_queue_flush(q, REQ_FLUSH); /* If disk is read-only in the host, the guest should obey */ if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 50ec6f834996..0b1eea643262 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -95,7 +95,7 @@ struct blkfront_info struct gnttab_free_callback callback; struct blk_shadow shadow[BLK_RING_SIZE]; unsigned long shadow_free; - int feature_barrier; + unsigned int feature_flush; int is_ready; }; @@ -418,25 +418,12 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) } -static int xlvbd_barrier(struct blkfront_info *info) +static void xlvbd_flush(struct blkfront_info *info) { - int err; - const char *barrier; - - switch (info->feature_barrier) { - case QUEUE_ORDERED_DRAIN: barrier = "enabled"; break; - case QUEUE_ORDERED_NONE: barrier = "disabled"; break; - default: return -EINVAL; - } - - err = blk_queue_ordered(info->rq, info->feature_barrier); - - if (err) - return err; - + blk_queue_flush(info->rq, info->feature_flush); printk(KERN_INFO "blkfront: %s: barriers %s\n", - info->gd->disk_name, barrier); - return 0; + info->gd->disk_name, + info->feature_flush ? "enabled" : "disabled"); } @@ -515,7 +502,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, info->rq = gd->queue; info->gd = gd; - xlvbd_barrier(info); + xlvbd_flush(info); if (vdisk_info & VDISK_READONLY) set_disk_ro(gd, 1); @@ -661,8 +648,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", info->gd->disk_name); error = -EOPNOTSUPP; - info->feature_barrier = QUEUE_ORDERED_NONE; - xlvbd_barrier(info); + info->feature_flush = 0; + xlvbd_flush(info); } /* fall through */ case BLKIF_OP_READ: @@ -1075,19 +1062,13 @@ static void blkfront_connect(struct blkfront_info *info) /* * If there's no "feature-barrier" defined, then it means * we're dealing with a very old backend which writes - * synchronously; draining will do what needs to get done. + * synchronously; nothing to do. * * If there are barriers, then we use flush. - * - * If barriers are not supported, then there's no much we can - * do, so just set ordering to NONE. */ - if (err) - info->feature_barrier = QUEUE_ORDERED_DRAIN; - else if (barrier) - info->feature_barrier = QUEUE_ORDERED_DRAIN_FLUSH; - else - info->feature_barrier = QUEUE_ORDERED_NONE; + info->feature_flush = 0; + if (!err && barrier) + info->feature_flush = REQ_FLUSH; err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 7433e07de30e..7c5b01ce51d2 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -516,10 +516,10 @@ static int ide_do_setfeature(ide_drive_t *drive, u8 feature, u8 nsect) return ide_no_data_taskfile(drive, &cmd); } -static void update_ordered(ide_drive_t *drive) +static void update_flush(ide_drive_t *drive) { u16 *id = drive->id; - unsigned ordered = QUEUE_ORDERED_NONE; + unsigned flush = 0; if (drive->dev_flags & IDE_DFLAG_WCACHE) { unsigned long long capacity; @@ -543,13 +543,12 @@ static void update_ordered(ide_drive_t *drive) drive->name, barrier ? 
"" : "not "); if (barrier) { - ordered = QUEUE_ORDERED_DRAIN_FLUSH; + flush = REQ_FLUSH; blk_queue_prep_rq(drive->queue, idedisk_prep_fn); } - } else - ordered = QUEUE_ORDERED_DRAIN; + } - blk_queue_ordered(drive->queue, ordered); + blk_queue_flush(drive->queue, flush); } ide_devset_get_flag(wcache, IDE_DFLAG_WCACHE); @@ -572,7 +571,7 @@ static int set_wcache(ide_drive_t *drive, int arg) } } - update_ordered(drive); + update_flush(drive); return err; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ac384b2a6a33..b1d92be8f990 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2245,7 +2245,7 @@ static int dm_init_request_based_queue(struct mapped_device *md) blk_queue_softirq_done(md->queue, dm_softirq_done); blk_queue_prep_rq(md->queue, dm_prep_fn); blk_queue_lld_busy(md->queue, dm_lld_busy); - blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH); + blk_queue_flush(md->queue, REQ_FLUSH); elv_register_queue(md->queue); diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index e876678176be..9c0b42bfe089 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -128,7 +128,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock mq->req = NULL; blk_queue_prep_rq(mq->queue, mmc_prep_request); - blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue); if (mmc_can_erase(card)) { queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue); diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 8373ca0de8e0..9b106d83b0cd 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2197,7 +2197,6 @@ static void dasd_setup_queue(struct dasd_block *block) */ blk_queue_max_segment_size(block->request_queue, PAGE_SIZE); blk_queue_segment_boundary(block->request_queue, PAGE_SIZE - 1); - blk_queue_ordered(block->request_queue, QUEUE_ORDERED_DRAIN); } /* diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index cdfc51ab9cf2..63bd01ae534f 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2109,7 +2109,7 @@ static int sd_revalidate_disk(struct gendisk *disk) struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdp = sdkp->device; unsigned char *buffer; - unsigned ordered; + unsigned flush = 0; SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_revalidate_disk\n")); @@ -2151,15 +2151,15 @@ static int sd_revalidate_disk(struct gendisk *disk) /* * We now have all cache related info, determine how we deal - * with ordered requests. + * with flush requests. */ - if (sdkp->WCE) - ordered = sdkp->DPOFUA - ? 
QUEUE_ORDERED_DRAIN_FUA : QUEUE_ORDERED_DRAIN_FLUSH; - else - ordered = QUEUE_ORDERED_DRAIN; + if (sdkp->WCE) { + flush |= REQ_FLUSH; + if (sdkp->DPOFUA) + flush |= REQ_FUA; + } - blk_queue_ordered(sdkp->disk->queue, ordered); + blk_queue_flush(sdkp->disk->queue, flush); set_capacity(disk, sdkp->capacity); kfree(buffer); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7077bc0d6138..e97911d4dec3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -355,8 +355,10 @@ struct request_queue struct blk_trace *blk_trace; #endif /* - * reserved for flush operations + * for flush operations */ + unsigned int flush_flags; + unsigned int ordered, next_ordered, ordseq; int orderr, ordcolor; struct request pre_flush_rq, bar_rq, post_flush_rq; @@ -865,8 +867,8 @@ extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); +extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern int blk_queue_ordered(struct request_queue *, unsigned); extern bool blk_do_ordered(struct request_queue *, struct request **); extern unsigned blk_ordered_cur_seq(struct request_queue *); extern unsigned blk_ordered_req_seq(struct request *); -- cgit v1.2.3 From dd831006d5be7f74c3fe7aef82380c51c3637960 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: misc cleanups in barrier code Make the following cleanups in preparation of barrier/flush update. * blk_do_ordered() declaration is moved from include/linux/blkdev.h to block/blk.h. * blk_do_ordered() now returns pointer to struct request, with %NULL meaning "try the next request" and ERR_PTR(-EAGAIN) "try again later". The third case will be dropped with further changes. * In the initialization of proxy barrier request, data direction is already set by init_request_from_bio(). Drop unnecessary explicit REQ_WRITE setting and move init_request_from_bio() above REQ_FUA flag setting. * add_request() is collapsed into __make_request(). These changes don't make any functional difference. 
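The new calling convention can be read off the __elv_next_request() hunk below; a caller is expected to handle the three cases roughly like this (a sketch mirroring that hunk, not additional code in the patch):

        rq = blk_do_ordered(q, rq);
        if (!rq)
                continue;       /* %NULL: try the next queued request */
        if (IS_ERR(rq))
                return NULL;    /* ERR_PTR(-EAGAIN): try again later */
        return rq;              /* dispatch this request */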
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-barrier.c | 32 ++++++++++++++------------------ block/blk-core.c | 21 ++++----------------- block/blk.h | 7 +++++-- include/linux/blkdev.h | 1 - 4 files changed, 23 insertions(+), 38 deletions(-) (limited to 'block') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index ed0aba5463ab..f1be85ba2bb5 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -110,9 +110,9 @@ static void queue_flush(struct request_queue *q, unsigned which) elv_insert(q, rq, ELEVATOR_INSERT_FRONT); } -static inline bool start_ordered(struct request_queue *q, struct request **rqp) +static inline struct request *start_ordered(struct request_queue *q, + struct request *rq) { - struct request *rq = *rqp; unsigned skip = 0; q->orderr = 0; @@ -149,11 +149,9 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) /* initialize proxy request and queue it */ blk_rq_init(q, rq); - if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) - rq->cmd_flags |= REQ_WRITE; + init_request_from_bio(rq, q->orig_bar_rq->bio); if (q->ordered & QUEUE_ORDERED_DO_FUA) rq->cmd_flags |= REQ_FUA; - init_request_from_bio(rq, q->orig_bar_rq->bio); rq->end_io = bar_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); @@ -171,27 +169,26 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) else skip |= QUEUE_ORDSEQ_DRAIN; - *rqp = rq; - /* * Complete skipped sequences. If whole sequence is complete, - * return false to tell elevator that this request is gone. + * return %NULL to tell elevator that this request is gone. */ - return !blk_ordered_complete_seq(q, skip, 0); + if (blk_ordered_complete_seq(q, skip, 0)) + rq = NULL; + return rq; } -bool blk_do_ordered(struct request_queue *q, struct request **rqp) +struct request *blk_do_ordered(struct request_queue *q, struct request *rq) { - struct request *rq = *rqp; const int is_barrier = rq->cmd_type == REQ_TYPE_FS && (rq->cmd_flags & REQ_HARDBARRIER); if (!q->ordseq) { if (!is_barrier) - return true; + return rq; if (q->next_ordered != QUEUE_ORDERED_NONE) - return start_ordered(q, rqp); + return start_ordered(q, rq); else { /* * Queue ordering not supported. Terminate @@ -199,8 +196,7 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) */ blk_dequeue_request(rq); __blk_end_request_all(rq, -EOPNOTSUPP); - *rqp = NULL; - return false; + return NULL; } } @@ -211,14 +207,14 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) /* Special requests are not subject to ordering rules. */ if (rq->cmd_type != REQ_TYPE_FS && rq != &q->pre_flush_rq && rq != &q->post_flush_rq) - return true; + return rq; /* Ordered by draining. Wait for turn. */ WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) - *rqp = NULL; + rq = ERR_PTR(-EAGAIN); - return true; + return rq; } static void bio_end_empty_barrier(struct bio *bio, int err) diff --git a/block/blk-core.c b/block/blk-core.c index f06354183b29..f8d37a8e2c55 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1037,22 +1037,6 @@ void blk_insert_request(struct request_queue *q, struct request *rq, } EXPORT_SYMBOL(blk_insert_request); -/* - * add-request adds a request to the linked list. - * queue lock is held and interrupts disabled, as we muck with the - * request queue list. 
- */ -static inline void add_request(struct request_queue *q, struct request *req) -{ - drive_stat_acct(req, 1); - - /* - * elevator indicated where it wants this request to be - * inserted at elevator_merge time - */ - __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); -} - static void part_round_stats_single(int cpu, struct hd_struct *part, unsigned long now) { @@ -1316,7 +1300,10 @@ get_rq: req->cpu = blk_cpu_to_group(smp_processor_id()); if (queue_should_plug(q) && elv_queue_empty(q)) blk_plug_device(q); - add_request(q, req); + + /* insert the request into the elevator */ + drive_stat_acct(req, 1); + __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); out: if (unplug || !queue_should_plug(q)) __generic_unplug_device(q); diff --git a/block/blk.h b/block/blk.h index 6e7dc87141e4..874eb4ea8093 100644 --- a/block/blk.h +++ b/block/blk.h @@ -51,6 +51,8 @@ static inline void blk_clear_rq_complete(struct request *rq) */ #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) +struct request *blk_do_ordered(struct request_queue *q, struct request *rq); + static inline struct request *__elv_next_request(struct request_queue *q) { struct request *rq; @@ -58,8 +60,9 @@ static inline struct request *__elv_next_request(struct request_queue *q) while (1) { while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); - if (blk_do_ordered(q, &rq)) - return rq; + rq = blk_do_ordered(q, rq); + if (rq) + return !IS_ERR(rq) ? rq : NULL; } if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e97911d4dec3..996549d71923 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -869,7 +869,6 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern bool blk_do_ordered(struct request_queue *, struct request **); extern unsigned blk_ordered_cur_seq(struct request_queue *); extern unsigned blk_ordered_req_seq(struct request *); extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); -- cgit v1.2.3 From 28e7d1845216538303bb95d679d8fd4de50e2f1a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: drop barrier ordering by queue draining Filesystems will take all the responsibilities for ordering requests around commit writes and will only indicate how the commit writes themselves should be handled by block layers. This patch drops barrier ordering by queue draining from block layer. Ordering by draining implementation was somewhat invasive to request handling. List of notable changes follow. * Each queue has 1 bit color which is flipped on each barrier issue. This is used to track whether a given request is issued before the current barrier or not. REQ_ORDERED_COLOR flag and coloring implementation in __elv_add_request() are removed. * Requests which shouldn't be processed yet for draining were stalled by returning -EAGAIN from blk_do_ordered() according to the test result between blk_ordered_req_seq() and blk_blk_ordered_cur_seq(). This logic is removed. * Draining completion logic in elv_completed_request() removed. * All barrier sequence requests were queued to request queue and then trckled to lower layer according to progress and thus maintaining request orders during requeue was necessary. 
This is replaced by queueing the next request in the barrier sequence only after the current one is complete from blk_ordered_complete_seq(), which removes the need for multiple proxy requests in struct request_queue and the request sorting logic in the ELEVATOR_INSERT_REQUEUE path of elv_insert(). * As barriers no longer have ordering constraints, there's no need to dump the whole elevator onto the dispatch queue on each barrier. Insert barriers at the front instead. * If other barrier requests come to the front of the dispatch queue while one is already in progress, they are stored in q->pending_barriers and restored to dispatch queue one-by-one after each barrier completion from blk_ordered_complete_seq(). Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-barrier.c | 220 ++++++++++++++++++---------------------------- block/blk-core.c | 11 ++- block/blk.h | 2 +- block/elevator.c | 79 ++--------------- include/linux/blk_types.h | 2 - include/linux/blkdev.h | 19 ++-- 6 files changed, 113 insertions(+), 220 deletions(-) (limited to 'block') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index f1be85ba2bb5..e8b2e5c091b1 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -9,6 +9,8 @@ #include "blk.h" +static struct request *queue_next_ordseq(struct request_queue *q); + /* * Cache flushing for ordered writes handling */ @@ -19,38 +21,10 @@ unsigned blk_ordered_cur_seq(struct request_queue *q) return 1 << ffz(q->ordseq); } -unsigned blk_ordered_req_seq(struct request *rq) -{ - struct request_queue *q = rq->q; - - BUG_ON(q->ordseq == 0); - - if (rq == &q->pre_flush_rq) - return QUEUE_ORDSEQ_PREFLUSH; - if (rq == &q->bar_rq) - return QUEUE_ORDSEQ_BAR; - if (rq == &q->post_flush_rq) - return QUEUE_ORDSEQ_POSTFLUSH; - - /* - * !fs requests don't need to follow barrier ordering. Always - * put them at the front. This fixes the following deadlock. - * - * http://thread.gmane.org/gmane.linux.kernel/537473 - */ - if (rq->cmd_type != REQ_TYPE_FS) - return QUEUE_ORDSEQ_DRAIN; - - if ((rq->cmd_flags & REQ_ORDERED_COLOR) == - (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) - return QUEUE_ORDSEQ_DRAIN; - else - return QUEUE_ORDSEQ_DONE; -} - -bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) +static struct request *blk_ordered_complete_seq(struct request_queue *q, + unsigned seq, int error) { - struct request *rq; + struct request *next_rq = NULL; if (error && !q->orderr) q->orderr = error; @@ -58,16 +32,22 @@ bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) BUG_ON(q->ordseq & seq); q->ordseq |= seq; - if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) - return false; - - /* - * Okay, sequence complete. 
- */ - q->ordseq = 0; - rq = q->orig_bar_rq; - __blk_end_request_all(rq, q->orderr); - return true; + if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) { + /* not complete yet, queue the next ordered sequence */ + next_rq = queue_next_ordseq(q); + } else { + /* complete this barrier request */ + __blk_end_request_all(q->orig_bar_rq, q->orderr); + q->orig_bar_rq = NULL; + q->ordseq = 0; + + /* dispatch the next barrier if there's one */ + if (!list_empty(&q->pending_barriers)) { + next_rq = list_entry_rq(q->pending_barriers.next); + list_move(&next_rq->queuelist, &q->queue_head); + } + } + return next_rq; } static void pre_flush_end_io(struct request *rq, int error) @@ -88,133 +68,105 @@ static void post_flush_end_io(struct request *rq, int error) blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); } -static void queue_flush(struct request_queue *q, unsigned which) +static void queue_flush(struct request_queue *q, struct request *rq, + rq_end_io_fn *end_io) { - struct request *rq; - rq_end_io_fn *end_io; - - if (which == QUEUE_ORDERED_DO_PREFLUSH) { - rq = &q->pre_flush_rq; - end_io = pre_flush_end_io; - } else { - rq = &q->post_flush_rq; - end_io = post_flush_end_io; - } - blk_rq_init(q, rq); rq->cmd_type = REQ_TYPE_FS; - rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH; + rq->cmd_flags = REQ_FLUSH; rq->rq_disk = q->orig_bar_rq->rq_disk; rq->end_io = end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); } -static inline struct request *start_ordered(struct request_queue *q, - struct request *rq) +static struct request *queue_next_ordseq(struct request_queue *q) { - unsigned skip = 0; - - q->orderr = 0; - q->ordered = q->next_ordered; - q->ordseq |= QUEUE_ORDSEQ_STARTED; - - /* - * For an empty barrier, there's no actual BAR request, which - * in turn makes POSTFLUSH unnecessary. Mask them off. - */ - if (!blk_rq_sectors(rq)) - q->ordered &= ~(QUEUE_ORDERED_DO_BAR | - QUEUE_ORDERED_DO_POSTFLUSH); - - /* stash away the original request */ - blk_dequeue_request(rq); - q->orig_bar_rq = rq; - rq = NULL; - - /* - * Queue ordered sequence. As we stack them at the head, we - * need to queue in reverse order. Note that we rely on that - * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs - * request gets inbetween ordered sequence. - */ - if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) { - queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH); - rq = &q->post_flush_rq; - } else - skip |= QUEUE_ORDSEQ_POSTFLUSH; + struct request *rq = &q->bar_rq; - if (q->ordered & QUEUE_ORDERED_DO_BAR) { - rq = &q->bar_rq; + switch (blk_ordered_cur_seq(q)) { + case QUEUE_ORDSEQ_PREFLUSH: + queue_flush(q, rq, pre_flush_end_io); + break; + case QUEUE_ORDSEQ_BAR: /* initialize proxy request and queue it */ blk_rq_init(q, rq); init_request_from_bio(rq, q->orig_bar_rq->bio); + rq->cmd_flags &= ~REQ_HARDBARRIER; if (q->ordered & QUEUE_ORDERED_DO_FUA) rq->cmd_flags |= REQ_FUA; rq->end_io = bar_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); - } else - skip |= QUEUE_ORDSEQ_BAR; + break; - if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) { - queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH); - rq = &q->pre_flush_rq; - } else - skip |= QUEUE_ORDSEQ_PREFLUSH; + case QUEUE_ORDSEQ_POSTFLUSH: + queue_flush(q, rq, post_flush_end_io); + break; - if (queue_in_flight(q)) - rq = NULL; - else - skip |= QUEUE_ORDSEQ_DRAIN; - - /* - * Complete skipped sequences. If whole sequence is complete, - * return %NULL to tell elevator that this request is gone. 
- */ - if (blk_ordered_complete_seq(q, skip, 0)) - rq = NULL; + default: + BUG(); + } return rq; } struct request *blk_do_ordered(struct request_queue *q, struct request *rq) { - const int is_barrier = rq->cmd_type == REQ_TYPE_FS && - (rq->cmd_flags & REQ_HARDBARRIER); - - if (!q->ordseq) { - if (!is_barrier) - return rq; - - if (q->next_ordered != QUEUE_ORDERED_NONE) - return start_ordered(q, rq); - else { - /* - * Queue ordering not supported. Terminate - * with prejudice. - */ - blk_dequeue_request(rq); - __blk_end_request_all(rq, -EOPNOTSUPP); - return NULL; - } + unsigned skip = 0; + + if (!(rq->cmd_flags & REQ_HARDBARRIER)) + return rq; + + if (q->ordseq) { + /* + * Barrier is already in progress and they can't be + * processed in parallel. Queue for later processing. + */ + list_move_tail(&rq->queuelist, &q->pending_barriers); + return NULL; + } + + if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) { + /* + * Queue ordering not supported. Terminate + * with prejudice. + */ + blk_dequeue_request(rq); + __blk_end_request_all(rq, -EOPNOTSUPP); + return NULL; } /* - * Ordered sequence in progress + * Start a new ordered sequence */ + q->orderr = 0; + q->ordered = q->next_ordered; + q->ordseq |= QUEUE_ORDSEQ_STARTED; - /* Special requests are not subject to ordering rules. */ - if (rq->cmd_type != REQ_TYPE_FS && - rq != &q->pre_flush_rq && rq != &q->post_flush_rq) - return rq; + /* + * For an empty barrier, there's no actual BAR request, which + * in turn makes POSTFLUSH unnecessary. Mask them off. + */ + if (!blk_rq_sectors(rq)) + q->ordered &= ~(QUEUE_ORDERED_DO_BAR | + QUEUE_ORDERED_DO_POSTFLUSH); - /* Ordered by draining. Wait for turn. */ - WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); - if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) - rq = ERR_PTR(-EAGAIN); + /* stash away the original request */ + blk_dequeue_request(rq); + q->orig_bar_rq = rq; - return rq; + if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) + skip |= QUEUE_ORDSEQ_PREFLUSH; + + if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) + skip |= QUEUE_ORDSEQ_BAR; + + if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) + skip |= QUEUE_ORDSEQ_POSTFLUSH; + + /* complete skipped sequences and return the first sequence */ + return blk_ordered_complete_seq(q, skip, 0); } static void bio_end_empty_barrier(struct bio *bio, int err) diff --git a/block/blk-core.c b/block/blk-core.c index f8d37a8e2c55..d316662682c8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -520,6 +520,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) init_timer(&q->unplug_timer); setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); INIT_LIST_HEAD(&q->timeout_list); + INIT_LIST_HEAD(&q->pending_barriers); INIT_WORK(&q->unplug_work, blk_unplug_work); kobject_init(&q->kobj, &blk_queue_ktype); @@ -1185,6 +1186,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) const bool sync = (bio->bi_rw & REQ_SYNC); const bool unplug = (bio->bi_rw & REQ_UNPLUG); const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; + int where = ELEVATOR_INSERT_SORT; int rw_flags; /* REQ_HARDBARRIER is no more */ @@ -1203,7 +1205,12 @@ static int __make_request(struct request_queue *q, struct bio *bio) spin_lock_irq(q->queue_lock); - if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q)) + if (bio->bi_rw & REQ_HARDBARRIER) { + where = ELEVATOR_INSERT_FRONT; + goto get_rq; + } + + if (elv_queue_empty(q)) goto get_rq; el_ret = elv_merge(q, &req, bio); @@ -1303,7 +1310,7 @@ get_rq: /* insert the request 
into the elevator */ drive_stat_acct(req, 1); - __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); + __elv_add_request(q, req, where, 0); out: if (unplug || !queue_should_plug(q)) __generic_unplug_device(q); diff --git a/block/blk.h b/block/blk.h index 874eb4ea8093..08081e4b294e 100644 --- a/block/blk.h +++ b/block/blk.h @@ -62,7 +62,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) rq = list_entry_rq(q->queue_head.next); rq = blk_do_ordered(q, rq); if (rq) - return !IS_ERR(rq) ? rq : NULL; + return rq; } if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) diff --git a/block/elevator.c b/block/elevator.c index ec585c9554d3..241c69c45c5f 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -617,8 +617,6 @@ void elv_quiesce_end(struct request_queue *q) void elv_insert(struct request_queue *q, struct request *rq, int where) { - struct list_head *pos; - unsigned ordseq; int unplug_it = 1; trace_block_rq_insert(q, rq); @@ -626,9 +624,16 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) rq->q = q; switch (where) { + case ELEVATOR_INSERT_REQUEUE: + /* + * Most requeues happen because of a busy condition, + * don't force unplug of the queue for that case. + * Clear unplug_it and fall through. + */ + unplug_it = 0; + case ELEVATOR_INSERT_FRONT: rq->cmd_flags |= REQ_SOFTBARRIER; - list_add(&rq->queuelist, &q->queue_head); break; @@ -668,36 +673,6 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) q->elevator->ops->elevator_add_req_fn(q, rq); break; - case ELEVATOR_INSERT_REQUEUE: - /* - * If ordered flush isn't in progress, we do front - * insertion; otherwise, requests should be requeued - * in ordseq order. - */ - rq->cmd_flags |= REQ_SOFTBARRIER; - - /* - * Most requeues happen because of a busy condition, - * don't force unplug of the queue for that case. - */ - unplug_it = 0; - - if (q->ordseq == 0) { - list_add(&rq->queuelist, &q->queue_head); - break; - } - - ordseq = blk_ordered_req_seq(rq); - - list_for_each(pos, &q->queue_head) { - struct request *pos_rq = list_entry_rq(pos); - if (ordseq <= blk_ordered_req_seq(pos_rq)) - break; - } - - list_add_tail(&rq->queuelist, pos); - break; - default: printk(KERN_ERR "%s: bad insertion point %d\n", __func__, where); @@ -716,26 +691,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) void __elv_add_request(struct request_queue *q, struct request *rq, int where, int plug) { - if (q->ordcolor) - rq->cmd_flags |= REQ_ORDERED_COLOR; - if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { - /* - * toggle ordered color - */ - if (rq->cmd_flags & REQ_HARDBARRIER) - q->ordcolor ^= 1; - - /* - * barriers implicitly indicate back insertion - */ - if (where == ELEVATOR_INSERT_SORT) - where = ELEVATOR_INSERT_BACK; - - /* - * this request is scheduling boundary, update - * end_sector - */ + /* barriers are scheduling boundary, update end_sector */ if (rq->cmd_type == REQ_TYPE_FS || (rq->cmd_flags & REQ_DISCARD)) { q->end_sector = rq_end_sector(rq); @@ -855,24 +812,6 @@ void elv_completed_request(struct request_queue *q, struct request *rq) e->ops->elevator_completed_req_fn) e->ops->elevator_completed_req_fn(q, rq); } - - /* - * Check if the queue is waiting for fs requests to be - * drained for flush sequence. 
- */ - if (unlikely(q->ordseq)) { - struct request *next = NULL; - - if (!list_empty(&q->queue_head)) - next = list_entry_rq(q->queue_head.next); - - if (!queue_in_flight(q) && - blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && - (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) { - blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); - __blk_run_queue(q); - } - } } #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index ca83a97c9715..9192282b4259 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -143,7 +143,6 @@ enum rq_flag_bits { __REQ_FAILED, /* set if the request failed */ __REQ_QUIET, /* don't worry about errors */ __REQ_PREEMPT, /* set for "ide_preempt" requests */ - __REQ_ORDERED_COLOR, /* is before or after barrier */ __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_COPY_USER, /* contains copies of user pages */ __REQ_INTEGRITY, /* integrity metadata has been remapped */ @@ -184,7 +183,6 @@ enum rq_flag_bits { #define REQ_FAILED (1 << __REQ_FAILED) #define REQ_QUIET (1 << __REQ_QUIET) #define REQ_PREEMPT (1 << __REQ_PREEMPT) -#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) #define REQ_ALLOCED (1 << __REQ_ALLOCED) #define REQ_COPY_USER (1 << __REQ_COPY_USER) #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 996549d71923..20a3710a481b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -360,9 +360,10 @@ struct request_queue unsigned int flush_flags; unsigned int ordered, next_ordered, ordseq; - int orderr, ordcolor; - struct request pre_flush_rq, bar_rq, post_flush_rq; + int orderr; + struct request bar_rq; struct request *orig_bar_rq; + struct list_head pending_barriers; struct mutex sysfs_lock; @@ -491,12 +492,11 @@ enum { /* * Ordered operation sequence */ - QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ - QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ - QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ - QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ - QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ - QUEUE_ORDSEQ_DONE = 0x20, + QUEUE_ORDSEQ_STARTED = (1 << 0), /* flushing in progress */ + QUEUE_ORDSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ + QUEUE_ORDSEQ_BAR = (1 << 2), /* barrier write in progress */ + QUEUE_ORDSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ + QUEUE_ORDSEQ_DONE = (1 << 4), }; #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) @@ -869,9 +869,6 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern unsigned blk_ordered_cur_seq(struct request_queue *); -extern unsigned blk_ordered_req_seq(struct request *); -extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); -- cgit v1.2.3 From 8839a0e055d9abd6c011d533373a8dd266cad011 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: rename blk-barrier.c to blk-flush.c Without ordering requirements, barrier and ordering are 
minomers. Rename block/blk-barrier.c to block/blk-flush.c. Rename of symbols will follow. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/Makefile | 2 +- block/blk-barrier.c | 248 ---------------------------------------------------- block/blk-flush.c | 248 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 249 insertions(+), 249 deletions(-) delete mode 100644 block/blk-barrier.c create mode 100644 block/blk-flush.c (limited to 'block') diff --git a/block/Makefile b/block/Makefile index 0bb499a739cd..f627e4b1a9da 100644 --- a/block/Makefile +++ b/block/Makefile @@ -3,7 +3,7 @@ # obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ - blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ + blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o diff --git a/block/blk-barrier.c b/block/blk-barrier.c deleted file mode 100644 index e8b2e5c091b1..000000000000 --- a/block/blk-barrier.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Functions related to barrier IO handling - */ -#include -#include -#include -#include -#include - -#include "blk.h" - -static struct request *queue_next_ordseq(struct request_queue *q); - -/* - * Cache flushing for ordered writes handling - */ -unsigned blk_ordered_cur_seq(struct request_queue *q) -{ - if (!q->ordseq) - return 0; - return 1 << ffz(q->ordseq); -} - -static struct request *blk_ordered_complete_seq(struct request_queue *q, - unsigned seq, int error) -{ - struct request *next_rq = NULL; - - if (error && !q->orderr) - q->orderr = error; - - BUG_ON(q->ordseq & seq); - q->ordseq |= seq; - - if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) { - /* not complete yet, queue the next ordered sequence */ - next_rq = queue_next_ordseq(q); - } else { - /* complete this barrier request */ - __blk_end_request_all(q->orig_bar_rq, q->orderr); - q->orig_bar_rq = NULL; - q->ordseq = 0; - - /* dispatch the next barrier if there's one */ - if (!list_empty(&q->pending_barriers)) { - next_rq = list_entry_rq(q->pending_barriers.next); - list_move(&next_rq->queuelist, &q->queue_head); - } - } - return next_rq; -} - -static void pre_flush_end_io(struct request *rq, int error) -{ - elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); -} - -static void bar_end_io(struct request *rq, int error) -{ - elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); -} - -static void post_flush_end_io(struct request *rq, int error) -{ - elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); -} - -static void queue_flush(struct request_queue *q, struct request *rq, - rq_end_io_fn *end_io) -{ - blk_rq_init(q, rq); - rq->cmd_type = REQ_TYPE_FS; - rq->cmd_flags = REQ_FLUSH; - rq->rq_disk = q->orig_bar_rq->rq_disk; - rq->end_io = end_io; - - elv_insert(q, rq, ELEVATOR_INSERT_FRONT); -} - -static struct request *queue_next_ordseq(struct request_queue *q) -{ - struct request *rq = &q->bar_rq; - - switch (blk_ordered_cur_seq(q)) { - case QUEUE_ORDSEQ_PREFLUSH: - queue_flush(q, rq, pre_flush_end_io); - break; - - case QUEUE_ORDSEQ_BAR: - /* initialize proxy request and queue it */ - blk_rq_init(q, rq); - init_request_from_bio(rq, q->orig_bar_rq->bio); - rq->cmd_flags &= ~REQ_HARDBARRIER; - if (q->ordered & QUEUE_ORDERED_DO_FUA) - rq->cmd_flags |= REQ_FUA; - rq->end_io = bar_end_io; - - elv_insert(q, rq, 
ELEVATOR_INSERT_FRONT); - break; - - case QUEUE_ORDSEQ_POSTFLUSH: - queue_flush(q, rq, post_flush_end_io); - break; - - default: - BUG(); - } - return rq; -} - -struct request *blk_do_ordered(struct request_queue *q, struct request *rq) -{ - unsigned skip = 0; - - if (!(rq->cmd_flags & REQ_HARDBARRIER)) - return rq; - - if (q->ordseq) { - /* - * Barrier is already in progress and they can't be - * processed in parallel. Queue for later processing. - */ - list_move_tail(&rq->queuelist, &q->pending_barriers); - return NULL; - } - - if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) { - /* - * Queue ordering not supported. Terminate - * with prejudice. - */ - blk_dequeue_request(rq); - __blk_end_request_all(rq, -EOPNOTSUPP); - return NULL; - } - - /* - * Start a new ordered sequence - */ - q->orderr = 0; - q->ordered = q->next_ordered; - q->ordseq |= QUEUE_ORDSEQ_STARTED; - - /* - * For an empty barrier, there's no actual BAR request, which - * in turn makes POSTFLUSH unnecessary. Mask them off. - */ - if (!blk_rq_sectors(rq)) - q->ordered &= ~(QUEUE_ORDERED_DO_BAR | - QUEUE_ORDERED_DO_POSTFLUSH); - - /* stash away the original request */ - blk_dequeue_request(rq); - q->orig_bar_rq = rq; - - if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) - skip |= QUEUE_ORDSEQ_PREFLUSH; - - if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) - skip |= QUEUE_ORDSEQ_BAR; - - if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) - skip |= QUEUE_ORDSEQ_POSTFLUSH; - - /* complete skipped sequences and return the first sequence */ - return blk_ordered_complete_seq(q, skip, 0); -} - -static void bio_end_empty_barrier(struct bio *bio, int err) -{ - if (err) { - if (err == -EOPNOTSUPP) - set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - clear_bit(BIO_UPTODATE, &bio->bi_flags); - } - if (bio->bi_private) - complete(bio->bi_private); - bio_put(bio); -} - -/** - * blkdev_issue_flush - queue a flush - * @bdev: blockdev to issue flush for - * @gfp_mask: memory allocation flags (for bio_alloc) - * @error_sector: error sector - * @flags: BLKDEV_IFL_* flags to control behaviour - * - * Description: - * Issue a flush for the block device in question. Caller can supply - * room for storing the error offset in case of a flush error, if they - * wish to. If WAIT flag is not passed then caller may check only what - * request was pushed in some internal queue for later handling. - */ -int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, - sector_t *error_sector, unsigned long flags) -{ - DECLARE_COMPLETION_ONSTACK(wait); - struct request_queue *q; - struct bio *bio; - int ret = 0; - - if (bdev->bd_disk == NULL) - return -ENXIO; - - q = bdev_get_queue(bdev); - if (!q) - return -ENXIO; - - /* - * some block devices may not have their queue correctly set up here - * (e.g. loop device without a backing file) and so issuing a flush - * here will panic. Ensure there is a request function before issuing - * the barrier. - */ - if (!q->make_request_fn) - return -ENXIO; - - bio = bio_alloc(gfp_mask, 0); - bio->bi_end_io = bio_end_empty_barrier; - bio->bi_bdev = bdev; - if (test_bit(BLKDEV_WAIT, &flags)) - bio->bi_private = &wait; - - bio_get(bio); - submit_bio(WRITE_BARRIER, bio); - if (test_bit(BLKDEV_WAIT, &flags)) { - wait_for_completion(&wait); - /* - * The driver must store the error location in ->bi_sector, if - * it supports it. For non-stacked drivers, this should be - * copied from blk_rq_pos(rq). 
- */ - if (error_sector) - *error_sector = bio->bi_sector; - } - - if (bio_flagged(bio, BIO_EOPNOTSUPP)) - ret = -EOPNOTSUPP; - else if (!bio_flagged(bio, BIO_UPTODATE)) - ret = -EIO; - - bio_put(bio); - return ret; -} -EXPORT_SYMBOL(blkdev_issue_flush); diff --git a/block/blk-flush.c b/block/blk-flush.c new file mode 100644 index 000000000000..e8b2e5c091b1 --- /dev/null +++ b/block/blk-flush.c @@ -0,0 +1,248 @@ +/* + * Functions related to barrier IO handling + */ +#include +#include +#include +#include +#include + +#include "blk.h" + +static struct request *queue_next_ordseq(struct request_queue *q); + +/* + * Cache flushing for ordered writes handling + */ +unsigned blk_ordered_cur_seq(struct request_queue *q) +{ + if (!q->ordseq) + return 0; + return 1 << ffz(q->ordseq); +} + +static struct request *blk_ordered_complete_seq(struct request_queue *q, + unsigned seq, int error) +{ + struct request *next_rq = NULL; + + if (error && !q->orderr) + q->orderr = error; + + BUG_ON(q->ordseq & seq); + q->ordseq |= seq; + + if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) { + /* not complete yet, queue the next ordered sequence */ + next_rq = queue_next_ordseq(q); + } else { + /* complete this barrier request */ + __blk_end_request_all(q->orig_bar_rq, q->orderr); + q->orig_bar_rq = NULL; + q->ordseq = 0; + + /* dispatch the next barrier if there's one */ + if (!list_empty(&q->pending_barriers)) { + next_rq = list_entry_rq(q->pending_barriers.next); + list_move(&next_rq->queuelist, &q->queue_head); + } + } + return next_rq; +} + +static void pre_flush_end_io(struct request *rq, int error) +{ + elv_completed_request(rq->q, rq); + blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); +} + +static void bar_end_io(struct request *rq, int error) +{ + elv_completed_request(rq->q, rq); + blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); +} + +static void post_flush_end_io(struct request *rq, int error) +{ + elv_completed_request(rq->q, rq); + blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); +} + +static void queue_flush(struct request_queue *q, struct request *rq, + rq_end_io_fn *end_io) +{ + blk_rq_init(q, rq); + rq->cmd_type = REQ_TYPE_FS; + rq->cmd_flags = REQ_FLUSH; + rq->rq_disk = q->orig_bar_rq->rq_disk; + rq->end_io = end_io; + + elv_insert(q, rq, ELEVATOR_INSERT_FRONT); +} + +static struct request *queue_next_ordseq(struct request_queue *q) +{ + struct request *rq = &q->bar_rq; + + switch (blk_ordered_cur_seq(q)) { + case QUEUE_ORDSEQ_PREFLUSH: + queue_flush(q, rq, pre_flush_end_io); + break; + + case QUEUE_ORDSEQ_BAR: + /* initialize proxy request and queue it */ + blk_rq_init(q, rq); + init_request_from_bio(rq, q->orig_bar_rq->bio); + rq->cmd_flags &= ~REQ_HARDBARRIER; + if (q->ordered & QUEUE_ORDERED_DO_FUA) + rq->cmd_flags |= REQ_FUA; + rq->end_io = bar_end_io; + + elv_insert(q, rq, ELEVATOR_INSERT_FRONT); + break; + + case QUEUE_ORDSEQ_POSTFLUSH: + queue_flush(q, rq, post_flush_end_io); + break; + + default: + BUG(); + } + return rq; +} + +struct request *blk_do_ordered(struct request_queue *q, struct request *rq) +{ + unsigned skip = 0; + + if (!(rq->cmd_flags & REQ_HARDBARRIER)) + return rq; + + if (q->ordseq) { + /* + * Barrier is already in progress and they can't be + * processed in parallel. Queue for later processing. + */ + list_move_tail(&rq->queuelist, &q->pending_barriers); + return NULL; + } + + if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) { + /* + * Queue ordering not supported. Terminate + * with prejudice. 
+ */ + blk_dequeue_request(rq); + __blk_end_request_all(rq, -EOPNOTSUPP); + return NULL; + } + + /* + * Start a new ordered sequence + */ + q->orderr = 0; + q->ordered = q->next_ordered; + q->ordseq |= QUEUE_ORDSEQ_STARTED; + + /* + * For an empty barrier, there's no actual BAR request, which + * in turn makes POSTFLUSH unnecessary. Mask them off. + */ + if (!blk_rq_sectors(rq)) + q->ordered &= ~(QUEUE_ORDERED_DO_BAR | + QUEUE_ORDERED_DO_POSTFLUSH); + + /* stash away the original request */ + blk_dequeue_request(rq); + q->orig_bar_rq = rq; + + if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) + skip |= QUEUE_ORDSEQ_PREFLUSH; + + if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) + skip |= QUEUE_ORDSEQ_BAR; + + if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) + skip |= QUEUE_ORDSEQ_POSTFLUSH; + + /* complete skipped sequences and return the first sequence */ + return blk_ordered_complete_seq(q, skip, 0); +} + +static void bio_end_empty_barrier(struct bio *bio, int err) +{ + if (err) { + if (err == -EOPNOTSUPP) + set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); + clear_bit(BIO_UPTODATE, &bio->bi_flags); + } + if (bio->bi_private) + complete(bio->bi_private); + bio_put(bio); +} + +/** + * blkdev_issue_flush - queue a flush + * @bdev: blockdev to issue flush for + * @gfp_mask: memory allocation flags (for bio_alloc) + * @error_sector: error sector + * @flags: BLKDEV_IFL_* flags to control behaviour + * + * Description: + * Issue a flush for the block device in question. Caller can supply + * room for storing the error offset in case of a flush error, if they + * wish to. If WAIT flag is not passed then caller may check only what + * request was pushed in some internal queue for later handling. + */ +int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, + sector_t *error_sector, unsigned long flags) +{ + DECLARE_COMPLETION_ONSTACK(wait); + struct request_queue *q; + struct bio *bio; + int ret = 0; + + if (bdev->bd_disk == NULL) + return -ENXIO; + + q = bdev_get_queue(bdev); + if (!q) + return -ENXIO; + + /* + * some block devices may not have their queue correctly set up here + * (e.g. loop device without a backing file) and so issuing a flush + * here will panic. Ensure there is a request function before issuing + * the barrier. + */ + if (!q->make_request_fn) + return -ENXIO; + + bio = bio_alloc(gfp_mask, 0); + bio->bi_end_io = bio_end_empty_barrier; + bio->bi_bdev = bdev; + if (test_bit(BLKDEV_WAIT, &flags)) + bio->bi_private = &wait; + + bio_get(bio); + submit_bio(WRITE_BARRIER, bio); + if (test_bit(BLKDEV_WAIT, &flags)) { + wait_for_completion(&wait); + /* + * The driver must store the error location in ->bi_sector, if + * it supports it. For non-stacked drivers, this should be + * copied from blk_rq_pos(rq). + */ + if (error_sector) + *error_sector = bio->bi_sector; + } + + if (bio_flagged(bio, BIO_EOPNOTSUPP)) + ret = -EOPNOTSUPP; + else if (!bio_flagged(bio, BIO_UPTODATE)) + ret = -EIO; + + bio_put(bio); + return ret; +} +EXPORT_SYMBOL(blkdev_issue_flush); -- cgit v1.2.3 From dd4c133f387c48f526022860ad70354637a80f4c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: rename barrier/ordered to flush With ordering requirements dropped, barrier and ordered are misnomers. Now all block layer does is sequencing FLUSH and FUA. Rename them to flush. 
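After this rename a barrier bio is sequenced into at most three requests: an empty REQ_FLUSH preflush, the data write (with REQ_FUA added when QUEUE_ORDERED_DO_FUA is set), and an empty REQ_FLUSH postflush. A rough sketch of what a converted request-based driver ends up seeing, with hypothetical foo_* helpers standing in for device-specific code (not part of this series):

        static void foo_request_fn(struct request_queue *q)
        {
                struct request *rq;

                while ((rq = blk_fetch_request(q)) != NULL) {
                        if (rq->cmd_flags & REQ_FLUSH) {
                                /* empty PREFLUSH/POSTFLUSH request, no data */
                                foo_flush_write_cache(q->queuedata);
                        } else if (rq->cmd_flags & REQ_FUA) {
                                /* data write that must bypass the write cache */
                                foo_write_fua(q->queuedata, rq);
                        } else {
                                /* regular read/write */
                                foo_read_write(q->queuedata, rq);
                        }
                        __blk_end_request_all(rq, 0);
                }
        }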
Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 21 ++++++----- block/blk-flush.c | 98 +++++++++++++++++++++++++------------------------- block/blk.h | 4 +-- include/linux/blkdev.h | 24 ++++++------- 4 files changed, 72 insertions(+), 75 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index d316662682c8..8870ae40179d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -136,7 +136,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio, { struct request_queue *q = rq->q; - if (&q->bar_rq != rq) { + if (&q->flush_rq != rq) { if (error) clear_bit(BIO_UPTODATE, &bio->bi_flags); else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) @@ -160,13 +160,12 @@ static void req_bio_endio(struct request *rq, struct bio *bio, if (bio->bi_size == 0) bio_endio(bio, error); } else { - /* - * Okay, this is the barrier request in progress, just - * record the error; + * Okay, this is the sequenced flush request in + * progress, just record the error; */ - if (error && !q->orderr) - q->orderr = error; + if (error && !q->flush_err) + q->flush_err = error; } } @@ -520,7 +519,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) init_timer(&q->unplug_timer); setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); INIT_LIST_HEAD(&q->timeout_list); - INIT_LIST_HEAD(&q->pending_barriers); + INIT_LIST_HEAD(&q->pending_flushes); INIT_WORK(&q->unplug_work, blk_unplug_work); kobject_init(&q->kobj, &blk_queue_ktype); @@ -1764,11 +1763,11 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes) static void blk_account_io_done(struct request *req) { /* - * Account IO completion. bar_rq isn't accounted as a normal - * IO on queueing nor completion. Accounting the containing - * request is enough. + * Account IO completion. flush_rq isn't accounted as a + * normal IO on queueing nor completion. Accounting the + * containing request is enough. 
*/ - if (blk_do_io_stat(req) && req != &req->q->bar_rq) { + if (blk_do_io_stat(req) && req != &req->q->flush_rq) { unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); struct hd_struct *part; diff --git a/block/blk-flush.c b/block/blk-flush.c index e8b2e5c091b1..dd873225da97 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -9,41 +9,38 @@ #include "blk.h" -static struct request *queue_next_ordseq(struct request_queue *q); +static struct request *queue_next_fseq(struct request_queue *q); -/* - * Cache flushing for ordered writes handling - */ -unsigned blk_ordered_cur_seq(struct request_queue *q) +unsigned blk_flush_cur_seq(struct request_queue *q) { - if (!q->ordseq) + if (!q->flush_seq) return 0; - return 1 << ffz(q->ordseq); + return 1 << ffz(q->flush_seq); } -static struct request *blk_ordered_complete_seq(struct request_queue *q, - unsigned seq, int error) +static struct request *blk_flush_complete_seq(struct request_queue *q, + unsigned seq, int error) { struct request *next_rq = NULL; - if (error && !q->orderr) - q->orderr = error; + if (error && !q->flush_err) + q->flush_err = error; - BUG_ON(q->ordseq & seq); - q->ordseq |= seq; + BUG_ON(q->flush_seq & seq); + q->flush_seq |= seq; - if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) { - /* not complete yet, queue the next ordered sequence */ - next_rq = queue_next_ordseq(q); + if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) { + /* not complete yet, queue the next flush sequence */ + next_rq = queue_next_fseq(q); } else { - /* complete this barrier request */ - __blk_end_request_all(q->orig_bar_rq, q->orderr); - q->orig_bar_rq = NULL; - q->ordseq = 0; - - /* dispatch the next barrier if there's one */ - if (!list_empty(&q->pending_barriers)) { - next_rq = list_entry_rq(q->pending_barriers.next); + /* complete this flush request */ + __blk_end_request_all(q->orig_flush_rq, q->flush_err); + q->orig_flush_rq = NULL; + q->flush_seq = 0; + + /* dispatch the next flush if there's one */ + if (!list_empty(&q->pending_flushes)) { + next_rq = list_entry_rq(q->pending_flushes.next); list_move(&next_rq->queuelist, &q->queue_head); } } @@ -53,19 +50,19 @@ static struct request *blk_ordered_complete_seq(struct request_queue *q, static void pre_flush_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); + blk_flush_complete_seq(rq->q, QUEUE_FSEQ_PREFLUSH, error); } -static void bar_end_io(struct request *rq, int error) +static void flush_data_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); + blk_flush_complete_seq(rq->q, QUEUE_FSEQ_DATA, error); } static void post_flush_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); + blk_flush_complete_seq(rq->q, QUEUE_FSEQ_POSTFLUSH, error); } static void queue_flush(struct request_queue *q, struct request *rq, @@ -74,34 +71,34 @@ static void queue_flush(struct request_queue *q, struct request *rq, blk_rq_init(q, rq); rq->cmd_type = REQ_TYPE_FS; rq->cmd_flags = REQ_FLUSH; - rq->rq_disk = q->orig_bar_rq->rq_disk; + rq->rq_disk = q->orig_flush_rq->rq_disk; rq->end_io = end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); } -static struct request *queue_next_ordseq(struct request_queue *q) +static struct request *queue_next_fseq(struct request_queue *q) { - struct request *rq = &q->bar_rq; + struct request *rq = 
&q->flush_rq; - switch (blk_ordered_cur_seq(q)) { - case QUEUE_ORDSEQ_PREFLUSH: + switch (blk_flush_cur_seq(q)) { + case QUEUE_FSEQ_PREFLUSH: queue_flush(q, rq, pre_flush_end_io); break; - case QUEUE_ORDSEQ_BAR: + case QUEUE_FSEQ_DATA: /* initialize proxy request and queue it */ blk_rq_init(q, rq); - init_request_from_bio(rq, q->orig_bar_rq->bio); + init_request_from_bio(rq, q->orig_flush_rq->bio); rq->cmd_flags &= ~REQ_HARDBARRIER; if (q->ordered & QUEUE_ORDERED_DO_FUA) rq->cmd_flags |= REQ_FUA; - rq->end_io = bar_end_io; + rq->end_io = flush_data_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); break; - case QUEUE_ORDSEQ_POSTFLUSH: + case QUEUE_FSEQ_POSTFLUSH: queue_flush(q, rq, post_flush_end_io); break; @@ -111,19 +108,20 @@ static struct request *queue_next_ordseq(struct request_queue *q) return rq; } -struct request *blk_do_ordered(struct request_queue *q, struct request *rq) +struct request *blk_do_flush(struct request_queue *q, struct request *rq) { unsigned skip = 0; if (!(rq->cmd_flags & REQ_HARDBARRIER)) return rq; - if (q->ordseq) { + if (q->flush_seq) { /* - * Barrier is already in progress and they can't be - * processed in parallel. Queue for later processing. + * Sequenced flush is already in progress and they + * can't be processed in parallel. Queue for later + * processing. */ - list_move_tail(&rq->queuelist, &q->pending_barriers); + list_move_tail(&rq->queuelist, &q->pending_flushes); return NULL; } @@ -138,11 +136,11 @@ struct request *blk_do_ordered(struct request_queue *q, struct request *rq) } /* - * Start a new ordered sequence + * Start a new flush sequence */ - q->orderr = 0; + q->flush_err = 0; q->ordered = q->next_ordered; - q->ordseq |= QUEUE_ORDSEQ_STARTED; + q->flush_seq |= QUEUE_FSEQ_STARTED; /* * For an empty barrier, there's no actual BAR request, which @@ -154,19 +152,19 @@ struct request *blk_do_ordered(struct request_queue *q, struct request *rq) /* stash away the original request */ blk_dequeue_request(rq); - q->orig_bar_rq = rq; + q->orig_flush_rq = rq; if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) - skip |= QUEUE_ORDSEQ_PREFLUSH; + skip |= QUEUE_FSEQ_PREFLUSH; if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) - skip |= QUEUE_ORDSEQ_BAR; + skip |= QUEUE_FSEQ_DATA; if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) - skip |= QUEUE_ORDSEQ_POSTFLUSH; + skip |= QUEUE_FSEQ_POSTFLUSH; /* complete skipped sequences and return the first sequence */ - return blk_ordered_complete_seq(q, skip, 0); + return blk_flush_complete_seq(q, skip, 0); } static void bio_end_empty_barrier(struct bio *bio, int err) diff --git a/block/blk.h b/block/blk.h index 08081e4b294e..24b92bd78f37 100644 --- a/block/blk.h +++ b/block/blk.h @@ -51,7 +51,7 @@ static inline void blk_clear_rq_complete(struct request *rq) */ #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) -struct request *blk_do_ordered(struct request_queue *q, struct request *rq); +struct request *blk_do_flush(struct request_queue *q, struct request *rq); static inline struct request *__elv_next_request(struct request_queue *q) { @@ -60,7 +60,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) while (1) { while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); - rq = blk_do_ordered(q, rq); + rq = blk_do_flush(q, rq); if (rq) return rq; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 20a3710a481b..1cd83ec077db 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -357,13 +357,13 @@ struct request_queue /* * for flush operations */ + unsigned 
int ordered, next_ordered; unsigned int flush_flags; - - unsigned int ordered, next_ordered, ordseq; - int orderr; - struct request bar_rq; - struct request *orig_bar_rq; - struct list_head pending_barriers; + unsigned int flush_seq; + int flush_err; + struct request flush_rq; + struct request *orig_flush_rq; + struct list_head pending_flushes; struct mutex sysfs_lock; @@ -490,13 +490,13 @@ enum { QUEUE_ORDERED_DO_FUA, /* - * Ordered operation sequence + * FLUSH/FUA sequences. */ - QUEUE_ORDSEQ_STARTED = (1 << 0), /* flushing in progress */ - QUEUE_ORDSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ - QUEUE_ORDSEQ_BAR = (1 << 2), /* barrier write in progress */ - QUEUE_ORDSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ - QUEUE_ORDSEQ_DONE = (1 << 4), + QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ + QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ + QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ + QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ + QUEUE_FSEQ_DONE = (1 << 4), }; #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) -- cgit v1.2.3 From 4fed947cb311e5aa51781d316cefca836352f6ce Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: implement REQ_FLUSH/FUA based interface for FLUSH/FUA requests Now that the backend conversion is complete, export sequenced FLUSH/FUA capability through REQ_FLUSH/FUA flags. REQ_FLUSH means the device cache should be flushed before executing the request. REQ_FUA means that the data in the request should be on non-volatile media on completion. Block layer will choose the correct way of implementing the semantics and execute it. The request may be passed to the device directly if the device can handle it; otherwise, it will be sequenced using one or more proxy requests. Devices will never see REQ_FLUSH and/or FUA which it doesn't support. Also, unlike the original REQ_HARDBARRIER, REQ_FLUSH/FUA requests are never failed with -EOPNOTSUPP. If the underlying device doesn't support FLUSH/FUA, the block layer simply make those noop. IOW, it no longer distinguishes between writeback cache which doesn't support cache flush and writethrough/no cache. Devices which have WB cache w/o flush are very difficult to come by these days and there's nothing much we can do anyway, so it doesn't make sense to require everyone to implement -EOPNOTSUPP handling. This will simplify filesystems and block drivers as they can drop -EOPNOTSUPP retry logic for barriers. * QUEUE_ORDERED_* are removed and QUEUE_FSEQ_* are moved into blk-flush.c. * REQ_FLUSH w/o data can also be directly passed to drivers without sequencing but some drivers assume that zero length requests don't have rq->bio which isn't true for these requests requiring the use of proxy requests. * REQ_COMMON_MASK now includes REQ_FLUSH | REQ_FUA so that they are copied from bio to request. * WRITE_BARRIER is marked deprecated and WRITE_FLUSH, WRITE_FUA and WRITE_FLUSH_FUA are added. 
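As an illustration only, not taken from any patch here: a minimal driver/file-system sketch of the new interface. The mydrv_*/myfs_* names are hypothetical, and blk_queue_flush() is the setup helper these flags pair with.

    #include <linux/blkdev.h>
    #include <linux/bio.h>
    #include <linux/fs.h>

    /* Driver side: the device has a volatile write cache and honours FUA,
     * so REQ_FLUSH/REQ_FUA can be passed straight through; anything the
     * device can't do is emulated by the flush sequence above.
     */
    static void mydrv_init_queue(struct request_queue *q)
    {
        blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
    }

    /* Submitter side: this write must be durable by the time it completes;
     * no -EOPNOTSUPP retry logic is needed any more.
     */
    static void myfs_write_commit_block(struct bio *bio)
    {
        submit_bio(WRITE_FLUSH_FUA, bio);
    }
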
Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-flush.c | 85 ++++++++++++++++++++++++--------------------- block/blk.h | 3 ++ include/linux/blk_types.h | 2 +- include/linux/blkdev.h | 38 ++------------------ include/linux/buffer_head.h | 2 +- include/linux/fs.h | 19 ++++++---- 7 files changed, 67 insertions(+), 84 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 8870ae40179d..18455c4f618a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1204,7 +1204,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) spin_lock_irq(q->queue_lock); - if (bio->bi_rw & REQ_HARDBARRIER) { + if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { where = ELEVATOR_INSERT_FRONT; goto get_rq; } diff --git a/block/blk-flush.c b/block/blk-flush.c index dd873225da97..452c552e9ead 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -1,5 +1,5 @@ /* - * Functions related to barrier IO handling + * Functions to sequence FLUSH and FUA writes. */ #include #include @@ -9,6 +9,15 @@ #include "blk.h" +/* FLUSH/FUA sequences */ +enum { + QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ + QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ + QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ + QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ + QUEUE_FSEQ_DONE = (1 << 4), +}; + static struct request *queue_next_fseq(struct request_queue *q); unsigned blk_flush_cur_seq(struct request_queue *q) @@ -79,6 +88,7 @@ static void queue_flush(struct request_queue *q, struct request *rq, static struct request *queue_next_fseq(struct request_queue *q) { + struct request *orig_rq = q->orig_flush_rq; struct request *rq = &q->flush_rq; switch (blk_flush_cur_seq(q)) { @@ -87,12 +97,11 @@ static struct request *queue_next_fseq(struct request_queue *q) break; case QUEUE_FSEQ_DATA: - /* initialize proxy request and queue it */ + /* initialize proxy request, inherit FLUSH/FUA and queue it */ blk_rq_init(q, rq); - init_request_from_bio(rq, q->orig_flush_rq->bio); - rq->cmd_flags &= ~REQ_HARDBARRIER; - if (q->ordered & QUEUE_ORDERED_DO_FUA) - rq->cmd_flags |= REQ_FUA; + init_request_from_bio(rq, orig_rq->bio); + rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA); + rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA); rq->end_io = flush_data_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); @@ -110,60 +119,58 @@ static struct request *queue_next_fseq(struct request_queue *q) struct request *blk_do_flush(struct request_queue *q, struct request *rq) { + unsigned int fflags = q->flush_flags; /* may change, cache it */ + bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA; + bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH); + bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA); unsigned skip = 0; - if (!(rq->cmd_flags & REQ_HARDBARRIER)) + /* + * Special case. If there's data but flush is not necessary, + * the request can be issued directly. + * + * Flush w/o data should be able to be issued directly too but + * currently some drivers assume that rq->bio contains + * non-zero data if it isn't NULL and empty FLUSH requests + * getting here usually have bio's without data. + */ + if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) { + rq->cmd_flags &= ~REQ_FLUSH; + if (!has_fua) + rq->cmd_flags &= ~REQ_FUA; return rq; + } + /* + * Sequenced flushes can't be processed in parallel. 
If + * another one is already in progress, queue for later + * processing. + */ if (q->flush_seq) { - /* - * Sequenced flush is already in progress and they - * can't be processed in parallel. Queue for later - * processing. - */ list_move_tail(&rq->queuelist, &q->pending_flushes); return NULL; } - if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) { - /* - * Queue ordering not supported. Terminate - * with prejudice. - */ - blk_dequeue_request(rq); - __blk_end_request_all(rq, -EOPNOTSUPP); - return NULL; - } - /* * Start a new flush sequence */ q->flush_err = 0; - q->ordered = q->next_ordered; q->flush_seq |= QUEUE_FSEQ_STARTED; - /* - * For an empty barrier, there's no actual BAR request, which - * in turn makes POSTFLUSH unnecessary. Mask them off. - */ - if (!blk_rq_sectors(rq)) - q->ordered &= ~(QUEUE_ORDERED_DO_BAR | - QUEUE_ORDERED_DO_POSTFLUSH); - - /* stash away the original request */ + /* adjust FLUSH/FUA of the original request and stash it away */ + rq->cmd_flags &= ~REQ_FLUSH; + if (!has_fua) + rq->cmd_flags &= ~REQ_FUA; blk_dequeue_request(rq); q->orig_flush_rq = rq; - if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) + /* skip unneded sequences and return the first one */ + if (!do_preflush) skip |= QUEUE_FSEQ_PREFLUSH; - - if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) + if (!blk_rq_sectors(rq)) skip |= QUEUE_FSEQ_DATA; - - if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) + if (!do_postflush) skip |= QUEUE_FSEQ_POSTFLUSH; - - /* complete skipped sequences and return the first sequence */ return blk_flush_complete_seq(q, skip, 0); } diff --git a/block/blk.h b/block/blk.h index 24b92bd78f37..a09c18b19116 100644 --- a/block/blk.h +++ b/block/blk.h @@ -60,6 +60,9 @@ static inline struct request *__elv_next_request(struct request_queue *q) while (1) { while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); + if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) || + rq == &q->flush_rq) + return rq; rq = blk_do_flush(q, rq); if (rq) return rq; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 9192282b4259..179799479e6f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -167,7 +167,7 @@ enum rq_flag_bits { (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ - REQ_META| REQ_DISCARD | REQ_NOIDLE) + REQ_META | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) #define REQ_UNPLUG (1 << __REQ_UNPLUG) #define REQ_RAHEAD (1 << __REQ_RAHEAD) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1cd83ec077db..8ef705f800ab 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -357,7 +357,6 @@ struct request_queue /* * for flush operations */ - unsigned int ordered, next_ordered; unsigned int flush_flags; unsigned int flush_seq; int flush_err; @@ -465,40 +464,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) __clear_bit(flag, &q->queue_flags); } -enum { - /* - * Hardbarrier is supported with one of the following methods. 
- * - * NONE : hardbarrier unsupported - * DRAIN : ordering by draining is enough - * DRAIN_FLUSH : ordering by draining w/ pre and post flushes - * DRAIN_FUA : ordering by draining w/ pre flush and FUA write - */ - QUEUE_ORDERED_DO_PREFLUSH = 0x10, - QUEUE_ORDERED_DO_BAR = 0x20, - QUEUE_ORDERED_DO_POSTFLUSH = 0x40, - QUEUE_ORDERED_DO_FUA = 0x80, - - QUEUE_ORDERED_NONE = 0x00, - - QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_DO_BAR, - QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_POSTFLUSH, - QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_FUA, - - /* - * FLUSH/FUA sequences. - */ - QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ - QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ - QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ - QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ - QUEUE_FSEQ_DONE = (1 << 4), -}; - #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) @@ -578,7 +543,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int sync) * it already be started by driver. */ #define RQ_NOMERGE_FLAGS \ - (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) + (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER | \ + REQ_FLUSH | REQ_FUA) #define rq_mergeable(rq) \ (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ (((rq)->cmd_flags & REQ_DISCARD) || \ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index ec94c12f21da..fc999f583fda 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -32,7 +32,7 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Eopnotsupp, /* operation not supported (barrier) */ + BH_Eopnotsupp, /* DEPRECATED: operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b614758..352c48627381 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -135,12 +135,13 @@ struct inodes_stat_t { * immediately after submission. The write equivalent * of READ_SYNC. * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. - * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all - * previously submitted writes must be safely on storage - * before this one is started. Also guarantees that when - * this write is complete, it itself is also safely on - * storage. Prevents reordering of writes on both sides - * of this IO. + * WRITE_BARRIER DEPRECATED. Always fails. Use FLUSH/FUA instead. + * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. + * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on + * non-volatile media on completion. + * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded + * by a cache flush and data is guaranteed to be on + * non-volatile media on completion. 
* */ #define RW_MASK REQ_WRITE @@ -158,6 +159,12 @@ struct inodes_stat_t { #define WRITE_META (WRITE | REQ_META) #define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_HARDBARRIER) +#define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FLUSH) +#define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FUA) +#define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FLUSH | REQ_FUA) /* * These aren't really reads or writes, they pass down information about -- cgit v1.2.3 From 1e87901e189c8f01750d67485009fe3827c691bf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: filter flush bio's in __generic_make_request() There are a number of make_request based drivers which don't support cache flushes. Filter out flush bio's in __generic_make_request() so that they don't have to worry about them. All FLUSH/FUA requests with data are converted to regular IO requests and empty ones are completed immediately. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 18455c4f618a..495bdc4a23da 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1509,6 +1509,19 @@ static inline void __generic_make_request(struct bio *bio) if (bio_check_eod(bio, nr_sectors)) goto end_io; + /* + * Filter flush bio's early so that make_request based + * drivers without flush support don't have to worry + * about them. + */ + if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) { + bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); + if (!nr_sectors) { + err = 0; + goto end_io; + } + } + if ((bio->bi_rw & REQ_DISCARD) && (!blk_queue_discard(q) || ((bio->bi_rw & REQ_SECURE) && -- cgit v1.2.3 From cde4c406d8fb051c5aafc917643adbb9dbd0abc2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: simplify queue_next_fseq We need to call blk_rq_init and elv_insert for all cases in queue_next_fseq, so take these calls into common code. Also move the end_io initialization from queue_flush into queue_next_fseq and rename queue_flush to init_flush_request now that it's old name doesn't apply anymore. 
Signed-off-by: Christoph Hellwig Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-flush.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'block') diff --git a/block/blk-flush.c b/block/blk-flush.c index 452c552e9ead..72905f862d31 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -74,16 +74,11 @@ static void post_flush_end_io(struct request *rq, int error) blk_flush_complete_seq(rq->q, QUEUE_FSEQ_POSTFLUSH, error); } -static void queue_flush(struct request_queue *q, struct request *rq, - rq_end_io_fn *end_io) +static void init_flush_request(struct request *rq, struct gendisk *disk) { - blk_rq_init(q, rq); rq->cmd_type = REQ_TYPE_FS; rq->cmd_flags = REQ_FLUSH; - rq->rq_disk = q->orig_flush_rq->rq_disk; - rq->end_io = end_io; - - elv_insert(q, rq, ELEVATOR_INSERT_FRONT); + rq->rq_disk = disk; } static struct request *queue_next_fseq(struct request_queue *q) @@ -91,29 +86,28 @@ static struct request *queue_next_fseq(struct request_queue *q) struct request *orig_rq = q->orig_flush_rq; struct request *rq = &q->flush_rq; + blk_rq_init(q, rq); + switch (blk_flush_cur_seq(q)) { case QUEUE_FSEQ_PREFLUSH: - queue_flush(q, rq, pre_flush_end_io); + init_flush_request(rq, orig_rq->rq_disk); + rq->end_io = pre_flush_end_io; break; - case QUEUE_FSEQ_DATA: - /* initialize proxy request, inherit FLUSH/FUA and queue it */ - blk_rq_init(q, rq); init_request_from_bio(rq, orig_rq->bio); rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA); rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA); rq->end_io = flush_data_end_io; - - elv_insert(q, rq, ELEVATOR_INSERT_FRONT); break; - case QUEUE_FSEQ_POSTFLUSH: - queue_flush(q, rq, post_flush_end_io); + init_flush_request(rq, orig_rq->rq_disk); + rq->end_io = post_flush_end_io; break; - default: BUG(); } + + elv_insert(q, rq, ELEVATOR_INSERT_FRONT); return rq; } -- cgit v1.2.3 From 337238be1bf52e1242f940fc6fe83fb395e55057 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: initialize flush request with WRITE_FLUSH instead of REQ_FLUSH init_flush_request() only set REQ_FLUSH when initializing flush requests making them READ requests. Use WRITE_FLUSH instead. Signed-off-by: Tejun Heo Reported-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-flush.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-flush.c b/block/blk-flush.c index 72905f862d31..f357f1fc411c 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -77,7 +77,7 @@ static void post_flush_end_io(struct request *rq, int error) static void init_flush_request(struct request *rq, struct gendisk *disk) { rq->cmd_type = REQ_TYPE_FS; - rq->cmd_flags = REQ_FLUSH; + rq->cmd_flags = WRITE_FLUSH; rq->rq_disk = disk; } -- cgit v1.2.3 From 47f70d5a6ca78c40a1c799d43506efbfed914f7b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: kick queue after sequencing REQ_FLUSH/FUA While completing a request from a REQ_FLUSH/FUA sequence, another request can be pushed to the request queue. If a driver tests elv_queue_empty() before completing a request and runs the queue again only if the queue wasn't empty, this may lead to hang. Please note that most drivers either kick the queue unconditionally or test queue emptiness after completing the current request and don't have this problem. 
This patch removes this possibility by making REQ_FLUSH/FUA sequence code kick the queue if the queue was empty before completing a request from REQ_FLUSH/FUA sequence. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-flush.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/blk-flush.c b/block/blk-flush.c index f357f1fc411c..cb4c8440a1fc 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -56,22 +56,38 @@ static struct request *blk_flush_complete_seq(struct request_queue *q, return next_rq; } +static void blk_flush_complete_seq_end_io(struct request_queue *q, + unsigned seq, int error) +{ + bool was_empty = elv_queue_empty(q); + struct request *next_rq; + + next_rq = blk_flush_complete_seq(q, seq, error); + + /* + * Moving a request silently to empty queue_head may stall the + * queue. Kick the queue in those cases. + */ + if (was_empty && next_rq) + __blk_run_queue(q); +} + static void pre_flush_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_flush_complete_seq(rq->q, QUEUE_FSEQ_PREFLUSH, error); + blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error); } static void flush_data_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_flush_complete_seq(rq->q, QUEUE_FSEQ_DATA, error); + blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error); } static void post_flush_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_flush_complete_seq(rq->q, QUEUE_FSEQ_POSTFLUSH, error); + blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error); } static void init_flush_request(struct request *rq, struct gendisk *disk) -- cgit v1.2.3 From 09d60c701b64b509f328cac72970eb894f485b9e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: make sure FSEQ_DATA request has the same rq_disk as the original rq->rq_disk and bio->bi_bdev->bd_disk may differ if a request has passed through remapping drivers. FSEQ_DATA request incorrectly followed bio->bi_bdev->bd_disk ending up being issued w/ mismatching rq_disk. Make it follow orig_rq->rq_disk. Signed-off-by: Tejun Heo Reported-by: Kiyoshi Ueda Tested-by: Kiyoshi Ueda Signed-off-by: Jens Axboe --- block/blk-flush.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'block') diff --git a/block/blk-flush.c b/block/blk-flush.c index cb4c8440a1fc..7d1fc982e78f 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -111,6 +111,13 @@ static struct request *queue_next_fseq(struct request_queue *q) break; case QUEUE_FSEQ_DATA: init_request_from_bio(rq, orig_rq->bio); + /* + * orig_rq->rq_disk may be different from + * bio->bi_bdev->bd_disk if orig_rq got here through + * remapping drivers. Make sure rq->rq_disk points + * to the same one as orig_rq. + */ + rq->rq_disk = orig_rq->rq_disk; rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA); rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA); rq->end_io = flush_data_end_io; -- cgit v1.2.3 From d391a2dda2f1c993f094bdb3a8a342c5e0546553 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: use REQ_FLUSH in blkdev_issue_flush() Update blkdev_issue_flush() to use new REQ_FLUSH interface. 
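For context, a hypothetical caller-side sketch (myfs_sync_cache is made up): at this point in the series blkdev_issue_flush() still takes the flags argument, which a later patch below removes.

    #include <linux/blkdev.h>

    /* Flush the device's volatile write cache and wait for completion.
     * NULL: this caller does not care about the error offset.
     */
    static int myfs_sync_cache(struct block_device *bdev)
    {
        return blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT);
    }
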
Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-flush.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'block') diff --git a/block/blk-flush.c b/block/blk-flush.c index 7d1fc982e78f..62b7df9bca9d 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -191,13 +191,10 @@ struct request *blk_do_flush(struct request_queue *q, struct request *rq) return blk_flush_complete_seq(q, skip, 0); } -static void bio_end_empty_barrier(struct bio *bio, int err) +static void bio_end_flush(struct bio *bio, int err) { - if (err) { - if (err == -EOPNOTSUPP) - set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); + if (err) clear_bit(BIO_UPTODATE, &bio->bi_flags); - } if (bio->bi_private) complete(bio->bi_private); bio_put(bio); @@ -235,19 +232,19 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, * some block devices may not have their queue correctly set up here * (e.g. loop device without a backing file) and so issuing a flush * here will panic. Ensure there is a request function before issuing - * the barrier. + * the flush. */ if (!q->make_request_fn) return -ENXIO; bio = bio_alloc(gfp_mask, 0); - bio->bi_end_io = bio_end_empty_barrier; + bio->bi_end_io = bio_end_flush; bio->bi_bdev = bdev; if (test_bit(BLKDEV_WAIT, &flags)) bio->bi_private = &wait; bio_get(bio); - submit_bio(WRITE_BARRIER, bio); + submit_bio(WRITE_FLUSH, bio); if (test_bit(BLKDEV_WAIT, &flags)) { wait_for_completion(&wait); /* @@ -259,9 +256,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, *error_sector = bio->bi_sector; } - if (bio_flagged(bio, BIO_EOPNOTSUPP)) - ret = -EOPNOTSUPP; - else if (!bio_flagged(bio, BIO_UPTODATE)) + if (!bio_flagged(bio, BIO_UPTODATE)) ret = -EIO; bio_put(bio); -- cgit v1.2.3 From 3a2edd0d6ddbd5fa3b389ea6db811285415ce6c8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:18 +0200 Subject: block: make __blk_rq_prep_clone() copy most command flags Currently __blk_rq_prep_clone() copies only REQ_WRITE and REQ_DISCARD. There's no reason to omit other command flags and REQ_FUA needs to be copied to implement FUA support in request-based dm. REQ_COMMON_MASK which specifies flags to be copied from bio to request already identifies all the command flags. Define REQ_CLONE_MASK to be the same as REQ_COMMON_MASK for clarity and make __blk_rq_prep_clone() copy all flags in the mask. 
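A tiny illustrative helper, not from the patch, of what the clone now inherits; clone_cmd_flags() is a hypothetical stand-in for the flag copy done in __blk_rq_prep_clone().

    #include <linux/blk_types.h>

    static inline unsigned int clone_cmd_flags(unsigned int src_flags)
    {
        /* keep everything in REQ_CLONE_MASK (== REQ_COMMON_MASK),
         * including REQ_FLUSH and REQ_FUA, and never allow merging
         */
        return (src_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
    }

    /* e.g. clone_cmd_flags(REQ_WRITE | REQ_FUA | REQ_UNPLUG) keeps WRITE
     * and FUA but drops UNPLUG, which is not part of the clone mask.
     */
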
Signed-off-by: Tejun Heo Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 4 +--- include/linux/blk_types.h | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 495bdc4a23da..2a5b19204546 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2505,9 +2505,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); static void __blk_rq_prep_clone(struct request *dst, struct request *src) { dst->cpu = src->cpu; - dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE); - if (src->cmd_flags & REQ_DISCARD) - dst->cmd_flags |= REQ_DISCARD; + dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; dst->cmd_type = src->cmd_type; dst->__sector = blk_rq_pos(src); dst->__data_len = blk_rq_bytes(src); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 179799479e6f..36edadf5b41a 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -168,6 +168,7 @@ enum rq_flag_bits { #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ REQ_META | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) +#define REQ_CLONE_MASK REQ_COMMON_MASK #define REQ_UNPLUG (1 << __REQ_UNPLUG) #define REQ_RAHEAD (1 << __REQ_RAHEAD) -- cgit v1.2.3 From 8c5553678237b7121355108e03c36086037d8975 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:22 -0400 Subject: block: remove the BLKDEV_IFL_BARRIER flag Remove support for barriers on discards, which is unused now. Also remove the DISCARD_NOBARRIER I/O type in favour of just setting the rw flags up locally in blkdev_issue_discard. tj: Also remove DISCARD_SECURE and use REQ_SECURE directly. Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-lib.c | 18 ++---------------- include/linux/blkdev.h | 2 -- include/linux/fs.h | 8 -------- 3 files changed, 2 insertions(+), 26 deletions(-) (limited to 'block') diff --git a/block/blk-lib.c b/block/blk-lib.c index c392029a104e..fe2e6ed0f510 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -39,8 +39,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, { DECLARE_COMPLETION_ONSTACK(wait); struct request_queue *q = bdev_get_queue(bdev); - int type = flags & BLKDEV_IFL_BARRIER ? - DISCARD_BARRIER : DISCARD_NOBARRIER; + int type = REQ_WRITE | REQ_DISCARD; unsigned int max_discard_sectors; struct bio *bio; int ret = 0; @@ -65,7 +64,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, if (flags & BLKDEV_IFL_SECURE) { if (!blk_queue_secdiscard(q)) return -EOPNOTSUPP; - type |= DISCARD_SECURE; + type |= REQ_SECURE; } while (nr_sects && !ret) { @@ -162,12 +161,6 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, bb.wait = &wait; bb.end_io = NULL; - if (flags & BLKDEV_IFL_BARRIER) { - /* issue async barrier before the data */ - ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0); - if (ret) - return ret; - } submit: ret = 0; while (nr_sects != 0) { @@ -199,13 +192,6 @@ submit: issued++; submit_bio(WRITE, bio); } - /* - * When all data bios are in flight. Send final barrier if requeted. 
- */ - if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER) - ret = blkdev_issue_flush(bdev, gfp_mask, NULL, - flags & BLKDEV_IFL_WAIT); - if (flags & BLKDEV_IFL_WAIT) /* Wait for bios in-flight */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6b305eb4a343..cfcb3a610605 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -869,11 +869,9 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, } enum{ BLKDEV_WAIT, /* wait for completion */ - BLKDEV_BARRIER, /* issue request with barrier */ BLKDEV_SECURE, /* secure discard */ }; #define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT) -#define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER) #define BLKDEV_IFL_SECURE (1 << BLKDEV_SECURE) extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *, unsigned long); diff --git a/include/linux/fs.h b/include/linux/fs.h index d6add69bc170..6b0f6e9993a3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -163,14 +163,6 @@ struct inodes_stat_t { #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_FLUSH | REQ_FUA) -/* - * These aren't really reads or writes, they pass down information about - * parts of device that are now unused by the file system. - */ -#define DISCARD_NOBARRIER (WRITE | REQ_DISCARD) -#define DISCARD_BARRIER (WRITE | REQ_DISCARD | REQ_HARDBARRIER) -#define DISCARD_SECURE (DISCARD_NOBARRIER | REQ_SECURE) - #define SEL_IN 1 #define SEL_OUT 2 #define SEL_EX 4 -- cgit v1.2.3 From dd3932eddf428571762596e17b65f5dc92ca361b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 16 Sep 2010 20:51:46 +0200 Subject: block: remove BLKDEV_IFL_WAIT All the blkdev_issue_* helpers can only sanely be used for synchronous caller. To issue cache flushes or barriers asynchronously the caller needs to set up a bio by itself with a completion callback to move the asynchronous state machine ahead. So drop the BLKDEV_IFL_WAIT flag that is always specified when calling blkdev_issue_* and also remove the now unused flags argument to blkdev_issue_flush and blkdev_issue_zeroout. For blkdev_issue_discard we need to keep it for the secure discard flag, which gains a more descriptive name and loses the bitops vs flag confusion. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-flush.c | 25 +++++++++++-------------- block/blk-lib.c | 21 ++++++++------------- block/ioctl.c | 4 ++-- drivers/block/drbd/drbd_int.h | 3 +-- drivers/block/drbd/drbd_receiver.c | 2 +- fs/block_dev.c | 2 +- fs/btrfs/extent-tree.c | 3 +-- fs/ext3/fsync.c | 3 +-- fs/ext4/fsync.c | 5 ++--- fs/ext4/mballoc.c | 3 +-- fs/fat/fatent.c | 3 +-- fs/gfs2/rgrp.c | 5 ++--- fs/jbd2/checkpoint.c | 3 +-- fs/jbd2/commit.c | 6 ++---- fs/nilfs2/the_nilfs.c | 4 ++-- fs/reiserfs/file.c | 3 +-- fs/xfs/linux-2.6/xfs_super.c | 3 +-- include/linux/blkdev.h | 14 +++++--------- mm/swapfile.c | 6 +++--- 19 files changed, 47 insertions(+), 71 deletions(-) (limited to 'block') diff --git a/block/blk-flush.c b/block/blk-flush.c index 62b7df9bca9d..54b123d6563e 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -205,7 +205,6 @@ static void bio_end_flush(struct bio *bio, int err) * @bdev: blockdev to issue flush for * @gfp_mask: memory allocation flags (for bio_alloc) * @error_sector: error sector - * @flags: BLKDEV_IFL_* flags to control behaviour * * Description: * Issue a flush for the block device in question. 
Caller can supply @@ -214,7 +213,7 @@ static void bio_end_flush(struct bio *bio, int err) * request was pushed in some internal queue for later handling. */ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, - sector_t *error_sector, unsigned long flags) + sector_t *error_sector) { DECLARE_COMPLETION_ONSTACK(wait); struct request_queue *q; @@ -240,21 +239,19 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, bio = bio_alloc(gfp_mask, 0); bio->bi_end_io = bio_end_flush; bio->bi_bdev = bdev; - if (test_bit(BLKDEV_WAIT, &flags)) - bio->bi_private = &wait; + bio->bi_private = &wait; bio_get(bio); submit_bio(WRITE_FLUSH, bio); - if (test_bit(BLKDEV_WAIT, &flags)) { - wait_for_completion(&wait); - /* - * The driver must store the error location in ->bi_sector, if - * it supports it. For non-stacked drivers, this should be - * copied from blk_rq_pos(rq). - */ - if (error_sector) - *error_sector = bio->bi_sector; - } + wait_for_completion(&wait); + + /* + * The driver must store the error location in ->bi_sector, if + * it supports it. For non-stacked drivers, this should be + * copied from blk_rq_pos(rq). + */ + if (error_sector) + *error_sector = bio->bi_sector; if (!bio_flagged(bio, BIO_UPTODATE)) ret = -EIO; diff --git a/block/blk-lib.c b/block/blk-lib.c index fe2e6ed0f510..1a320d2406b0 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -61,7 +61,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, max_discard_sectors &= ~(disc_sects - 1); } - if (flags & BLKDEV_IFL_SECURE) { + if (flags & BLKDEV_DISCARD_SECURE) { if (!blk_queue_secdiscard(q)) return -EOPNOTSUPP; type |= REQ_SECURE; @@ -77,8 +77,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, bio->bi_sector = sector; bio->bi_end_io = blkdev_discard_end_io; bio->bi_bdev = bdev; - if (flags & BLKDEV_IFL_WAIT) - bio->bi_private = &wait; + bio->bi_private = &wait; if (nr_sects > max_discard_sectors) { bio->bi_size = max_discard_sectors << 9; @@ -92,8 +91,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, bio_get(bio); submit_bio(type, bio); - if (flags & BLKDEV_IFL_WAIT) - wait_for_completion(&wait); + wait_for_completion(&wait); if (bio_flagged(bio, BIO_EOPNOTSUPP)) ret = -EOPNOTSUPP; @@ -139,7 +137,6 @@ static void bio_batch_end_io(struct bio *bio, int err) * @sector: start sector * @nr_sects: number of sectors to write * @gfp_mask: memory allocation flags (for bio_alloc) - * @flags: BLKDEV_IFL_* flags to control behaviour * * Description: * Generate and issue number of bios with zerofiled pages. 
@@ -148,7 +145,7 @@ static void bio_batch_end_io(struct bio *bio, int err) */ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) + sector_t nr_sects, gfp_t gfp_mask) { int ret; struct bio *bio; @@ -174,8 +171,7 @@ submit: bio->bi_sector = sector; bio->bi_bdev = bdev; bio->bi_end_io = bio_batch_end_io; - if (flags & BLKDEV_IFL_WAIT) - bio->bi_private = &bb; + bio->bi_private = &bb; while (nr_sects != 0) { sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects); @@ -193,10 +189,9 @@ submit: submit_bio(WRITE, bio); } - if (flags & BLKDEV_IFL_WAIT) - /* Wait for bios in-flight */ - while ( issued != atomic_read(&bb.done)) - wait_for_completion(&wait); + /* Wait for bios in-flight */ + while (issued != atomic_read(&bb.done)) + wait_for_completion(&wait); if (!test_bit(BIO_UPTODATE, &bb.flags)) /* One of bios in the batch was completed with error.*/ diff --git a/block/ioctl.c b/block/ioctl.c index d8052f0dabd3..cb2b9099862b 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -116,7 +116,7 @@ static int blkdev_reread_part(struct block_device *bdev) static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, uint64_t len, int secure) { - unsigned long flags = BLKDEV_IFL_WAIT; + unsigned long flags = 0; if (start & 511) return -EINVAL; @@ -128,7 +128,7 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, if (start + len > (bdev->bd_inode->i_size >> 9)) return -EINVAL; if (secure) - flags |= BLKDEV_IFL_SECURE; + flags |= BLKDEV_DISCARD_SECURE; return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags); } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 352441b0f92f..c2ef476f5711 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2321,8 +2321,7 @@ static inline void drbd_md_flush(struct drbd_conf *mdev) if (test_bit(MD_NO_BARRIER, &mdev->flags)) return; - r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL, - BLKDEV_IFL_WAIT); + r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL); if (r) { set_bit(MD_NO_BARRIER, &mdev->flags); dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 081522d3c742..df15e7f0e7b7 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -975,7 +975,7 @@ static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) { rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL, - NULL, BLKDEV_IFL_WAIT); + NULL); if (rv) { dev_err(DEV, "local disk flush failed with status %d\n", rv); /* would rather check on EOPNOTSUPP, but that is not reliable. 
diff --git a/fs/block_dev.c b/fs/block_dev.c index 50e8c8582faa..b737451e2e9d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -370,7 +370,7 @@ int blkdev_fsync(struct file *filp, int datasync) */ mutex_unlock(&bd_inode->i_mutex); - error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT); + error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL); if (error == -EOPNOTSUPP) error = 0; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 43dc9ea9aef6..0b81ecdb101c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1695,8 +1695,7 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, static void btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len) { - blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, - BLKDEV_IFL_WAIT); + blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0); } static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index d7e9f74dc3a6..09b13bb34c94 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c @@ -90,7 +90,6 @@ int ext3_sync_file(struct file *file, int datasync) * storage */ if (needs_barrier) - blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, - BLKDEV_IFL_WAIT); + blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); return ret; } diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 592adf2e546e..3f3ff5ee8f9d 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -128,10 +128,9 @@ int ext4_sync_file(struct file *file, int datasync) (journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, - NULL, BLKDEV_IFL_WAIT); + NULL); ret = jbd2_log_wait_commit(journal, commit_tid); } else if (journal->j_flags & JBD2_BARRIER) - blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, - BLKDEV_IFL_WAIT); + blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); return ret; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a22bfef3da95..19aa0d44d822 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2566,8 +2566,7 @@ static inline void ext4_issue_discard(struct super_block *sb, discard_block = block + ext4_group_first_block_no(sb, block_group); trace_ext4_discard_blocks(sb, (unsigned long long) discard_block, count); - ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, - BLKDEV_IFL_WAIT); + ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); if (ret == EOPNOTSUPP) { ext4_warning(sb, "discard not supported, disabling"); clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index f9a0b7ae8648..b47d2c9f4fa1 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -578,8 +578,7 @@ int fat_free_clusters(struct inode *inode, int cluster) sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), nr_clus * sbi->sec_per_clus, - GFP_NOFS, - BLKDEV_IFL_WAIT); + GFP_NOFS, 0); first_cl = cluster; } diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 379316472918..38b3ea1abacc 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -854,7 +854,7 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, if ((start + nr_sects) != blk) { rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, - BLKDEV_IFL_WAIT); + 0); if (rv) goto fail; nr_sects = 0; @@ -868,8 +868,7 @@ start_new_extent: } } if (nr_sects) { - rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, - BLKDEV_IFL_WAIT); + rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0); if (rv) goto fail; } diff --git 
a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 5247e7ffdcb4..6571a056e55d 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -532,8 +532,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) */ if ((journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) - blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL, - BLKDEV_IFL_WAIT); + blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); if (!(journal->j_flags & JBD2_ABORT)) jbd2_journal_update_superblock(journal, 1); return 0; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f204e27f44d1..cb43c605cfaa 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -684,8 +684,7 @@ start_journal_io: if (commit_transaction->t_flushed_data_blocks && (journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) - blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL, - BLKDEV_IFL_WAIT); + blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); /* Done it all: now write the commit record asynchronously. */ if (JBD2_HAS_INCOMPAT_FEATURE(journal, @@ -810,8 +809,7 @@ wait_for_iobuf: if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) && journal->j_flags & JBD2_BARRIER) { - blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL, - BLKDEV_IFL_WAIT); + blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL); } if (err) diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 400b2caef4d8..d97310f07bef 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -774,7 +774,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, - GFP_NOFS, BLKDEV_IFL_WAIT); + GFP_NOFS, 0); if (ret < 0) return ret; nblocks = 0; @@ -784,7 +784,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, - GFP_NOFS, BLKDEV_IFL_WAIT); + GFP_NOFS, 0); return ret; } diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 6846371498b6..91f080cc76c8 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -152,8 +152,7 @@ static int reiserfs_sync_file(struct file *filp, int datasync) barrier_done = reiserfs_commit_for_inode(inode); reiserfs_write_unlock(inode->i_sb); if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) - blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, - BLKDEV_IFL_WAIT); + blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); if (barrier_done < 0) return barrier_done; return (err < 0) ? 
-EIO : 0; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 15c35b62ff14..5fa7a30cc3f0 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -693,8 +693,7 @@ void xfs_blkdev_issue_flush( xfs_buftarg_t *buftarg) { - blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL, - BLKDEV_IFL_WAIT); + blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL); } STATIC void diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cfcb3a610605..accbd0e5c893 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -867,18 +867,14 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, return NULL; return bqt->tag_index[tag]; } -enum{ - BLKDEV_WAIT, /* wait for completion */ - BLKDEV_SECURE, /* secure discard */ -}; -#define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT) -#define BLKDEV_IFL_SECURE (1 << BLKDEV_SECURE) -extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *, - unsigned long); + +#define BLKDEV_DISCARD_SECURE 0x01 /* secure discard */ + +extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); + sector_t nr_sects, gfp_t gfp_mask); static inline int sb_issue_discard(struct super_block *sb, sector_t block, sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) { diff --git a/mm/swapfile.c b/mm/swapfile.c index 68cda164dff6..e132e1708acc 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -141,7 +141,7 @@ static int discard_swap(struct swap_info_struct *si) nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9); if (nr_blocks) { err = blkdev_issue_discard(si->bdev, start_block, - nr_blocks, GFP_KERNEL, BLKDEV_IFL_WAIT); + nr_blocks, GFP_KERNEL, 0); if (err) return err; cond_resched(); @@ -152,7 +152,7 @@ static int discard_swap(struct swap_info_struct *si) nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); err = blkdev_issue_discard(si->bdev, start_block, - nr_blocks, GFP_KERNEL, BLKDEV_IFL_WAIT); + nr_blocks, GFP_KERNEL, 0); if (err) break; @@ -191,7 +191,7 @@ static void discard_swap_cluster(struct swap_info_struct *si, start_block <<= PAGE_SHIFT - 9; nr_blocks <<= PAGE_SHIFT - 9; if (blkdev_issue_discard(si->bdev, start_block, - nr_blocks, GFP_NOIO, BLKDEV_IFL_WAIT)) + nr_blocks, GFP_NOIO, 0)) break; } -- cgit v1.2.3
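Finally, a hypothetical caller sketch (myfs_trim_and_flush is made up) against the post-series prototypes; the helpers are now always synchronous, so there is no wait flag to pass.

    #include <linux/blkdev.h>

    static int myfs_trim_and_flush(struct block_device *bdev,
                                   sector_t start, sector_t nr_sects)
    {
        /* pass BLKDEV_DISCARD_SECURE instead of 0 to request a secure
         * discard; that fails with -EOPNOTSUPP if the queue can't do it
         */
        int err = blkdev_issue_discard(bdev, start, nr_sects,
                                       GFP_KERNEL, 0);
        if (err)
            return err;

        /* then flush the device's volatile write cache */
        return blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
    }
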