From ef295ecf090d3e86e5b742fc6ab34f1122a43773 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Oct 2016 08:48:16 -0600 Subject: block: better op and flags encoding Now that we don't need the common flags to overflow outside the range of a 32-bit type we can encode them the same way for both the bio and request fields. This in addition allows us to place the operation first (and make some room for more ops while we're at it) and to stop having to shift around the operation values. In addition this allows passing around only one value in the block layer instead of two (and eventuall also in the file systems, but we can do that later) and thus clean up a lot of code. Last but not least this allows decreasing the size of the cmd_flags field in struct request to 32-bits. Various functions passing this value could also be updated, but I'd like to avoid the churn for now. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/btrfs/inode.c | 5 ++--- fs/buffer.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/gfs2/lops.c | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2b790bda7998..9a377079af26 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8427,7 +8427,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip, if (!bio) return -ENOMEM; - bio_set_op_attrs(bio, bio_op(orig_bio), bio_flags(orig_bio)); + bio->bi_opf = orig_bio->bi_opf; bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; btrfs_io_bio(bio)->logical = file_offset; @@ -8465,8 +8465,7 @@ next_block: start_sector, GFP_NOFS); if (!bio) goto out_err; - bio_set_op_attrs(bio, bio_op(orig_bio), - bio_flags(orig_bio)); + bio->bi_opf = orig_bio->bi_opf; bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; btrfs_io_bio(bio)->logical = file_offset; diff --git a/fs/buffer.c b/fs/buffer.c index b205a629001d..a29335867e30 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3118,7 +3118,7 @@ EXPORT_SYMBOL(submit_bh); /** * ll_rw_block: low-level access to block devices (DEPRECATED) * @op: whether to %READ or %WRITE - * @op_flags: rq_flag_bits + * @op_flags: req_flag_bits * @nr: number of &struct buffer_heads in the array * @bhs: array of pointers to &struct buffer_head * diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9e8de18a168a..2cf4f7f09e32 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -688,7 +688,7 @@ struct f2fs_io_info { struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ int op; /* contains REQ_OP_ */ - int op_flags; /* rq_flag_bits */ + int op_flags; /* req_flag_bits */ block_t new_blkaddr; /* new block address to be written */ block_t old_blkaddr; /* old block address before Cow */ struct page *page; /* page to be written */ diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 49d5a1b61b06..b1f9144b42c7 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -231,7 +231,7 @@ static void gfs2_end_log_write(struct bio *bio) * gfs2_log_flush_bio - Submit any pending log bio * @sdp: The superblock * @op: REQ_OP - * @op_flags: rq_flag_bits + * @op_flags: req_flag_bits * * Submit any pending part-built or full bio to the block device. If * there is no pending bio, then this is a no-op. -- cgit v1.2.3 From 67f055c798c72c49ee0c844eae0cd6e9c83b1b16 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:06 -0600 Subject: btrfs: use op_is_sync to check for synchronous requests Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/volumes.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3a57f99d96aa..c8454a8e35f2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -930,7 +930,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, atomic_inc(&fs_info->nr_async_submits); - if (bio->bi_opf & REQ_SYNC) + if (op_is_sync(bio->bi_opf)) btrfs_set_work_high_priority(&async->work); btrfs_queue_work(fs_info->workers, &async->work); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 71a60cc01451..deda46cf1292 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6100,7 +6100,7 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root, bio->bi_next = NULL; spin_lock(&device->io_lock); - if (bio->bi_opf & REQ_SYNC) + if (op_is_sync(bio->bi_opf)) pending_bios = &device->pending_sync_bios; else pending_bios = &device->pending_bios; -- cgit v1.2.3 From 70fd76140a6cb63262bd47b68d57b42e889c10ee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:10 -0600 Subject: block,fs: use REQ_* flags directly Remove the WRITE_* and READ_SYNC wrappers, and just use the flags directly. Where applicable this also drops usage of the bio_set_op_attrs wrapper. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-flush.c | 4 ++-- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/xen-blkback/blkback.c | 10 ++++---- drivers/md/bcache/btree.c | 4 ++-- drivers/md/bcache/debug.c | 4 ++-- drivers/md/bcache/request.c | 2 +- drivers/md/bcache/super.c | 4 ++-- drivers/md/dm-bufio.c | 2 +- drivers/md/dm-log.c | 2 +- drivers/md/dm-raid1.c | 4 ++-- drivers/md/dm-snap-persistent.c | 4 ++-- drivers/md/dm.c | 2 +- drivers/md/md.c | 4 ++-- drivers/md/raid5-cache.c | 4 ++-- drivers/md/raid5.c | 2 +- drivers/nvme/target/io-cmd.c | 4 ++-- drivers/target/target_core_iblock.c | 8 +++---- fs/btrfs/disk-io.c | 6 ++--- fs/btrfs/extent_io.c | 16 ++++++------- fs/btrfs/inode.c | 6 ++--- fs/btrfs/scrub.c | 2 +- fs/btrfs/volumes.c | 2 +- fs/btrfs/volumes.h | 2 +- fs/buffer.c | 8 +++---- fs/direct-io.c | 2 +- fs/ext4/mmp.c | 6 ++--- fs/ext4/page-io.c | 2 +- fs/ext4/super.c | 2 +- fs/f2fs/checkpoint.c | 4 ++-- fs/f2fs/data.c | 16 ++++++------- fs/f2fs/gc.c | 6 ++--- fs/f2fs/inline.c | 2 +- fs/f2fs/node.c | 4 ++-- fs/f2fs/segment.c | 8 +++---- fs/f2fs/super.c | 2 +- fs/gfs2/log.c | 4 ++-- fs/gfs2/meta_io.c | 6 ++--- fs/gfs2/ops_fstype.c | 2 +- fs/hfsplus/super.c | 4 ++-- fs/jbd2/checkpoint.c | 2 +- fs/jbd2/commit.c | 9 +++---- fs/jbd2/journal.c | 15 ++++++------ fs/jbd2/revoke.c | 2 +- fs/jfs/jfs_logmgr.c | 4 ++-- fs/mpage.c | 6 ++--- fs/nilfs2/super.c | 2 +- fs/ocfs2/cluster/heartbeat.c | 2 +- fs/reiserfs/journal.c | 6 +++-- fs/xfs/xfs_aops.c | 11 +++++---- fs/xfs/xfs_buf.c | 2 +- include/linux/fs.h | 47 ------------------------------------- include/trace/events/f2fs.h | 10 ++++---- kernel/power/swap.c | 19 +++++++-------- 53 files changed, 133 insertions(+), 182 deletions(-) (limited to 'fs') diff --git a/block/blk-flush.c b/block/blk-flush.c index 95f1d4d357df..d35beca18481 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -330,7 +330,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq) } flush_rq->cmd_type = REQ_TYPE_FS; - flush_rq->cmd_flags = REQ_OP_FLUSH | WRITE_FLUSH; + flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH; flush_rq->rq_flags |= RQF_FLUSH_SEQ; flush_rq->rq_disk = first_rq->rq_disk; flush_rq->end_io = flush_end_io; @@ -486,7 +486,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, bio = bio_alloc(gfp_mask, 0); bio->bi_bdev = bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; ret = submit_bio_wait(bio); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 942384f34e22..a89538cb3eaa 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1266,7 +1266,7 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont bio->bi_bdev = device->ldev->backing_bdev; bio->bi_private = octx; bio->bi_end_io = one_flush_endio; - bio_set_op_attrs(bio, REQ_OP_FLUSH, WRITE_FLUSH); + bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH; device->flush_jif = jiffies; set_bit(FLUSH_PENDING, &device->flags); diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 4a80ee752597..726c32e35db9 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -1253,14 +1253,14 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, case BLKIF_OP_WRITE: ring->st_wr_req++; operation = REQ_OP_WRITE; - operation_flags = WRITE_ODIRECT; + operation_flags = REQ_SYNC | REQ_IDLE; break; case BLKIF_OP_WRITE_BARRIER: drain = true; case BLKIF_OP_FLUSH_DISKCACHE: ring->st_f_req++; operation = REQ_OP_WRITE; - operation_flags = WRITE_FLUSH; + operation_flags = REQ_PREFLUSH; break; default: operation = 0; /* make gcc happy */ @@ -1272,7 +1272,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, nseg = req->operation == BLKIF_OP_INDIRECT ? req->u.indirect.nr_segments : req->u.rw.nr_segments; - if (unlikely(nseg == 0 && operation_flags != WRITE_FLUSH) || + if (unlikely(nseg == 0 && operation_flags != REQ_PREFLUSH) || unlikely((req->operation != BLKIF_OP_INDIRECT) && (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) || unlikely((req->operation == BLKIF_OP_INDIRECT) && @@ -1334,7 +1334,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, } /* Wait on all outstanding I/O's and once that has been completed - * issue the WRITE_FLUSH. + * issue the flush. */ if (drain) xen_blk_drain_io(pending_req->ring); @@ -1380,7 +1380,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, /* This will be hit if the operation was a flush or discard. */ if (!bio) { - BUG_ON(operation_flags != WRITE_FLUSH); + BUG_ON(operation_flags != REQ_PREFLUSH); bio = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 81d3db40cd7b..6fdd8e252760 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -297,7 +297,7 @@ static void bch_btree_node_read(struct btree *b) bio->bi_iter.bi_size = KEY_SIZE(&b->key) << 9; bio->bi_end_io = btree_node_read_endio; bio->bi_private = &cl; - bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC); + bio->bi_opf = REQ_OP_READ | REQ_META; bch_bio_map(bio, b->keys.set[0].data); @@ -393,7 +393,7 @@ static void do_btree_node_write(struct btree *b) b->bio->bi_end_io = btree_node_write_endio; b->bio->bi_private = cl; b->bio->bi_iter.bi_size = roundup(set_bytes(i), block_bytes(b->c)); - bio_set_op_attrs(b->bio, REQ_OP_WRITE, REQ_META|WRITE_SYNC|REQ_FUA); + b->bio->bi_opf = REQ_OP_WRITE | REQ_META | REQ_FUA; bch_bio_map(b->bio, i); /* diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 333a1e5f6ae6..1c9130ae0073 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -52,7 +52,7 @@ void bch_btree_verify(struct btree *b) bio->bi_bdev = PTR_CACHE(b->c, &b->key, 0)->bdev; bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0); bio->bi_iter.bi_size = KEY_SIZE(&v->key) << 9; - bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC); + bio->bi_opf = REQ_OP_READ | REQ_META; bch_bio_map(bio, sorted); submit_bio_wait(bio); @@ -113,7 +113,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) check = bio_clone(bio, GFP_NOIO); if (!check) return; - bio_set_op_attrs(check, REQ_OP_READ, READ_SYNC); + check->bi_opf = REQ_OP_READ; if (bio_alloc_pages(check, GFP_NOIO)) goto out_put; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index e8a2b693c928..0d99b5f4b3e6 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -923,7 +923,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) flush->bi_bdev = bio->bi_bdev; flush->bi_end_io = request_endio; flush->bi_private = cl; - bio_set_op_attrs(flush, REQ_OP_WRITE, WRITE_FLUSH); + flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; closure_bio_submit(flush, cl); } diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 849ad441cd76..988edf928466 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -381,7 +381,7 @@ static char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl) return "bad uuid pointer"; bkey_copy(&c->uuid_bucket, k); - uuid_io(c, REQ_OP_READ, READ_SYNC, k, cl); + uuid_io(c, REQ_OP_READ, 0, k, cl); if (j->version < BCACHE_JSET_VERSION_UUIDv1) { struct uuid_entry_v0 *u0 = (void *) c->uuids; @@ -600,7 +600,7 @@ static void prio_read(struct cache *ca, uint64_t bucket) ca->prio_last_buckets[bucket_nr] = bucket; bucket_nr++; - prio_io(ca, bucket, REQ_OP_READ, READ_SYNC); + prio_io(ca, bucket, REQ_OP_READ, 0); if (p->csum != bch_crc64(&p->magic, bucket_bytes(ca) - 8)) pr_warn("bad csum reading priorities"); diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 125aedc3875f..b3ba142e59a4 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1316,7 +1316,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c) { struct dm_io_request io_req = { .bi_op = REQ_OP_WRITE, - .bi_op_flags = WRITE_FLUSH, + .bi_op_flags = REQ_PREFLUSH, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = c->dm_io, diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 07fc1ad42ec5..33e71ea6cc14 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -308,7 +308,7 @@ static int flush_header(struct log_c *lc) }; lc->io_req.bi_op = REQ_OP_WRITE; - lc->io_req.bi_op_flags = WRITE_FLUSH; + lc->io_req.bi_op_flags = REQ_PREFLUSH; return dm_io(&lc->io_req, 1, &null_location, NULL); } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index bdf1606f67bc..1a176d7c8b90 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -261,7 +261,7 @@ static int mirror_flush(struct dm_target *ti) struct mirror *m; struct dm_io_request io_req = { .bi_op = REQ_OP_WRITE, - .bi_op_flags = WRITE_FLUSH, + .bi_op_flags = REQ_PREFLUSH, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = ms->io_client, @@ -657,7 +657,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio) struct mirror *m; struct dm_io_request io_req = { .bi_op = REQ_OP_WRITE, - .bi_op_flags = bio->bi_opf & WRITE_FLUSH_FUA, + .bi_op_flags = bio->bi_opf & (REQ_FUA | REQ_PREFLUSH), .mem.type = DM_IO_BIO, .mem.ptr.bio = bio, .notify.fn = write_callback, diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index b8cf956b577b..b93476c3ba3f 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -741,7 +741,7 @@ static void persistent_commit_exception(struct dm_exception_store *store, /* * Commit exceptions to disk. */ - if (ps->valid && area_io(ps, REQ_OP_WRITE, WRITE_FLUSH_FUA)) + if (ps->valid && area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA)) ps->valid = 0; /* @@ -818,7 +818,7 @@ static int persistent_commit_merge(struct dm_exception_store *store, for (i = 0; i < nr_merged; i++) clear_exception(ps, ps->current_committed - 1 - i); - r = area_io(ps, REQ_OP_WRITE, WRITE_FLUSH_FUA); + r = area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA); if (r < 0) return r; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 147af9536d0c..b2abfa41af3e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1527,7 +1527,7 @@ static struct mapped_device *alloc_dev(int minor) bio_init(&md->flush_bio); md->flush_bio.bi_bdev = md->bdev; - bio_set_op_attrs(&md->flush_bio, REQ_OP_WRITE, WRITE_FLUSH); + md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; dm_stats_init(&md->stats); diff --git a/drivers/md/md.c b/drivers/md/md.c index eac84d8ff724..b69ec7da4bae 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -394,7 +394,7 @@ static void submit_flushes(struct work_struct *ws) bi->bi_end_io = md_end_flush; bi->bi_private = rdev; bi->bi_bdev = rdev->bdev; - bio_set_op_attrs(bi, REQ_OP_WRITE, WRITE_FLUSH); + bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; atomic_inc(&mddev->flush_pending); submit_bio(bi); rcu_read_lock(); @@ -743,7 +743,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, bio_add_page(bio, page, size, 0); bio->bi_private = rdev; bio->bi_end_io = super_written; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH_FUA); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA; atomic_inc(&mddev->pending_writes); submit_bio(bio); diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 1b1ab4a1d132..28d015c6fffe 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -685,7 +685,7 @@ void r5l_flush_stripe_to_raid(struct r5l_log *log) bio_reset(&log->flush_bio); log->flush_bio.bi_bdev = log->rdev->bdev; log->flush_bio.bi_end_io = r5l_log_flush_endio; - bio_set_op_attrs(&log->flush_bio, REQ_OP_WRITE, WRITE_FLUSH); + log->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; submit_bio(&log->flush_bio); } @@ -1053,7 +1053,7 @@ static int r5l_log_write_empty_meta_block(struct r5l_log *log, sector_t pos, mb->checksum = cpu_to_le32(crc); if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE, - WRITE_FUA, false)) { + REQ_FUA, false)) { __free_page(page); return -EIO; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 92ac251e91e6..70acdd379e44 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -913,7 +913,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) { op = REQ_OP_WRITE; if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags)) - op_flags = WRITE_FUA; + op_flags = REQ_FUA; if (test_bit(R5_Discard, &sh->dev[i].flags)) op = REQ_OP_DISCARD; } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index 4a96c2049b7b..c2784cfc5e29 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -58,7 +58,7 @@ static void nvmet_execute_rw(struct nvmet_req *req) if (req->cmd->rw.opcode == nvme_cmd_write) { op = REQ_OP_WRITE; - op_flags = WRITE_ODIRECT; + op_flags = REQ_SYNC | REQ_IDLE; if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) op_flags |= REQ_FUA; } else { @@ -109,7 +109,7 @@ static void nvmet_execute_flush(struct nvmet_req *req) bio->bi_bdev = req->ns->bdev; bio->bi_private = req; bio->bi_end_io = nvmet_bio_done; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; submit_bio(bio); } diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 372d744315f3..d316ed537d59 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -388,7 +388,7 @@ iblock_execute_sync_cache(struct se_cmd *cmd) bio = bio_alloc(GFP_KERNEL, 0); bio->bi_end_io = iblock_end_io_flush; bio->bi_bdev = ib_dev->ibd_bd; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; if (!immed) bio->bi_private = cmd; submit_bio(bio); @@ -686,15 +686,15 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, struct iblock_dev *ib_dev = IBLOCK_DEV(dev); struct request_queue *q = bdev_get_queue(ib_dev->ibd_bd); /* - * Force writethrough using WRITE_FUA if a volatile write cache + * Force writethrough using REQ_FUA if a volatile write cache * is not enabled, or if initiator set the Force Unit Access bit. */ op = REQ_OP_WRITE; if (test_bit(QUEUE_FLAG_FUA, &q->queue_flags)) { if (cmd->se_cmd_flags & SCF_FUA) - op_flags = WRITE_FUA; + op_flags = REQ_FUA; else if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) - op_flags = WRITE_FUA; + op_flags = REQ_FUA; } } else { op = REQ_OP_READ; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c8454a8e35f2..fe10afd51e02 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3485,9 +3485,9 @@ static int write_dev_supers(struct btrfs_device *device, * to go down lazy. */ if (i == 0) - ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_FUA, bh); + ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_FUA, bh); else - ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh); + ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); if (ret) errors++; } @@ -3551,7 +3551,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait) bio->bi_end_io = btrfs_end_empty_barrier; bio->bi_bdev = device->bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; init_completion(&device->flush_wait); bio->bi_private = &device->flush_wait; device->flush_bio = bio; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 66a755150056..ff87bff7bdb6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -127,7 +127,7 @@ struct extent_page_data { */ unsigned int extent_locked:1; - /* tells the submit_bio code to use a WRITE_SYNC */ + /* tells the submit_bio code to use REQ_SYNC */ unsigned int sync_io:1; }; @@ -2047,7 +2047,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, return -EIO; } bio->bi_bdev = dev->bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC); + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; bio_add_page(bio, page, length, pg_offset); if (btrfsic_submit_bio_wait(bio)) { @@ -2388,7 +2388,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, struct inode *inode = page->mapping->host; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct bio *bio; - int read_mode; + int read_mode = 0; int ret; BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); @@ -2404,9 +2404,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, } if (failed_bio->bi_vcnt > 1) - read_mode = READ_SYNC | REQ_FAILFAST_DEV; - else - read_mode = READ_SYNC; + read_mode |= REQ_FAILFAST_DEV; phy_offset >>= inode->i_sb->s_blocksize_bits; bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, @@ -3484,7 +3482,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, unsigned long nr_written = 0; if (wbc->sync_mode == WB_SYNC_ALL) - write_flags = WRITE_SYNC; + write_flags = REQ_SYNC; trace___extent_writepage(page, inode, wbc); @@ -3729,7 +3727,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, unsigned long i, num_pages; unsigned long bio_flags = 0; unsigned long start, end; - int write_flags = (epd->sync_io ? WRITE_SYNC : 0) | REQ_META; + int write_flags = (epd->sync_io ? REQ_SYNC : 0) | REQ_META; int ret = 0; clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); @@ -4076,7 +4074,7 @@ static void flush_epd_write_bio(struct extent_page_data *epd) int ret; bio_set_op_attrs(epd->bio, REQ_OP_WRITE, - epd->sync_io ? WRITE_SYNC : 0); + epd->sync_io ? REQ_SYNC : 0); ret = submit_one_bio(epd->bio, 0, epd->bio_flags); BUG_ON(ret < 0); /* -ENOMEM */ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9a377079af26..c8eb82a416b3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7917,7 +7917,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, struct io_failure_record *failrec; struct bio *bio; int isector; - int read_mode; + int read_mode = 0; int ret; BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); @@ -7936,9 +7936,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, if ((failed_bio->bi_vcnt > 1) || (failed_bio->bi_io_vec->bv_len > BTRFS_I(inode)->root->sectorsize)) - read_mode = READ_SYNC | REQ_FAILFAST_DEV; - else - read_mode = READ_SYNC; + read_mode |= REQ_FAILFAST_DEV; isector = start - btrfs_io_bio(failed_bio)->logical; isector >>= inode->i_sb->s_blocksize_bits; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index fffb9ab8526e..ff3078234d94 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -4440,7 +4440,7 @@ static int write_page_nocow(struct scrub_ctx *sctx, bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = physical_for_dev_replace >> 9; bio->bi_bdev = dev->bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC); + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; ret = bio_add_page(bio, page, PAGE_SIZE, 0); if (ret != PAGE_SIZE) { leave_with_eio: diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index deda46cf1292..0d7d635d8bfb 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6023,7 +6023,7 @@ static void btrfs_end_bio(struct bio *bio) else btrfs_dev_stat_inc(dev, BTRFS_DEV_STAT_READ_ERRS); - if ((bio->bi_opf & WRITE_FLUSH) == WRITE_FLUSH) + if (bio->bi_opf & REQ_PREFLUSH) btrfs_dev_stat_inc(dev, BTRFS_DEV_STAT_FLUSH_ERRS); btrfs_dev_stat_print_on_error(dev); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 09ed29c67848..f137ffe6654c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -62,7 +62,7 @@ struct btrfs_device { int running_pending; /* regular prio bios */ struct btrfs_pending_bios pending_bios; - /* WRITE_SYNC bios */ + /* sync bios */ struct btrfs_pending_bios pending_sync_bios; struct block_device *bdev; diff --git a/fs/buffer.c b/fs/buffer.c index a29335867e30..bc7c2bb30a9b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -753,7 +753,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) * still in flight on potentially older * contents. */ - write_dirty_buffer(bh, WRITE_SYNC); + write_dirty_buffer(bh, REQ_SYNC); /* * Kick off IO for the previous mapping. Note @@ -1684,7 +1684,7 @@ static struct buffer_head *create_page_buffers(struct page *page, struct inode * * prevents this contention from occurring. * * If block_write_full_page() is called with wbc->sync_mode == - * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this + * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this * causes the writes to be flagged as synchronous writes. */ int __block_write_full_page(struct inode *inode, struct page *page, @@ -1697,7 +1697,7 @@ int __block_write_full_page(struct inode *inode, struct page *page, struct buffer_head *bh, *head; unsigned int blocksize, bbits; int nr_underway = 0; - int write_flags = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0); + int write_flags = (wbc->sync_mode == WB_SYNC_ALL ? REQ_SYNC : 0); head = create_page_buffers(page, inode, (1 << BH_Dirty)|(1 << BH_Uptodate)); @@ -3210,7 +3210,7 @@ EXPORT_SYMBOL(__sync_dirty_buffer); int sync_dirty_buffer(struct buffer_head *bh) { - return __sync_dirty_buffer(bh, WRITE_SYNC); + return __sync_dirty_buffer(bh, REQ_SYNC); } EXPORT_SYMBOL(sync_dirty_buffer); diff --git a/fs/direct-io.c b/fs/direct-io.c index fb9aa16a7727..a5138c564019 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1209,7 +1209,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, dio->inode = inode; if (iov_iter_rw(iter) == WRITE) { dio->op = REQ_OP_WRITE; - dio->op_flags = WRITE_ODIRECT; + dio->op_flags = REQ_SYNC | REQ_IDLE; } else { dio->op = REQ_OP_READ; } diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index d89754ef1aab..eb9835638680 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -35,7 +35,7 @@ static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) } /* - * Write the MMP block using WRITE_SYNC to try to get the block on-disk + * Write the MMP block using REQ_SYNC to try to get the block on-disk * faster. */ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) @@ -52,7 +52,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) lock_buffer(bh); bh->b_end_io = end_buffer_write_sync; get_bh(bh); - submit_bh(REQ_OP_WRITE, WRITE_SYNC | REQ_META | REQ_PRIO, bh); + submit_bh(REQ_OP_WRITE, REQ_SYNC | REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); sb_end_write(sb); if (unlikely(!buffer_uptodate(bh))) @@ -88,7 +88,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, get_bh(*bh); lock_buffer(*bh); (*bh)->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, READ_SYNC | REQ_META | REQ_PRIO, *bh); + submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, *bh); wait_on_buffer(*bh); if (!buffer_uptodate(*bh)) { ret = -EIO; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 0094923e5ebf..e0b3b54cdef3 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -340,7 +340,7 @@ void ext4_io_submit(struct ext4_io_submit *io) if (bio) { int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ? - WRITE_SYNC : 0; + REQ_SYNC : 0; bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags); submit_bio(io->io_bio); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6db81fbcbaa6..f31eb286af90 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4553,7 +4553,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) unlock_buffer(sbh); if (sync) { error = __sync_dirty_buffer(sbh, - test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC); + test_opt(sb, BARRIER) ? REQ_FUA : REQ_SYNC); if (error) return error; diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7e9b504bd8b2..d935c06a84f0 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -65,7 +65,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .sbi = sbi, .type = META, .op = REQ_OP_READ, - .op_flags = READ_SYNC | REQ_META | REQ_PRIO, + .op_flags = REQ_META | REQ_PRIO, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, @@ -160,7 +160,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .sbi = sbi, .type = META, .op = REQ_OP_READ, - .op_flags = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : REQ_RAHEAD, + .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD, .encrypted_page = NULL, }; struct blk_plug plug; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9ae194fd2fdb..b80bf10603d7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -198,11 +198,9 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, if (type >= META_FLUSH) { io->fio.type = META_FLUSH; io->fio.op = REQ_OP_WRITE; - if (test_opt(sbi, NOBARRIER)) - io->fio.op_flags = WRITE_FLUSH | REQ_META | REQ_PRIO; - else - io->fio.op_flags = WRITE_FLUSH_FUA | REQ_META | - REQ_PRIO; + io->fio.op_flags = REQ_PREFLUSH | REQ_META | REQ_PRIO; + if (!test_opt(sbi, NOBARRIER)) + io->fio.op_flags |= REQ_FUA; } __submit_merged_bio(io); out: @@ -483,7 +481,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index) return page; f2fs_put_page(page, 0); - page = get_read_data_page(inode, index, READ_SYNC, false); + page = get_read_data_page(inode, index, 0, false); if (IS_ERR(page)) return page; @@ -509,7 +507,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct page *page; repeat: - page = get_read_data_page(inode, index, READ_SYNC, for_write); + page = get_read_data_page(inode, index, 0, for_write); if (IS_ERR(page)) return page; @@ -1251,7 +1249,7 @@ static int f2fs_write_data_page(struct page *page, .sbi = sbi, .type = DATA, .op = REQ_OP_WRITE, - .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0, + .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? REQ_SYNC : 0, .page = page, .encrypted_page = NULL, }; @@ -1663,7 +1661,7 @@ repeat: err = PTR_ERR(bio); goto fail; } - bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC); + bio->bi_opf = REQ_OP_READ; if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); err = -EFAULT; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 93985c64d8a8..9eb11b2244ea 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -550,7 +550,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx) .sbi = F2FS_I_SB(inode), .type = DATA, .op = REQ_OP_READ, - .op_flags = READ_SYNC, + .op_flags = 0, .encrypted_page = NULL, }; struct dnode_of_data dn; @@ -625,7 +625,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx) f2fs_wait_on_page_writeback(dn.node_page, NODE, true); fio.op = REQ_OP_WRITE; - fio.op_flags = WRITE_SYNC; + fio.op_flags = REQ_SYNC; fio.new_blkaddr = newaddr; f2fs_submit_page_mbio(&fio); @@ -663,7 +663,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type) .sbi = F2FS_I_SB(inode), .type = DATA, .op = REQ_OP_WRITE, - .op_flags = WRITE_SYNC, + .op_flags = REQ_SYNC, .page = page, .encrypted_page = NULL, }; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 5f1a67f756af..2e7f54c191b4 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -111,7 +111,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) .sbi = F2FS_I_SB(dn->inode), .type = DATA, .op = REQ_OP_WRITE, - .op_flags = WRITE_SYNC | REQ_PRIO, + .op_flags = REQ_SYNC | REQ_PRIO, .page = page, .encrypted_page = NULL, }; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 01177ecdeab8..932f3f8bb57b 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1134,7 +1134,7 @@ repeat: if (!page) return ERR_PTR(-ENOMEM); - err = read_node_page(page, READ_SYNC); + err = read_node_page(page, 0); if (err < 0) { f2fs_put_page(page, 1); return ERR_PTR(err); @@ -1570,7 +1570,7 @@ static int f2fs_write_node_page(struct page *page, .sbi = sbi, .type = NODE, .op = REQ_OP_WRITE, - .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0, + .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? REQ_SYNC : 0, .page = page, .encrypted_page = NULL, }; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fc886f008449..f1b4a1775ebe 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -259,7 +259,7 @@ static int __commit_inmem_pages(struct inode *inode, .sbi = sbi, .type = DATA, .op = REQ_OP_WRITE, - .op_flags = WRITE_SYNC | REQ_PRIO, + .op_flags = REQ_SYNC | REQ_PRIO, .encrypted_page = NULL, }; bool submit_bio = false; @@ -420,7 +420,7 @@ repeat: fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); bio->bi_bdev = sbi->sb->s_bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; ret = submit_bio_wait(bio); llist_for_each_entry_safe(cmd, next, @@ -454,7 +454,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) atomic_inc(&fcc->submit_flush); bio->bi_bdev = sbi->sb->s_bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; ret = submit_bio_wait(bio); atomic_dec(&fcc->submit_flush); bio_put(bio); @@ -1515,7 +1515,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) .sbi = sbi, .type = META, .op = REQ_OP_WRITE, - .op_flags = WRITE_SYNC | REQ_META | REQ_PRIO, + .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, .old_blkaddr = page->index, .new_blkaddr = page->index, .page = page, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6132b4ce4e4c..2cac6bb86080 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1238,7 +1238,7 @@ static int __f2fs_commit_super(struct buffer_head *bh, unlock_buffer(bh); /* it's rare case, we can do fua all the time */ - return __sync_dirty_buffer(bh, WRITE_FLUSH_FUA); + return __sync_dirty_buffer(bh, REQ_PREFLUSH | REQ_FUA); } static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index e58ccef09c91..27c00a16def0 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -657,7 +657,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) struct gfs2_log_header *lh; unsigned int tail; u32 hash; - int op_flags = WRITE_FLUSH_FUA | REQ_META; + int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META; struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO); enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); lh = page_address(page); @@ -682,7 +682,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) { gfs2_ordered_wait(sdp); log_flush_wait(sdp); - op_flags = WRITE_SYNC | REQ_META | REQ_PRIO; + op_flags = REQ_SYNC | REQ_META | REQ_PRIO; } sdp->sd_log_idle = (tail == sdp->sd_log_flush_head); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 373639a59782..e562b1191c9c 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -38,7 +38,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb struct buffer_head *bh, *head; int nr_underway = 0; int write_flags = REQ_META | REQ_PRIO | - (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0); + (wbc->sync_mode == WB_SYNC_ALL ? REQ_SYNC : 0); BUG_ON(!PageLocked(page)); BUG_ON(!page_has_buffers(page)); @@ -285,7 +285,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, } } - gfs2_submit_bhs(REQ_OP_READ, READ_SYNC | REQ_META | REQ_PRIO, bhs, num); + gfs2_submit_bhs(REQ_OP_READ, REQ_META | REQ_PRIO, bhs, num); if (!(flags & DIO_WAIT)) return 0; @@ -453,7 +453,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) if (buffer_uptodate(first_bh)) goto out; if (!buffer_locked(first_bh)) - ll_rw_block(REQ_OP_READ, READ_SYNC | REQ_META, 1, &first_bh); + ll_rw_block(REQ_OP_READ, REQ_META, 1, &first_bh); dblock++; extlen--; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index ff72ac6439c8..a34308df927f 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -246,7 +246,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) bio->bi_end_io = end_bio_io_page; bio->bi_private = page; - bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC | REQ_META); + bio_set_op_attrs(bio, REQ_OP_READ, REQ_META); submit_bio(bio); wait_on_page_locked(page); bio_put(bio); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 11854dd84572..67aedf4c2e7c 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -221,7 +221,7 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait) error2 = hfsplus_submit_bio(sb, sbi->part_start + HFSPLUS_VOLHEAD_SECTOR, sbi->s_vhdr_buf, NULL, REQ_OP_WRITE, - WRITE_SYNC); + REQ_SYNC); if (!error) error = error2; if (!write_backup) @@ -230,7 +230,7 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait) error2 = hfsplus_submit_bio(sb, sbi->part_start + sbi->sect_count - 2, sbi->s_backup_vhdr_buf, NULL, REQ_OP_WRITE, - WRITE_SYNC); + REQ_SYNC); if (!error) error2 = error; out: diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 684996c8a3a4..4055f51617ef 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -186,7 +186,7 @@ __flush_batch(journal_t *journal, int *batch_count) blk_start_plug(&plug); for (i = 0; i < *batch_count; i++) - write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE_SYNC); + write_dirty_buffer(journal->j_chkpt_bhs[i], REQ_SYNC); blk_finish_plug(&plug); for (i = 0; i < *batch_count; i++) { diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 31f8ca046639..8c514367ba5a 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -155,9 +155,10 @@ static int journal_submit_commit_record(journal_t *journal, if (journal->j_flags & JBD2_BARRIER && !jbd2_has_feature_async_commit(journal)) - ret = submit_bh(REQ_OP_WRITE, WRITE_SYNC | WRITE_FLUSH_FUA, bh); + ret = submit_bh(REQ_OP_WRITE, + REQ_SYNC | REQ_PREFLUSH | REQ_FUA, bh); else - ret = submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh); + ret = submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); *cbh = bh; return ret; @@ -402,7 +403,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) jbd2_journal_update_sb_log_tail(journal, journal->j_tail_sequence, journal->j_tail, - WRITE_SYNC); + REQ_SYNC); mutex_unlock(&journal->j_checkpoint_mutex); } else { jbd_debug(3, "superblock not updated\n"); @@ -717,7 +718,7 @@ start_journal_io: clear_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; - submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh); + submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); } cond_resched(); stats.run.rs_blocks_logged += bufs; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 927da4956a89..8ed971eeab44 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -913,7 +913,7 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) * space and if we lose sb update during power failure we'd replay * old transaction with possibly newly overwritten data. */ - ret = jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA); + ret = jbd2_journal_update_sb_log_tail(journal, tid, block, REQ_FUA); if (ret) goto out; @@ -1306,7 +1306,7 @@ static int journal_reset(journal_t *journal) /* Lock here to make assertions happy... */ mutex_lock(&journal->j_checkpoint_mutex); /* - * Update log tail information. We use WRITE_FUA since new + * Update log tail information. We use REQ_FUA since new * transaction will start reusing journal space and so we * must make sure information about current log tail is on * disk before that. @@ -1314,7 +1314,7 @@ static int journal_reset(journal_t *journal) jbd2_journal_update_sb_log_tail(journal, journal->j_tail_sequence, journal->j_tail, - WRITE_FUA); + REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } return jbd2_journal_start_thread(journal); @@ -1454,7 +1454,7 @@ void jbd2_journal_update_sb_errno(journal_t *journal) sb->s_errno = cpu_to_be32(journal->j_errno); read_unlock(&journal->j_state_lock); - jbd2_write_superblock(journal, WRITE_FUA); + jbd2_write_superblock(journal, REQ_FUA); } EXPORT_SYMBOL(jbd2_journal_update_sb_errno); @@ -1720,7 +1720,8 @@ int jbd2_journal_destroy(journal_t *journal) ++journal->j_transaction_sequence; write_unlock(&journal->j_state_lock); - jbd2_mark_journal_empty(journal, WRITE_FLUSH_FUA); + jbd2_mark_journal_empty(journal, + REQ_PREFLUSH | REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } else err = -EIO; @@ -1979,7 +1980,7 @@ int jbd2_journal_flush(journal_t *journal) * the magic code for a fully-recovered superblock. Any future * commits of data to the journal will restore the current * s_start value. */ - jbd2_mark_journal_empty(journal, WRITE_FUA); + jbd2_mark_journal_empty(journal, REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); write_lock(&journal->j_state_lock); J_ASSERT(!journal->j_running_transaction); @@ -2025,7 +2026,7 @@ int jbd2_journal_wipe(journal_t *journal, int write) if (write) { /* Lock to make assertions happy... */ mutex_lock(&journal->j_checkpoint_mutex); - jbd2_mark_journal_empty(journal, WRITE_FUA); + jbd2_mark_journal_empty(journal, REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 91171dc352cb..cfc38b552118 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -648,7 +648,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(descriptor); BUFFER_TRACE(descriptor, "write"); set_buffer_dirty(descriptor); - write_dirty_buffer(descriptor, WRITE_SYNC); + write_dirty_buffer(descriptor, REQ_SYNC); } #endif diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index a21ea8b3e5fa..bb1da1feafeb 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -2002,7 +2002,7 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) bio->bi_end_io = lbmIODone; bio->bi_private = bp; - bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC); + bio->bi_opf = REQ_OP_READ; /*check if journaling to disk has been disabled*/ if (log->no_integrity) { bio->bi_iter.bi_size = 0; @@ -2146,7 +2146,7 @@ static void lbmStartIO(struct lbuf * bp) bio->bi_end_io = lbmIODone; bio->bi_private = bp; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC); + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; /* check if journaling to disk has been disabled */ if (log->no_integrity) { diff --git a/fs/mpage.c b/fs/mpage.c index d2413af0823a..f35e2819d0c6 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -489,7 +489,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, struct buffer_head map_bh; loff_t i_size = i_size_read(inode); int ret = 0; - int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0); + int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? REQ_SYNC : 0); if (page_has_buffers(page)) { struct buffer_head *head = page_buffers(page); @@ -705,7 +705,7 @@ mpage_writepages(struct address_space *mapping, ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd); if (mpd.bio) { int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? - WRITE_SYNC : 0); + REQ_SYNC : 0); mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio); } } @@ -726,7 +726,7 @@ int mpage_writepage(struct page *page, get_block_t get_block, int ret = __mpage_writepage(page, wbc, &mpd); if (mpd.bio) { int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? - WRITE_SYNC : 0); + REQ_SYNC : 0); mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio); } return ret; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index c95d369e90aa..12eeae62a2b1 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -189,7 +189,7 @@ static int nilfs_sync_super(struct super_block *sb, int flag) set_buffer_dirty(nilfs->ns_sbh[0]); if (nilfs_test_opt(nilfs, BARRIER)) { err = __sync_dirty_buffer(nilfs->ns_sbh[0], - WRITE_SYNC | WRITE_FLUSH_FUA); + REQ_SYNC | REQ_PREFLUSH | REQ_FUA); } else { err = sync_dirty_buffer(nilfs->ns_sbh[0]); } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 636abcbd4650..52eef16edb01 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -627,7 +627,7 @@ static int o2hb_issue_node_write(struct o2hb_region *reg, slot = o2nm_this_node(); bio = o2hb_setup_one_bio(reg, write_wc, &slot, slot+1, REQ_OP_WRITE, - WRITE_SYNC); + REQ_SYNC); if (IS_ERR(bio)) { status = PTR_ERR(bio); mlog_errno(status); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index bc2dde2423c2..aa40c242f1db 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1111,7 +1111,8 @@ static int flush_commit_list(struct super_block *s, mark_buffer_dirty(jl->j_commit_bh) ; depth = reiserfs_write_unlock_nested(s); if (reiserfs_barrier_flush(s)) - __sync_dirty_buffer(jl->j_commit_bh, WRITE_FLUSH_FUA); + __sync_dirty_buffer(jl->j_commit_bh, + REQ_PREFLUSH | REQ_FUA); else sync_dirty_buffer(jl->j_commit_bh); reiserfs_write_lock_nested(s, depth); @@ -1269,7 +1270,8 @@ static int _update_journal_header_block(struct super_block *sb, depth = reiserfs_write_unlock_nested(sb); if (reiserfs_barrier_flush(sb)) - __sync_dirty_buffer(journal->j_header_bh, WRITE_FLUSH_FUA); + __sync_dirty_buffer(journal->j_header_bh, + REQ_PREFLUSH | REQ_FUA); else sync_dirty_buffer(journal->j_header_bh); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 3e57a56cf829..594e02c485b2 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -495,8 +495,10 @@ xfs_submit_ioend( ioend->io_bio->bi_private = ioend; ioend->io_bio->bi_end_io = xfs_end_bio; - bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE, - (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0); + ioend->io_bio->bi_opf = REQ_OP_WRITE; + if (wbc->sync_mode == WB_SYNC_ALL) + ioend->io_bio->bi_opf |= REQ_SYNC; + /* * If we are failing the IO now, just mark the ioend with an * error and finish it. This will run IO completion immediately @@ -567,8 +569,9 @@ xfs_chain_bio( bio_chain(ioend->io_bio, new); bio_get(ioend->io_bio); /* for xfs_destroy_ioend */ - bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE, - (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0); + ioend->io_bio->bi_opf = REQ_OP_WRITE; + if (wbc->sync_mode == WB_SYNC_ALL) + ioend->io_bio->bi_opf |= REQ_SYNC; submit_bio(ioend->io_bio); ioend->io_bio = new; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index b5b9bffe3520..33c435f3316c 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1304,7 +1304,7 @@ _xfs_buf_ioapply( if (bp->b_flags & XBF_WRITE) { op = REQ_OP_WRITE; if (bp->b_flags & XBF_SYNCIO) - op_flags = WRITE_SYNC; + op_flags = REQ_SYNC; if (bp->b_flags & XBF_FUA) op_flags |= REQ_FUA; if (bp->b_flags & XBF_FLUSH) diff --git a/include/linux/fs.h b/include/linux/fs.h index 46a74209917f..7a1b78ab7c15 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -151,58 +151,11 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, */ #define CHECK_IOVEC_ONLY -1 -/* - * The below are the various read and write flags that we support. Some of - * them include behavioral modifiers that send information down to the - * block layer and IO scheduler. They should be used along with a req_op. - * Terminology: - * - * The block layer uses device plugging to defer IO a little bit, in - * the hope that we will see more IO very shortly. This increases - * coalescing of adjacent IO and thus reduces the number of IOs we - * have to send to the device. It also allows for better queuing, - * if the IO isn't mergeable. If the caller is going to be waiting - * for the IO, then he must ensure that the device is unplugged so - * that the IO is dispatched to the driver. - * - * All IO is handled async in Linux. This is fine for background - * writes, but for reads or writes that someone waits for completion - * on, we want to notify the block layer and IO scheduler so that they - * know about it. That allows them to make better scheduling - * decisions. So when the below references 'sync' and 'async', it - * is referencing this priority hint. - * - * With that in mind, the available types are: - * - * READ A normal read operation. Device will be plugged. - * READ_SYNC A synchronous read. Device is not plugged, caller can - * immediately wait on this read without caring about - * unplugging. - * WRITE A normal async write. Device will be plugged. - * WRITE_SYNC Synchronous write. Identical to WRITE, but passes down - * the hint that someone will be waiting on this IO - * shortly. The write equivalent of READ_SYNC. - * WRITE_ODIRECT Special case write for O_DIRECT only. - * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. - * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on - * non-volatile media on completion. - * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded - * by a cache flush and data is guaranteed to be on - * non-volatile media on completion. - * - */ #define RW_MASK REQ_OP_WRITE #define READ REQ_OP_READ #define WRITE REQ_OP_WRITE -#define READ_SYNC 0 -#define WRITE_SYNC REQ_SYNC -#define WRITE_ODIRECT (REQ_SYNC | REQ_IDLE) -#define WRITE_FLUSH REQ_PREFLUSH -#define WRITE_FUA REQ_FUA -#define WRITE_FLUSH_FUA (REQ_PREFLUSH | REQ_FUA) - /* * Attribute flags. These should be or-ed together to figure out what * has been changed! diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index a9d34424450d..5da2c829a718 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -55,7 +55,7 @@ TRACE_DEFINE_ENUM(CP_DISCARD); { IPU, "IN-PLACE" }, \ { OPU, "OUT-OF-PLACE" }) -#define F2FS_BIO_FLAG_MASK(t) (t & (REQ_RAHEAD | WRITE_FLUSH_FUA)) +#define F2FS_BIO_FLAG_MASK(t) (t & (REQ_RAHEAD | REQ_PREFLUSH | REQ_FUA)) #define F2FS_BIO_EXTRA_MASK(t) (t & (REQ_META | REQ_PRIO)) #define show_bio_type(op_flags) show_bio_op_flags(op_flags), \ @@ -65,11 +65,9 @@ TRACE_DEFINE_ENUM(CP_DISCARD); __print_symbolic(F2FS_BIO_FLAG_MASK(flags), \ { 0, "WRITE" }, \ { REQ_RAHEAD, "READAHEAD" }, \ - { READ_SYNC, "READ_SYNC" }, \ - { WRITE_SYNC, "WRITE_SYNC" }, \ - { WRITE_FLUSH, "WRITE_FLUSH" }, \ - { WRITE_FUA, "WRITE_FUA" }, \ - { WRITE_FLUSH_FUA, "WRITE_FLUSH_FUA" }) + { REQ_SYNC, "REQ_SYNC" }, \ + { REQ_PREFLUSH, "REQ_PREFLUSH" }, \ + { REQ_FUA, "REQ_FUA" }) #define show_bio_extra(type) \ __print_symbolic(F2FS_BIO_EXTRA_MASK(type), \ diff --git a/kernel/power/swap.c b/kernel/power/swap.c index a3b1e617bcdc..32e0c232efba 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -307,7 +307,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) { int error; - hib_submit_io(REQ_OP_READ, READ_SYNC, swsusp_resume_block, + hib_submit_io(REQ_OP_READ, 0, swsusp_resume_block, swsusp_header, NULL); if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { @@ -317,7 +317,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) swsusp_header->flags = flags; if (flags & SF_CRC32_MODE) swsusp_header->crc32 = handle->crc32; - error = hib_submit_io(REQ_OP_WRITE, WRITE_SYNC, + error = hib_submit_io(REQ_OP_WRITE, REQ_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { printk(KERN_ERR "PM: Swap header not found!\n"); @@ -397,7 +397,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb) } else { src = buf; } - return hib_submit_io(REQ_OP_WRITE, WRITE_SYNC, offset, src, hb); + return hib_submit_io(REQ_OP_WRITE, REQ_SYNC, offset, src, hb); } static void release_swap_writer(struct swap_map_handle *handle) @@ -1000,8 +1000,7 @@ static int get_swap_reader(struct swap_map_handle *handle, return -ENOMEM; } - error = hib_submit_io(REQ_OP_READ, READ_SYNC, offset, - tmp->map, NULL); + error = hib_submit_io(REQ_OP_READ, 0, offset, tmp->map, NULL); if (error) { release_swap_reader(handle); return error; @@ -1025,7 +1024,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf, offset = handle->cur->entries[handle->k]; if (!offset) return -EFAULT; - error = hib_submit_io(REQ_OP_READ, READ_SYNC, offset, buf, hb); + error = hib_submit_io(REQ_OP_READ, 0, offset, buf, hb); if (error) return error; if (++handle->k >= MAP_PAGE_ENTRIES) { @@ -1534,7 +1533,7 @@ int swsusp_check(void) if (!IS_ERR(hib_resume_bdev)) { set_blocksize(hib_resume_bdev, PAGE_SIZE); clear_page(swsusp_header); - error = hib_submit_io(REQ_OP_READ, READ_SYNC, + error = hib_submit_io(REQ_OP_READ, 0, swsusp_resume_block, swsusp_header, NULL); if (error) @@ -1543,7 +1542,7 @@ int swsusp_check(void) if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) { memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); /* Reset swap signature now */ - error = hib_submit_io(REQ_OP_WRITE, WRITE_SYNC, + error = hib_submit_io(REQ_OP_WRITE, REQ_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { @@ -1588,11 +1587,11 @@ int swsusp_unmark(void) { int error; - hib_submit_io(REQ_OP_READ, READ_SYNC, swsusp_resume_block, + hib_submit_io(REQ_OP_READ, 0, swsusp_resume_block, swsusp_header, NULL); if (!memcmp(HIBERNATE_SIG,swsusp_header->sig, 10)) { memcpy(swsusp_header->sig,swsusp_header->orig_sig, 10); - error = hib_submit_io(REQ_OP_WRITE, WRITE_SYNC, + error = hib_submit_io(REQ_OP_WRITE, REQ_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { -- cgit v1.2.3 From 2f8b544477e627a42e66902e948d87f86554aeca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:13 -0600 Subject: block,fs: untangle fs.h and blk_types.h Nothing in fs.h should require blk_types.h to be included. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/9p/vfs_addr.c | 1 + fs/cifs/connect.c | 1 + fs/cifs/transport.c | 1 + fs/gfs2/dir.c | 1 + fs/isofs/compress.c | 1 + fs/ntfs/logfile.c | 1 + fs/ocfs2/buffer_head_io.c | 1 + fs/orangefs/inode.c | 1 + fs/reiserfs/stree.c | 1 + fs/squashfs/block.c | 1 + fs/udf/dir.c | 1 + fs/udf/directory.c | 1 + fs/udf/inode.c | 1 + fs/ufs/balloc.c | 1 + include/linux/fs.h | 2 +- include/linux/swap.h | 1 + include/linux/writeback.h | 2 ++ lib/iov_iter.c | 1 + 18 files changed, 19 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 6181ad79e1a5..5ca1fb0043f6 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index aab5227979e2..db726e8311ca 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "cifspdu.h" #include "cifsglob.h" diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 206a597b2293..5f02edc819af 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 3cdde5f5d399..79113219be5f 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -62,6 +62,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c index 44af14b2e916..9bb2fe35799d 100644 --- a/fs/isofs/compress.c +++ b/fs/isofs/compress.c @@ -18,6 +18,7 @@ #include #include +#include #include #include diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index 761f12f7f3ef..353379ff6057 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "attrib.h" #include "aops.h" diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 8f040f88ade4..d9ebe11c8990 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -26,6 +26,7 @@ #include #include #include +#include #include diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index ef3b4eb54cf2..551bc74ed2b8 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -8,6 +8,7 @@ * Linux VFS inode operations. */ +#include #include "protocol.h" #include "orangefs-kernel.h" #include "orangefs-bufmap.h" diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index a97e352d05d3..0037aea97d39 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "reiserfs.h" #include #include diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index ce62a380314f..2751476e6b6e 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "squashfs_fs.h" #include "squashfs_fs_sb.h" diff --git a/fs/udf/dir.c b/fs/udf/dir.c index aaec13c95253..2d0e028067eb 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "udf_i.h" #include "udf_sb.h" diff --git a/fs/udf/directory.c b/fs/udf/directory.c index 988d5352bdb8..7aa48bd7cbaf 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -16,6 +16,7 @@ #include #include +#include struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, struct udf_fileident_bh *fibh, diff --git a/fs/udf/inode.c b/fs/udf/inode.c index aad46401ede5..0f3db71753aa 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "udf_i.h" #include "udf_sb.h" diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 67e085d591d8..b035af54f538 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "ufs_fs.h" diff --git a/include/linux/fs.h b/include/linux/fs.h index 5b0a9b77534d..8533e9d59c29 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -38,6 +37,7 @@ struct backing_dev_info; struct bdi_writeback; +struct bio; struct export_operations; struct hd_geometry; struct iovec; diff --git a/include/linux/swap.h b/include/linux/swap.h index a56523cefb9b..3a6aebc23001 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -11,6 +11,7 @@ #include #include #include +#include #include struct notifier_block; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 797100e10010..e4c38703bf4e 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -10,6 +10,8 @@ #include #include +struct bio; + DECLARE_PER_CPU(int, dirty_throttle_leaks); /* diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f0c7f1481bae..efc953c47572 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1,4 +1,5 @@ #include +#include #include #include #include -- cgit v1.2.3 From be297968da22cf40c9c419df51e71ba8856a2ec2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:16 -0600 Subject: mm: only include blk_types in swap.h if CONFIG_SWAP is enabled It's only needed for the CONFIG_SWAP-only use of bio_end_io_t. Because CONFIG_SWAP implies CONFIG_BLOCK this will allow to drop some ifdefs in blk_types.h. Instead we'll need to add a few explicit includes that were implicit before, though. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/staging/lustre/include/linux/lnet/types.h | 1 + drivers/staging/lustre/lustre/llite/rw.c | 1 + fs/ntfs/aops.c | 1 + fs/ntfs/mft.c | 1 + fs/reiserfs/inode.c | 1 + fs/splice.c | 1 + include/linux/swap.h | 4 +++- 7 files changed, 9 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/drivers/staging/lustre/include/linux/lnet/types.h b/drivers/staging/lustre/include/linux/lnet/types.h index f8be0e2f7bf7..8ca1e9d0cfe2 100644 --- a/drivers/staging/lustre/include/linux/lnet/types.h +++ b/drivers/staging/lustre/include/linux/lnet/types.h @@ -34,6 +34,7 @@ #define __LNET_TYPES_H__ #include +#include /** \addtogroup lnet * @{ diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c index 50c0152ba022..76a6836cdf70 100644 --- a/drivers/staging/lustre/lustre/llite/rw.c +++ b/drivers/staging/lustre/lustre/llite/rw.c @@ -47,6 +47,7 @@ #include /* current_is_kswapd() */ #include +#include #define DEBUG_SUBSYSTEM S_LLITE diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index fe251f187ff8..d0cf6fee5c77 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "aops.h" #include "attrib.h" diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index d3c009626032..b6f402194f02 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "attrib.h" #include "aops.h" diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 58b2dedb2a3a..cfeae9b0a2b7 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -19,6 +19,7 @@ #include #include #include +#include int reiserfs_commit_write(struct file *f, struct page *page, unsigned from, unsigned to); diff --git a/fs/splice.c b/fs/splice.c index 153d4f3bd441..51492f26915a 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -17,6 +17,7 @@ * Copyright (C) 2006 Ingo Molnar * */ +#include #include #include #include diff --git a/include/linux/swap.h b/include/linux/swap.h index 3a6aebc23001..bfee1af1f54f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -11,7 +11,6 @@ #include #include #include -#include #include struct notifier_block; @@ -352,6 +351,9 @@ extern int kswapd_run(int nid); extern void kswapd_stop(int nid); #ifdef CONFIG_SWAP + +#include /* for bio_end_io_t */ + /* linux/mm/page_io.c */ extern int swap_readpage(struct page *); extern int swap_writepage(struct page *page, struct writeback_control *wbc); -- cgit v1.2.3 From 7637241e651ec36e409412869f986dd5f097735f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 1 Nov 2016 10:00:38 -0600 Subject: writeback: add wbc_to_write_flags() Add wbc_to_write_flags(), which returns the write modifier flags to use, based on a struct writeback_control. No functional changes in this patch, but it prepares us for factoring other wbc fields for write type. Signed-off-by: Jens Axboe Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig --- fs/buffer.c | 2 +- fs/f2fs/data.c | 2 +- fs/f2fs/node.c | 2 +- fs/gfs2/meta_io.c | 3 +-- fs/mpage.c | 2 +- fs/xfs/xfs_aops.c | 8 ++------ include/linux/writeback.h | 9 +++++++++ mm/page_io.c | 5 +---- 8 files changed, 17 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index bc7c2bb30a9b..af5776da814a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1697,7 +1697,7 @@ int __block_write_full_page(struct inode *inode, struct page *page, struct buffer_head *bh, *head; unsigned int blocksize, bbits; int nr_underway = 0; - int write_flags = (wbc->sync_mode == WB_SYNC_ALL ? REQ_SYNC : 0); + int write_flags = wbc_to_write_flags(wbc); head = create_page_buffers(page, inode, (1 << BH_Dirty)|(1 << BH_Uptodate)); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b80bf10603d7..9e5561fa4cb6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1249,7 +1249,7 @@ static int f2fs_write_data_page(struct page *page, .sbi = sbi, .type = DATA, .op = REQ_OP_WRITE, - .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? REQ_SYNC : 0, + .op_flags = wbc_to_write_flags(wbc), .page = page, .encrypted_page = NULL, }; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 932f3f8bb57b..d1e29deb4598 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1570,7 +1570,7 @@ static int f2fs_write_node_page(struct page *page, .sbi = sbi, .type = NODE, .op = REQ_OP_WRITE, - .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? REQ_SYNC : 0, + .op_flags = wbc_to_write_flags(wbc), .page = page, .encrypted_page = NULL, }; diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index e562b1191c9c..49db8ef13fdf 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -37,8 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb { struct buffer_head *bh, *head; int nr_underway = 0; - int write_flags = REQ_META | REQ_PRIO | - (wbc->sync_mode == WB_SYNC_ALL ? REQ_SYNC : 0); + int write_flags = REQ_META | REQ_PRIO | wbc_to_write_flags(wbc); BUG_ON(!PageLocked(page)); BUG_ON(!page_has_buffers(page)); diff --git a/fs/mpage.c b/fs/mpage.c index f35e2819d0c6..98fc11aa7e0b 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -489,7 +489,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, struct buffer_head map_bh; loff_t i_size = i_size_read(inode); int ret = 0; - int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? REQ_SYNC : 0); + int op_flags = wbc_to_write_flags(wbc); if (page_has_buffers(page)) { struct buffer_head *head = page_buffers(page); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 594e02c485b2..6be5204a06d3 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -495,9 +495,7 @@ xfs_submit_ioend( ioend->io_bio->bi_private = ioend; ioend->io_bio->bi_end_io = xfs_end_bio; - ioend->io_bio->bi_opf = REQ_OP_WRITE; - if (wbc->sync_mode == WB_SYNC_ALL) - ioend->io_bio->bi_opf |= REQ_SYNC; + ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); /* * If we are failing the IO now, just mark the ioend with an @@ -569,9 +567,7 @@ xfs_chain_bio( bio_chain(ioend->io_bio, new); bio_get(ioend->io_bio); /* for xfs_destroy_ioend */ - ioend->io_bio->bi_opf = REQ_OP_WRITE; - if (wbc->sync_mode == WB_SYNC_ALL) - ioend->io_bio->bi_opf |= REQ_SYNC; + ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); submit_bio(ioend->io_bio); ioend->io_bio = new; } diff --git a/include/linux/writeback.h b/include/linux/writeback.h index e4c38703bf4e..50c96ee8108f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -9,6 +9,7 @@ #include #include #include +#include struct bio; @@ -102,6 +103,14 @@ struct writeback_control { #endif }; +static inline int wbc_to_write_flags(struct writeback_control *wbc) +{ + if (wbc->sync_mode == WB_SYNC_ALL) + return REQ_SYNC; + + return 0; +} + /* * A wb_domain represents a domain that wb's (bdi_writeback's) belong to * and are measured against each other in. There always is one global diff --git a/mm/page_io.c b/mm/page_io.c index a2651f58c86a..23f6d0d3470f 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -320,10 +320,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, ret = -ENOMEM; goto out; } - if (wbc->sync_mode == WB_SYNC_ALL) - bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC); - else - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); count_vm_event(PSWPOUT); set_page_writeback(page); unlock_page(page); -- cgit v1.2.3 From bbd7bb7017d5c2b1e75f3818b4ce88fa58bb0eab Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 4 Nov 2016 09:34:34 -0600 Subject: block: move poll code to blk-mq The poll code is blk-mq specific, let's move it to blk-mq.c. This is a prep patch for improving the polling code. Signed-off-by: Jens Axboe Reviewed-by: Christoph Hellwig --- block/blk-core.c | 46 ------------------------------------- block/blk-mq.c | 54 ++++++++++++++++++++++++++++++++++++++++++++ drivers/nvme/target/io-cmd.c | 2 +- fs/direct-io.c | 2 +- include/linux/blkdev.h | 2 +- 5 files changed, 57 insertions(+), 49 deletions(-) (limited to 'fs') diff --git a/block/blk-core.c b/block/blk-core.c index 59f8129a4295..eea246567884 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -3312,52 +3312,6 @@ void blk_finish_plug(struct blk_plug *plug) } EXPORT_SYMBOL(blk_finish_plug); -bool blk_poll(struct request_queue *q, blk_qc_t cookie) -{ - struct blk_plug *plug; - long state; - unsigned int queue_num; - struct blk_mq_hw_ctx *hctx; - - if (!q->mq_ops || !q->mq_ops->poll || !blk_qc_t_valid(cookie) || - !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) - return false; - - queue_num = blk_qc_t_to_queue_num(cookie); - hctx = q->queue_hw_ctx[queue_num]; - hctx->poll_considered++; - - plug = current->plug; - if (plug) - blk_flush_plug_list(plug, false); - - state = current->state; - while (!need_resched()) { - int ret; - - hctx->poll_invoked++; - - ret = q->mq_ops->poll(hctx, blk_qc_t_to_tag(cookie)); - if (ret > 0) { - hctx->poll_success++; - set_current_state(TASK_RUNNING); - return true; - } - - if (signal_pending_state(state, current)) - set_current_state(TASK_RUNNING); - - if (current->state == TASK_RUNNING) - return true; - if (ret < 0) - break; - cpu_relax(); - } - - return false; -} -EXPORT_SYMBOL_GPL(blk_poll); - #ifdef CONFIG_PM /** * blk_pm_runtime_init - Block layer runtime PM initialization routine diff --git a/block/blk-mq.c b/block/blk-mq.c index 77110aed24ea..ae8df5ec20d3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2461,6 +2461,60 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) } EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues); +static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq) +{ + struct request_queue *q = hctx->queue; + long state; + + hctx->poll_considered++; + + state = current->state; + while (!need_resched()) { + int ret; + + hctx->poll_invoked++; + + ret = q->mq_ops->poll(hctx, rq->tag); + if (ret > 0) { + hctx->poll_success++; + set_current_state(TASK_RUNNING); + return true; + } + + if (signal_pending_state(state, current)) + set_current_state(TASK_RUNNING); + + if (current->state == TASK_RUNNING) + return true; + if (ret < 0) + break; + cpu_relax(); + } + + return false; +} + +bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie) +{ + struct blk_mq_hw_ctx *hctx; + struct blk_plug *plug; + struct request *rq; + + if (!q->mq_ops || !q->mq_ops->poll || !blk_qc_t_valid(cookie) || + !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + return false; + + plug = current->plug; + if (plug) + blk_flush_plug_list(plug, false); + + hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)]; + rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie)); + + return __blk_mq_poll(hctx, rq); +} +EXPORT_SYMBOL_GPL(blk_mq_poll); + void blk_mq_disable_hotplug(void) { mutex_lock(&all_q_mutex); diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index c2784cfc5e29..ef52b1e70144 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -96,7 +96,7 @@ static void nvmet_execute_rw(struct nvmet_req *req) cookie = submit_bio(bio); - blk_poll(bdev_get_queue(req->ns->bdev), cookie); + blk_mq_poll(bdev_get_queue(req->ns->bdev), cookie); } static void nvmet_execute_flush(struct nvmet_req *req) diff --git a/fs/direct-io.c b/fs/direct-io.c index a5138c564019..835e23a4ee4b 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -457,7 +457,7 @@ static struct bio *dio_await_one(struct dio *dio) dio->waiter = current; spin_unlock_irqrestore(&dio->bio_lock, flags); if (!(dio->iocb->ki_flags & IOCB_HIPRI) || - !blk_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie)) + !blk_mq_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie)) io_schedule(); /* wake up sets us TASK_RUNNING */ spin_lock_irqsave(&dio->bio_lock, flags); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 15da9e430f90..bab18ee5810d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -952,7 +952,7 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *, extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); -bool blk_poll(struct request_queue *q, blk_qc_t cookie); +bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { -- cgit v1.2.3 From 189ce2b9dcc3494410a576fbecbedbb6b21e51e0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 31 Oct 2016 11:59:25 -0600 Subject: block: fast-path for small and simple direct I/O requests This patch adds a small and simple fast patch for small direct I/O requests on block devices that don't use AIO. Between the neat bio_iov_iter_get_pages helper that avoids allocating a page array for get_user_pages and the on-stack bio and biovec this avoid memory allocations and atomic operations entirely in the direct I/O code (lower levels might still do memory allocations and will usually have at least some atomic operations, though). Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe Tested-By: Stephen Bates Reviewed-By: Stephen Bates --- fs/block_dev.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 05b553368bb4..7c3ec6049073 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include "internal.h" @@ -175,12 +176,91 @@ static struct inode *bdev_file_inode(struct file *file) return file->f_mapping->host; } +#define DIO_INLINE_BIO_VECS 4 + +static void blkdev_bio_end_io_simple(struct bio *bio) +{ + struct task_struct *waiter = bio->bi_private; + + WRITE_ONCE(bio->bi_private, NULL); + wake_up_process(waiter); +} + +static ssize_t +__blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, + int nr_pages) +{ + struct file *file = iocb->ki_filp; + struct block_device *bdev = I_BDEV(bdev_file_inode(file)); + unsigned blkbits = blksize_bits(bdev_logical_block_size(bdev)); + struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *bvec; + loff_t pos = iocb->ki_pos; + bool should_dirty = false; + struct bio bio; + ssize_t ret; + blk_qc_t qc; + int i; + + if ((pos | iov_iter_alignment(iter)) & ((1 << blkbits) - 1)) + return -EINVAL; + + bio_init(&bio); + bio.bi_max_vecs = nr_pages; + bio.bi_io_vec = inline_vecs; + bio.bi_bdev = bdev; + bio.bi_iter.bi_sector = pos >> blkbits; + bio.bi_private = current; + bio.bi_end_io = blkdev_bio_end_io_simple; + + ret = bio_iov_iter_get_pages(&bio, iter); + if (unlikely(ret)) + return ret; + ret = bio.bi_iter.bi_size; + + if (iov_iter_rw(iter) == READ) { + bio_set_op_attrs(&bio, REQ_OP_READ, 0); + if (iter_is_iovec(iter)) + should_dirty = true; + } else { + bio_set_op_attrs(&bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE); + task_io_account_write(ret); + } + + qc = submit_bio(&bio); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (!READ_ONCE(bio.bi_private)) + break; + if (!(iocb->ki_flags & IOCB_HIPRI) || + !blk_mq_poll(bdev_get_queue(bdev), qc)) + io_schedule(); + } + __set_current_state(TASK_RUNNING); + + bio_for_each_segment_all(bvec, &bio, i) { + if (should_dirty && !PageCompound(bvec->bv_page)) + set_page_dirty_lock(bvec->bv_page); + put_page(bvec->bv_page); + } + + if (unlikely(bio.bi_error)) + return bio.bi_error; + iocb->ki_pos += ret; + return ret; +} + static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = bdev_file_inode(file); + int nr_pages; + nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES); + if (!nr_pages) + return 0; + if (is_sync_kiocb(iocb) && nr_pages <= DIO_INLINE_BIO_VECS) + return __blkdev_direct_IO_simple(iocb, iter, nr_pages); return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, blkdev_get_block, NULL, NULL, DIO_SKIP_DIO_COUNT); -- cgit v1.2.3 From 72ecad22d9f198aafee64218512e02ffa7818671 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 16 Nov 2016 23:11:42 -0700 Subject: block: support a full bio worth of IO for simplified bdev direct-io Just alloc the bio_vec array if we exceed the inline limit. Signed-off-by: Jens Axboe --- fs/block_dev.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 7c3ec6049073..dad97e19bb1f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -193,7 +193,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, struct file *file = iocb->ki_filp; struct block_device *bdev = I_BDEV(bdev_file_inode(file)); unsigned blkbits = blksize_bits(bdev_logical_block_size(bdev)); - struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *bvec; + struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs, *bvec; loff_t pos = iocb->ki_pos; bool should_dirty = false; struct bio bio; @@ -204,9 +204,17 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, if ((pos | iov_iter_alignment(iter)) & ((1 << blkbits) - 1)) return -EINVAL; + if (nr_pages <= DIO_INLINE_BIO_VECS) + vecs = inline_vecs; + else { + vecs = kmalloc(nr_pages * sizeof(struct bio_vec), GFP_KERNEL); + if (!vecs) + return -ENOMEM; + } + bio_init(&bio); bio.bi_max_vecs = nr_pages; - bio.bi_io_vec = inline_vecs; + bio.bi_io_vec = vecs; bio.bi_bdev = bdev; bio.bi_iter.bi_sector = pos >> blkbits; bio.bi_private = current; @@ -243,6 +251,9 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, put_page(bvec->bv_page); } + if (vecs != inline_vecs) + kfree(vecs); + if (unlikely(bio.bi_error)) return bio.bi_error; iocb->ki_pos += ret; @@ -256,10 +267,10 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) struct inode *inode = bdev_file_inode(file); int nr_pages; - nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES); + nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES + 1); if (!nr_pages) return 0; - if (is_sync_kiocb(iocb) && nr_pages <= DIO_INLINE_BIO_VECS) + if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES) return __blkdev_direct_IO_simple(iocb, iter, nr_pages); return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, blkdev_get_block, NULL, NULL, -- cgit v1.2.3 From 78250c02d9739b5b20f1884f8c1b30efc2f861e4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 17 Nov 2016 17:50:47 +0100 Subject: block: make __blkdev_direct_IO_sync() support O_SYNC/DSYNC Split the op setting code into a helper, use it in both places. Signed-off-by: Jens Axboe --- fs/block_dev.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index dad97e19bb1f..a1b9abe6231c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -176,6 +176,16 @@ static struct inode *bdev_file_inode(struct file *file) return file->f_mapping->host; } +static unsigned int dio_bio_write_op(struct kiocb *iocb) +{ + unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE; + + /* avoid the need for a I/O completion work item */ + if (iocb->ki_flags & IOCB_DSYNC) + op |= REQ_FUA; + return op; +} + #define DIO_INLINE_BIO_VECS 4 static void blkdev_bio_end_io_simple(struct bio *bio) @@ -226,11 +236,11 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, ret = bio.bi_iter.bi_size; if (iov_iter_rw(iter) == READ) { - bio_set_op_attrs(&bio, REQ_OP_READ, 0); + bio.bi_opf = REQ_OP_READ; if (iter_is_iovec(iter)) should_dirty = true; } else { - bio_set_op_attrs(&bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE); + bio.bi_opf = dio_bio_write_op(iocb); task_io_account_write(ret); } -- cgit v1.2.3 From 542ff7bf18c63cf403e36a4a1c71d86dc120d924 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Nov 2016 23:14:22 -0700 Subject: block: new direct I/O implementation Similar to the simple fast path, but we now need a dio structure to track multiple-bio completions. It's basically a cut-down version of the new iomap-based direct I/O code for filesystems, but without all the logic to call into the filesystem for extent lookup or allocation, and without the complex I/O completion workqueue handler for AIO - instead we just use the FUA bit on the bios to ensure data is flushed to stable storage. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/block_dev.c | 166 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 162 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index a1b9abe6231c..35cc494f2e1a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -270,11 +270,161 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, return ret; } +struct blkdev_dio { + union { + struct kiocb *iocb; + struct task_struct *waiter; + }; + size_t size; + atomic_t ref; + bool multi_bio : 1; + bool should_dirty : 1; + bool is_sync : 1; + struct bio bio; +}; + +static struct bio_set *blkdev_dio_pool __read_mostly; + +static void blkdev_bio_end_io(struct bio *bio) +{ + struct blkdev_dio *dio = bio->bi_private; + bool should_dirty = dio->should_dirty; + + if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) { + if (bio->bi_error && !dio->bio.bi_error) + dio->bio.bi_error = bio->bi_error; + } else { + if (!dio->is_sync) { + struct kiocb *iocb = dio->iocb; + ssize_t ret = dio->bio.bi_error; + + if (likely(!ret)) { + ret = dio->size; + iocb->ki_pos += ret; + } + + dio->iocb->ki_complete(iocb, ret, 0); + bio_put(&dio->bio); + } else { + struct task_struct *waiter = dio->waiter; + + WRITE_ONCE(dio->waiter, NULL); + wake_up_process(waiter); + } + } + + if (should_dirty) { + bio_check_pages_dirty(bio); + } else { + struct bio_vec *bvec; + int i; + + bio_for_each_segment_all(bvec, bio, i) + put_page(bvec->bv_page); + bio_put(bio); + } +} + static ssize_t -blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) +__blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) { struct file *file = iocb->ki_filp; struct inode *inode = bdev_file_inode(file); + struct block_device *bdev = I_BDEV(inode); + unsigned blkbits = blksize_bits(bdev_logical_block_size(bdev)); + struct blkdev_dio *dio; + struct bio *bio; + bool is_read = (iov_iter_rw(iter) == READ); + loff_t pos = iocb->ki_pos; + blk_qc_t qc = BLK_QC_T_NONE; + int ret; + + if ((pos | iov_iter_alignment(iter)) & ((1 << blkbits) - 1)) + return -EINVAL; + + bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, blkdev_dio_pool); + bio_get(bio); /* extra ref for the completion handler */ + + dio = container_of(bio, struct blkdev_dio, bio); + dio->is_sync = is_sync_kiocb(iocb); + if (dio->is_sync) + dio->waiter = current; + else + dio->iocb = iocb; + + dio->size = 0; + dio->multi_bio = false; + dio->should_dirty = is_read && (iter->type == ITER_IOVEC); + + for (;;) { + bio->bi_bdev = bdev; + bio->bi_iter.bi_sector = pos >> blkbits; + bio->bi_private = dio; + bio->bi_end_io = blkdev_bio_end_io; + + ret = bio_iov_iter_get_pages(bio, iter); + if (unlikely(ret)) { + bio->bi_error = ret; + bio_endio(bio); + break; + } + + if (is_read) { + bio->bi_opf = REQ_OP_READ; + if (dio->should_dirty) + bio_set_pages_dirty(bio); + } else { + bio->bi_opf = dio_bio_write_op(iocb); + task_io_account_write(bio->bi_iter.bi_size); + } + + dio->size += bio->bi_iter.bi_size; + pos += bio->bi_iter.bi_size; + + nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES); + if (!nr_pages) { + qc = submit_bio(bio); + break; + } + + if (!dio->multi_bio) { + dio->multi_bio = true; + atomic_set(&dio->ref, 2); + } else { + atomic_inc(&dio->ref); + } + + submit_bio(bio); + bio = bio_alloc(GFP_KERNEL, nr_pages); + } + + if (!dio->is_sync) + return -EIOCBQUEUED; + + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (!READ_ONCE(dio->waiter)) + break; + + if (!(iocb->ki_flags & IOCB_HIPRI) || + !blk_mq_poll(bdev_get_queue(bdev), qc)) + io_schedule(); + } + __set_current_state(TASK_RUNNING); + + ret = dio->bio.bi_error; + if (likely(!ret)) { + ret = dio->size; + iocb->ki_pos += ret; + } + + bio_put(&dio->bio); + return ret; +} + +static ssize_t +blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) +{ int nr_pages; nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES + 1); @@ -282,10 +432,18 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) return 0; if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES) return __blkdev_direct_IO_simple(iocb, iter, nr_pages); - return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, - blkdev_get_block, NULL, NULL, - DIO_SKIP_DIO_COUNT); + + return __blkdev_direct_IO(iocb, iter, min(nr_pages, BIO_MAX_PAGES)); +} + +static __init int blkdev_init(void) +{ + blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio)); + if (!blkdev_dio_pool) + return -ENOMEM; + return 0; } +module_init(blkdev_init); int __sync_blockdev(struct block_device *bdev, int wait) { -- cgit v1.2.3 From 4d1a4765426fb57439164089b3cb537ec65356eb Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 22 Nov 2016 15:38:49 +0900 Subject: block_dev: Fixed direct I/O bio sector calculation A direct I/O alignment must be always checked against the device blocks size, but the I/O offset (bio->bi_iter.bi_sector must always use 512B sector unit, and not the actual logical block size. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/block_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 35cc494f2e1a..e49fb797e447 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -226,7 +226,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, bio.bi_max_vecs = nr_pages; bio.bi_io_vec = vecs; bio.bi_bdev = bdev; - bio.bi_iter.bi_sector = pos >> blkbits; + bio.bi_iter.bi_sector = pos >> 9; bio.bi_private = current; bio.bi_end_io = blkdev_bio_end_io_simple; @@ -358,7 +358,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) for (;;) { bio->bi_bdev = bdev; - bio->bi_iter.bi_sector = pos >> blkbits; + bio->bi_iter.bi_sector = pos >> 9; bio->bi_private = dio; bio->bi_end_io = blkdev_bio_end_io; -- cgit v1.2.3 From 9a794fb9bddeda0b8c8c13858038318f3cbd4b7e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 22 Nov 2016 08:12:39 -0700 Subject: block_dev: get rid of blksize bits calculation We store the bits in the bdev sector size locally, but we don't use the calculation anymore. All we do with it is shift it back up to the bdev sector size. So let's just use that directly and kill the variable and bits calculation. Signed-off-by: Jens Axboe --- fs/block_dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index e49fb797e447..b0c790a19db9 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -202,7 +202,6 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, { struct file *file = iocb->ki_filp; struct block_device *bdev = I_BDEV(bdev_file_inode(file)); - unsigned blkbits = blksize_bits(bdev_logical_block_size(bdev)); struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs, *bvec; loff_t pos = iocb->ki_pos; bool should_dirty = false; @@ -211,7 +210,8 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, blk_qc_t qc; int i; - if ((pos | iov_iter_alignment(iter)) & ((1 << blkbits) - 1)) + if ((pos | iov_iter_alignment(iter)) & + (bdev_logical_block_size(bdev) - 1)) return -EINVAL; if (nr_pages <= DIO_INLINE_BIO_VECS) @@ -331,7 +331,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) struct file *file = iocb->ki_filp; struct inode *inode = bdev_file_inode(file); struct block_device *bdev = I_BDEV(inode); - unsigned blkbits = blksize_bits(bdev_logical_block_size(bdev)); struct blkdev_dio *dio; struct bio *bio; bool is_read = (iov_iter_rw(iter) == READ); @@ -339,7 +338,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) blk_qc_t qc = BLK_QC_T_NONE; int ret; - if ((pos | iov_iter_alignment(iter)) & ((1 << blkbits) - 1)) + if ((pos | iov_iter_alignment(iter)) & + (bdev_logical_block_size(bdev) - 1)) return -EINVAL; bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, blkdev_dio_pool); -- cgit v1.2.3 From 3a83f4677539bce8eaa2bca9ee9c20e172d7ab04 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 22 Nov 2016 08:57:21 -0700 Subject: block: bio: pass bvec table to bio_init() Some drivers often use external bvec table, so introduce this helper for this case. It is always safe to access the bio->bi_io_vec in this way for this case. After converting to this usage, it will becomes a bit easier to evaluate the remaining direct access to bio->bi_io_vec, so it can help to prepare for the following multipage bvec support. Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Fixed up the new O_DIRECT cases. Signed-off-by: Jens Axboe --- block/bio.c | 8 ++++++-- drivers/block/floppy.c | 3 +-- drivers/md/bcache/io.c | 4 +--- drivers/md/bcache/journal.c | 4 +--- drivers/md/bcache/movinggc.c | 6 ++---- drivers/md/bcache/request.c | 2 +- drivers/md/bcache/super.c | 12 +++--------- drivers/md/bcache/writeback.c | 5 ++--- drivers/md/dm-bufio.c | 4 +--- drivers/md/dm.c | 2 +- drivers/md/multipath.c | 2 +- drivers/md/raid5-cache.c | 2 +- drivers/md/raid5.c | 9 ++------- drivers/nvme/target/io-cmd.c | 4 +--- fs/block_dev.c | 4 +--- fs/logfs/dev_bdev.c | 4 +--- include/linux/bio.h | 3 ++- 17 files changed, 28 insertions(+), 50 deletions(-) (limited to 'fs') diff --git a/block/bio.c b/block/bio.c index 2cf6ebabc68c..de257ced69b1 100644 --- a/block/bio.c +++ b/block/bio.c @@ -270,11 +270,15 @@ static void bio_free(struct bio *bio) } } -void bio_init(struct bio *bio) +void bio_init(struct bio *bio, struct bio_vec *table, + unsigned short max_vecs) { memset(bio, 0, sizeof(*bio)); atomic_set(&bio->__bi_remaining, 1); atomic_set(&bio->__bi_cnt, 1); + + bio->bi_io_vec = table; + bio->bi_max_vecs = max_vecs; } EXPORT_SYMBOL(bio_init); @@ -480,7 +484,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) return NULL; bio = p + front_pad; - bio_init(bio); + bio_init(bio, NULL, 0); if (nr_iovecs > inline_vecs) { unsigned long idx = 0; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index e3d8e4ced4a2..6a3ff2b2e3ae 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3806,8 +3806,7 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive) cbdata.drive = drive; - bio_init(&bio); - bio.bi_io_vec = &bio_vec; + bio_init(&bio, &bio_vec, 1); bio_vec.bv_page = page; bio_vec.bv_len = size; bio_vec.bv_offset = 0; diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index e97b0acf7b8d..db45a88c0ce9 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -24,9 +24,7 @@ struct bio *bch_bbio_alloc(struct cache_set *c) struct bbio *b = mempool_alloc(c->bio_meta, GFP_NOIO); struct bio *bio = &b->bio; - bio_init(bio); - bio->bi_max_vecs = bucket_pages(c); - bio->bi_io_vec = bio->bi_inline_vecs; + bio_init(bio, bio->bi_inline_vecs, bucket_pages(c)); return bio; } diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 6925023e12d4..1198e53d5670 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -448,13 +448,11 @@ static void do_journal_discard(struct cache *ca) atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT); - bio_init(bio); + bio_init(bio, bio->bi_inline_vecs, 1); bio_set_op_attrs(bio, REQ_OP_DISCARD, 0); bio->bi_iter.bi_sector = bucket_to_sector(ca->set, ca->sb.d[ja->discard_idx]); bio->bi_bdev = ca->bdev; - bio->bi_max_vecs = 1; - bio->bi_io_vec = bio->bi_inline_vecs; bio->bi_iter.bi_size = bucket_bytes(ca); bio->bi_end_io = journal_discard_endio; diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 5c4bddecfaf0..13b8a907006d 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -77,15 +77,13 @@ static void moving_init(struct moving_io *io) { struct bio *bio = &io->bio.bio; - bio_init(bio); + bio_init(bio, bio->bi_inline_vecs, + DIV_ROUND_UP(KEY_SIZE(&io->w->key), PAGE_SECTORS)); bio_get(bio); bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); bio->bi_iter.bi_size = KEY_SIZE(&io->w->key) << 9; - bio->bi_max_vecs = DIV_ROUND_UP(KEY_SIZE(&io->w->key), - PAGE_SECTORS); bio->bi_private = &io->cl; - bio->bi_io_vec = bio->bi_inline_vecs; bch_bio_map(bio, NULL); } diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 0d99b5f4b3e6..f49c5417527d 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -623,7 +623,7 @@ static void do_bio_hook(struct search *s, struct bio *orig_bio) { struct bio *bio = &s->bio.bio; - bio_init(bio); + bio_init(bio, NULL, 0); __bio_clone_fast(bio, orig_bio); bio->bi_end_io = request_endio; bio->bi_private = &s->cl; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 988edf928466..2fb5bfeb43e2 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1152,9 +1152,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, dc->bdev = bdev; dc->bdev->bd_holder = dc; - bio_init(&dc->sb_bio); - dc->sb_bio.bi_max_vecs = 1; - dc->sb_bio.bi_io_vec = dc->sb_bio.bi_inline_vecs; + bio_init(&dc->sb_bio, dc->sb_bio.bi_inline_vecs, 1); dc->sb_bio.bi_io_vec[0].bv_page = sb_page; get_page(sb_page); @@ -1814,9 +1812,7 @@ static int cache_alloc(struct cache *ca) __module_get(THIS_MODULE); kobject_init(&ca->kobj, &bch_cache_ktype); - bio_init(&ca->journal.bio); - ca->journal.bio.bi_max_vecs = 8; - ca->journal.bio.bi_io_vec = ca->journal.bio.bi_inline_vecs; + bio_init(&ca->journal.bio, ca->journal.bio.bi_inline_vecs, 8); free = roundup_pow_of_two(ca->sb.nbuckets) >> 10; @@ -1852,9 +1848,7 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, ca->bdev = bdev; ca->bdev->bd_holder = ca; - bio_init(&ca->sb_bio); - ca->sb_bio.bi_max_vecs = 1; - ca->sb_bio.bi_io_vec = ca->sb_bio.bi_inline_vecs; + bio_init(&ca->sb_bio, ca->sb_bio.bi_inline_vecs, 1); ca->sb_bio.bi_io_vec[0].bv_page = sb_page; get_page(sb_page); diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index e51644e503a5..69e1ae59cab8 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -106,14 +106,13 @@ static void dirty_init(struct keybuf_key *w) struct dirty_io *io = w->private; struct bio *bio = &io->bio; - bio_init(bio); + bio_init(bio, bio->bi_inline_vecs, + DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS)); if (!io->dc->writeback_percent) bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)); bio->bi_iter.bi_size = KEY_SIZE(&w->key) << 9; - bio->bi_max_vecs = DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS); bio->bi_private = w; - bio->bi_io_vec = bio->bi_inline_vecs; bch_bio_map(bio, NULL); } diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index b3ba142e59a4..262e75365cc0 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -611,9 +611,7 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, char *ptr; int len; - bio_init(&b->bio); - b->bio.bi_io_vec = b->bio_vec; - b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS; + bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS); b->bio.bi_iter.bi_sector = block << b->c->sectors_per_block_bits; b->bio.bi_bdev = b->c->bdev; b->bio.bi_end_io = inline_endio; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index b2abfa41af3e..7915467d3726 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1525,7 +1525,7 @@ static struct mapped_device *alloc_dev(int minor) if (!md->bdev) goto bad; - bio_init(&md->flush_bio); + bio_init(&md->flush_bio, NULL, 0); md->flush_bio.bi_bdev = md->bdev; md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 673efbd6fc47..4da06d813b8f 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -130,7 +130,7 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio) } multipath = conf->multipaths + mp_bh->path; - bio_init(&mp_bh->bio); + bio_init(&mp_bh->bio, NULL, 0); __bio_clone_fast(&mp_bh->bio, bio); mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset; diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 28d015c6fffe..25e9622d7d80 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1201,7 +1201,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) INIT_LIST_HEAD(&log->io_end_ios); INIT_LIST_HEAD(&log->flushing_ios); INIT_LIST_HEAD(&log->finished_ios); - bio_init(&log->flush_bio); + bio_init(&log->flush_bio, NULL, 0); log->io_kc = KMEM_CACHE(r5l_io_unit, 0); if (!log->io_kc) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 70acdd379e44..5f9e28443c8a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2004,13 +2004,8 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, for (i = 0; i < disks; i++) { struct r5dev *dev = &sh->dev[i]; - bio_init(&dev->req); - dev->req.bi_io_vec = &dev->vec; - dev->req.bi_max_vecs = 1; - - bio_init(&dev->rreq); - dev->rreq.bi_io_vec = &dev->rvec; - dev->rreq.bi_max_vecs = 1; + bio_init(&dev->req, &dev->vec, 1); + bio_init(&dev->rreq, &dev->rvec, 1); } } return sh; diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index ef52b1e70144..c4dc9ea8ade0 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -37,9 +37,7 @@ static void nvmet_inline_bio_init(struct nvmet_req *req) { struct bio *bio = &req->inline_bio; - bio_init(bio); - bio->bi_max_vecs = NVMET_MAX_INLINE_BIOVEC; - bio->bi_io_vec = req->inline_bvec; + bio_init(bio, req->inline_bvec, NVMET_MAX_INLINE_BIOVEC); } static void nvmet_execute_rw(struct nvmet_req *req) diff --git a/fs/block_dev.c b/fs/block_dev.c index b0c790a19db9..7022ddc55b12 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -222,9 +222,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, return -ENOMEM; } - bio_init(&bio); - bio.bi_max_vecs = nr_pages; - bio.bi_io_vec = vecs; + bio_init(&bio, vecs, nr_pages); bio.bi_bdev = bdev; bio.bi_iter.bi_sector = pos >> 9; bio.bi_private = current; diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index a8329cc47dec..dc8cafeee038 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -19,9 +19,7 @@ static int sync_request(struct page *page, struct block_device *bdev, int op) struct bio bio; struct bio_vec bio_vec; - bio_init(&bio); - bio.bi_max_vecs = 1; - bio.bi_io_vec = &bio_vec; + bio_init(&bio, &bio_vec, 1); bio_vec.bv_page = page; bio_vec.bv_len = PAGE_SIZE; bio_vec.bv_offset = 0; diff --git a/include/linux/bio.h b/include/linux/bio.h index d367cd37a7f7..70a7244f08a7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -420,7 +420,8 @@ extern int bio_phys_segments(struct request_queue *, struct bio *); extern int submit_bio_wait(struct bio *bio); extern void bio_advance(struct bio *, unsigned); -extern void bio_init(struct bio *); +extern void bio_init(struct bio *bio, struct bio_vec *table, + unsigned short max_vecs); extern void bio_reset(struct bio *); void bio_chain(struct bio *, struct bio *); -- cgit v1.2.3 From 739a9975468cc64b346da4d40c0a2e61b1f69af0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 11 Nov 2016 20:05:37 +0800 Subject: fs: logfs: convert to bio_add_page() in sync_request() Always bio_add_page() is the standard and preferred way to do the task. Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- fs/logfs/dev_bdev.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index dc8cafeee038..e01075790600 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -20,13 +20,9 @@ static int sync_request(struct page *page, struct block_device *bdev, int op) struct bio_vec bio_vec; bio_init(&bio, &bio_vec, 1); - bio_vec.bv_page = page; - bio_vec.bv_len = PAGE_SIZE; - bio_vec.bv_offset = 0; - bio.bi_vcnt = 1; bio.bi_bdev = bdev; + bio_add_page(&bio, page, PAGE_SIZE, 0); bio.bi_iter.bi_sector = page->index * (PAGE_SIZE >> 9); - bio.bi_iter.bi_size = PAGE_SIZE; bio_set_op_attrs(&bio, op, 0); return submit_bio_wait(&bio); -- cgit v1.2.3 From d4f98a89f9cdc42926fadb49ed6652c5da20fb54 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 11 Nov 2016 20:05:38 +0800 Subject: fs: logfs: use bio_add_page() in __bdev_writeseg() Also this patch simplify the code a bit. Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- fs/logfs/dev_bdev.c | 51 ++++++++++++++++++++------------------------------- 1 file changed, 20 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index e01075790600..a2e8511b89d6 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -71,56 +71,45 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, { struct logfs_super *super = logfs_super(sb); struct address_space *mapping = super->s_mapping_inode->i_mapping; - struct bio *bio; + struct bio *bio = NULL; struct page *page; unsigned int max_pages; - int i; + int i, ret; max_pages = min_t(size_t, nr_pages, BIO_MAX_PAGES); - bio = bio_alloc(GFP_NOFS, max_pages); - BUG_ON(!bio); - for (i = 0; i < nr_pages; i++) { - if (i >= max_pages) { - /* Block layer cannot split bios :( */ - bio->bi_vcnt = i; - bio->bi_iter.bi_size = i * PAGE_SIZE; + if (!bio) { + bio = bio_alloc(GFP_NOFS, max_pages); + BUG_ON(!bio); + bio->bi_bdev = super->s_bdev; bio->bi_iter.bi_sector = ofs >> 9; bio->bi_private = sb; bio->bi_end_io = writeseg_end_io; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - atomic_inc(&super->s_pending_writes); - submit_bio(bio); - - ofs += i * PAGE_SIZE; - index += i; - nr_pages -= i; - i = 0; - - bio = bio_alloc(GFP_NOFS, max_pages); - BUG_ON(!bio); } page = find_lock_page(mapping, index + i); BUG_ON(!page); - bio->bi_io_vec[i].bv_page = page; - bio->bi_io_vec[i].bv_len = PAGE_SIZE; - bio->bi_io_vec[i].bv_offset = 0; + ret = bio_add_page(bio, page, PAGE_SIZE, 0); BUG_ON(PageWriteback(page)); set_page_writeback(page); unlock_page(page); + + if (!ret) { + /* Block layer cannot split bios :( */ + ofs += bio->bi_iter.bi_size; + atomic_inc(&super->s_pending_writes); + submit_bio(bio); + bio = NULL; + } + } + + if (bio) { + atomic_inc(&super->s_pending_writes); + submit_bio(bio); } - bio->bi_vcnt = nr_pages; - bio->bi_iter.bi_size = nr_pages * PAGE_SIZE; - bio->bi_bdev = super->s_bdev; - bio->bi_iter.bi_sector = ofs >> 9; - bio->bi_private = sb; - bio->bi_end_io = writeseg_end_io; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - atomic_inc(&super->s_pending_writes); - submit_bio(bio); return 0; } -- cgit v1.2.3 From c12484367865187107af131717a6e8ab44588fb6 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 11 Nov 2016 20:05:39 +0800 Subject: fs: logfs: use bio_add_page() in do_erase() Also code gets simplified a bit. Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- fs/logfs/dev_bdev.c | 44 +++++++++++++++----------------------------- 1 file changed, 15 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index a2e8511b89d6..b9188be0bf83 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -153,49 +153,35 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index, size_t nr_pages) { struct logfs_super *super = logfs_super(sb); - struct bio *bio; + struct bio *bio = NULL; unsigned int max_pages; - int i; + int i, ret; max_pages = min_t(size_t, nr_pages, BIO_MAX_PAGES); - bio = bio_alloc(GFP_NOFS, max_pages); - BUG_ON(!bio); - for (i = 0; i < nr_pages; i++) { - if (i >= max_pages) { - /* Block layer cannot split bios :( */ - bio->bi_vcnt = i; - bio->bi_iter.bi_size = i * PAGE_SIZE; + if (!bio) { + bio = bio_alloc(GFP_NOFS, max_pages); + BUG_ON(!bio); + bio->bi_bdev = super->s_bdev; bio->bi_iter.bi_sector = ofs >> 9; bio->bi_private = sb; bio->bi_end_io = erase_end_io; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + } + ret = bio_add_page(bio, super->s_erase_page, PAGE_SIZE, 0); + if (!ret) { + /* Block layer cannot split bios :( */ + ofs += bio->bi_iter.bi_size; atomic_inc(&super->s_pending_writes); submit_bio(bio); - - ofs += i * PAGE_SIZE; - index += i; - nr_pages -= i; - i = 0; - - bio = bio_alloc(GFP_NOFS, max_pages); - BUG_ON(!bio); } - bio->bi_io_vec[i].bv_page = super->s_erase_page; - bio->bi_io_vec[i].bv_len = PAGE_SIZE; - bio->bi_io_vec[i].bv_offset = 0; } - bio->bi_vcnt = nr_pages; - bio->bi_iter.bi_size = nr_pages * PAGE_SIZE; - bio->bi_bdev = super->s_bdev; - bio->bi_iter.bi_sector = ofs >> 9; - bio->bi_private = sb; - bio->bi_end_io = erase_end_io; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - atomic_inc(&super->s_pending_writes); - submit_bio(bio); + if (bio) { + atomic_inc(&super->s_pending_writes); + submit_bio(bio); + } return 0; } -- cgit v1.2.3 From 05aea81b4bc9e51dabd4559666c1d8d004f3662a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 11 Nov 2016 20:05:40 +0800 Subject: fs: logfs: remove unnecesary check The check on bio->bi_vcnt doesn't make sense in erase_end_io(). Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- fs/logfs/dev_bdev.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index b9188be0bf83..9bfa0151d7c9 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -143,7 +143,6 @@ static void erase_end_io(struct bio *bio) struct logfs_super *super = logfs_super(sb); BUG_ON(bio->bi_error); /* FIXME: Retry io or write elsewhere */ - BUG_ON(bio->bi_vcnt == 0); bio_put(bio); if (atomic_dec_and_test(&super->s_pending_writes)) wake_up(&wq); -- cgit v1.2.3 From af309226db916e2c6e08d3eba3fa5c34225200c4 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 1 Dec 2016 09:18:28 +0100 Subject: block: protect iterate_bdevs() against concurrent close If a block device is closed while iterate_bdevs() is handling it, the following NULL pointer dereference occurs because bdev->b_disk is NULL in bdev_get_queue(), which is called from blk_get_backing_dev_info() (in turn called by the mapping_cap_writeback_dirty() call in __filemap_fdatawrite_range()): BUG: unable to handle kernel NULL pointer dereference at 0000000000000508 IP: [] blk_get_backing_dev_info+0x10/0x20 PGD 9e62067 PUD 9ee8067 PMD 0 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: CPU: 1 PID: 2422 Comm: sync Not tainted 4.5.0-rc7+ #400 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) task: ffff880009f4d700 ti: ffff880009f5c000 task.ti: ffff880009f5c000 RIP: 0010:[] [] blk_get_backing_dev_info+0x10/0x20 RSP: 0018:ffff880009f5fe68 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff88000ec17a38 RCX: ffffffff81a4e940 RDX: 7fffffffffffffff RSI: 0000000000000000 RDI: ffff88000ec176c0 RBP: ffff880009f5fe68 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffff88000ec17860 R13: ffffffff811b25c0 R14: ffff88000ec178e0 R15: ffff88000ec17a38 FS: 00007faee505d700(0000) GS:ffff88000fb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000508 CR3: 0000000009e8a000 CR4: 00000000000006e0 Stack: ffff880009f5feb8 ffffffff8112e7f5 0000000000000000 7fffffffffffffff 0000000000000000 0000000000000000 7fffffffffffffff 0000000000000001 ffff88000ec178e0 ffff88000ec17860 ffff880009f5fec8 ffffffff8112e81f Call Trace: [] __filemap_fdatawrite_range+0x85/0x90 [] filemap_fdatawrite+0x1f/0x30 [] fdatawrite_one_bdev+0x16/0x20 [] iterate_bdevs+0xf2/0x130 [] sys_sync+0x63/0x90 [] entry_SYSCALL_64_fastpath+0x12/0x76 Code: 0f 1f 44 00 00 48 8b 87 f0 00 00 00 55 48 89 e5 <48> 8b 80 08 05 00 00 5d RIP [] blk_get_backing_dev_info+0x10/0x20 RSP CR2: 0000000000000508 ---[ end trace 2487336ceb3de62d ]--- The crash is easily reproducible by running the following command, if an msleep(100) is inserted before the call to func() in iterate_devs(): while :; do head -c1 /dev/nullb0; done > /dev/null & while :; do sync; done Fix it by holding the bd_mutex across the func() call and only calling func() if the bdev is opened. Cc: stable@vger.kernel.org Fixes: 5c0d6b60a0ba ("vfs: Create function for iterating over block devices") Reported-and-tested-by: Wei Fang Signed-off-by: Rabin Vincent Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/block_dev.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 7022ddc55b12..95acbd2ebc5d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -2207,6 +2207,7 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) spin_lock(&blockdev_superblock->s_inode_list_lock); list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) { struct address_space *mapping = inode->i_mapping; + struct block_device *bdev; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) || @@ -2227,8 +2228,12 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) */ iput(old_inode); old_inode = inode; + bdev = I_BDEV(inode); - func(I_BDEV(inode), arg); + mutex_lock(&bdev->bd_mutex); + if (bdev->bd_openers) + func(bdev, arg); + mutex_unlock(&bdev->bd_mutex); spin_lock(&blockdev_superblock->s_inode_list_lock); } -- cgit v1.2.3