diff options
Diffstat (limited to 'block/blk-core.c')
-rw-r--r-- | block/blk-core.c | 217 |
1 files changed, 168 insertions, 49 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index 538cbc725620..03252af8c82c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -20,6 +20,7 @@ #include <linux/blk-mq.h> #include <linux/highmem.h> #include <linux/mm.h> +#include <linux/pagemap.h> #include <linux/kernel_stat.h> #include <linux/string.h> #include <linux/init.h> @@ -38,6 +39,8 @@ #include <linux/debugfs.h> #include <linux/bpf.h> #include <linux/psi.h> +#include <linux/sched/sysctl.h> +#include <linux/blk-crypto.h> #define CREATE_TRACE_POINTS #include <trace/events/block.h> @@ -120,6 +123,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) rq->start_time_ns = ktime_get_ns(); rq->part = NULL; refcount_set(&rq->ref, 1); + blk_crypto_rq_set_defaults(rq); } EXPORT_SYMBOL(blk_rq_init); @@ -135,6 +139,7 @@ static const char *const blk_op_name[] = { REQ_OP_NAME(ZONE_OPEN), REQ_OP_NAME(ZONE_CLOSE), REQ_OP_NAME(ZONE_FINISH), + REQ_OP_NAME(ZONE_APPEND), REQ_OP_NAME(WRITE_SAME), REQ_OP_NAME(WRITE_ZEROES), REQ_OP_NAME(SCSI_IN), @@ -240,6 +245,17 @@ static void req_bio_endio(struct request *rq, struct bio *bio, bio_advance(bio, nbytes); + if (req_op(rq) == REQ_OP_ZONE_APPEND && error == BLK_STS_OK) { + /* + * Partial zone append completions cannot be supported as the + * BIO fragments may end up not being written sequentially. + */ + if (bio->bi_iter.bi_size) + bio->bi_status = BLK_STS_IOERR; + else + bio->bi_iter.bi_sector = rq->__sector; + } + /* don't actually finish bio if it's part of flush sequence */ if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ)) bio_endio(bio); @@ -621,6 +637,16 @@ void blk_put_request(struct request *req) } EXPORT_SYMBOL(blk_put_request); +static void blk_account_io_merge_bio(struct request *req) +{ + if (!blk_do_io_stat(req)) + return; + + part_stat_lock(); + part_stat_inc(req->part, merges[op_stat_group(req_op(req))]); + part_stat_unlock(); +} + bool bio_attempt_back_merge(struct request *req, struct bio *bio, unsigned int nr_segs) { @@ -639,7 +665,9 @@ bool bio_attempt_back_merge(struct request *req, struct bio *bio, req->biotail = bio; req->__data_len += bio->bi_iter.bi_size; - blk_account_io_start(req, false); + bio_crypt_free_ctx(bio); + + blk_account_io_merge_bio(req); return true; } @@ -663,7 +691,9 @@ bool bio_attempt_front_merge(struct request *req, struct bio *bio, req->__sector = bio->bi_iter.bi_sector; req->__data_len += bio->bi_iter.bi_size; - blk_account_io_start(req, false); + bio_crypt_do_front_merge(req, bio); + + blk_account_io_merge_bio(req); return true; } @@ -685,7 +715,7 @@ bool bio_attempt_discard_merge(struct request_queue *q, struct request *req, req->__data_len += bio->bi_iter.bi_size; req->nr_phys_segments = segments + 1; - blk_account_io_start(req, false); + blk_account_io_merge_bio(req); return true; no_merge: req_set_nomerge(q, req); @@ -887,6 +917,41 @@ out: return ret; } +/* + * Check write append to a zoned block device. + */ +static inline blk_status_t blk_check_zone_append(struct request_queue *q, + struct bio *bio) +{ + sector_t pos = bio->bi_iter.bi_sector; + int nr_sectors = bio_sectors(bio); + + /* Only applicable to zoned block devices */ + if (!blk_queue_is_zoned(q)) + return BLK_STS_NOTSUPP; + + /* The bio sector must point to the start of a sequential zone */ + if (pos & (blk_queue_zone_sectors(q) - 1) || + !blk_queue_zone_is_seq(q, pos)) + return BLK_STS_IOERR; + + /* + * Not allowed to cross zone boundaries. Otherwise, the BIO will be + * split and could result in non-contiguous sectors being written in + * different zones. + */ + if (nr_sectors > q->limits.chunk_sectors) + return BLK_STS_IOERR; + + /* Make sure the BIO is small enough and will not get split */ + if (nr_sectors > q->limits.max_zone_append_sectors) + return BLK_STS_IOERR; + + bio->bi_opf |= REQ_NOMERGE; + + return BLK_STS_OK; +} + static noinline_for_stack bool generic_make_request_checks(struct bio *bio) { @@ -907,14 +972,11 @@ generic_make_request_checks(struct bio *bio) } /* - * Non-mq queues do not honor REQ_NOWAIT, so complete a bio - * with BLK_STS_AGAIN status in order to catch -EAGAIN and - * to give a chance to the caller to repeat request gracefully. + * For a REQ_NOWAIT based request, return -EOPNOTSUPP + * if queue is not a request based queue. */ - if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q)) { - status = BLK_STS_AGAIN; - goto end_io; - } + if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q)) + goto not_supported; if (should_fail_bio(bio)) goto end_io; @@ -959,6 +1021,11 @@ generic_make_request_checks(struct bio *bio) if (!q->limits.max_write_same_sectors) goto not_supported; break; + case REQ_OP_ZONE_APPEND: + status = blk_check_zone_append(q, bio); + if (status != BLK_STS_OK) + goto end_io; + break; case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_OPEN: case REQ_OP_ZONE_CLOSE: @@ -1007,6 +1074,20 @@ end_io: return false; } +static blk_qc_t do_make_request(struct bio *bio) +{ + struct request_queue *q = bio->bi_disk->queue; + blk_qc_t ret = BLK_QC_T_NONE; + + if (blk_crypto_bio_prep(&bio)) { + if (!q->make_request_fn) + return blk_mq_make_request(q, bio); + ret = q->make_request_fn(q, bio); + } + blk_queue_exit(q); + return ret; +} + /** * generic_make_request - re-submit a bio to the block device layer for I/O * @bio: The bio describing the location in memory and on the device. @@ -1072,12 +1153,7 @@ blk_qc_t generic_make_request(struct bio *bio) /* Create a fresh bio_list for all subordinate requests */ bio_list_on_stack[1] = bio_list_on_stack[0]; bio_list_init(&bio_list_on_stack[0]); - if (q->make_request_fn) - ret = q->make_request_fn(q, bio); - else - ret = blk_mq_make_request(q, bio); - - blk_queue_exit(q); + ret = do_make_request(bio); /* sort new bios into those for a lower level * and those for the same level @@ -1114,7 +1190,6 @@ EXPORT_SYMBOL(generic_make_request); blk_qc_t direct_make_request(struct bio *bio) { struct request_queue *q = bio->bi_disk->queue; - blk_qc_t ret; if (WARN_ON_ONCE(q->make_request_fn)) { bio_io_error(bio); @@ -1124,9 +1199,11 @@ blk_qc_t direct_make_request(struct bio *bio) return BLK_QC_T_NONE; if (unlikely(bio_queue_enter(bio))) return BLK_QC_T_NONE; - ret = blk_mq_make_request(q, bio); - blk_queue_exit(q); - return ret; + if (!blk_crypto_bio_prep(&bio)) { + blk_queue_exit(q); + return BLK_QC_T_NONE; + } + return blk_mq_make_request(q, bio); } EXPORT_SYMBOL_GPL(direct_make_request); @@ -1256,8 +1333,11 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq))) return BLK_STS_IOERR; + if (blk_crypto_insert_cloned_request(rq)) + return BLK_STS_IOERR; + if (blk_queue_io_stat(q)) - blk_account_io_start(rq, true); + blk_account_io_start(rq); /* * Since we have a scheduler attached on the top device, @@ -1309,7 +1389,22 @@ unsigned int blk_rq_err_bytes(const struct request *rq) } EXPORT_SYMBOL_GPL(blk_rq_err_bytes); -void blk_account_io_completion(struct request *req, unsigned int bytes) +static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end) +{ + unsigned long stamp; +again: + stamp = READ_ONCE(part->stamp); + if (unlikely(stamp != now)) { + if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) + __part_stat_add(part, io_ticks, end ? now - stamp : 1); + } + if (part->partno) { + part = &part_to_disk(part)->part0; + goto again; + } +} + +static void blk_account_io_completion(struct request *req, unsigned int bytes) { if (req->part && blk_do_io_stat(req)) { const int sgrp = op_stat_group(req_op(req)); @@ -1340,48 +1435,57 @@ void blk_account_io_done(struct request *req, u64 now) update_io_ticks(part, jiffies, true); part_stat_inc(part, ios[sgrp]); part_stat_add(part, nsecs[sgrp], now - req->start_time_ns); - part_dec_in_flight(req->q, part, rq_data_dir(req)); + part_stat_unlock(); hd_struct_put(part); - part_stat_unlock(); } } -void blk_account_io_start(struct request *rq, bool new_io) +void blk_account_io_start(struct request *rq) { - struct hd_struct *part; - int rw = rq_data_dir(rq); - if (!blk_do_io_stat(rq)) return; + rq->part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); + part_stat_lock(); + update_io_ticks(rq->part, jiffies, false); + part_stat_unlock(); +} - if (!new_io) { - part = rq->part; - part_stat_inc(part, merges[rw]); - } else { - part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); - if (!hd_struct_try_get(part)) { - /* - * The partition is already being removed, - * the request will be accounted on the disk only - * - * We take a reference on disk->part0 although that - * partition will never be deleted, so we can treat - * it as any other partition. - */ - part = &rq->rq_disk->part0; - hd_struct_get(part); - } - part_inc_in_flight(rq->q, part, rw); - rq->part = part; - } +unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, + unsigned int op) +{ + struct hd_struct *part = &disk->part0; + const int sgrp = op_stat_group(op); + unsigned long now = READ_ONCE(jiffies); + + part_stat_lock(); + update_io_ticks(part, now, false); + part_stat_inc(part, ios[sgrp]); + part_stat_add(part, sectors[sgrp], sectors); + part_stat_local_inc(part, in_flight[op_is_write(op)]); + part_stat_unlock(); - update_io_ticks(part, jiffies, false); + return now; +} +EXPORT_SYMBOL(disk_start_io_acct); +void disk_end_io_acct(struct gendisk *disk, unsigned int op, + unsigned long start_time) +{ + struct hd_struct *part = &disk->part0; + const int sgrp = op_stat_group(op); + unsigned long now = READ_ONCE(jiffies); + unsigned long duration = now - start_time; + + part_stat_lock(); + update_io_ticks(part, now, true); + part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration)); + part_stat_local_dec(part, in_flight[op_is_write(op)]); part_stat_unlock(); } +EXPORT_SYMBOL(disk_end_io_acct); /* * Steal bios from a request and add them to a bio list. @@ -1632,6 +1736,9 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, rq->nr_phys_segments = rq_src->nr_phys_segments; rq->ioprio = rq_src->ioprio; + if (rq->bio) + blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask); + return 0; free_and_out: @@ -1772,6 +1879,18 @@ void blk_finish_plug(struct blk_plug *plug) } EXPORT_SYMBOL(blk_finish_plug); +void blk_io_schedule(void) +{ + /* Prevent hang_check timer from firing at us during very long I/O */ + unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2; + + if (timeout) + io_schedule_timeout(timeout); + else + io_schedule(); +} +EXPORT_SYMBOL_GPL(blk_io_schedule); + int __init blk_dev_init(void) { BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS)); |