diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/Kconfig | 5 | ||||
-rw-r--r-- | block/Kconfig.iosched | 44 | ||||
-rw-r--r-- | block/Makefile | 1 | ||||
-rw-r--r-- | block/bio.c | 73 | ||||
-rw-r--r-- | block/blk-cgroup.c | 1 | ||||
-rw-r--r-- | block/blk-core.c | 42 | ||||
-rw-r--r-- | block/blk-ioc.c | 45 | ||||
-rw-r--r-- | block/blk-map.c | 1 | ||||
-rw-r--r-- | block/blk-mq-sched.c | 45 | ||||
-rw-r--r-- | block/blk-mq-sched.h | 26 | ||||
-rw-r--r-- | block/blk-mq-sysfs.c | 40 | ||||
-rw-r--r-- | block/blk-mq-tag.c | 5 | ||||
-rw-r--r-- | block/blk-mq-tag.h | 6 | ||||
-rw-r--r-- | block/blk-mq-virtio.c | 54 | ||||
-rw-r--r-- | block/blk-mq.c | 223 | ||||
-rw-r--r-- | block/blk-mq.h | 12 | ||||
-rw-r--r-- | block/blk-softirq.c | 1 | ||||
-rw-r--r-- | block/blk-sysfs.c | 2 | ||||
-rw-r--r-- | block/blk-throttle.c | 2 | ||||
-rw-r--r-- | block/bsg.c | 6 | ||||
-rw-r--r-- | block/cfq-iosched.c | 1 | ||||
-rw-r--r-- | block/elevator.c | 21 | ||||
-rw-r--r-- | block/genhd.c | 36 | ||||
-rw-r--r-- | block/ioprio.c | 11 | ||||
-rw-r--r-- | block/sed-opal.c | 587 |
25 files changed, 709 insertions, 581 deletions
diff --git a/block/Kconfig b/block/Kconfig index a2a92e57a87d..e9f780f815f5 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -189,4 +189,9 @@ config BLK_MQ_PCI depends on BLOCK && PCI default y +config BLK_MQ_VIRTIO + bool + depends on BLOCK && VIRTIO + default y + source block/Kconfig.iosched diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 0715ce93daef..58fc8684788d 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -69,50 +69,6 @@ config MQ_IOSCHED_DEADLINE ---help--- MQ version of the deadline IO scheduler. -config MQ_IOSCHED_NONE - bool - default y - -choice - prompt "Default single-queue blk-mq I/O scheduler" - default DEFAULT_SQ_NONE - help - Select the I/O scheduler which will be used by default for blk-mq - managed block devices with a single queue. - - config DEFAULT_SQ_DEADLINE - bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y - - config DEFAULT_SQ_NONE - bool "None" - -endchoice - -config DEFAULT_SQ_IOSCHED - string - default "mq-deadline" if DEFAULT_SQ_DEADLINE - default "none" if DEFAULT_SQ_NONE - -choice - prompt "Default multi-queue blk-mq I/O scheduler" - default DEFAULT_MQ_NONE - help - Select the I/O scheduler which will be used by default for blk-mq - managed block devices with multiple queues. - - config DEFAULT_MQ_DEADLINE - bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y - - config DEFAULT_MQ_NONE - bool "None" - -endchoice - -config DEFAULT_MQ_IOSCHED - string - default "mq-deadline" if DEFAULT_MQ_DEADLINE - default "none" if DEFAULT_MQ_NONE - endmenu endif diff --git a/block/Makefile b/block/Makefile index 2ad7c304e3f5..081bb680789b 100644 --- a/block/Makefile +++ b/block/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o +obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o obj-$(CONFIG_BLK_WBT) += blk-wbt.o obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o diff --git a/block/bio.c b/block/bio.c index 4b564d0c3e29..e75878f8b14a 100644 --- a/block/bio.c +++ b/block/bio.c @@ -376,10 +376,14 @@ static void punt_bios_to_rescuer(struct bio_set *bs) bio_list_init(&punt); bio_list_init(&nopunt); - while ((bio = bio_list_pop(current->bio_list))) + while ((bio = bio_list_pop(¤t->bio_list[0]))) bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); + current->bio_list[0] = nopunt; - *current->bio_list = nopunt; + bio_list_init(&nopunt); + while ((bio = bio_list_pop(¤t->bio_list[1]))) + bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); + current->bio_list[1] = nopunt; spin_lock(&bs->rescue_lock); bio_list_merge(&bs->rescue_list, &punt); @@ -466,7 +470,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) * we retry with the original gfp_flags. */ - if (current->bio_list && !bio_list_empty(current->bio_list)) + if (current->bio_list && + (!bio_list_empty(¤t->bio_list[0]) || + !bio_list_empty(¤t->bio_list[1]))) gfp_mask &= ~__GFP_DIRECT_RECLAIM; p = mempool_alloc(bs->bio_pool, gfp_mask); @@ -625,21 +631,20 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs) } EXPORT_SYMBOL(bio_clone_fast); -/** - * bio_clone_bioset - clone a bio - * @bio_src: bio to clone - * @gfp_mask: allocation priority - * @bs: bio_set to allocate from - * - * Clone bio. Caller will own the returned bio, but not the actual data it - * points to. Reference count of returned bio will be one. - */ -struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, - struct bio_set *bs) +static struct bio *__bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, + struct bio_set *bs, int offset, + int size) { struct bvec_iter iter; struct bio_vec bv; struct bio *bio; + struct bvec_iter iter_src = bio_src->bi_iter; + + /* for supporting partial clone */ + if (offset || size != bio_src->bi_iter.bi_size) { + bio_advance_iter(bio_src, &iter_src, offset); + iter_src.bi_size = size; + } /* * Pre immutable biovecs, __bio_clone() used to just do a memcpy from @@ -663,7 +668,8 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, * __bio_clone_fast() anyways. */ - bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs); + bio = bio_alloc_bioset(gfp_mask, __bio_segments(bio_src, + &iter_src), bs); if (!bio) return NULL; bio->bi_bdev = bio_src->bi_bdev; @@ -680,7 +686,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0]; break; default: - bio_for_each_segment(bv, bio_src, iter) + __bio_for_each_segment(bv, bio_src, iter, iter_src) bio->bi_io_vec[bio->bi_vcnt++] = bv; break; } @@ -699,9 +705,44 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, return bio; } + +/** + * bio_clone_bioset - clone a bio + * @bio_src: bio to clone + * @gfp_mask: allocation priority + * @bs: bio_set to allocate from + * + * Clone bio. Caller will own the returned bio, but not the actual data it + * points to. Reference count of returned bio will be one. + */ +struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, + struct bio_set *bs) +{ + return __bio_clone_bioset(bio_src, gfp_mask, bs, 0, + bio_src->bi_iter.bi_size); +} EXPORT_SYMBOL(bio_clone_bioset); /** + * bio_clone_bioset_partial - clone a partial bio + * @bio_src: bio to clone + * @gfp_mask: allocation priority + * @bs: bio_set to allocate from + * @offset: cloned starting from the offset + * @size: size for the cloned bio + * + * Clone bio. Caller will own the returned bio, but not the actual data it + * points to. Reference count of returned bio will be one. + */ +struct bio *bio_clone_bioset_partial(struct bio *bio_src, gfp_t gfp_mask, + struct bio_set *bs, int offset, + int size) +{ + return __bio_clone_bioset(bio_src, gfp_mask, bs, offset, size); +} +EXPORT_SYMBOL(bio_clone_bioset_partial); + +/** * bio_add_pc_page - attempt to add page to bio * @q: the target queue * @bio: destination bio diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 295e98c2c8cc..bbe7ee00bd3d 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -17,6 +17,7 @@ #include <linux/ioprio.h> #include <linux/kdev_t.h> #include <linux/module.h> +#include <linux/sched/signal.h> #include <linux/err.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> diff --git a/block/blk-core.c b/block/blk-core.c index b9e857f4afe8..d772c221cc17 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -578,9 +578,6 @@ void blk_cleanup_queue(struct request_queue *q) q->queue_lock = &q->__queue_lock; spin_unlock_irq(lock); - bdi_unregister(q->backing_dev_info); - put_disk_devt(q->disk_devt); - /* @q is and will stay empty, shutdown and put */ blk_put_queue(q); } @@ -1976,7 +1973,14 @@ end_io: */ blk_qc_t generic_make_request(struct bio *bio) { - struct bio_list bio_list_on_stack; + /* + * bio_list_on_stack[0] contains bios submitted by the current + * make_request_fn. + * bio_list_on_stack[1] contains bios that were submitted before + * the current make_request_fn, but that haven't been processed + * yet. + */ + struct bio_list bio_list_on_stack[2]; blk_qc_t ret = BLK_QC_T_NONE; if (!generic_make_request_checks(bio)) @@ -1993,7 +1997,7 @@ blk_qc_t generic_make_request(struct bio *bio) * should be added at the tail */ if (current->bio_list) { - bio_list_add(current->bio_list, bio); + bio_list_add(¤t->bio_list[0], bio); goto out; } @@ -2012,23 +2016,39 @@ blk_qc_t generic_make_request(struct bio *bio) * bio_list, and call into ->make_request() again. */ BUG_ON(bio->bi_next); - bio_list_init(&bio_list_on_stack); - current->bio_list = &bio_list_on_stack; + bio_list_init(&bio_list_on_stack[0]); + current->bio_list = bio_list_on_stack; do { struct request_queue *q = bdev_get_queue(bio->bi_bdev); if (likely(blk_queue_enter(q, false) == 0)) { + struct bio_list lower, same; + + /* Create a fresh bio_list for all subordinate requests */ + bio_list_on_stack[1] = bio_list_on_stack[0]; + bio_list_init(&bio_list_on_stack[0]); ret = q->make_request_fn(q, bio); blk_queue_exit(q); - bio = bio_list_pop(current->bio_list); + /* sort new bios into those for a lower level + * and those for the same level + */ + bio_list_init(&lower); + bio_list_init(&same); + while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) + if (q == bdev_get_queue(bio->bi_bdev)) + bio_list_add(&same, bio); + else + bio_list_add(&lower, bio); + /* now assemble so we handle the lowest level first */ + bio_list_merge(&bio_list_on_stack[0], &lower); + bio_list_merge(&bio_list_on_stack[0], &same); + bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); } else { - struct bio *bio_next = bio_list_pop(current->bio_list); - bio_io_error(bio); - bio = bio_next; } + bio = bio_list_pop(&bio_list_on_stack[0]); } while (bio); current->bio_list = NULL; /* deactivate */ diff --git a/block/blk-ioc.c b/block/blk-ioc.c index b12f9c87b4c3..63898d229cb9 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -7,6 +7,7 @@ #include <linux/bio.h> #include <linux/blkdev.h> #include <linux/slab.h> +#include <linux/sched/task.h> #include "blk.h" @@ -36,8 +37,8 @@ static void icq_free_icq_rcu(struct rcu_head *head) } /* - * Exit an icq. Called with both ioc and q locked for sq, only ioc locked for - * mq. + * Exit an icq. Called with ioc locked for blk-mq, and with both ioc + * and queue locked for legacy. */ static void ioc_exit_icq(struct io_cq *icq) { @@ -54,7 +55,10 @@ static void ioc_exit_icq(struct io_cq *icq) icq->flags |= ICQ_EXITED; } -/* Release an icq. Called with both ioc and q locked. */ +/* + * Release an icq. Called with ioc locked for blk-mq, and with both ioc + * and queue locked for legacy. + */ static void ioc_destroy_icq(struct io_cq *icq) { struct io_context *ioc = icq->ioc; @@ -62,7 +66,6 @@ static void ioc_destroy_icq(struct io_cq *icq) struct elevator_type *et = q->elevator->type; lockdep_assert_held(&ioc->lock); - lockdep_assert_held(q->queue_lock); radix_tree_delete(&ioc->icq_tree, icq->q->id); hlist_del_init(&icq->ioc_node); @@ -222,24 +225,40 @@ void exit_io_context(struct task_struct *task) put_io_context_active(ioc); } +static void __ioc_clear_queue(struct list_head *icq_list) +{ + unsigned long flags; + + while (!list_empty(icq_list)) { + struct io_cq *icq = list_entry(icq_list->next, + struct io_cq, q_node); + struct io_context *ioc = icq->ioc; + + spin_lock_irqsave(&ioc->lock, flags); + ioc_destroy_icq(icq); + spin_unlock_irqrestore(&ioc->lock, flags); + } +} + /** * ioc_clear_queue - break any ioc association with the specified queue * @q: request_queue being cleared * - * Walk @q->icq_list and exit all io_cq's. Must be called with @q locked. + * Walk @q->icq_list and exit all io_cq's. */ void ioc_clear_queue(struct request_queue *q) { - lockdep_assert_held(q->queue_lock); + LIST_HEAD(icq_list); - while (!list_empty(&q->icq_list)) { - struct io_cq *icq = list_entry(q->icq_list.next, - struct io_cq, q_node); - struct io_context *ioc = icq->ioc; + spin_lock_irq(q->queue_lock); + list_splice_init(&q->icq_list, &icq_list); - spin_lock(&ioc->lock); - ioc_destroy_icq(icq); - spin_unlock(&ioc->lock); + if (q->mq_ops) { + spin_unlock_irq(q->queue_lock); + __ioc_clear_queue(&icq_list); + } else { + __ioc_clear_queue(&icq_list); + spin_unlock_irq(q->queue_lock); } } diff --git a/block/blk-map.c b/block/blk-map.c index 2f18c2a0be1b..3b5cb863318f 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -2,6 +2,7 @@ * Functions related to mapping data to requests */ #include <linux/kernel.h> +#include <linux/sched/task_stack.h> #include <linux/module.h> #include <linux/bio.h> #include <linux/blkdev.h> diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 9e8d6795a8c1..09af8ff18719 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -110,15 +110,14 @@ struct request *blk_mq_sched_get_request(struct request_queue *q, struct blk_mq_alloc_data *data) { struct elevator_queue *e = q->elevator; - struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx; struct request *rq; blk_queue_enter_live(q); - ctx = blk_mq_get_ctx(q); - hctx = blk_mq_map_queue(q, ctx->cpu); - - blk_mq_set_alloc_data(data, q, data->flags, ctx, hctx); + data->q = q; + if (likely(!data->ctx)) + data->ctx = blk_mq_get_ctx(q); + if (likely(!data->hctx)) + data->hctx = blk_mq_map_queue(q, data->ctx->cpu); if (e) { data->flags |= BLK_MQ_REQ_INTERNAL; @@ -135,8 +134,6 @@ struct request *blk_mq_sched_get_request(struct request_queue *q, rq = __blk_mq_alloc_request(data, op); } else { rq = __blk_mq_alloc_request(data, op); - if (rq) - data->hctx->tags->rqs[rq->tag] = rq; } if (rq) { @@ -205,7 +202,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) * needing a restart in that case. */ if (!list_empty(&rq_list)) { - blk_mq_sched_mark_restart(hctx); + blk_mq_sched_mark_restart_hctx(hctx); did_work = blk_mq_dispatch_rq_list(hctx, &rq_list); } else if (!has_sched_dispatch) { blk_mq_flush_busy_ctxs(hctx, &rq_list); @@ -331,20 +328,16 @@ static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx) { + struct request_queue *q = hctx->queue; unsigned int i; - if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) + if (test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) { + if (test_and_clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) { + queue_for_each_hw_ctx(q, hctx, i) + blk_mq_sched_restart_hctx(hctx); + } + } else { blk_mq_sched_restart_hctx(hctx); - else { - struct request_queue *q = hctx->queue; - - if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) - return; - - clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags); - - queue_for_each_hw_ctx(q, hctx, i) - blk_mq_sched_restart_hctx(hctx); } } @@ -458,7 +451,8 @@ int blk_mq_sched_setup(struct request_queue *q) */ ret = 0; queue_for_each_hw_ctx(q, hctx, i) { - hctx->sched_tags = blk_mq_alloc_rq_map(set, i, q->nr_requests, 0); + hctx->sched_tags = blk_mq_alloc_rq_map(set, i, + q->nr_requests, set->reserved_tags); if (!hctx->sched_tags) { ret = -ENOMEM; break; @@ -498,15 +492,6 @@ int blk_mq_sched_init(struct request_queue *q) { int ret; -#if defined(CONFIG_DEFAULT_SQ_NONE) - if (q->nr_hw_queues == 1) - return 0; -#endif -#if defined(CONFIG_DEFAULT_MQ_NONE) - if (q->nr_hw_queues > 1) - return 0; -#endif - mutex_lock(&q->sysfs_lock); ret = elevator_init(q, NULL); mutex_unlock(&q->sysfs_lock); diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 7b5f3b95c78e..a75b16b123f7 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -122,17 +122,27 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx) return false; } -static inline void blk_mq_sched_mark_restart(struct blk_mq_hw_ctx *hctx) +/* + * Mark a hardware queue as needing a restart. + */ +static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx) { - if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) { + if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); - if (hctx->flags & BLK_MQ_F_TAG_SHARED) { - struct request_queue *q = hctx->queue; +} + +/* + * Mark a hardware queue and the request queue it belongs to as needing a + * restart. + */ +static inline void blk_mq_sched_mark_restart_queue(struct blk_mq_hw_ctx *hctx) +{ + struct request_queue *q = hctx->queue; - if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) - set_bit(QUEUE_FLAG_RESTART, &q->queue_flags); - } - } + if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) + set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); + if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) + set_bit(QUEUE_FLAG_RESTART, &q->queue_flags); } static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 295e69670c39..d745ab81033a 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -17,6 +17,15 @@ static void blk_mq_sysfs_release(struct kobject *kobj) { } +static void blk_mq_hw_sysfs_release(struct kobject *kobj) +{ + struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx, + kobj); + free_cpumask_var(hctx->cpumask); + kfree(hctx->ctxs); + kfree(hctx); +} + struct blk_mq_ctx_sysfs_entry { struct attribute attr; ssize_t (*show)(struct blk_mq_ctx *, char *); @@ -200,7 +209,7 @@ static struct kobj_type blk_mq_ctx_ktype = { static struct kobj_type blk_mq_hw_ktype = { .sysfs_ops = &blk_mq_hw_sysfs_ops, .default_attrs = default_hw_ctx_attrs, - .release = blk_mq_sysfs_release, + .release = blk_mq_hw_sysfs_release, }; static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx) @@ -242,24 +251,15 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) { struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx; - int i, j; + int i; - queue_for_each_hw_ctx(q, hctx, i) { + queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); - hctx_for_each_ctx(hctx, ctx, j) - kobject_put(&ctx->kobj); - - kobject_put(&hctx->kobj); - } - blk_mq_debugfs_unregister_hctxs(q); kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); kobject_del(&q->mq_kobj); - kobject_put(&q->mq_kobj); - kobject_put(&dev->kobj); q->mq_sysfs_init_done = false; @@ -277,7 +277,19 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx) kobject_init(&hctx->kobj, &blk_mq_hw_ktype); } -static void blk_mq_sysfs_init(struct request_queue *q) +void blk_mq_sysfs_deinit(struct request_queue *q) +{ + struct blk_mq_ctx *ctx; + int cpu; + + for_each_possible_cpu(cpu) { + ctx = per_cpu_ptr(q->queue_ctx, cpu); + kobject_put(&ctx->kobj); + } + kobject_put(&q->mq_kobj); +} + +void blk_mq_sysfs_init(struct request_queue *q) { struct blk_mq_ctx *ctx; int cpu; @@ -297,8 +309,6 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q) blk_mq_disable_hotplug(); - blk_mq_sysfs_init(q); - ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq"); if (ret < 0) goto out; diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 54c84363c1b2..9d97bfc4d465 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -181,7 +181,7 @@ found_tag: void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, unsigned int tag) { - if (tag >= tags->nr_reserved_tags) { + if (!blk_mq_tag_is_reserved(tags, tag)) { const int real_tag = tag - tags->nr_reserved_tags; BUG_ON(real_tag >= tags->nr_tags); @@ -295,6 +295,9 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set) for (i = 0; i < set->nr_hw_queues; i++) { struct blk_mq_tags *tags = set->tags[i]; + if (!tags) + continue; + for (j = 0; j < tags->nr_tags; j++) { if (!tags->static_rqs[j]) continue; diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 63497423c5cd..5cb51e53cc03 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -85,4 +85,10 @@ static inline void blk_mq_tag_set_rq(struct blk_mq_hw_ctx *hctx, hctx->tags->rqs[tag] = rq; } +static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags, + unsigned int tag) +{ + return tag < tags->nr_reserved_tags; +} + #endif diff --git a/block/blk-mq-virtio.c b/block/blk-mq-virtio.c new file mode 100644 index 000000000000..c3afbca11299 --- /dev/null +++ b/block/blk-mq-virtio.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016 Christoph Hellwig. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include <linux/device.h> +#include <linux/blk-mq.h> +#include <linux/blk-mq-virtio.h> +#include <linux/virtio_config.h> +#include <linux/module.h> +#include "blk-mq.h" + +/** + * blk_mq_virtio_map_queues - provide a default queue mapping for virtio device + * @set: tagset to provide the mapping for + * @vdev: virtio device associated with @set. + * @first_vec: first interrupt vectors to use for queues (usually 0) + * + * This function assumes the virtio device @vdev has at least as many available + * interrupt vetors as @set has queues. It will then queuery the vector + * corresponding to each queue for it's affinity mask and built queue mapping + * that maps a queue to the CPUs that have irq affinity for the corresponding + * vector. + */ +int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, + struct virtio_device *vdev, int first_vec) +{ + const struct cpumask *mask; + unsigned int queue, cpu; + + if (!vdev->config->get_vq_affinity) + goto fallback; + + for (queue = 0; queue < set->nr_hw_queues; queue++) { + mask = vdev->config->get_vq_affinity(vdev, first_vec + queue); + if (!mask) + goto fallback; + + for_each_cpu(cpu, mask) + set->mq_map[cpu] = queue; + } + + return 0; +fallback: + return blk_mq_map_queues(set); +} +EXPORT_SYMBOL_GPL(blk_mq_virtio_map_queues); diff --git a/block/blk-mq.c b/block/blk-mq.c index b29e7dc7b309..a4546f060e80 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -20,6 +20,8 @@ #include <linux/cpu.h> #include <linux/cache.h> #include <linux/sched/sysctl.h> +#include <linux/sched/topology.h> +#include <linux/sched/signal.h> #include <linux/delay.h> #include <linux/crash_dump.h> #include <linux/prefetch.h> @@ -75,10 +77,20 @@ void blk_mq_freeze_queue_start(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start); -static void blk_mq_freeze_queue_wait(struct request_queue *q) +void blk_mq_freeze_queue_wait(struct request_queue *q) { wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); } +EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait); + +int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, + unsigned long timeout) +{ + return wait_event_timeout(q->mq_freeze_wq, + percpu_ref_is_zero(&q->q_usage_counter), + timeout); +} +EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout); /* * Guarantee no request is in use, so we can change any data structure of @@ -234,6 +246,7 @@ struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data, } rq->tag = tag; rq->internal_tag = -1; + data->hctx->tags->rqs[rq->tag] = rq; } blk_mq_rq_ctx_init(data->q, data->ctx, rq, op); @@ -273,10 +286,9 @@ EXPORT_SYMBOL(blk_mq_alloc_request); struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, unsigned int flags, unsigned int hctx_idx) { - struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx; + struct blk_mq_alloc_data alloc_data = { .flags = flags }; struct request *rq; - struct blk_mq_alloc_data alloc_data; + unsigned int cpu; int ret; /* @@ -299,25 +311,23 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, * Check if the hardware context is actually mapped to anything. * If not tell the caller that it should skip this queue. */ - hctx = q->queue_hw_ctx[hctx_idx]; - if (!blk_mq_hw_queue_mapped(hctx)) { - ret = -EXDEV; - goto out_queue_exit; + alloc_data.hctx = q->queue_hw_ctx[hctx_idx]; + if (!blk_mq_hw_queue_mapped(alloc_data.hctx)) { + blk_queue_exit(q); + return ERR_PTR(-EXDEV); } - ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask)); + cpu = cpumask_first(alloc_data.hctx->cpumask); + alloc_data.ctx = __blk_mq_get_ctx(q, cpu); - blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); - rq = __blk_mq_alloc_request(&alloc_data, rw); - if (!rq) { - ret = -EWOULDBLOCK; - goto out_queue_exit; - } - - return rq; + rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data); -out_queue_exit: + blk_mq_put_ctx(alloc_data.ctx); blk_queue_exit(q); - return ERR_PTR(ret); + + if (!rq) + return ERR_PTR(-EWOULDBLOCK); + + return rq; } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); @@ -852,6 +862,9 @@ done: return true; } + if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag)) + data.flags |= BLK_MQ_REQ_RESERVED; + rq->tag = blk_mq_get_tag(&data); if (rq->tag >= 0) { if (blk_mq_tag_busy(data.hctx)) { @@ -865,12 +878,9 @@ done: return false; } -static void blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, - struct request *rq) +static void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, + struct request *rq) { - if (rq->tag == -1 || rq->internal_tag == -1) - return; - blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag); rq->tag = -1; @@ -880,6 +890,26 @@ static void blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, } } +static void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx, + struct request *rq) +{ + if (rq->tag == -1 || rq->internal_tag == -1) + return; + + __blk_mq_put_driver_tag(hctx, rq); +} + +static void blk_mq_put_driver_tag(struct request *rq) +{ + struct blk_mq_hw_ctx *hctx; + + if (rq->tag == -1 || rq->internal_tag == -1) + return; + + hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); + __blk_mq_put_driver_tag(hctx, rq); +} + /* * If we fail getting a driver tag because all the driver tags are already * assigned and on the dispatch list, BUT the first entry does not have a @@ -904,6 +934,44 @@ static bool reorder_tags_to_front(struct list_head *list) return first != NULL; } +static int blk_mq_dispatch_wake(wait_queue_t *wait, unsigned mode, int flags, + void *key) +{ + struct blk_mq_hw_ctx *hctx; + + hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait); + + list_del(&wait->task_list); + clear_bit_unlock(BLK_MQ_S_TAG_WAITING, &hctx->state); + blk_mq_run_hw_queue(hctx, true); + return 1; +} + +static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx) +{ + struct sbq_wait_state *ws; + + /* + * The TAG_WAITING bit serves as a lock protecting hctx->dispatch_wait. + * The thread which wins the race to grab this bit adds the hardware + * queue to the wait queue. + */ + if (test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state) || + test_and_set_bit_lock(BLK_MQ_S_TAG_WAITING, &hctx->state)) + return false; + + init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake); + ws = bt_wait_ptr(&hctx->tags->bitmap_tags, hctx); + + /* + * As soon as this returns, it's no longer safe to fiddle with + * hctx->dispatch_wait, since a completion can wake up the wait queue + * and unlock the bit. + */ + add_wait_queue(&ws->wait, &hctx->dispatch_wait); + return true; +} + bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) { struct request_queue *q = hctx->queue; @@ -931,20 +999,39 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) continue; /* - * We failed getting a driver tag. Mark the queue(s) - * as needing a restart. Retry getting a tag again, - * in case the needed IO completed right before we - * marked the queue as needing a restart. + * The initial allocation attempt failed, so we need to + * rerun the hardware queue when a tag is freed. */ - blk_mq_sched_mark_restart(hctx); - if (!blk_mq_get_driver_tag(rq, &hctx, false)) + if (blk_mq_dispatch_wait_add(hctx)) { + /* + * It's possible that a tag was freed in the + * window between the allocation failure and + * adding the hardware queue to the wait queue. + */ + if (!blk_mq_get_driver_tag(rq, &hctx, false)) + break; + } else { break; + } } + list_del_init(&rq->queuelist); bd.rq = rq; bd.list = dptr; - bd.last = list_empty(list); + + /* + * Flag last if we have no more requests, or if we have more + * but can't assign a driver tag to it. + */ + if (list_empty(list)) + bd.last = true; + else { + struct request *nxt; + + nxt = list_first_entry(list, struct request, queuelist); + bd.last = !blk_mq_get_driver_tag(nxt, NULL, false); + } ret = q->mq_ops->queue_rq(hctx, &bd); switch (ret) { @@ -952,7 +1039,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) queued++; break; case BLK_MQ_RQ_QUEUE_BUSY: - blk_mq_put_driver_tag(hctx, rq); + blk_mq_put_driver_tag_hctx(hctx, rq); list_add(&rq->queuelist, list); __blk_mq_requeue_request(rq); break; @@ -982,6 +1069,13 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) * that is where we will continue on next queue run. */ if (!list_empty(list)) { + /* + * If we got a driver tag for the next request already, + * free it again. + */ + rq = list_first_entry(list, struct request, queuelist); + blk_mq_put_driver_tag(rq); + spin_lock(&hctx->lock); list_splice_init(list, &hctx->dispatch); spin_unlock(&hctx->lock); @@ -995,10 +1089,11 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) * * blk_mq_run_hw_queue() already checks the STOPPED bit * - * If RESTART is set, then let completion restart the queue - * instead of potentially looping here. + * If RESTART or TAG_WAITING is set, then let completion restart + * the queue instead of potentially looping here. */ - if (!blk_mq_sched_needs_restart(hctx)) + if (!blk_mq_sched_needs_restart(hctx) && + !test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state)) blk_mq_run_hw_queue(hctx, true); } @@ -1339,7 +1434,8 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true); } -static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) +static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie, + bool may_sleep) { struct request_queue *q = rq->q; struct blk_mq_queue_data bd = { @@ -1380,7 +1476,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) } insert: - blk_mq_sched_insert_request(rq, false, true, true, false); + blk_mq_sched_insert_request(rq, false, true, false, may_sleep); } /* @@ -1474,11 +1570,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) { rcu_read_lock(); - blk_mq_try_issue_directly(old_rq, &cookie); + blk_mq_try_issue_directly(old_rq, &cookie, false); rcu_read_unlock(); } else { srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu); - blk_mq_try_issue_directly(old_rq, &cookie); + blk_mq_try_issue_directly(old_rq, &cookie, true); srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx); } goto done; @@ -1667,16 +1763,20 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, unsigned int reserved_tags) { struct blk_mq_tags *tags; + int node; + + node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx); + if (node == NUMA_NO_NODE) + node = set->numa_node; - tags = blk_mq_init_tags(nr_tags, reserved_tags, - set->numa_node, + tags = blk_mq_init_tags(nr_tags, reserved_tags, node, BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); if (!tags) return NULL; tags->rqs = kzalloc_node(nr_tags * sizeof(struct request *), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, - set->numa_node); + node); if (!tags->rqs) { blk_mq_free_tags(tags); return NULL; @@ -1684,7 +1784,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, tags->static_rqs = kzalloc_node(nr_tags * sizeof(struct request *), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, - set->numa_node); + node); if (!tags->static_rqs) { kfree(tags->rqs); blk_mq_free_tags(tags); @@ -1704,6 +1804,11 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, { unsigned int i, j, entries_per_page, max_order = 4; size_t rq_size, left; + int node; + + node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx); + if (node == NUMA_NO_NODE) + node = set->numa_node; INIT_LIST_HEAD(&tags->page_list); @@ -1725,7 +1830,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, this_order--; do { - page = alloc_pages_node(set->numa_node, + page = alloc_pages_node(node, GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO, this_order); if (page) @@ -1758,7 +1863,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, if (set->ops->init_request) { if (set->ops->init_request(set->driver_data, rq, hctx_idx, i, - set->numa_node)) { + node)) { tags->static_rqs[i] = NULL; goto fail; } @@ -1851,16 +1956,6 @@ static void blk_mq_exit_hw_queues(struct request_queue *q, } } -static void blk_mq_free_hw_queues(struct request_queue *q, - struct blk_mq_tag_set *set) -{ - struct blk_mq_hw_ctx *hctx; - unsigned int i; - - queue_for_each_hw_ctx(q, hctx, i) - free_cpumask_var(hctx->cpumask); -} - static int blk_mq_init_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned hctx_idx) @@ -1941,7 +2036,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i); struct blk_mq_hw_ctx *hctx; - memset(__ctx, 0, sizeof(*__ctx)); __ctx->cpu = i; spin_lock_init(&__ctx->lock); INIT_LIST_HEAD(&__ctx->rq_list); @@ -2153,15 +2247,19 @@ void blk_mq_release(struct request_queue *q) queue_for_each_hw_ctx(q, hctx, i) { if (!hctx) continue; - kfree(hctx->ctxs); - kfree(hctx); + kobject_put(&hctx->kobj); } q->mq_map = NULL; kfree(q->queue_hw_ctx); - /* ctx kobj stays in queue_ctx */ + /* + * release .mq_kobj and sw queue's kobject now because + * both share lifetime with request queue. + */ + blk_mq_sysfs_deinit(q); + free_percpu(q->queue_ctx); } @@ -2226,10 +2324,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, if (hctx->tags) blk_mq_free_map_and_requests(set, j); blk_mq_exit_hctx(q, set, hctx, j); - free_cpumask_var(hctx->cpumask); kobject_put(&hctx->kobj); - kfree(hctx->ctxs); - kfree(hctx); hctxs[j] = NULL; } @@ -2248,6 +2343,9 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, if (!q->queue_ctx) goto err_exit; + /* init q->mq_kobj and sw queues' kobjects */ + blk_mq_sysfs_init(q); + q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)), GFP_KERNEL, set->numa_node); if (!q->queue_hw_ctx) @@ -2338,7 +2436,6 @@ void blk_mq_free_queue(struct request_queue *q) blk_mq_del_queue_tag_set(q); blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); - blk_mq_free_hw_queues(q, set); } /* Basically redo blk_mq_init_queue with queue frozen */ diff --git a/block/blk-mq.h b/block/blk-mq.h index 24b2256186f3..b79f9a7d8cf6 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -77,6 +77,8 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, /* * sysfs helpers */ +extern void blk_mq_sysfs_init(struct request_queue *q); +extern void blk_mq_sysfs_deinit(struct request_queue *q); extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); @@ -146,16 +148,6 @@ struct blk_mq_alloc_data { struct blk_mq_hw_ctx *hctx; }; -static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data, - struct request_queue *q, unsigned int flags, - struct blk_mq_ctx *ctx, struct blk_mq_hw_ctx *hctx) -{ - data->q = q; - data->flags = flags; - data->ctx = ctx; - data->hctx = hctx; -} - static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data) { if (data->flags & BLK_MQ_REQ_INTERNAL) diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 06cf9807f49a..87b7df4851bf 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -9,6 +9,7 @@ #include <linux/interrupt.h> #include <linux/cpu.h> #include <linux/sched.h> +#include <linux/sched/topology.h> #include "blk.h" diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 002af836aa87..c44b321335f3 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -815,9 +815,7 @@ static void blk_release_queue(struct kobject *kobj) blkcg_exit_queue(q); if (q->elevator) { - spin_lock_irq(q->queue_lock); ioc_clear_queue(q); - spin_unlock_irq(q->queue_lock); elevator_exit(q->elevator); } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 82fd0cc394eb..8fab716e4059 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -185,7 +185,7 @@ static struct throtl_grp *sq_to_tg(struct throtl_service_queue *sq) * sq_to_td - return throtl_data the specified service queue belongs to * @sq: the throtl_service_queue of interest * - * A service_queue can be embeded in either a throtl_grp or throtl_data. + * A service_queue can be embedded in either a throtl_grp or throtl_data. * Determine the associated throtl_data accordingly and return it. */ static struct throtl_data *sq_to_td(struct throtl_service_queue *sq) diff --git a/block/bsg.c b/block/bsg.c index a9a8b8e0446f..74835dbf0c47 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -573,7 +573,7 @@ bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) int ret; ssize_t bytes_read; - dprintk("%s: read %Zd bytes\n", bd->name, count); + dprintk("%s: read %zd bytes\n", bd->name, count); bsg_set_block(bd, file); @@ -648,7 +648,7 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) ssize_t bytes_written; int ret; - dprintk("%s: write %Zd bytes\n", bd->name, count); + dprintk("%s: write %zd bytes\n", bd->name, count); if (unlikely(segment_eq(get_fs(), KERNEL_DS))) return -EINVAL; @@ -667,7 +667,7 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) if (!bytes_written || err_block_err(ret)) bytes_written = ret; - dprintk("%s: returning %Zd\n", bd->name, bytes_written); + dprintk("%s: returning %zd\n", bd->name, bytes_written); return bytes_written; } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 137944777859..440b95ee593c 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -8,6 +8,7 @@ */ #include <linux/module.h> #include <linux/slab.h> +#include <linux/sched/clock.h> #include <linux/blkdev.h> #include <linux/elevator.h> #include <linux/ktime.h> diff --git a/block/elevator.c b/block/elevator.c index 699d10f71a2c..01139f549b5b 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -220,17 +220,24 @@ int elevator_init(struct request_queue *q, char *name) } if (!e) { - if (q->mq_ops && q->nr_hw_queues == 1) - e = elevator_get(CONFIG_DEFAULT_SQ_IOSCHED, false); - else if (q->mq_ops) - e = elevator_get(CONFIG_DEFAULT_MQ_IOSCHED, false); - else + /* + * For blk-mq devices, we default to using mq-deadline, + * if available, for single queue devices. If deadline + * isn't available OR we have multiple queues, default + * to "none". + */ + if (q->mq_ops) { + if (q->nr_hw_queues == 1) + e = elevator_get("mq-deadline", false); + if (!e) + return 0; + } else e = elevator_get(CONFIG_DEFAULT_IOSCHED, false); if (!e) { printk(KERN_ERR "Default I/O scheduler not found. " \ - "Using noop/none.\n"); + "Using noop.\n"); e = elevator_get("noop", false); } } @@ -976,9 +983,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) if (old_registered) elv_unregister_queue(q); - spin_lock_irq(q->queue_lock); ioc_clear_queue(q); - spin_unlock_irq(q->queue_lock); } /* allocate, init and register new elevator */ diff --git a/block/genhd.c b/block/genhd.c index 3631cd480295..a9c516a8b37d 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -572,20 +572,6 @@ exit: disk_part_iter_exit(&piter); } -void put_disk_devt(struct disk_devt *disk_devt) -{ - if (disk_devt && atomic_dec_and_test(&disk_devt->count)) - disk_devt->release(disk_devt); -} -EXPORT_SYMBOL(put_disk_devt); - -void get_disk_devt(struct disk_devt *disk_devt) -{ - if (disk_devt) - atomic_inc(&disk_devt->count); -} -EXPORT_SYMBOL(get_disk_devt); - /** * device_add_disk - add partitioning information to kernel list * @parent: parent device for the disk @@ -626,13 +612,6 @@ void device_add_disk(struct device *parent, struct gendisk *disk) disk_alloc_events(disk); - /* - * Take a reference on the devt and assign it to queue since it - * must not be reallocated while the bdi is registered - */ - disk->queue->disk_devt = disk->disk_devt; - get_disk_devt(disk->disk_devt); - /* Register BDI before referencing it from bdev */ bdi = disk->queue->backing_dev_info; bdi_register_owner(bdi, disk_to_dev(disk)); @@ -669,19 +648,28 @@ void del_gendisk(struct gendisk *disk) disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); while ((part = disk_part_iter_next(&piter))) { - bdev_unhash_inode(MKDEV(disk->major, - disk->first_minor + part->partno)); invalidate_partition(disk, part->partno); + bdev_unhash_inode(part_devt(part)); delete_partition(disk, part->partno); } disk_part_iter_exit(&piter); invalidate_partition(disk, 0); + bdev_unhash_inode(disk_devt(disk)); set_capacity(disk, 0); disk->flags &= ~GENHD_FL_UP; sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); - blk_unregister_queue(disk); + if (disk->queue) { + /* + * Unregister bdi before releasing device numbers (as they can + * get reused and we'd get clashes in sysfs). + */ + bdi_unregister(disk->queue->backing_dev_info); + blk_unregister_queue(disk); + } else { + WARN_ON(1); + } blk_unregister_region(disk_devt(disk), disk->minors); part_stat_set_all(&disk->part0, 0); diff --git a/block/ioprio.c b/block/ioprio.c index 01b8116298a1..0c47a00f92a8 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -23,8 +23,11 @@ #include <linux/kernel.h> #include <linux/export.h> #include <linux/ioprio.h> +#include <linux/cred.h> #include <linux/blkdev.h> #include <linux/capability.h> +#include <linux/sched/user.h> +#include <linux/sched/task.h> #include <linux/syscalls.h> #include <linux/security.h> #include <linux/pid_namespace.h> @@ -122,14 +125,14 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio) if (!user) break; - do_each_thread(g, p) { + for_each_process_thread(g, p) { if (!uid_eq(task_uid(p), uid) || !task_pid_vnr(p)) continue; ret = set_task_ioprio(p, ioprio); if (ret) goto free_uid; - } while_each_thread(g, p); + } free_uid: if (who) free_uid(user); @@ -222,7 +225,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) if (!user) break; - do_each_thread(g, p) { + for_each_process_thread(g, p) { if (!uid_eq(task_uid(p), user->uid) || !task_pid_vnr(p)) continue; @@ -233,7 +236,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) ret = tmpio; else ret = ioprio_best(ret, tmpio); - } while_each_thread(g, p); + } if (who) free_uid(user); diff --git a/block/sed-opal.c b/block/sed-opal.c index d1c52ba4d62d..14035f826b5e 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -34,7 +34,11 @@ #define IO_BUFFER_LENGTH 2048 #define MAX_TOKS 64 -typedef int (*opal_step)(struct opal_dev *dev); +struct opal_step { + int (*fn)(struct opal_dev *dev, void *data); + void *data; +}; +typedef int (cont_fn)(struct opal_dev *dev); enum opal_atom_width { OPAL_WIDTH_TINY, @@ -80,9 +84,7 @@ struct opal_dev { void *data; sec_send_recv *send_recv; - const opal_step *funcs; - void **func_data; - int state; + const struct opal_step *steps; struct mutex dev_lock; u16 comid; u32 hsn; @@ -213,8 +215,6 @@ static const u8 opalmethod[][OPAL_UID_LENGTH] = { { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x08, 0x03 }, }; -typedef int (cont_fn)(struct opal_dev *dev); - static int end_opal_session_error(struct opal_dev *dev); struct opal_suspend_data { @@ -375,18 +375,18 @@ static void check_geometry(struct opal_dev *dev, const void *data) static int next(struct opal_dev *dev) { - opal_step func; - int error = 0; + const struct opal_step *step; + int state = 0, error = 0; do { - func = dev->funcs[dev->state]; - if (!func) + step = &dev->steps[state]; + if (!step->fn) break; - error = func(dev); + error = step->fn(dev, step->data); if (error) { pr_err("Error on step function: %d with error %d: %s\n", - dev->state, error, + state, error, opal_error_to_human(error)); /* For each OPAL command we do a discovery0 then we @@ -396,10 +396,13 @@ static int next(struct opal_dev *dev) * session. Therefore we shouldn't attempt to terminate * a session, as one has not yet been created. */ - if (dev->state > 1) - return end_opal_session_error(dev); + if (state > 1) { + end_opal_session_error(dev); + return error; + } + } - dev->state++; + state++; } while (!error); return error; @@ -411,10 +414,17 @@ static int opal_discovery0_end(struct opal_dev *dev) const struct d0_header *hdr = (struct d0_header *)dev->resp; const u8 *epos = dev->resp, *cpos = dev->resp; u16 comid = 0; + u32 hlen = be32_to_cpu(hdr->length); + + print_buffer(dev->resp, hlen); - print_buffer(dev->resp, be32_to_cpu(hdr->length)); + if (hlen > IO_BUFFER_LENGTH - sizeof(*hdr)) { + pr_warn("Discovery length overflows buffer (%zu+%u)/%u\n", + sizeof(*hdr), hlen, IO_BUFFER_LENGTH); + return -EFAULT; + } - epos += be32_to_cpu(hdr->length); /* end of buffer */ + epos += hlen; /* end of buffer */ cpos += sizeof(*hdr); /* current position on buffer */ while (cpos < epos && supported) { @@ -476,7 +486,7 @@ static int opal_discovery0_end(struct opal_dev *dev) return 0; } -static int opal_discovery0(struct opal_dev *dev) +static int opal_discovery0(struct opal_dev *dev, void *data) { int ret; @@ -662,52 +672,29 @@ static int cmd_finalize(struct opal_dev *cmd, u32 hsn, u32 tsn) return 0; } -static enum opal_response_token token_type(const struct parsed_resp *resp, - int n) -{ - const struct opal_resp_tok *tok; - - if (n >= resp->num) { - pr_err("Token number doesn't exist: %d, resp: %d\n", - n, resp->num); - return OPAL_DTA_TOKENID_INVALID; - } - - tok = &resp->toks[n]; - if (tok->len == 0) { - pr_err("Token length must be non-zero\n"); - return OPAL_DTA_TOKENID_INVALID; - } - - return tok->type; -} - -/* - * This function returns 0 in case of invalid token. One should call - * token_type() first to find out if the token is valid or not. - */ -static enum opal_token response_get_token(const struct parsed_resp *resp, - int n) +static const struct opal_resp_tok *response_get_token( + const struct parsed_resp *resp, + int n) { const struct opal_resp_tok *tok; if (n >= resp->num) { pr_err("Token number doesn't exist: %d, resp: %d\n", n, resp->num); - return 0; + return ERR_PTR(-EINVAL); } tok = &resp->toks[n]; if (tok->len == 0) { pr_err("Token length must be non-zero\n"); - return 0; + return ERR_PTR(-EINVAL); } - return tok->pos[0]; + return tok; } -static size_t response_parse_tiny(struct opal_resp_tok *tok, - const u8 *pos) +static ssize_t response_parse_tiny(struct opal_resp_tok *tok, + const u8 *pos) { tok->pos = pos; tok->len = 1; @@ -723,8 +710,8 @@ static size_t response_parse_tiny(struct opal_resp_tok *tok, return tok->len; } -static size_t response_parse_short(struct opal_resp_tok *tok, - const u8 *pos) +static ssize_t response_parse_short(struct opal_resp_tok *tok, + const u8 *pos) { tok->pos = pos; tok->len = (pos[0] & SHORT_ATOM_LEN_MASK) + 1; @@ -736,7 +723,7 @@ static size_t response_parse_short(struct opal_resp_tok *tok, tok->type = OPAL_DTA_TOKENID_SINT; } else { u64 u_integer = 0; - int i, b = 0; + ssize_t i, b = 0; tok->type = OPAL_DTA_TOKENID_UINT; if (tok->len > 9) { @@ -753,8 +740,8 @@ static size_t response_parse_short(struct opal_resp_tok *tok, return tok->len; } -static size_t response_parse_medium(struct opal_resp_tok *tok, - const u8 *pos) +static ssize_t response_parse_medium(struct opal_resp_tok *tok, + const u8 *pos) { tok->pos = pos; tok->len = (((pos[0] & MEDIUM_ATOM_LEN_MASK) << 8) | pos[1]) + 2; @@ -770,8 +757,8 @@ static size_t response_parse_medium(struct opal_resp_tok *tok, return tok->len; } -static size_t response_parse_long(struct opal_resp_tok *tok, - const u8 *pos) +static ssize_t response_parse_long(struct opal_resp_tok *tok, + const u8 *pos) { tok->pos = pos; tok->len = ((pos[1] << 16) | (pos[2] << 8) | pos[3]) + 4; @@ -787,8 +774,8 @@ static size_t response_parse_long(struct opal_resp_tok *tok, return tok->len; } -static size_t response_parse_token(struct opal_resp_tok *tok, - const u8 *pos) +static ssize_t response_parse_token(struct opal_resp_tok *tok, + const u8 *pos) { tok->pos = pos; tok->len = 1; @@ -805,8 +792,9 @@ static int response_parse(const u8 *buf, size_t length, struct opal_resp_tok *iter; int num_entries = 0; int total; - size_t token_length; + ssize_t token_length; const u8 *pos; + u32 clen, plen, slen; if (!buf) return -EFAULT; @@ -818,17 +806,16 @@ static int response_parse(const u8 *buf, size_t length, pos = buf; pos += sizeof(*hdr); - pr_debug("Response size: cp: %d, pkt: %d, subpkt: %d\n", - be32_to_cpu(hdr->cp.length), - be32_to_cpu(hdr->pkt.length), - be32_to_cpu(hdr->subpkt.length)); - - if (hdr->cp.length == 0 || hdr->pkt.length == 0 || - hdr->subpkt.length == 0) { - pr_err("Bad header length. cp: %d, pkt: %d, subpkt: %d\n", - be32_to_cpu(hdr->cp.length), - be32_to_cpu(hdr->pkt.length), - be32_to_cpu(hdr->subpkt.length)); + clen = be32_to_cpu(hdr->cp.length); + plen = be32_to_cpu(hdr->pkt.length); + slen = be32_to_cpu(hdr->subpkt.length); + pr_debug("Response size: cp: %u, pkt: %u, subpkt: %u\n", + clen, plen, slen); + + if (clen == 0 || plen == 0 || slen == 0 || + slen > IO_BUFFER_LENGTH - sizeof(*hdr)) { + pr_err("Bad header length. cp: %u, pkt: %u, subpkt: %u\n", + clen, plen, slen); print_buffer(pos, sizeof(*hdr)); return -EINVAL; } @@ -837,7 +824,7 @@ static int response_parse(const u8 *buf, size_t length, return -EFAULT; iter = resp->toks; - total = be32_to_cpu(hdr->subpkt.length); + total = slen; print_buffer(pos, total); while (total > 0) { if (pos[0] <= TINY_ATOM_BYTE) /* tiny atom */ @@ -851,8 +838,8 @@ static int response_parse(const u8 *buf, size_t length, else /* TOKEN */ token_length = response_parse_token(iter, pos); - if (token_length == -EINVAL) - return -EINVAL; + if (token_length < 0) + return token_length; pos += token_length; total -= token_length; @@ -922,20 +909,32 @@ static u64 response_get_u64(const struct parsed_resp *resp, int n) return resp->toks[n].stored.u; } +static bool response_token_matches(const struct opal_resp_tok *token, u8 match) +{ + if (IS_ERR(token) || + token->type != OPAL_DTA_TOKENID_TOKEN || + token->pos[0] != match) + return false; + return true; +} + static u8 response_status(const struct parsed_resp *resp) { - if (token_type(resp, 0) == OPAL_DTA_TOKENID_TOKEN && - response_get_token(resp, 0) == OPAL_ENDOFSESSION) { + const struct opal_resp_tok *tok; + + tok = response_get_token(resp, 0); + if (response_token_matches(tok, OPAL_ENDOFSESSION)) return 0; - } if (resp->num < 5) return DTAERROR_NO_METHOD_STATUS; - if (token_type(resp, resp->num - 1) != OPAL_DTA_TOKENID_TOKEN || - token_type(resp, resp->num - 5) != OPAL_DTA_TOKENID_TOKEN || - response_get_token(resp, resp->num - 1) != OPAL_ENDLIST || - response_get_token(resp, resp->num - 5) != OPAL_STARTLIST) + tok = response_get_token(resp, resp->num - 5); + if (!response_token_matches(tok, OPAL_STARTLIST)) + return DTAERROR_NO_METHOD_STATUS; + + tok = response_get_token(resp, resp->num - 1); + if (!response_token_matches(tok, OPAL_ENDLIST)) return DTAERROR_NO_METHOD_STATUS; return response_get_u64(resp, resp->num - 4); @@ -1022,9 +1021,8 @@ static int finalize_and_send(struct opal_dev *dev, cont_fn cont) return opal_send_recv(dev, cont); } -static int gen_key(struct opal_dev *dev) +static int gen_key(struct opal_dev *dev, void *data) { - const u8 *method; u8 uid[OPAL_UID_LENGTH]; int err = 0; @@ -1032,7 +1030,6 @@ static int gen_key(struct opal_dev *dev) set_comid(dev, dev->comid); memcpy(uid, dev->prev_data, min(sizeof(uid), dev->prev_d_len)); - method = opalmethod[OPAL_GENKEY]; kfree(dev->prev_data); dev->prev_data = NULL; @@ -1076,15 +1073,14 @@ static int get_active_key_cont(struct opal_dev *dev) return 0; } -static int get_active_key(struct opal_dev *dev) +static int get_active_key(struct opal_dev *dev, void *data) { u8 uid[OPAL_UID_LENGTH]; int err = 0; - u8 *lr; + u8 *lr = data; clear_opal_cmd(dev); set_comid(dev, dev->comid); - lr = dev->func_data[dev->state]; err = build_locking_range(uid, sizeof(uid), *lr); if (err) @@ -1167,17 +1163,16 @@ static inline int enable_global_lr(struct opal_dev *dev, u8 *uid, return err; } -static int setup_locking_range(struct opal_dev *dev) +static int setup_locking_range(struct opal_dev *dev, void *data) { u8 uid[OPAL_UID_LENGTH]; - struct opal_user_lr_setup *setup; + struct opal_user_lr_setup *setup = data; u8 lr; int err = 0; clear_opal_cmd(dev); set_comid(dev, dev->comid); - setup = dev->func_data[dev->state]; lr = setup->session.opal_key.lr; err = build_locking_range(uid, sizeof(uid), lr); if (err) @@ -1290,20 +1285,19 @@ static int start_generic_opal_session(struct opal_dev *dev, return finalize_and_send(dev, start_opal_session_cont); } -static int start_anybodyASP_opal_session(struct opal_dev *dev) +static int start_anybodyASP_opal_session(struct opal_dev *dev, void *data) { return start_generic_opal_session(dev, OPAL_ANYBODY_UID, OPAL_ADMINSP_UID, NULL, 0); } -static int start_SIDASP_opal_session(struct opal_dev *dev) +static int start_SIDASP_opal_session(struct opal_dev *dev, void *data) { int ret; const u8 *key = dev->prev_data; - struct opal_key *okey; if (!key) { - okey = dev->func_data[dev->state]; + const struct opal_key *okey = data; ret = start_generic_opal_session(dev, OPAL_SID_UID, OPAL_ADMINSP_UID, okey->key, @@ -1318,22 +1312,21 @@ static int start_SIDASP_opal_session(struct opal_dev *dev) return ret; } -static inline int start_admin1LSP_opal_session(struct opal_dev *dev) +static int start_admin1LSP_opal_session(struct opal_dev *dev, void *data) { - struct opal_key *key = dev->func_data[dev->state]; - + struct opal_key *key = data; return start_generic_opal_session(dev, OPAL_ADMIN1_UID, OPAL_LOCKINGSP_UID, key->key, key->key_len); } -static int start_auth_opal_session(struct opal_dev *dev) +static int start_auth_opal_session(struct opal_dev *dev, void *data) { + struct opal_session_info *session = data; u8 lk_ul_user[OPAL_UID_LENGTH]; + size_t keylen = session->opal_key.key_len; int err = 0; - struct opal_session_info *session = dev->func_data[dev->state]; - size_t keylen = session->opal_key.key_len; u8 *key = session->opal_key.key; u32 hsn = GENERIC_HOST_SESSION_NUM; @@ -1383,7 +1376,7 @@ static int start_auth_opal_session(struct opal_dev *dev) return finalize_and_send(dev, start_opal_session_cont); } -static int revert_tper(struct opal_dev *dev) +static int revert_tper(struct opal_dev *dev, void *data) { int err = 0; @@ -1405,9 +1398,9 @@ static int revert_tper(struct opal_dev *dev) return finalize_and_send(dev, parse_and_check_status); } -static int internal_activate_user(struct opal_dev *dev) +static int internal_activate_user(struct opal_dev *dev, void *data) { - struct opal_session_info *session = dev->func_data[dev->state]; + struct opal_session_info *session = data; u8 uid[OPAL_UID_LENGTH]; int err = 0; @@ -1440,15 +1433,14 @@ static int internal_activate_user(struct opal_dev *dev) return finalize_and_send(dev, parse_and_check_status); } -static int erase_locking_range(struct opal_dev *dev) +static int erase_locking_range(struct opal_dev *dev, void *data) { - struct opal_session_info *session; + struct opal_session_info *session = data; u8 uid[OPAL_UID_LENGTH]; int err = 0; clear_opal_cmd(dev); set_comid(dev, dev->comid); - session = dev->func_data[dev->state]; if (build_locking_range(uid, sizeof(uid), session->opal_key.lr) < 0) return -ERANGE; @@ -1467,9 +1459,9 @@ static int erase_locking_range(struct opal_dev *dev) return finalize_and_send(dev, parse_and_check_status); } -static int set_mbr_done(struct opal_dev *dev) +static int set_mbr_done(struct opal_dev *dev, void *data) { - u8 mbr_done_tf = *(u8 *)dev->func_data[dev->state]; + u8 *mbr_done_tf = data; int err = 0; clear_opal_cmd(dev); @@ -1485,7 +1477,7 @@ static int set_mbr_done(struct opal_dev *dev) add_token_u8(&err, dev, OPAL_STARTLIST); add_token_u8(&err, dev, OPAL_STARTNAME); add_token_u8(&err, dev, 2); /* Done */ - add_token_u8(&err, dev, mbr_done_tf); /* Done T or F */ + add_token_u8(&err, dev, *mbr_done_tf); /* Done T or F */ add_token_u8(&err, dev, OPAL_ENDNAME); add_token_u8(&err, dev, OPAL_ENDLIST); add_token_u8(&err, dev, OPAL_ENDNAME); @@ -1499,9 +1491,9 @@ static int set_mbr_done(struct opal_dev *dev) return finalize_and_send(dev, parse_and_check_status); } -static int set_mbr_enable_disable(struct opal_dev *dev) +static int set_mbr_enable_disable(struct opal_dev *dev, void *data) { - u8 mbr_en_dis = *(u8 *)dev->func_data[dev->state]; + u8 *mbr_en_dis = data; int err = 0; clear_opal_cmd(dev); @@ -1517,7 +1509,7 @@ static int set_mbr_enable_disable(struct opal_dev *dev) add_token_u8(&err, dev, OPAL_STARTLIST); add_token_u8(&err, dev, OPAL_STARTNAME); add_token_u8(&err, dev, 1); - add_token_u8(&err, dev, mbr_en_dis); + add_token_u8(&err, dev, *mbr_en_dis); add_token_u8(&err, dev, OPAL_ENDNAME); add_token_u8(&err, dev, OPAL_ENDLIST); add_token_u8(&err, dev, OPAL_ENDNAME); @@ -1558,11 +1550,10 @@ static int generic_pw_cmd(u8 *key, size_t key_len, u8 *cpin_uid, return err; } -static int set_new_pw(struct opal_dev *dev) +static int set_new_pw(struct opal_dev *dev, void *data) { u8 cpin_uid[OPAL_UID_LENGTH]; - struct opal_session_info *usr = dev->func_data[dev->state]; - + struct opal_session_info *usr = data; memcpy(cpin_uid, opaluid[OPAL_C_PIN_ADMIN1], OPAL_UID_LENGTH); @@ -1583,10 +1574,10 @@ static int set_new_pw(struct opal_dev *dev) return finalize_and_send(dev, parse_and_check_status); } -static int set_sid_cpin_pin(struct opal_dev *dev) +static int set_sid_cpin_pin(struct opal_dev *dev, void *data) { u8 cpin_uid[OPAL_UID_LENGTH]; - struct opal_key *key = dev->func_data[dev->state]; + struct opal_key *key = data; memcpy(cpin_uid, opaluid[OPAL_C_PIN_SID], OPAL_UID_LENGTH); @@ -1597,18 +1588,16 @@ static int set_sid_cpin_pin(struct opal_dev *dev) return finalize_and_send(dev, parse_and_check_status); } -static int add_user_to_lr(struct opal_dev *dev) +static int add_user_to_lr(struct opal_dev *dev, void *data) { u8 lr_buffer[OPAL_UID_LENGTH]; u8 user_uid[OPAL_UID_LENGTH]; - struct opal_lock_unlock *lkul; + struct opal_lock_unlock *lkul = data; int err = 0; clear_opal_cmd(dev); set_comid(dev, dev->comid); - lkul = dev->func_data[dev->state]; - memcpy(lr_buffer, opaluid[OPAL_LOCKINGRANGE_ACE_RDLOCKED], OPAL_UID_LENGTH); @@ -1675,19 +1664,16 @@ static int add_user_to_lr(struct opal_dev *dev) return finalize_and_send(dev, parse_and_check_status); } -static int lock_unlock_locking_range(struct opal_dev *dev) +static int lock_unlock_locking_range(struct opal_dev *dev, void *data) { u8 lr_buffer[OPAL_UID_LENGTH]; - const u8 *method; - struct opal_lock_unlock *lkul; + struct opal_lock_unlock *lkul = data; u8 read_locked = 1, write_locked = 1; int err = 0; clear_opal_cmd(dev); set_comid(dev, dev->comid); - method = opalmethod[OPAL_SET]; - lkul = dev->func_data[dev->state]; if (build_locking_range(lr_buffer, sizeof(lr_buffer), lkul->session.opal_key.lr) < 0) return -ERANGE; @@ -1739,19 +1725,16 @@ static int lock_unlock_locking_range(struct opal_dev *dev) } -static int lock_unlock_locking_range_sum(struct opal_dev *dev) +static int lock_unlock_locking_range_sum(struct opal_dev *dev, void *data) { u8 lr_buffer[OPAL_UID_LENGTH]; u8 read_locked = 1, write_locked = 1; - const u8 *method; - struct opal_lock_unlock *lkul; + struct opal_lock_unlock *lkul = data; int ret; clear_opal_cmd(dev); set_comid(dev, dev->comid); - method = opalmethod[OPAL_SET]; - lkul = dev->func_data[dev->state]; if (build_locking_range(lr_buffer, sizeof(lr_buffer), lkul->session.opal_key.lr) < 0) return -ERANGE; @@ -1782,9 +1765,9 @@ static int lock_unlock_locking_range_sum(struct opal_dev *dev) return finalize_and_send(dev, parse_and_check_status); } -static int activate_lsp(struct opal_dev *dev) +static int activate_lsp(struct opal_dev *dev, void *data) { - struct opal_lr_act *opal_act; + struct opal_lr_act *opal_act = data; u8 user_lr[OPAL_UID_LENGTH]; u8 uint_3 = 0x83; int err = 0, i; @@ -1792,8 +1775,6 @@ static int activate_lsp(struct opal_dev *dev) clear_opal_cmd(dev); set_comid(dev, dev->comid); - opal_act = dev->func_data[dev->state]; - add_token_u8(&err, dev, OPAL_CALL); add_token_bytestring(&err, dev, opaluid[OPAL_LOCKINGSP_UID], OPAL_UID_LENGTH); @@ -1858,7 +1839,7 @@ static int get_lsp_lifecycle_cont(struct opal_dev *dev) } /* Determine if we're in the Manufactured Inactive or Active state */ -static int get_lsp_lifecycle(struct opal_dev *dev) +static int get_lsp_lifecycle(struct opal_dev *dev, void *data) { int err = 0; @@ -1919,14 +1900,13 @@ static int get_msid_cpin_pin_cont(struct opal_dev *dev) return 0; } -static int get_msid_cpin_pin(struct opal_dev *dev) +static int get_msid_cpin_pin(struct opal_dev *dev, void *data) { int err = 0; clear_opal_cmd(dev); set_comid(dev, dev->comid); - add_token_u8(&err, dev, OPAL_CALL); add_token_bytestring(&err, dev, opaluid[OPAL_C_PIN_MSID], OPAL_UID_LENGTH); @@ -1956,64 +1936,76 @@ static int get_msid_cpin_pin(struct opal_dev *dev) return finalize_and_send(dev, get_msid_cpin_pin_cont); } -static int build_end_opal_session(struct opal_dev *dev) +static int end_opal_session(struct opal_dev *dev, void *data) { int err = 0; clear_opal_cmd(dev); - set_comid(dev, dev->comid); add_token_u8(&err, dev, OPAL_ENDOFSESSION); - return err; -} -static int end_opal_session(struct opal_dev *dev) -{ - int ret = build_end_opal_session(dev); - - if (ret < 0) - return ret; + if (err < 0) + return err; return finalize_and_send(dev, end_session_cont); } static int end_opal_session_error(struct opal_dev *dev) { - const opal_step error_end_session[] = { - end_opal_session, - NULL, + const struct opal_step error_end_session[] = { + { end_opal_session, }, + { NULL, } }; - dev->funcs = error_end_session; - dev->state = 0; + dev->steps = error_end_session; return next(dev); } static inline void setup_opal_dev(struct opal_dev *dev, - const opal_step *funcs) + const struct opal_step *steps) { - dev->state = 0; - dev->funcs = funcs; + dev->steps = steps; dev->tsn = 0; dev->hsn = 0; - dev->func_data = NULL; dev->prev_data = NULL; } static int check_opal_support(struct opal_dev *dev) { - static const opal_step funcs[] = { - opal_discovery0, - NULL + const struct opal_step steps[] = { + { opal_discovery0, }, + { NULL, } }; int ret; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, funcs); + setup_opal_dev(dev, steps); ret = next(dev); dev->supported = !ret; mutex_unlock(&dev->dev_lock); return ret; } +static void clean_opal_dev(struct opal_dev *dev) +{ + + struct opal_suspend_data *suspend, *next; + + mutex_lock(&dev->dev_lock); + list_for_each_entry_safe(suspend, next, &dev->unlk_lst, node) { + list_del(&suspend->node); + kfree(suspend); + } + mutex_unlock(&dev->dev_lock); +} + +void free_opal_dev(struct opal_dev *dev) +{ + if (!dev) + return; + clean_opal_dev(dev); + kfree(dev); +} +EXPORT_SYMBOL(free_opal_dev); + struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv) { struct opal_dev *dev; @@ -2038,24 +2030,18 @@ EXPORT_SYMBOL(init_opal_dev); static int opal_secure_erase_locking_range(struct opal_dev *dev, struct opal_session_info *opal_session) { - void *data[3] = { NULL }; - static const opal_step erase_funcs[] = { - opal_discovery0, - start_auth_opal_session, - get_active_key, - gen_key, - end_opal_session, - NULL, + const struct opal_step erase_steps[] = { + { opal_discovery0, }, + { start_auth_opal_session, opal_session }, + { get_active_key, &opal_session->opal_key.lr }, + { gen_key, }, + { end_opal_session, }, + { NULL, } }; int ret; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, erase_funcs); - - dev->func_data = data; - dev->func_data[1] = opal_session; - dev->func_data[2] = &opal_session->opal_key.lr; - + setup_opal_dev(dev, erase_steps); ret = next(dev); mutex_unlock(&dev->dev_lock); return ret; @@ -2064,23 +2050,17 @@ static int opal_secure_erase_locking_range(struct opal_dev *dev, static int opal_erase_locking_range(struct opal_dev *dev, struct opal_session_info *opal_session) { - void *data[3] = { NULL }; - static const opal_step erase_funcs[] = { - opal_discovery0, - start_auth_opal_session, - erase_locking_range, - end_opal_session, - NULL, + const struct opal_step erase_steps[] = { + { opal_discovery0, }, + { start_auth_opal_session, opal_session }, + { erase_locking_range, opal_session }, + { end_opal_session, }, + { NULL, } }; int ret; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, erase_funcs); - - dev->func_data = data; - dev->func_data[1] = opal_session; - dev->func_data[2] = opal_session; - + setup_opal_dev(dev, erase_steps); ret = next(dev); mutex_unlock(&dev->dev_lock); return ret; @@ -2089,16 +2069,15 @@ static int opal_erase_locking_range(struct opal_dev *dev, static int opal_enable_disable_shadow_mbr(struct opal_dev *dev, struct opal_mbr_data *opal_mbr) { - void *func_data[6] = { NULL }; - static const opal_step mbr_funcs[] = { - opal_discovery0, - start_admin1LSP_opal_session, - set_mbr_done, - end_opal_session, - start_admin1LSP_opal_session, - set_mbr_enable_disable, - end_opal_session, - NULL, + const struct opal_step mbr_steps[] = { + { opal_discovery0, }, + { start_admin1LSP_opal_session, &opal_mbr->key }, + { set_mbr_done, &opal_mbr->enable_disable }, + { end_opal_session, }, + { start_admin1LSP_opal_session, &opal_mbr->key }, + { set_mbr_enable_disable, &opal_mbr->enable_disable }, + { end_opal_session, }, + { NULL, } }; int ret; @@ -2107,12 +2086,7 @@ static int opal_enable_disable_shadow_mbr(struct opal_dev *dev, return -EINVAL; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, mbr_funcs); - dev->func_data = func_data; - dev->func_data[1] = &opal_mbr->key; - dev->func_data[2] = &opal_mbr->enable_disable; - dev->func_data[4] = &opal_mbr->key; - dev->func_data[5] = &opal_mbr->enable_disable; + setup_opal_dev(dev, mbr_steps); ret = next(dev); mutex_unlock(&dev->dev_lock); return ret; @@ -2139,13 +2113,12 @@ static int opal_save(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk) static int opal_add_user_to_lr(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk) { - void *func_data[3] = { NULL }; - static const opal_step funcs[] = { - opal_discovery0, - start_admin1LSP_opal_session, - add_user_to_lr, - end_opal_session, - NULL + const struct opal_step steps[] = { + { opal_discovery0, }, + { start_admin1LSP_opal_session, &lk_unlk->session.opal_key }, + { add_user_to_lr, lk_unlk }, + { end_opal_session, }, + { NULL, } }; int ret; @@ -2154,7 +2127,7 @@ static int opal_add_user_to_lr(struct opal_dev *dev, pr_err("Locking state was not RO or RW\n"); return -EINVAL; } - if (lk_unlk->session.who < OPAL_USER1 && + if (lk_unlk->session.who < OPAL_USER1 || lk_unlk->session.who > OPAL_USER9) { pr_err("Authority was not within the range of users: %d\n", lk_unlk->session.who); @@ -2167,10 +2140,7 @@ static int opal_add_user_to_lr(struct opal_dev *dev, } mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, funcs); - dev->func_data = func_data; - dev->func_data[1] = &lk_unlk->session.opal_key; - dev->func_data[2] = lk_unlk; + setup_opal_dev(dev, steps); ret = next(dev); mutex_unlock(&dev->dev_lock); return ret; @@ -2178,55 +2148,54 @@ static int opal_add_user_to_lr(struct opal_dev *dev, static int opal_reverttper(struct opal_dev *dev, struct opal_key *opal) { - void *data[2] = { NULL }; - static const opal_step revert_funcs[] = { - opal_discovery0, - start_SIDASP_opal_session, - revert_tper, /* controller will terminate session */ - NULL, + const struct opal_step revert_steps[] = { + { opal_discovery0, }, + { start_SIDASP_opal_session, opal }, + { revert_tper, }, /* controller will terminate session */ + { NULL, } }; int ret; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, revert_funcs); - dev->func_data = data; - dev->func_data[1] = opal; + setup_opal_dev(dev, revert_steps); ret = next(dev); mutex_unlock(&dev->dev_lock); - return ret; -} -static int __opal_lock_unlock_sum(struct opal_dev *dev) -{ - static const opal_step ulk_funcs_sum[] = { - opal_discovery0, - start_auth_opal_session, - lock_unlock_locking_range_sum, - end_opal_session, - NULL - }; + /* + * If we successfully reverted lets clean + * any saved locking ranges. + */ + if (!ret) + clean_opal_dev(dev); - dev->funcs = ulk_funcs_sum; - return next(dev); + return ret; } -static int __opal_lock_unlock(struct opal_dev *dev) +static int __opal_lock_unlock(struct opal_dev *dev, + struct opal_lock_unlock *lk_unlk) { - static const opal_step _unlock_funcs[] = { - opal_discovery0, - start_auth_opal_session, - lock_unlock_locking_range, - end_opal_session, - NULL + const struct opal_step unlock_steps[] = { + { opal_discovery0, }, + { start_auth_opal_session, &lk_unlk->session }, + { lock_unlock_locking_range, lk_unlk }, + { end_opal_session, }, + { NULL, } + }; + const struct opal_step unlock_sum_steps[] = { + { opal_discovery0, }, + { start_auth_opal_session, &lk_unlk->session }, + { lock_unlock_locking_range_sum, lk_unlk }, + { end_opal_session, }, + { NULL, } }; - dev->funcs = _unlock_funcs; + dev->steps = lk_unlk->session.sum ? unlock_sum_steps : unlock_steps; return next(dev); } -static int opal_lock_unlock(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk) +static int opal_lock_unlock(struct opal_dev *dev, + struct opal_lock_unlock *lk_unlk) { - void *func_data[3] = { NULL }; int ret; if (lk_unlk->session.who < OPAL_ADMIN1 || @@ -2234,43 +2203,30 @@ static int opal_lock_unlock(struct opal_dev *dev, struct opal_lock_unlock *lk_un return -EINVAL; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, NULL); - dev->func_data = func_data; - dev->func_data[1] = &lk_unlk->session; - dev->func_data[2] = lk_unlk; - - if (lk_unlk->session.sum) - ret = __opal_lock_unlock_sum(dev); - else - ret = __opal_lock_unlock(dev); - + ret = __opal_lock_unlock(dev, lk_unlk); mutex_unlock(&dev->dev_lock); return ret; } static int opal_take_ownership(struct opal_dev *dev, struct opal_key *opal) { - static const opal_step owner_funcs[] = { - opal_discovery0, - start_anybodyASP_opal_session, - get_msid_cpin_pin, - end_opal_session, - start_SIDASP_opal_session, - set_sid_cpin_pin, - end_opal_session, - NULL + const struct opal_step owner_steps[] = { + { opal_discovery0, }, + { start_anybodyASP_opal_session, }, + { get_msid_cpin_pin, }, + { end_opal_session, }, + { start_SIDASP_opal_session, opal }, + { set_sid_cpin_pin, opal }, + { end_opal_session, }, + { NULL, } }; - void *data[6] = { NULL }; int ret; if (!dev) return -ENODEV; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, owner_funcs); - dev->func_data = data; - dev->func_data[4] = opal; - dev->func_data[5] = opal; + setup_opal_dev(dev, owner_steps); ret = next(dev); mutex_unlock(&dev->dev_lock); return ret; @@ -2278,14 +2234,13 @@ static int opal_take_ownership(struct opal_dev *dev, struct opal_key *opal) static int opal_activate_lsp(struct opal_dev *dev, struct opal_lr_act *opal_lr_act) { - void *data[4] = { NULL }; - static const opal_step active_funcs[] = { - opal_discovery0, - start_SIDASP_opal_session, /* Open session as SID auth */ - get_lsp_lifecycle, - activate_lsp, - end_opal_session, - NULL + const struct opal_step active_steps[] = { + { opal_discovery0, }, + { start_SIDASP_opal_session, &opal_lr_act->key }, + { get_lsp_lifecycle, }, + { activate_lsp, opal_lr_act }, + { end_opal_session, }, + { NULL, } }; int ret; @@ -2293,10 +2248,7 @@ static int opal_activate_lsp(struct opal_dev *dev, struct opal_lr_act *opal_lr_a return -EINVAL; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, active_funcs); - dev->func_data = data; - dev->func_data[1] = &opal_lr_act->key; - dev->func_data[3] = opal_lr_act; + setup_opal_dev(dev, active_steps); ret = next(dev); mutex_unlock(&dev->dev_lock); return ret; @@ -2305,21 +2257,17 @@ static int opal_activate_lsp(struct opal_dev *dev, struct opal_lr_act *opal_lr_a static int opal_setup_locking_range(struct opal_dev *dev, struct opal_user_lr_setup *opal_lrs) { - void *data[3] = { NULL }; - static const opal_step lr_funcs[] = { - opal_discovery0, - start_auth_opal_session, - setup_locking_range, - end_opal_session, - NULL, + const struct opal_step lr_steps[] = { + { opal_discovery0, }, + { start_auth_opal_session, &opal_lrs->session }, + { setup_locking_range, opal_lrs }, + { end_opal_session, }, + { NULL, } }; int ret; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, lr_funcs); - dev->func_data = data; - dev->func_data[1] = &opal_lrs->session; - dev->func_data[2] = opal_lrs; + setup_opal_dev(dev, lr_steps); ret = next(dev); mutex_unlock(&dev->dev_lock); return ret; @@ -2327,14 +2275,13 @@ static int opal_setup_locking_range(struct opal_dev *dev, static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw) { - static const opal_step pw_funcs[] = { - opal_discovery0, - start_auth_opal_session, - set_new_pw, - end_opal_session, - NULL + const struct opal_step pw_steps[] = { + { opal_discovery0, }, + { start_auth_opal_session, &opal_pw->session }, + { set_new_pw, &opal_pw->new_user_pw }, + { end_opal_session, }, + { NULL } }; - void *data[3] = { NULL }; int ret; if (opal_pw->session.who < OPAL_ADMIN1 || @@ -2344,11 +2291,7 @@ static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw) return -EINVAL; mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, pw_funcs); - dev->func_data = data; - dev->func_data[1] = (void *) &opal_pw->session; - dev->func_data[2] = (void *) &opal_pw->new_user_pw; - + setup_opal_dev(dev, pw_steps); ret = next(dev); mutex_unlock(&dev->dev_lock); return ret; @@ -2357,28 +2300,24 @@ static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw) static int opal_activate_user(struct opal_dev *dev, struct opal_session_info *opal_session) { - static const opal_step act_funcs[] = { - opal_discovery0, - start_admin1LSP_opal_session, - internal_activate_user, - end_opal_session, - NULL + const struct opal_step act_steps[] = { + { opal_discovery0, }, + { start_admin1LSP_opal_session, &opal_session->opal_key }, + { internal_activate_user, opal_session }, + { end_opal_session, }, + { NULL, } }; - void *data[3] = { NULL }; int ret; /* We can't activate Admin1 it's active as manufactured */ - if (opal_session->who < OPAL_USER1 && + if (opal_session->who < OPAL_USER1 || opal_session->who > OPAL_USER9) { pr_err("Who was not a valid user: %d\n", opal_session->who); return -EINVAL; } mutex_lock(&dev->dev_lock); - setup_opal_dev(dev, act_funcs); - dev->func_data = data; - dev->func_data[1] = &opal_session->opal_key; - dev->func_data[2] = opal_session; + setup_opal_dev(dev, act_steps); ret = next(dev); mutex_unlock(&dev->dev_lock); return ret; @@ -2387,7 +2326,6 @@ static int opal_activate_user(struct opal_dev *dev, bool opal_unlock_from_suspend(struct opal_dev *dev) { struct opal_suspend_data *suspend; - void *func_data[3] = { NULL }; bool was_failure = false; int ret = 0; @@ -2398,19 +2336,12 @@ bool opal_unlock_from_suspend(struct opal_dev *dev) mutex_lock(&dev->dev_lock); setup_opal_dev(dev, NULL); - dev->func_data = func_data; list_for_each_entry(suspend, &dev->unlk_lst, node) { - dev->state = 0; - dev->func_data[1] = &suspend->unlk.session; - dev->func_data[2] = &suspend->unlk; dev->tsn = 0; dev->hsn = 0; - if (suspend->unlk.session.sum) - ret = __opal_lock_unlock_sum(dev); - else - ret = __opal_lock_unlock(dev); + ret = __opal_lock_unlock(dev, &suspend->unlk); if (ret) { pr_warn("Failed to unlock LR %hhu with sum %d\n", suspend->unlk.session.opal_key.lr, @@ -2437,7 +2368,7 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg) return -ENOTSUPP; } - p = memdup_user(arg, _IOC_SIZE(cmd)); + p = memdup_user(arg, _IOC_SIZE(cmd)); if (IS_ERR(p)) return PTR_ERR(p); |