diff options
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/brd.c | 67 | ||||
-rw-r--r-- | drivers/block/loop.c | 14 | ||||
-rw-r--r-- | drivers/block/rbd.c | 7 | ||||
-rw-r--r-- | drivers/block/ublk_drv.c | 100 | ||||
-rw-r--r-- | drivers/block/virtio_blk.c | 4 | ||||
-rw-r--r-- | drivers/block/zram/zram_drv.c | 15 |
6 files changed, 130 insertions, 77 deletions
diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 20acc4a1fd6d..a8a77a1efe1e 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -78,32 +78,25 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) } /* - * Look up and return a brd's page for a given sector. - * If one does not exist, allocate an empty page, and insert that. Then - * return it. + * Insert a new page for a given sector, if one does not already exist. */ -static struct page *brd_insert_page(struct brd_device *brd, sector_t sector) +static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp) { pgoff_t idx; struct page *page; - gfp_t gfp_flags; + int ret = 0; page = brd_lookup_page(brd, sector); if (page) - return page; + return 0; - /* - * Must use NOIO because we don't want to recurse back into the - * block or filesystem layers from page reclaim. - */ - gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM; - page = alloc_page(gfp_flags); + page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM); if (!page) - return NULL; + return -ENOMEM; - if (radix_tree_preload(GFP_NOIO)) { + if (radix_tree_maybe_preload(gfp)) { __free_page(page); - return NULL; + return -ENOMEM; } spin_lock(&brd->brd_lock); @@ -112,16 +105,17 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector) if (radix_tree_insert(&brd->brd_pages, idx, page)) { __free_page(page); page = radix_tree_lookup(&brd->brd_pages, idx); - BUG_ON(!page); - BUG_ON(page->index != idx); + if (!page) + ret = -ENOMEM; + else if (page->index != idx) + ret = -EIO; } else { brd->brd_nr_pages++; } spin_unlock(&brd->brd_lock); radix_tree_preload_end(); - - return page; + return ret; } /* @@ -170,20 +164,22 @@ static void brd_free_pages(struct brd_device *brd) /* * copy_to_brd_setup must be called before copy_to_brd. It may sleep. */ -static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n) +static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n, + gfp_t gfp) { unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; size_t copy; + int ret; copy = min_t(size_t, n, PAGE_SIZE - offset); - if (!brd_insert_page(brd, sector)) - return -ENOSPC; + ret = brd_insert_page(brd, sector, gfp); + if (ret) + return ret; if (copy < n) { sector += copy >> SECTOR_SHIFT; - if (!brd_insert_page(brd, sector)) - return -ENOSPC; + ret = brd_insert_page(brd, sector, gfp); } - return 0; + return ret; } /* @@ -256,20 +252,26 @@ static void copy_from_brd(void *dst, struct brd_device *brd, * Process a single bvec of a bio. */ static int brd_do_bvec(struct brd_device *brd, struct page *page, - unsigned int len, unsigned int off, enum req_op op, + unsigned int len, unsigned int off, blk_opf_t opf, sector_t sector) { void *mem; int err = 0; - if (op_is_write(op)) { - err = copy_to_brd_setup(brd, sector, len); + if (op_is_write(opf)) { + /* + * Must use NOIO because we don't want to recurse back into the + * block or filesystem layers from page reclaim. + */ + gfp_t gfp = opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO; + + err = copy_to_brd_setup(brd, sector, len, gfp); if (err) goto out; } mem = kmap_atomic(page); - if (!op_is_write(op)) { + if (!op_is_write(opf)) { copy_from_brd(mem + off, brd, sector, len); flush_dcache_page(page); } else { @@ -298,8 +300,12 @@ static void brd_submit_bio(struct bio *bio) (len & (SECTOR_SIZE - 1))); err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, - bio_op(bio), sector); + bio->bi_opf, sector); if (err) { + if (err == -ENOMEM && bio->bi_opf & REQ_NOWAIT) { + bio_wouldblock_error(bio); + return; + } bio_io_error(bio); return; } @@ -412,6 +418,7 @@ static int brd_alloc(int i) /* Tell the block layer that this is not a rotational device */ blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); + blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue); err = add_disk(disk); if (err) goto out_cleanup_disk; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 1518a6423279..5f04235e4ff7 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -90,7 +90,7 @@ struct loop_cmd { }; #define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ) -#define LOOP_DEFAULT_HW_Q_DEPTH (128) +#define LOOP_DEFAULT_HW_Q_DEPTH 128 static DEFINE_IDR(loop_index_idr); static DEFINE_MUTEX(loop_ctl_mutex); @@ -1792,9 +1792,15 @@ static int hw_queue_depth = LOOP_DEFAULT_HW_Q_DEPTH; static int loop_set_hw_queue_depth(const char *s, const struct kernel_param *p) { - int ret = kstrtoint(s, 10, &hw_queue_depth); + int qd, ret; - return (ret || (hw_queue_depth < 1)) ? -EINVAL : 0; + ret = kstrtoint(s, 0, &qd); + if (ret < 0) + return ret; + if (qd < 1) + return -EINVAL; + hw_queue_depth = qd; + return 0; } static const struct kernel_param_ops loop_hw_qdepth_param_ops = { @@ -1803,7 +1809,7 @@ static const struct kernel_param_ops loop_hw_qdepth_param_ops = { }; device_param_cb(hw_queue_depth, &loop_hw_qdepth_param_ops, &hw_queue_depth, 0444); -MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 128"); +MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: " __stringify(LOOP_DEFAULT_HW_Q_DEPTH)); MODULE_LICENSE("GPL"); MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 04453f4a319c..1faca7e07a4d 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3068,13 +3068,12 @@ static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap) for (i = 0; i < obj_req->copyup_bvec_count; i++) { unsigned int len = min(obj_overlap, (u64)PAGE_SIZE); + struct page *page = alloc_page(GFP_NOIO); - obj_req->copyup_bvecs[i].bv_page = alloc_page(GFP_NOIO); - if (!obj_req->copyup_bvecs[i].bv_page) + if (!page) return -ENOMEM; - obj_req->copyup_bvecs[i].bv_offset = 0; - obj_req->copyup_bvecs[i].bv_len = len; + bvec_set_page(&obj_req->copyup_bvecs[i], page, len, 0); obj_overlap -= len; } diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index a725a236a38f..b9c759cef00e 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -140,7 +140,7 @@ struct ublk_device { char *__queues; - unsigned short queue_size; + unsigned int queue_size; struct ublksrv_ctrl_dev_info dev_info; struct blk_mq_tag_set tag_set; @@ -150,6 +150,7 @@ struct ublk_device { #define UB_STATE_OPEN 0 #define UB_STATE_USED 1 +#define UB_STATE_DELETED 2 unsigned long state; int ub_number; @@ -322,7 +323,7 @@ static inline struct ublk_queue *ublk_get_queue(struct ublk_device *dev, static inline bool ublk_rq_has_data(const struct request *rq) { - return rq->bio && bio_has_data(rq->bio); + return bio_has_data(rq->bio); } static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq, @@ -377,8 +378,50 @@ static void ublk_free_disk(struct gendisk *disk) put_device(&ub->cdev_dev); } +static void ublk_store_owner_uid_gid(unsigned int *owner_uid, + unsigned int *owner_gid) +{ + kuid_t uid; + kgid_t gid; + + current_uid_gid(&uid, &gid); + + *owner_uid = from_kuid(&init_user_ns, uid); + *owner_gid = from_kgid(&init_user_ns, gid); +} + +static int ublk_open(struct block_device *bdev, fmode_t mode) +{ + struct ublk_device *ub = bdev->bd_disk->private_data; + + if (capable(CAP_SYS_ADMIN)) + return 0; + + /* + * If it is one unprivileged device, only owner can open + * the disk. Otherwise it could be one trap made by one + * evil user who grants this disk's privileges to other + * users deliberately. + * + * This way is reasonable too given anyone can create + * unprivileged device, and no need other's grant. + */ + if (ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV) { + unsigned int curr_uid, curr_gid; + + ublk_store_owner_uid_gid(&curr_uid, &curr_gid); + + if (curr_uid != ub->dev_info.owner_uid || curr_gid != + ub->dev_info.owner_gid) + return -EPERM; + } + + return 0; +} + static const struct block_device_operations ub_fops = { .owner = THIS_MODULE, + .open = ublk_open, .free_disk = ublk_free_disk, }; @@ -623,7 +666,7 @@ static void ublk_complete_rq(struct request *req) } /* - * FLUSH or DISCARD usually won't return bytes returned, so end them + * FLUSH, DISCARD or WRITE_ZEROES usually won't return bytes returned, so end them * directly. * * Both the two needn't unmap. @@ -1222,6 +1265,7 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) u32 cmd_op = cmd->cmd_op; unsigned tag = ub_cmd->tag; int ret = -EINVAL; + struct request *req; pr_devel("%s: received: cmd op %d queue %d tag %d result %d\n", __func__, cmd->cmd_op, ub_cmd->q_id, tag, @@ -1272,8 +1316,8 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) */ if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV) goto out; - /* FETCH_RQ has to provide IO buffer */ - if (!ub_cmd->addr) + /* FETCH_RQ has to provide IO buffer if NEED GET DATA is not enabled */ + if (!ub_cmd->addr && !ublk_need_get_data(ubq)) goto out; io->cmd = cmd; io->flags |= UBLK_IO_FLAG_ACTIVE; @@ -1282,8 +1326,12 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) ublk_mark_io_ready(ub, ubq); break; case UBLK_IO_COMMIT_AND_FETCH_REQ: - /* FETCH_RQ has to provide IO buffer */ - if (!ub_cmd->addr) + req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag); + /* + * COMMIT_AND_FETCH_REQ has to provide IO buffer if NEED GET DATA is + * not enabled or it is Read IO. + */ + if (!ub_cmd->addr && (!ublk_need_get_data(ubq) || req_op(req) == REQ_OP_READ)) goto out; if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) goto out; @@ -1536,7 +1584,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) goto out_unlock; } - disk = blk_mq_alloc_disk(&ub->tag_set, ub); + disk = blk_mq_alloc_disk(&ub->tag_set, NULL); if (IS_ERR(disk)) { ret = PTR_ERR(disk); goto out_unlock; @@ -1620,17 +1668,6 @@ out_free_cpumask: return ret; } -static void ublk_store_owner_uid_gid(struct ublksrv_ctrl_dev_info *info) -{ - kuid_t uid; - kgid_t gid; - - current_uid_gid(&uid, &gid); - - info->owner_uid = from_kuid(&init_user_ns, uid); - info->owner_gid = from_kgid(&init_user_ns, gid); -} - static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info) { pr_devel("%s: dev id %d flags %llx\n", __func__, @@ -1664,7 +1701,7 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) return -EPERM; /* the created device is always owned by current user */ - ublk_store_owner_uid_gid(&info); + ublk_store_owner_uid_gid(&info.owner_uid, &info.owner_gid); if (header->dev_id != info.dev_id) { pr_warn("%s: dev id not match %u %u\n", @@ -1773,20 +1810,33 @@ static int ublk_ctrl_del_dev(struct ublk_device **p_ub) if (ret) return ret; - ublk_remove(ub); + if (!test_bit(UB_STATE_DELETED, &ub->state)) { + ublk_remove(ub); + set_bit(UB_STATE_DELETED, &ub->state); + } /* Mark the reference as consumed */ *p_ub = NULL; ublk_put_device(ub); + mutex_unlock(&ublk_ctl_mutex); /* * Wait until the idr is removed, then it can be reused after * DEL_DEV command is returned. + * + * If we returns because of user interrupt, future delete command + * may come: + * + * - the device number isn't freed, this device won't or needn't + * be deleted again, since UB_STATE_DELETED is set, and device + * will be released after the last reference is dropped + * + * - the device number is freed already, we will not find this + * device via ublk_get_device_from_id() */ - wait_event(ublk_idr_wq, ublk_idr_freed(idx)); - mutex_unlock(&ublk_ctl_mutex); + wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx)); - return ret; + return 0; } static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd) @@ -2029,7 +2079,7 @@ static int ublk_char_dev_permission(struct ublk_device *ub, if (stat.rdev != ub->cdev_dev.devt || !S_ISCHR(stat.mode)) goto exit; - err = inode_permission(&init_user_ns, + err = inode_permission(&nop_mnt_idmap, d_backing_inode(path.dentry), mask); exit: path_put(&path); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6a77fa917428..dc6e9b989910 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -170,9 +170,7 @@ static int virtblk_setup_discard_write_zeroes_erase(struct request *req, bool un WARN_ON_ONCE(n != segments); - req->special_vec.bv_page = virt_to_page(range); - req->special_vec.bv_offset = offset_in_page(range); - req->special_vec.bv_len = sizeof(*range) * segments; + bvec_set_virt(&req->special_vec, range, sizeof(*range) * segments); req->rq_flags |= RQF_SPECIAL_PAYLOAD; return 0; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e290d6d97047..bd8ae4822dc3 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -703,9 +703,7 @@ static ssize_t writeback_store(struct device *dev, for (; nr_pages != 0; index++, nr_pages--) { struct bio_vec bvec; - bvec.bv_page = page; - bvec.bv_len = PAGE_SIZE; - bvec.bv_offset = 0; + bvec_set_page(&bvec, page, PAGE_SIZE, 0); spin_lock(&zram->wb_limit_lock); if (zram->wb_limit_enable && !zram->bd_wb_limit) { @@ -1380,12 +1378,9 @@ out: static int zram_bvec_read_from_bdev(struct zram *zram, struct page *page, u32 index, struct bio *bio, bool partial_io) { - struct bio_vec bvec = { - .bv_page = page, - .bv_len = PAGE_SIZE, - .bv_offset = 0, - }; + struct bio_vec bvec; + bvec_set_page(&bvec, page, PAGE_SIZE, 0); return read_from_bdev(zram, &bvec, zram_get_element(zram, index), bio, partial_io); } @@ -1652,9 +1647,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, memcpy_from_bvec(dst + offset, bvec); kunmap_atomic(dst); - vec.bv_page = page; - vec.bv_len = PAGE_SIZE; - vec.bv_offset = 0; + bvec_set_page(&vec, page, PAGE_SIZE, 0); } ret = __zram_bvec_write(zram, &vec, index, bio); |