summaryrefslogtreecommitdiff
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r-- drivers/block/brd.c 67
-rw-r--r-- drivers/block/loop.c 14
-rw-r--r-- drivers/block/rbd.c 7
-rw-r--r-- drivers/block/ublk_drv.c 100
-rw-r--r-- drivers/block/virtio_blk.c 4
-rw-r--r-- drivers/block/zram/zram_drv.c 15
6 files changed, 130 insertions, 77 deletions
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 20acc4a1fd6d..a8a77a1efe1e 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -78,32 +78,25 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
}
/*
- * Look up and return a brd's page for a given sector.
- * If one does not exist, allocate an empty page, and insert that. Then
- * return it.
+ * Insert a new page for a given sector, if one does not already exist.
*/
-static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
+static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
{
pgoff_t idx;
struct page *page;
- gfp_t gfp_flags;
+ int ret = 0;
page = brd_lookup_page(brd, sector);
if (page)
- return page;
+ return 0;
- /*
- * Must use NOIO because we don't want to recurse back into the
- * block or filesystem layers from page reclaim.
- */
- gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM;
- page = alloc_page(gfp_flags);
+ page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
if (!page)
- return NULL;
+ return -ENOMEM;
- if (radix_tree_preload(GFP_NOIO)) {
+ if (radix_tree_maybe_preload(gfp)) {
__free_page(page);
- return NULL;
+ return -ENOMEM;
}
spin_lock(&brd->brd_lock);
@@ -112,16 +105,17 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
if (radix_tree_insert(&brd->brd_pages, idx, page)) {
__free_page(page);
page = radix_tree_lookup(&brd->brd_pages, idx);
- BUG_ON(!page);
- BUG_ON(page->index != idx);
+ if (!page)
+ ret = -ENOMEM;
+ else if (page->index != idx)
+ ret = -EIO;
} else {
brd->brd_nr_pages++;
}
spin_unlock(&brd->brd_lock);
radix_tree_preload_end();
-
- return page;
+ return ret;
}
/*
@@ -170,20 +164,22 @@ static void brd_free_pages(struct brd_device *brd)
/*
* copy_to_brd_setup must be called before copy_to_brd. It may sleep.
*/
-static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
+static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n,
+ gfp_t gfp)
{
unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
size_t copy;
+ int ret;
copy = min_t(size_t, n, PAGE_SIZE - offset);
- if (!brd_insert_page(brd, sector))
- return -ENOSPC;
+ ret = brd_insert_page(brd, sector, gfp);
+ if (ret)
+ return ret;
if (copy < n) {
sector += copy >> SECTOR_SHIFT;
- if (!brd_insert_page(brd, sector))
- return -ENOSPC;
+ ret = brd_insert_page(brd, sector, gfp);
}
- return 0;
+ return ret;
}
/*
@@ -256,20 +252,26 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
* Process a single bvec of a bio.
*/
static int brd_do_bvec(struct brd_device *brd, struct page *page,
- unsigned int len, unsigned int off, enum req_op op,
+ unsigned int len, unsigned int off, blk_opf_t opf,
sector_t sector)
{
void *mem;
int err = 0;
- if (op_is_write(op)) {
- err = copy_to_brd_setup(brd, sector, len);
+ if (op_is_write(opf)) {
+ /*
+ * Must use NOIO because we don't want to recurse back into the
+ * block or filesystem layers from page reclaim.
+ */
+ gfp_t gfp = opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO;
+
+ err = copy_to_brd_setup(brd, sector, len, gfp);
if (err)
goto out;
}
mem = kmap_atomic(page);
- if (!op_is_write(op)) {
+ if (!op_is_write(opf)) {
copy_from_brd(mem + off, brd, sector, len);
flush_dcache_page(page);
} else {
@@ -298,8 +300,12 @@ static void brd_submit_bio(struct bio *bio)
(len & (SECTOR_SIZE - 1)));
err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
- bio_op(bio), sector);
+ bio->bi_opf, sector);
if (err) {
+ if (err == -ENOMEM && bio->bi_opf & REQ_NOWAIT) {
+ bio_wouldblock_error(bio);
+ return;
+ }
bio_io_error(bio);
return;
}
@@ -412,6 +418,7 @@ static int brd_alloc(int i)
/* Tell the block layer that this is not a rotational device */
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
+ blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue);
err = add_disk(disk);
if (err)
goto out_cleanup_disk;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 1518a6423279..5f04235e4ff7 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -90,7 +90,7 @@ struct loop_cmd {
};
#define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ)
-#define LOOP_DEFAULT_HW_Q_DEPTH (128)
+#define LOOP_DEFAULT_HW_Q_DEPTH 128
static DEFINE_IDR(loop_index_idr);
static DEFINE_MUTEX(loop_ctl_mutex);
@@ -1792,9 +1792,15 @@ static int hw_queue_depth = LOOP_DEFAULT_HW_Q_DEPTH;
static int loop_set_hw_queue_depth(const char *s, const struct kernel_param *p)
{
- int ret = kstrtoint(s, 10, &hw_queue_depth);
+ int qd, ret;
- return (ret || (hw_queue_depth < 1)) ? -EINVAL : 0;
+ ret = kstrtoint(s, 0, &qd);
+ if (ret < 0)
+ return ret;
+ if (qd < 1)
+ return -EINVAL;
+ hw_queue_depth = qd;
+ return 0;
}
static const struct kernel_param_ops loop_hw_qdepth_param_ops = {
@@ -1803,7 +1809,7 @@ static const struct kernel_param_ops loop_hw_qdepth_param_ops = {
};
device_param_cb(hw_queue_depth, &loop_hw_qdepth_param_ops, &hw_queue_depth, 0444);
-MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 128");
+MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: " __stringify(LOOP_DEFAULT_HW_Q_DEPTH));
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 04453f4a319c..1faca7e07a4d 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3068,13 +3068,12 @@ static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap)
for (i = 0; i < obj_req->copyup_bvec_count; i++) {
unsigned int len = min(obj_overlap, (u64)PAGE_SIZE);
+ struct page *page = alloc_page(GFP_NOIO);
- obj_req->copyup_bvecs[i].bv_page = alloc_page(GFP_NOIO);
- if (!obj_req->copyup_bvecs[i].bv_page)
+ if (!page)
return -ENOMEM;
- obj_req->copyup_bvecs[i].bv_offset = 0;
- obj_req->copyup_bvecs[i].bv_len = len;
+ bvec_set_page(&obj_req->copyup_bvecs[i], page, len, 0);
obj_overlap -= len;
}
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index a725a236a38f..b9c759cef00e 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -140,7 +140,7 @@ struct ublk_device {
char *__queues;
- unsigned short queue_size;
+ unsigned int queue_size;
struct ublksrv_ctrl_dev_info dev_info;
struct blk_mq_tag_set tag_set;
@@ -150,6 +150,7 @@ struct ublk_device {
#define UB_STATE_OPEN 0
#define UB_STATE_USED 1
+#define UB_STATE_DELETED 2
unsigned long state;
int ub_number;
@@ -322,7 +323,7 @@ static inline struct ublk_queue *ublk_get_queue(struct ublk_device *dev,
static inline bool ublk_rq_has_data(const struct request *rq)
{
- return rq->bio && bio_has_data(rq->bio);
+ return bio_has_data(rq->bio);
}
static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
@@ -377,8 +378,50 @@ static void ublk_free_disk(struct gendisk *disk)
put_device(&ub->cdev_dev);
}
+static void ublk_store_owner_uid_gid(unsigned int *owner_uid,
+ unsigned int *owner_gid)
+{
+ kuid_t uid;
+ kgid_t gid;
+
+ current_uid_gid(&uid, &gid);
+
+ *owner_uid = from_kuid(&init_user_ns, uid);
+ *owner_gid = from_kgid(&init_user_ns, gid);
+}
+
+static int ublk_open(struct block_device *bdev, fmode_t mode)
+{
+ struct ublk_device *ub = bdev->bd_disk->private_data;
+
+ if (capable(CAP_SYS_ADMIN))
+ return 0;
+
+ /*
+ * If this is an unprivileged device, only its owner can
+ * open the disk. Otherwise it could be a trap set by an
+ * evil user who deliberately grants this disk's privileges
+ * to other users.
+ *
+ * This restriction is reasonable given that anyone can create
+ * an unprivileged device without needing anyone else's grant.
+ */
+ if (ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV) {
+ unsigned int curr_uid, curr_gid;
+
+ ublk_store_owner_uid_gid(&curr_uid, &curr_gid);
+
+ if (curr_uid != ub->dev_info.owner_uid || curr_gid !=
+ ub->dev_info.owner_gid)
+ return -EPERM;
+ }
+
+ return 0;
+}
+
static const struct block_device_operations ub_fops = {
.owner = THIS_MODULE,
+ .open = ublk_open,
.free_disk = ublk_free_disk,
};
@@ -623,7 +666,7 @@ static void ublk_complete_rq(struct request *req)
}
/*
- * FLUSH or DISCARD usually won't return bytes returned, so end them
+ * FLUSH, DISCARD or WRITE_ZEROES usually don't return a byte count, so end them
* directly.
*
* Both the two needn't unmap.
@@ -1222,6 +1265,7 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
u32 cmd_op = cmd->cmd_op;
unsigned tag = ub_cmd->tag;
int ret = -EINVAL;
+ struct request *req;
pr_devel("%s: received: cmd op %d queue %d tag %d result %d\n",
__func__, cmd->cmd_op, ub_cmd->q_id, tag,
@@ -1272,8 +1316,8 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
*/
if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)
goto out;
- /* FETCH_RQ has to provide IO buffer */
- if (!ub_cmd->addr)
+ /* FETCH_RQ has to provide IO buffer if NEED GET DATA is not enabled */
+ if (!ub_cmd->addr && !ublk_need_get_data(ubq))
goto out;
io->cmd = cmd;
io->flags |= UBLK_IO_FLAG_ACTIVE;
@@ -1282,8 +1326,12 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
ublk_mark_io_ready(ub, ubq);
break;
case UBLK_IO_COMMIT_AND_FETCH_REQ:
- /* FETCH_RQ has to provide IO buffer */
- if (!ub_cmd->addr)
+ req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag);
+ /*
+ * COMMIT_AND_FETCH_REQ has to provide IO buffer if NEED GET DATA is
+ * not enabled or it is Read IO.
+ */
+ if (!ub_cmd->addr && (!ublk_need_get_data(ubq) || req_op(req) == REQ_OP_READ))
goto out;
if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
goto out;
@@ -1536,7 +1584,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
goto out_unlock;
}
- disk = blk_mq_alloc_disk(&ub->tag_set, ub);
+ disk = blk_mq_alloc_disk(&ub->tag_set, NULL);
if (IS_ERR(disk)) {
ret = PTR_ERR(disk);
goto out_unlock;
@@ -1620,17 +1668,6 @@ out_free_cpumask:
return ret;
}
-static void ublk_store_owner_uid_gid(struct ublksrv_ctrl_dev_info *info)
-{
- kuid_t uid;
- kgid_t gid;
-
- current_uid_gid(&uid, &gid);
-
- info->owner_uid = from_kuid(&init_user_ns, uid);
- info->owner_gid = from_kgid(&init_user_ns, gid);
-}
-
static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info)
{
pr_devel("%s: dev id %d flags %llx\n", __func__,
@@ -1664,7 +1701,7 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
return -EPERM;
/* the created device is always owned by current user */
- ublk_store_owner_uid_gid(&info);
+ ublk_store_owner_uid_gid(&info.owner_uid, &info.owner_gid);
if (header->dev_id != info.dev_id) {
pr_warn("%s: dev id not match %u %u\n",
@@ -1773,20 +1810,33 @@ static int ublk_ctrl_del_dev(struct ublk_device **p_ub)
if (ret)
return ret;
- ublk_remove(ub);
+ if (!test_bit(UB_STATE_DELETED, &ub->state)) {
+ ublk_remove(ub);
+ set_bit(UB_STATE_DELETED, &ub->state);
+ }
/* Mark the reference as consumed */
*p_ub = NULL;
ublk_put_device(ub);
+ mutex_unlock(&ublk_ctl_mutex);
/*
* Wait until the idr is removed, then it can be reused after
* DEL_DEV command is returned.
+ *
+ * If we return because of a user interrupt, a future delete command
+ * may come:
+ *
+ * - the device number isn't freed, this device won't or needn't
+ * be deleted again, since UB_STATE_DELETED is set, and device
+ * will be released after the last reference is dropped
+ *
+ * - the device number is freed already, we will not find this
+ * device via ublk_get_device_from_id()
*/
- wait_event(ublk_idr_wq, ublk_idr_freed(idx));
- mutex_unlock(&ublk_ctl_mutex);
+ wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx));
- return ret;
+ return 0;
}
static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd)
@@ -2029,7 +2079,7 @@ static int ublk_char_dev_permission(struct ublk_device *ub,
if (stat.rdev != ub->cdev_dev.devt || !S_ISCHR(stat.mode))
goto exit;
- err = inode_permission(&init_user_ns,
+ err = inode_permission(&nop_mnt_idmap,
d_backing_inode(path.dentry), mask);
exit:
path_put(&path);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 6a77fa917428..dc6e9b989910 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -170,9 +170,7 @@ static int virtblk_setup_discard_write_zeroes_erase(struct request *req, bool un
WARN_ON_ONCE(n != segments);
- req->special_vec.bv_page = virt_to_page(range);
- req->special_vec.bv_offset = offset_in_page(range);
- req->special_vec.bv_len = sizeof(*range) * segments;
+ bvec_set_virt(&req->special_vec, range, sizeof(*range) * segments);
req->rq_flags |= RQF_SPECIAL_PAYLOAD;
return 0;
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index e290d6d97047..bd8ae4822dc3 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -703,9 +703,7 @@ static ssize_t writeback_store(struct device *dev,
for (; nr_pages != 0; index++, nr_pages--) {
struct bio_vec bvec;
- bvec.bv_page = page;
- bvec.bv_len = PAGE_SIZE;
- bvec.bv_offset = 0;
+ bvec_set_page(&bvec, page, PAGE_SIZE, 0);
spin_lock(&zram->wb_limit_lock);
if (zram->wb_limit_enable && !zram->bd_wb_limit) {
@@ -1380,12 +1378,9 @@ out:
static int zram_bvec_read_from_bdev(struct zram *zram, struct page *page,
u32 index, struct bio *bio, bool partial_io)
{
- struct bio_vec bvec = {
- .bv_page = page,
- .bv_len = PAGE_SIZE,
- .bv_offset = 0,
- };
+ struct bio_vec bvec;
+ bvec_set_page(&bvec, page, PAGE_SIZE, 0);
return read_from_bdev(zram, &bvec, zram_get_element(zram, index), bio,
partial_io);
}
@@ -1652,9 +1647,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
memcpy_from_bvec(dst + offset, bvec);
kunmap_atomic(dst);
- vec.bv_page = page;
- vec.bv_len = PAGE_SIZE;
- vec.bv_offset = 0;
+ bvec_set_page(&vec, page, PAGE_SIZE, 0);
}
ret = __zram_bvec_write(zram, &vec, index, bio);