summaryrefslogtreecommitdiff
path: root/drivers/block/nbd.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-11-01 19:27:38 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2021-11-01 19:27:38 +0300
commit643a7234e0960cf63f1a51a15cfc969fafcbabad (patch)
treee6522bf2e6a74148952af11a03606b152ee1e251 /drivers/block/nbd.c
parent33c8846c814c1c27c6e33af005042d15061f948b (diff)
parent15dfc662ef31a20b59097d59b0792b06770255fa (diff)
downloadlinux-643a7234e0960cf63f1a51a15cfc969fafcbabad.tar.xz
Merge tag 'for-5.16/drivers-2021-10-29' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe: - paride driver cleanups (Christoph) - Remove cryptoloop support (Christoph) - null_blk poll support (me) - Now that add_disk() supports proper error handling, add it to various drivers (Luis) - Make ataflop actually work again (Michael) - s390 dasd fixes (Stefan, Heiko) - nbd fixes (Yu, Ye) - Remove redundant wq flush in mtip32xx (Christophe) - NVMe updates - fix a multipath partition scanning deadlock (Hannes Reinecke) - generate uevent once a multipath namespace is operational again (Hannes Reinecke) - support unique discovery controller NQNs (Hannes Reinecke) - fix use-after-free when a port is removed (Israel Rukshin) - clear shadow doorbell memory on resets (Keith Busch) - use struct_size (Len Baker) - add error handling support for add_disk (Luis Chamberlain) - limit the maximal queue size for RDMA controllers (Max Gurtovoy) - use a few more symbolic names (Max Gurtovoy) - fix error code in nvme_rdma_setup_ctrl (Max Gurtovoy) - add support for ->map_queues on FC (Saurav Kashyap) - support the current discovery subsystem entry (Hannes Reinecke) - use flex_array_size and struct_size (Len Baker) - bcache fixes (Christoph, Coly, Chao, Lin, Qing) - MD updates (Christoph, Guoqing, Xiao) - Misc fixes (Dan, Ding, Jiapeng, Shin'ichiro, Ye) * tag 'for-5.16/drivers-2021-10-29' of git://git.kernel.dk/linux-block: (117 commits) null_blk: Fix handling of submit_queues and poll_queues attributes block: ataflop: Fix warning comparing pointer to 0 bcache: replace snprintf in show functions with sysfs_emit bcache: move uapi header bcache.h to bcache code directory nvmet: use flex_array_size and struct_size nvmet: register discovery subsystem as 'current' nvmet: switch check for subsystem type nvme: add new discovery log page entry definitions block: ataflop: more blk-mq refactoring fixes block: remove support for cryptoloop and the xor transfer mtd: add add_disk() error handling rnbd: add error handling support for add_disk() um/drivers/ubd_kern: add error handling support for add_disk() m68k/emu/nfblock: add error handling support for add_disk() xen-blkfront: add error handling support for add_disk() bcache: add error handling support for add_disk() dm: add add_disk() error handling block: aoe: fixup coccinelle warnings nvmet: use struct_size over open coded arithmetic nvme: drop scan_lock and always kick requeue list when removing namespaces ...
Diffstat (limited to 'drivers/block/nbd.c')
-rw-r--r--drivers/block/nbd.c161
1 files changed, 112 insertions, 49 deletions
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 504c20a2f33e..b47b2a87ae8f 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -122,15 +122,21 @@ struct nbd_device {
struct work_struct remove_work;
struct list_head list;
- struct task_struct *task_recv;
struct task_struct *task_setup;
unsigned long flags;
+ pid_t pid; /* pid of nbd-client, if attached */
char *backend;
};
#define NBD_CMD_REQUEUED 1
+/*
+ * This flag will be set if nbd_queue_rq() succeed, and will be checked and
+ * cleared in completion. Both setting and clearing of the flag are protected
+ * by cmd->lock.
+ */
+#define NBD_CMD_INFLIGHT 2
struct nbd_cmd {
struct nbd_device *nbd;
@@ -217,7 +223,7 @@ static ssize_t pid_show(struct device *dev,
struct gendisk *disk = dev_to_disk(dev);
struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
- return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
+ return sprintf(buf, "%d\n", nbd->pid);
}
static const struct device_attribute pid_attr = {
@@ -322,7 +328,7 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
nbd->config->bytesize = bytesize;
nbd->config->blksize_bits = __ffs(blksize);
- if (!nbd->task_recv)
+ if (!nbd->pid)
return 0;
if (nbd->config->flags & NBD_FLAG_SEND_TRIM) {
@@ -398,6 +404,11 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
if (!mutex_trylock(&cmd->lock))
return BLK_EH_RESET_TIMER;
+ if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+ mutex_unlock(&cmd->lock);
+ return BLK_EH_DONE;
+ }
+
if (!refcount_inc_not_zero(&nbd->config_refs)) {
cmd->status = BLK_STS_TIMEOUT;
mutex_unlock(&cmd->lock);
@@ -477,7 +488,8 @@ done:
}
/*
- * Send or receive packet.
+ * Send or receive packet. Return a positive value on success and
+ * negtive value on failue, and never return 0.
*/
static int sock_xmit(struct nbd_device *nbd, int index, int send,
struct iov_iter *iter, int msg_flags, int *sent)
@@ -603,7 +615,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
result = sock_xmit(nbd, index, 1, &from,
(type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
trace_nbd_header_sent(req, handle);
- if (result <= 0) {
+ if (result < 0) {
if (was_interrupted(result)) {
/* If we havne't sent anything we can just return BUSY,
* however if we have sent something we need to make
@@ -647,7 +659,7 @@ send_pages:
skip = 0;
}
result = sock_xmit(nbd, index, 1, &from, flags, &sent);
- if (result <= 0) {
+ if (result < 0) {
if (was_interrupted(result)) {
/* We've already sent the header, we
* have no choice but to set pending and
@@ -681,38 +693,45 @@ out:
return 0;
}
-/* NULL returned = something went wrong, inform userspace */
-static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
+static int nbd_read_reply(struct nbd_device *nbd, int index,
+ struct nbd_reply *reply)
{
- struct nbd_config *config = nbd->config;
- int result;
- struct nbd_reply reply;
- struct nbd_cmd *cmd;
- struct request *req = NULL;
- u64 handle;
- u16 hwq;
- u32 tag;
- struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)};
+ struct kvec iov = {.iov_base = reply, .iov_len = sizeof(*reply)};
struct iov_iter to;
- int ret = 0;
+ int result;
- reply.magic = 0;
- iov_iter_kvec(&to, READ, &iov, 1, sizeof(reply));
+ reply->magic = 0;
+ iov_iter_kvec(&to, READ, &iov, 1, sizeof(*reply));
result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
- if (result <= 0) {
- if (!nbd_disconnected(config))
+ if (result < 0) {
+ if (!nbd_disconnected(nbd->config))
dev_err(disk_to_dev(nbd->disk),
"Receive control failed (result %d)\n", result);
- return ERR_PTR(result);
+ return result;
}
- if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
+ if (ntohl(reply->magic) != NBD_REPLY_MAGIC) {
dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
- (unsigned long)ntohl(reply.magic));
- return ERR_PTR(-EPROTO);
+ (unsigned long)ntohl(reply->magic));
+ return -EPROTO;
}
- memcpy(&handle, reply.handle, sizeof(handle));
+ return 0;
+}
+
+/* NULL returned = something went wrong, inform userspace */
+static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index,
+ struct nbd_reply *reply)
+{
+ int result;
+ struct nbd_cmd *cmd;
+ struct request *req = NULL;
+ u64 handle;
+ u16 hwq;
+ u32 tag;
+ int ret = 0;
+
+ memcpy(&handle, reply->handle, sizeof(handle));
tag = nbd_handle_to_tag(handle);
hwq = blk_mq_unique_tag_to_hwq(tag);
if (hwq < nbd->tag_set.nr_hw_queues)
@@ -727,6 +746,16 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
cmd = blk_mq_rq_to_pdu(req);
mutex_lock(&cmd->lock);
+ if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+ dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)",
+ tag, cmd->status, cmd->flags);
+ ret = -ENOENT;
+ goto out;
+ }
+ if (cmd->index != index) {
+ dev_err(disk_to_dev(nbd->disk), "Unexpected reply %d from different sock %d (expected %d)",
+ tag, index, cmd->index);
+ }
if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) {
dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n",
req, cmd->cmd_cookie, nbd_handle_to_cookie(handle));
@@ -745,9 +774,9 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
ret = -ENOENT;
goto out;
}
- if (ntohl(reply.error)) {
+ if (ntohl(reply->error)) {
dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
- ntohl(reply.error));
+ ntohl(reply->error));
cmd->status = BLK_STS_IOERR;
goto out;
}
@@ -756,11 +785,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
if (rq_data_dir(req) != WRITE) {
struct req_iterator iter;
struct bio_vec bvec;
+ struct iov_iter to;
rq_for_each_segment(bvec, req, iter) {
iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len);
result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
- if (result <= 0) {
+ if (result < 0) {
dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
result);
/*
@@ -769,7 +799,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
* and let the timeout stuff handle resubmitting
* this request onto another connection.
*/
- if (nbd_disconnected(config)) {
+ if (nbd_disconnected(nbd->config)) {
cmd->status = BLK_STS_IOERR;
goto out;
}
@@ -793,24 +823,46 @@ static void recv_work(struct work_struct *work)
work);
struct nbd_device *nbd = args->nbd;
struct nbd_config *config = nbd->config;
+ struct request_queue *q = nbd->disk->queue;
+ struct nbd_sock *nsock;
struct nbd_cmd *cmd;
struct request *rq;
while (1) {
- cmd = nbd_read_stat(nbd, args->index);
- if (IS_ERR(cmd)) {
- struct nbd_sock *nsock = config->socks[args->index];
+ struct nbd_reply reply;
- mutex_lock(&nsock->tx_lock);
- nbd_mark_nsock_dead(nbd, nsock, 1);
- mutex_unlock(&nsock->tx_lock);
+ if (nbd_read_reply(nbd, args->index, &reply))
+ break;
+
+ /*
+ * Grab .q_usage_counter so request pool won't go away, then no
+ * request use-after-free is possible during nbd_handle_reply().
+ * If queue is frozen, there won't be any inflight requests, we
+ * needn't to handle the incoming garbage message.
+ */
+ if (!percpu_ref_tryget(&q->q_usage_counter)) {
+ dev_err(disk_to_dev(nbd->disk), "%s: no io inflight\n",
+ __func__);
+ break;
+ }
+
+ cmd = nbd_handle_reply(nbd, args->index, &reply);
+ if (IS_ERR(cmd)) {
+ percpu_ref_put(&q->q_usage_counter);
break;
}
rq = blk_mq_rq_from_pdu(cmd);
if (likely(!blk_should_fake_timeout(rq->q)))
blk_mq_complete_request(rq);
+ percpu_ref_put(&q->q_usage_counter);
}
+
+ nsock = config->socks[args->index];
+ mutex_lock(&nsock->tx_lock);
+ nbd_mark_nsock_dead(nbd, nsock, 1);
+ mutex_unlock(&nsock->tx_lock);
+
nbd_config_put(nbd);
atomic_dec(&config->recv_threads);
wake_up(&config->recv_wq);
@@ -826,6 +878,10 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved)
return true;
mutex_lock(&cmd->lock);
+ if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+ mutex_unlock(&cmd->lock);
+ return true;
+ }
cmd->status = BLK_STS_IOERR;
mutex_unlock(&cmd->lock);
@@ -907,7 +963,6 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
if (!refcount_inc_not_zero(&nbd->config_refs)) {
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Socks array is empty\n");
- blk_mq_start_request(req);
return -EINVAL;
}
config = nbd->config;
@@ -916,7 +971,6 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Attempted send on invalid socket\n");
nbd_config_put(nbd);
- blk_mq_start_request(req);
return -EINVAL;
}
cmd->status = BLK_STS_OK;
@@ -940,7 +994,6 @@ again:
*/
sock_shutdown(nbd);
nbd_config_put(nbd);
- blk_mq_start_request(req);
return -EIO;
}
goto again;
@@ -962,7 +1015,13 @@ again:
* returns EAGAIN can be retried on a different socket.
*/
ret = nbd_send_cmd(nbd, cmd, index);
- if (ret == -EAGAIN) {
+ /*
+ * Access to this flag is protected by cmd->lock, thus it's safe to set
+ * the flag after nbd_send_cmd() succeed to send request to server.
+ */
+ if (!ret)
+ __set_bit(NBD_CMD_INFLIGHT, &cmd->flags);
+ else if (ret == -EAGAIN) {
dev_err_ratelimited(disk_to_dev(nbd->disk),
"Request send failed, requeueing\n");
nbd_mark_nsock_dead(nbd, nsock, 1);
@@ -1199,7 +1258,7 @@ static void send_disconnects(struct nbd_device *nbd)
iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
mutex_lock(&nsock->tx_lock);
ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
- if (ret <= 0)
+ if (ret < 0)
dev_err(disk_to_dev(nbd->disk),
"Send disconnect failed %d\n", ret);
mutex_unlock(&nsock->tx_lock);
@@ -1236,7 +1295,7 @@ static void nbd_config_put(struct nbd_device *nbd)
if (test_and_clear_bit(NBD_RT_HAS_PID_FILE,
&config->runtime_flags))
device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
- nbd->task_recv = NULL;
+ nbd->pid = 0;
if (test_and_clear_bit(NBD_RT_HAS_BACKEND_FILE,
&config->runtime_flags)) {
device_remove_file(disk_to_dev(nbd->disk), &backend_attr);
@@ -1277,7 +1336,7 @@ static int nbd_start_device(struct nbd_device *nbd)
int num_connections = config->num_connections;
int error = 0, i;
- if (nbd->task_recv)
+ if (nbd->pid)
return -EBUSY;
if (!config->socks)
return -EINVAL;
@@ -1296,7 +1355,7 @@ static int nbd_start_device(struct nbd_device *nbd)
}
blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
- nbd->task_recv = current;
+ nbd->pid = task_pid_nr(current);
nbd_parse_flags(nbd);
@@ -1552,8 +1611,8 @@ static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
{
struct nbd_device *nbd = s->private;
- if (nbd->task_recv)
- seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv));
+ if (nbd->pid)
+ seq_printf(s, "recv: %d\n", nbd->pid);
return 0;
}
@@ -1757,7 +1816,9 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
disk->fops = &nbd_fops;
disk->private_data = nbd;
sprintf(disk->disk_name, "nbd%d", index);
- add_disk(disk);
+ err = add_disk(disk);
+ if (err)
+ goto out_err_disk;
/*
* Now publish the device.
@@ -1766,6 +1827,8 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
nbd_total_devices++;
return nbd;
+out_err_disk:
+ blk_cleanup_disk(disk);
out_free_idr:
mutex_lock(&nbd_index_mutex);
idr_remove(&nbd_index_idr, index);
@@ -2130,7 +2193,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&nbd->config_lock);
config = nbd->config;
if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
- !nbd->task_recv) {
+ !nbd->pid) {
dev_err(nbd_to_dev(nbd),
"not configured, cannot reconfigure\n");
ret = -EINVAL;