summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2024-01-19 05:22:40 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2024-01-19 05:22:40 +0300
commit: 9d1694dc91ce7b80bc96d6d8eaf1a1eca668d847 (patch)
tree: d9093aecb9261cccaea1f0a58887fcd9db542172 /drivers
parent: e9a5a78d1ad8ceb4e3df6d6ad93360094c84ac40 (diff)
parent: b2e792ae883a0aa976d4176dfa7dc933263440ea (diff)
download: linux-9d1694dc91ce7b80bc96d6d8eaf1a1eca668d847.tar.xz
Merge tag 'for-6.8/block-2024-01-18' of git://git.kernel.dk/linux
Pull block fixes from Jens Axboe:

 - NVMe pull request via Keith:
    - tcp, fc, and rdma target fixes (Maurizio, Daniel, Hannes, Christoph)
    - discard fixes and improvements (Christoph)
    - timeout debug improvements (Keith, Max)
    - various cleanups (Daniel, Max, Giuxen)
    - trace event string fixes (Arnd)
    - shadow doorbell setup on reset fix (William)
    - a write zeroes quirk for SK Hynix (Jim)

 - MD pull request via Song:
    - Sparse warning since v6.0 (Bart)
    - /proc/mdstat regression since v6.7 (Yu Kuai)

 - Use symbolic error value (Christian)
 - IO Priority documentation update (Christian)
 - Fix for accessing queue limits without having entered the queue (Christoph, me)
 - Fix for loop dio support (Christoph)
 - Move null_blk off deprecated ida interface (Christophe)
 - Ensure nbd initializes full msghdr (Eric)
 - Fix for a regression with the folio conversion, which is now easier to hit because of an unrelated change (Matthew)
 - Remove redundant check in virtio-blk (Li)
 - Fix for a potential hang in sbitmap (Ming)
 - Fix for partial zone appending (Damien)
 - Misc changes and fixes (Bart, me, Kemeng, Dmitry)

* tag 'for-6.8/block-2024-01-18' of git://git.kernel.dk/linux: (45 commits)
  Documentation: block: ioprio: Update schedulers
  loop: fix the the direct I/O support check when used on top of block devices
  blk-mq: Remove the hctx 'run' debugfs attribute
  nbd: always initialize struct msghdr completely
  block: Fix iterating over an empty bio with bio_for_each_folio_all
  block: bio-integrity: fix kcalloc() arguments order
  virtio_blk: remove duplicate check if queue is broken in virtblk_done
  sbitmap: remove stale comment in sbq_calc_wake_batch
  block: Correct a documentation comment in blk-cgroup.c
  null_blk: Remove usage of the deprecated ida_simple_xx() API
  block: ensure we hold a queue reference when using queue limits
  blk-mq: rename blk_mq_can_use_cached_rq
  block: print symbolic error name instead of error code
  blk-mq: fix IO hang from sbitmap wakeup race
  nvmet-rdma: avoid circular locking dependency on install_queue()
  nvmet-tcp: avoid circular locking dependency on install_queue()
  nvme-pci: set doorbell config before unquiescing
  block: fix partial zone append completion handling in req_bio_endio()
  block/iocost: silence warning on 'last_period' potentially being unused
  md/raid1: Use blk_opf_t for read and write operations
  ...
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/block/loop.c | 52
-rw-r--r-- drivers/block/nbd.c | 6
-rw-r--r-- drivers/block/null_blk/main.c | 4
-rw-r--r-- drivers/block/virtio_blk.c | 2
-rw-r--r-- drivers/md/md.c | 40
-rw-r--r-- drivers/md/raid1.c | 12
-rw-r--r-- drivers/nvme/common/keyring.c | 2
-rw-r--r-- drivers/nvme/host/core.c | 41
-rw-r--r-- drivers/nvme/host/nvme.h | 16
-rw-r--r-- drivers/nvme/host/pci.c | 27
-rw-r--r-- drivers/nvme/host/pr.c | 2
-rw-r--r-- drivers/nvme/host/rdma.c | 11
-rw-r--r-- drivers/nvme/host/sysfs.c | 8
-rw-r--r-- drivers/nvme/host/tcp.c | 11
-rw-r--r-- drivers/nvme/target/fc.c | 2
-rw-r--r-- drivers/nvme/target/fcloop.c | 7
-rw-r--r-- drivers/nvme/target/rdma.c | 19
-rw-r--r-- drivers/nvme/target/tcp.c | 48
-rw-r--r-- drivers/nvme/target/trace.c | 6
-rw-r--r-- drivers/nvme/target/trace.h | 33
20 files changed, 207 insertions, 142 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 146b32fa7b47..f8145499da38 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -165,39 +165,37 @@ static loff_t get_loop_size(struct loop_device *lo, struct file *file)
return get_size(lo->lo_offset, lo->lo_sizelimit, file);
}
+/*
+ * We support direct I/O only if lo_offset is aligned with the logical block
+ * size of the backing device, and the logical block size of the loop device is
+ * not smaller than that of the backing device.
+ */
+static bool lo_bdev_can_use_dio(struct loop_device *lo,
+ struct block_device *backing_bdev)
+{
+ unsigned short sb_bsize = bdev_logical_block_size(backing_bdev);
+
+ if (queue_logical_block_size(lo->lo_queue) < sb_bsize)
+ return false;
+ if (lo->lo_offset & (sb_bsize - 1))
+ return false;
+ return true;
+}
+
static void __loop_update_dio(struct loop_device *lo, bool dio)
{
struct file *file = lo->lo_backing_file;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- unsigned short sb_bsize = 0;
- unsigned dio_align = 0;
+ struct inode *inode = file->f_mapping->host;
+ struct block_device *backing_bdev = NULL;
bool use_dio;
- if (inode->i_sb->s_bdev) {
- sb_bsize = bdev_logical_block_size(inode->i_sb->s_bdev);
- dio_align = sb_bsize - 1;
- }
+ if (S_ISBLK(inode->i_mode))
+ backing_bdev = I_BDEV(inode);
+ else if (inode->i_sb->s_bdev)
+ backing_bdev = inode->i_sb->s_bdev;
- /*
- * We support direct I/O only if lo_offset is aligned with the
- * logical I/O size of backing device, and the logical block
- * size of loop is bigger than the backing device's.
- *
- * TODO: the above condition may be loosed in the future, and
- * direct I/O may be switched runtime at that time because most
- * of requests in sane applications should be PAGE_SIZE aligned
- */
- if (dio) {
- if (queue_logical_block_size(lo->lo_queue) >= sb_bsize &&
- !(lo->lo_offset & dio_align) &&
- (file->f_mode & FMODE_CAN_ODIRECT))
- use_dio = true;
- else
- use_dio = false;
- } else {
- use_dio = false;
- }
+ use_dio = dio && (file->f_mode & FMODE_CAN_ODIRECT) &&
+ (!backing_bdev || lo_bdev_can_use_dio(lo, backing_bdev));
if (lo->use_dio == use_dio)
return;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 4e72ec4e25ac..33a8f37bb6a1 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -508,7 +508,7 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send,
struct iov_iter *iter, int msg_flags, int *sent)
{
int result;
- struct msghdr msg;
+ struct msghdr msg = {} ;
unsigned int noreclaim_flag;
if (unlikely(!sock)) {
@@ -524,10 +524,6 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send,
do {
sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
sock->sk->sk_use_task_frag = false;
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
msg.msg_flags = msg_flags | MSG_NOSIGNAL;
if (send)
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 9f7695f00c2d..36755f263e8e 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1840,7 +1840,7 @@ static void null_del_dev(struct nullb *nullb)
dev = nullb->dev;
- ida_simple_remove(&nullb_indexes, nullb->index);
+ ida_free(&nullb_indexes, nullb->index);
list_del_init(&nullb->list);
@@ -2174,7 +2174,7 @@ static int null_add_dev(struct nullb_device *dev)
blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
mutex_lock(&lock);
- rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
+ rv = ida_alloc(&nullb_indexes, GFP_KERNEL);
if (rv < 0) {
mutex_unlock(&lock);
goto out_cleanup_zone;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 3b6b9abb8ce1..5bf98fd6a651 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -367,8 +367,6 @@ static void virtblk_done(struct virtqueue *vq)
blk_mq_complete_request(req);
req_done = true;
}
- if (unlikely(virtqueue_is_broken(vq)))
- break;
} while (!virtqueue_enable_cb(vq));
/* In case queue is stopped waiting for more buffers. */
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0a2bd72a6d76..2266358d8074 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8132,6 +8132,19 @@ static void status_unused(struct seq_file *seq)
seq_printf(seq, "\n");
}
+static void status_personalities(struct seq_file *seq)
+{
+ struct md_personality *pers;
+
+ seq_puts(seq, "Personalities : ");
+ spin_lock(&pers_lock);
+ list_for_each_entry(pers, &pers_list, list)
+ seq_printf(seq, "[%s] ", pers->name);
+
+ spin_unlock(&pers_lock);
+ seq_puts(seq, "\n");
+}
+
static int status_resync(struct seq_file *seq, struct mddev *mddev)
{
sector_t max_sectors, resync, res;
@@ -8273,20 +8286,10 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev)
static void *md_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(&all_mddevs_lock)
{
- struct md_personality *pers;
-
- seq_puts(seq, "Personalities : ");
- spin_lock(&pers_lock);
- list_for_each_entry(pers, &pers_list, list)
- seq_printf(seq, "[%s] ", pers->name);
-
- spin_unlock(&pers_lock);
- seq_puts(seq, "\n");
seq->poll_event = atomic_read(&md_event_count);
-
spin_lock(&all_mddevs_lock);
- return seq_list_start(&all_mddevs, *pos);
+ return seq_list_start_head(&all_mddevs, *pos);
}
static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -8297,16 +8300,23 @@ static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void md_seq_stop(struct seq_file *seq, void *v)
__releases(&all_mddevs_lock)
{
- status_unused(seq);
spin_unlock(&all_mddevs_lock);
}
static int md_seq_show(struct seq_file *seq, void *v)
{
- struct mddev *mddev = list_entry(v, struct mddev, all_mddevs);
+ struct mddev *mddev;
sector_t sectors;
struct md_rdev *rdev;
+ if (v == &all_mddevs) {
+ status_personalities(seq);
+ if (list_empty(&all_mddevs))
+ status_unused(seq);
+ return 0;
+ }
+
+ mddev = list_entry(v, struct mddev, all_mddevs);
if (!mddev_get(mddev))
return 0;
@@ -8382,6 +8392,10 @@ static int md_seq_show(struct seq_file *seq, void *v)
}
spin_unlock(&mddev->lock);
spin_lock(&all_mddevs_lock);
+
+ if (mddev == list_last_entry(&all_mddevs, struct mddev, all_mddevs))
+ status_unused(seq);
+
if (atomic_dec_and_test(&mddev->active))
__mddev_put(mddev);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index aaa434f0c175..24f0d799fd98 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1968,12 +1968,12 @@ static void end_sync_write(struct bio *bio)
}
static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,
- int sectors, struct page *page, int rw)
+ int sectors, struct page *page, blk_opf_t rw)
{
if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
/* success */
return 1;
- if (rw == WRITE) {
+ if (rw == REQ_OP_WRITE) {
set_bit(WriteErrorSeen, &rdev->flags);
if (!test_and_set_bit(WantReplacement,
&rdev->flags))
@@ -2090,7 +2090,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
rdev = conf->mirrors[d].rdev;
if (r1_sync_page_io(rdev, sect, s,
pages[idx],
- WRITE) == 0) {
+ REQ_OP_WRITE) == 0) {
r1_bio->bios[d]->bi_end_io = NULL;
rdev_dec_pending(rdev, mddev);
}
@@ -2105,7 +2105,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
rdev = conf->mirrors[d].rdev;
if (r1_sync_page_io(rdev, sect, s,
pages[idx],
- READ) != 0)
+ REQ_OP_READ) != 0)
atomic_add(s, &rdev->corrected_errors);
}
sectors -= s;
@@ -2321,7 +2321,7 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
!test_bit(Faulty, &rdev->flags)) {
atomic_inc(&rdev->nr_pending);
r1_sync_page_io(rdev, sect, s,
- conf->tmppage, WRITE);
+ conf->tmppage, REQ_OP_WRITE);
rdev_dec_pending(rdev, mddev);
}
}
@@ -2335,7 +2335,7 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
!test_bit(Faulty, &rdev->flags)) {
atomic_inc(&rdev->nr_pending);
if (r1_sync_page_io(rdev, sect, s,
- conf->tmppage, READ)) {
+ conf->tmppage, REQ_OP_READ)) {
atomic_add(s, &rdev->corrected_errors);
pr_info("md/raid1:%s: read error corrected (%d sectors at %llu on %pg)\n",
mdname(mddev), s,
diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c
index ee341b83eeba..a5c0431c101c 100644
--- a/drivers/nvme/common/keyring.c
+++ b/drivers/nvme/common/keyring.c
@@ -111,7 +111,7 @@ static struct key *nvme_tls_psk_lookup(struct key *keyring,
* should be preferred to 'generated' PSKs,
* and SHA-384 should be preferred to SHA-256.
*/
-struct nvme_tls_psk_priority_list {
+static struct nvme_tls_psk_priority_list {
bool generated;
enum nvme_tcp_tls_cipher cipher;
} nvme_tls_psk_prio[] = {
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 0af612387083..85ab0fcf9e88 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1740,13 +1740,13 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk,
struct nvme_ns_head *head)
{
struct request_queue *queue = disk->queue;
- u32 size = queue_logical_block_size(queue);
+ u32 max_discard_sectors;
- if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX))
- ctrl->max_discard_sectors =
- nvme_lba_to_sect(head, ctrl->dmrsl);
-
- if (ctrl->max_discard_sectors == 0) {
+ if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) {
+ max_discard_sectors = nvme_lba_to_sect(head, ctrl->dmrsl);
+ } else if (ctrl->oncs & NVME_CTRL_ONCS_DSM) {
+ max_discard_sectors = UINT_MAX;
+ } else {
blk_queue_max_discard_sectors(queue, 0);
return;
}
@@ -1754,14 +1754,22 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk,
BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
NVME_DSM_MAX_RANGES);
- queue->limits.discard_granularity = size;
-
- /* If discard is already enabled, don't reset queue limits */
+ /*
+ * If discard is already enabled, don't reset queue limits.
+ *
+ * This works around the fact that the block layer can't cope well with
+ * updating the hardware limits when overridden through sysfs. This is
+ * harmless because discard limits in NVMe are purely advisory.
+ */
if (queue->limits.max_discard_sectors)
return;
- blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors);
- blk_queue_max_discard_segments(queue, ctrl->max_discard_segments);
+ blk_queue_max_discard_sectors(queue, max_discard_sectors);
+ if (ctrl->dmrl)
+ blk_queue_max_discard_segments(queue, ctrl->dmrl);
+ else
+ blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
+ queue->limits.discard_granularity = queue_logical_block_size(queue);
if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
@@ -2930,14 +2938,6 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
struct nvme_id_ctrl_nvm *id;
int ret;
- if (ctrl->oncs & NVME_CTRL_ONCS_DSM) {
- ctrl->max_discard_sectors = UINT_MAX;
- ctrl->max_discard_segments = NVME_DSM_MAX_RANGES;
- } else {
- ctrl->max_discard_sectors = 0;
- ctrl->max_discard_segments = 0;
- }
-
/*
* Even though NVMe spec explicitly states that MDTS is not applicable
* to the write-zeroes, we are cautious and limit the size to the
@@ -2967,8 +2967,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
if (ret)
goto free_data;
- if (id->dmrl)
- ctrl->max_discard_segments = id->dmrl;
+ ctrl->dmrl = id->dmrl;
ctrl->dmrsl = le32_to_cpu(id->dmrsl);
if (id->wzsl)
ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 4be7f6822966..030c80818240 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -303,14 +303,13 @@ struct nvme_ctrl {
u32 max_hw_sectors;
u32 max_segments;
u32 max_integrity_segments;
- u32 max_discard_sectors;
- u32 max_discard_segments;
u32 max_zeroes_sectors;
#ifdef CONFIG_BLK_DEV_ZONED
u32 max_zone_append;
#endif
u16 crdt[3];
u16 oncs;
+ u8 dmrl;
u32 dmrsl;
u16 oacs;
u16 sqsize;
@@ -932,6 +931,10 @@ extern struct device_attribute dev_attr_ana_grpid;
extern struct device_attribute dev_attr_ana_state;
extern struct device_attribute subsys_attr_iopolicy;
+static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
+{
+ return disk->fops == &nvme_ns_head_ops;
+}
#else
#define multipath false
static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
@@ -1009,6 +1012,10 @@ static inline void nvme_mpath_start_request(struct request *rq)
static inline void nvme_mpath_end_request(struct request *rq)
{
}
+static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
+{
+ return false;
+}
#endif /* CONFIG_NVME_MULTIPATH */
int nvme_revalidate_zones(struct nvme_ns *ns);
@@ -1037,7 +1044,10 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
{
- return dev_to_disk(dev)->private_data;
+ struct gendisk *disk = dev_to_disk(dev);
+
+ WARN_ON(nvme_disk_is_ns_head(disk));
+ return disk->private_data;
}
#ifdef CONFIG_NVME_HWMON
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 61af7ff1a9d6..c1d6357ec98a 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1284,6 +1284,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
struct request *abort_req;
struct nvme_command cmd = { };
u32 csts = readl(dev->bar + NVME_REG_CSTS);
+ u8 opcode;
/* If PCI error recovery process is happening, we cannot reset or
* the recovery mechanism will surely fail.
@@ -1310,8 +1311,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) {
dev_warn(dev->ctrl.device,
- "I/O %d QID %d timeout, completion polled\n",
- req->tag, nvmeq->qid);
+ "I/O tag %d (%04x) QID %d timeout, completion polled\n",
+ req->tag, nvme_cid(req), nvmeq->qid);
return BLK_EH_DONE;
}
@@ -1327,8 +1328,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
fallthrough;
case NVME_CTRL_DELETING:
dev_warn_ratelimited(dev->ctrl.device,
- "I/O %d QID %d timeout, disable controller\n",
- req->tag, nvmeq->qid);
+ "I/O tag %d (%04x) QID %d timeout, disable controller\n",
+ req->tag, nvme_cid(req), nvmeq->qid);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
nvme_dev_disable(dev, true);
return BLK_EH_DONE;
@@ -1343,10 +1344,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
* command was already aborted once before and still hasn't been
* returned to the driver, or if this is the admin queue.
*/
+ opcode = nvme_req(req)->cmd->common.opcode;
if (!nvmeq->qid || iod->aborted) {
dev_warn(dev->ctrl.device,
- "I/O %d QID %d timeout, reset controller\n",
- req->tag, nvmeq->qid);
+ "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n",
+ req->tag, nvme_cid(req), opcode,
+ nvme_opcode_str(nvmeq->qid, opcode, 0), nvmeq->qid);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
goto disable;
}
@@ -1362,10 +1365,10 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
dev_warn(nvmeq->dev->ctrl.device,
- "I/O %d (%s) QID %d timeout, aborting\n",
- req->tag,
- nvme_get_opcode_str(nvme_req(req)->cmd->common.opcode),
- nvmeq->qid);
+ "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, aborting req_op:%s(%u) size:%u\n",
+ req->tag, nvme_cid(req), opcode, nvme_get_opcode_str(opcode),
+ nvmeq->qid, blk_op_str(req_op(req)), req_op(req),
+ blk_rq_bytes(req));
abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd),
BLK_MQ_REQ_NOWAIT);
@@ -2743,10 +2746,10 @@ static void nvme_reset_work(struct work_struct *work)
* controller around but remove all namespaces.
*/
if (dev->online_queues > 1) {
+ nvme_dbbuf_set(dev);
nvme_unquiesce_io_queues(&dev->ctrl);
nvme_wait_freeze(&dev->ctrl);
nvme_pci_update_nr_queues(dev);
- nvme_dbbuf_set(dev);
nvme_unfreeze(&dev->ctrl);
} else {
dev_warn(dev->ctrl.device, "IO queues lost\n");
@@ -3408,6 +3411,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x1c5c, 0x174a), /* SK Hynix P31 SSD */
.driver_data = NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(0x1c5c, 0x1D59), /* SK Hynix BC901 */
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x1d97, 0x2263), /* SPCC */
diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c
index 391b1465ebfd..fc3eed00f9ff 100644
--- a/drivers/nvme/host/pr.c
+++ b/drivers/nvme/host/pr.c
@@ -98,7 +98,7 @@ static int nvme_send_pr_command(struct block_device *bdev,
struct nvme_command *c, void *data, unsigned int data_len)
{
if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
- bdev->bd_disk->fops == &nvme_ns_head_ops)
+ nvme_disk_is_ns_head(bdev->bd_disk))
return nvme_send_ns_head_pr_command(bdev, c, data, data_len);
return nvme_send_ns_pr_command(bdev->bd_disk->private_data, c, data,
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index c89503da24d7..11dde0d83044 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1946,9 +1946,14 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq)
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_queue *queue = req->queue;
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
-
- dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
- rq->tag, nvme_rdma_queue_idx(queue));
+ u8 opcode = req->req.cmd->common.opcode;
+ u8 fctype = req->req.cmd->fabrics.fctype;
+ int qid = nvme_rdma_queue_idx(queue);
+
+ dev_warn(ctrl->ctrl.device,
+ "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout\n",
+ rq->tag, nvme_cid(rq), opcode,
+ nvme_opcode_str(qid, opcode, fctype), qid);
if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) {
/*
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index ac24ad102380..754e91111042 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -39,10 +39,9 @@ static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
{
struct gendisk *disk = dev_to_disk(dev);
- if (disk->fops == &nvme_bdev_ops)
- return nvme_get_ns_from_dev(dev)->head;
- else
+ if (nvme_disk_is_ns_head(disk))
return disk->private_data;
+ return nvme_get_ns_from_dev(dev)->head;
}
static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
@@ -233,7 +232,8 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
}
#ifdef CONFIG_NVME_MULTIPATH
if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
- if (dev_to_disk(dev)->fops != &nvme_bdev_ops) /* per-path attr */
+ /* per-path attr */
+ if (nvme_disk_is_ns_head(dev_to_disk(dev)))
return 0;
if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
return 0;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 08805f027810..d058d990532b 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1922,14 +1922,13 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
ctrl->opts->subsysnqn);
if (!pskid) {
dev_err(ctrl->device, "no valid PSK found\n");
- ret = -ENOKEY;
- goto out_free_queue;
+ return -ENOKEY;
}
}
ret = nvme_tcp_alloc_queue(ctrl, 0, pskid);
if (ret)
- goto out_free_queue;
+ return ret;
ret = nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl));
if (ret)
@@ -2433,9 +2432,9 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq)
int qid = nvme_tcp_queue_id(req->queue);
dev_warn(ctrl->device,
- "queue %d: timeout cid %#x type %d opcode %#x (%s)\n",
- nvme_tcp_queue_id(req->queue), nvme_cid(rq), pdu->hdr.type,
- opc, nvme_opcode_str(qid, opc, fctype));
+ "I/O tag %d (%04x) type %d opcode %#x (%s) QID %d timeout\n",
+ rq->tag, nvme_cid(rq), pdu->hdr.type, opc,
+ nvme_opcode_str(qid, opc, fctype), qid);
if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) {
/*
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index bd59990b5250..bda7a3009e85 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1031,7 +1031,7 @@ nvmet_fc_match_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
list_for_each_entry(host, &tgtport->host_list, host_list) {
if (host->hosthandle == hosthandle && !host->invalid) {
if (nvmet_fc_hostport_get(host))
- return (host);
+ return host;
}
}
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index c65a73433c05..ead349af30f1 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -995,11 +995,6 @@ fcloop_nport_free(struct kref *ref)
{
struct fcloop_nport *nport =
container_of(ref, struct fcloop_nport, ref);
- unsigned long flags;
-
- spin_lock_irqsave(&fcloop_lock, flags);
- list_del(&nport->nport_list);
- spin_unlock_irqrestore(&fcloop_lock, flags);
kfree(nport);
}
@@ -1357,6 +1352,8 @@ __unlink_remote_port(struct fcloop_nport *nport)
nport->tport->remoteport = NULL;
nport->rport = NULL;
+ list_del(&nport->nport_list);
+
return rport;
}
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 4597bca43a6d..667f9c04f35d 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -37,6 +37,8 @@
#define NVMET_RDMA_MAX_MDTS 8
#define NVMET_RDMA_MAX_METADATA_MDTS 5
+#define NVMET_RDMA_BACKLOG 128
+
struct nvmet_rdma_srq;
struct nvmet_rdma_cmd {
@@ -1583,8 +1585,19 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
}
if (queue->host_qid == 0) {
- /* Let inflight controller teardown complete */
- flush_workqueue(nvmet_wq);
+ struct nvmet_rdma_queue *q;
+ int pending = 0;
+
+ /* Check for pending controller teardown */
+ mutex_lock(&nvmet_rdma_queue_mutex);
+ list_for_each_entry(q, &nvmet_rdma_queue_list, queue_list) {
+ if (q->nvme_sq.ctrl == queue->nvme_sq.ctrl &&
+ q->state == NVMET_RDMA_Q_DISCONNECTING)
+ pending++;
+ }
+ mutex_unlock(&nvmet_rdma_queue_mutex);
+ if (pending > NVMET_RDMA_BACKLOG)
+ return NVME_SC_CONNECT_CTRL_BUSY;
}
ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
@@ -1880,7 +1893,7 @@ static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port)
goto out_destroy_id;
}
- ret = rdma_listen(cm_id, 128);
+ ret = rdma_listen(cm_id, NVMET_RDMA_BACKLOG);
if (ret) {
pr_err("listening to %pISpcs failed (%d)\n", addr, ret);
goto out_destroy_id;
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 4cc27856aa8f..6a1e6bb80062 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -24,6 +24,8 @@
#include "nvmet.h"
#define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE)
+#define NVMET_TCP_MAXH2CDATA 0x400000 /* 4M arbitrary limit */
+#define NVMET_TCP_BACKLOG 128
static int param_store_val(const char *str, int *val, int min, int max)
{
@@ -923,7 +925,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
icresp->hdr.pdo = 0;
icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen);
icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0);
- icresp->maxdata = cpu_to_le32(0x400000); /* 16M arbitrary limit */
+ icresp->maxdata = cpu_to_le32(NVMET_TCP_MAXH2CDATA);
icresp->cpda = 0;
if (queue->hdr_digest)
icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE;
@@ -978,13 +980,13 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
{
struct nvme_tcp_data_pdu *data = &queue->pdu.data;
struct nvmet_tcp_cmd *cmd;
+ unsigned int exp_data_len;
if (likely(queue->nr_cmds)) {
if (unlikely(data->ttag >= queue->nr_cmds)) {
pr_err("queue %d: received out of bound ttag %u, nr_cmds %u\n",
queue->idx, data->ttag, queue->nr_cmds);
- nvmet_tcp_fatal_error(queue);
- return -EPROTO;
+ goto err_proto;
}
cmd = &queue->cmds[data->ttag];
} else {
@@ -995,19 +997,32 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
pr_err("ttag %u unexpected data offset %u (expected %u)\n",
data->ttag, le32_to_cpu(data->data_offset),
cmd->rbytes_done);
- /* FIXME: use path and transport errors */
- nvmet_req_complete(&cmd->req,
- NVME_SC_INVALID_FIELD | NVME_SC_DNR);
- return -EPROTO;
+ goto err_proto;
}
+ exp_data_len = le32_to_cpu(data->hdr.plen) -
+ nvmet_tcp_hdgst_len(queue) -
+ nvmet_tcp_ddgst_len(queue) -
+ sizeof(*data);
+
cmd->pdu_len = le32_to_cpu(data->data_length);
+ if (unlikely(cmd->pdu_len != exp_data_len ||
+ cmd->pdu_len == 0 ||
+ cmd->pdu_len > NVMET_TCP_MAXH2CDATA)) {
+ pr_err("H2CData PDU len %u is invalid\n", cmd->pdu_len);
+ goto err_proto;
+ }
cmd->pdu_recv = 0;
nvmet_tcp_build_pdu_iovec(cmd);
queue->cmd = cmd;
queue->rcv_state = NVMET_TCP_RECV_DATA;
return 0;
+
+err_proto:
+ /* FIXME: use proper transport errors */
+ nvmet_tcp_fatal_error(queue);
+ return -EPROTO;
}
static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
@@ -1768,7 +1783,7 @@ static int nvmet_tcp_try_peek_pdu(struct nvmet_tcp_queue *queue)
(int)sizeof(struct nvme_tcp_icreq_pdu));
if (hdr->type == nvme_tcp_icreq &&
hdr->hlen == sizeof(struct nvme_tcp_icreq_pdu) &&
- hdr->plen == (__le32)sizeof(struct nvme_tcp_icreq_pdu)) {
+ hdr->plen == cpu_to_le32(sizeof(struct nvme_tcp_icreq_pdu))) {
pr_debug("queue %d: icreq detected\n",
queue->idx);
return len;
@@ -2053,7 +2068,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
goto err_sock;
}
- ret = kernel_listen(port->sock, 128);
+ ret = kernel_listen(port->sock, NVMET_TCP_BACKLOG);
if (ret) {
pr_err("failed to listen %d on port sock\n", ret);
goto err_sock;
@@ -2119,8 +2134,19 @@ static u16 nvmet_tcp_install_queue(struct nvmet_sq *sq)
container_of(sq, struct nvmet_tcp_queue, nvme_sq);
if (sq->qid == 0) {
- /* Let inflight controller teardown complete */
- flush_workqueue(nvmet_wq);
+ struct nvmet_tcp_queue *q;
+ int pending = 0;
+
+ /* Check for pending controller teardown */
+ mutex_lock(&nvmet_tcp_queue_mutex);
+ list_for_each_entry(q, &nvmet_tcp_queue_list, queue_list) {
+ if (q->nvme_sq.ctrl == sq->ctrl &&
+ q->state == NVMET_TCP_Q_DISCONNECTING)
+ pending++;
+ }
+ mutex_unlock(&nvmet_tcp_queue_mutex);
+ if (pending > NVMET_TCP_BACKLOG)
+ return NVME_SC_CONNECT_CTRL_BUSY;
}
queue->nr_cmds = sq->size * 2;
diff --git a/drivers/nvme/target/trace.c b/drivers/nvme/target/trace.c
index bff454d46255..6ee1f3db81d0 100644
--- a/drivers/nvme/target/trace.c
+++ b/drivers/nvme/target/trace.c
@@ -211,7 +211,7 @@ const char *nvmet_trace_disk_name(struct trace_seq *p, char *name)
return ret;
}
-const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl)
+const char *nvmet_trace_ctrl_id(struct trace_seq *p, u16 ctrl_id)
{
const char *ret = trace_seq_buffer_ptr(p);
@@ -224,8 +224,8 @@ const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl)
* If we can know the extra data of the connect command in this stage,
* we can update this print statement later.
*/
- if (ctrl)
- trace_seq_printf(p, "%d", ctrl->cntlid);
+ if (ctrl_id)
+ trace_seq_printf(p, "%d", ctrl_id);
else
trace_seq_printf(p, "_");
trace_seq_putc(p, 0);
diff --git a/drivers/nvme/target/trace.h b/drivers/nvme/target/trace.h
index 6109b3806b12..7f7ebf9558e5 100644
--- a/drivers/nvme/target/trace.h
+++ b/drivers/nvme/target/trace.h
@@ -32,18 +32,24 @@ const char *nvmet_trace_parse_fabrics_cmd(struct trace_seq *p, u8 fctype,
nvmet_trace_parse_nvm_cmd(p, opcode, cdw10) : \
nvmet_trace_parse_admin_cmd(p, opcode, cdw10)))
-const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl);
-#define __print_ctrl_name(ctrl) \
- nvmet_trace_ctrl_name(p, ctrl)
+const char *nvmet_trace_ctrl_id(struct trace_seq *p, u16 ctrl_id);
+#define __print_ctrl_id(ctrl_id) \
+ nvmet_trace_ctrl_id(p, ctrl_id)
const char *nvmet_trace_disk_name(struct trace_seq *p, char *name);
#define __print_disk_name(name) \
nvmet_trace_disk_name(p, name)
#ifndef TRACE_HEADER_MULTI_READ
-static inline struct nvmet_ctrl *nvmet_req_to_ctrl(struct nvmet_req *req)
+static inline u16 nvmet_req_to_ctrl_id(struct nvmet_req *req)
{
- return req->sq->ctrl;
+ /*
+ * The queue and controller pointers are not valid until an association
+ * has been established.
+ */
+ if (!req->sq || !req->sq->ctrl)
+ return 0;
+ return req->sq->ctrl->cntlid;
}
static inline void __assign_req_name(char *name, struct nvmet_req *req)
@@ -53,8 +59,7 @@ static inline void __assign_req_name(char *name, struct nvmet_req *req)
return;
}
- strncpy(name, req->ns->device_path,
- min_t(size_t, DISK_NAME_LEN, strlen(req->ns->device_path)));
+ strscpy_pad(name, req->ns->device_path, DISK_NAME_LEN);
}
#endif
@@ -63,7 +68,7 @@ TRACE_EVENT(nvmet_req_init,
TP_ARGS(req, cmd),
TP_STRUCT__entry(
__field(struct nvme_command *, cmd)
- __field(struct nvmet_ctrl *, ctrl)
+ __field(u16, ctrl_id)
__array(char, disk, DISK_NAME_LEN)
__field(int, qid)
__field(u16, cid)
@@ -76,7 +81,7 @@ TRACE_EVENT(nvmet_req_init,
),
TP_fast_assign(
__entry->cmd = cmd;
- __entry->ctrl = nvmet_req_to_ctrl(req);
+ __entry->ctrl_id = nvmet_req_to_ctrl_id(req);
__assign_req_name(__entry->disk, req);
__entry->qid = req->sq->qid;
__entry->cid = cmd->common.command_id;
@@ -85,12 +90,12 @@ TRACE_EVENT(nvmet_req_init,
__entry->flags = cmd->common.flags;
__entry->nsid = le32_to_cpu(cmd->common.nsid);
__entry->metadata = le64_to_cpu(cmd->common.metadata);
- memcpy(__entry->cdw10, &cmd->common.cdw10,
+ memcpy(__entry->cdw10, &cmd->common.cdws,
sizeof(__entry->cdw10));
),
TP_printk("nvmet%s: %sqid=%d, cmdid=%u, nsid=%u, flags=%#x, "
"meta=%#llx, cmd=(%s, %s)",
- __print_ctrl_name(__entry->ctrl),
+ __print_ctrl_id(__entry->ctrl_id),
__print_disk_name(__entry->disk),
__entry->qid, __entry->cid, __entry->nsid,
__entry->flags, __entry->metadata,
@@ -104,7 +109,7 @@ TRACE_EVENT(nvmet_req_complete,
TP_PROTO(struct nvmet_req *req),
TP_ARGS(req),
TP_STRUCT__entry(
- __field(struct nvmet_ctrl *, ctrl)
+ __field(u16, ctrl_id)
__array(char, disk, DISK_NAME_LEN)
__field(int, qid)
__field(int, cid)
@@ -112,7 +117,7 @@ TRACE_EVENT(nvmet_req_complete,
__field(u16, status)
),
TP_fast_assign(
- __entry->ctrl = nvmet_req_to_ctrl(req);
+ __entry->ctrl_id = nvmet_req_to_ctrl_id(req);
__entry->qid = req->cq->qid;
__entry->cid = req->cqe->command_id;
__entry->result = le64_to_cpu(req->cqe->result.u64);
@@ -120,7 +125,7 @@ TRACE_EVENT(nvmet_req_complete,
__assign_req_name(__entry->disk, req);
),
TP_printk("nvmet%s: %sqid=%d, cmdid=%u, res=%#llx, status=%#x",
- __print_ctrl_name(__entry->ctrl),
+ __print_ctrl_id(__entry->ctrl_id),
__print_disk_name(__entry->disk),
__entry->qid, __entry->cid, __entry->result, __entry->status)