From 7a6691f1f89784f775fa0c54be57533445726068 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 18 May 2022 16:37:59 +0300 Subject: vdpa: Fix error logic in vdpa_nl_cmd_dev_get_doit In vdpa_nl_cmd_dev_get_doit(), if the call to genlmsg_reply() fails we must not call nlmsg_free() since this is done inside genlmsg_reply(). Fix it. Fixes: bc0d90ee021f ("vdpa: Enable user to query vdpa device info") Reviewed-by: Si-Wei Liu Acked-by: Jason Wang Signed-off-by: Eli Cohen Message-Id: <20220518133804.1075129-2-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 2b75c00b1005..fac89a0d8178 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -756,14 +756,19 @@ static int vdpa_nl_cmd_dev_get_doit(struct sk_buff *skb, struct genl_info *info) goto mdev_err; } err = vdpa_dev_fill(vdev, msg, info->snd_portid, info->snd_seq, 0, info->extack); - if (!err) - err = genlmsg_reply(msg, info); + if (err) + goto mdev_err; + + err = genlmsg_reply(msg, info); + put_device(dev); + mutex_unlock(&vdpa_dev_mutex); + return err; + mdev_err: put_device(dev); err: mutex_unlock(&vdpa_dev_mutex); - if (err) - nlmsg_free(msg); + nlmsg_free(msg); return err; } -- cgit v1.2.3 From 13b00b135665c92065a27c0c39dd97e0f380bd4f Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 18 May 2022 16:38:00 +0300 Subject: vdpa: Add support for querying vendor statistics Allows to read vendor statistics of a vdpa device. The specific statistics data are received from the upstream driver in the form of an (attribute name, attribute value) pairs. An example of statistics for mlx5_vdpa device are: received_desc - number of descriptors received by the virtqueue completed_desc - number of descriptors completed by the virtqueue A descriptor using indirect buffers is still counted as 1. 
In addition, N chained descriptors are counted correctly N times as one would expect. A new callback was added to vdpa_config_ops which provides the means for the vdpa driver to return statistics results. The interface allows for reading all the supported virtqueues, including the control virtqueue if it exists. Below are some examples taken from mlx5_vdpa which are introduced in the following patch: 1. Read statistics for the virtqueue at index 1 $ vdpa dev vstats show vdpa-a qidx 1 vdpa-a: queue_type tx queue_index 1 received_desc 3844836 completed_desc 3844836 2. Read statistics for the virtqueue at index 32 $ vdpa dev vstats show vdpa-a qidx 32 vdpa-a: queue_type control_vq queue_index 32 received_desc 62 completed_desc 62 3. Read statistics for the virtqueue at index 0 with json output $ vdpa -j dev vstats show vdpa-a qidx 0 {"vstats":{"vdpa-a":{ "queue_type":"rx","queue_index":0,"name":"received_desc","value":417776,\ "name":"completed_desc","value":417548}}} 4. Read statistics for the virtqueue at index 0 with pretty json output $ vdpa -jp dev vstats show vdpa-a qidx 0 { "vstats": { "vdpa-a": { "queue_type": "rx", "queue_index": 0, "name": "received_desc", "value": 417776, "name": "completed_desc", "value": 417548 } } } Signed-off-by: Eli Cohen Message-Id: <20220518133804.1075129-3-elic@nvidia.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/vdpa.c | 162 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/vdpa.h | 3 + include/uapi/linux/vdpa.h | 6 ++ 3 files changed, 171 insertions(+) (limited to 'drivers') diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index fac89a0d8178..31b5eb2c0778 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -914,6 +914,108 @@ out: return err; } +static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg, + struct genl_info *info, u32 index) +{ + struct virtio_net_config config = {}; + u64 features; + u16 max_vqp; + u8 status; + int err; + + status = vdev->config->get_status(vdev); + if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) { + NL_SET_ERR_MSG_MOD(info->extack, "feature negotiation not complete"); + return -EAGAIN; + } + vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config)); + + max_vqp = le16_to_cpu(config.max_virtqueue_pairs); + if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, max_vqp)) + return -EMSGSIZE; + + features = vdev->config->get_driver_features(vdev); + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES, + features, VDPA_ATTR_PAD)) + return -EMSGSIZE; + + if (nla_put_u32(msg, VDPA_ATTR_DEV_QUEUE_INDEX, index)) + return -EMSGSIZE; + + err = vdev->config->get_vendor_vq_stats(vdev, index, msg, info->extack); + if (err) + return err; + + return 0; +} + +static int vendor_stats_fill(struct vdpa_device *vdev, struct sk_buff *msg, + struct genl_info *info, u32 index) +{ + int err; + + mutex_lock(&vdev->cf_mutex); + if (!vdev->config->get_vendor_vq_stats) { + err = -EOPNOTSUPP; + goto out; + } + + err = vdpa_fill_stats_rec(vdev, msg, info, index); +out: + mutex_unlock(&vdev->cf_mutex); + return err; +} + +static int vdpa_dev_vendor_stats_fill(struct vdpa_device *vdev, + struct sk_buff *msg, + struct genl_info *info, u32 index) +{ + u32 device_id; + void *hdr; + int err; + u32 portid = info->snd_portid; + u32 seq = info->snd_seq; + u32 flags = 0; + + hdr = genlmsg_put(msg, portid, 
seq, &vdpa_nl_family, flags, + VDPA_CMD_DEV_VSTATS_GET); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) { + err = -EMSGSIZE; + goto undo_msg; + } + + device_id = vdev->config->get_device_id(vdev); + if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) { + err = -EMSGSIZE; + goto undo_msg; + } + + switch (device_id) { + case VIRTIO_ID_NET: + if (index > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) { + NL_SET_ERR_MSG_MOD(info->extack, "queue index excceeds max value"); + err = -ERANGE; + break; + } + + err = vendor_stats_fill(vdev, msg, info, index); + break; + default: + err = -EOPNOTSUPP; + break; + } + genlmsg_end(msg, hdr); + + return err; + +undo_msg: + genlmsg_cancel(msg, hdr); + return err; +} + static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info *info) { struct vdpa_device *vdev; @@ -995,6 +1097,60 @@ vdpa_nl_cmd_dev_config_get_dumpit(struct sk_buff *msg, struct netlink_callback * return msg->len; } +static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct vdpa_device *vdev; + struct sk_buff *msg; + const char *devname; + struct device *dev; + u32 index; + int err; + + if (!info->attrs[VDPA_ATTR_DEV_NAME]) + return -EINVAL; + + if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX]) + return -EINVAL; + + devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + index = nla_get_u32(info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX]); + mutex_lock(&vdpa_dev_mutex); + dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match); + if (!dev) { + NL_SET_ERR_MSG_MOD(info->extack, "device not found"); + err = -ENODEV; + goto dev_err; + } + vdev = container_of(dev, struct vdpa_device, dev); + if (!vdev->mdev) { + NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device"); + err = -EINVAL; + goto mdev_err; + } + err = vdpa_dev_vendor_stats_fill(vdev, msg, info, index); + if (err) + goto mdev_err; 
+ + err = genlmsg_reply(msg, info); + + put_device(dev); + mutex_unlock(&vdpa_dev_mutex); + + return err; + +mdev_err: + put_device(dev); +dev_err: + nlmsg_free(msg); + mutex_unlock(&vdpa_dev_mutex); + return err; +} + static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = { [VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING }, [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING }, @@ -1035,6 +1191,12 @@ static const struct genl_ops vdpa_nl_ops[] = { .doit = vdpa_nl_cmd_dev_config_get_doit, .dumpit = vdpa_nl_cmd_dev_config_get_dumpit, }, + { + .cmd = VDPA_CMD_DEV_VSTATS_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = vdpa_nl_cmd_dev_stats_get_doit, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_family vdpa_nl_family __ro_after_init = { diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 8943a209202e..2ae8443331e1 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -276,6 +276,9 @@ struct vdpa_config_ops { const struct vdpa_vq_state *state); int (*get_vq_state)(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state); + int (*get_vendor_vq_stats)(struct vdpa_device *vdev, u16 idx, + struct sk_buff *msg, + struct netlink_ext_ack *extack); struct vdpa_notification_area (*get_vq_notification)(struct vdpa_device *vdev, u16 idx); /* vq irq is not expected to be changed once DRIVER_OK is set */ diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index 1061d8d2d09d..25c55cab3d7c 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -18,6 +18,7 @@ enum vdpa_command { VDPA_CMD_DEV_DEL, VDPA_CMD_DEV_GET, /* can dump */ VDPA_CMD_DEV_CONFIG_GET, /* can dump */ + VDPA_CMD_DEV_VSTATS_GET, }; enum vdpa_attr { @@ -46,6 +47,11 @@ enum vdpa_attr { VDPA_ATTR_DEV_NEGOTIATED_FEATURES, /* u64 */ VDPA_ATTR_DEV_MGMTDEV_MAX_VQS, /* u32 */ VDPA_ATTR_DEV_SUPPORTED_FEATURES, /* u64 */ + + VDPA_ATTR_DEV_QUEUE_INDEX, /* u32 */ + VDPA_ATTR_DEV_VENDOR_ATTR_NAME, /* string */ 
+ VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, /* u64 */ + /* new attributes must be added above here */ VDPA_ATTR_MAX, }; -- cgit v1.2.3 From 0078ad905dc8eada34461312a0060b0904c57e2d Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 18 May 2022 16:38:01 +0300 Subject: net/vdpa: Use readers/writers semaphore instead of vdpa_dev_mutex Use rw_semaphore instead of mutex to control access to vdpa devices. This can be especially beneficial in case processes poll on statistics information. Suggested-by: Si-Wei Liu Reviewed-by: Si-Wei Liu Acked-by: Jason Wang Signed-off-by: Eli Cohen Message-Id: <20220518133804.1075129-4-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 64 ++++++++++++++++++++++++++--------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 31b5eb2c0778..2ff7de5e6b2f 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -18,7 +18,7 @@ static LIST_HEAD(mdev_head); /* A global mutex that protects vdpa management device and device level operations. 
*/ -static DEFINE_MUTEX(vdpa_dev_mutex); +static DECLARE_RWSEM(vdpa_dev_lock); static DEFINE_IDA(vdpa_index_ida); void vdpa_set_status(struct vdpa_device *vdev, u8 status) @@ -238,7 +238,7 @@ static int __vdpa_register_device(struct vdpa_device *vdev, u32 nvqs) vdev->nvqs = nvqs; - lockdep_assert_held(&vdpa_dev_mutex); + lockdep_assert_held(&vdpa_dev_lock); dev = bus_find_device(&vdpa_bus, NULL, dev_name(&vdev->dev), vdpa_name_match); if (dev) { put_device(dev); @@ -278,9 +278,9 @@ int vdpa_register_device(struct vdpa_device *vdev, u32 nvqs) { int err; - mutex_lock(&vdpa_dev_mutex); + down_write(&vdpa_dev_lock); err = __vdpa_register_device(vdev, nvqs); - mutex_unlock(&vdpa_dev_mutex); + up_write(&vdpa_dev_lock); return err; } EXPORT_SYMBOL_GPL(vdpa_register_device); @@ -293,7 +293,7 @@ EXPORT_SYMBOL_GPL(vdpa_register_device); */ void _vdpa_unregister_device(struct vdpa_device *vdev) { - lockdep_assert_held(&vdpa_dev_mutex); + lockdep_assert_held(&vdpa_dev_lock); WARN_ON(!vdev->mdev); device_unregister(&vdev->dev); } @@ -305,9 +305,9 @@ EXPORT_SYMBOL_GPL(_vdpa_unregister_device); */ void vdpa_unregister_device(struct vdpa_device *vdev) { - mutex_lock(&vdpa_dev_mutex); + down_write(&vdpa_dev_lock); device_unregister(&vdev->dev); - mutex_unlock(&vdpa_dev_mutex); + up_write(&vdpa_dev_lock); } EXPORT_SYMBOL_GPL(vdpa_unregister_device); @@ -352,9 +352,9 @@ int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev) return -EINVAL; INIT_LIST_HEAD(&mdev->list); - mutex_lock(&vdpa_dev_mutex); + down_write(&vdpa_dev_lock); list_add_tail(&mdev->list, &mdev_head); - mutex_unlock(&vdpa_dev_mutex); + up_write(&vdpa_dev_lock); return 0; } EXPORT_SYMBOL_GPL(vdpa_mgmtdev_register); @@ -371,14 +371,14 @@ static int vdpa_match_remove(struct device *dev, void *data) void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev) { - mutex_lock(&vdpa_dev_mutex); + down_write(&vdpa_dev_lock); list_del(&mdev->list); /* Filter out all the entries belong to this management device and delete it. 
*/ bus_for_each_dev(&vdpa_bus, NULL, mdev, vdpa_match_remove); - mutex_unlock(&vdpa_dev_mutex); + up_write(&vdpa_dev_lock); } EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister); @@ -532,17 +532,17 @@ static int vdpa_nl_cmd_mgmtdev_get_doit(struct sk_buff *skb, struct genl_info *i if (!msg) return -ENOMEM; - mutex_lock(&vdpa_dev_mutex); + down_read(&vdpa_dev_lock); mdev = vdpa_mgmtdev_get_from_attr(info->attrs); if (IS_ERR(mdev)) { - mutex_unlock(&vdpa_dev_mutex); + up_read(&vdpa_dev_lock); NL_SET_ERR_MSG_MOD(info->extack, "Fail to find the specified mgmt device"); err = PTR_ERR(mdev); goto out; } err = vdpa_mgmtdev_fill(mdev, msg, info->snd_portid, info->snd_seq, 0); - mutex_unlock(&vdpa_dev_mutex); + up_read(&vdpa_dev_lock); if (err) goto out; err = genlmsg_reply(msg, info); @@ -561,7 +561,7 @@ vdpa_nl_cmd_mgmtdev_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) int idx = 0; int err; - mutex_lock(&vdpa_dev_mutex); + down_read(&vdpa_dev_lock); list_for_each_entry(mdev, &mdev_head, list) { if (idx < start) { idx++; @@ -574,7 +574,7 @@ vdpa_nl_cmd_mgmtdev_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) idx++; } out: - mutex_unlock(&vdpa_dev_mutex); + up_read(&vdpa_dev_lock); cb->args[0] = idx; return msg->len; } @@ -627,7 +627,7 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - mutex_lock(&vdpa_dev_mutex); + down_write(&vdpa_dev_lock); mdev = vdpa_mgmtdev_get_from_attr(info->attrs); if (IS_ERR(mdev)) { NL_SET_ERR_MSG_MOD(info->extack, "Fail to find the specified management device"); @@ -643,7 +643,7 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i err = mdev->ops->dev_add(mdev, name, &config); err: - mutex_unlock(&vdpa_dev_mutex); + up_write(&vdpa_dev_lock); return err; } @@ -659,7 +659,7 @@ static int vdpa_nl_cmd_dev_del_set_doit(struct sk_buff *skb, struct genl_info *i return -EINVAL; name = 
nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); - mutex_lock(&vdpa_dev_mutex); + down_write(&vdpa_dev_lock); dev = bus_find_device(&vdpa_bus, NULL, name, vdpa_name_match); if (!dev) { NL_SET_ERR_MSG_MOD(info->extack, "device not found"); @@ -677,7 +677,7 @@ static int vdpa_nl_cmd_dev_del_set_doit(struct sk_buff *skb, struct genl_info *i mdev_err: put_device(dev); dev_err: - mutex_unlock(&vdpa_dev_mutex); + up_write(&vdpa_dev_lock); return err; } @@ -743,7 +743,7 @@ static int vdpa_nl_cmd_dev_get_doit(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - mutex_lock(&vdpa_dev_mutex); + down_read(&vdpa_dev_lock); dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match); if (!dev) { NL_SET_ERR_MSG_MOD(info->extack, "device not found"); @@ -761,13 +761,13 @@ static int vdpa_nl_cmd_dev_get_doit(struct sk_buff *skb, struct genl_info *info) err = genlmsg_reply(msg, info); put_device(dev); - mutex_unlock(&vdpa_dev_mutex); + up_read(&vdpa_dev_lock); return err; mdev_err: put_device(dev); err: - mutex_unlock(&vdpa_dev_mutex); + up_read(&vdpa_dev_lock); nlmsg_free(msg); return err; } @@ -809,9 +809,9 @@ static int vdpa_nl_cmd_dev_get_dumpit(struct sk_buff *msg, struct netlink_callba info.start_idx = cb->args[0]; info.idx = 0; - mutex_lock(&vdpa_dev_mutex); + down_read(&vdpa_dev_lock); bus_for_each_dev(&vdpa_bus, NULL, &info, vdpa_dev_dump); - mutex_unlock(&vdpa_dev_mutex); + up_read(&vdpa_dev_lock); cb->args[0] = info.idx; return msg->len; } @@ -1031,7 +1031,7 @@ static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info if (!msg) return -ENOMEM; - mutex_lock(&vdpa_dev_mutex); + down_read(&vdpa_dev_lock); dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match); if (!dev) { NL_SET_ERR_MSG_MOD(info->extack, "device not found"); @@ -1052,7 +1052,7 @@ static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info mdev_err: put_device(dev); dev_err: - mutex_unlock(&vdpa_dev_mutex); + up_read(&vdpa_dev_lock); if 
(err) nlmsg_free(msg); return err; @@ -1090,9 +1090,9 @@ vdpa_nl_cmd_dev_config_get_dumpit(struct sk_buff *msg, struct netlink_callback * info.start_idx = cb->args[0]; info.idx = 0; - mutex_lock(&vdpa_dev_mutex); + down_read(&vdpa_dev_lock); bus_for_each_dev(&vdpa_bus, NULL, &info, vdpa_dev_config_dump); - mutex_unlock(&vdpa_dev_mutex); + up_read(&vdpa_dev_lock); cb->args[0] = info.idx; return msg->len; } @@ -1119,7 +1119,7 @@ static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb, return -ENOMEM; index = nla_get_u32(info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX]); - mutex_lock(&vdpa_dev_mutex); + down_read(&vdpa_dev_lock); dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match); if (!dev) { NL_SET_ERR_MSG_MOD(info->extack, "device not found"); @@ -1139,7 +1139,7 @@ static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb, err = genlmsg_reply(msg, info); put_device(dev); - mutex_unlock(&vdpa_dev_mutex); + up_read(&vdpa_dev_lock); return err; @@ -1147,7 +1147,7 @@ mdev_err: put_device(dev); dev_err: nlmsg_free(msg); - mutex_unlock(&vdpa_dev_mutex); + up_read(&vdpa_dev_lock); return err; } -- cgit v1.2.3 From a6a51adc6e8aafebfe0c4beb80e99694ea562b40 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 18 May 2022 16:38:02 +0300 Subject: net/vdpa: Use readers/writers semaphore instead of cf_mutex Replace cf_mutex with rw_semaphore to reflect the fact that some calls could be called concurrently but can suffice with read lock. Suggested-by: Si-Wei Liu Signed-off-by: Eli Cohen Message-Id: <20220518133804.1075129-5-elic@nvidia.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/vdpa.c | 25 ++++++++++++------------- include/linux/vdpa.h | 12 ++++++------ 2 files changed, 18 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 2ff7de5e6b2f..9d3534a0bc5f 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -23,9 +23,9 @@ static DEFINE_IDA(vdpa_index_ida); void vdpa_set_status(struct vdpa_device *vdev, u8 status) { - mutex_lock(&vdev->cf_mutex); + down_write(&vdev->cf_lock); vdev->config->set_status(vdev, status); - mutex_unlock(&vdev->cf_mutex); + up_write(&vdev->cf_lock); } EXPORT_SYMBOL(vdpa_set_status); @@ -148,7 +148,6 @@ static void vdpa_release_dev(struct device *d) ops->free(vdev); ida_simple_remove(&vdpa_index_ida, vdev->index); - mutex_destroy(&vdev->cf_mutex); kfree(vdev->driver_override); kfree(vdev); } @@ -211,7 +210,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, if (err) goto err_name; - mutex_init(&vdev->cf_mutex); + init_rwsem(&vdev->cf_lock); device_initialize(&vdev->dev); return vdev; @@ -407,9 +406,9 @@ static void vdpa_get_config_unlocked(struct vdpa_device *vdev, void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len) { - mutex_lock(&vdev->cf_mutex); + down_read(&vdev->cf_lock); vdpa_get_config_unlocked(vdev, offset, buf, len); - mutex_unlock(&vdev->cf_mutex); + up_read(&vdev->cf_lock); } EXPORT_SYMBOL_GPL(vdpa_get_config); @@ -423,9 +422,9 @@ EXPORT_SYMBOL_GPL(vdpa_get_config); void vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf, unsigned int length) { - mutex_lock(&vdev->cf_mutex); + down_write(&vdev->cf_lock); vdev->config->set_config(vdev, offset, buf, length); - mutex_unlock(&vdev->cf_mutex); + up_write(&vdev->cf_lock); } EXPORT_SYMBOL_GPL(vdpa_set_config); @@ -866,7 +865,7 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u8 status; int err; - mutex_lock(&vdev->cf_mutex); + 
down_read(&vdev->cf_lock); status = vdev->config->get_status(vdev); if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) { NL_SET_ERR_MSG_MOD(extack, "Features negotiation not completed"); @@ -903,14 +902,14 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, if (err) goto msg_err; - mutex_unlock(&vdev->cf_mutex); + up_read(&vdev->cf_lock); genlmsg_end(msg, hdr); return 0; msg_err: genlmsg_cancel(msg, hdr); out: - mutex_unlock(&vdev->cf_mutex); + up_read(&vdev->cf_lock); return err; } @@ -954,7 +953,7 @@ static int vendor_stats_fill(struct vdpa_device *vdev, struct sk_buff *msg, { int err; - mutex_lock(&vdev->cf_mutex); + down_read(&vdev->cf_lock); if (!vdev->config->get_vendor_vq_stats) { err = -EOPNOTSUPP; goto out; @@ -962,7 +961,7 @@ static int vendor_stats_fill(struct vdpa_device *vdev, struct sk_buff *msg, err = vdpa_fill_stats_rec(vdev, msg, info, index); out: - mutex_unlock(&vdev->cf_mutex); + up_read(&vdev->cf_lock); return err; } diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 2ae8443331e1..2cb14847831e 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -66,7 +66,7 @@ struct vdpa_mgmt_dev; * @dma_dev: the actual device that is performing DMA * @driver_override: driver name to force a match * @config: the configuration ops for this device. - * @cf_mutex: Protects get and set access to configuration layout. + * @cf_lock: Protects get and set access to configuration layout. * @index: device index * @features_valid: were features initialized? 
for legacy guests * @use_va: indicate whether virtual address must be used by this device @@ -79,7 +79,7 @@ struct vdpa_device { struct device *dma_dev; const char *driver_override; const struct vdpa_config_ops *config; - struct mutex cf_mutex; /* Protects get/set config */ + struct rw_semaphore cf_lock; /* Protects get/set config */ unsigned int index; bool features_valid; bool use_va; @@ -398,10 +398,10 @@ static inline int vdpa_reset(struct vdpa_device *vdev) const struct vdpa_config_ops *ops = vdev->config; int ret; - mutex_lock(&vdev->cf_mutex); + down_write(&vdev->cf_lock); vdev->features_valid = false; ret = ops->reset(vdev); - mutex_unlock(&vdev->cf_mutex); + up_write(&vdev->cf_lock); return ret; } @@ -420,9 +420,9 @@ static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features) { int ret; - mutex_lock(&vdev->cf_mutex); + down_write(&vdev->cf_lock); ret = vdpa_set_features_unlocked(vdev, features); - mutex_unlock(&vdev->cf_mutex); + up_write(&vdev->cf_lock); return ret; } -- cgit v1.2.3 From 1892a3d425bf525ac98d6d3534035e6ed2bfab50 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 18 May 2022 16:38:03 +0300 Subject: vdpa/mlx5: Add support for reading descriptor statistics Implement the get_vendor_vq_stats callback of vdpa_config_ops to return the statistics for a virtqueue. The statistics are provided as vendor-specific statistics where the driver provides a pair of attribute name and attribute value. Currently supported are received descriptors and completed descriptors. Signed-off-by: Eli Cohen Message-Id: <20220518133804.1075129-6-elic@nvidia.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/mlx5/core/mlx5_vdpa.h | 2 + drivers/vdpa/mlx5/net/mlx5_vnet.c | 149 +++++++++++++++++++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 1 + include/linux/mlx5/mlx5_ifc_vdpa.h | 39 ++++++++++ 4 files changed, 191 insertions(+) (limited to 'drivers') diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index daaf7b503677..44104093163b 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -61,6 +61,8 @@ struct mlx5_control_vq { struct vringh_kiov riov; struct vringh_kiov wiov; unsigned short head; + unsigned int received_desc; + unsigned int completed_desc; }; struct mlx5_vdpa_wq_ent { diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index e0de44000d92..2b815ef850c8 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -119,6 +119,7 @@ struct mlx5_vdpa_virtqueue { struct mlx5_vdpa_umem umem2; struct mlx5_vdpa_umem umem3; + u32 counter_set_id; bool initialized; int index; u32 virtq_id; @@ -818,6 +819,12 @@ static u16 get_features_12_3(u64 features) (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6); } +static bool counters_supported(const struct mlx5_vdpa_dev *mvdev) +{ + return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) & + BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); +} + static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) { int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in); @@ -872,6 +879,8 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id); MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size); MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn); + if (counters_supported(&ndev->mvdev)) + MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id); err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); if (err) @@ -1135,6 +1144,47 @@ static int 
modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque return err; } +static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {}; + u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {}; + void *cmd_hdr; + int err; + + if (!counters_supported(&ndev->mvdev)) + return 0; + + cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); + + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return 0; +} + +static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {}; + + if (!counters_supported(&ndev->mvdev)) + return; + + MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id); + MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid); + MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); + if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) + mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id); +} + static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) { u16 idx = mvq->index; @@ -1162,6 +1212,10 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) if (err) goto err_connect; + err = counter_set_alloc(ndev, mvq); + if (err) + goto err_counter; + err = 
create_virtqueue(ndev, mvq); if (err) goto err_connect; @@ -1179,6 +1233,8 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) return 0; err_connect: + counter_set_dealloc(ndev, mvq); +err_counter: qp_destroy(ndev, &mvq->vqqp); err_vqqp: qp_destroy(ndev, &mvq->fwqp); @@ -1223,6 +1279,7 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue * suspend_vq(ndev, mvq); destroy_virtqueue(ndev, mvq); + counter_set_dealloc(ndev, mvq); qp_destroy(ndev, &mvq->vqqp); qp_destroy(ndev, &mvq->fwqp); cq_destroy(ndev, mvq->index); @@ -1659,6 +1716,7 @@ static void mlx5_cvq_kick_handler(struct work_struct *work) if (read != sizeof(ctrl)) break; + cvq->received_desc++; switch (ctrl.class) { case VIRTIO_NET_CTRL_MAC: status = handle_ctrl_mac(mvdev, ctrl.cmd); @@ -1682,6 +1740,7 @@ static void mlx5_cvq_kick_handler(struct work_struct *work) if (vringh_need_notify_iotlb(&cvq->vring)) vringh_notify(&cvq->vring); + cvq->completed_desc++; queue_work(mvdev->wq, &wqent->work); break; } @@ -2303,6 +2362,8 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev) mlx5_vdpa_destroy_mr(&ndev->mvdev); ndev->mvdev.status = 0; ndev->cur_num_vqs = 0; + ndev->mvdev.cvq.received_desc = 0; + ndev->mvdev.cvq.completed_desc = 0; memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1)); ndev->mvdev.actual_features = 0; ++mvdev->generation; @@ -2422,6 +2483,93 @@ static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev) return mvdev->actual_features; } +static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, + u64 *received_desc, u64 *completed_desc) +{ + u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {}; + u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {}; + void *cmd_hdr; + void *ctx; + int err; + + if (!counters_supported(&ndev->mvdev)) + return -EOPNOTSUPP; + + if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) + return -EAGAIN; + + cmd_hdr = 
MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id); + + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters); + *received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc); + *completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc); + return 0; +} + +static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx, + struct sk_buff *msg, + struct netlink_ext_ack *extack) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq; + struct mlx5_control_vq *cvq; + u64 received_desc; + u64 completed_desc; + int err = 0; + + mutex_lock(&ndev->reslock); + if (!is_index_valid(mvdev, idx)) { + NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid"); + err = -EINVAL; + goto out_err; + } + + if (idx == ctrl_vq_idx(mvdev)) { + cvq = &mvdev->cvq; + received_desc = cvq->received_desc; + completed_desc = cvq->completed_desc; + goto out; + } + + mvq = &ndev->vqs[idx]; + err = counter_set_query(ndev, mvq, &received_desc, &completed_desc); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "failed to query hardware"); + goto out_err; + } + +out: + err = -EMSGSIZE; + if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc")) + goto out_err; + + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc, + VDPA_ATTR_PAD)) + goto out_err; + + if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc")) + goto out_err; + + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc, + 
VDPA_ATTR_PAD)) + goto out_err; + + err = 0; +out_err: + mutex_unlock(&ndev->reslock); + return err; +} + static const struct vdpa_config_ops mlx5_vdpa_ops = { .set_vq_address = mlx5_vdpa_set_vq_address, .set_vq_num = mlx5_vdpa_set_vq_num, @@ -2431,6 +2579,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = { .get_vq_ready = mlx5_vdpa_get_vq_ready, .set_vq_state = mlx5_vdpa_set_vq_state, .get_vq_state = mlx5_vdpa_get_vq_state, + .get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats, .get_vq_notification = mlx5_get_vq_notification, .get_vq_irq = mlx5_get_vq_irq, .get_vq_align = mlx5_vdpa_get_vq_align, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 78b3d3465dd7..2a8334bb5f82 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -87,6 +87,7 @@ enum { enum { MLX5_OBJ_TYPE_GENEVE_TLV_OPT = 0x000b, MLX5_OBJ_TYPE_VIRTIO_NET_Q = 0x000d, + MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS = 0x001c, MLX5_OBJ_TYPE_MATCH_DEFINER = 0x0018, MLX5_OBJ_TYPE_MKEY = 0xff01, MLX5_OBJ_TYPE_QP = 0xff02, diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h index 1a9c9d94cb59..4414ed5b6ed2 100644 --- a/include/linux/mlx5/mlx5_ifc_vdpa.h +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h @@ -165,4 +165,43 @@ struct mlx5_ifc_modify_virtio_net_q_out_bits { struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; }; +struct mlx5_ifc_virtio_q_counters_bits { + u8 modify_field_select[0x40]; + u8 reserved_at_40[0x40]; + u8 received_desc[0x40]; + u8 completed_desc[0x40]; + u8 error_cqes[0x20]; + u8 bad_desc_errors[0x20]; + u8 exceed_max_chain[0x20]; + u8 invalid_buffer[0x20]; + u8 reserved_at_180[0x280]; +}; + +struct mlx5_ifc_create_virtio_q_counters_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_virtio_q_counters_bits virtio_q_counters; +}; + +struct mlx5_ifc_create_virtio_q_counters_out_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct 
mlx5_ifc_virtio_q_counters_bits virtio_q_counters; +}; + +struct mlx5_ifc_destroy_virtio_q_counters_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; +}; + +struct mlx5_ifc_destroy_virtio_q_counters_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits hdr; +}; + +struct mlx5_ifc_query_virtio_q_counters_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; +}; + +struct mlx5_ifc_query_virtio_q_counters_out_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; + struct mlx5_ifc_virtio_q_counters_bits counters; +}; + #endif /* __MLX5_IFC_VDPA_H_ */ -- cgit v1.2.3 From 759ae7f9bf1e6b7f5c9c197d7207e2be1dfd74b1 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 18 May 2022 16:38:04 +0300 Subject: vdpa/mlx5: Use readers/writers semaphore instead of mutex Reading statistics could be done intensively and by several processes concurrently. Reader's lock is sufficient in this case. Change reslock from mutex to a rwsem. Suggested-by: Si-Wei Liu Signed-off-by: Eli Cohen Message-Id: <20220518133804.1075129-7-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 41 ++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 22 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 2b815ef850c8..57cfc64248b7 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -155,7 +155,7 @@ struct mlx5_vdpa_net { * since memory map might change and we need to destroy and create * resources while driver in operational. 
*/ - struct mutex reslock; + struct rw_semaphore reslock; struct mlx5_flow_table *rxft; struct mlx5_fc *rx_counter; struct mlx5_flow_handle *rx_rule_ucast; @@ -1695,7 +1695,7 @@ static void mlx5_cvq_kick_handler(struct work_struct *work) ndev = to_mlx5_vdpa_ndev(mvdev); cvq = &mvdev->cvq; - mutex_lock(&ndev->reslock); + down_write(&ndev->reslock); if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) goto out; @@ -1746,7 +1746,7 @@ static void mlx5_cvq_kick_handler(struct work_struct *work) } out: - mutex_unlock(&ndev->reslock); + up_write(&ndev->reslock); } static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) @@ -2244,7 +2244,7 @@ static int setup_driver(struct mlx5_vdpa_dev *mvdev) struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); int err; - WARN_ON(!mutex_is_locked(&ndev->reslock)); + WARN_ON(!rwsem_is_locked(&ndev->reslock)); if (ndev->setup) { mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n"); @@ -2292,7 +2292,7 @@ out: static void teardown_driver(struct mlx5_vdpa_net *ndev) { - WARN_ON(!mutex_is_locked(&ndev->reslock)); + WARN_ON(!rwsem_is_locked(&ndev->reslock)); if (!ndev->setup) return; @@ -2322,7 +2322,7 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) print_status(mvdev, status, true); - mutex_lock(&ndev->reslock); + down_write(&ndev->reslock); if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { if (status & VIRTIO_CONFIG_S_DRIVER_OK) { @@ -2338,14 +2338,14 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) } ndev->mvdev.status = status; - mutex_unlock(&ndev->reslock); + up_write(&ndev->reslock); return; err_setup: mlx5_vdpa_destroy_mr(&ndev->mvdev); ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; err_clear: - mutex_unlock(&ndev->reslock); + up_write(&ndev->reslock); } static int mlx5_vdpa_reset(struct vdpa_device *vdev) @@ -2356,7 +2356,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev) print_status(mvdev, 0, true); mlx5_vdpa_info(mvdev, "performing device 
reset\n"); - mutex_lock(&ndev->reslock); + down_write(&ndev->reslock); teardown_driver(ndev); clear_vqs_ready(ndev); mlx5_vdpa_destroy_mr(&ndev->mvdev); @@ -2371,7 +2371,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev) if (mlx5_vdpa_create_mr(mvdev, NULL)) mlx5_vdpa_warn(mvdev, "create MR failed\n"); } - mutex_unlock(&ndev->reslock); + up_write(&ndev->reslock); return 0; } @@ -2411,7 +2411,7 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb bool change_map; int err; - mutex_lock(&ndev->reslock); + down_write(&ndev->reslock); err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map); if (err) { @@ -2423,7 +2423,7 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb err = mlx5_vdpa_change_map(mvdev, iotlb); err: - mutex_unlock(&ndev->reslock); + up_write(&ndev->reslock); return err; } @@ -2442,7 +2442,6 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev) mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); } mlx5_vdpa_free_resources(&ndev->mvdev); - mutex_destroy(&ndev->reslock); kfree(ndev->event_cbs); kfree(ndev->vqs); } @@ -2527,7 +2526,7 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx, u64 completed_desc; int err = 0; - mutex_lock(&ndev->reslock); + down_read(&ndev->reslock); if (!is_index_valid(mvdev, idx)) { NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid"); err = -EINVAL; @@ -2566,7 +2565,7 @@ out: err = 0; out_err: - mutex_unlock(&ndev->reslock); + up_read(&ndev->reslock); return err; } @@ -2835,18 +2834,18 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, } init_mvqs(ndev); - mutex_init(&ndev->reslock); + init_rwsem(&ndev->reslock); config = &ndev->config; if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) { err = config_func_mtu(mdev, add_config->net.mtu); if (err) - goto err_mtu; + goto err_alloc; } err = query_mtu(mdev, &mtu); if (err) - goto err_mtu; + goto err_alloc; ndev->config.mtu = 
cpu_to_mlx5vdpa16(mvdev, mtu); @@ -2860,14 +2859,14 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, } else { err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac); if (err) - goto err_mtu; + goto err_alloc; } if (!is_zero_ether_addr(config->mac)) { pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev)); err = mlx5_mpfs_add_mac(pfmdev, config->mac); if (err) - goto err_mtu; + goto err_alloc; ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC); } @@ -2917,8 +2916,6 @@ err_res: err_mpfs: if (!is_zero_ether_addr(config->mac)) mlx5_mpfs_del_mac(pfmdev, config->mac); -err_mtu: - mutex_destroy(&ndev->reslock); err_alloc: put_device(&mvdev->vdev.dev); return err; -- cgit v1.2.3 From 4e0400525691d0e676dbe002641f9a61261f1e1b Mon Sep 17 00:00:00 2001 From: Suwan Kim Date: Thu, 7 Apr 2022 00:32:06 +0900 Subject: virtio-blk: support polling I/O This patch supports polling I/O via virtio-blk driver. Polling feature is enabled by module parameter "poll_queues" and it sets dedicated polling queues for virtio-blk. This patch improves the polling I/O throughput and latency. The virtio-blk driver does not have a poll function and a poll queue and it has been operating in interrupt driven method even if the polling function is called in the upper layer. virtio-blk polling is implemented upon 'batched completion' of block layer. virtblk_poll() queues completed request to io_comp_batch->req_list and later, virtblk_complete_batch() calls unmap function and ends the requests in batch. virtio-blk reads the number of poll queues from module parameter "poll_queues". If VM sets queue parameter as below, ("num-queues=N" [QEMU property], "poll_queues=M" [module parameter]) It allocates N virtqueues to virtio_blk->vqs[N] and it uses [0..(N-M-1)] as default queues and [(N-M)..(N-1)] as poll queues. Unlike the default queues, the poll queues have no callback function.
Regarding HW-SW queue mapping, the default queue mapping uses the existing method that considers MSI irq vector. But the poll queue doesn't have an irq, so it uses the regular blk-mq cpu mapping. For verifying the improvement, I did Fio polling I/O performance test with io_uring engine with the options below. (io_uring, hipri, randread, direct=1, bs=512, iodepth=64 numjobs=N) I set 4 vcpu and 4 virtio-blk queues - 2 default queues and 2 poll queues for VM. As a result, IOPS and average latency improved about 10%. Test result: - Fio io_uring poll without virtio-blk poll support -- numjobs=1 : IOPS = 339K, avg latency = 188.33us -- numjobs=2 : IOPS = 367K, avg latency = 347.33us -- numjobs=4 : IOPS = 383K, avg latency = 682.06us - Fio io_uring poll with virtio-blk poll support -- numjobs=1 : IOPS = 385K, avg latency = 165.94us -- numjobs=2 : IOPS = 408K, avg latency = 313.28us -- numjobs=4 : IOPS = 424K, avg latency = 613.05us Reviewed-by: Stefan Hajnoczi Reviewed-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Signed-off-by: Suwan Kim Message-Id: <20220406153207.163134-2-suwan.kim027@gmail.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Chaitanya Kulkarni --- drivers/block/virtio_blk.c | 106 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 103 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index d624cc8eddc3..ad5f9ce8f3f9 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -37,6 +37,10 @@ MODULE_PARM_DESC(num_request_queues, "0 for no limit.
" "Values > nr_cpu_ids truncated to nr_cpu_ids."); +static unsigned int poll_queues; +module_param(poll_queues, uint, 0644); +MODULE_PARM_DESC(poll_queues, "The number of dedicated virtqueues for polling I/O"); + static int major; static DEFINE_IDA(vd_index_ida); @@ -74,6 +78,7 @@ struct virtio_blk { /* num of vqs */ int num_vqs; + int io_queues[HCTX_MAX_TYPES]; struct virtio_blk_vq *vqs; }; @@ -512,6 +517,7 @@ static int init_vq(struct virtio_blk *vblk) const char **names; struct virtqueue **vqs; unsigned short num_vqs; + unsigned int num_poll_vqs; struct virtio_device *vdev = vblk->vdev; struct irq_affinity desc = { 0, }; @@ -520,6 +526,7 @@ static int init_vq(struct virtio_blk *vblk) &num_vqs); if (err) num_vqs = 1; + if (!err && !num_vqs) { dev_err(&vdev->dev, "MQ advertised but zero queues reported\n"); return -EINVAL; @@ -529,6 +536,17 @@ static int init_vq(struct virtio_blk *vblk) min_not_zero(num_request_queues, nr_cpu_ids), num_vqs); + num_poll_vqs = min_t(unsigned int, poll_queues, num_vqs - 1); + + vblk->io_queues[HCTX_TYPE_DEFAULT] = num_vqs - num_poll_vqs; + vblk->io_queues[HCTX_TYPE_READ] = 0; + vblk->io_queues[HCTX_TYPE_POLL] = num_poll_vqs; + + dev_info(&vdev->dev, "%d/%d/%d default/read/poll queues\n", + vblk->io_queues[HCTX_TYPE_DEFAULT], + vblk->io_queues[HCTX_TYPE_READ], + vblk->io_queues[HCTX_TYPE_POLL]); + vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL); if (!vblk->vqs) return -ENOMEM; @@ -541,12 +559,18 @@ static int init_vq(struct virtio_blk *vblk) goto out; } - for (i = 0; i < num_vqs; i++) { + for (i = 0; i < num_vqs - num_poll_vqs; i++) { callbacks[i] = virtblk_done; snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i); names[i] = vblk->vqs[i].name; } + for (; i < num_vqs; i++) { + callbacks[i] = NULL; + snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req_poll.%d", i); + names[i] = vblk->vqs[i].name; + } + /* Discover virtqueues and write information to configuration. 
*/ err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc); if (err) @@ -692,16 +716,89 @@ static const struct attribute_group *virtblk_attr_groups[] = { static int virtblk_map_queues(struct blk_mq_tag_set *set) { struct virtio_blk *vblk = set->driver_data; + int i, qoff; + + for (i = 0, qoff = 0; i < set->nr_maps; i++) { + struct blk_mq_queue_map *map = &set->map[i]; + + map->nr_queues = vblk->io_queues[i]; + map->queue_offset = qoff; + qoff += map->nr_queues; + + if (map->nr_queues == 0) + continue; + + /* + * Regular queues have interrupts and hence CPU affinity is + * defined by the core virtio code, but polling queues have + * no interrupts so we let the block layer assign CPU affinity. + */ + if (i == HCTX_TYPE_POLL) + blk_mq_map_queues(&set->map[i]); + else + blk_mq_virtio_map_queues(&set->map[i], vblk->vdev, 0); + } + + return 0; +} + +static void virtblk_complete_batch(struct io_comp_batch *iob) +{ + struct request *req; - return blk_mq_virtio_map_queues(&set->map[HCTX_TYPE_DEFAULT], - vblk->vdev, 0); + rq_list_for_each(&iob->req_list, req) { + virtblk_unmap_data(req, blk_mq_rq_to_pdu(req)); + virtblk_cleanup_cmd(req); + } + blk_mq_end_request_batch(iob); +} + +static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) +{ + struct virtio_blk *vblk = hctx->queue->queuedata; + struct virtio_blk_vq *vq = hctx->driver_data; + struct virtblk_req *vbr; + unsigned long flags; + unsigned int len; + int found = 0; + + spin_lock_irqsave(&vq->lock, flags); + + while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) { + struct request *req = blk_mq_rq_from_pdu(vbr); + + found++; + if (!blk_mq_add_to_batch(req, iob, vbr->status, + virtblk_complete_batch)) + blk_mq_complete_request(req); + } + + if (found) + blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); + + spin_unlock_irqrestore(&vq->lock, flags); + + return found; +} + +static int virtblk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, + unsigned int hctx_idx) +{ + struct 
virtio_blk *vblk = data; + struct virtio_blk_vq *vq = &vblk->vqs[hctx_idx]; + + WARN_ON(vblk->tag_set.tags[hctx_idx] != hctx->tags); + hctx->driver_data = vq; + return 0; } static const struct blk_mq_ops virtio_mq_ops = { .queue_rq = virtio_queue_rq, .commit_rqs = virtio_commit_rqs, + .init_hctx = virtblk_init_hctx, .complete = virtblk_request_done, .map_queues = virtblk_map_queues, + .poll = virtblk_poll, }; static unsigned int virtblk_queue_depth; @@ -778,6 +875,9 @@ static int virtblk_probe(struct virtio_device *vdev) sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT; vblk->tag_set.driver_data = vblk; vblk->tag_set.nr_hw_queues = vblk->num_vqs; + vblk->tag_set.nr_maps = 1; + if (vblk->io_queues[HCTX_TYPE_POLL]) + vblk->tag_set.nr_maps = 3; err = blk_mq_alloc_tag_set(&vblk->tag_set); if (err) -- cgit v1.2.3 From 0e9911fa768f32f30e5678512ea405d99a7a9fef Mon Sep 17 00:00:00 2001 From: Suwan Kim Date: Thu, 7 Apr 2022 00:32:07 +0900 Subject: virtio-blk: support mq_ops->queue_rqs() This patch supports mq_ops->queue_rqs() hook. It has an advantage of batch submission to virtio-blk driver. It also helps polling I/O because polling uses batched completion of block layer. Batch submission in queue_rqs() can boost polling performance. In queue_rqs(), it iterates plug->mq_list, collects requests that belong to same HW queue until it encounters a request from other HW queue or sees the end of the list. Then, virtio-blk adds requests into virtqueue and kicks virtqueue to submit requests. If there is an error, it inserts error request to requeue_list and passes it to ordinary block layer path. For verification, I did fio test. (io_uring, randread, direct=1, bs=4K, iodepth=64 numjobs=N) I set 4 vcpu and 2 virtio-blk queues for VM and run fio test 5 times. It shows about 2% improvement. 
| numjobs=2 | numjobs=4 ----------------------------------------------------------- fio without queue_rqs() | 291K IOPS | 238K IOPS ----------------------------------------------------------- fio with queue_rqs() | 295K IOPS | 243K IOPS For polling I/O performance, I also did fio test as below. (io_uring, hipri, randread, direct=1, bs=512, iodepth=64 numjobs=4) I set 4 vcpu and 2 poll queues for VM. It shows about 2% improvement in polling I/O. | IOPS | avg latency ----------------------------------------------------------- fio poll without queue_rqs() | 424K | 613.05 usec ----------------------------------------------------------- fio poll with queue_rqs() | 435K | 601.01 usec Reviewed-by: Stefan Hajnoczi Reviewed-by: Christoph Hellwig Signed-off-by: Suwan Kim Message-Id: <20220406153207.163134-3-suwan.kim027@gmail.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Chaitanya Kulkarni --- drivers/block/virtio_blk.c | 114 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 98 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index ad5f9ce8f3f9..6fc7850c2b0a 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -101,8 +101,7 @@ static inline blk_status_t virtblk_result(struct virtblk_req *vbr) } } -static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr, - struct scatterlist *data_sg, bool have_data) +static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr) { struct scatterlist hdr, status, *sgs[3]; unsigned int num_out = 0, num_in = 0; @@ -110,11 +109,11 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr, sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); sgs[num_out++] = &hdr; - if (have_data) { + if (vbr->sg_table.nents) { if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT)) - sgs[num_out++] = data_sg; + sgs[num_out++] = vbr->sg_table.sgl; else - sgs[num_out + num_in++] = data_sg; 
+ sgs[num_out + num_in++] = vbr->sg_table.sgl; } sg_init_one(&status, &vbr->status, sizeof(vbr->status)); @@ -304,6 +303,28 @@ static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx) virtqueue_notify(vq->vq); } +static blk_status_t virtblk_prep_rq(struct blk_mq_hw_ctx *hctx, + struct virtio_blk *vblk, + struct request *req, + struct virtblk_req *vbr) +{ + blk_status_t status; + + status = virtblk_setup_cmd(vblk->vdev, req, vbr); + if (unlikely(status)) + return status; + + blk_mq_start_request(req); + + vbr->sg_table.nents = virtblk_map_data(hctx, req, vbr); + if (unlikely(vbr->sg_table.nents < 0)) { + virtblk_cleanup_cmd(req); + return BLK_STS_RESOURCE; + } + + return BLK_STS_OK; +} + static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -311,26 +332,17 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req = bd->rq; struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); unsigned long flags; - int num; int qid = hctx->queue_num; bool notify = false; blk_status_t status; int err; - status = virtblk_setup_cmd(vblk->vdev, req, vbr); + status = virtblk_prep_rq(hctx, vblk, req, vbr); if (unlikely(status)) return status; - blk_mq_start_request(req); - - num = virtblk_map_data(hctx, req, vbr); - if (unlikely(num < 0)) { - virtblk_cleanup_cmd(req); - return BLK_STS_RESOURCE; - } - spin_lock_irqsave(&vblk->vqs[qid].lock, flags); - err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg_table.sgl, num); + err = virtblk_add_req(vblk->vqs[qid].vq, vbr); if (err) { virtqueue_kick(vblk->vqs[qid].vq); /* Don't stop the queue if -ENOMEM: we may have failed to @@ -360,6 +372,75 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } +static bool virtblk_prep_rq_batch(struct request *req) +{ + struct virtio_blk *vblk = req->mq_hctx->queue->queuedata; + struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); + + req->mq_hctx->tags->rqs[req->tag] = req; + + return 
virtblk_prep_rq(req->mq_hctx, vblk, req, vbr) == BLK_STS_OK; +} + +static bool virtblk_add_req_batch(struct virtio_blk_vq *vq, + struct request **rqlist, + struct request **requeue_list) +{ + unsigned long flags; + int err; + bool kick; + + spin_lock_irqsave(&vq->lock, flags); + + while (!rq_list_empty(*rqlist)) { + struct request *req = rq_list_pop(rqlist); + struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); + + err = virtblk_add_req(vq->vq, vbr); + if (err) { + virtblk_unmap_data(req, vbr); + virtblk_cleanup_cmd(req); + rq_list_add(requeue_list, req); + } + } + + kick = virtqueue_kick_prepare(vq->vq); + spin_unlock_irqrestore(&vq->lock, flags); + + return kick; +} + +static void virtio_queue_rqs(struct request **rqlist) +{ + struct request *req, *next, *prev = NULL; + struct request *requeue_list = NULL; + + rq_list_for_each_safe(rqlist, req, next) { + struct virtio_blk_vq *vq = req->mq_hctx->driver_data; + bool kick; + + if (!virtblk_prep_rq_batch(req)) { + rq_list_move(rqlist, &requeue_list, req, prev); + req = prev; + if (!req) + continue; + } + + if (!next || req->mq_hctx != next->mq_hctx) { + req->rq_next = NULL; + kick = virtblk_add_req_batch(vq, rqlist, &requeue_list); + if (kick) + virtqueue_notify(vq->vq); + + *rqlist = next; + prev = NULL; + } else + prev = req; + } + + *rqlist = requeue_list; +} + /* return id (s/n) string for *disk to *id_str */ static int virtblk_get_id(struct gendisk *disk, char *id_str) @@ -794,6 +875,7 @@ static int virtblk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, static const struct blk_mq_ops virtio_mq_ops = { .queue_rq = virtio_queue_rq, + .queue_rqs = virtio_queue_rqs, .commit_rqs = virtio_commit_rqs, .init_hctx = virtblk_init_hctx, .complete = virtblk_request_done, -- cgit v1.2.3 From 35c51e093d956f6d058e193711c8d424817a44a9 Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Mon, 28 Mar 2022 18:58:16 +0800 Subject: virtio_ring: remove unnecessary to_vvq call in vring hot path It passes '_vq' to 
virtqueue_use_indirect(), which still calls to_vvq to get 'vq', let's directly pass 'vq'. It can avoid unnecessary call of to_vvq in hot path. Signed-off-by: Xianting Tian Message-Id: <20220328105817.1028065-1-xianting.tian@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Stefano Garzarella --- drivers/virtio/virtio_ring.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index cfb028ca238e..f72d5ae2cd8f 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -205,11 +205,9 @@ struct vring_virtqueue { #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) -static inline bool virtqueue_use_indirect(struct virtqueue *_vq, +static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq, unsigned int total_sg) { - struct vring_virtqueue *vq = to_vvq(_vq); - /* * If the host supports indirect descriptor tables, and we have multiple * buffers, then go indirect. FIXME: tune this threshold @@ -499,7 +497,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, head = vq->free_head; - if (virtqueue_use_indirect(_vq, total_sg)) + if (virtqueue_use_indirect(vq, total_sg)) desc = alloc_indirect_split(_vq, total_sg, gfp); else { desc = NULL; @@ -1178,7 +1176,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, BUG_ON(total_sg == 0); - if (virtqueue_use_indirect(_vq, total_sg)) { + if (virtqueue_use_indirect(vq, total_sg)) { err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, in_sgs, data, gfp); if (err != -ENOMEM) { -- cgit v1.2.3 From b4b4ff73ef047556e81694174bb8561457f25eeb Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Mon, 28 Mar 2022 18:58:17 +0800 Subject: virtio_ring: add unlikely annotation for free descs check The 'if (vq->vq.num_free < descs_used)' check will almost always be false. 
Signed-off-by: Xianting Tian Message-Id: <20220328105817.1028065-2-xianting.tian@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Stefano Garzarella --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index f72d5ae2cd8f..0dc930de72f5 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -517,7 +517,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, descs_used = total_sg; } - if (vq->vq.num_free < descs_used) { + if (unlikely(vq->vq.num_free < descs_used)) { pr_debug("Can't add buf len %i - avail = %i\n", descs_used, vq->vq.num_free); /* FIXME: for historical reasons, we force a notify here if -- cgit v1.2.3 From ea239a67461a34839e2172ead3043295a9173389 Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:42 +0530 Subject: virtio-vdpa: don't set callback if virtio doesn't need it There's no need for setting callbacks for the driver that doesn't care about that. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-3-gdawar@xilinx.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 76504559bc25..46c71653f508 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -184,7 +184,7 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, } /* Setup virtqueue callback */ - cb.callback = virtio_vdpa_virtqueue_cb; + cb.callback = callback ? 
virtio_vdpa_virtqueue_cb : NULL; cb.private = info; ops->set_vq_cb(vdpa, index, &cb); ops->set_vq_num(vdpa, index, virtqueue_get_vring_size(vq)); -- cgit v1.2.3 From ae967246d0997a684093ffc06a14999292ad9276 Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:43 +0530 Subject: vhost-vdpa: passing iotlb to IOMMU mapping helpers To prepare for the ASID support for vhost-vdpa, try to pass IOTLB object to dma helpers. No functional changes, it's just a preparation for support multiple IOTLBs. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-4-gdawar@xilinx.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 67 +++++++++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 30 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 4c2f0bd06285..6d670e32e67b 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -537,10 +537,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, return r; } -static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last) +static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, + u64 start, u64 last) { struct vhost_dev *dev = &v->vdev; - struct vhost_iotlb *iotlb = dev->iotlb; struct vhost_iotlb_map *map; struct page *page; unsigned long pfn, pinned; @@ -559,10 +560,10 @@ static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last) } } -static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, u64 start, u64 last) +static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, + u64 start, u64 last) { - struct vhost_dev *dev = &v->vdev; - struct vhost_iotlb *iotlb = dev->iotlb; struct vhost_iotlb_map *map; struct vdpa_map_file *map_file; @@ -574,21 +575,24 @@ static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, u64 start, u64 last) } } -static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) 
+static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, + u64 start, u64 last) { struct vdpa_device *vdpa = v->vdpa; if (vdpa->use_va) - return vhost_vdpa_va_unmap(v, start, last); + return vhost_vdpa_va_unmap(v, iotlb, start, last); - return vhost_vdpa_pa_unmap(v, start, last); + return vhost_vdpa_pa_unmap(v, iotlb, start, last); } static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v) { struct vhost_dev *dev = &v->vdev; + struct vhost_iotlb *iotlb = dev->iotlb; - vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1); + vhost_vdpa_iotlb_unmap(v, iotlb, 0ULL, 0ULL - 1); kfree(dev->iotlb); dev->iotlb = NULL; } @@ -615,15 +619,15 @@ static int perm_to_iommu_flags(u32 perm) return flags | IOMMU_CACHE; } -static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova, - u64 size, u64 pa, u32 perm, void *opaque) +static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, + u64 iova, u64 size, u64 pa, u32 perm, void *opaque) { struct vhost_dev *dev = &v->vdev; struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; int r = 0; - r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1, + r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1, pa, perm, opaque); if (r) return r; @@ -632,13 +636,13 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova, r = ops->dma_map(vdpa, iova, size, pa, perm, opaque); } else if (ops->set_map) { if (!v->in_batch) - r = ops->set_map(vdpa, dev->iotlb); + r = ops->set_map(vdpa, iotlb); } else { r = iommu_map(v->domain, iova, pa, size, perm_to_iommu_flags(perm)); } if (r) { - vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); + vhost_iotlb_del_range(iotlb, iova, iova + size - 1); return r; } @@ -648,25 +652,27 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova, return 0; } -static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) +static void vhost_vdpa_unmap(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, + u64 iova, u64 size) { - 
struct vhost_dev *dev = &v->vdev; struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; - vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1); + vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1); if (ops->dma_map) { ops->dma_unmap(vdpa, iova, size); } else if (ops->set_map) { if (!v->in_batch) - ops->set_map(vdpa, dev->iotlb); + ops->set_map(vdpa, iotlb); } else { iommu_unmap(v->domain, iova, size); } } static int vhost_vdpa_va_map(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, u64 iova, u64 size, u64 uaddr, u32 perm) { struct vhost_dev *dev = &v->vdev; @@ -696,7 +702,7 @@ static int vhost_vdpa_va_map(struct vhost_vdpa *v, offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start; map_file->offset = offset; map_file->file = get_file(vma->vm_file); - ret = vhost_vdpa_map(v, map_iova, map_size, uaddr, + ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr, perm, map_file); if (ret) { fput(map_file->file); @@ -709,7 +715,7 @@ next: map_iova += map_size; } if (ret) - vhost_vdpa_unmap(v, iova, map_iova - iova); + vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova); mmap_read_unlock(dev->mm); @@ -717,6 +723,7 @@ next: } static int vhost_vdpa_pa_map(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, u64 iova, u64 size, u64 uaddr, u32 perm) { struct vhost_dev *dev = &v->vdev; @@ -780,7 +787,7 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v, if (last_pfn && (this_pfn != last_pfn + 1)) { /* Pin a contiguous chunk of memory */ csize = PFN_PHYS(last_pfn - map_pfn + 1); - ret = vhost_vdpa_map(v, iova, csize, + ret = vhost_vdpa_map(v, iotlb, iova, csize, PFN_PHYS(map_pfn), perm, NULL); if (ret) { @@ -810,7 +817,7 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v, } /* Pin the rest chunk */ - ret = vhost_vdpa_map(v, iova, PFN_PHYS(last_pfn - map_pfn + 1), + ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1), PFN_PHYS(map_pfn), perm, NULL); out: if (ret) { @@ -830,7 +837,7 @@ out: for (pfn = map_pfn; pfn <= 
last_pfn; pfn++) unpin_user_page(pfn_to_page(pfn)); } - vhost_vdpa_unmap(v, start, size); + vhost_vdpa_unmap(v, iotlb, start, size); } unlock: mmap_read_unlock(dev->mm); @@ -841,11 +848,10 @@ free: } static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, struct vhost_iotlb_msg *msg) { - struct vhost_dev *dev = &v->vdev; struct vdpa_device *vdpa = v->vdpa; - struct vhost_iotlb *iotlb = dev->iotlb; if (msg->iova < v->range.first || !msg->size || msg->iova > U64_MAX - msg->size + 1 || @@ -857,10 +863,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, return -EEXIST; if (vdpa->use_va) - return vhost_vdpa_va_map(v, msg->iova, msg->size, + return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size, msg->uaddr, msg->perm); - return vhost_vdpa_pa_map(v, msg->iova, msg->size, msg->uaddr, + return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr, msg->perm); } @@ -870,6 +876,7 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev); struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + struct vhost_iotlb *iotlb = dev->iotlb; int r = 0; mutex_lock(&dev->mutex); @@ -880,17 +887,17 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, switch (msg->type) { case VHOST_IOTLB_UPDATE: - r = vhost_vdpa_process_iotlb_update(v, msg); + r = vhost_vdpa_process_iotlb_update(v, iotlb, msg); break; case VHOST_IOTLB_INVALIDATE: - vhost_vdpa_unmap(v, msg->iova, msg->size); + vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size); break; case VHOST_IOTLB_BATCH_BEGIN: v->in_batch = true; break; case VHOST_IOTLB_BATCH_END: if (v->in_batch && ops->set_map) - ops->set_map(vdpa, dev->iotlb); + ops->set_map(vdpa, iotlb); v->in_batch = false; break; default: -- cgit v1.2.3 From 0b7ee47c5f36634926def0142a515eafedc8a779 Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:44 +0530 Subject: vhost-vdpa: 
switch to use vhost-vdpa specific IOTLB To ease the implementation of per group ASID support for vDPA device. This patch switches to use a vhost-vdpa specific IOTLB to avoid the unnecessary refactoring of the vhost core. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-5-gdawar@xilinx.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 6d670e32e67b..632c43eb5ecf 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -39,6 +39,7 @@ struct vhost_vdpa { struct vhost_virtqueue *vqs; struct completion completion; struct vdpa_device *vdpa; + struct vhost_iotlb *iotlb; struct device dev; struct cdev cdev; atomic_t opened; @@ -589,12 +590,11 @@ static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v) { - struct vhost_dev *dev = &v->vdev; - struct vhost_iotlb *iotlb = dev->iotlb; + struct vhost_iotlb *iotlb = v->iotlb; vhost_vdpa_iotlb_unmap(v, iotlb, 0ULL, 0ULL - 1); - kfree(dev->iotlb); - dev->iotlb = NULL; + kfree(v->iotlb); + v->iotlb = NULL; } static int perm_to_iommu_flags(u32 perm) @@ -876,7 +876,7 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev); struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; - struct vhost_iotlb *iotlb = dev->iotlb; + struct vhost_iotlb *iotlb = v->iotlb; int r = 0; mutex_lock(&dev->mutex); @@ -1017,15 +1017,15 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false, vhost_vdpa_process_iotlb_msg); - dev->iotlb = vhost_iotlb_alloc(0, 0); - if (!dev->iotlb) { + v->iotlb = vhost_iotlb_alloc(0, 0); + if (!v->iotlb) { r = -ENOMEM; goto err_init_iotlb; } r = vhost_vdpa_alloc_domain(v); if (r) - goto 
err_init_iotlb; + goto err_alloc_domain; vhost_vdpa_set_iova_range(v); @@ -1033,6 +1033,8 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) return 0; +err_alloc_domain: + vhost_vdpa_iotlb_free(v); err_init_iotlb: vhost_dev_cleanup(&v->vdev); kfree(vqs); -- cgit v1.2.3 From d4821902e43453b85b31329441a9f6ac071228a8 Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:45 +0530 Subject: vdpa: introduce virtqueue groups This patch introduces virtqueue groups to vDPA device. The virtqueue group is the minimal set of virtqueues that must share an address space. And the address space identifier could only be attached to a specific virtqueue group. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-6-gdawar@xilinx.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/alibaba/eni_vdpa.c | 2 +- drivers/vdpa/ifcvf/ifcvf_main.c | 8 +++++++- drivers/vdpa/mlx5/net/mlx5_vnet.c | 8 +++++++- drivers/vdpa/vdpa.c | 3 +++ drivers/vdpa/vdpa_sim/vdpa_sim.c | 9 ++++++++- drivers/vdpa/vdpa_sim/vdpa_sim.h | 1 + drivers/vdpa/vdpa_user/vduse_dev.c | 2 +- drivers/vdpa/virtio_pci/vp_vdpa.c | 2 +- include/linux/vdpa.h | 16 ++++++++++++---- 9 files changed, 41 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/alibaba/eni_vdpa.c b/drivers/vdpa/alibaba/eni_vdpa.c index f480d54f308c..3e93c5eb0cf9 100644 --- a/drivers/vdpa/alibaba/eni_vdpa.c +++ b/drivers/vdpa/alibaba/eni_vdpa.c @@ -470,7 +470,7 @@ static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) return ret; eni_vdpa = vdpa_alloc_device(struct eni_vdpa, vdpa, - dev, &eni_vdpa_ops, NULL, false); + dev, &eni_vdpa_ops, 1, NULL, false); if (IS_ERR(eni_vdpa)) { ENI_ERR(pdev, "failed to allocate vDPA structure\n"); return PTR_ERR(eni_vdpa); diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 4366320fb68d..fde33e143246 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ 
b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -626,6 +626,11 @@ static size_t ifcvf_vdpa_get_config_size(struct vdpa_device *vdpa_dev) return vf->config_size; } +static u32 ifcvf_vdpa_get_vq_group(struct vdpa_device *vdpa, u16 idx) +{ + return 0; +} + static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev, unsigned int offset, void *buf, unsigned int len) @@ -704,6 +709,7 @@ static const struct vdpa_config_ops ifc_vdpa_ops = { .get_device_id = ifcvf_vdpa_get_device_id, .get_vendor_id = ifcvf_vdpa_get_vendor_id, .get_vq_align = ifcvf_vdpa_get_vq_align, + .get_vq_group = ifcvf_vdpa_get_vq_group, .get_config_size = ifcvf_vdpa_get_config_size, .get_config = ifcvf_vdpa_get_config, .set_config = ifcvf_vdpa_set_config, @@ -758,7 +764,7 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, pdev = ifcvf_mgmt_dev->pdev; dev = &pdev->dev; adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa, - dev, &ifc_vdpa_ops, name, false); + dev, &ifc_vdpa_ops, 1, name, false); if (IS_ERR(adapter)) { IFCVF_ERR(pdev, "Failed to allocate vDPA structure"); return PTR_ERR(adapter); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 57cfc64248b7..5647e12056d8 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1947,6 +1947,11 @@ static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev) return PAGE_SIZE; } +static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdpa, u16 idx) +{ + return 0; +} + enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9, MLX5_VIRTIO_NET_F_CSUM = 1 << 10, MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11, @@ -2582,6 +2587,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = { .get_vq_notification = mlx5_get_vq_notification, .get_vq_irq = mlx5_get_vq_irq, .get_vq_align = mlx5_vdpa_get_vq_align, + .get_vq_group = mlx5_vdpa_get_vq_group, .get_device_features = mlx5_vdpa_get_device_features, .set_driver_features = mlx5_vdpa_set_driver_features, .get_driver_features = 
mlx5_vdpa_get_driver_features, @@ -2817,7 +2823,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, } ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, - name, false); + 1, name, false); if (IS_ERR(ndev)) return PTR_ERR(ndev); diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 9d3534a0bc5f..4ca54779be2c 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -158,6 +158,7 @@ static void vdpa_release_dev(struct device *d) * initialized but before registered. * @parent: the parent device * @config: the bus operations that is supported by this device + * @ngroups: number of groups supported by this device * @size: size of the parent structure that contains private data * @name: name of the vdpa device; optional. * @use_va: indicate whether virtual address must be used by this device @@ -170,6 +171,7 @@ static void vdpa_release_dev(struct device *d) */ struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, + unsigned int ngroups, size_t size, const char *name, bool use_va) { @@ -202,6 +204,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, vdev->config = config; vdev->features_valid = false; vdev->use_va = use_va; + vdev->ngroups = ngroups; if (name) err = dev_set_name(&vdev->dev, "%s", name); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index ddbe142af09a..c98cb1f869fa 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -250,7 +250,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) else ops = &vdpasim_config_ops; - vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, + vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, 1, dev_attr->name, false); if (IS_ERR(vdpasim)) { ret = PTR_ERR(vdpasim); @@ -399,6 +399,11 @@ static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa) return VDPASIM_QUEUE_ALIGN; } +static u32 
vdpasim_get_vq_group(struct vdpa_device *vdpa, u16 idx) +{ + return 0; +} + static u64 vdpasim_get_device_features(struct vdpa_device *vdpa) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); @@ -620,6 +625,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = { .set_vq_state = vdpasim_set_vq_state, .get_vq_state = vdpasim_get_vq_state, .get_vq_align = vdpasim_get_vq_align, + .get_vq_group = vdpasim_get_vq_group, .get_device_features = vdpasim_get_device_features, .set_driver_features = vdpasim_set_driver_features, .get_driver_features = vdpasim_get_driver_features, @@ -650,6 +656,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { .set_vq_state = vdpasim_set_vq_state, .get_vq_state = vdpasim_get_vq_state, .get_vq_align = vdpasim_get_vq_align, + .get_vq_group = vdpasim_get_vq_group, .get_device_features = vdpasim_get_device_features, .set_driver_features = vdpasim_set_driver_features, .get_driver_features = vdpasim_get_driver_features, diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h index cd58e888bcf3..0be7c1e7ef80 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h @@ -63,6 +63,7 @@ struct vdpasim { u32 status; u32 generation; u64 features; + u32 groups; /* spinlock to synchronize iommu table */ spinlock_t iommu_lock; }; diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index f85d1a08ed87..4ee6850b9a68 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -1495,7 +1495,7 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) return -EEXIST; vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev, - &vduse_vdpa_config_ops, name, true); + &vduse_vdpa_config_ops, 1, name, true); if (IS_ERR(vdev)) return PTR_ERR(vdev); diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index cce101e6a940..e18dfe993901 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ 
b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -466,7 +466,7 @@ static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) return ret; vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa, - dev, &vp_vdpa_ops, NULL, false); + dev, &vp_vdpa_ops, 1, NULL, false); if (IS_ERR(vp_vdpa)) { dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n"); return PTR_ERR(vp_vdpa); diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 2cb14847831e..e4e53574183e 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -85,6 +85,7 @@ struct vdpa_device { bool use_va; u32 nvqs; struct vdpa_mgmt_dev *mdev; + unsigned int ngroups; }; /** @@ -172,6 +173,10 @@ struct vdpa_map_file { * for the device * @vdev: vdpa device * Returns virtqueue algin requirement + * @get_vq_group: Get the group id for a specific virtqueue + * @vdev: vdpa device + * @idx: virtqueue index + * Returns u32: group id for this virtqueue * @get_device_features: Get virtio features supported by the device * @vdev: vdpa device * Returns the virtio features support by the @@ -286,6 +291,7 @@ struct vdpa_config_ops { /* Device ops */ u32 (*get_vq_align)(struct vdpa_device *vdev); + u32 (*get_vq_group)(struct vdpa_device *vdev, u16 idx); u64 (*get_device_features)(struct vdpa_device *vdev); int (*set_driver_features)(struct vdpa_device *vdev, u64 features); u64 (*get_driver_features)(struct vdpa_device *vdev); @@ -318,6 +324,7 @@ struct vdpa_config_ops { struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, + unsigned int ngroups, size_t size, const char *name, bool use_va); @@ -328,17 +335,18 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, * @member: the name of struct vdpa_device within the @dev_struct * @parent: the parent device * @config: the bus operations that is supported by this device + * @ngroups: the number of virtqueue groups supported by this device * @name: name of the vdpa device * @use_va: indicate whether virtual 
address must be used by this device * * Return allocated data structure or ERR_PTR upon error */ -#define vdpa_alloc_device(dev_struct, member, parent, config, name, use_va) \ - container_of(__vdpa_alloc_device( \ - parent, config, \ +#define vdpa_alloc_device(dev_struct, member, parent, config, ngroups, name, use_va) \ + container_of((__vdpa_alloc_device( \ + parent, config, ngroups, \ sizeof(dev_struct) + \ BUILD_BUG_ON_ZERO(offsetof( \ - dev_struct, member)), name, use_va), \ + dev_struct, member)), name, use_va)), \ dev_struct, member) int vdpa_register_device(struct vdpa_device *vdev, u32 nvqs); -- cgit v1.2.3 From db9adcbf4286ad1c1fca091a870db6e49bb0df07 Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:46 +0530 Subject: vdpa: multiple address spaces support This patch introduces multiple address space support for vDPA devices. The idea is to identify a specific address space via a dedicated identifier - the ASID. During vDPA device allocation, the vDPA device driver needs to report the number of address spaces supported by the device; the DMA mapping ops of the vDPA device then need to be extended to support ASIDs. This helps to isolate the environments for virtqueues that will not be assigned directly. E.g., in the case of virtio-net, the control virtqueue will not be assigned directly to the guest. As a start, simply claim 1 virtqueue group and 1 address space for all vDPA devices. vhost-vDPA will simply reject any device with more than 1 virtqueue group or address space. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-7-gdawar@xilinx.com> Signed-off-by: Michael S.
Tsirkin --- drivers/vdpa/alibaba/eni_vdpa.c | 2 +- drivers/vdpa/ifcvf/ifcvf_main.c | 2 +- drivers/vdpa/mlx5/net/mlx5_vnet.c | 5 +++-- drivers/vdpa/vdpa.c | 4 +++- drivers/vdpa/vdpa_sim/vdpa_sim.c | 10 ++++++---- drivers/vdpa/vdpa_user/vduse_dev.c | 3 ++- drivers/vdpa/virtio_pci/vp_vdpa.c | 2 +- drivers/vhost/vdpa.c | 14 +++++++++----- include/linux/vdpa.h | 28 +++++++++++++++++++--------- 9 files changed, 45 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/alibaba/eni_vdpa.c b/drivers/vdpa/alibaba/eni_vdpa.c index 3e93c5eb0cf9..5a09a09cca70 100644 --- a/drivers/vdpa/alibaba/eni_vdpa.c +++ b/drivers/vdpa/alibaba/eni_vdpa.c @@ -470,7 +470,7 @@ static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) return ret; eni_vdpa = vdpa_alloc_device(struct eni_vdpa, vdpa, - dev, &eni_vdpa_ops, 1, NULL, false); + dev, &eni_vdpa_ops, 1, 1, NULL, false); if (IS_ERR(eni_vdpa)) { ENI_ERR(pdev, "failed to allocate vDPA structure\n"); return PTR_ERR(eni_vdpa); diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index fde33e143246..c1767a0ce630 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -764,7 +764,7 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, pdev = ifcvf_mgmt_dev->pdev; dev = &pdev->dev; adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa, - dev, &ifc_vdpa_ops, 1, name, false); + dev, &ifc_vdpa_ops, 1, 1, name, false); if (IS_ERR(adapter)) { IFCVF_ERR(pdev, "Failed to allocate vDPA structure"); return PTR_ERR(adapter); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 5647e12056d8..dcca782c698e 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2409,7 +2409,8 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) return mvdev->generation; } -static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb) +static int 
mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid, + struct vhost_iotlb *iotlb) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); @@ -2823,7 +2824,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, } ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, - 1, name, false); + 1, 1, name, false); if (IS_ERR(ndev)) return PTR_ERR(ndev); diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 4ca54779be2c..f15fb11010a8 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -159,6 +159,7 @@ static void vdpa_release_dev(struct device *d) * @parent: the parent device * @config: the bus operations that is supported by this device * @ngroups: number of groups supported by this device + * @nas: number of address spaces supported by this device * @size: size of the parent structure that contains private data * @name: name of the vdpa device; optional. * @use_va: indicate whether virtual address must be used by this device @@ -171,7 +172,7 @@ static void vdpa_release_dev(struct device *d) */ struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, - unsigned int ngroups, + unsigned int ngroups, unsigned int nas, size_t size, const char *name, bool use_va) { @@ -205,6 +206,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, vdev->features_valid = false; vdev->use_va = use_va; vdev->ngroups = ngroups; + vdev->nas = nas; if (name) err = dev_set_name(&vdev->dev, "%s", name); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index c98cb1f869fa..659e2e2e4b0c 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -251,7 +251,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) ops = &vdpasim_config_ops; vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, 1, - dev_attr->name, false); + 1, 
dev_attr->name, false); if (IS_ERR(vdpasim)) { ret = PTR_ERR(vdpasim); goto err_alloc; @@ -539,7 +539,7 @@ static struct vdpa_iova_range vdpasim_get_iova_range(struct vdpa_device *vdpa) return range; } -static int vdpasim_set_map(struct vdpa_device *vdpa, +static int vdpasim_set_map(struct vdpa_device *vdpa, unsigned int asid, struct vhost_iotlb *iotlb) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); @@ -566,7 +566,8 @@ err: return ret; } -static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size, +static int vdpasim_dma_map(struct vdpa_device *vdpa, unsigned int asid, + u64 iova, u64 size, u64 pa, u32 perm, void *opaque) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); @@ -580,7 +581,8 @@ static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size, return ret; } -static int vdpasim_dma_unmap(struct vdpa_device *vdpa, u64 iova, u64 size) +static int vdpasim_dma_unmap(struct vdpa_device *vdpa, unsigned int asid, + u64 iova, u64 size) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 4ee6850b9a68..d503848b3b6e 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -693,6 +693,7 @@ static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa) } static int vduse_vdpa_set_map(struct vdpa_device *vdpa, + unsigned int asid, struct vhost_iotlb *iotlb) { struct vduse_dev *dev = vdpa_to_vduse(vdpa); @@ -1495,7 +1496,7 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) return -EEXIST; vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev, - &vduse_vdpa_config_ops, 1, name, true); + &vduse_vdpa_config_ops, 1, 1, name, true); if (IS_ERR(vdev)) return PTR_ERR(vdev); diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index e18dfe993901..35acba0e8d6d 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -466,7 +466,7 @@ static int 
vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) return ret; vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa, - dev, &vp_vdpa_ops, 1, NULL, false); + dev, &vp_vdpa_ops, 1, 1, NULL, false); if (IS_ERR(vp_vdpa)) { dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n"); return PTR_ERR(vp_vdpa); diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 632c43eb5ecf..9202ff97ddb5 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -633,10 +633,10 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, return r; if (ops->dma_map) { - r = ops->dma_map(vdpa, iova, size, pa, perm, opaque); + r = ops->dma_map(vdpa, 0, iova, size, pa, perm, opaque); } else if (ops->set_map) { if (!v->in_batch) - r = ops->set_map(vdpa, iotlb); + r = ops->set_map(vdpa, 0, iotlb); } else { r = iommu_map(v->domain, iova, pa, size, perm_to_iommu_flags(perm)); @@ -662,10 +662,10 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1); if (ops->dma_map) { - ops->dma_unmap(vdpa, iova, size); + ops->dma_unmap(vdpa, 0, iova, size); } else if (ops->set_map) { if (!v->in_batch) - ops->set_map(vdpa, iotlb); + ops->set_map(vdpa, 0, iotlb); } else { iommu_unmap(v->domain, iova, size); } @@ -897,7 +897,7 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, break; case VHOST_IOTLB_BATCH_END: if (v->in_batch && ops->set_map) - ops->set_map(vdpa, iotlb); + ops->set_map(vdpa, 0, iotlb); v->in_batch = false; break; default: @@ -1163,6 +1163,10 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) int minor; int r; + /* Only support 1 address space and 1 groups */ + if (vdpa->ngroups != 1 || vdpa->nas != 1) + return -EOPNOTSUPP; + v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!v) return -ENOMEM; diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index e4e53574183e..1515748e84ff 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -69,6 +69,8 @@ 
struct vdpa_mgmt_dev; * @cf_lock: Protects get and set access to configuration layout. * @index: device index * @features_valid: were features initialized? for legacy guests + * @ngroups: the number of virtqueue groups + * @nas: the number of address spaces * @use_va: indicate whether virtual address must be used by this device * @nvqs: maximum number of supported virtqueues * @mdev: management device pointer; caller must setup when registering device as part @@ -86,6 +88,7 @@ struct vdpa_device { u32 nvqs; struct vdpa_mgmt_dev *mdev; unsigned int ngroups; + unsigned int nas; }; /** @@ -241,6 +244,7 @@ struct vdpa_map_file { * Needed for device that using device * specific DMA translation (on-chip IOMMU) * @vdev: vdpa device + * @asid: address space identifier * @iotlb: vhost memory mapping to be * used by the vDPA * Returns integer: success (0) or error (< 0) @@ -249,6 +253,7 @@ struct vdpa_map_file { * specific DMA translation (on-chip IOMMU) * and preferring incremental map. * @vdev: vdpa device + * @asid: address space identifier * @iova: iova to be mapped * @size: size of the area * @pa: physical address for the map @@ -260,6 +265,7 @@ struct vdpa_map_file { * specific DMA translation (on-chip IOMMU) * and preferring incremental unmap. 
* @vdev: vdpa device + * @asid: address space identifier * @iova: iova to be unmapped * @size: size of the area * Returns integer: success (0) or error (< 0) @@ -313,10 +319,12 @@ struct vdpa_config_ops { struct vdpa_iova_range (*get_iova_range)(struct vdpa_device *vdev); /* DMA ops */ - int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb); - int (*dma_map)(struct vdpa_device *vdev, u64 iova, u64 size, - u64 pa, u32 perm, void *opaque); - int (*dma_unmap)(struct vdpa_device *vdev, u64 iova, u64 size); + int (*set_map)(struct vdpa_device *vdev, unsigned int asid, + struct vhost_iotlb *iotlb); + int (*dma_map)(struct vdpa_device *vdev, unsigned int asid, + u64 iova, u64 size, u64 pa, u32 perm, void *opaque); + int (*dma_unmap)(struct vdpa_device *vdev, unsigned int asid, + u64 iova, u64 size); /* Free device resources */ void (*free)(struct vdpa_device *vdev); @@ -324,7 +332,7 @@ struct vdpa_config_ops { struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, - unsigned int ngroups, + unsigned int ngroups, unsigned int nas, size_t size, const char *name, bool use_va); @@ -336,17 +344,19 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, * @parent: the parent device * @config: the bus operations that is supported by this device * @ngroups: the number of virtqueue groups supported by this device + * @nas: the number of address spaces * @name: name of the vdpa device * @use_va: indicate whether virtual address must be used by this device * * Return allocated data structure or ERR_PTR upon error */ -#define vdpa_alloc_device(dev_struct, member, parent, config, ngroups, name, use_va) \ +#define vdpa_alloc_device(dev_struct, member, parent, config, ngroups, nas, \ + name, use_va) \ container_of((__vdpa_alloc_device( \ - parent, config, ngroups, \ - sizeof(dev_struct) + \ + parent, config, ngroups, nas, \ + (sizeof(dev_struct) + \ BUILD_BUG_ON_ZERO(offsetof( \ - dev_struct, member)), name, use_va)), 
\ + dev_struct, member))), name, use_va)), \ dev_struct, member) int vdpa_register_device(struct vdpa_device *vdev, u32 nvqs); -- cgit v1.2.3 From 1cb108994c6830cc6a6e066ad7d9a22ef59fa167 Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:48 +0530 Subject: vhost_iotlb: split out IOTLB initialization This patch splits out IOTLB initialization to make sure it could be reused by external modules. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-9-gdawar@xilinx.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/iotlb.c | 23 ++++++++++++++++++----- include/linux/vhost_iotlb.h | 2 ++ 2 files changed, 20 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c index 5829cf2d0552..ea61330a3431 100644 --- a/drivers/vhost/iotlb.c +++ b/drivers/vhost/iotlb.c @@ -125,6 +125,23 @@ void vhost_iotlb_del_range(struct vhost_iotlb *iotlb, u64 start, u64 last) } EXPORT_SYMBOL_GPL(vhost_iotlb_del_range); +/** + * vhost_iotlb_init - initialize a vhost IOTLB + * @iotlb: the IOTLB that needs to be initialized + * @limit: maximum number of IOTLB entries + * @flags: VHOST_IOTLB_FLAG_XXX + */ +void vhost_iotlb_init(struct vhost_iotlb *iotlb, unsigned int limit, + unsigned int flags) +{ + iotlb->root = RB_ROOT_CACHED; + iotlb->limit = limit; + iotlb->nmaps = 0; + iotlb->flags = flags; + INIT_LIST_HEAD(&iotlb->list); +} +EXPORT_SYMBOL_GPL(vhost_iotlb_init); + /** * vhost_iotlb_alloc - add a new vhost IOTLB * @limit: maximum number of IOTLB entries @@ -139,11 +156,7 @@ struct vhost_iotlb *vhost_iotlb_alloc(unsigned int limit, unsigned int flags) if (!iotlb) return NULL; - iotlb->root = RB_ROOT_CACHED; - iotlb->limit = limit; - iotlb->nmaps = 0; - iotlb->flags = flags; - INIT_LIST_HEAD(&iotlb->list); + vhost_iotlb_init(iotlb, limit, flags); return iotlb; } diff --git a/include/linux/vhost_iotlb.h b/include/linux/vhost_iotlb.h index 2d0e2f52f938..e79a40838998 100644 --- 
a/include/linux/vhost_iotlb.h +++ b/include/linux/vhost_iotlb.h @@ -36,6 +36,8 @@ int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, u64 start, u64 last, u64 addr, unsigned int perm); void vhost_iotlb_del_range(struct vhost_iotlb *iotlb, u64 start, u64 last); +void vhost_iotlb_init(struct vhost_iotlb *iotlb, unsigned int limit, + unsigned int flags); struct vhost_iotlb *vhost_iotlb_alloc(unsigned int limit, unsigned int flags); void vhost_iotlb_free(struct vhost_iotlb *iotlb); void vhost_iotlb_reset(struct vhost_iotlb *iotlb); -- cgit v1.2.3 From 91233ad711866f4e375742d84ef3ed6aab9daa96 Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:49 +0530 Subject: vhost: support ASID in IOTLB API This patch allows userspace to send ASID-based IOTLB messages to vhost. The idea is to use the reserved u32 field in the existing V2 IOTLB message. A vhost device should advertise this capability via the VHOST_BACKEND_F_IOTLB_ASID backend feature. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-10-gdawar@xilinx.com> Signed-off-by: Michael S.
Tsirkin --- drivers/vhost/vdpa.c | 5 ++++- drivers/vhost/vhost.c | 23 ++++++++++++++++++----- drivers/vhost/vhost.h | 4 ++-- include/uapi/linux/vhost_types.h | 6 +++++- 4 files changed, 29 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 9202ff97ddb5..174c9e81df4e 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -870,7 +870,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, msg->perm); } -static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, +static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid, struct vhost_iotlb_msg *msg) { struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev); @@ -879,6 +879,9 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, struct vhost_iotlb *iotlb = v->iotlb; int r = 0; + if (asid != 0) + return -EINVAL; + mutex_lock(&dev->mutex); r = vhost_dev_check_owner(dev); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index d02173fb290c..d1e58f976f6e 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -468,7 +468,7 @@ void vhost_dev_init(struct vhost_dev *dev, struct vhost_virtqueue **vqs, int nvqs, int iov_limit, int weight, int byte_weight, bool use_worker, - int (*msg_handler)(struct vhost_dev *dev, + int (*msg_handler)(struct vhost_dev *dev, u32 asid, struct vhost_iotlb_msg *msg)) { struct vhost_virtqueue *vq; @@ -1090,11 +1090,14 @@ static bool umem_access_ok(u64 uaddr, u64 size, int access) return true; } -static int vhost_process_iotlb_msg(struct vhost_dev *dev, +static int vhost_process_iotlb_msg(struct vhost_dev *dev, u32 asid, struct vhost_iotlb_msg *msg) { int ret = 0; + if (asid != 0) + return -EINVAL; + mutex_lock(&dev->mutex); vhost_dev_lock_vqs(dev); switch (msg->type) { @@ -1141,6 +1144,7 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev, struct vhost_iotlb_msg msg; size_t offset; int type, ret; + u32 asid = 0; ret = copy_from_iter(&type, sizeof(type), 
from); if (ret != sizeof(type)) { @@ -1156,7 +1160,16 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev, offset = offsetof(struct vhost_msg, iotlb) - sizeof(int); break; case VHOST_IOTLB_MSG_V2: - offset = sizeof(__u32); + if (vhost_backend_has_feature(dev->vqs[0], + VHOST_BACKEND_F_IOTLB_ASID)) { + ret = copy_from_iter(&asid, sizeof(asid), from); + if (ret != sizeof(asid)) { + ret = -EINVAL; + goto done; + } + offset = sizeof(__u16); + } else + offset = sizeof(__u32); break; default: ret = -EINVAL; @@ -1178,9 +1191,9 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev, } if (dev->msg_handler) - ret = dev->msg_handler(dev, &msg); + ret = dev->msg_handler(dev, asid, &msg); else - ret = vhost_process_iotlb_msg(dev, &msg); + ret = vhost_process_iotlb_msg(dev, asid, &msg); if (ret) { ret = -EFAULT; goto done; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 638bb640d6b4..9f238d6c7b58 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -161,7 +161,7 @@ struct vhost_dev { int byte_weight; u64 kcov_handle; bool use_worker; - int (*msg_handler)(struct vhost_dev *dev, + int (*msg_handler)(struct vhost_dev *dev, u32 asid, struct vhost_iotlb_msg *msg); }; @@ -169,7 +169,7 @@ bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len); void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs, int iov_limit, int weight, int byte_weight, bool use_worker, - int (*msg_handler)(struct vhost_dev *dev, + int (*msg_handler)(struct vhost_dev *dev, u32 asid, struct vhost_iotlb_msg *msg)); long vhost_dev_set_owner(struct vhost_dev *dev); bool vhost_dev_has_owner(struct vhost_dev *dev); diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index 76ee7016c501..634cee485abb 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -87,7 +87,7 @@ struct vhost_msg { struct vhost_msg_v2 { __u32 type; - __u32 reserved; + __u32 asid; union { struct vhost_iotlb_msg 
iotlb; __u8 padding[64]; @@ -157,5 +157,9 @@ struct vhost_vdpa_iova_range { #define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 /* IOTLB can accept batching hints */ #define VHOST_BACKEND_F_IOTLB_BATCH 0x2 +/* IOTLB can accept address space identifier through V2 type of IOTLB + * message + */ +#define VHOST_BACKEND_F_IOTLB_ASID 0x3 #endif -- cgit v1.2.3 From 3d5698793897a2b9c0060d899881d1a0591630d5 Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:50 +0530 Subject: vhost-vdpa: introduce asid based IOTLB This patch converts the vhost-vDPA device to support multiple IOTLBs tagged via ASID via hlist. This will be used for supporting multiple address spaces in the following patches. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-11-gdawar@xilinx.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 97 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 72 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 174c9e81df4e..cd1bee536c46 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -33,13 +33,21 @@ enum { #define VHOST_VDPA_DEV_MAX (1U << MINORBITS) +#define VHOST_VDPA_IOTLB_BUCKETS 16 + +struct vhost_vdpa_as { + struct hlist_node hash_link; + struct vhost_iotlb iotlb; + u32 id; +}; + struct vhost_vdpa { struct vhost_dev vdev; struct iommu_domain *domain; struct vhost_virtqueue *vqs; struct completion completion; struct vdpa_device *vdpa; - struct vhost_iotlb *iotlb; + struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS]; struct device dev; struct cdev cdev; atomic_t opened; @@ -55,6 +63,51 @@ static DEFINE_IDA(vhost_vdpa_ida); static dev_t vhost_vdpa_major; +static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid) +{ + struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS]; + struct vhost_vdpa_as *as; + + hlist_for_each_entry(as, head, hash_link) + if (as->id == asid) + return as; + + 
return NULL; +} + +static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid) +{ + struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS]; + struct vhost_vdpa_as *as; + + if (asid_to_as(v, asid)) + return NULL; + + as = kmalloc(sizeof(*as), GFP_KERNEL); + if (!as) + return NULL; + + vhost_iotlb_init(&as->iotlb, 0, 0); + as->id = asid; + hlist_add_head(&as->hash_link, head); + + return as; +} + +static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) +{ + struct vhost_vdpa_as *as = asid_to_as(v, asid); + + if (!as) + return -EINVAL; + + hlist_del(&as->hash_link); + vhost_iotlb_reset(&as->iotlb); + kfree(as); + + return 0; +} + static void handle_vq_kick(struct vhost_work *work) { struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, @@ -588,15 +641,6 @@ static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, return vhost_vdpa_pa_unmap(v, iotlb, start, last); } -static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v) -{ - struct vhost_iotlb *iotlb = v->iotlb; - - vhost_vdpa_iotlb_unmap(v, iotlb, 0ULL, 0ULL - 1); - kfree(v->iotlb); - v->iotlb = NULL; -} - static int perm_to_iommu_flags(u32 perm) { int flags = 0; @@ -876,7 +920,8 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid, struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev); struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; - struct vhost_iotlb *iotlb = v->iotlb; + struct vhost_vdpa_as *as = asid_to_as(v, 0); + struct vhost_iotlb *iotlb = &as->iotlb; int r = 0; if (asid != 0) @@ -987,6 +1032,13 @@ static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v) } } +static void vhost_vdpa_cleanup(struct vhost_vdpa *v) +{ + vhost_dev_cleanup(&v->vdev); + kfree(v->vdev.vqs); + vhost_vdpa_remove_as(v, 0); +} + static int vhost_vdpa_open(struct inode *inode, struct file *filep) { struct vhost_vdpa *v; @@ -1020,15 +1072,12 @@ static int vhost_vdpa_open(struct inode *inode, struct file 
*filep) vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false, vhost_vdpa_process_iotlb_msg); - v->iotlb = vhost_iotlb_alloc(0, 0); - if (!v->iotlb) { - r = -ENOMEM; - goto err_init_iotlb; - } + if (!vhost_vdpa_alloc_as(v, 0)) + goto err_alloc_as; r = vhost_vdpa_alloc_domain(v); if (r) - goto err_alloc_domain; + goto err_alloc_as; vhost_vdpa_set_iova_range(v); @@ -1036,11 +1085,8 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) return 0; -err_alloc_domain: - vhost_vdpa_iotlb_free(v); -err_init_iotlb: - vhost_dev_cleanup(&v->vdev); - kfree(vqs); +err_alloc_as: + vhost_vdpa_cleanup(v); err: atomic_dec(&v->opened); return r; @@ -1064,11 +1110,9 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) vhost_vdpa_clean_irq(v); vhost_vdpa_reset(v); vhost_dev_stop(&v->vdev); - vhost_vdpa_iotlb_free(v); vhost_vdpa_free_domain(v); vhost_vdpa_config_put(v); vhost_dev_cleanup(&v->vdev); - kfree(v->vdev.vqs); mutex_unlock(&d->mutex); atomic_dec(&v->opened); @@ -1164,7 +1208,7 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) const struct vdpa_config_ops *ops = vdpa->config; struct vhost_vdpa *v; int minor; - int r; + int i, r; /* Only support 1 address space and 1 groups */ if (vdpa->ngroups != 1 || vdpa->nas != 1) @@ -1212,6 +1256,9 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) init_completion(&v->completion); vdpa_set_drvdata(vdpa, v); + for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++) + INIT_HLIST_HEAD(&v->as[i]); + return 0; err: -- cgit v1.2.3 From 3ace88bd37436abc84906312146fe5158a469142 Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:51 +0530 Subject: vhost-vdpa: introduce uAPI to get the number of virtqueue groups Follows the vDPA support for multiple address spaces, this patch introduce uAPI for the userspace to know the number of virtqueue groups supported by the vDPA device. 
Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-12-gdawar@xilinx.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 4 ++++ include/uapi/linux/vhost.h | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index cd1bee536c46..92f78df0f685 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -559,6 +559,10 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, case VHOST_VDPA_GET_VRING_NUM: r = vhost_vdpa_get_vring_num(v, argp); break; + case VHOST_VDPA_GET_GROUP_NUM: + r = copy_to_user(argp, &v->vdpa->ngroups, + sizeof(v->vdpa->ngroups)); + break; case VHOST_SET_LOG_BASE: case VHOST_SET_LOG_FD: r = -ENOIOCTLCMD; diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 8f7b4a95d6f9..61317c61d768 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -145,11 +145,13 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) - /* Get the config size */ #define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) /* Get the count of all virtqueues */ #define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) +/* Get the number of virtqueue groups. */ +#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) + #endif -- cgit v1.2.3 From a0c95f201170bd559737d3cdc8a950aea62f29c6 Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:52 +0530 Subject: vhost-vdpa: introduce uAPI to get the number of address spaces This patch introduces the uAPI for getting the number of address spaces supported by this vDPA device. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-13-gdawar@xilinx.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/vdpa.c | 3 +++ include/uapi/linux/vhost.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 92f78df0f685..a017011ad1f5 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -563,6 +563,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, r = copy_to_user(argp, &v->vdpa->ngroups, sizeof(v->vdpa->ngroups)); break; + case VHOST_VDPA_GET_AS_NUM: + r = copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)); + break; case VHOST_SET_LOG_BASE: case VHOST_SET_LOG_FD: r = -ENOIOCTLCMD; diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 61317c61d768..51322008901a 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -154,4 +154,6 @@ /* Get the number of virtqueue groups. */ #define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) +/* Get the number of address spaces. */ +#define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int) #endif -- cgit v1.2.3 From 2d1fcb7758e49fd9caf150f3c70804b95b2ce80c Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:53 +0530 Subject: vhost-vdpa: uAPI to get virtqueue group id Follows the support for virtqueue group in vDPA. This patch introduces a uAPI to get the virtqueue group ID for a specific virtqueue in vhost-vdpa. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-14-gdawar@xilinx.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/vdpa.c | 8 ++++++++ include/uapi/linux/vhost.h | 8 ++++++++ 2 files changed, 16 insertions(+) (limited to 'drivers') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index a017011ad1f5..aa5cacdc5263 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -465,6 +465,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, return -EFAULT; ops->set_vq_ready(vdpa, idx, s.num); return 0; + case VHOST_VDPA_GET_VRING_GROUP: + s.index = idx; + s.num = ops->get_vq_group(vdpa, idx); + if (s.num >= vdpa->ngroups) + return -EIO; + else if (copy_to_user(argp, &s, sizeof(s))) + return -EFAULT; + return 0; case VHOST_GET_VRING_BASE: r = ops->get_vq_state(v->vdpa, idx, &vq_state); if (r) diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 51322008901a..668914c87f74 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -156,4 +156,12 @@ /* Get the number of address spaces. */ #define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int) + +/* Get the group for a virtqueue: read index, write group in num, + * The virtqueue index is stored in the index field of + * vhost_vring_state. The group for this specific virtqueue is + * returned via num field of vhost_vring_state. + */ +#define VHOST_VDPA_GET_VRING_GROUP _IOWR(VHOST_VIRTIO, 0x7B, \ + struct vhost_vring_state) #endif -- cgit v1.2.3 From 84d7c8fd3aade2fe79313003ed06ede431ec2a6d Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:54 +0530 Subject: vhost-vdpa: introduce uAPI to set group ASID Follows the vDPA support for associating ASID to a specific virtqueue group. This patch adds a uAPI to support setting them from userspace. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-15-gdawar@xilinx.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/vdpa.c | 8 ++++++++ include/uapi/linux/vhost.h | 7 +++++++ 2 files changed, 15 insertions(+) (limited to 'drivers') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index aa5cacdc5263..6c7ee0f18892 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -473,6 +473,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, else if (copy_to_user(argp, &s, sizeof(s))) return -EFAULT; return 0; + case VHOST_VDPA_SET_GROUP_ASID: + if (copy_from_user(&s, argp, sizeof(s))) + return -EFAULT; + if (s.num >= vdpa->nas) + return -EINVAL; + if (!ops->set_group_asid) + return -EOPNOTSUPP; + return ops->set_group_asid(vdpa, idx, s.num); case VHOST_GET_VRING_BASE: r = ops->get_vq_state(v->vdpa, idx, &vq_state); if (r) diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 668914c87f74..cab645d4a645 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -164,4 +164,11 @@ */ #define VHOST_VDPA_GET_VRING_GROUP _IOWR(VHOST_VIRTIO, 0x7B, \ struct vhost_vring_state) +/* Set the ASID for a virtqueue group. The group index is stored in + * the index field of vhost_vring_state, the ASID associated with this + * group is stored at num field of vhost_vring_state. + */ +#define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \ + struct vhost_vring_state) + #endif -- cgit v1.2.3 From aaca8373c4b1e010b8d748fc99d929de1bf860b8 Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:55 +0530 Subject: vhost-vdpa: support ASID based IOTLB API This patch extends the vhost-vdpa to support ASID based IOTLB API. The vhost-vdpa device will allocate multiple IOTLBs for vDPA device that supports multiple address spaces. The IOTLBs and vDPA device memory mappings are determined and maintained through ASID. Note that we still don't support vDPA device with more than one address space that depends on platform IOMMU. 
This work will be done by moving the IOMMU logic from vhost-vDPA to vDPA device driver. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-16-gdawar@xilinx.com> Signed-off-by: Michael S. Tsirkin Includes fixup: vhost-vdpa: Fix some error handling path in vhost_vdpa_process_iotlb_msg() In the error paths introduced by the original patch, a mutex may be left locked. Add the correct goto instead of a direct return. Signed-off-by: Christophe JAILLET Message-Id: <89ef0ae4c26ac3cfa440c71e97e392dcb328ac1b.1653227924.git.christophe.jaillet@wanadoo.fr> Acked-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 111 +++++++++++++++++++++++++++++++++++++++++--------- drivers/vhost/vhost.c | 2 +- 2 files changed, 93 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 6c7ee0f18892..3e86080041fc 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -28,7 +28,8 @@ enum { VHOST_VDPA_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) | - (1ULL << VHOST_BACKEND_F_IOTLB_BATCH), + (1ULL << VHOST_BACKEND_F_IOTLB_BATCH) | + (1ULL << VHOST_BACKEND_F_IOTLB_ASID), }; #define VHOST_VDPA_DEV_MAX (1U << MINORBITS) @@ -57,12 +58,20 @@ struct vhost_vdpa { struct eventfd_ctx *config_ctx; int in_batch; struct vdpa_iova_range range; + u32 batch_asid; }; static DEFINE_IDA(vhost_vdpa_ida); static dev_t vhost_vdpa_major; +static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb) +{ + struct vhost_vdpa_as *as = container_of(iotlb, struct + vhost_vdpa_as, iotlb); + return as->id; +} + static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid) { struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS]; @@ -75,6 +84,16 @@ static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid) return NULL; } +static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid) +{ + struct vhost_vdpa_as *as = asid_to_as(v, asid); + + if 
(!as) + return NULL; + + return &as->iotlb; +} + static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid) { struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS]; @@ -83,6 +102,9 @@ static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid) if (asid_to_as(v, asid)) return NULL; + if (asid >= v->vdpa->nas) + return NULL; + as = kmalloc(sizeof(*as), GFP_KERNEL); if (!as) return NULL; @@ -94,6 +116,17 @@ static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid) return as; } +static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v, + u32 asid) +{ + struct vhost_vdpa_as *as = asid_to_as(v, asid); + + if (as) + return as; + + return vhost_vdpa_alloc_as(v, asid); +} + static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) { struct vhost_vdpa_as *as = asid_to_as(v, asid); @@ -692,6 +725,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, struct vhost_dev *dev = &v->vdev; struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + u32 asid = iotlb_to_asid(iotlb); int r = 0; r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1, @@ -700,10 +734,10 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, return r; if (ops->dma_map) { - r = ops->dma_map(vdpa, 0, iova, size, pa, perm, opaque); + r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque); } else if (ops->set_map) { if (!v->in_batch) - r = ops->set_map(vdpa, 0, iotlb); + r = ops->set_map(vdpa, asid, iotlb); } else { r = iommu_map(v->domain, iova, pa, size, perm_to_iommu_flags(perm)); @@ -725,17 +759,24 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + u32 asid = iotlb_to_asid(iotlb); vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1); if (ops->dma_map) { - ops->dma_unmap(vdpa, 0, iova, size); + ops->dma_unmap(vdpa, asid, iova, 
size); } else if (ops->set_map) { if (!v->in_batch) - ops->set_map(vdpa, 0, iotlb); + ops->set_map(vdpa, asid, iotlb); } else { iommu_unmap(v->domain, iova, size); } + + /* If we are in the middle of batch processing, delay the free + * of AS until BATCH_END. + */ + if (!v->in_batch && !iotlb->nmaps) + vhost_vdpa_remove_as(v, asid); } static int vhost_vdpa_va_map(struct vhost_vdpa *v, @@ -943,19 +984,40 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid, struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev); struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; - struct vhost_vdpa_as *as = asid_to_as(v, 0); - struct vhost_iotlb *iotlb = &as->iotlb; + struct vhost_iotlb *iotlb = NULL; + struct vhost_vdpa_as *as = NULL; int r = 0; - if (asid != 0) - return -EINVAL; - mutex_lock(&dev->mutex); r = vhost_dev_check_owner(dev); if (r) goto unlock; + if (msg->type == VHOST_IOTLB_UPDATE || + msg->type == VHOST_IOTLB_BATCH_BEGIN) { + as = vhost_vdpa_find_alloc_as(v, asid); + if (!as) { + dev_err(&v->dev, "can't find and alloc asid %d\n", + asid); + r = -EINVAL; + goto unlock; + } + iotlb = &as->iotlb; + } else + iotlb = asid_to_iotlb(v, asid); + + if ((v->in_batch && v->batch_asid != asid) || !iotlb) { + if (v->in_batch && v->batch_asid != asid) { + dev_info(&v->dev, "batch id %d asid %d\n", + v->batch_asid, asid); + } + if (!iotlb) + dev_err(&v->dev, "no iotlb for asid %d\n", asid); + r = -EINVAL; + goto unlock; + } + switch (msg->type) { case VHOST_IOTLB_UPDATE: r = vhost_vdpa_process_iotlb_update(v, iotlb, msg); @@ -964,12 +1026,15 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid, vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size); break; case VHOST_IOTLB_BATCH_BEGIN: + v->batch_asid = asid; v->in_batch = true; break; case VHOST_IOTLB_BATCH_END: if (v->in_batch && ops->set_map) - ops->set_map(vdpa, 0, iotlb); + ops->set_map(vdpa, asid, iotlb); v->in_batch = false; + if 
(!iotlb->nmaps) + vhost_vdpa_remove_as(v, asid); break; default: r = -EINVAL; @@ -1057,9 +1122,17 @@ static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v) static void vhost_vdpa_cleanup(struct vhost_vdpa *v) { + struct vhost_vdpa_as *as; + u32 asid; + vhost_dev_cleanup(&v->vdev); kfree(v->vdev.vqs); - vhost_vdpa_remove_as(v, 0); + + for (asid = 0; asid < v->vdpa->nas; asid++) { + as = asid_to_as(v, asid); + if (as) + vhost_vdpa_remove_as(v, asid); + } } static int vhost_vdpa_open(struct inode *inode, struct file *filep) @@ -1095,12 +1168,9 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false, vhost_vdpa_process_iotlb_msg); - if (!vhost_vdpa_alloc_as(v, 0)) - goto err_alloc_as; - r = vhost_vdpa_alloc_domain(v); if (r) - goto err_alloc_as; + goto err_alloc_domain; vhost_vdpa_set_iova_range(v); @@ -1108,7 +1178,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) return 0; -err_alloc_as: +err_alloc_domain: vhost_vdpa_cleanup(v); err: atomic_dec(&v->opened); @@ -1233,8 +1303,11 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) int minor; int i, r; - /* Only support 1 address space and 1 groups */ - if (vdpa->ngroups != 1 || vdpa->nas != 1) + /* We can't support platform IOMMU device with more than 1 + * group or as + */ + if (!ops->set_map && !ops->dma_map && + (vdpa->ngroups > 1 || vdpa->nas > 1)) return -EOPNOTSUPP; v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index d1e58f976f6e..5022c648d9c0 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1167,7 +1167,7 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev, ret = -EINVAL; goto done; } - offset = sizeof(__u16); + offset = 0; } else offset = sizeof(__u32); break; -- cgit v1.2.3 From 05b6976212d4cffde60ff53716c40809ebdf4aee Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:56 +0530 Subject: vdpa_sim: advertise 
VIRTIO_NET_F_MTU We've already reported maximum mtu via config space, so let's advertise the feature. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-17-gdawar@xilinx.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index d5324f6fd8c7..2d1d8c59d0ea 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -26,7 +26,8 @@ #define DRV_LICENSE "GPL v2" #define VDPASIM_NET_FEATURES (VDPASIM_FEATURES | \ - (1ULL << VIRTIO_NET_F_MAC)) + (1ULL << VIRTIO_NET_F_MAC) | \ + (1ULL << VIRTIO_NET_F_MTU)) #define VDPASIM_NET_VQ_NUM 2 -- cgit v1.2.3 From ec103d983bb56037a147dcdd9798cfa4fcdab126 Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:57 +0530 Subject: vdpa_sim: factor out buffer completion logic Wrap up common buffer completion logic in to vdpasim_net_complete Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-18-gdawar@xilinx.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index 2d1d8c59d0ea..f4607172b0b8 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -31,6 +31,22 @@ #define VDPASIM_NET_VQ_NUM 2 +static void vdpasim_net_complete(struct vdpasim_virtqueue *vq, size_t len) +{ + /* Make sure data is wrote before advancing index */ + smp_wmb(); + + vringh_complete_iotlb(&vq->vring, vq->head, len); + + /* Make sure used is visible before rasing the interrupt. 
*/ + smp_wmb(); + + local_bh_disable(); + if (vringh_need_notify_iotlb(&vq->vring) > 0) + vringh_notify(&vq->vring); + local_bh_enable(); +} + static void vdpasim_net_work(struct work_struct *work) { struct vdpasim *vdpasim = container_of(work, struct vdpasim, work); @@ -78,21 +94,8 @@ static void vdpasim_net_work(struct work_struct *work) total_write += write; } - /* Make sure data is wrote before advancing index */ - smp_wmb(); - - vringh_complete_iotlb(&txq->vring, txq->head, 0); - vringh_complete_iotlb(&rxq->vring, rxq->head, total_write); - - /* Make sure used is visible before rasing the interrupt. */ - smp_wmb(); - - local_bh_disable(); - if (vringh_need_notify_iotlb(&txq->vring) > 0) - vringh_notify(&txq->vring); - if (vringh_need_notify_iotlb(&rxq->vring) > 0) - vringh_notify(&rxq->vring); - local_bh_enable(); + vdpasim_net_complete(txq, 0); + vdpasim_net_complete(rxq, total_write); if (++pkts > 4) { schedule_work(&vdpasim->work); -- cgit v1.2.3 From cfe226892913a448e83e7a19db93862baa3cb99c Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:58 +0530 Subject: vdpa_sim: filter destination mac address This patch implements a simple unicast filter for vDPA simulator. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-19-gdawar@xilinx.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 49 +++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index f4607172b0b8..5fa59d4fddc8 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -47,13 +47,28 @@ static void vdpasim_net_complete(struct vdpasim_virtqueue *vq, size_t len) local_bh_enable(); } +static bool receive_filter(struct vdpasim *vdpasim, size_t len) +{ + bool modern = vdpasim->features & (1ULL << VIRTIO_F_VERSION_1); + size_t hdr_len = modern ? sizeof(struct virtio_net_hdr_v1) : + sizeof(struct virtio_net_hdr); + struct virtio_net_config *vio_config = vdpasim->config; + + if (len < ETH_ALEN + hdr_len) + return false; + + if (!strncmp(vdpasim->buffer + hdr_len, vio_config->mac, ETH_ALEN)) + return true; + + return false; +} + static void vdpasim_net_work(struct work_struct *work) { struct vdpasim *vdpasim = container_of(work, struct vdpasim, work); struct vdpasim_virtqueue *txq = &vdpasim->vqs[1]; struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0]; ssize_t read, write; - size_t total_write; int pkts = 0; int err; @@ -66,36 +81,34 @@ static void vdpasim_net_work(struct work_struct *work) goto out; while (true) { - total_write = 0; err = vringh_getdesc_iotlb(&txq->vring, &txq->out_iov, NULL, &txq->head, GFP_ATOMIC); if (err <= 0) break; + read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov, + vdpasim->buffer, + PAGE_SIZE); + + if (!receive_filter(vdpasim, read)) { + vdpasim_net_complete(txq, 0); + continue; + } + err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->in_iov, &rxq->head, GFP_ATOMIC); if (err <= 0) { - vringh_complete_iotlb(&txq->vring, txq->head, 0); + vdpasim_net_complete(txq, 0); break; } - while (true) { - read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov, - vdpasim->buffer, - PAGE_SIZE); - if (read <= 0) - break; - - write = 
vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov, - vdpasim->buffer, read); - if (write <= 0) - break; - - total_write += write; - } + write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov, + vdpasim->buffer, read); + if (write <= 0) + break; vdpasim_net_complete(txq, 0); - vdpasim_net_complete(rxq, total_write); + vdpasim_net_complete(rxq, write); if (++pkts > 4) { schedule_work(&vdpasim->work); -- cgit v1.2.3 From bda324fd037a6b0d44da5699574ce741ca161bc4 Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:59 +0530 Subject: vdpasim: control virtqueue support This patch introduces the control virtqueue support for vDPA simulator. This is a requirement for supporting advanced features like multiqueue. A requirement for control virtqueue is to isolate its memory access from the rx/tx virtqueues. This is because when using vDPA device for VM, the control virtqueue is not directly assigned to VM. Userspace (Qemu) will present a shadow control virtqueue to control for recording the device states. The isolation is done via the virtqueue groups and ASID support in vDPA through vhost-vdpa. The simulator is extended to have: 1) three virtqueues: RXVQ, TXVQ and CVQ (control virtqueue) 2) two virtqueue groups: group 0 contains RXVQ and TXVQ; group 1 contains CVQ 3) two address spaces and the simulator simply implements the address spaces by mapping them 1:1 to IOTLB. For the VM use cases, userspace(Qemu) may set AS 0 to group 0 and AS 1 to group 1. So we have: 1) The IOTLB for virtqueue group 0 contains the mappings of guest, so RX and TX can be assigned to guest directly. 2) The IOTLB for virtqueue group 1 contains the mappings of CVQ which are the buffers that are allocated and managed by VMM only. So CVQ of vhost-vdpa is visible to VMM only. And Guest can not access the CVQ of vhost-vdpa. For the other use cases, since AS 0 is associated to all virtqueue groups by default, all virtqueues share the same mapping by default. 
To demonstrate the function, VIRTIO_NET_F_CTRL_MAC_ADDR is implemented in the simulator for the driver to set mac address. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-20-gdawar@xilinx.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 91 +++++++++++++++++++++++++++++------- drivers/vdpa/vdpa_sim/vdpa_sim.h | 2 + drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 88 +++++++++++++++++++++++++++++++++- 3 files changed, 161 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 659e2e2e4b0c..51bd0bafce06 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -96,11 +96,17 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim) { int i; - for (i = 0; i < vdpasim->dev_attr.nvqs; i++) + spin_lock(&vdpasim->iommu_lock); + + for (i = 0; i < vdpasim->dev_attr.nvqs; i++) { vdpasim_vq_reset(vdpasim, &vdpasim->vqs[i]); + vringh_set_iotlb(&vdpasim->vqs[i].vring, &vdpasim->iommu[0], + &vdpasim->iommu_lock); + } + + for (i = 0; i < vdpasim->dev_attr.nas; i++) + vhost_iotlb_reset(&vdpasim->iommu[i]); - spin_lock(&vdpasim->iommu_lock); - vhost_iotlb_reset(vdpasim->iommu); spin_unlock(&vdpasim->iommu_lock); vdpasim->features = 0; @@ -145,7 +151,7 @@ static dma_addr_t vdpasim_map_range(struct vdpasim *vdpasim, phys_addr_t paddr, dma_addr = iova_dma_addr(&vdpasim->iova, iova); spin_lock(&vdpasim->iommu_lock); - ret = vhost_iotlb_add_range(vdpasim->iommu, (u64)dma_addr, + ret = vhost_iotlb_add_range(&vdpasim->iommu[0], (u64)dma_addr, (u64)dma_addr + size - 1, (u64)paddr, perm); spin_unlock(&vdpasim->iommu_lock); @@ -161,7 +167,7 @@ static void vdpasim_unmap_range(struct vdpasim *vdpasim, dma_addr_t dma_addr, size_t size) { spin_lock(&vdpasim->iommu_lock); - vhost_iotlb_del_range(vdpasim->iommu, (u64)dma_addr, + vhost_iotlb_del_range(&vdpasim->iommu[0], (u64)dma_addr, (u64)dma_addr + size - 1); 
spin_unlock(&vdpasim->iommu_lock); @@ -250,8 +256,9 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) else ops = &vdpasim_config_ops; - vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, 1, - 1, dev_attr->name, false); + vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, + dev_attr->ngroups, dev_attr->nas, + dev_attr->name, false); if (IS_ERR(vdpasim)) { ret = PTR_ERR(vdpasim); goto err_alloc; @@ -278,16 +285,20 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) if (!vdpasim->vqs) goto err_iommu; - vdpasim->iommu = vhost_iotlb_alloc(max_iotlb_entries, 0); + vdpasim->iommu = kmalloc_array(vdpasim->dev_attr.nas, + sizeof(*vdpasim->iommu), GFP_KERNEL); if (!vdpasim->iommu) goto err_iommu; + for (i = 0; i < vdpasim->dev_attr.nas; i++) + vhost_iotlb_init(&vdpasim->iommu[i], 0, 0); + vdpasim->buffer = kvmalloc(dev_attr->buffer_size, GFP_KERNEL); if (!vdpasim->buffer) goto err_iommu; for (i = 0; i < dev_attr->nvqs; i++) - vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu, + vringh_set_iotlb(&vdpasim->vqs[i].vring, &vdpasim->iommu[0], &vdpasim->iommu_lock); ret = iova_cache_get(); @@ -401,7 +412,11 @@ static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa) static u32 vdpasim_get_vq_group(struct vdpa_device *vdpa, u16 idx) { - return 0; + /* RX and TX belongs to group 0, CVQ belongs to group 1 */ + if (idx == 2) + return 1; + else + return 0; } static u64 vdpasim_get_device_features(struct vdpa_device *vdpa) @@ -539,20 +554,53 @@ static struct vdpa_iova_range vdpasim_get_iova_range(struct vdpa_device *vdpa) return range; } +static int vdpasim_set_group_asid(struct vdpa_device *vdpa, unsigned int group, + unsigned int asid) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vhost_iotlb *iommu; + int i; + + if (group > vdpasim->dev_attr.ngroups) + return -EINVAL; + + if (asid > vdpasim->dev_attr.nas) + return -EINVAL; + + iommu = &vdpasim->iommu[asid]; + + spin_lock(&vdpasim->lock); + + for (i = 0; i < 
vdpasim->dev_attr.nvqs; i++) + if (vdpasim_get_vq_group(vdpa, i) == group) + vringh_set_iotlb(&vdpasim->vqs[i].vring, iommu, + &vdpasim->iommu_lock); + + spin_unlock(&vdpasim->lock); + + return 0; +} + static int vdpasim_set_map(struct vdpa_device *vdpa, unsigned int asid, struct vhost_iotlb *iotlb) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vhost_iotlb_map *map; + struct vhost_iotlb *iommu; u64 start = 0ULL, last = 0ULL - 1; int ret; + if (asid >= vdpasim->dev_attr.nas) + return -EINVAL; + spin_lock(&vdpasim->iommu_lock); - vhost_iotlb_reset(vdpasim->iommu); + + iommu = &vdpasim->iommu[asid]; + vhost_iotlb_reset(iommu); for (map = vhost_iotlb_itree_first(iotlb, start, last); map; map = vhost_iotlb_itree_next(map, start, last)) { - ret = vhost_iotlb_add_range(vdpasim->iommu, map->start, + ret = vhost_iotlb_add_range(iommu, map->start, map->last, map->addr, map->perm); if (ret) goto err; @@ -561,7 +609,7 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, unsigned int asid, return 0; err: - vhost_iotlb_reset(vdpasim->iommu); + vhost_iotlb_reset(iommu); spin_unlock(&vdpasim->iommu_lock); return ret; } @@ -573,9 +621,12 @@ static int vdpasim_dma_map(struct vdpa_device *vdpa, unsigned int asid, struct vdpasim *vdpasim = vdpa_to_sim(vdpa); int ret; + if (asid >= vdpasim->dev_attr.nas) + return -EINVAL; + spin_lock(&vdpasim->iommu_lock); - ret = vhost_iotlb_add_range_ctx(vdpasim->iommu, iova, iova + size - 1, - pa, perm, opaque); + ret = vhost_iotlb_add_range_ctx(&vdpasim->iommu[asid], iova, + iova + size - 1, pa, perm, opaque); spin_unlock(&vdpasim->iommu_lock); return ret; @@ -586,8 +637,11 @@ static int vdpasim_dma_unmap(struct vdpa_device *vdpa, unsigned int asid, { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + if (asid >= vdpasim->dev_attr.nas) + return -EINVAL; + spin_lock(&vdpasim->iommu_lock); - vhost_iotlb_del_range(vdpasim->iommu, iova, iova + size - 1); + vhost_iotlb_del_range(&vdpasim->iommu[asid], iova, iova + size - 1); 
spin_unlock(&vdpasim->iommu_lock); return 0; @@ -611,8 +665,7 @@ static void vdpasim_free(struct vdpa_device *vdpa) } kvfree(vdpasim->buffer); - if (vdpasim->iommu) - vhost_iotlb_free(vdpasim->iommu); + vhost_iotlb_free(vdpasim->iommu); kfree(vdpasim->vqs); kfree(vdpasim->config); } @@ -643,6 +696,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = { .set_config = vdpasim_set_config, .get_generation = vdpasim_get_generation, .get_iova_range = vdpasim_get_iova_range, + .set_group_asid = vdpasim_set_group_asid, .dma_map = vdpasim_dma_map, .dma_unmap = vdpasim_dma_unmap, .free = vdpasim_free, @@ -674,6 +728,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { .set_config = vdpasim_set_config, .get_generation = vdpasim_get_generation, .get_iova_range = vdpasim_get_iova_range, + .set_group_asid = vdpasim_set_group_asid, .set_map = vdpasim_set_map, .free = vdpasim_free, }; diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h index 0be7c1e7ef80..622782e92239 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h @@ -41,6 +41,8 @@ struct vdpasim_dev_attr { size_t buffer_size; int nvqs; u32 id; + u32 ngroups; + u32 nas; work_func_t work_fn; void (*get_config)(struct vdpasim *vdpasim, void *config); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index 5fa59d4fddc8..5125976a4df8 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -27,9 +27,14 @@ #define VDPASIM_NET_FEATURES (VDPASIM_FEATURES | \ (1ULL << VIRTIO_NET_F_MAC) | \ - (1ULL << VIRTIO_NET_F_MTU)) + (1ULL << VIRTIO_NET_F_MTU) | \ + (1ULL << VIRTIO_NET_F_CTRL_VQ) | \ + (1ULL << VIRTIO_NET_F_CTRL_MAC_ADDR)) -#define VDPASIM_NET_VQ_NUM 2 +/* 3 virtqueues, 2 address spaces, 2 virtqueue groups */ +#define VDPASIM_NET_VQ_NUM 3 +#define VDPASIM_NET_AS_NUM 2 +#define VDPASIM_NET_GROUP_NUM 2 static void vdpasim_net_complete(struct vdpasim_virtqueue *vq, 
size_t len) { @@ -63,6 +68,81 @@ static bool receive_filter(struct vdpasim *vdpasim, size_t len) return false; } +static virtio_net_ctrl_ack vdpasim_handle_ctrl_mac(struct vdpasim *vdpasim, + u8 cmd) +{ + struct virtio_net_config *vio_config = vdpasim->config; + struct vdpasim_virtqueue *cvq = &vdpasim->vqs[2]; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; + size_t read; + + switch (cmd) { + case VIRTIO_NET_CTRL_MAC_ADDR_SET: + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->in_iov, + vio_config->mac, ETH_ALEN); + if (read == ETH_ALEN) + status = VIRTIO_NET_OK; + break; + default: + break; + } + + return status; +} + +static void vdpasim_handle_cvq(struct vdpasim *vdpasim) +{ + struct vdpasim_virtqueue *cvq = &vdpasim->vqs[2]; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; + struct virtio_net_ctrl_hdr ctrl; + size_t read, write; + int err; + + if (!(vdpasim->features & (1ULL << VIRTIO_NET_F_CTRL_VQ))) + return; + + if (!cvq->ready) + return; + + while (true) { + err = vringh_getdesc_iotlb(&cvq->vring, &cvq->in_iov, + &cvq->out_iov, + &cvq->head, GFP_ATOMIC); + if (err <= 0) + break; + + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->in_iov, &ctrl, + sizeof(ctrl)); + if (read != sizeof(ctrl)) + break; + + switch (ctrl.class) { + case VIRTIO_NET_CTRL_MAC: + status = vdpasim_handle_ctrl_mac(vdpasim, ctrl.cmd); + break; + default: + break; + } + + /* Make sure data is wrote before advancing index */ + smp_wmb(); + + write = vringh_iov_push_iotlb(&cvq->vring, &cvq->out_iov, + &status, sizeof(status)); + vringh_complete_iotlb(&cvq->vring, cvq->head, write); + vringh_kiov_cleanup(&cvq->in_iov); + vringh_kiov_cleanup(&cvq->out_iov); + + /* Make sure used is visible before rasing the interrupt. 
*/ + smp_wmb(); + + local_bh_disable(); + if (cvq->cb) + cvq->cb(cvq->private); + local_bh_enable(); + } +} + static void vdpasim_net_work(struct work_struct *work) { struct vdpasim *vdpasim = container_of(work, struct vdpasim, work); @@ -77,6 +157,8 @@ static void vdpasim_net_work(struct work_struct *work) if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) goto out; + vdpasim_handle_cvq(vdpasim); + if (!txq->ready || !rxq->ready) goto out; @@ -162,6 +244,8 @@ static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, dev_attr.id = VIRTIO_ID_NET; dev_attr.supported_features = VDPASIM_NET_FEATURES; dev_attr.nvqs = VDPASIM_NET_VQ_NUM; + dev_attr.ngroups = VDPASIM_NET_GROUP_NUM; + dev_attr.nas = VDPASIM_NET_AS_NUM; dev_attr.config_size = sizeof(struct virtio_net_config); dev_attr.get_config = vdpasim_net_get_config; dev_attr.work_fn = vdpasim_net_work; -- cgit v1.2.3 From 7a836a2aba09479c8e71fa43249eecc4af945f61 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 3 Apr 2022 11:11:14 +0200 Subject: virtio: pci: Fix an error handling path in vp_modern_probe() If an error occurs after a successful pci_request_selected_regions() call, it should be undone by a corresponding pci_release_selected_regions() call, as already done in vp_modern_remove(). Fixes: fd502729fbbf ("virtio-pci: introduce modern device module") Signed-off-by: Christophe JAILLET Message-Id: <237109725aad2c3c03d14549f777b1927c84b045.1648977064.git.christophe.jaillet@wanadoo.fr> Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_pci_modern_dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index 591738ad3d56..4093f9cca7a6 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c +++ b/drivers/virtio/virtio_pci_modern_dev.c @@ -347,6 +347,7 @@ err_map_notify: err_map_isr: pci_iounmap(pci_dev, mdev->common); err_map_common: + pci_release_selected_regions(pci_dev, mdev->modern_bars); return err; } EXPORT_SYMBOL_GPL(vp_modern_probe); -- cgit v1.2.3 From 7e415282b41bf0d15c6e0fe268f822d9b083f2f7 Mon Sep 17 00:00:00 2001 From: Murilo Opsfelder Araujo Date: Thu, 14 Apr 2022 23:30:02 -0300 Subject: virtio-pci: Remove wrong address verification in vp_del_vqs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC 12 enhanced -Waddress when comparing array address to null [0], which warns: drivers/virtio/virtio_pci_common.c: In function ‘vp_del_vqs’: drivers/virtio/virtio_pci_common.c:257:29: warning: the comparison will always evaluate as ‘true’ for the pointer operand in ‘vp_dev->msix_affinity_masks + (sizetype)((long unsigned int)i * 256)’ must not be NULL [-Waddress] 257 | if (vp_dev->msix_affinity_masks[i]) | ^~~~~~ In fact, the verification is comparing the result of a pointer arithmetic, the address "msix_affinity_masks + i", which will always evaluate to true. Under the hood, free_cpumask_var() calls kfree(), which is safe to pass NULL, not requiring non-null verification. So remove the verification to make compiler happy (happy compiler, happy life). [0] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102103 Signed-off-by: Murilo Opsfelder Araujo Message-Id: <20220415023002.49805-1-muriloo@linux.ibm.com> Signed-off-by: Michael S. 
Tsirkin Acked-by: Christophe de Dinechin --- drivers/virtio/virtio_pci_common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index d724f676608b..5046efcffb4c 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -254,8 +254,7 @@ void vp_del_vqs(struct virtio_device *vdev) if (vp_dev->msix_affinity_masks) { for (i = 0; i < vp_dev->msix_vectors; i++) - if (vp_dev->msix_affinity_masks[i]) - free_cpumask_var(vp_dev->msix_affinity_masks[i]); + free_cpumask_var(vp_dev->msix_affinity_masks[i]); } if (vp_dev->msix_enabled) { -- cgit v1.2.3 From 6fd763d155860eb7ea3a93c8b3bf926940ffa3fb Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Fri, 6 May 2022 21:16:23 +0800 Subject: virtio-crypto: change code style Use temporary variable to make code easy to read and maintain. /* Pad cipher's parameters */ vcrypto->ctrl.u.sym_create_session.op_type = cpu_to_le32(VIRTIO_CRYPTO_SYM_OP_CIPHER); vcrypto->ctrl.u.sym_create_session.u.cipher.para.algo = vcrypto->ctrl.header.algo; vcrypto->ctrl.u.sym_create_session.u.cipher.para.keylen = cpu_to_le32(keylen); vcrypto->ctrl.u.sym_create_session.u.cipher.para.op = cpu_to_le32(op); --> sym_create_session = &ctrl->u.sym_create_session; sym_create_session->op_type = cpu_to_le32(VIRTIO_CRYPTO_SYM_OP_CIPHER); sym_create_session->u.cipher.para.algo = ctrl->header.algo; sym_create_session->u.cipher.para.keylen = cpu_to_le32(keylen); sym_create_session->u.cipher.para.op = cpu_to_le32(op); The new style shows more obviously: - the variable we want to operate. - an assignment statement in a single line. Cc: Michael S. Tsirkin Cc: Jason Wang Cc: Gonglei Reviewed-by: Gonglei Signed-off-by: zhenwei pi Message-Id: <20220506131627.180784-2-pizhenwei@bytedance.com> Signed-off-by: Michael S. 
Tsirkin --- .../crypto/virtio/virtio_crypto_akcipher_algs.c | 40 +++++++----- .../crypto/virtio/virtio_crypto_skcipher_algs.c | 72 +++++++++++----------- 2 files changed, 59 insertions(+), 53 deletions(-) (limited to 'drivers') diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c index f3ec9420215e..20901a263fc8 100644 --- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -106,23 +106,27 @@ static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher unsigned int inlen; int err; unsigned int num_out = 0, num_in = 0; + struct virtio_crypto_op_ctrl_req *ctrl; + struct virtio_crypto_session_input *input; pkey = kmemdup(key, keylen, GFP_ATOMIC); if (!pkey) return -ENOMEM; spin_lock(&vcrypto->ctrl_lock); - memcpy(&vcrypto->ctrl.header, header, sizeof(vcrypto->ctrl.header)); - memcpy(&vcrypto->ctrl.u, para, sizeof(vcrypto->ctrl.u)); - vcrypto->input.status = cpu_to_le32(VIRTIO_CRYPTO_ERR); + ctrl = &vcrypto->ctrl; + memcpy(&ctrl->header, header, sizeof(ctrl->header)); + memcpy(&ctrl->u, para, sizeof(ctrl->u)); + input = &vcrypto->input; + input->status = cpu_to_le32(VIRTIO_CRYPTO_ERR); - sg_init_one(&outhdr_sg, &vcrypto->ctrl, sizeof(vcrypto->ctrl)); + sg_init_one(&outhdr_sg, ctrl, sizeof(*ctrl)); sgs[num_out++] = &outhdr_sg; sg_init_one(&key_sg, pkey, keylen); sgs[num_out++] = &key_sg; - sg_init_one(&inhdr_sg, &vcrypto->input, sizeof(vcrypto->input)); + sg_init_one(&inhdr_sg, input, sizeof(*input)); sgs[num_out + num_in++] = &inhdr_sg; err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, num_in, vcrypto, GFP_ATOMIC); @@ -134,12 +138,12 @@ static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher !virtqueue_is_broken(vcrypto->ctrl_vq)) cpu_relax(); - if (le32_to_cpu(vcrypto->input.status) != VIRTIO_CRYPTO_OK) { + if (le32_to_cpu(input->status) != VIRTIO_CRYPTO_OK) { err = -EINVAL; goto out; } - 
ctx->session_id = le64_to_cpu(vcrypto->input.session_id); + ctx->session_id = le64_to_cpu(input->session_id); ctx->session_valid = true; err = 0; @@ -149,7 +153,7 @@ out: if (err < 0) pr_err("virtio_crypto: Create session failed status: %u\n", - le32_to_cpu(vcrypto->input.status)); + le32_to_cpu(input->status)); return err; } @@ -161,23 +165,27 @@ static int virtio_crypto_alg_akcipher_close_session(struct virtio_crypto_akciphe struct virtio_crypto *vcrypto = ctx->vcrypto; unsigned int num_out = 0, num_in = 0, inlen; int err; + struct virtio_crypto_op_ctrl_req *ctrl; + struct virtio_crypto_inhdr *ctrl_status; spin_lock(&vcrypto->ctrl_lock); if (!ctx->session_valid) { err = 0; goto out; } - vcrypto->ctrl_status.status = VIRTIO_CRYPTO_ERR; - vcrypto->ctrl.header.opcode = cpu_to_le32(VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION); - vcrypto->ctrl.header.queue_id = 0; + ctrl_status = &vcrypto->ctrl_status; + ctrl_status->status = VIRTIO_CRYPTO_ERR; + ctrl = &vcrypto->ctrl; + ctrl->header.opcode = cpu_to_le32(VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION); + ctrl->header.queue_id = 0; - destroy_session = &vcrypto->ctrl.u.destroy_session; + destroy_session = &ctrl->u.destroy_session; destroy_session->session_id = cpu_to_le64(ctx->session_id); - sg_init_one(&outhdr_sg, &vcrypto->ctrl, sizeof(vcrypto->ctrl)); + sg_init_one(&outhdr_sg, ctrl, sizeof(*ctrl)); sgs[num_out++] = &outhdr_sg; - sg_init_one(&inhdr_sg, &vcrypto->ctrl_status.status, sizeof(vcrypto->ctrl_status.status)); + sg_init_one(&inhdr_sg, &ctrl_status->status, sizeof(ctrl_status->status)); sgs[num_out + num_in++] = &inhdr_sg; err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, num_in, vcrypto, GFP_ATOMIC); @@ -189,7 +197,7 @@ static int virtio_crypto_alg_akcipher_close_session(struct virtio_crypto_akciphe !virtqueue_is_broken(vcrypto->ctrl_vq)) cpu_relax(); - if (vcrypto->ctrl_status.status != VIRTIO_CRYPTO_OK) { + if (ctrl_status->status != VIRTIO_CRYPTO_OK) { err = -EINVAL; goto out; } @@ -201,7 +209,7 @@ out: 
spin_unlock(&vcrypto->ctrl_lock); if (err < 0) { pr_err("virtio_crypto: Close session failed status: %u, session_id: 0x%llx\n", - vcrypto->ctrl_status.status, destroy_session->session_id); + ctrl_status->status, destroy_session->session_id); } return err; diff --git a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c index a618c46a52b8..e3c5bc8d6112 100644 --- a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c @@ -123,6 +123,9 @@ static int virtio_crypto_alg_skcipher_init_session( int op = encrypt ? VIRTIO_CRYPTO_OP_ENCRYPT : VIRTIO_CRYPTO_OP_DECRYPT; int err; unsigned int num_out = 0, num_in = 0; + struct virtio_crypto_op_ctrl_req *ctrl; + struct virtio_crypto_session_input *input; + struct virtio_crypto_sym_create_session_req *sym_create_session; /* * Avoid to do DMA from the stack, switch to using @@ -135,24 +138,22 @@ static int virtio_crypto_alg_skcipher_init_session( spin_lock(&vcrypto->ctrl_lock); /* Pad ctrl header */ - vcrypto->ctrl.header.opcode = - cpu_to_le32(VIRTIO_CRYPTO_CIPHER_CREATE_SESSION); - vcrypto->ctrl.header.algo = cpu_to_le32(alg); + ctrl = &vcrypto->ctrl; + ctrl->header.opcode = cpu_to_le32(VIRTIO_CRYPTO_CIPHER_CREATE_SESSION); + ctrl->header.algo = cpu_to_le32(alg); /* Set the default dataqueue id to 0 */ - vcrypto->ctrl.header.queue_id = 0; + ctrl->header.queue_id = 0; - vcrypto->input.status = cpu_to_le32(VIRTIO_CRYPTO_ERR); + input = &vcrypto->input; + input->status = cpu_to_le32(VIRTIO_CRYPTO_ERR); /* Pad cipher's parameters */ - vcrypto->ctrl.u.sym_create_session.op_type = - cpu_to_le32(VIRTIO_CRYPTO_SYM_OP_CIPHER); - vcrypto->ctrl.u.sym_create_session.u.cipher.para.algo = - vcrypto->ctrl.header.algo; - vcrypto->ctrl.u.sym_create_session.u.cipher.para.keylen = - cpu_to_le32(keylen); - vcrypto->ctrl.u.sym_create_session.u.cipher.para.op = - cpu_to_le32(op); - - sg_init_one(&outhdr, &vcrypto->ctrl, sizeof(vcrypto->ctrl)); 
+ sym_create_session = &ctrl->u.sym_create_session; + sym_create_session->op_type = cpu_to_le32(VIRTIO_CRYPTO_SYM_OP_CIPHER); + sym_create_session->u.cipher.para.algo = ctrl->header.algo; + sym_create_session->u.cipher.para.keylen = cpu_to_le32(keylen); + sym_create_session->u.cipher.para.op = cpu_to_le32(op); + + sg_init_one(&outhdr, ctrl, sizeof(*ctrl)); sgs[num_out++] = &outhdr; /* Set key */ @@ -160,7 +161,7 @@ static int virtio_crypto_alg_skcipher_init_session( sgs[num_out++] = &key_sg; /* Return status and session id back */ - sg_init_one(&inhdr, &vcrypto->input, sizeof(vcrypto->input)); + sg_init_one(&inhdr, input, sizeof(*input)); sgs[num_out + num_in++] = &inhdr; err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, @@ -180,20 +181,18 @@ static int virtio_crypto_alg_skcipher_init_session( !virtqueue_is_broken(vcrypto->ctrl_vq)) cpu_relax(); - if (le32_to_cpu(vcrypto->input.status) != VIRTIO_CRYPTO_OK) { + if (le32_to_cpu(input->status) != VIRTIO_CRYPTO_OK) { spin_unlock(&vcrypto->ctrl_lock); pr_err("virtio_crypto: Create session failed status: %u\n", - le32_to_cpu(vcrypto->input.status)); + le32_to_cpu(input->status)); kfree_sensitive(cipher_key); return -EINVAL; } if (encrypt) - ctx->enc_sess_info.session_id = - le64_to_cpu(vcrypto->input.session_id); + ctx->enc_sess_info.session_id = le64_to_cpu(input->session_id); else - ctx->dec_sess_info.session_id = - le64_to_cpu(vcrypto->input.session_id); + ctx->dec_sess_info.session_id = le64_to_cpu(input->session_id); spin_unlock(&vcrypto->ctrl_lock); @@ -211,30 +210,30 @@ static int virtio_crypto_alg_skcipher_close_session( struct virtio_crypto *vcrypto = ctx->vcrypto; int err; unsigned int num_out = 0, num_in = 0; + struct virtio_crypto_op_ctrl_req *ctrl; + struct virtio_crypto_inhdr *ctrl_status; spin_lock(&vcrypto->ctrl_lock); - vcrypto->ctrl_status.status = VIRTIO_CRYPTO_ERR; + ctrl_status = &vcrypto->ctrl_status; + ctrl_status->status = VIRTIO_CRYPTO_ERR; /* Pad ctrl header */ - 
vcrypto->ctrl.header.opcode = - cpu_to_le32(VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION); + ctrl = &vcrypto->ctrl; + ctrl->header.opcode = cpu_to_le32(VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION); /* Set the default virtqueue id to 0 */ - vcrypto->ctrl.header.queue_id = 0; + ctrl->header.queue_id = 0; - destroy_session = &vcrypto->ctrl.u.destroy_session; + destroy_session = &ctrl->u.destroy_session; if (encrypt) - destroy_session->session_id = - cpu_to_le64(ctx->enc_sess_info.session_id); + destroy_session->session_id = cpu_to_le64(ctx->enc_sess_info.session_id); else - destroy_session->session_id = - cpu_to_le64(ctx->dec_sess_info.session_id); + destroy_session->session_id = cpu_to_le64(ctx->dec_sess_info.session_id); - sg_init_one(&outhdr, &vcrypto->ctrl, sizeof(vcrypto->ctrl)); + sg_init_one(&outhdr, ctrl, sizeof(*ctrl)); sgs[num_out++] = &outhdr; /* Return status and session id back */ - sg_init_one(&status_sg, &vcrypto->ctrl_status.status, - sizeof(vcrypto->ctrl_status.status)); + sg_init_one(&status_sg, &ctrl_status->status, sizeof(ctrl_status->status)); sgs[num_out + num_in++] = &status_sg; err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, @@ -249,11 +248,10 @@ static int virtio_crypto_alg_skcipher_close_session( !virtqueue_is_broken(vcrypto->ctrl_vq)) cpu_relax(); - if (vcrypto->ctrl_status.status != VIRTIO_CRYPTO_OK) { + if (ctrl_status->status != VIRTIO_CRYPTO_OK) { spin_unlock(&vcrypto->ctrl_lock); pr_err("virtio_crypto: Close session failed status: %u, session_id: 0x%llx\n", - vcrypto->ctrl_status.status, - destroy_session->session_id); + ctrl_status->status, destroy_session->session_id); return -EINVAL; } -- cgit v1.2.3 From 0756ad15b1fef287d4d8fa11bc36ea77a5c42e4a Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Fri, 6 May 2022 21:16:24 +0800 Subject: virtio-crypto: use private buffer for control request Originally, all of the control requests share a single buffer( ctrl & input & ctrl_status fields in struct virtio_crypto), this allows queue depth 1 only, the 
performance of control queue gets limited by this design. In this patch, each request allocates its request buffer dynamically and frees the buffer after the request, so the scope protected by ctrl_lock also gets optimized here. It's possible to optimize control queue depth in the next step. A necessary comment is already in the code, but it is worth describing again: /* * Note: there are padding fields in request, clear them to zero before * sending to host to avoid to divulge any information. * Ex, virtio_crypto_ctrl_request::ctrl::u::destroy_session::padding[48] */ So use kzalloc to allocate the buffer for struct virtio_crypto_ctrl_request. Potentially dereferencing uninitialized variables: Reported-by: kernel test robot Reported-by: Dan Carpenter Cc: Michael S. Tsirkin Cc: Jason Wang Cc: Gonglei Reviewed-by: Gonglei Signed-off-by: zhenwei pi Message-Id: <20220506131627.180784-3-pizhenwei@bytedance.com> Signed-off-by: Michael S. Tsirkin --- .../crypto/virtio/virtio_crypto_akcipher_algs.c | 57 +++++++++++++--------- drivers/crypto/virtio/virtio_crypto_common.h | 17 +++++-- .../crypto/virtio/virtio_crypto_skcipher_algs.c | 50 ++++++++++++------- 3 files changed, 79 insertions(+), 45 deletions(-) (limited to 'drivers') diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c index 20901a263fc8..698ea57e2649 100644 --- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -108,16 +108,22 @@ static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher unsigned int num_out = 0, num_in = 0; struct virtio_crypto_op_ctrl_req *ctrl; struct virtio_crypto_session_input *input; + struct virtio_crypto_ctrl_request *vc_ctrl_req; pkey = kmemdup(key, keylen, GFP_ATOMIC); if (!pkey) return -ENOMEM; - spin_lock(&vcrypto->ctrl_lock); - ctrl = &vcrypto->ctrl; + vc_ctrl_req = kzalloc(sizeof(*vc_ctrl_req), GFP_KERNEL); + if (!vc_ctrl_req) { + err = -ENOMEM; + goto out; + } + 
+ ctrl = &vc_ctrl_req->ctrl; memcpy(&ctrl->header, header, sizeof(ctrl->header)); memcpy(&ctrl->u, para, sizeof(ctrl->u)); - input = &vcrypto->input; + input = &vc_ctrl_req->input; input->status = cpu_to_le32(VIRTIO_CRYPTO_ERR); sg_init_one(&outhdr_sg, ctrl, sizeof(*ctrl)); @@ -129,16 +135,22 @@ static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher sg_init_one(&inhdr_sg, input, sizeof(*input)); sgs[num_out + num_in++] = &inhdr_sg; + spin_lock(&vcrypto->ctrl_lock); err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, num_in, vcrypto, GFP_ATOMIC); - if (err < 0) + if (err < 0) { + spin_unlock(&vcrypto->ctrl_lock); goto out; + } virtqueue_kick(vcrypto->ctrl_vq); while (!virtqueue_get_buf(vcrypto->ctrl_vq, &inlen) && !virtqueue_is_broken(vcrypto->ctrl_vq)) cpu_relax(); + spin_unlock(&vcrypto->ctrl_lock); if (le32_to_cpu(input->status) != VIRTIO_CRYPTO_OK) { + pr_err("virtio_crypto: Create session failed status: %u\n", + le32_to_cpu(input->status)); err = -EINVAL; goto out; } @@ -148,13 +160,9 @@ static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher err = 0; out: - spin_unlock(&vcrypto->ctrl_lock); + kfree(vc_ctrl_req); kfree_sensitive(pkey); - if (err < 0) - pr_err("virtio_crypto: Create session failed status: %u\n", - le32_to_cpu(input->status)); - return err; } @@ -167,15 +175,18 @@ static int virtio_crypto_alg_akcipher_close_session(struct virtio_crypto_akciphe int err; struct virtio_crypto_op_ctrl_req *ctrl; struct virtio_crypto_inhdr *ctrl_status; + struct virtio_crypto_ctrl_request *vc_ctrl_req; - spin_lock(&vcrypto->ctrl_lock); - if (!ctx->session_valid) { - err = 0; - goto out; - } - ctrl_status = &vcrypto->ctrl_status; + if (!ctx->session_valid) + return 0; + + vc_ctrl_req = kzalloc(sizeof(*vc_ctrl_req), GFP_KERNEL); + if (!vc_ctrl_req) + return -ENOMEM; + + ctrl_status = &vc_ctrl_req->ctrl_status; ctrl_status->status = VIRTIO_CRYPTO_ERR; - ctrl = &vcrypto->ctrl; + ctrl = &vc_ctrl_req->ctrl; 
ctrl->header.opcode = cpu_to_le32(VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION); ctrl->header.queue_id = 0; @@ -188,16 +199,22 @@ static int virtio_crypto_alg_akcipher_close_session(struct virtio_crypto_akciphe sg_init_one(&inhdr_sg, &ctrl_status->status, sizeof(ctrl_status->status)); sgs[num_out + num_in++] = &inhdr_sg; + spin_lock(&vcrypto->ctrl_lock); err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, num_in, vcrypto, GFP_ATOMIC); - if (err < 0) + if (err < 0) { + spin_unlock(&vcrypto->ctrl_lock); goto out; + } virtqueue_kick(vcrypto->ctrl_vq); while (!virtqueue_get_buf(vcrypto->ctrl_vq, &inlen) && !virtqueue_is_broken(vcrypto->ctrl_vq)) cpu_relax(); + spin_unlock(&vcrypto->ctrl_lock); if (ctrl_status->status != VIRTIO_CRYPTO_OK) { + pr_err("virtio_crypto: Close session failed status: %u, session_id: 0x%llx\n", + ctrl_status->status, destroy_session->session_id); err = -EINVAL; goto out; } @@ -206,11 +223,7 @@ static int virtio_crypto_alg_akcipher_close_session(struct virtio_crypto_akciphe ctx->session_valid = false; out: - spin_unlock(&vcrypto->ctrl_lock); - if (err < 0) { - pr_err("virtio_crypto: Close session failed status: %u, session_id: 0x%llx\n", - ctrl_status->status, destroy_session->session_id); - } + kfree(vc_ctrl_req); return err; } diff --git a/drivers/crypto/virtio/virtio_crypto_common.h b/drivers/crypto/virtio/virtio_crypto_common.h index e693d4ee83a6..2422237ec4e6 100644 --- a/drivers/crypto/virtio/virtio_crypto_common.h +++ b/drivers/crypto/virtio/virtio_crypto_common.h @@ -13,6 +13,7 @@ #include #include #include +#include /* Internal representation of a data virtqueue */ @@ -65,11 +66,6 @@ struct virtio_crypto { /* Maximum size of per request */ u64 max_size; - /* Control VQ buffers: protected by the ctrl_lock */ - struct virtio_crypto_op_ctrl_req ctrl; - struct virtio_crypto_session_input input; - struct virtio_crypto_inhdr ctrl_status; - unsigned long status; atomic_t ref_count; struct list_head list; @@ -85,6 +81,17 @@ struct 
virtio_crypto_sym_session_info { __u64 session_id; }; +/* + * Note: there are padding fields in request, clear them to zero before + * sending to host to avoid to divulge any information. + * Ex, virtio_crypto_ctrl_request::ctrl::u::destroy_session::padding[48] + */ +struct virtio_crypto_ctrl_request { + struct virtio_crypto_op_ctrl_req ctrl; + struct virtio_crypto_session_input input; + struct virtio_crypto_inhdr ctrl_status; +}; + struct virtio_crypto_request; typedef void (*virtio_crypto_data_callback) (struct virtio_crypto_request *vc_req, int len); diff --git a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c index e3c5bc8d6112..6aaf0869b211 100644 --- a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c @@ -126,6 +126,7 @@ static int virtio_crypto_alg_skcipher_init_session( struct virtio_crypto_op_ctrl_req *ctrl; struct virtio_crypto_session_input *input; struct virtio_crypto_sym_create_session_req *sym_create_session; + struct virtio_crypto_ctrl_request *vc_ctrl_req; /* * Avoid to do DMA from the stack, switch to using @@ -136,15 +137,20 @@ static int virtio_crypto_alg_skcipher_init_session( if (!cipher_key) return -ENOMEM; - spin_lock(&vcrypto->ctrl_lock); + vc_ctrl_req = kzalloc(sizeof(*vc_ctrl_req), GFP_KERNEL); + if (!vc_ctrl_req) { + err = -ENOMEM; + goto out; + } + /* Pad ctrl header */ - ctrl = &vcrypto->ctrl; + ctrl = &vc_ctrl_req->ctrl; ctrl->header.opcode = cpu_to_le32(VIRTIO_CRYPTO_CIPHER_CREATE_SESSION); ctrl->header.algo = cpu_to_le32(alg); /* Set the default dataqueue id to 0 */ ctrl->header.queue_id = 0; - input = &vcrypto->input; + input = &vc_ctrl_req->input; input->status = cpu_to_le32(VIRTIO_CRYPTO_ERR); /* Pad cipher's parameters */ sym_create_session = &ctrl->u.sym_create_session; @@ -164,12 +170,12 @@ static int virtio_crypto_alg_skcipher_init_session( sg_init_one(&inhdr, input, sizeof(*input)); sgs[num_out + num_in++] = 
&inhdr; + spin_lock(&vcrypto->ctrl_lock); err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, num_in, vcrypto, GFP_ATOMIC); if (err < 0) { spin_unlock(&vcrypto->ctrl_lock); - kfree_sensitive(cipher_key); - return err; + goto out; } virtqueue_kick(vcrypto->ctrl_vq); @@ -180,13 +186,13 @@ static int virtio_crypto_alg_skcipher_init_session( while (!virtqueue_get_buf(vcrypto->ctrl_vq, &tmp) && !virtqueue_is_broken(vcrypto->ctrl_vq)) cpu_relax(); + spin_unlock(&vcrypto->ctrl_lock); if (le32_to_cpu(input->status) != VIRTIO_CRYPTO_OK) { - spin_unlock(&vcrypto->ctrl_lock); pr_err("virtio_crypto: Create session failed status: %u\n", le32_to_cpu(input->status)); - kfree_sensitive(cipher_key); - return -EINVAL; + err = -EINVAL; + goto out; } if (encrypt) @@ -194,10 +200,11 @@ static int virtio_crypto_alg_skcipher_init_session( else ctx->dec_sess_info.session_id = le64_to_cpu(input->session_id); - spin_unlock(&vcrypto->ctrl_lock); - + err = 0; +out: + kfree(vc_ctrl_req); kfree_sensitive(cipher_key); - return 0; + return err; } static int virtio_crypto_alg_skcipher_close_session( @@ -212,12 +219,16 @@ static int virtio_crypto_alg_skcipher_close_session( unsigned int num_out = 0, num_in = 0; struct virtio_crypto_op_ctrl_req *ctrl; struct virtio_crypto_inhdr *ctrl_status; + struct virtio_crypto_ctrl_request *vc_ctrl_req; - spin_lock(&vcrypto->ctrl_lock); - ctrl_status = &vcrypto->ctrl_status; + vc_ctrl_req = kzalloc(sizeof(*vc_ctrl_req), GFP_KERNEL); + if (!vc_ctrl_req) + return -ENOMEM; + + ctrl_status = &vc_ctrl_req->ctrl_status; ctrl_status->status = VIRTIO_CRYPTO_ERR; /* Pad ctrl header */ - ctrl = &vcrypto->ctrl; + ctrl = &vc_ctrl_req->ctrl; ctrl->header.opcode = cpu_to_le32(VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION); /* Set the default virtqueue id to 0 */ ctrl->header.queue_id = 0; @@ -236,28 +247,31 @@ static int virtio_crypto_alg_skcipher_close_session( sg_init_one(&status_sg, &ctrl_status->status, sizeof(ctrl_status->status)); sgs[num_out + num_in++] = &status_sg; + 
spin_lock(&vcrypto->ctrl_lock); err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, num_in, vcrypto, GFP_ATOMIC); if (err < 0) { spin_unlock(&vcrypto->ctrl_lock); - return err; + goto out; } virtqueue_kick(vcrypto->ctrl_vq); while (!virtqueue_get_buf(vcrypto->ctrl_vq, &tmp) && !virtqueue_is_broken(vcrypto->ctrl_vq)) cpu_relax(); + spin_unlock(&vcrypto->ctrl_lock); if (ctrl_status->status != VIRTIO_CRYPTO_OK) { - spin_unlock(&vcrypto->ctrl_lock); pr_err("virtio_crypto: Close session failed status: %u, session_id: 0x%llx\n", ctrl_status->status, destroy_session->session_id); return -EINVAL; } - spin_unlock(&vcrypto->ctrl_lock); - return 0; + err = 0; +out: + kfree(vc_ctrl_req); + return err; } static int virtio_crypto_alg_skcipher_init_sessions( -- cgit v1.2.3 From 977231e8d45657871a86fe3c7bed94921d04e447 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Fri, 6 May 2022 21:16:25 +0800 Subject: virtio-crypto: wait ctrl queue instead of busy polling Originally, after submitting request into virtio crypto control queue, the guest side polls the result from the virt queue. This works like following: CPU0 CPU1 ... CPUx CPUy | | | | \ \ / / \--------spin_lock(&vcrypto->ctrl_lock)-------/ | virtqueue add & kick | busy poll virtqueue | spin_unlock(&vcrypto->ctrl_lock) ... There are two problems: 1, The queue depth is always 1, the performance of a virtio crypto device gets limited. Multi user processes share a single control queue, and hit spin lock race from control queue. Test on Intel Platinum 8260, a single worker gets ~35K/s create/close session operations, and 8 workers get ~40K/s operations with 800% CPU utilization. 2, The control request is supposed to get handled immediately, but in the current implementation of QEMU(v6.2), the vCPU thread kicks another thread to do this work, the latency also gets unstable. 
Tracking latency of virtio_crypto_alg_akcipher_close_session in 5s: usecs : count distribution 0 -> 1 : 0 | | 2 -> 3 : 7 | | 4 -> 7 : 72 | | 8 -> 15 : 186485 |************************| 16 -> 31 : 687 | | 32 -> 63 : 5 | | 64 -> 127 : 3 | | 128 -> 255 : 1 | | 256 -> 511 : 0 | | 512 -> 1023 : 0 | | 1024 -> 2047 : 0 | | 2048 -> 4095 : 0 | | 4096 -> 8191 : 0 | | 8192 -> 16383 : 2 | | This means that a CPU may hold vcrypto->ctrl_lock as long as 8192~16383us. To improve the performance of the control queue, a request on the control queue waits for completion instead of busy polling to reduce lock racing, and gets completed by the control queue callback. CPU0 CPU1 ... CPUx CPUy | | | | \ \ / / \--------spin_lock(&vcrypto->ctrl_lock)-------/ | virtqueue add & kick | ---------spin_unlock(&vcrypto->ctrl_lock)------ / / \ \ | | | | wait wait wait wait With this patch, the guest side gets ~200K/s operations with 300% CPU utilization. Cc: Michael S. Tsirkin Cc: Jason Wang Cc: Gonglei Reviewed-by: Gonglei Signed-off-by: zhenwei pi Message-Id: <20220506131627.180784-4-pizhenwei@bytedance.com> Signed-off-by: Michael S. 
Tsirkin --- .../crypto/virtio/virtio_crypto_akcipher_algs.c | 29 +++--------- drivers/crypto/virtio/virtio_crypto_common.h | 4 ++ drivers/crypto/virtio/virtio_crypto_core.c | 52 +++++++++++++++++++++- .../crypto/virtio/virtio_crypto_skcipher_algs.c | 34 ++------------ 4 files changed, 64 insertions(+), 55 deletions(-) (limited to 'drivers') diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c index 698ea57e2649..382ccec9ab12 100644 --- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -103,7 +103,6 @@ static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher struct scatterlist outhdr_sg, key_sg, inhdr_sg, *sgs[3]; struct virtio_crypto *vcrypto = ctx->vcrypto; uint8_t *pkey; - unsigned int inlen; int err; unsigned int num_out = 0, num_in = 0; struct virtio_crypto_op_ctrl_req *ctrl; @@ -135,18 +134,9 @@ static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher sg_init_one(&inhdr_sg, input, sizeof(*input)); sgs[num_out + num_in++] = &inhdr_sg; - spin_lock(&vcrypto->ctrl_lock); - err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, num_in, vcrypto, GFP_ATOMIC); - if (err < 0) { - spin_unlock(&vcrypto->ctrl_lock); + err = virtio_crypto_ctrl_vq_request(vcrypto, sgs, num_out, num_in, vc_ctrl_req); + if (err < 0) goto out; - } - - virtqueue_kick(vcrypto->ctrl_vq); - while (!virtqueue_get_buf(vcrypto->ctrl_vq, &inlen) && - !virtqueue_is_broken(vcrypto->ctrl_vq)) - cpu_relax(); - spin_unlock(&vcrypto->ctrl_lock); if (le32_to_cpu(input->status) != VIRTIO_CRYPTO_OK) { pr_err("virtio_crypto: Create session failed status: %u\n", @@ -171,7 +161,7 @@ static int virtio_crypto_alg_akcipher_close_session(struct virtio_crypto_akciphe struct scatterlist outhdr_sg, inhdr_sg, *sgs[2]; struct virtio_crypto_destroy_session_req *destroy_session; struct virtio_crypto *vcrypto = ctx->vcrypto; - unsigned int num_out 
= 0, num_in = 0, inlen; + unsigned int num_out = 0, num_in = 0; int err; struct virtio_crypto_op_ctrl_req *ctrl; struct virtio_crypto_inhdr *ctrl_status; @@ -199,18 +189,9 @@ static int virtio_crypto_alg_akcipher_close_session(struct virtio_crypto_akciphe sg_init_one(&inhdr_sg, &ctrl_status->status, sizeof(ctrl_status->status)); sgs[num_out + num_in++] = &inhdr_sg; - spin_lock(&vcrypto->ctrl_lock); - err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, num_in, vcrypto, GFP_ATOMIC); - if (err < 0) { - spin_unlock(&vcrypto->ctrl_lock); + err = virtio_crypto_ctrl_vq_request(vcrypto, sgs, num_out, num_in, vc_ctrl_req); + if (err < 0) goto out; - } - - virtqueue_kick(vcrypto->ctrl_vq); - while (!virtqueue_get_buf(vcrypto->ctrl_vq, &inlen) && - !virtqueue_is_broken(vcrypto->ctrl_vq)) - cpu_relax(); - spin_unlock(&vcrypto->ctrl_lock); if (ctrl_status->status != VIRTIO_CRYPTO_OK) { pr_err("virtio_crypto: Close session failed status: %u, session_id: 0x%llx\n", diff --git a/drivers/crypto/virtio/virtio_crypto_common.h b/drivers/crypto/virtio/virtio_crypto_common.h index 2422237ec4e6..59a4c0259456 100644 --- a/drivers/crypto/virtio/virtio_crypto_common.h +++ b/drivers/crypto/virtio/virtio_crypto_common.h @@ -90,6 +90,7 @@ struct virtio_crypto_ctrl_request { struct virtio_crypto_op_ctrl_req ctrl; struct virtio_crypto_session_input input; struct virtio_crypto_inhdr ctrl_status; + struct completion compl; }; struct virtio_crypto_request; @@ -141,5 +142,8 @@ int virtio_crypto_skcipher_algs_register(struct virtio_crypto *vcrypto); void virtio_crypto_skcipher_algs_unregister(struct virtio_crypto *vcrypto); int virtio_crypto_akcipher_algs_register(struct virtio_crypto *vcrypto); void virtio_crypto_akcipher_algs_unregister(struct virtio_crypto *vcrypto); +int virtio_crypto_ctrl_vq_request(struct virtio_crypto *vcrypto, struct scatterlist *sgs[], + unsigned int out_sgs, unsigned int in_sgs, + struct virtio_crypto_ctrl_request *vc_ctrl_req); #endif /* _VIRTIO_CRYPTO_COMMON_H */ diff 
--git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index c6f482db0bc0..60490ffa3df1 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -22,6 +22,56 @@ virtcrypto_clear_request(struct virtio_crypto_request *vc_req) } } +static void virtio_crypto_ctrlq_callback(struct virtio_crypto_ctrl_request *vc_ctrl_req) +{ + complete(&vc_ctrl_req->compl); +} + +static void virtcrypto_ctrlq_callback(struct virtqueue *vq) +{ + struct virtio_crypto *vcrypto = vq->vdev->priv; + struct virtio_crypto_ctrl_request *vc_ctrl_req; + unsigned long flags; + unsigned int len; + + spin_lock_irqsave(&vcrypto->ctrl_lock, flags); + do { + virtqueue_disable_cb(vq); + while ((vc_ctrl_req = virtqueue_get_buf(vq, &len)) != NULL) { + spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags); + virtio_crypto_ctrlq_callback(vc_ctrl_req); + spin_lock_irqsave(&vcrypto->ctrl_lock, flags); + } + if (unlikely(virtqueue_is_broken(vq))) + break; + } while (!virtqueue_enable_cb(vq)); + spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags); +} + +int virtio_crypto_ctrl_vq_request(struct virtio_crypto *vcrypto, struct scatterlist *sgs[], + unsigned int out_sgs, unsigned int in_sgs, + struct virtio_crypto_ctrl_request *vc_ctrl_req) +{ + int err; + unsigned long flags; + + init_completion(&vc_ctrl_req->compl); + + spin_lock_irqsave(&vcrypto->ctrl_lock, flags); + err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, out_sgs, in_sgs, vc_ctrl_req, GFP_ATOMIC); + if (err < 0) { + spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags); + return err; + } + + virtqueue_kick(vcrypto->ctrl_vq); + spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags); + + wait_for_completion(&vc_ctrl_req->compl); + + return 0; +} + static void virtcrypto_dataq_callback(struct virtqueue *vq) { struct virtio_crypto *vcrypto = vq->vdev->priv; @@ -73,7 +123,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi) goto err_names; /* Parameters for control 
virtqueue */ - callbacks[total_vqs - 1] = NULL; + callbacks[total_vqs - 1] = virtcrypto_ctrlq_callback; names[total_vqs - 1] = "controlq"; /* Allocate/initialize parameters for data virtqueues */ diff --git a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c index 6aaf0869b211..e553ccadbcbc 100644 --- a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c @@ -118,7 +118,6 @@ static int virtio_crypto_alg_skcipher_init_session( int encrypt) { struct scatterlist outhdr, key_sg, inhdr, *sgs[3]; - unsigned int tmp; struct virtio_crypto *vcrypto = ctx->vcrypto; int op = encrypt ? VIRTIO_CRYPTO_OP_ENCRYPT : VIRTIO_CRYPTO_OP_DECRYPT; int err; @@ -170,23 +169,9 @@ static int virtio_crypto_alg_skcipher_init_session( sg_init_one(&inhdr, input, sizeof(*input)); sgs[num_out + num_in++] = &inhdr; - spin_lock(&vcrypto->ctrl_lock); - err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, - num_in, vcrypto, GFP_ATOMIC); - if (err < 0) { - spin_unlock(&vcrypto->ctrl_lock); + err = virtio_crypto_ctrl_vq_request(vcrypto, sgs, num_out, num_in, vc_ctrl_req); + if (err < 0) goto out; - } - virtqueue_kick(vcrypto->ctrl_vq); - - /* - * Trapping into the hypervisor, so the request should be - * handled immediately. 
- */ - while (!virtqueue_get_buf(vcrypto->ctrl_vq, &tmp) && - !virtqueue_is_broken(vcrypto->ctrl_vq)) - cpu_relax(); - spin_unlock(&vcrypto->ctrl_lock); if (le32_to_cpu(input->status) != VIRTIO_CRYPTO_OK) { pr_err("virtio_crypto: Create session failed status: %u\n", @@ -212,7 +197,6 @@ static int virtio_crypto_alg_skcipher_close_session( int encrypt) { struct scatterlist outhdr, status_sg, *sgs[2]; - unsigned int tmp; struct virtio_crypto_destroy_session_req *destroy_session; struct virtio_crypto *vcrypto = ctx->vcrypto; int err; @@ -247,19 +231,9 @@ static int virtio_crypto_alg_skcipher_close_session( sg_init_one(&status_sg, &ctrl_status->status, sizeof(ctrl_status->status)); sgs[num_out + num_in++] = &status_sg; - spin_lock(&vcrypto->ctrl_lock); - err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, - num_in, vcrypto, GFP_ATOMIC); - if (err < 0) { - spin_unlock(&vcrypto->ctrl_lock); + err = virtio_crypto_ctrl_vq_request(vcrypto, sgs, num_out, num_in, vc_ctrl_req); + if (err < 0) goto out; - } - virtqueue_kick(vcrypto->ctrl_vq); - - while (!virtqueue_get_buf(vcrypto->ctrl_vq, &tmp) && - !virtqueue_is_broken(vcrypto->ctrl_vq)) - cpu_relax(); - spin_unlock(&vcrypto->ctrl_lock); if (ctrl_status->status != VIRTIO_CRYPTO_OK) { pr_err("virtio_crypto: Close session failed status: %u, session_id: 0x%llx\n", -- cgit v1.2.3 From a36bd0ad9fbf69d0d711b1c105954ce8d6cc144a Mon Sep 17 00:00:00 2001 From: lei he Date: Fri, 6 May 2022 21:16:26 +0800 Subject: virtio-crypto: adjust dst_len at ops callback For some akcipher operations (e.g., decryption of pkcs1pad(rsa)), the length of the returned result may be less than akcipher_req->dst_len, so we need to recalculate the actual dst_len through the virt-queue protocol. Cc: Michael S. Tsirkin Cc: Jason Wang Cc: Gonglei Reviewed-by: Gonglei Signed-off-by: lei he Signed-off-by: zhenwei pi Message-Id: <20220506131627.180784-5-pizhenwei@bytedance.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/crypto/virtio/virtio_crypto_akcipher_algs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c index 382ccec9ab12..2a60d0525cde 100644 --- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -90,9 +90,12 @@ static void virtio_crypto_dataq_akcipher_callback(struct virtio_crypto_request * } akcipher_req = vc_akcipher_req->akcipher_req; - if (vc_akcipher_req->opcode != VIRTIO_CRYPTO_AKCIPHER_VERIFY) + if (vc_akcipher_req->opcode != VIRTIO_CRYPTO_AKCIPHER_VERIFY) { + /* actuall length maybe less than dst buffer */ + akcipher_req->dst_len = len - sizeof(vc_req->status); sg_copy_from_buffer(akcipher_req->dst, sg_nents(akcipher_req->dst), vc_akcipher_req->dst_buf, akcipher_req->dst_len); + } virtio_crypto_akcipher_finalize_req(vc_akcipher_req, akcipher_req, error); } -- cgit v1.2.3 From 4e0d352af04cf4e019d3e45229eaaff9e8ffb33d Mon Sep 17 00:00:00 2001 From: lei he Date: Fri, 6 May 2022 21:16:27 +0800 Subject: virtio-crypto: enable retry for virtio-crypto-dev Enable retry for virtio-crypto-dev, so that crypto-engine can process cipher-requests in parallel. Cc: Michael S. Tsirkin Cc: Jason Wang Cc: Gonglei Reviewed-by: Gonglei Signed-off-by: lei he Signed-off-by: zhenwei pi Message-Id: <20220506131627.180784-6-pizhenwei@bytedance.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/crypto/virtio/virtio_crypto_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index 60490ffa3df1..1198bd306365 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -144,7 +144,8 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi) spin_lock_init(&vi->data_vq[i].lock); vi->data_vq[i].vq = vqs[i]; /* Initialize crypto engine */ - vi->data_vq[i].engine = crypto_engine_alloc_init(dev, 1); + vi->data_vq[i].engine = crypto_engine_alloc_init_and_set(dev, true, NULL, true, + virtqueue_get_vring_size(vqs[i])); if (!vi->data_vq[i].engine) { ret = -ENOMEM; goto err_engine; -- cgit v1.2.3 From 3153234097f6a0d06981565eb3eec3cb37dea8f8 Mon Sep 17 00:00:00 2001 From: Solomon Tan Date: Mon, 18 Apr 2022 02:54:35 +0000 Subject: virtio: Replace unsigned with unsigned int This patch addresses the checkpatch.pl warning where unsigned int is preferred over unsigned. Signed-off-by: Solomon Tan Message-Id: Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio.c | 2 +- drivers/virtio/virtio_balloon.c | 12 ++++++------ drivers/virtio/virtio_mmio.c | 12 ++++++------ drivers/virtio/virtio_pci_common.c | 12 ++++++------ drivers/virtio/virtio_pci_common.h | 10 +++++----- drivers/virtio/virtio_pci_legacy.c | 10 +++++----- drivers/virtio/virtio_pci_modern.c | 12 ++++++------ drivers/virtio/virtio_ring.c | 12 ++++++------ drivers/virtio/virtio_vdpa.c | 10 +++++----- 9 files changed, 46 insertions(+), 46 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 22f15f444f75..ce424c16997d 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -169,7 +169,7 @@ EXPORT_SYMBOL_GPL(virtio_add_status); /* Do some validation, then set FEATURES_OK */ static int virtio_features_ok(struct virtio_device *dev) { - unsigned status; + unsigned int status; int ret; might_sleep(); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index f4c34a2a6b8e..b9737da6c4dd 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -27,7 +27,7 @@ * multiple balloon pages. All memory counters in this driver are in balloon * page units. */ -#define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT) +#define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned int)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT) #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256 /* Maximum number of (4k) pages to deflate on OOM notifications. 
*/ #define VIRTIO_BALLOON_OOM_NR_PAGES 256 @@ -208,10 +208,10 @@ static void set_page_pfns(struct virtio_balloon *vb, page_to_balloon_pfn(page) + i); } -static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) +static unsigned int fill_balloon(struct virtio_balloon *vb, size_t num) { - unsigned num_allocated_pages; - unsigned num_pfns; + unsigned int num_allocated_pages; + unsigned int num_pfns; struct page *page; LIST_HEAD(pages); @@ -272,9 +272,9 @@ static void release_pages_balloon(struct virtio_balloon *vb, } } -static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) +static unsigned int leak_balloon(struct virtio_balloon *vb, size_t num) { - unsigned num_freed_pages; + unsigned int num_freed_pages; struct page *page; struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; LIST_HEAD(pages); diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 56128b9c46eb..b717302dc4ac 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -144,8 +144,8 @@ static int vm_finalize_features(struct virtio_device *vdev) return 0; } -static void vm_get(struct virtio_device *vdev, unsigned offset, - void *buf, unsigned len) +static void vm_get(struct virtio_device *vdev, unsigned int offset, + void *buf, unsigned int len) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); void __iomem *base = vm_dev->base + VIRTIO_MMIO_CONFIG; @@ -186,8 +186,8 @@ static void vm_get(struct virtio_device *vdev, unsigned offset, } } -static void vm_set(struct virtio_device *vdev, unsigned offset, - const void *buf, unsigned len) +static void vm_set(struct virtio_device *vdev, unsigned int offset, + const void *buf, unsigned int len) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); void __iomem *base = vm_dev->base + VIRTIO_MMIO_CONFIG; @@ -345,7 +345,7 @@ static void vm_del_vqs(struct virtio_device *vdev) free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev); } -static struct virtqueue 
*vm_setup_vq(struct virtio_device *vdev, unsigned index, +static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int index, void (*callback)(struct virtqueue *vq), const char *name, bool ctx) { @@ -455,7 +455,7 @@ error_available: return ERR_PTR(err); } -static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, +static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 5046efcffb4c..ca51fcc9daab 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -104,8 +104,8 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); - unsigned flags = PCI_IRQ_MSIX; - unsigned i, v; + unsigned int flags = PCI_IRQ_MSIX; + unsigned int i, v; int err = -ENOMEM; vp_dev->msix_vectors = nvectors; @@ -171,7 +171,7 @@ error: return err; } -static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index, +static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index, void (*callback)(struct virtqueue *vq), const char *name, bool ctx, @@ -275,7 +275,7 @@ void vp_del_vqs(struct virtio_device *vdev) vp_dev->vqs = NULL; } -static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, +static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], bool per_vq_vectors, const bool *ctx, @@ -349,7 +349,7 @@ error_find: return err; } -static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, +static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], const bool *ctx) { @@ -388,7 
+388,7 @@ out_del_vqs: } /* the config->find_vqs() implementation */ -int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, +int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], const bool *ctx, struct irq_affinity *desc) diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index eb17a29fc7ef..23112d84218f 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -38,7 +38,7 @@ struct virtio_pci_vq_info { struct list_head node; /* MSI-X vector (or none) */ - unsigned msix_vector; + unsigned int msix_vector; }; /* Our device structure */ @@ -68,16 +68,16 @@ struct virtio_pci_device { * and I'm too lazy to allocate each name separately. */ char (*msix_names)[256]; /* Number of available vectors */ - unsigned msix_vectors; + unsigned int msix_vectors; /* Vectors allocated, excluding per-vq vectors if any */ - unsigned msix_used_vectors; + unsigned int msix_used_vectors; /* Whether we have vector per vq */ bool per_vq_vectors; struct virtqueue *(*setup_vq)(struct virtio_pci_device *vp_dev, struct virtio_pci_vq_info *info, - unsigned idx, + unsigned int idx, void (*callback)(struct virtqueue *vq), const char *name, bool ctx, @@ -108,7 +108,7 @@ bool vp_notify(struct virtqueue *vq); /* the config->del_vqs() implementation */ void vp_del_vqs(struct virtio_device *vdev); /* the config->find_vqs() implementation */ -int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, +int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], const bool *ctx, struct irq_affinity *desc); diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 6f4e34ce96b8..7fe4caa4b519 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -45,8 +45,8 @@ static int 
vp_finalize_features(struct virtio_device *vdev) } /* virtio config->get() implementation */ -static void vp_get(struct virtio_device *vdev, unsigned offset, - void *buf, unsigned len) +static void vp_get(struct virtio_device *vdev, unsigned int offset, + void *buf, unsigned int len) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); void __iomem *ioaddr = vp_dev->ldev.ioaddr + @@ -61,8 +61,8 @@ static void vp_get(struct virtio_device *vdev, unsigned offset, /* the config->set() implementation. it's symmetric to the config->get() * implementation */ -static void vp_set(struct virtio_device *vdev, unsigned offset, - const void *buf, unsigned len) +static void vp_set(struct virtio_device *vdev, unsigned int offset, + const void *buf, unsigned int len) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); void __iomem *ioaddr = vp_dev->ldev.ioaddr + @@ -109,7 +109,7 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, struct virtio_pci_vq_info *info, - unsigned index, + unsigned int index, void (*callback)(struct virtqueue *vq), const char *name, bool ctx, diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index a2671a20ef77..4acb34409f0b 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -60,8 +60,8 @@ static int vp_finalize_features(struct virtio_device *vdev) } /* virtio config->get() implementation */ -static void vp_get(struct virtio_device *vdev, unsigned offset, - void *buf, unsigned len) +static void vp_get(struct virtio_device *vdev, unsigned int offset, + void *buf, unsigned int len) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_modern_device *mdev = &vp_dev->mdev; @@ -98,8 +98,8 @@ static void vp_get(struct virtio_device *vdev, unsigned offset, /* the config->set() implementation. 
it's symmetric to the config->get() * implementation */ -static void vp_set(struct virtio_device *vdev, unsigned offset, - const void *buf, unsigned len) +static void vp_set(struct virtio_device *vdev, unsigned int offset, + const void *buf, unsigned int len) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_modern_device *mdev = &vp_dev->mdev; @@ -183,7 +183,7 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, struct virtio_pci_vq_info *info, - unsigned index, + unsigned int index, void (*callback)(struct virtqueue *vq), const char *name, bool ctx, @@ -248,7 +248,7 @@ err_map_notify: return ERR_PTR(err); } -static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs, +static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], const bool *ctx, diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 0dc930de72f5..9d0bae4293be 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -809,7 +809,7 @@ static void virtqueue_disable_cb_split(struct virtqueue *_vq) } } -static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) +static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 last_used_idx; @@ -834,7 +834,7 @@ static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) return last_used_idx; } -static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) +static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -1486,7 +1486,7 @@ static void virtqueue_disable_cb_packed(struct virtqueue *_vq) } } -static unsigned virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) +static unsigned int 
virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -2025,7 +2025,7 @@ EXPORT_SYMBOL_GPL(virtqueue_disable_cb); * Caller must ensure we don't call this with other virtqueue * operations at the same time (except where noted). */ -unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) +unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -2046,7 +2046,7 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); * * This does not need to be serialized. */ -bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) +bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -2072,7 +2072,7 @@ EXPORT_SYMBOL_GPL(virtqueue_poll); */ bool virtqueue_enable_cb(struct virtqueue *_vq) { - unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); + unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq); return !virtqueue_poll(_vq, last_used_idx); } diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 46c71653f508..c40f7deb6b5a 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -53,16 +53,16 @@ static struct vdpa_device *vd_get_vdpa(struct virtio_device *vdev) return to_virtio_vdpa_device(vdev)->vdpa; } -static void virtio_vdpa_get(struct virtio_device *vdev, unsigned offset, - void *buf, unsigned len) +static void virtio_vdpa_get(struct virtio_device *vdev, unsigned int offset, + void *buf, unsigned int len) { struct vdpa_device *vdpa = vd_get_vdpa(vdev); vdpa_get_config(vdpa, offset, buf, len); } -static void virtio_vdpa_set(struct virtio_device *vdev, unsigned offset, - const void *buf, unsigned len) +static void virtio_vdpa_set(struct virtio_device *vdev, unsigned int offset, + const void *buf, unsigned int len) { struct vdpa_device *vdpa = vd_get_vdpa(vdev); @@ -263,7 +263,7 @@ static void virtio_vdpa_del_vqs(struct virtio_device *vdev) 
virtio_vdpa_del_vq(vq); } -static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned nvqs, +static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], -- cgit v1.2.3 From 0619eda83d905a9e8eedfb5699d6f520f901281c Mon Sep 17 00:00:00 2001 From: Solomon Tan Date: Mon, 18 Apr 2022 02:56:24 +0000 Subject: virtio: Replace long long int with long long This patch addresses the checkpatch.pl warning that long long is preferred over long long int. Signed-off-by: Solomon Tan Message-Id: Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index b717302dc4ac..839684d672af 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -657,7 +657,7 @@ static int vm_cmdline_set(const char *device, int err; struct resource resources[2] = {}; char *str; - long long int base, size; + long long base, size; unsigned int irq; int processed, consumed = 0; struct platform_device *pdev; -- cgit v1.2.3 From ffbda8e9df10d1784d5427ec199e7d8308e3763f Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Fri, 29 Apr 2022 17:10:30 +0800 Subject: vdpa/vp_vdpa : add vdpa tool support in vp_vdpa This patch adds support for the vdpa tool in vp_vdpa. Here are the example steps: modprobe vp_vdpa modprobe vhost_vdpa echo 0000:00:06.0>/sys/bus/pci/drivers/virtio-pci/unbind echo 1af4 1041 > /sys/bus/pci/drivers/vp-vdpa/new_id vdpa dev add name vdpa1 mgmtdev pci/0000:00:06.0 Signed-off-by: Cindy Lu Message-Id: <20220429091030.547434-1-lulu@redhat.com> Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang --- drivers/vdpa/virtio_pci/vp_vdpa.c | 161 ++++++++++++++++++++++++++++++-------- include/linux/vdpa.h | 2 +- 2 files changed, 130 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index 35acba0e8d6d..04522077735b 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -32,7 +32,7 @@ struct vp_vring { struct vp_vdpa { struct vdpa_device vdpa; - struct virtio_pci_modern_device mdev; + struct virtio_pci_modern_device *mdev; struct vp_vring *vring; struct vdpa_callback config_cb; char msix_name[VP_VDPA_NAME_SIZE]; @@ -41,6 +41,12 @@ struct vp_vdpa { int vectors; }; +struct vp_vdpa_mgmtdev { + struct vdpa_mgmt_dev mgtdev; + struct virtio_pci_modern_device *mdev; + struct vp_vdpa *vp_vdpa; +}; + static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa) { return container_of(vdpa, struct vp_vdpa, vdpa); @@ -50,7 +56,12 @@ static struct virtio_pci_modern_device *vdpa_to_mdev(struct vdpa_device *vdpa) { struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); - return &vp_vdpa->mdev; + return vp_vdpa->mdev; +} + +static struct virtio_pci_modern_device *vp_vdpa_to_mdev(struct vp_vdpa *vp_vdpa) +{ + return vp_vdpa->mdev; } static u64 vp_vdpa_get_device_features(struct vdpa_device *vdpa) @@ -96,7 +107,7 @@ static int vp_vdpa_get_vq_irq(struct vdpa_device *vdpa, u16 idx) static void vp_vdpa_free_irq(struct vp_vdpa *vp_vdpa) { - struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; + struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa); struct pci_dev *pdev = mdev->pci_dev; int i; @@ -143,7 +154,7 @@ static irqreturn_t vp_vdpa_config_handler(int irq, void *arg) static int vp_vdpa_request_irq(struct vp_vdpa *vp_vdpa) { - struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; + struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa); struct pci_dev *pdev = mdev->pci_dev; int i, ret, irq; int queues = vp_vdpa->queues; 
@@ -198,7 +209,7 @@ err: static void vp_vdpa_set_status(struct vdpa_device *vdpa, u8 status) { struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); - struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; + struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa); u8 s = vp_vdpa_get_status(vdpa); if (status & VIRTIO_CONFIG_S_DRIVER_OK && @@ -212,7 +223,7 @@ static void vp_vdpa_set_status(struct vdpa_device *vdpa, u8 status) static int vp_vdpa_reset(struct vdpa_device *vdpa) { struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); - struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; + struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa); u8 s = vp_vdpa_get_status(vdpa); vp_modern_set_status(mdev, 0); @@ -372,7 +383,7 @@ static void vp_vdpa_get_config(struct vdpa_device *vdpa, void *buf, unsigned int len) { struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); - struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; + struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa); u8 old, new; u8 *p; int i; @@ -392,7 +403,7 @@ static void vp_vdpa_set_config(struct vdpa_device *vdpa, unsigned int len) { struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); - struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; + struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa); const u8 *p = buf; int i; @@ -412,7 +423,7 @@ static struct vdpa_notification_area vp_vdpa_get_vq_notification(struct vdpa_device *vdpa, u16 qid) { struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); - struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; + struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa); struct vdpa_notification_area notify; notify.addr = vp_vdpa->vring[qid].notify_pa; @@ -454,38 +465,31 @@ static void vp_vdpa_free_irq_vectors(void *data) pci_free_irq_vectors(data); } -static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) +static int vp_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, + const struct vdpa_dev_set_config *add_config) { - 
struct virtio_pci_modern_device *mdev; + struct vp_vdpa_mgmtdev *vp_vdpa_mgtdev = + container_of(v_mdev, struct vp_vdpa_mgmtdev, mgtdev); + + struct virtio_pci_modern_device *mdev = vp_vdpa_mgtdev->mdev; + struct pci_dev *pdev = mdev->pci_dev; struct device *dev = &pdev->dev; - struct vp_vdpa *vp_vdpa; + struct vp_vdpa *vp_vdpa = NULL; int ret, i; - ret = pcim_enable_device(pdev); - if (ret) - return ret; - vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa, - dev, &vp_vdpa_ops, 1, 1, NULL, false); + dev, &vp_vdpa_ops, 1, 1, name, false); + if (IS_ERR(vp_vdpa)) { dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n"); return PTR_ERR(vp_vdpa); } - mdev = &vp_vdpa->mdev; - mdev->pci_dev = pdev; - - ret = vp_modern_probe(mdev); - if (ret) { - dev_err(&pdev->dev, "Failed to probe modern PCI device\n"); - goto err; - } - - pci_set_master(pdev); - pci_set_drvdata(pdev, vp_vdpa); + vp_vdpa_mgtdev->vp_vdpa = vp_vdpa; vp_vdpa->vdpa.dma_dev = &pdev->dev; vp_vdpa->queues = vp_modern_get_num_queues(mdev); + vp_vdpa->mdev = mdev; ret = devm_add_action_or_reset(dev, vp_vdpa_free_irq_vectors, pdev); if (ret) { @@ -516,7 +520,8 @@ static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) } vp_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR; - ret = vdpa_register_device(&vp_vdpa->vdpa, vp_vdpa->queues); + vp_vdpa->vdpa.mdev = &vp_vdpa_mgtdev->mgtdev; + ret = _vdpa_register_device(&vp_vdpa->vdpa, vp_vdpa->queues); if (ret) { dev_err(&pdev->dev, "Failed to register to vdpa bus\n"); goto err; @@ -529,12 +534,104 @@ err: return ret; } +static void vp_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, + struct vdpa_device *dev) +{ + struct vp_vdpa_mgmtdev *vp_vdpa_mgtdev = + container_of(v_mdev, struct vp_vdpa_mgmtdev, mgtdev); + + struct vp_vdpa *vp_vdpa = vp_vdpa_mgtdev->vp_vdpa; + + _vdpa_unregister_device(&vp_vdpa->vdpa); + vp_vdpa_mgtdev->vp_vdpa = NULL; +} + +static const struct vdpa_mgmtdev_ops vp_vdpa_mdev_ops = { + .dev_add = vp_vdpa_dev_add, + .dev_del = 
vp_vdpa_dev_del, +}; + +static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct vp_vdpa_mgmtdev *vp_vdpa_mgtdev = NULL; + struct vdpa_mgmt_dev *mgtdev; + struct device *dev = &pdev->dev; + struct virtio_pci_modern_device *mdev = NULL; + struct virtio_device_id *mdev_id = NULL; + int err; + + vp_vdpa_mgtdev = kzalloc(sizeof(*vp_vdpa_mgtdev), GFP_KERNEL); + if (!vp_vdpa_mgtdev) + return -ENOMEM; + + mgtdev = &vp_vdpa_mgtdev->mgtdev; + mgtdev->ops = &vp_vdpa_mdev_ops; + mgtdev->device = dev; + + mdev = kzalloc(sizeof(struct virtio_pci_modern_device), GFP_KERNEL); + if (!mdev) { + err = -ENOMEM; + goto mdev_err; + } + + mdev_id = kzalloc(sizeof(struct virtio_device_id), GFP_KERNEL); + if (!mdev_id) { + err = -ENOMEM; + goto mdev_id_err; + } + + vp_vdpa_mgtdev->mdev = mdev; + mdev->pci_dev = pdev; + + err = pcim_enable_device(pdev); + if (err) { + goto probe_err; + } + + err = vp_modern_probe(mdev); + if (err) { + dev_err(&pdev->dev, "Failed to probe modern PCI device\n"); + goto probe_err; + } + + mdev_id->device = mdev->id.device; + mdev_id->vendor = mdev->id.vendor; + mgtdev->id_table = mdev_id; + mgtdev->max_supported_vqs = vp_modern_get_num_queues(mdev); + mgtdev->supported_features = vp_modern_get_features(mdev); + pci_set_master(pdev); + pci_set_drvdata(pdev, vp_vdpa_mgtdev); + + err = vdpa_mgmtdev_register(mgtdev); + if (err) { + dev_err(&pdev->dev, "Failed to register vdpa mgmtdev device\n"); + goto register_err; + } + + return 0; + +register_err: + vp_modern_remove(vp_vdpa_mgtdev->mdev); +probe_err: + kfree(mdev_id); +mdev_id_err: + kfree(mdev); +mdev_err: + kfree(vp_vdpa_mgtdev); + return err; +} + static void vp_vdpa_remove(struct pci_dev *pdev) { - struct vp_vdpa *vp_vdpa = pci_get_drvdata(pdev); + struct vp_vdpa_mgmtdev *vp_vdpa_mgtdev = pci_get_drvdata(pdev); + struct virtio_pci_modern_device *mdev = NULL; - vp_modern_remove(&vp_vdpa->mdev); - vdpa_unregister_device(&vp_vdpa->vdpa); + mdev = vp_vdpa_mgtdev->mdev; + 
vp_modern_remove(mdev); + vdpa_mgmtdev_unregister(&vp_vdpa_mgtdev->mgtdev); + kfree(&vp_vdpa_mgtdev->mgtdev.id_table); + kfree(mdev); + kfree(vp_vdpa_mgtdev); } static struct pci_driver vp_vdpa_driver = { diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index f336d253db3d..15af802d41c4 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -492,7 +492,7 @@ struct vdpa_mgmtdev_ops { struct vdpa_mgmt_dev { struct device *device; const struct vdpa_mgmtdev_ops *ops; - const struct virtio_device_id *id_table; + struct virtio_device_id *id_table; u64 config_attr_mask; struct list_head list; u64 supported_features; -- cgit v1.2.3 From ac33f84ba5ffcc6c8a4c3ee98c92f722feb64f43 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Sun, 24 Apr 2022 15:28:06 +0800 Subject: vDPA/ifcvf: fix uninitialized config_vector warning Static checkers are not informed that config_vector is controlled by vf->msix_vector_status, which can only be MSIX_VECTOR_PER_VQ_AND_CONFIG, MSIX_VECTOR_SHARED_VQ_AND_CONFIG and MSIX_VECTOR_DEV_SHARED. This commit uses an "if...elseif...else" code block to tell the checkers that it is a complete set, and config_vector can be initialized anyway Signed-off-by: Zhu Lingshan Reviewed-by: Dan Carpenter Message-Id: <20220424072806.1083189-1-lingshan.zhu@intel.com> Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang --- drivers/vdpa/ifcvf/ifcvf_main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index c1767a0ce630..750e5f23406d 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -290,16 +290,16 @@ static int ifcvf_request_config_irq(struct ifcvf_adapter *adapter) struct ifcvf_hw *vf = &adapter->vf; int config_vector, ret; - if (vf->msix_vector_status == MSIX_VECTOR_DEV_SHARED) - return 0; - if (vf->msix_vector_status == MSIX_VECTOR_PER_VQ_AND_CONFIG) - /* vector 0 ~ vf->nr_vring for vqs, num vf->nr_vring vector for config interrupt */ config_vector = vf->nr_vring; - - if (vf->msix_vector_status == MSIX_VECTOR_SHARED_VQ_AND_CONFIG) + else if (vf->msix_vector_status == MSIX_VECTOR_SHARED_VQ_AND_CONFIG) /* vector 0 for vqs and 1 for config interrupt */ config_vector = 1; + else if (vf->msix_vector_status == MSIX_VECTOR_DEV_SHARED) + /* re-use the vqs vector */ + return 0; + else + return -EINVAL; snprintf(vf->config_msix_name, 256, "ifcvf[%s]-config\n", pci_name(pdev)); -- cgit v1.2.3 From 242436973831aa97e8ce19533c6c912ea8def31b Mon Sep 17 00:00:00 2001 From: Eugenio Pérez Date: Thu, 19 May 2022 16:59:19 +0200 Subject: vdpasim: allow to enable a vq repeatedly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Code must be resilient to enable a queue many times. At the moment the queue is resetting so it's definitely not the expected behavior. v2: set vq->ready = 0 at disable. Fixes: 2c53d0f64c06 ("vdpasim: vDPA device simulator") Cc: stable@vger.kernel.org Signed-off-by: Eugenio Pérez Message-Id: <20220519145919.772896-1-eperezma@redhat.com> Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Stefano Garzarella --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 51bd0bafce06..50d721072beb 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -364,11 +364,14 @@ static void vdpasim_set_vq_ready(struct vdpa_device *vdpa, u16 idx, bool ready) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + bool old_ready; spin_lock(&vdpasim->lock); + old_ready = vq->ready; vq->ready = ready; - if (vq->ready) + if (vq->ready && !old_ready) { vdpasim_queue_ready(vdpasim, idx); + } spin_unlock(&vdpasim->lock); } -- cgit v1.2.3 From 2536b2ca15418c517e3629cc3dd757f811ce52b2 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 27 May 2022 14:01:12 +0800 Subject: virtio: use virtio_device_ready() in virtio_device_restore() It will allow us to do extension on virtio_device_ready() without duplicating code. Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: "Paul E. McKenney" Cc: Marc Zyngier Cc: Halil Pasic Cc: Cornelia Huck Cc: Vineeth Vijayan Cc: Peter Oberparleiter Cc: linux-s390@vger.kernel.org Reviewed-by: Cornelia Huck Signed-off-by: Stefano Garzarella Signed-off-by: Jason Wang Message-Id: <20220527060120.20964-2-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo --- drivers/virtio/virtio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index ce424c16997d..938e975029d4 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -526,8 +526,9 @@ int virtio_device_restore(struct virtio_device *dev) goto err; } - /* Finally, tell the device we're all set */ - virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); + /* If restore didn't do it, mark device DRIVER_OK ourselves. 
*/ + if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK)) + virtio_device_ready(dev); virtio_config_enable(dev); -- cgit v1.2.3 From 0aa96837c3ded5de58782a07b88176f2f1d01404 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 27 May 2022 14:01:13 +0800 Subject: virtio: use virtio_reset_device() when possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows us to do common extension without duplicating code. Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: "Paul E. McKenney" Cc: Marc Zyngier Cc: Halil Pasic Cc: Cornelia Huck Cc: Vineeth Vijayan Cc: Peter Oberparleiter Cc: linux-s390@vger.kernel.org Reviewed-by: Cornelia Huck Signed-off-by: Jason Wang Message-Id: <20220527060120.20964-3-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo Reviewed-by: Eugenio Pérez --- drivers/virtio/virtio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 938e975029d4..aa1eb5132767 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -430,7 +430,7 @@ int register_virtio_device(struct virtio_device *dev) /* We always start by resetting the device, in case a previous * driver messed it up. This also tests that code path a little. */ - dev->config->reset(dev); + virtio_reset_device(dev); /* Acknowledge that we've seen the device. */ virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); @@ -496,7 +496,7 @@ int virtio_device_restore(struct virtio_device *dev) /* We always start by resetting the device, in case a previous * driver messed it up. */ - dev->config->reset(dev); + virtio_reset_device(dev); /* Acknowledge that we've seen the device. 
*/ virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); -- cgit v1.2.3 From 48b3dd24382139a239a43bca7c1c632438d1dfb2 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 27 May 2022 14:01:15 +0800 Subject: virtio-pci: implement synchronize_cbs() We can simply reuse vp_synchronize_vectors() for .synchronize_cbs(). Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: "Paul E. McKenney" Cc: Marc Zyngier Cc: Halil Pasic Cc: Cornelia Huck Cc: Vineeth Vijayan Cc: Peter Oberparleiter Cc: linux-s390@vger.kernel.org Reviewed-by: Cornelia Huck Signed-off-by: Jason Wang Message-Id: <20220527060120.20964-5-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo --- drivers/virtio/virtio_pci_legacy.c | 1 + drivers/virtio/virtio_pci_modern.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 7fe4caa4b519..a5e5721145c7 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -192,6 +192,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = { .reset = vp_reset, .find_vqs = vp_find_vqs, .del_vqs = vp_del_vqs, + .synchronize_cbs = vp_synchronize_vectors, .get_features = vp_get_features, .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 4acb34409f0b..623906b4996c 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -394,6 +394,7 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { .reset = vp_reset, .find_vqs = vp_modern_find_vqs, .del_vqs = vp_del_vqs, + .synchronize_cbs = vp_synchronize_vectors, .get_features = vp_get_features, .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, @@ -411,6 +412,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = { .reset = vp_reset, .find_vqs = vp_modern_find_vqs, .del_vqs = vp_del_vqs, + 
.synchronize_cbs = vp_synchronize_vectors, .get_features = vp_get_features, .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, -- cgit v1.2.3 From 9e9b2893284fa0681ab5c0632d02214385a174c3 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 27 May 2022 14:01:16 +0800 Subject: virtio-mmio: implement synchronize_cbs() Simply synchronize the platform irq that is used by us. Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: "Paul E. McKenney" Cc: Marc Zyngier Cc: Halil Pasic Cc: Cornelia Huck Cc: Vineeth Vijayan Cc: Peter Oberparleiter Cc: linux-s390@vger.kernel.org Reviewed-by: Cornelia Huck Signed-off-by: Jason Wang Message-Id: <20220527060120.20964-6-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo --- drivers/virtio/virtio_mmio.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers') diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 839684d672af..c9699a59f93c 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -345,6 +345,13 @@ static void vm_del_vqs(struct virtio_device *vdev) free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev); } +static void vm_synchronize_cbs(struct virtio_device *vdev) +{ + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + + synchronize_irq(platform_get_irq(vm_dev->pdev, 0)); +} + static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int index, void (*callback)(struct virtqueue *vq), const char *name, bool ctx) @@ -541,6 +548,7 @@ static const struct virtio_config_ops virtio_mmio_config_ops = { .finalize_features = vm_finalize_features, .bus_name = vm_bus_name, .get_shm_region = vm_get_shm_region, + .synchronize_cbs = vm_synchronize_cbs, }; -- cgit v1.2.3 From 3a232277c1ac6df70552129a9722abc6ab250128 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 27 May 2022 14:01:17 +0800 Subject: virtio-ccw: implement synchronize_cbs() This patch tries to implement the synchronize_cbs() for ccw. 
For the vring_interrupt() that is called via virtio_airq_handler(), the synchronization is simply done via the airq_info's lock. For the vring_interrupt() that is called via virtio_ccw_int_handler(), a per device rwlock is introduced and used in the synchronization method. Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: "Paul E. McKenney" Cc: Marc Zyngier Cc: Halil Pasic Cc: Cornelia Huck Cc: Vineeth Vijayan Cc: Peter Oberparleiter Cc: linux-s390@vger.kernel.org Reviewed-by: Halil Pasic Signed-off-by: Jason Wang Message-Id: <20220527060120.20964-7-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/s390/virtio/virtio_ccw.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'drivers') diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index d35e7a3f7067..c188e4f20ca3 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -62,6 +62,7 @@ struct virtio_ccw_device { unsigned int revision; /* Transport revision */ wait_queue_head_t wait_q; spinlock_t lock; + rwlock_t irq_lock; struct mutex io_lock; /* Serializes I/O requests */ struct list_head virtqueues; bool is_thinint; @@ -984,6 +985,30 @@ static const char *virtio_ccw_bus_name(struct virtio_device *vdev) return dev_name(&vcdev->cdev->dev); } +static void virtio_ccw_synchronize_cbs(struct virtio_device *vdev) +{ + struct virtio_ccw_device *vcdev = to_vc_device(vdev); + struct airq_info *info = vcdev->airq_info; + + if (info) { + /* + * This device uses adapter interrupts: synchronize with + * vring_interrupt() called by virtio_airq_handler() + * via the indicator area lock. 
+ */ + write_lock_irq(&info->lock); + write_unlock_irq(&info->lock); + } else { + /* This device uses classic interrupts: synchronize + * with vring_interrupt() called by + * virtio_ccw_int_handler() via the per-device + * irq_lock + */ + write_lock_irq(&vcdev->irq_lock); + write_unlock_irq(&vcdev->irq_lock); + } +} + static const struct virtio_config_ops virtio_ccw_config_ops = { .get_features = virtio_ccw_get_features, .finalize_features = virtio_ccw_finalize_features, @@ -995,6 +1020,7 @@ static const struct virtio_config_ops virtio_ccw_config_ops = { .find_vqs = virtio_ccw_find_vqs, .del_vqs = virtio_ccw_del_vqs, .bus_name = virtio_ccw_bus_name, + .synchronize_cbs = virtio_ccw_synchronize_cbs, }; @@ -1106,6 +1132,8 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev, vcdev->err = -EIO; } virtio_ccw_check_activity(vcdev, activity); + /* Interrupts are disabled here */ + read_lock(&vcdev->irq_lock); for_each_set_bit(i, indicators(vcdev), sizeof(*indicators(vcdev)) * BITS_PER_BYTE) { /* The bit clear must happen before the vring kick. */ @@ -1114,6 +1142,7 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev, vq = virtio_ccw_vq_by_ind(vcdev, i); vring_interrupt(0, vq); } + read_unlock(&vcdev->irq_lock); if (test_bit(0, indicators2(vcdev))) { virtio_config_changed(&vcdev->vdev); clear_bit(0, indicators2(vcdev)); @@ -1284,6 +1313,7 @@ static int virtio_ccw_online(struct ccw_device *cdev) init_waitqueue_head(&vcdev->wait_q); INIT_LIST_HEAD(&vcdev->virtqueues); spin_lock_init(&vcdev->lock); + rwlock_init(&vcdev->irq_lock); mutex_init(&vcdev->io_lock); spin_lock_irqsave(get_ccwdev_lock(cdev), flags); -- cgit v1.2.3 From be83f04d2529e8dc4273efdd1ccf7b7502741071 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 27 May 2022 14:01:18 +0800 Subject: virtio: allow to unbreak virtqueue This patch allows the newly introduced __virtio_unbreak_device() to unbreak the virtqueue. Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: "Paul E. 
McKenney" Cc: Marc Zyngier Cc: Halil Pasic Cc: Cornelia Huck Cc: Vineeth Vijayan Cc: Peter Oberparleiter Cc: linux-s390@vger.kernel.org Signed-off-by: Jason Wang Message-Id: <20220527060120.20964-8-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo --- drivers/virtio/virtio_ring.c | 22 ++++++++++++++++++++++ include/linux/virtio.h | 1 + 2 files changed, 23 insertions(+) (limited to 'drivers') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 9d0bae4293be..9c231e1fded7 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2395,6 +2395,28 @@ void virtio_break_device(struct virtio_device *dev) } EXPORT_SYMBOL_GPL(virtio_break_device); +/* + * This should allow the device to be used by the driver. You may + * need to grab appropriate locks to flush the write to + * vq->broken. This should only be used in some specific case e.g + * (probing and restoring). This function should only be called by the + * core, not directly by the driver. + */ +void __virtio_unbreak_device(struct virtio_device *dev) +{ + struct virtqueue *_vq; + + spin_lock(&dev->vqs_list_lock); + list_for_each_entry(_vq, &dev->vqs, list) { + struct vring_virtqueue *vq = to_vvq(_vq); + + /* Pairs with READ_ONCE() in virtqueue_is_broken(). 
*/ + WRITE_ONCE(vq->broken, false); + } + spin_unlock(&dev->vqs_list_lock); +} +EXPORT_SYMBOL_GPL(__virtio_unbreak_device); + dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 5464f398912a..d8fdf170637c 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -131,6 +131,7 @@ void unregister_virtio_device(struct virtio_device *dev); bool is_virtio_device(struct device *dev); void virtio_break_device(struct virtio_device *dev); +void __virtio_unbreak_device(struct virtio_device *dev); void virtio_config_changed(struct virtio_device *dev); #ifdef CONFIG_PM_SLEEP -- cgit v1.2.3 From 8b4ec69d7e098a7ddf832e1e7840de53ed474c77 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 27 May 2022 14:01:19 +0800 Subject: virtio: harden vring IRQ This is a rework on the previous IRQ hardening that is done for virtio-pci where several drawbacks were found and were reverted: 1) try to use IRQF_NO_AUTOEN which is not friendly to affinity managed IRQ that is used by some device such as virtio-blk 2) done only for PCI transport The vq->broken is re-used in this patch for implementing the IRQ hardening. The vq->broken is set to true during both initialization and reset. And the vq->broken is set to false in virtio_device_ready(). Then vring_interrupt() can check and return when vq->broken is true. And in this case, switch to return IRQ_NONE to let the interrupt core aware of such invalid interrupt to prevent IRQ storm. The reason of using a per queue variable instead of a per device one is that we may need it for per queue reset hardening in the future. Note that the hardening is only done for vring interrupt since the config interrupt hardening is already done in commit 22b7050a024d7 ("virtio: defer config changed notifications"). 
But the method that is used by config interrupt can't be reused by the vring interrupt handler because it uses spinlock to do the synchronization which is expensive. Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: "Paul E. McKenney" Cc: Marc Zyngier Cc: Halil Pasic Cc: Cornelia Huck Cc: Vineeth Vijayan Cc: Peter Oberparleiter Cc: linux-s390@vger.kernel.org Signed-off-by: Jason Wang Message-Id: <20220527060120.20964-9-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo --- drivers/s390/virtio/virtio_ccw.c | 4 ++++ drivers/virtio/virtio.c | 15 ++++++++++++--- drivers/virtio/virtio_mmio.c | 5 +++++ drivers/virtio/virtio_pci_modern_dev.c | 5 +++++ drivers/virtio/virtio_ring.c | 11 +++++++---- include/linux/virtio_config.h | 20 ++++++++++++++++++++ 6 files changed, 53 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index c188e4f20ca3..97e51c34e6cf 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -971,6 +971,10 @@ static void virtio_ccw_set_status(struct virtio_device *vdev, u8 status) ccw->flags = 0; ccw->count = sizeof(status); ccw->cda = (__u32)(unsigned long)&vcdev->dma_area->status; + /* We use ssch for setting the status which is a serializing + * instruction that guarantees the memory writes have + * completed before ssch. + */ ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_STATUS); /* Write failed? We assume status is unchanged. 
*/ if (ret) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index aa1eb5132767..95fac4c97c8b 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -220,6 +220,15 @@ static int virtio_features_ok(struct virtio_device *dev) * */ void virtio_reset_device(struct virtio_device *dev) { + /* + * The below virtio_synchronize_cbs() guarantees that any + * interrupt for this line arriving after + * virtio_synchronize_cbs() has completed is guaranteed to see + * vq->broken as true. + */ + virtio_break_device(dev); + virtio_synchronize_cbs(dev); + dev->config->reset(dev); } EXPORT_SYMBOL_GPL(virtio_reset_device); @@ -428,6 +437,9 @@ int register_virtio_device(struct virtio_device *dev) dev->config_enabled = false; dev->config_change_pending = false; + INIT_LIST_HEAD(&dev->vqs); + spin_lock_init(&dev->vqs_list_lock); + /* We always start by resetting the device, in case a previous * driver messed it up. This also tests that code path a little. */ virtio_reset_device(dev); @@ -435,9 +447,6 @@ int register_virtio_device(struct virtio_device *dev) /* Acknowledge that we've seen the device. */ virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); - INIT_LIST_HEAD(&dev->vqs); - spin_lock_init(&dev->vqs_list_lock); - /* * device_add() causes the bus infrastructure to look for a matching * driver. diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index c9699a59f93c..f9a36bc7ac27 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -253,6 +253,11 @@ static void vm_set_status(struct virtio_device *vdev, u8 status) /* We should never be setting status to 0. */ BUG_ON(status == 0); + /* + * Per memory-barriers.txt, wmb() is not needed to guarantee + * that the cache coherent memory writes have completed + * before writing to the MMIO region. 
+ */ writel(status, vm_dev->base + VIRTIO_MMIO_STATUS); } diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index 4093f9cca7a6..a0fa14f28a7f 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c +++ b/drivers/virtio/virtio_pci_modern_dev.c @@ -467,6 +467,11 @@ void vp_modern_set_status(struct virtio_pci_modern_device *mdev, { struct virtio_pci_common_cfg __iomem *cfg = mdev->common; + /* + * Per memory-barriers.txt, wmb() is not needed to guarantee + * that the cache coherent memory writes have completed + * before writing to the MMIO region. + */ vp_iowrite8(status, &cfg->device_status); } EXPORT_SYMBOL_GPL(vp_modern_set_status); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 9c231e1fded7..13a7348cedff 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1688,7 +1688,7 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->we_own_ring = true; vq->notify = notify; vq->weak_barriers = weak_barriers; - vq->broken = false; + vq->broken = true; vq->last_used_idx = 0; vq->event_triggered = false; vq->num_added = 0; @@ -2134,8 +2134,11 @@ irqreturn_t vring_interrupt(int irq, void *_vq) return IRQ_NONE; } - if (unlikely(vq->broken)) - return IRQ_HANDLED; + if (unlikely(vq->broken)) { + dev_warn_once(&vq->vq.vdev->dev, + "virtio vring IRQ raised before DRIVER_OK"); + return IRQ_NONE; + } /* Just a hint for performance: so it's ok that this can be racy! 
*/ if (vq->event) @@ -2177,7 +2180,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->we_own_ring = false; vq->notify = notify; vq->weak_barriers = weak_barriers; - vq->broken = false; + vq->broken = true; vq->last_used_idx = 0; vq->event_triggered = false; vq->num_added = 0; diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 25be018810a7..d4edfd7d91bb 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -256,6 +256,26 @@ void virtio_device_ready(struct virtio_device *dev) unsigned status = dev->config->get_status(dev); BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK); + + /* + * The virtio_synchronize_cbs() makes sure vring_interrupt() + * will see the driver specific setup if it sees vq->broken + * as false (even if the notifications come before DRIVER_OK). + */ + virtio_synchronize_cbs(dev); + __virtio_unbreak_device(dev); + /* + * The transport should ensure the visibility of vq->broken + * before setting DRIVER_OK. See the comments for the transport + * specific set_status() method. + * + * A well behaved device will only notify a virtqueue after + * DRIVER_OK, this means the device should "see" the coherent + * memory write that set vq->broken as false which is done by + * the driver when it sees DRIVER_OK, then the following + * driver's vring_interrupt() will see vq->broken as false so + * we won't lose any notification. + */ dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK); } -- cgit v1.2.3 From 4f58afd6eb177a75dec61c65fdc72db31db93c82 Mon Sep 17 00:00:00 2001 From: keliu Date: Fri, 27 May 2022 07:33:02 +0000 Subject: virtio: Directly use ida_alloc()/free() Use ida_alloc()/ida_free() instead of deprecated ida_simple_get()/ida_simple_remove(). Signed-off-by: keliu Message-Id: <20220527073302.2474073-1-liuke94@huawei.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 95fac4c97c8b..ef04a96942bf 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -422,7 +422,7 @@ int register_virtio_device(struct virtio_device *dev) device_initialize(&dev->dev); /* Assign a unique device index and hence name. */ - err = ida_simple_get(&virtio_index_ida, 0, 0, GFP_KERNEL); + err = ida_alloc(&virtio_index_ida, GFP_KERNEL); if (err < 0) goto out; @@ -460,7 +460,7 @@ int register_virtio_device(struct virtio_device *dev) out_of_node_put: of_node_put(dev->dev.of_node); out_ida_remove: - ida_simple_remove(&virtio_index_ida, dev->index); + ida_free(&virtio_index_ida, dev->index); out: virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); return err; @@ -478,7 +478,7 @@ void unregister_virtio_device(struct virtio_device *dev) int index = dev->index; /* save for after device release */ device_unregister(&dev->dev); - ida_simple_remove(&virtio_index_ida, index); + ida_free(&virtio_index_ida, index); } EXPORT_SYMBOL_GPL(unregister_virtio_device); -- cgit v1.2.3 From 1f97b9785076d32fbabb8fa23889f9969c84118d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 23 May 2022 11:30:57 +0300 Subject: vdpasim: Off by one in vdpasim_set_group_asid() The > comparison needs to be >= to prevent an out of bounds access of the vdpasim->iommu[] array. The vdpasim->iommu[] is allocated in vdpasim_create() and it has vdpasim->dev_attr.nas elements. Fixes: 87e5afeac247 ("vdpasim: control virtqueue support") Signed-off-by: Dan Carpenter Message-Id: Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 50d721072beb..0f2865899647 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -567,7 +567,7 @@ static int vdpasim_set_group_asid(struct vdpa_device *vdpa, unsigned int group, if (group > vdpasim->dev_attr.ngroups) return -EINVAL; - if (asid > vdpasim->dev_attr.nas) + if (asid >= vdpasim->dev_attr.nas) return -EINVAL; iommu = &vdpasim->iommu[asid]; -- cgit v1.2.3 From f4a8686ec7a34f940d36784872036fbacb1b4623 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 23 May 2022 11:33:26 +0300 Subject: vhost-vdpa: return -EFAULT on copy_to_user() failure The copy_to_user() function returns the number of bytes remaining to be copied. However, we need to return a negative error code, -EFAULT, to the user. Fixes: 87f4c217413a ("vhost-vdpa: introduce uAPI to get the number of virtqueue groups") Fixes: e96ef636f154 ("vhost-vdpa: introduce uAPI to get the number of address spaces") Signed-off-by: Dan Carpenter Message-Id: Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Stefano Garzarella --- drivers/vhost/vdpa.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 3e86080041fc..935a1d0ddb97 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -609,11 +609,13 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, r = vhost_vdpa_get_vring_num(v, argp); break; case VHOST_VDPA_GET_GROUP_NUM: - r = copy_to_user(argp, &v->vdpa->ngroups, - sizeof(v->vdpa->ngroups)); + if (copy_to_user(argp, &v->vdpa->ngroups, + sizeof(v->vdpa->ngroups))) + r = -EFAULT; break; case VHOST_VDPA_GET_AS_NUM: - r = copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)); + if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas))) + r = -EFAULT; break; case VHOST_SET_LOG_BASE: case VHOST_SET_LOG_FD: -- cgit v1.2.3 From 6fcf224c379f07c73fb972007c93db8c05d930d7 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 17 May 2022 13:08:43 -0500 Subject: vhost: get rid of vhost_poll_flush() wrapper vhost_poll_flush() is a simple wrapper around vhost_work_dev_flush(). It gives wrong impression that we are doing some work over vhost_poll, while in fact it flushes vhost_poll->dev. It only complicate understanding of the code and leads to mistakes like flushing the same vhost_dev several times in a row. Just remove vhost_poll_flush() and call vhost_work_dev_flush() directly. Signed-off-by: Andrey Ryabinin [merge vhost_poll_flush removal from Stefano Garzarella] Signed-off-by: Mike Christie Reviewed-by: Chaitanya Kulkarni Acked-by: Jason Wang Reviewed-by: Stefano Garzarella Message-Id: <20220517180850.198915-2-michael.christie@oracle.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/net.c | 4 ++-- drivers/vhost/test.c | 2 +- drivers/vhost/vhost.c | 12 ++---------- drivers/vhost/vhost.h | 1 - drivers/vhost/vsock.c | 2 +- 5 files changed, 6 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 297b5db47454..d648faf015d3 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1376,8 +1376,8 @@ static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock, static void vhost_net_flush_vq(struct vhost_net *n, int index) { - vhost_poll_flush(n->poll + index); - vhost_poll_flush(&n->vqs[index].vq.poll); + vhost_work_dev_flush(n->poll[index].dev); + vhost_work_dev_flush(n->vqs[index].vq.poll.dev); } static void vhost_net_flush(struct vhost_net *n) diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 05740cba1cd8..f0ac9e35f5d6 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -146,7 +146,7 @@ static void vhost_test_stop(struct vhost_test *n, void **privatep) static void vhost_test_flush_vq(struct vhost_test *n, int index) { - vhost_poll_flush(&n->vqs[index].poll); + vhost_work_dev_flush(n->vqs[index].poll.dev); } static void vhost_test_flush(struct vhost_test *n) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 5022c648d9c0..43f6ac2d21cd 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -245,14 +245,6 @@ void vhost_work_dev_flush(struct vhost_dev *dev) } EXPORT_SYMBOL_GPL(vhost_work_dev_flush); -/* Flush any work that has been scheduled. When calling this, don't hold any - * locks that are also used by the callback. 
*/ -void vhost_poll_flush(struct vhost_poll *poll) -{ - vhost_work_dev_flush(poll->dev); -} -EXPORT_SYMBOL_GPL(vhost_poll_flush); - void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) { if (!dev->worker) @@ -663,7 +655,7 @@ void vhost_dev_stop(struct vhost_dev *dev) for (i = 0; i < dev->nvqs; ++i) { if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick) { vhost_poll_stop(&dev->vqs[i]->poll); - vhost_poll_flush(&dev->vqs[i]->poll); + vhost_work_dev_flush(dev->vqs[i]->poll.dev); } } } @@ -1732,7 +1724,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg mutex_unlock(&vq->mutex); if (pollstop && vq->handle_kick) - vhost_poll_flush(&vq->poll); + vhost_work_dev_flush(vq->poll.dev); return r; } EXPORT_SYMBOL_GPL(vhost_vring_ioctl); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 9f238d6c7b58..b85410124305 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -44,7 +44,6 @@ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, __poll_t mask, struct vhost_dev *dev); int vhost_poll_start(struct vhost_poll *poll, struct file *file); void vhost_poll_stop(struct vhost_poll *poll); -void vhost_poll_flush(struct vhost_poll *poll); void vhost_poll_queue(struct vhost_poll *poll); void vhost_work_dev_flush(struct vhost_dev *dev); diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index e6c9d41db1de..a4c8ae92a0fb 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -709,7 +709,7 @@ static void vhost_vsock_flush(struct vhost_vsock *vsock) for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) if (vsock->vqs[i].handle_kick) - vhost_poll_flush(&vsock->vqs[i].poll); + vhost_work_dev_flush(vsock->vqs[i].poll.dev); vhost_work_dev_flush(&vsock->dev); } -- cgit v1.2.3 From 6ca84326c283e2f5d4ea920dec6f9d4272e4d124 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 17 May 2022 13:08:44 -0500 Subject: vhost: flush dev once during vhost_dev_stop When vhost_work_dev_flush returns all work 
queued at that time will have completed. There is then no need to flush after every vhost_poll_stop call, and we can move the flush call to after the loop that stops the pollers. Signed-off-by: Mike Christie Acked-by: Jason Wang Reviewed-by: Stefano Garzarella Message-Id: <20220517180850.198915-3-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 43f6ac2d21cd..d82b9394d89a 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -653,11 +653,11 @@ void vhost_dev_stop(struct vhost_dev *dev) int i; for (i = 0; i < dev->nvqs; ++i) { - if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick) { + if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick) vhost_poll_stop(&dev->vqs[i]->poll); - vhost_work_dev_flush(dev->vqs[i]->poll.dev); - } } + + vhost_work_dev_flush(dev); } EXPORT_SYMBOL_GPL(vhost_dev_stop); -- cgit v1.2.3 From 2c029f3298594c5160ae1036f03db607fa891484 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 17 May 2022 13:08:45 -0500 Subject: vhost_net: get rid of vhost_net_flush_vq() and extra flush calls vhost_net_flush_vq() calls vhost_work_dev_flush() twice passing vhost_dev pointer obtained via 'n->poll[index].dev' and 'n->vqs[index].vq.poll.dev'. This is actually the same pointer, initialized in vhost_net_open()/vhost_dev_init()/vhost_poll_init() Remove vhost_net_flush_vq() and call vhost_work_dev_flush() directly. Do the flushes only once instead of several flush calls in a row which seems rather useless. Signed-off-by: Andrey Ryabinin [drop vhost_dev forward declaration in vhost.h] Signed-off-by: Mike Christie Acked-by: Jason Wang Reviewed-by: Stefano Garzarella Message-Id: <20220517180850.198915-4-michael.christie@oracle.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/net.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d648faf015d3..20eb076300bb 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1374,16 +1374,9 @@ static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock, *rx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_RX].vq); } -static void vhost_net_flush_vq(struct vhost_net *n, int index) -{ - vhost_work_dev_flush(n->poll[index].dev); - vhost_work_dev_flush(n->vqs[index].vq.poll.dev); -} - static void vhost_net_flush(struct vhost_net *n) { - vhost_net_flush_vq(n, VHOST_NET_VQ_TX); - vhost_net_flush_vq(n, VHOST_NET_VQ_RX); + vhost_work_dev_flush(&n->dev); if (n->vqs[VHOST_NET_VQ_TX].ubufs) { mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); n->tx_flush = true; @@ -1572,7 +1565,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) } if (oldsock) { - vhost_net_flush_vq(n, index); + vhost_work_dev_flush(&n->dev); sockfd_put(oldsock); } -- cgit v1.2.3 From c5514758ddd9a774d879b098bf7364c45a76bed5 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 17 May 2022 13:08:46 -0500 Subject: vhost_test: remove vhost_test_flush_vq() vhost_test_flush_vq() is just a simple wrapper around vhost_work_dev_flush() which seems to have no value. It's just easier to call vhost_work_dev_flush() directly. Besides there is no point in obtaining vhost_dev pointer via 'n->vqs[index].poll.dev' while we can just use &n->dev. It's the same pointer, see vhost_test_open()/vhost_dev_init(). Signed-off-by: Andrey Ryabinin Signed-off-by: Mike Christie Reviewed-by: Chaitanya Kulkarni Acked-by: Jason Wang Message-Id: <20220517180850.198915-5-michael.christie@oracle.com> Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Stefano Garzarella --- drivers/vhost/test.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index f0ac9e35f5d6..de39151366c5 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -144,14 +144,9 @@ static void vhost_test_stop(struct vhost_test *n, void **privatep) *privatep = vhost_test_stop_vq(n, n->vqs + VHOST_TEST_VQ); } -static void vhost_test_flush_vq(struct vhost_test *n, int index) -{ - vhost_work_dev_flush(n->vqs[index].poll.dev); -} - static void vhost_test_flush(struct vhost_test *n) { - vhost_test_flush_vq(n, VHOST_TEST_VQ); + vhost_work_dev_flush(&n->dev); } static int vhost_test_release(struct inode *inode, struct file *f) @@ -210,7 +205,7 @@ static long vhost_test_run(struct vhost_test *n, int test) goto err; if (oldpriv) { - vhost_test_flush_vq(n, index); + vhost_test_flush(n); } } @@ -303,7 +298,7 @@ static long vhost_test_set_backend(struct vhost_test *n, unsigned index, int fd) mutex_unlock(&vq->mutex); if (enable) { - vhost_test_flush_vq(n, index); + vhost_test_flush(n); } mutex_unlock(&n->dev.mutex); -- cgit v1.2.3 From 15538ba5ffaa5cbdfd11161ded503b27f5c1f0af Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 17 May 2022 13:08:47 -0500 Subject: vhost_vsock: simplify vhost_vsock_flush() vhost_vsock_flush() calls vhost_work_dev_flush(vsock->vqs[i].poll.dev) before vhost_work_dev_flush(&vsock->dev). This seems pointless as vsock->vqs[i].poll.dev is the same as &vsock->dev and several flushes in a row doesn't do anything useful, one is just enough. Signed-off-by: Andrey Ryabinin Reviewed-by: Stefano Garzarella Signed-off-by: Mike Christie Acked-by: Jason Wang Message-Id: <20220517180850.198915-6-michael.christie@oracle.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/vsock.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index a4c8ae92a0fb..96be63697117 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -705,11 +705,6 @@ out: static void vhost_vsock_flush(struct vhost_vsock *vsock) { - int i; - - for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) - if (vsock->vqs[i].handle_kick) - vhost_work_dev_flush(vsock->vqs[i].poll.dev); vhost_work_dev_flush(&vsock->dev); } -- cgit v1.2.3 From c3d284cf789ddf7a99ed7f414092ed9ea75aa883 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 17 May 2022 13:08:48 -0500 Subject: vhost-scsi: drop flush after vhost_dev_cleanup The flush after vhost_dev_cleanup is not needed because: 1. It doesn't do anything. vhost_dev_cleanup will stop the worker thread so the flush call will just return since the worker has no device. 2. It's not needed for the re-queue case. vhost_scsi_evt_handle_kick grabs the mutex and if the backend is NULL will return without queueing a work. vhost_scsi_clear_endpoint will set the backend to NULL under the vq->mutex then drops the mutex and does a flush. So we know when vhost_scsi_clear_endpoint has dropped the mutex after clearing the backend no evt related work will be able to requeue. The flush would then make sure any queued evts are run and return. Signed-off-by: Mike Christie Acked-by: Jason Wang Message-Id: <20220517180850.198915-7-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 532e204f2b1b..94535c813ef7 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1827,8 +1827,6 @@ static int vhost_scsi_release(struct inode *inode, struct file *f) vhost_scsi_clear_endpoint(vs, &t); vhost_dev_stop(&vs->dev); vhost_dev_cleanup(&vs->dev); - /* Jobs can re-queue themselves in evt kick handler. 
Do extra flush. */ - vhost_scsi_flush(vs); kfree(vs->dev.vqs); kvfree(vs); return 0; -- cgit v1.2.3 From f3a1aad9a448e40d4833913cbd50aeec538a74fe Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 17 May 2022 13:08:49 -0500 Subject: vhost-test: drop flush after vhost_dev_cleanup The flush after vhost_dev_cleanup is not needed because: 1. It doesn't do anything. vhost_dev_cleanup will stop the worker thread so the flush call will just return since the worker has no device. 2. It's not needed. The comment about jobs re-queueing themselves does not look correct because handle_vq does not requeue work. Signed-off-by: Mike Christie Acked-by: Jason Wang Message-Id: <20220517180850.198915-8-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/test.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index de39151366c5..6c139f18bc54 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -158,9 +158,6 @@ static int vhost_test_release(struct inode *inode, struct file *f) vhost_test_flush(n); vhost_dev_stop(&n->dev); vhost_dev_cleanup(&n->dev); - /* We do an extra flush before freeing memory, - * since jobs can re-queue themselves. */ - vhost_test_flush(n); kfree(n->dev.vqs); kfree(n); return 0; -- cgit v1.2.3 From b2ffa407ed5dd931d6b0657cc8824aa0f4e73a7a Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 17 May 2022 13:08:50 -0500 Subject: vhost: rename vhost_work_dev_flush This patch renames vhost_work_dev_flush to just vhost_dev_flush to reflect that it flushes everything on the device and that drivers don't know/care that polls are based on vhost_works. Drivers just flush the entire device and polls, and works for vhost-scsi management TMFs and IO net virtqueues, etc all are flushed. Signed-off-by: Mike Christie Acked-by: Jason Wang Reviewed-by: Stefano Garzarella Message-Id: <20220517180850.198915-9-michael.christie@oracle.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/net.c | 4 ++-- drivers/vhost/scsi.c | 2 +- drivers/vhost/test.c | 2 +- drivers/vhost/vhost.c | 10 +++++----- drivers/vhost/vhost.h | 2 +- drivers/vhost/vsock.c | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 20eb076300bb..68e4ecd1cc0e 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1376,7 +1376,7 @@ static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock, static void vhost_net_flush(struct vhost_net *n) { - vhost_work_dev_flush(&n->dev); + vhost_dev_flush(&n->dev); if (n->vqs[VHOST_NET_VQ_TX].ubufs) { mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); n->tx_flush = true; @@ -1565,7 +1565,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) } if (oldsock) { - vhost_work_dev_flush(&n->dev); + vhost_dev_flush(&n->dev); sockfd_put(oldsock); } diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 94535c813ef7..ffd9e6c2ffc1 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1436,7 +1436,7 @@ static void vhost_scsi_flush(struct vhost_scsi *vs) kref_put(&old_inflight[i]->kref, vhost_scsi_done_inflight); /* Flush both the vhost poll and vhost work */ - vhost_work_dev_flush(&vs->dev); + vhost_dev_flush(&vs->dev); /* Wait for all reqs issued before the flush to be finished */ for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 6c139f18bc54..bc8e7fb1e635 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -146,7 +146,7 @@ static void vhost_test_stop(struct vhost_test *n, void **privatep) static void vhost_test_flush(struct vhost_test *n) { - vhost_work_dev_flush(&n->dev); + vhost_dev_flush(&n->dev); } static int vhost_test_release(struct inode *inode, struct file *f) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index d82b9394d89a..40097826cff0 100644 --- a/drivers/vhost/vhost.c +++ 
b/drivers/vhost/vhost.c @@ -231,7 +231,7 @@ void vhost_poll_stop(struct vhost_poll *poll) } EXPORT_SYMBOL_GPL(vhost_poll_stop); -void vhost_work_dev_flush(struct vhost_dev *dev) +void vhost_dev_flush(struct vhost_dev *dev) { struct vhost_flush_struct flush; @@ -243,7 +243,7 @@ void vhost_work_dev_flush(struct vhost_dev *dev) wait_for_completion(&flush.wait_event); } } -EXPORT_SYMBOL_GPL(vhost_work_dev_flush); +EXPORT_SYMBOL_GPL(vhost_dev_flush); void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) { @@ -530,7 +530,7 @@ static int vhost_attach_cgroups(struct vhost_dev *dev) attach.owner = current; vhost_work_init(&attach.work, vhost_attach_cgroups_work); vhost_work_queue(dev, &attach.work); - vhost_work_dev_flush(dev); + vhost_dev_flush(dev); return attach.ret; } @@ -657,7 +657,7 @@ void vhost_dev_stop(struct vhost_dev *dev) vhost_poll_stop(&dev->vqs[i]->poll); } - vhost_work_dev_flush(dev); + vhost_dev_flush(dev); } EXPORT_SYMBOL_GPL(vhost_dev_stop); @@ -1724,7 +1724,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg mutex_unlock(&vq->mutex); if (pollstop && vq->handle_kick) - vhost_work_dev_flush(vq->poll.dev); + vhost_dev_flush(vq->poll.dev); return r; } EXPORT_SYMBOL_GPL(vhost_vring_ioctl); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index b85410124305..d9109107af08 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -45,7 +45,7 @@ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, int vhost_poll_start(struct vhost_poll *poll, struct file *file); void vhost_poll_stop(struct vhost_poll *poll); void vhost_poll_queue(struct vhost_poll *poll); -void vhost_work_dev_flush(struct vhost_dev *dev); +void vhost_dev_flush(struct vhost_dev *dev); struct vhost_log { u64 addr; diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 96be63697117..368330417bde 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -705,7 +705,7 @@ out: static void 
vhost_vsock_flush(struct vhost_vsock *vsock) { - vhost_work_dev_flush(&vsock->dev); + vhost_dev_flush(&vsock->dev); } static void vhost_vsock_reset_orphans(struct sock *sk) -- cgit v1.2.3 From 7becdd13b640a6f91219ae3f201afa03ed67876b Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 11 Apr 2022 15:29:40 +0300 Subject: vdpa/mlx5: Remove flow counter from steering The flow counter has been introduced in early versions of the driver to aid in debugging. It is no longer needed and can harm performance. Remove it. Signed-off-by: Eli Cohen Message-Id: <20220411122942.225717-2-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index dcca782c698e..40fdc242bd61 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -157,7 +157,6 @@ struct mlx5_vdpa_net { */ struct rw_semaphore reslock; struct mlx5_flow_table *rxft; - struct mlx5_fc *rx_counter; struct mlx5_flow_handle *rx_rule_ucast; struct mlx5_flow_handle *rx_rule_mcast; bool setup; @@ -1406,7 +1405,7 @@ static void destroy_tir(struct mlx5_vdpa_net *ndev) static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev) { - struct mlx5_flow_destination dest[2] = {}; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_act flow_act = {}; struct mlx5_flow_namespace *ns; @@ -1438,12 +1437,6 @@ static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev) goto err_ns; } - ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false); - if (IS_ERR(ndev->rx_counter)) { - err = PTR_ERR(ndev->rx_counter); - goto err_fc; - } - headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16); memset(dmac_c, 0xff, ETH_ALEN); @@ -1451,12 +1444,10 @@ static int 
add_fwd_to_tir(struct mlx5_vdpa_net *ndev) dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); ether_addr_copy(dmac_v, ndev->config.mac); - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dest[0].tir_num = ndev->res.tirn; - dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[1].counter_id = mlx5_fc_id(ndev->rx_counter); - ndev->rx_rule_ucast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 2); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = ndev->res.tirn; + ndev->rx_rule_ucast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1); if (IS_ERR(ndev->rx_rule_ucast)) { err = PTR_ERR(ndev->rx_rule_ucast); @@ -1469,7 +1460,7 @@ static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev) dmac_c[0] = 1; dmac_v[0] = 1; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - ndev->rx_rule_mcast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 1); + ndev->rx_rule_mcast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1); if (IS_ERR(ndev->rx_rule_mcast)) { err = PTR_ERR(ndev->rx_rule_mcast); ndev->rx_rule_mcast = NULL; @@ -1483,8 +1474,6 @@ err_rule_mcast: mlx5_del_flow_rules(ndev->rx_rule_ucast); ndev->rx_rule_ucast = NULL; err_rule_ucast: - mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter); -err_fc: mlx5_destroy_flow_table(ndev->rxft); err_ns: kvfree(spec); @@ -1500,7 +1489,6 @@ static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev) ndev->rx_rule_mcast = NULL; mlx5_del_flow_rules(ndev->rx_rule_ucast); ndev->rx_rule_ucast = NULL; - mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter); mlx5_destroy_flow_table(ndev->rxft); } -- cgit v1.2.3 From baf2ad3f6a985354293e371b9ba12b162d639e29 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 11 Apr 2022 15:29:42 +0300 Subject: vdpa/mlx5: Add RX MAC VLAN filter support Support HW offloaded filtering of MAC/VLAN 
packets. To allow that, we add a handler to handle VLAN configurations coming through the control VQ. Two operations are supported. 1. Adding VLAN - in this case, an entry will be added to the RX flow table that will allow the combination of the MAC/VLAN to be forwarded to the TIR. 2. Removing VLAN - will remove the entry from the flow table, effectively blocking such packets from going through. Currently the control VQ does not propagate changes to the MAC of the VLAN device so we always use the MAC of the parent device. Examples: 1. Create vlan device: $ ip link add link ens1 name ens1.8 type vlan id 8 Signed-off-by: Eli Cohen Message-Id: <20220411122942.225717-4-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 274 ++++++++++++++++++++++++++++++-------- 1 file changed, 216 insertions(+), 58 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 40fdc242bd61..b7a955479156 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -48,6 +48,8 @@ MODULE_LICENSE("Dual BSD/GPL"); #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature))) +#define MLX5V_UNTAGGED 0x1000 + struct mlx5_vdpa_net_resources { u32 tisn; u32 tdn; @@ -144,6 +146,8 @@ static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx) return idx <= mvdev->max_idx; } +#define MLX5V_MACVLAN_SIZE 256 + struct mlx5_vdpa_net { struct mlx5_vdpa_dev mvdev; struct mlx5_vdpa_net_resources res; @@ -157,14 +161,20 @@ struct mlx5_vdpa_net { */ struct rw_semaphore reslock; struct mlx5_flow_table *rxft; - struct mlx5_flow_handle *rx_rule_ucast; - struct mlx5_flow_handle *rx_rule_mcast; bool setup; u32 cur_num_vqs; u32 rqt_size; struct notifier_block nb; struct vdpa_callback config_cb; struct mlx5_vdpa_wq_ent cvq_ent; + struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE]; +}; + +struct macvlan_node { + struct hlist_node 
hlist; + struct mlx5_flow_handle *ucast_rule; + struct mlx5_flow_handle *mcast_rule; + u64 macvlan; }; static void free_resources(struct mlx5_vdpa_net *ndev); @@ -1403,12 +1413,17 @@ static void destroy_tir(struct mlx5_vdpa_net *ndev) mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn); } -static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev) +#define MAX_STEERING_ENT 0x8000 +#define MAX_STEERING_GROUPS 2 + +static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, + u16 vid, bool tagged, + struct mlx5_flow_handle **ucast, + struct mlx5_flow_handle **mcast) { struct mlx5_flow_destination dest = {}; - struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_namespace *ns; + struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; void *headers_c; void *headers_v; @@ -1421,74 +1436,178 @@ static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev) return -ENOMEM; spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - ft_attr.max_fte = 2; - ft_attr.autogroup.max_num_groups = 2; - - ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); - if (!ns) { - mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n"); - err = -EOPNOTSUPP; - goto err_ns; - } - - ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); - if (IS_ERR(ndev->rxft)) { - err = PTR_ERR(ndev->rxft); - goto err_ns; - } - headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); - dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16); - memset(dmac_c, 0xff, ETH_ALEN); headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16); dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); - ether_addr_copy(dmac_v, ndev->config.mac); - + memset(dmac_c, 0xff, ETH_ALEN); + ether_addr_copy(dmac_v, mac); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); + if 
(tagged) { + MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, vid); + } flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; dest.tir_num = ndev->res.tirn; - ndev->rx_rule_ucast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1); + rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) + return PTR_ERR(rule); - if (IS_ERR(ndev->rx_rule_ucast)) { - err = PTR_ERR(ndev->rx_rule_ucast); - ndev->rx_rule_ucast = NULL; - goto err_rule_ucast; - } + *ucast = rule; memset(dmac_c, 0, ETH_ALEN); memset(dmac_v, 0, ETH_ALEN); dmac_c[0] = 1; dmac_v[0] = 1; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - ndev->rx_rule_mcast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1); - if (IS_ERR(ndev->rx_rule_mcast)) { - err = PTR_ERR(ndev->rx_rule_mcast); - ndev->rx_rule_mcast = NULL; - goto err_rule_mcast; + rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, &dest, 1); + kvfree(spec); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto err_mcast; } - kvfree(spec); + *mcast = rule; return 0; -err_rule_mcast: - mlx5_del_flow_rules(ndev->rx_rule_ucast); - ndev->rx_rule_ucast = NULL; -err_rule_ucast: - mlx5_destroy_flow_table(ndev->rxft); -err_ns: - kvfree(spec); +err_mcast: + mlx5_del_flow_rules(*ucast); + return err; +} + +static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev, + struct mlx5_flow_handle *ucast, + struct mlx5_flow_handle *mcast) +{ + mlx5_del_flow_rules(ucast); + mlx5_del_flow_rules(mcast); +} + +static u64 search_val(u8 *mac, u16 vlan, bool tagged) +{ + u64 val; + + if (!tagged) + vlan = MLX5V_UNTAGGED; + + val = (u64)vlan << 48 | + (u64)mac[0] << 40 | + (u64)mac[1] << 32 | + (u64)mac[2] << 24 | + (u64)mac[3] << 16 | + (u64)mac[4] << 8 | + (u64)mac[5]; + + return val; +} + +static struct macvlan_node *mac_vlan_lookup(struct 
mlx5_vdpa_net *ndev, u64 value) +{ + struct macvlan_node *pos; + u32 idx; + + idx = hash_64(value, 8); // tbd 8 + hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) { + if (pos->macvlan == value) + return pos; + } + return NULL; +} + +static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) // vlan -> vid +{ + struct macvlan_node *ptr; + u64 val; + u32 idx; + int err; + + val = search_val(mac, vlan, tagged); + if (mac_vlan_lookup(ndev, val)) + return -EEXIST; + + ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, vlan, tagged, + &ptr->ucast_rule, &ptr->mcast_rule); + if (err) + goto err_add; + + ptr->macvlan = val; + idx = hash_64(val, 8); + hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]); + return 0; + +err_add: + kfree(ptr); return err; } -static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev) +static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) { - if (!ndev->rx_rule_ucast) + struct macvlan_node *ptr; + + ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged)); + if (!ptr) return; - mlx5_del_flow_rules(ndev->rx_rule_mcast); - ndev->rx_rule_mcast = NULL; - mlx5_del_flow_rules(ndev->rx_rule_ucast); - ndev->rx_rule_ucast = NULL; + hlist_del(&ptr->hlist); + mlx5_vdpa_del_mac_vlan_rules(ndev, ptr->ucast_rule, ptr->mcast_rule); + kfree(ptr); +} + +static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev) +{ + struct macvlan_node *pos; + struct hlist_node *n; + int i; + + for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) { + hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) { + hlist_del(&pos->hlist); + mlx5_vdpa_del_mac_vlan_rules(ndev, pos->ucast_rule, pos->mcast_rule); + kfree(pos); + } + } +} + +static int setup_steering(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + int err; + + ft_attr.max_fte = MAX_STEERING_ENT; + 
ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS; + + ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); + if (!ns) { + mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n"); + return -EOPNOTSUPP; + } + + ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ndev->rxft)) { + mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n"); + return PTR_ERR(ndev->rxft); + } + + err = mac_vlan_add(ndev, ndev->config.mac, 0, false); + if (err) + goto err_add; + + return 0; + +err_add: + mlx5_destroy_flow_table(ndev->rxft); + return err; +} + +static void teardown_steering(struct mlx5_vdpa_net *ndev) +{ + clear_mac_vlan_table(ndev); mlx5_destroy_flow_table(ndev->rxft); } @@ -1539,9 +1658,9 @@ static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) /* Need recreate the flow table entry, so that the packet could forward back */ - remove_fwd_to_tir(ndev); + mac_vlan_del(ndev, ndev->config.mac, 0, false); - if (add_fwd_to_tir(ndev)) { + if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) { mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n"); /* Although it hardly run here, we still need double check */ @@ -1565,7 +1684,7 @@ static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) memcpy(ndev->config.mac, mac_back, ETH_ALEN); - if (add_fwd_to_tir(ndev)) + if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n"); break; @@ -1667,6 +1786,42 @@ static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd) return status; } +static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd) +{ + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; + struct mlx5_control_vq *cvq = &mvdev->cvq; + __virtio16 vlan; + size_t read; + u16 id; + + switch (cmd) { + case VIRTIO_NET_CTRL_VLAN_ADD: + read = 
vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); + if (read != sizeof(vlan)) + break; + + id = mlx5vdpa16_to_cpu(mvdev, vlan); + if (mac_vlan_add(ndev, ndev->config.mac, id, true)) + break; + + status = VIRTIO_NET_OK; + break; + case VIRTIO_NET_CTRL_VLAN_DEL: + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); + if (read != sizeof(vlan)) + break; + + id = mlx5vdpa16_to_cpu(mvdev, vlan); + mac_vlan_del(ndev, ndev->config.mac, id, true); + break; + default: + break; +} + +return status; +} + static void mlx5_cvq_kick_handler(struct work_struct *work) { virtio_net_ctrl_ack status = VIRTIO_NET_ERR; @@ -1712,7 +1867,9 @@ static void mlx5_cvq_kick_handler(struct work_struct *work) case VIRTIO_NET_CTRL_MQ: status = handle_ctrl_mq(mvdev, ctrl.cmd); break; - + case VIRTIO_NET_CTRL_VLAN: + status = handle_ctrl_vlan(mvdev, ctrl.cmd); + break; default: break; } @@ -1977,6 +2134,7 @@ static u64 get_supported_features(struct mlx5_core_dev *mdev) mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ); mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS); mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU); + mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN); return mlx_vdpa_features; } @@ -2262,9 +2420,9 @@ static int setup_driver(struct mlx5_vdpa_dev *mvdev) goto err_tir; } - err = add_fwd_to_tir(ndev); + err = setup_steering(ndev); if (err) { - mlx5_vdpa_warn(mvdev, "add_fwd_to_tir\n"); + mlx5_vdpa_warn(mvdev, "setup_steering\n"); goto err_fwd; } ndev->setup = true; @@ -2290,7 +2448,7 @@ static void teardown_driver(struct mlx5_vdpa_net *ndev) if (!ndev->setup) return; - remove_fwd_to_tir(ndev); + teardown_steering(ndev); destroy_tir(ndev); destroy_rqt(ndev); teardown_virtqueues(ndev); -- cgit v1.2.3 From bd8bb9aed56b1814784a975e2dfea12a9adcee92 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 24 May 2022 13:55:57 +0800 Subject: vdpa: ifcvf: set pci driver data in probe We should set the pci driver data in probe instead of the vdpa device 
adding callback. Otherwise if no vDPA device is created we will lose the pointer to the management device. Fixes: 6b5df347c6482 ("vDPA/ifcvf: implement management netlink framework for ifcvf") Tested-by: Zheyu Ma Signed-off-by: Jason Wang Message-Id: <20220524055557.1938-1-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/ifcvf/ifcvf_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 750e5f23406d..0a5670729412 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -771,7 +771,6 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, } ifcvf_mgmt_dev->adapter = adapter; - pci_set_drvdata(pdev, ifcvf_mgmt_dev); vf = &adapter->vf; vf->dev_type = get_dev_type(pdev); @@ -886,6 +885,8 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err; } + pci_set_drvdata(pdev, ifcvf_mgmt_dev); + return 0; err: -- cgit v1.2.3