diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-15 23:21:13 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-15 23:21:13 +0300 |
commit | 4138f02288333cb596885e9af03dd3ea2de845cb (patch) | |
tree | 96567ff0801b022508baa3d42127f721126421e9 /drivers/vfio/pci/mlx5/cmd.c | |
parent | 4f712ee0cbbd5c777d270427092bb301fc31044f (diff) | |
parent | 7447d911af699a15f8d050dfcb7c680a86f87012 (diff) | |
download | linux-4138f02288333cb596885e9af03dd3ea2de845cb.tar.xz |
Merge tag 'vfio-v6.9-rc1' of https://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson:
- Add warning in unlikely case that device is not captured with
driver_override (Kunwu Chan)
- Error handling improvements in mlx5-vfio-pci to detect firmware
tracking object error states, logging of firmware error syndrom, and
releasing of firmware resources in aborted migration sequence (Yishai
Hadas)
- Correct an un-alphabetized VFIO MAINTAINERS entry (Alex Williamson)
- Make the mdev_bus_type const and also make the class struct const for
a couple of the vfio-mdev sample drivers (Ricardo B. Marliere)
- Addition of a new vfio-pci variant driver for the GPU of NVIDIA's
Grace-Hopper superchip. During initialization of the chip-to-chip
interconnect in this hardware module, the PCI BARs of the device
become unused in favor of a faster, coherent mechanism for exposing
device memory. This driver primarily changes the VFIO representation
of the device to masquerade this coherent aperture to replace the
physical PCI BARs for userspace drivers. This also incorporates use
of a new vma flag allowing KVM to use write combining attributes for
uncached device memory (Ankit Agrawal)
- Reset fixes and cleanups for the pds-vfio-pci driver. Save and
restore files were previously leaked if the device didn't pass
through an error state, this is resolved and later re-fixed to
prevent access to the now freed files. Reset handling is also
refactored to remove the complicated deferred reset mechanism (Brett
Creeley)
- Remove some references to pl330 in the vfio-platform amba driver
(Geert Uytterhoeven)
- Remove twice redundant and ugly code to unpin incidental pins of the
zero-page (Alex Williamson)
- Deferred reset logic is also removed from the hisi-acc-vfio-pci
driver as a simplification (Shameer Kolothum)
- Enforce that mlx5-vfio-pci devices must support PRE_COPY and remove
resulting unnecessary code. There is no device firmware that has been
available publicly without this support (Yishai Hadas)
- Switch over to using the .remove_new callback for vfio-platform in
support of the broader transition for a void remove function (Uwe
Kleine-König)
- Resolve multiple issues in interrupt code for VFIO bus drivers that
allow calling eventfd_signal() on a NULL context. This also remove a
potential race in INTx setup on certain hardware for vfio-pci, races
with various mechanisms to mask INTx, and leaked virqfds in
vfio-platform (Alex Williamson)
* tag 'vfio-v6.9-rc1' of https://github.com/awilliam/linux-vfio: (29 commits)
vfio/fsl-mc: Block calling interrupt handler without trigger
vfio/platform: Create persistent IRQ handlers
vfio/platform: Disable virqfds on cleanup
vfio/pci: Create persistent INTx handler
vfio: Introduce interface to flush virqfd inject workqueue
vfio/pci: Lock external INTx masking ops
vfio/pci: Disable auto-enable of exclusive INTx IRQ
vfio/pds: Refactor/simplify reset logic
vfio/pds: Make sure migration file isn't accessed after reset
vfio/platform: Convert to platform remove callback returning void
vfio/mlx5: Enforce PRE_COPY support
vfio/mbochs: make mbochs_class constant
vfio/mdpy: make mdpy_class constant
hisi_acc_vfio_pci: Remove the deferred_reset logic
Revert "vfio/type1: Unpin zero pages"
vfio/nvgrace-gpu: Convey kvm to map device memory region as noncached
vfio: amba: Rename pl330_ids[] to vfio_amba_ids[]
vfio/pds: Always clear the save/restore FDs on reset
vfio/nvgrace-gpu: Add vfio pci variant module for grace hopper
vfio/pci: rename and export range_intersect_range
...
Diffstat (limited to 'drivers/vfio/pci/mlx5/cmd.c')
-rw-r--r-- | drivers/vfio/pci/mlx5/cmd.c | 157 |
1 files changed, 132 insertions, 25 deletions
diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index efd1d252cdc9..41a4b0cf4297 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -108,8 +108,9 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp); if (ret) return ret; - if (mvdev->saving_migf->state == - MLX5_MIGF_STATE_PRE_COPY_ERROR) { + /* Upon cleanup, ignore previous pre_copy error state */ + if (mvdev->saving_migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR && + !(query_flags & MLX5VF_QUERY_CLEANUP)) { /* * In case we had a PRE_COPY error, only query full * image for final image @@ -121,6 +122,11 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, } query_flags &= ~MLX5VF_QUERY_INC; } + /* Block incremental query which is state-dependent */ + if (mvdev->saving_migf->state == MLX5_MIGF_STATE_ERROR) { + complete(&mvdev->saving_migf->save_comp); + return -ENODEV; + } } MLX5_SET(query_vhca_migration_state_in, in, opcode, @@ -149,6 +155,12 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, return 0; } +static void set_tracker_change_event(struct mlx5vf_pci_core_device *mvdev) +{ + mvdev->tracker.object_changed = true; + complete(&mvdev->tracker_comp); +} + static void set_tracker_error(struct mlx5vf_pci_core_device *mvdev) { /* Mark the tracker under an error and wake it up if it's running */ @@ -189,7 +201,7 @@ void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev) /* Must be done outside the lock to let it progress */ set_tracker_error(mvdev); mutex_lock(&mvdev->state_mutex); - mlx5vf_disable_fds(mvdev); + mlx5vf_disable_fds(mvdev, NULL); _mlx5vf_free_page_tracker_resources(mvdev); mlx5vf_state_mutex_unlock(mvdev); } @@ -221,6 +233,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, if (!MLX5_CAP_GEN(mvdev->mdev, migration)) goto end; + if (!(MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) && + MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state))) + goto end; + mvdev->vf_id = pci_iov_vf_id(pdev); if (mvdev->vf_id < 0) goto end; @@ -250,17 +266,14 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, mvdev->migrate_cap = 1; mvdev->core_device.vdev.migration_flags = VFIO_MIGRATION_STOP_COPY | - VFIO_MIGRATION_P2P; + VFIO_MIGRATION_P2P | + VFIO_MIGRATION_PRE_COPY; + mvdev->core_device.vdev.mig_ops = mig_ops; init_completion(&mvdev->tracker_comp); if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization)) mvdev->core_device.vdev.log_ops = log_ops; - if (MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) && - MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state)) - mvdev->core_device.vdev.migration_flags |= - VFIO_MIGRATION_PRE_COPY; - if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks)) mvdev->chunk_mode = 1; @@ -402,6 +415,50 @@ void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf) kfree(buf); } +static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, + unsigned int npages) +{ + unsigned int to_alloc = npages; + struct page **page_list; + unsigned long filled; + unsigned int to_fill; + int ret; + + to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list)); + page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT); + if (!page_list) + return -ENOMEM; + + do { + filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill, + page_list); + if (!filled) { + ret = -ENOMEM; + goto err; + } + to_alloc -= filled; + ret = sg_alloc_append_table_from_pages( + &buf->table, page_list, filled, 0, + filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC, + GFP_KERNEL_ACCOUNT); + + if (ret) + goto err; + buf->allocated_length += filled * PAGE_SIZE; + /* clean input for another bulk allocation */ + memset(page_list, 0, filled * sizeof(*page_list)); + to_fill = min_t(unsigned int, to_alloc, + PAGE_SIZE / sizeof(*page_list)); + } while (to_alloc > 0); + + kvfree(page_list); + return 0; + +err: + kvfree(page_list); + return ret; +} + struct mlx5_vhca_data_buffer * mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, size_t length, @@ -608,8 +665,13 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) err: /* The error flow can't run from an interrupt context */ - if (status == -EREMOTEIO) + if (status == -EREMOTEIO) { status = MLX5_GET(save_vhca_state_out, async_data->out, status); + /* Failed in FW, print cmd out failure details */ + mlx5_cmd_out_err(migf->mvdev->mdev, MLX5_CMD_OP_SAVE_VHCA_STATE, 0, + async_data->out); + } + async_data->status = status; queue_work(migf->mvdev->cb_wq, &async_data->work); } @@ -623,6 +685,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {}; struct mlx5_vhca_data_buffer *header_buf = NULL; struct mlx5vf_async_data *async_data; + bool pre_copy_cleanup = false; int err; lockdep_assert_held(&mvdev->state_mutex); @@ -633,6 +696,10 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, if (err) return err; + if ((migf->state == MLX5_MIGF_STATE_PRE_COPY || + migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR) && !track && !inc) + pre_copy_cleanup = true; + if (migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR) /* * In case we had a PRE_COPY error, SAVE is triggered only for @@ -651,29 +718,27 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, async_data = &migf->async_data; async_data->buf = buf; - async_data->stop_copy_chunk = !track; + async_data->stop_copy_chunk = (!track && !pre_copy_cleanup); async_data->out = kvzalloc(out_size, GFP_KERNEL); if (!async_data->out) { err = -ENOMEM; goto err_out; } - if (MLX5VF_PRE_COPY_SUPP(mvdev)) { - if (async_data->stop_copy_chunk) { - u8 header_idx = buf->stop_copy_chunk_num ? - buf->stop_copy_chunk_num - 1 : 0; + if (async_data->stop_copy_chunk) { + u8 header_idx = buf->stop_copy_chunk_num ? + buf->stop_copy_chunk_num - 1 : 0; - header_buf = migf->buf_header[header_idx]; - migf->buf_header[header_idx] = NULL; - } + header_buf = migf->buf_header[header_idx]; + migf->buf_header[header_idx] = NULL; + } - if (!header_buf) { - header_buf = mlx5vf_get_data_buffer(migf, - sizeof(struct mlx5_vf_migration_header), DMA_NONE); - if (IS_ERR(header_buf)) { - err = PTR_ERR(header_buf); - goto err_free; - } + if (!header_buf) { + header_buf = mlx5vf_get_data_buffer(migf, + sizeof(struct mlx5_vf_migration_header), DMA_NONE); + if (IS_ERR(header_buf)) { + err = PTR_ERR(header_buf); + goto err_free; } } @@ -900,6 +965,29 @@ static int mlx5vf_cmd_modify_tracker(struct mlx5_core_dev *mdev, return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); } +static int mlx5vf_cmd_query_tracker(struct mlx5_core_dev *mdev, + struct mlx5_vhca_page_tracker *tracker) +{ + u32 out[MLX5_ST_SZ_DW(query_page_track_obj_out)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + void *obj_context; + void *cmd_hdr; + int err; + + cmd_hdr = MLX5_ADDR_OF(modify_page_track_obj_in, in, general_obj_in_cmd_hdr); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, tracker->id); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + obj_context = MLX5_ADDR_OF(query_page_track_obj_out, out, obj_context); + tracker->status = MLX5_GET(page_track, obj_context, state); + return 0; +} + static int alloc_cq_frag_buf(struct mlx5_core_dev *mdev, struct mlx5_vhca_cq_buf *buf, int nent, int cqe_size) @@ -957,9 +1045,11 @@ static int mlx5vf_event_notifier(struct notifier_block *nb, unsigned long type, mlx5_nb_cof(nb, struct mlx5_vhca_page_tracker, nb); struct mlx5vf_pci_core_device *mvdev = container_of( tracker, struct mlx5vf_pci_core_device, tracker); + struct mlx5_eqe_obj_change *object; struct mlx5_eqe *eqe = data; u8 event_type = (u8)type; u8 queue_type; + u32 obj_id; int qp_num; switch (event_type) { @@ -975,6 +1065,12 @@ static int mlx5vf_event_notifier(struct notifier_block *nb, unsigned long type, break; set_tracker_error(mvdev); break; + case MLX5_EVENT_TYPE_OBJECT_CHANGE: + object = &eqe->data.obj_change; + obj_id = be32_to_cpu(object->obj_id); + if (obj_id == tracker->id) + set_tracker_change_event(mvdev); + break; default: break; } @@ -1634,6 +1730,11 @@ int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova, goto end; } + if (tracker->is_err) { + err = -EIO; + goto end; + } + mdev = mvdev->mdev; err = mlx5vf_cmd_modify_tracker(mdev, tracker->id, iova, length, MLX5_PAGE_TRACK_STATE_REPORTING); @@ -1652,6 +1753,12 @@ int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova, dirty, &tracker->status); if (poll_err == CQ_EMPTY) { wait_for_completion(&mvdev->tracker_comp); + if (tracker->object_changed) { + tracker->object_changed = false; + err = mlx5vf_cmd_query_tracker(mdev, tracker); + if (err) + goto end; + } continue; } } |