diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-15 23:21:13 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-15 23:21:13 +0300 |
commit | 4138f02288333cb596885e9af03dd3ea2de845cb (patch) | |
tree | 96567ff0801b022508baa3d42127f721126421e9 /drivers/vfio/pci/mlx5 | |
parent | 4f712ee0cbbd5c777d270427092bb301fc31044f (diff) | |
parent | 7447d911af699a15f8d050dfcb7c680a86f87012 (diff) | |
download | linux-4138f02288333cb596885e9af03dd3ea2de845cb.tar.xz |
Merge tag 'vfio-v6.9-rc1' of https://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson:
- Add warning in unlikely case that device is not captured with
driver_override (Kunwu Chan)
- Error handling improvements in mlx5-vfio-pci to detect firmware
tracking object error states, logging of firmware error syndrom, and
releasing of firmware resources in aborted migration sequence (Yishai
Hadas)
- Correct an un-alphabetized VFIO MAINTAINERS entry (Alex Williamson)
- Make the mdev_bus_type const and also make the class struct const for
a couple of the vfio-mdev sample drivers (Ricardo B. Marliere)
- Addition of a new vfio-pci variant driver for the GPU of NVIDIA's
Grace-Hopper superchip. During initialization of the chip-to-chip
interconnect in this hardware module, the PCI BARs of the device
become unused in favor of a faster, coherent mechanism for exposing
device memory. This driver primarily changes the VFIO representation
of the device to masquerade this coherent aperture to replace the
physical PCI BARs for userspace drivers. This also incorporates use
of a new vma flag allowing KVM to use write combining attributes for
uncached device memory (Ankit Agrawal)
- Reset fixes and cleanups for the pds-vfio-pci driver. Save and
restore files were previously leaked if the device didn't pass
through an error state, this is resolved and later re-fixed to
prevent access to the now freed files. Reset handling is also
refactored to remove the complicated deferred reset mechanism (Brett
Creeley)
- Remove some references to pl330 in the vfio-platform amba driver
(Geert Uytterhoeven)
- Remove twice redundant and ugly code to unpin incidental pins of the
zero-page (Alex Williamson)
- Deferred reset logic is also removed from the hisi-acc-vfio-pci
driver as a simplification (Shameer Kolothum)
- Enforce that mlx5-vfio-pci devices must support PRE_COPY and remove
resulting unnecessary code. There is no device firmware that has been
available publicly without this support (Yishai Hadas)
- Switch over to using the .remove_new callback for vfio-platform in
support of the broader transition for a void remove function (Uwe
Kleine-König)
- Resolve multiple issues in interrupt code for VFIO bus drivers that
allow calling eventfd_signal() on a NULL context. This also remove a
potential race in INTx setup on certain hardware for vfio-pci, races
with various mechanisms to mask INTx, and leaked virqfds in
vfio-platform (Alex Williamson)
* tag 'vfio-v6.9-rc1' of https://github.com/awilliam/linux-vfio: (29 commits)
vfio/fsl-mc: Block calling interrupt handler without trigger
vfio/platform: Create persistent IRQ handlers
vfio/platform: Disable virqfds on cleanup
vfio/pci: Create persistent INTx handler
vfio: Introduce interface to flush virqfd inject workqueue
vfio/pci: Lock external INTx masking ops
vfio/pci: Disable auto-enable of exclusive INTx IRQ
vfio/pds: Refactor/simplify reset logic
vfio/pds: Make sure migration file isn't accessed after reset
vfio/platform: Convert to platform remove callback returning void
vfio/mlx5: Enforce PRE_COPY support
vfio/mbochs: make mbochs_class constant
vfio/mdpy: make mdpy_class constant
hisi_acc_vfio_pci: Remove the deferred_reset logic
Revert "vfio/type1: Unpin zero pages"
vfio/nvgrace-gpu: Convey kvm to map device memory region as noncached
vfio: amba: Rename pl330_ids[] to vfio_amba_ids[]
vfio/pds: Always clear the save/restore FDs on reset
vfio/nvgrace-gpu: Add vfio pci variant module for grace hopper
vfio/pci: rename and export range_intersect_range
...
Diffstat (limited to 'drivers/vfio/pci/mlx5')
-rw-r--r-- | drivers/vfio/pci/mlx5/cmd.c | 157 | ||||
-rw-r--r-- | drivers/vfio/pci/mlx5/cmd.h | 11 | ||||
-rw-r--r-- | drivers/vfio/pci/mlx5/main.c | 148 |
3 files changed, 176 insertions, 140 deletions
diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index efd1d252cdc9..41a4b0cf4297 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -108,8 +108,9 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp); if (ret) return ret; - if (mvdev->saving_migf->state == - MLX5_MIGF_STATE_PRE_COPY_ERROR) { + /* Upon cleanup, ignore previous pre_copy error state */ + if (mvdev->saving_migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR && + !(query_flags & MLX5VF_QUERY_CLEANUP)) { /* * In case we had a PRE_COPY error, only query full * image for final image @@ -121,6 +122,11 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, } query_flags &= ~MLX5VF_QUERY_INC; } + /* Block incremental query which is state-dependent */ + if (mvdev->saving_migf->state == MLX5_MIGF_STATE_ERROR) { + complete(&mvdev->saving_migf->save_comp); + return -ENODEV; + } } MLX5_SET(query_vhca_migration_state_in, in, opcode, @@ -149,6 +155,12 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, return 0; } +static void set_tracker_change_event(struct mlx5vf_pci_core_device *mvdev) +{ + mvdev->tracker.object_changed = true; + complete(&mvdev->tracker_comp); +} + static void set_tracker_error(struct mlx5vf_pci_core_device *mvdev) { /* Mark the tracker under an error and wake it up if it's running */ @@ -189,7 +201,7 @@ void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev) /* Must be done outside the lock to let it progress */ set_tracker_error(mvdev); mutex_lock(&mvdev->state_mutex); - mlx5vf_disable_fds(mvdev); + mlx5vf_disable_fds(mvdev, NULL); _mlx5vf_free_page_tracker_resources(mvdev); mlx5vf_state_mutex_unlock(mvdev); } @@ -221,6 +233,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, if (!MLX5_CAP_GEN(mvdev->mdev, migration)) goto end; + if (!(MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) && + MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state))) + goto end; + mvdev->vf_id = pci_iov_vf_id(pdev); if (mvdev->vf_id < 0) goto end; @@ -250,17 +266,14 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, mvdev->migrate_cap = 1; mvdev->core_device.vdev.migration_flags = VFIO_MIGRATION_STOP_COPY | - VFIO_MIGRATION_P2P; + VFIO_MIGRATION_P2P | + VFIO_MIGRATION_PRE_COPY; + mvdev->core_device.vdev.mig_ops = mig_ops; init_completion(&mvdev->tracker_comp); if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization)) mvdev->core_device.vdev.log_ops = log_ops; - if (MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) && - MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state)) - mvdev->core_device.vdev.migration_flags |= - VFIO_MIGRATION_PRE_COPY; - if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks)) mvdev->chunk_mode = 1; @@ -402,6 +415,50 @@ void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf) kfree(buf); } +static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, + unsigned int npages) +{ + unsigned int to_alloc = npages; + struct page **page_list; + unsigned long filled; + unsigned int to_fill; + int ret; + + to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list)); + page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT); + if (!page_list) + return -ENOMEM; + + do { + filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill, + page_list); + if (!filled) { + ret = -ENOMEM; + goto err; + } + to_alloc -= filled; + ret = sg_alloc_append_table_from_pages( + &buf->table, page_list, filled, 0, + filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC, + GFP_KERNEL_ACCOUNT); + + if (ret) + goto err; + buf->allocated_length += filled * PAGE_SIZE; + /* clean input for another bulk allocation */ + memset(page_list, 0, filled * sizeof(*page_list)); + to_fill = min_t(unsigned int, to_alloc, + PAGE_SIZE / sizeof(*page_list)); + } while (to_alloc > 0); + + kvfree(page_list); + return 0; + +err: + kvfree(page_list); + return ret; +} + struct mlx5_vhca_data_buffer * mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, size_t length, @@ -608,8 +665,13 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) err: /* The error flow can't run from an interrupt context */ - if (status == -EREMOTEIO) + if (status == -EREMOTEIO) { status = MLX5_GET(save_vhca_state_out, async_data->out, status); + /* Failed in FW, print cmd out failure details */ + mlx5_cmd_out_err(migf->mvdev->mdev, MLX5_CMD_OP_SAVE_VHCA_STATE, 0, + async_data->out); + } + async_data->status = status; queue_work(migf->mvdev->cb_wq, &async_data->work); } @@ -623,6 +685,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {}; struct mlx5_vhca_data_buffer *header_buf = NULL; struct mlx5vf_async_data *async_data; + bool pre_copy_cleanup = false; int err; lockdep_assert_held(&mvdev->state_mutex); @@ -633,6 +696,10 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, if (err) return err; + if ((migf->state == MLX5_MIGF_STATE_PRE_COPY || + migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR) && !track && !inc) + pre_copy_cleanup = true; + if (migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR) /* * In case we had a PRE_COPY error, SAVE is triggered only for @@ -651,29 +718,27 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, async_data = &migf->async_data; async_data->buf = buf; - async_data->stop_copy_chunk = !track; + async_data->stop_copy_chunk = (!track && !pre_copy_cleanup); async_data->out = kvzalloc(out_size, GFP_KERNEL); if (!async_data->out) { err = -ENOMEM; goto err_out; } - if (MLX5VF_PRE_COPY_SUPP(mvdev)) { - if (async_data->stop_copy_chunk) { - u8 header_idx = buf->stop_copy_chunk_num ? - buf->stop_copy_chunk_num - 1 : 0; + if (async_data->stop_copy_chunk) { + u8 header_idx = buf->stop_copy_chunk_num ? + buf->stop_copy_chunk_num - 1 : 0; - header_buf = migf->buf_header[header_idx]; - migf->buf_header[header_idx] = NULL; - } + header_buf = migf->buf_header[header_idx]; + migf->buf_header[header_idx] = NULL; + } - if (!header_buf) { - header_buf = mlx5vf_get_data_buffer(migf, - sizeof(struct mlx5_vf_migration_header), DMA_NONE); - if (IS_ERR(header_buf)) { - err = PTR_ERR(header_buf); - goto err_free; - } + if (!header_buf) { + header_buf = mlx5vf_get_data_buffer(migf, + sizeof(struct mlx5_vf_migration_header), DMA_NONE); + if (IS_ERR(header_buf)) { + err = PTR_ERR(header_buf); + goto err_free; } } @@ -900,6 +965,29 @@ static int mlx5vf_cmd_modify_tracker(struct mlx5_core_dev *mdev, return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); } +static int mlx5vf_cmd_query_tracker(struct mlx5_core_dev *mdev, + struct mlx5_vhca_page_tracker *tracker) +{ + u32 out[MLX5_ST_SZ_DW(query_page_track_obj_out)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + void *obj_context; + void *cmd_hdr; + int err; + + cmd_hdr = MLX5_ADDR_OF(modify_page_track_obj_in, in, general_obj_in_cmd_hdr); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, tracker->id); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + obj_context = MLX5_ADDR_OF(query_page_track_obj_out, out, obj_context); + tracker->status = MLX5_GET(page_track, obj_context, state); + return 0; +} + static int alloc_cq_frag_buf(struct mlx5_core_dev *mdev, struct mlx5_vhca_cq_buf *buf, int nent, int cqe_size) @@ -957,9 +1045,11 @@ static int mlx5vf_event_notifier(struct notifier_block *nb, unsigned long type, mlx5_nb_cof(nb, struct mlx5_vhca_page_tracker, nb); struct mlx5vf_pci_core_device *mvdev = container_of( tracker, struct mlx5vf_pci_core_device, tracker); + struct mlx5_eqe_obj_change *object; struct mlx5_eqe *eqe = data; u8 event_type = (u8)type; u8 queue_type; + u32 obj_id; int qp_num; switch (event_type) { @@ -975,6 +1065,12 @@ static int mlx5vf_event_notifier(struct notifier_block *nb, unsigned long type, break; set_tracker_error(mvdev); break; + case MLX5_EVENT_TYPE_OBJECT_CHANGE: + object = &eqe->data.obj_change; + obj_id = be32_to_cpu(object->obj_id); + if (obj_id == tracker->id) + set_tracker_change_event(mvdev); + break; default: break; } @@ -1634,6 +1730,11 @@ int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova, goto end; } + if (tracker->is_err) { + err = -EIO; + goto end; + } + mdev = mvdev->mdev; err = mlx5vf_cmd_modify_tracker(mdev, tracker->id, iova, length, MLX5_PAGE_TRACK_STATE_REPORTING); @@ -1652,6 +1753,12 @@ int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova, dirty, &tracker->status); if (poll_err == CQ_EMPTY) { wait_for_completion(&mvdev->tracker_comp); + if (tracker->object_changed) { + tracker->object_changed = false; + err = mlx5vf_cmd_query_tracker(mdev, tracker); + if (err) + goto end; + } continue; } } diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index f2c7227fa683..df421dc6de04 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -13,9 +13,6 @@ #include <linux/mlx5/cq.h> #include <linux/mlx5/qp.h> -#define MLX5VF_PRE_COPY_SUPP(mvdev) \ - ((mvdev)->core_device.vdev.migration_flags & VFIO_MIGRATION_PRE_COPY) - enum mlx5_vf_migf_state { MLX5_MIGF_STATE_ERROR = 1, MLX5_MIGF_STATE_PRE_COPY_ERROR, @@ -25,7 +22,6 @@ enum mlx5_vf_migf_state { }; enum mlx5_vf_load_state { - MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER, MLX5_VF_LOAD_STATE_READ_HEADER, MLX5_VF_LOAD_STATE_PREP_HEADER_DATA, MLX5_VF_LOAD_STATE_READ_HEADER_DATA, @@ -162,6 +158,7 @@ struct mlx5_vhca_page_tracker { u32 id; u32 pdn; u8 is_err:1; + u8 object_changed:1; struct mlx5_uars_page *uar; struct mlx5_vhca_cq cq; struct mlx5_vhca_qp *host_qp; @@ -196,6 +193,7 @@ struct mlx5vf_pci_core_device { enum { MLX5VF_QUERY_INC = (1UL << 0), MLX5VF_QUERY_FINAL = (1UL << 1), + MLX5VF_QUERY_CLEANUP = (1UL << 2), }; int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod); @@ -226,12 +224,11 @@ struct mlx5_vhca_data_buffer * mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, size_t length, enum dma_data_direction dma_dir); void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf); -int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, - unsigned int npages); struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, unsigned long offset); void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev); -void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev); +void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev, + enum mlx5_vf_migf_state *last_save_state); void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work); void mlx5vf_mig_file_set_save_work(struct mlx5_vf_migration_file *migf, u8 chunk_num, size_t next_required_umem_size); diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index fe09a8c8af95..61d9b0f9146d 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -65,50 +65,6 @@ mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, return NULL; } -int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, - unsigned int npages) -{ - unsigned int to_alloc = npages; - struct page **page_list; - unsigned long filled; - unsigned int to_fill; - int ret; - - to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list)); - page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT); - if (!page_list) - return -ENOMEM; - - do { - filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill, - page_list); - if (!filled) { - ret = -ENOMEM; - goto err; - } - to_alloc -= filled; - ret = sg_alloc_append_table_from_pages( - &buf->table, page_list, filled, 0, - filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC, - GFP_KERNEL_ACCOUNT); - - if (ret) - goto err; - buf->allocated_length += filled * PAGE_SIZE; - /* clean input for another bulk allocation */ - memset(page_list, 0, filled * sizeof(*page_list)); - to_fill = min_t(unsigned int, to_alloc, - PAGE_SIZE / sizeof(*page_list)); - } while (to_alloc > 0); - - kvfree(page_list); - return 0; - -err: - kvfree(page_list); - return ret; -} - static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf) { mutex_lock(&migf->lock); @@ -777,36 +733,6 @@ mlx5vf_append_page_to_mig_buf(struct mlx5_vhca_data_buffer *vhca_buf, return 0; } -static int -mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf, - loff_t requested_length, - const char __user **buf, size_t *len, - loff_t *pos, ssize_t *done) -{ - int ret; - - if (requested_length > MAX_LOAD_SIZE) - return -ENOMEM; - - if (vhca_buf->allocated_length < requested_length) { - ret = mlx5vf_add_migration_pages( - vhca_buf, - DIV_ROUND_UP(requested_length - vhca_buf->allocated_length, - PAGE_SIZE)); - if (ret) - return ret; - } - - while (*len) { - ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, len, pos, - done); - if (ret) - return ret; - } - - return 0; -} - static ssize_t mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf, struct mlx5_vhca_data_buffer *vhca_buf, @@ -1038,13 +964,6 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE; break; } - case MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER: - ret = mlx5vf_resume_read_image_no_header(vhca_buf, - requested_length, - &buf, &len, pos, &done); - if (ret) - goto out_unlock; - break; case MLX5_VF_LOAD_STATE_READ_IMAGE: ret = mlx5vf_resume_read_image(migf, vhca_buf, migf->record_size, @@ -1114,21 +1033,16 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) } migf->buf[0] = buf; - if (MLX5VF_PRE_COPY_SUPP(mvdev)) { - buf = mlx5vf_alloc_data_buffer(migf, - sizeof(struct mlx5_vf_migration_header), DMA_NONE); - if (IS_ERR(buf)) { - ret = PTR_ERR(buf); - goto out_buf; - } - - migf->buf_header[0] = buf; - migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER; - } else { - /* Initial state will be to read the image */ - migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER; + buf = mlx5vf_alloc_data_buffer(migf, + sizeof(struct mlx5_vf_migration_header), DMA_NONE); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto out_buf; } + migf->buf_header[0] = buf; + migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER; + stream_open(migf->filp->f_inode, migf->filp); mutex_init(&migf->lock); INIT_LIST_HEAD(&migf->buf_list); @@ -1146,7 +1060,8 @@ end: return ERR_PTR(ret); } -void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev) +void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev, + enum mlx5_vf_migf_state *last_save_state) { if (mvdev->resuming_migf) { mlx5vf_disable_fd(mvdev->resuming_migf); @@ -1157,6 +1072,8 @@ void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev) if (mvdev->saving_migf) { mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx); cancel_work_sync(&mvdev->saving_migf->async_data.work); + if (last_save_state) + *last_save_state = mvdev->saving_migf->state; mlx5vf_disable_fd(mvdev->saving_migf); wake_up_interruptible(&mvdev->saving_migf->poll_wait); mlx5fv_cmd_clean_migf_resources(mvdev->saving_migf); @@ -1217,12 +1134,34 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, return migf->filp; } - if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) || - (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) || + if (cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) { + mlx5vf_disable_fds(mvdev, NULL); + return NULL; + } + + if ((cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) || (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_RUNNING_P2P)) { - mlx5vf_disable_fds(mvdev); - return NULL; + struct mlx5_vf_migration_file *migf = mvdev->saving_migf; + struct mlx5_vhca_data_buffer *buf; + enum mlx5_vf_migf_state state; + size_t size; + + ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &size, NULL, + MLX5VF_QUERY_INC | MLX5VF_QUERY_CLEANUP); + if (ret) + return ERR_PTR(ret); + buf = mlx5vf_get_data_buffer(migf, size, DMA_FROM_DEVICE); + if (IS_ERR(buf)) + return ERR_CAST(buf); + /* pre_copy cleanup */ + ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, false, false); + if (ret) { + mlx5vf_put_data_buffer(buf); + return ERR_PTR(ret); + } + mlx5vf_disable_fds(mvdev, &state); + return (state != MLX5_MIGF_STATE_ERROR) ? NULL : ERR_PTR(-EIO); } if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) { @@ -1237,14 +1176,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, } if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) { - if (!MLX5VF_PRE_COPY_SUPP(mvdev)) { - ret = mlx5vf_cmd_load_vhca_state(mvdev, - mvdev->resuming_migf, - mvdev->resuming_migf->buf[0]); - if (ret) - return ERR_PTR(ret); - } - mlx5vf_disable_fds(mvdev); + mlx5vf_disable_fds(mvdev, NULL); return NULL; } @@ -1289,7 +1221,7 @@ again: mvdev->deferred_reset = false; spin_unlock(&mvdev->reset_lock); mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING; - mlx5vf_disable_fds(mvdev); + mlx5vf_disable_fds(mvdev, NULL); goto again; } mutex_unlock(&mvdev->state_mutex); |