summary | refs | log | tree | commit | diff
path: root/drivers/vfio/pci
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vfio/pci')
-rw-r--r-- drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c 4
-rw-r--r-- drivers/vfio/pci/mlx5/cmd.c 79
-rw-r--r-- drivers/vfio/pci/mlx5/cmd.h 28
-rw-r--r-- drivers/vfio/pci/mlx5/main.c 261
-rw-r--r-- drivers/vfio/pci/vfio_pci_config.c 6
-rw-r--r-- drivers/vfio/pci/vfio_pci_core.c 9
-rw-r--r-- drivers/vfio/pci/vfio_pci_igd.c 2
-rw-r--r-- drivers/vfio/pci/vfio_pci_intrs.c 10
-rw-r--r-- drivers/vfio/pci/vfio_pci_rdwr.c 2
9 files changed, 322 insertions, 79 deletions
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 0bba3b05c6c7..a117eaf21c14 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -744,7 +744,7 @@ hisi_acc_vf_pci_resume(struct hisi_acc_vf_core_device *hisi_acc_vdev)
{
struct hisi_acc_vf_migration_file *migf;
- migf = kzalloc(sizeof(*migf), GFP_KERNEL);
+ migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
if (!migf)
return ERR_PTR(-ENOMEM);
@@ -863,7 +863,7 @@ hisi_acc_open_saving_migf(struct hisi_acc_vf_core_device *hisi_acc_vdev)
struct hisi_acc_vf_migration_file *migf;
int ret;
- migf = kzalloc(sizeof(*migf), GFP_KERNEL);
+ migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
if (!migf)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index 64e68d13cb98..deed156e6165 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -7,6 +7,29 @@
enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
+static int mlx5vf_is_migratable(struct mlx5_core_dev *mdev, u16 func_id)
+{ /* Returns 0 if func_id has the 'migratable' capability bit set, a negative errno otherwise. */
+ int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ void *query_cap = NULL, *cap;
+ int ret;
+
+ query_cap = kzalloc(query_sz, GFP_KERNEL);
+ if (!query_cap)
+ return -ENOMEM;
+
+ ret = mlx5_vport_get_other_func_cap(mdev, func_id, query_cap,
+ MLX5_CAP_GENERAL_2); /* presumably fills query_cap with func_id's GENERAL_2 caps - helper not visible here */
+ if (ret)
+ goto out;
+
+ cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability);
+ if (!MLX5_GET(cmd_hca_cap_2, cap, migratable))
+ ret = -EOPNOTSUPP; /* bit is clear: report the function as not migratable */
+out:
+ kfree(query_cap); /* shared exit: the query buffer is freed on success and on error */
+ return ret;
+}
+
static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
u16 *vhca_id);
static void
@@ -195,6 +218,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
if (mvdev->vf_id < 0)
goto end;
+ ret = mlx5vf_is_migratable(mvdev->mdev, mvdev->vf_id + 1);
+ if (ret)
+ goto end;
+
if (mlx5vf_cmd_get_vhca_id(mvdev->mdev, mvdev->vf_id + 1,
&mvdev->vhca_id))
goto end;
@@ -373,7 +400,7 @@ mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
struct mlx5_vhca_data_buffer *buf;
int ret;
- buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+ buf = kzalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
if (!buf)
return ERR_PTR(-ENOMEM);
@@ -473,7 +500,7 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
}
static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf,
- size_t image_size)
+ size_t image_size, bool initial_pre_copy)
{
struct mlx5_vf_migration_file *migf = header_buf->migf;
struct mlx5_vf_migration_header header = {};
@@ -481,7 +508,9 @@ static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf,
struct page *page;
u8 *to_buff;
- header.image_size = cpu_to_le64(image_size);
+ header.record_size = cpu_to_le64(image_size);
+ header.flags = cpu_to_le32(MLX5_MIGF_HEADER_FLAGS_TAG_MANDATORY);
+ header.tag = cpu_to_le32(MLX5_MIGF_HEADER_TAG_FW_DATA);
page = mlx5vf_get_migration_page(header_buf, 0);
if (!page)
return -EINVAL;
@@ -489,12 +518,13 @@ static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf,
memcpy(to_buff, &header, sizeof(header));
kunmap_local(to_buff);
header_buf->length = sizeof(header);
- header_buf->header_image_size = image_size;
header_buf->start_pos = header_buf->migf->max_pos;
migf->max_pos += header_buf->length;
spin_lock_irqsave(&migf->list_lock, flags);
list_add_tail(&header_buf->buf_elm, &migf->buf_list);
spin_unlock_irqrestore(&migf->list_lock, flags);
+ if (initial_pre_copy)
+ migf->pre_copy_initial_bytes += sizeof(header);
return 0;
}
@@ -508,11 +538,14 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
if (!status) {
size_t image_size;
unsigned long flags;
+ bool initial_pre_copy = migf->state != MLX5_MIGF_STATE_PRE_COPY &&
+ !async_data->last_chunk;
image_size = MLX5_GET(save_vhca_state_out, async_data->out,
actual_image_size);
if (async_data->header_buf) {
- status = add_buf_header(async_data->header_buf, image_size);
+ status = add_buf_header(async_data->header_buf, image_size,
+ initial_pre_copy);
if (status)
goto err;
}
@@ -522,6 +555,8 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
spin_lock_irqsave(&migf->list_lock, flags);
list_add_tail(&async_data->buf->buf_elm, &migf->buf_list);
spin_unlock_irqrestore(&migf->list_lock, flags);
+ if (initial_pre_copy)
+ migf->pre_copy_initial_bytes += image_size;
migf->state = async_data->last_chunk ?
MLX5_MIGF_STATE_COMPLETE : MLX5_MIGF_STATE_PRE_COPY;
wake_up_interruptible(&migf->poll_wait);
@@ -583,11 +618,16 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
}
if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
- header_buf = mlx5vf_get_data_buffer(migf,
- sizeof(struct mlx5_vf_migration_header), DMA_NONE);
- if (IS_ERR(header_buf)) {
- err = PTR_ERR(header_buf);
- goto err_free;
+ if (async_data->last_chunk && migf->buf_header) {
+ header_buf = migf->buf_header;
+ migf->buf_header = NULL;
+ } else {
+ header_buf = mlx5vf_get_data_buffer(migf,
+ sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+ if (IS_ERR(header_buf)) {
+ err = PTR_ERR(header_buf);
+ goto err_free;
+ }
}
}
@@ -790,7 +830,7 @@ static int mlx5vf_create_tracker(struct mlx5_core_dev *mdev,
node = interval_tree_iter_first(ranges, 0, ULONG_MAX);
for (i = 0; i < num_ranges; i++) {
void *addr_range_i_base = range_list_ptr + record_size * i;
- unsigned long length = node->last - node->start;
+ unsigned long length = node->last - node->start + 1;
MLX5_SET64(page_track_range, addr_range_i_base, start_address,
node->start);
@@ -800,7 +840,7 @@ static int mlx5vf_create_tracker(struct mlx5_core_dev *mdev,
}
WARN_ON(node);
- log_addr_space_size = ilog2(total_ranges_len);
+ log_addr_space_size = ilog2(roundup_pow_of_two(total_ranges_len));
if (log_addr_space_size <
(MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_min_addr_space)) ||
log_addr_space_size >
@@ -1032,18 +1072,18 @@ mlx5vf_create_rc_qp(struct mlx5_core_dev *mdev,
void *in;
int err;
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL_ACCOUNT);
if (!qp)
return ERR_PTR(-ENOMEM);
- qp->rq.wqe_cnt = roundup_pow_of_two(max_recv_wr);
- log_rq_stride = ilog2(MLX5_SEND_WQE_DS);
- log_rq_sz = ilog2(qp->rq.wqe_cnt);
err = mlx5_db_alloc_node(mdev, &qp->db, mdev->priv.numa_node);
if (err)
goto err_free;
if (max_recv_wr) {
+ qp->rq.wqe_cnt = roundup_pow_of_two(max_recv_wr);
+ log_rq_stride = ilog2(MLX5_SEND_WQE_DS);
+ log_rq_sz = ilog2(qp->rq.wqe_cnt);
err = mlx5_frag_buf_alloc_node(mdev,
wq_get_byte_sz(log_rq_sz, log_rq_stride),
&qp->buf, mdev->priv.numa_node);
@@ -1213,12 +1253,13 @@ static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf,
int i;
recv_buf->page_list = kvcalloc(npages, sizeof(*recv_buf->page_list),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!recv_buf->page_list)
return -ENOMEM;
for (;;) {
- filled = alloc_pages_bulk_array(GFP_KERNEL, npages - done,
+ filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT,
+ npages - done,
recv_buf->page_list + done);
if (!filled)
goto err;
@@ -1248,7 +1289,7 @@ static int register_dma_recv_pages(struct mlx5_core_dev *mdev,
recv_buf->dma_addrs = kvcalloc(recv_buf->npages,
sizeof(*recv_buf->dma_addrs),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!recv_buf->dma_addrs)
return -ENOMEM;
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index 5483171d57ad..aec4c69dd6c1 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/vfio_pci_core.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
@@ -26,15 +27,33 @@ enum mlx5_vf_migf_state {
enum mlx5_vf_load_state {
MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER,
MLX5_VF_LOAD_STATE_READ_HEADER,
+ MLX5_VF_LOAD_STATE_PREP_HEADER_DATA,
+ MLX5_VF_LOAD_STATE_READ_HEADER_DATA,
MLX5_VF_LOAD_STATE_PREP_IMAGE,
MLX5_VF_LOAD_STATE_READ_IMAGE,
MLX5_VF_LOAD_STATE_LOAD_IMAGE,
};
+struct mlx5_vf_migration_tag_stop_copy_data { /* payload of a MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE record */
+ __le64 stop_copy_size; /* little-endian; the saver stores the allocated length of its pre-allocated stop-copy buffer */
+};
+
+enum mlx5_vf_migf_header_flags { /* values for mlx5_vf_migration_header.flags */
+ MLX5_MIGF_HEADER_FLAGS_TAG_MANDATORY = 0, /* a record with an unrecognized tag fails the load with -EOPNOTSUPP */
+ MLX5_MIGF_HEADER_FLAGS_TAG_OPTIONAL = 1 << 0, /* a record with an unrecognized tag is read and skipped */
+};
+
+enum mlx5_vf_migf_header_tag { /* values for mlx5_vf_migration_header.tag */
+ MLX5_MIGF_HEADER_TAG_FW_DATA = 0, /* record is a saved device image; record_size is the image size */
+ MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE = 1 << 0, /* record data is struct mlx5_vf_migration_tag_stop_copy_data */
+};
+
struct mlx5_vf_migration_header {
- __le64 image_size;
+ __le64 record_size;
/* For future use in case we may need to change the kernel protocol */
- __le64 flags;
+ __le32 flags; /* Use mlx5_vf_migf_header_flags */
+ __le32 tag; /* Use mlx5_vf_migf_header_tag */
+ __u8 data[]; /* Its size is given in the record_size */
};
struct mlx5_vhca_data_buffer {
@@ -42,7 +61,6 @@ struct mlx5_vhca_data_buffer {
loff_t start_pos;
u64 length;
u64 allocated_length;
- u64 header_image_size;
u32 mkey;
enum dma_data_direction dma_dir;
u8 dmaed:1;
@@ -72,6 +90,10 @@ struct mlx5_vf_migration_file {
enum mlx5_vf_load_state load_state;
u32 pdn;
loff_t max_pos;
+ u64 record_size;
+ u32 record_tag;
+ u64 stop_copy_prep_size;
+ u64 pre_copy_initial_bytes;
struct mlx5_vhca_data_buffer *buf;
struct mlx5_vhca_data_buffer *buf_header;
spinlock_t list_lock;
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 9feb89c6d939..e897537a9e8a 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -21,8 +21,8 @@
#include "cmd.h"
-/* Arbitrary to prevent userspace from consuming endless memory */
-#define MAX_MIGRATION_SIZE (512*1024*1024)
+/* Device specification max LOAD size */
+#define MAX_LOAD_SIZE (BIT_ULL(__mlx5_bit_sz(load_vhca_state_in, size)) - 1)
static struct mlx5vf_pci_core_device *mlx5vf_drvdata(struct pci_dev *pdev)
{
@@ -73,12 +73,13 @@ int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
int ret;
to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
- page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL);
+ page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
if (!page_list)
return -ENOMEM;
do {
- filled = alloc_pages_bulk_array(GFP_KERNEL, to_fill, page_list);
+ filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
+ page_list);
if (!filled) {
ret = -ENOMEM;
goto err;
@@ -87,7 +88,7 @@ int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
ret = sg_alloc_append_table_from_pages(
&buf->table, page_list, filled, 0,
filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (ret)
goto err;
@@ -303,6 +304,87 @@ static void mlx5vf_mark_err(struct mlx5_vf_migration_file *migf)
wake_up_interruptible(&migf->poll_wait);
}
+static int mlx5vf_add_stop_copy_header(struct mlx5_vf_migration_file *migf)
+{ /* Queue an optional STOP_COPY_SIZE record (header + payload) on migf->buf_list; returns 0 or a negative errno. */
+ size_t size = sizeof(struct mlx5_vf_migration_header) +
+ sizeof(struct mlx5_vf_migration_tag_stop_copy_data);
+ struct mlx5_vf_migration_tag_stop_copy_data data = {};
+ struct mlx5_vhca_data_buffer *header_buf = NULL;
+ struct mlx5_vf_migration_header header = {};
+ unsigned long flags;
+ struct page *page;
+ u8 *to_buff;
+ int ret;
+
+ header_buf = mlx5vf_get_data_buffer(migf, size, DMA_NONE);
+ if (IS_ERR(header_buf))
+ return PTR_ERR(header_buf);
+
+ header.record_size = cpu_to_le64(sizeof(data)); /* payload size only; the header itself is not counted */
+ header.flags = cpu_to_le32(MLX5_MIGF_HEADER_FLAGS_TAG_OPTIONAL);
+ header.tag = cpu_to_le32(MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE);
+ page = mlx5vf_get_migration_page(header_buf, 0);
+ if (!page) {
+ ret = -EINVAL;
+ goto err;
+ }
+ to_buff = kmap_local_page(page);
+ memcpy(to_buff, &header, sizeof(header));
+ header_buf->length = sizeof(header);
+ data.stop_copy_size = cpu_to_le64(migf->buf->allocated_length); /* migf->buf is dereferenced here, so the caller must have set it */
+ memcpy(to_buff + sizeof(header), &data, sizeof(data)); /* payload directly follows the header in the same page */
+ header_buf->length += sizeof(data);
+ kunmap_local(to_buff);
+ header_buf->start_pos = header_buf->migf->max_pos;
+ migf->max_pos += header_buf->length;
+ spin_lock_irqsave(&migf->list_lock, flags);
+ list_add_tail(&header_buf->buf_elm, &migf->buf_list);
+ spin_unlock_irqrestore(&migf->list_lock, flags);
+ migf->pre_copy_initial_bytes = size; /* assigned rather than accumulated: header + payload bytes */
+ return 0;
+err:
+ mlx5vf_put_data_buffer(header_buf);
+ return ret;
+}
+
+static int mlx5vf_prep_stop_copy(struct mlx5_vf_migration_file *migf,
+ size_t state_size)
+{ /* Pre-allocate migf->buf and migf->buf_header, then queue the stop-copy-size record; returns 0 or a negative errno. */
+ struct mlx5_vhca_data_buffer *buf;
+ size_t inc_state_size;
+ int ret;
+
+ /* Be ready for a stop_copy size that might grow by 10 percent */
+ if (check_add_overflow(state_size, state_size / 10, &inc_state_size))
+ inc_state_size = state_size; /* the padded size overflowed: fall back to the unpadded size */
+
+ buf = mlx5vf_get_data_buffer(migf, inc_state_size, DMA_FROM_DEVICE);
+ if (IS_ERR(buf))
+ return PTR_ERR(buf);
+
+ migf->buf = buf;
+ buf = mlx5vf_get_data_buffer(migf,
+ sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+ if (IS_ERR(buf)) {
+ ret = PTR_ERR(buf);
+ goto err;
+ }
+
+ migf->buf_header = buf;
+ ret = mlx5vf_add_stop_copy_header(migf); /* reads migf->buf->allocated_length, so migf->buf is set first */
+ if (ret)
+ goto err_header;
+ return 0;
+
+err_header:
+ mlx5vf_put_data_buffer(migf->buf_header);
+ migf->buf_header = NULL;
+err:
+ mlx5vf_put_data_buffer(migf->buf); /* unwind in reverse order and clear the pointers left in migf */
+ migf->buf = NULL;
+ return ret;
+}
+
static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
@@ -313,7 +395,7 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd,
loff_t *pos = &filp->f_pos;
unsigned long minsz;
size_t inc_length = 0;
- bool end_of_data;
+ bool end_of_data = false;
int ret;
if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
@@ -357,25 +439,19 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd,
goto err_migf_unlock;
}
- buf = mlx5vf_get_data_buff_from_pos(migf, *pos, &end_of_data);
- if (buf) {
- if (buf->start_pos == 0) {
- info.initial_bytes = buf->header_image_size - *pos;
- } else if (buf->start_pos ==
- sizeof(struct mlx5_vf_migration_header)) {
- /* First data buffer following the header */
- info.initial_bytes = buf->start_pos +
- buf->length - *pos;
- } else {
- info.dirty_bytes = buf->start_pos + buf->length - *pos;
- }
+ if (migf->pre_copy_initial_bytes > *pos) {
+ info.initial_bytes = migf->pre_copy_initial_bytes - *pos;
} else {
- if (!end_of_data) {
- ret = -EINVAL;
- goto err_migf_unlock;
+ buf = mlx5vf_get_data_buff_from_pos(migf, *pos, &end_of_data);
+ if (buf) {
+ info.dirty_bytes = buf->start_pos + buf->length - *pos;
+ } else {
+ if (!end_of_data) {
+ ret = -EINVAL;
+ goto err_migf_unlock;
+ }
+ info.dirty_bytes = inc_length;
}
-
- info.dirty_bytes = inc_length;
}
if (!end_of_data || !inc_length) {
@@ -440,10 +516,16 @@ static int mlx5vf_pci_save_device_inc_data(struct mlx5vf_pci_core_device *mvdev)
if (ret)
goto err;
- buf = mlx5vf_get_data_buffer(migf, length, DMA_FROM_DEVICE);
- if (IS_ERR(buf)) {
- ret = PTR_ERR(buf);
- goto err;
+ /* Checking whether we have a matching pre-allocated buffer that can fit */
+ if (migf->buf && migf->buf->allocated_length >= length) {
+ buf = migf->buf;
+ migf->buf = NULL;
+ } else {
+ buf = mlx5vf_get_data_buffer(migf, length, DMA_FROM_DEVICE);
+ if (IS_ERR(buf)) {
+ ret = PTR_ERR(buf);
+ goto err;
+ }
}
ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, false);
@@ -467,7 +549,7 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track)
size_t length;
int ret;
- migf = kzalloc(sizeof(*migf), GFP_KERNEL);
+ migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
if (!migf)
return ERR_PTR(-ENOMEM);
@@ -502,6 +584,12 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track)
if (ret)
goto out_pd;
+ if (track) {
+ ret = mlx5vf_prep_stop_copy(migf, length);
+ if (ret)
+ goto out_pd;
+ }
+
buf = mlx5vf_alloc_data_buffer(migf, length, DMA_FROM_DEVICE);
if (IS_ERR(buf)) {
ret = PTR_ERR(buf);
@@ -515,7 +603,7 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track)
out_save:
mlx5vf_free_data_buffer(buf);
out_pd:
- mlx5vf_cmd_dealloc_pd(migf);
+ mlx5fv_cmd_clean_migf_resources(migf);
out_free:
fput(migf->filp);
end:
@@ -564,7 +652,7 @@ mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf,
{
int ret;
- if (requested_length > MAX_MIGRATION_SIZE)
+ if (requested_length > MAX_LOAD_SIZE)
return -ENOMEM;
if (vhca_buf->allocated_length < requested_length) {
@@ -616,6 +704,56 @@ mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf,
}
static int
+mlx5vf_resume_read_header_data(struct mlx5_vf_migration_file *migf,
+ struct mlx5_vhca_data_buffer *vhca_buf,
+ const char __user **buf, size_t *len,
+ loff_t *pos, ssize_t *done)
+{ /* Pull the current record's data from user space into vhca_buf; once complete, act on the tag and return to READ_HEADER. Returns 0 or a negative errno. */
+ size_t copy_len, to_copy;
+ size_t required_data;
+ u8 *to_buff;
+ int ret;
+
+ required_data = migf->record_size - vhca_buf->length; /* bytes of this record still missing */
+ to_copy = min_t(size_t, *len, required_data); /* never consume more than this record needs */
+ copy_len = to_copy;
+ while (to_copy) {
+ ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, &to_copy, pos,
+ done); /* presumably decrements to_copy and advances vhca_buf->length - helper not visible here */
+ if (ret)
+ return ret;
+ }
+
+ *len -= copy_len;
+ if (vhca_buf->length == migf->record_size) { /* the whole record data has arrived */
+ switch (migf->record_tag) {
+ case MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE:
+ {
+ struct page *page;
+
+ page = mlx5vf_get_migration_page(vhca_buf, 0);
+ if (!page)
+ return -EINVAL;
+ to_buff = kmap_local_page(page);
+ migf->stop_copy_prep_size = min_t(u64,
+ le64_to_cpup((__le64 *)to_buff), MAX_LOAD_SIZE); /* leading __le64 of the payload, capped at MAX_LOAD_SIZE */
+ kunmap_local(to_buff);
+ break;
+ }
+ default:
+ /* Optional tag */
+ break;
+ }
+
+ migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER; /* the next bytes are expected to be a new record header */
+ migf->max_pos += migf->record_size;
+ vhca_buf->length = 0; /* reset the fill level for the next use of this buffer */
+ }
+
+ return 0;
+}
+
+static int
mlx5vf_resume_read_header(struct mlx5_vf_migration_file *migf,
struct mlx5_vhca_data_buffer *vhca_buf,
const char __user **buf,
@@ -645,23 +783,38 @@ mlx5vf_resume_read_header(struct mlx5_vf_migration_file *migf,
*len -= copy_len;
vhca_buf->length += copy_len;
if (vhca_buf->length == sizeof(struct mlx5_vf_migration_header)) {
- u64 flags;
+ u64 record_size;
+ u32 flags;
- vhca_buf->header_image_size = le64_to_cpup((__le64 *)to_buff);
- if (vhca_buf->header_image_size > MAX_MIGRATION_SIZE) {
+ record_size = le64_to_cpup((__le64 *)to_buff);
+ if (record_size > MAX_LOAD_SIZE) {
ret = -ENOMEM;
goto end;
}
- flags = le64_to_cpup((__le64 *)(to_buff +
+ migf->record_size = record_size;
+ flags = le32_to_cpup((__le32 *)(to_buff +
offsetof(struct mlx5_vf_migration_header, flags)));
- if (flags) {
- ret = -EOPNOTSUPP;
- goto end;
+ migf->record_tag = le32_to_cpup((__le32 *)(to_buff +
+ offsetof(struct mlx5_vf_migration_header, tag)));
+ switch (migf->record_tag) {
+ case MLX5_MIGF_HEADER_TAG_FW_DATA:
+ migf->load_state = MLX5_VF_LOAD_STATE_PREP_IMAGE;
+ break;
+ case MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE:
+ migf->load_state = MLX5_VF_LOAD_STATE_PREP_HEADER_DATA;
+ break;
+ default:
+ if (!(flags & MLX5_MIGF_HEADER_FLAGS_TAG_OPTIONAL)) {
+ ret = -EOPNOTSUPP;
+ goto end;
+ }
+ /* We may read and skip this optional record data */
+ migf->load_state = MLX5_VF_LOAD_STATE_PREP_HEADER_DATA;
}
- migf->load_state = MLX5_VF_LOAD_STATE_PREP_IMAGE;
migf->max_pos += vhca_buf->length;
+ vhca_buf->length = 0;
*has_work = true;
}
end:
@@ -705,9 +858,34 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
if (ret)
goto out_unlock;
break;
+ case MLX5_VF_LOAD_STATE_PREP_HEADER_DATA:
+ if (vhca_buf_header->allocated_length < migf->record_size) {
+ mlx5vf_free_data_buffer(vhca_buf_header);
+
+ migf->buf_header = mlx5vf_alloc_data_buffer(migf,
+ migf->record_size, DMA_NONE);
+ if (IS_ERR(migf->buf_header)) {
+ ret = PTR_ERR(migf->buf_header);
+ migf->buf_header = NULL;
+ goto out_unlock;
+ }
+
+ vhca_buf_header = migf->buf_header;
+ }
+
+ vhca_buf_header->start_pos = migf->max_pos;
+ migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER_DATA;
+ break;
+ case MLX5_VF_LOAD_STATE_READ_HEADER_DATA:
+ ret = mlx5vf_resume_read_header_data(migf, vhca_buf_header,
+ &buf, &len, pos, &done);
+ if (ret)
+ goto out_unlock;
+ break;
case MLX5_VF_LOAD_STATE_PREP_IMAGE:
{
- u64 size = vhca_buf_header->header_image_size;
+ u64 size = max(migf->record_size,
+ migf->stop_copy_prep_size);
if (vhca_buf->allocated_length < size) {
mlx5vf_free_data_buffer(vhca_buf);
@@ -736,7 +914,7 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
break;
case MLX5_VF_LOAD_STATE_READ_IMAGE:
ret = mlx5vf_resume_read_image(migf, vhca_buf,
- vhca_buf_header->header_image_size,
+ migf->record_size,
&buf, &len, pos, &done, &has_work);
if (ret)
goto out_unlock;
@@ -749,7 +927,6 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
/* prep header buf for next image */
vhca_buf_header->length = 0;
- vhca_buf_header->header_image_size = 0;
/* prep data buf for next image */
vhca_buf->length = 0;
@@ -781,7 +958,7 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
struct mlx5_vhca_data_buffer *buf;
int ret;
- migf = kzalloc(sizeof(*migf), GFP_KERNEL);
+ migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT);
if (!migf)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 4a350421c5f6..523e0144c86f 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -1244,7 +1244,7 @@ static int vfio_msi_cap_len(struct vfio_pci_core_device *vdev, u8 pos)
if (vdev->msi_perm)
return len;
- vdev->msi_perm = kmalloc(sizeof(struct perm_bits), GFP_KERNEL);
+ vdev->msi_perm = kmalloc(sizeof(struct perm_bits), GFP_KERNEL_ACCOUNT);
if (!vdev->msi_perm)
return -ENOMEM;
@@ -1731,11 +1731,11 @@ int vfio_config_init(struct vfio_pci_core_device *vdev)
* no requirements on the length of a capability, so the gap between
* capabilities needs byte granularity.
*/
- map = kmalloc(pdev->cfg_size, GFP_KERNEL);
+ map = kmalloc(pdev->cfg_size, GFP_KERNEL_ACCOUNT);
if (!map)
return -ENOMEM;
- vconfig = kmalloc(pdev->cfg_size, GFP_KERNEL);
+ vconfig = kmalloc(pdev->cfg_size, GFP_KERNEL_ACCOUNT);
if (!vconfig) {
kfree(map);
return -ENOMEM;
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 26a541cc64d1..a5ab416cf476 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -144,7 +144,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev)
* of the exclusive page in case that hot-add
* device's bar is assigned into it.
*/
- dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL);
+ dummy_res =
+ kzalloc(sizeof(*dummy_res), GFP_KERNEL_ACCOUNT);
if (dummy_res == NULL)
goto no_mmap;
@@ -863,7 +864,7 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
region = krealloc(vdev->region,
(vdev->num_regions + 1) * sizeof(*region),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!region)
return -ENOMEM;
@@ -1644,7 +1645,7 @@ static int __vfio_pci_add_vma(struct vfio_pci_core_device *vdev,
{
struct vfio_pci_mmap_vma *mmap_vma;
- mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL);
+ mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL_ACCOUNT);
if (!mmap_vma)
return -ENOMEM;
@@ -1799,7 +1800,7 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
* See remap_pfn_range(), called from vfio_pci_fault() but we can't
* change vm_flags within the fault handler. Set them now.
*/
- vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+ vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
vma->vm_ops = &vfio_pci_mmap_ops;
return 0;
diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c
index 5e6ca5926954..dd70e2431bd7 100644
--- a/drivers/vfio/pci/vfio_pci_igd.c
+++ b/drivers/vfio/pci/vfio_pci_igd.c
@@ -180,7 +180,7 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_core_device *vdev)
if (!addr || !(~addr))
return -ENODEV;
- opregionvbt = kzalloc(sizeof(*opregionvbt), GFP_KERNEL);
+ opregionvbt = kzalloc(sizeof(*opregionvbt), GFP_KERNEL_ACCOUNT);
if (!opregionvbt)
return -ENOMEM;
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 40c3d7cf163f..bffb0741518b 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -177,7 +177,7 @@ static int vfio_intx_enable(struct vfio_pci_core_device *vdev)
if (!vdev->pdev->irq)
return -ENODEV;
- vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
+ vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL_ACCOUNT);
if (!vdev->ctx)
return -ENOMEM;
@@ -216,7 +216,7 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
if (fd < 0) /* Disable only */
return 0;
- vdev->ctx[0].name = kasprintf(GFP_KERNEL, "vfio-intx(%s)",
+ vdev->ctx[0].name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)",
pci_name(pdev));
if (!vdev->ctx[0].name)
return -ENOMEM;
@@ -284,7 +284,8 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
if (!is_irq_none(vdev))
return -EINVAL;
- vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
+ vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx),
+ GFP_KERNEL_ACCOUNT);
if (!vdev->ctx)
return -ENOMEM;
@@ -343,7 +344,8 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
if (fd < 0)
return 0;
- vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)",
+ vdev->ctx[vector].name = kasprintf(GFP_KERNEL_ACCOUNT,
+ "vfio-msi%s[%d](%s)",
msix ? "x" : "", vector,
pci_name(pdev));
if (!vdev->ctx[vector].name)
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index e352a033b4ae..e27de61ac9fe 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -470,7 +470,7 @@ int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
goto out_unlock;
}
- ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL);
+ ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT);
if (!ioeventfd) {
ret = -ENOMEM;
goto out_unlock;