summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-02-23 22:27:49 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2017-02-23 22:27:49 +0300
commitaf17fe7a63db7e11d65f1296f0cbf156a89a2735 (patch)
tree39b8c379a5a30e1468684832945eaab704f6d095
parentf14cc3b13d8f3ceee862f8365d37ba214630126a (diff)
parentcdbe33d0f82d68ff74f05502a4c26e65ec7e90bb (diff)
downloadlinux-af17fe7a63db7e11d65f1296f0cbf156a89a2735.tar.xz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull Mellanox rdma updates from Doug Ledford: "Mellanox specific updates for 4.11 merge window Because the Mellanox code required being based on a net-next tree, I keept it separate from the remainder of the RDMA stack submission that is based on 4.10-rc3. This branch contains: - Various mlx4 and mlx5 fixes and minor changes - Support for adding a tag match rule to flow specs - Support for cvlan offload operation for raw ethernet QPs - A change to the core IB code to recognize raw eth capabilities and enumerate them (touches non-Mellanox code) - Implicit On-Demand Paging memory registration support" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (40 commits) IB/mlx5: Fix configuration of port capabilities IB/mlx4: Take source GID by index from HW GID table IB/mlx5: Fix blue flame buffer size calculation IB/mlx4: Remove unused variable from function declaration IB: Query ports via the core instead of direct into the driver IB: Add protocol for USNIC IB/mlx4: Support raw packet protocol IB/mlx5: Support raw packet protocol IB/core: Add raw packet protocol IB/mlx5: Add implicit MR support IB/mlx5: Expose MR cache for mlx5_ib IB/mlx5: Add null_mkey access IB/umem: Indicate that process is being terminated IB/umem: Update on demand page (ODP) support IB/core: Add implicit MR flag IB/mlx5: Support creation of a WQ with scatter FCS offload IB/mlx5: Enable QP creation with cvlan offload IB/mlx5: Enable WQ creation and modification with cvlan offload IB/mlx5: Expose vlan offloads capabilities IB/uverbs: Enable QP creation with cvlan offload ...
-rw-r--r--drivers/infiniband/core/umem.c3
-rw-r--r--drivers/infiniband/core/umem_odp.c92
-rw-r--r--drivers/infiniband/core/umem_rbtree.c21
-rw-r--r--drivers/infiniband/core/uverbs.h1
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c53
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c7
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c8
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c1
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c7
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c8
-rw-r--r--drivers/infiniband/hw/mlx4/alias_GUID.c1
-rw-r--r--drivers/infiniband/hw/mlx4/main.c27
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c56
-rw-r--r--drivers/infiniband/hw/mlx4/sysfs.c1
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile2
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c48
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h40
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c14
-rw-r--r--drivers/infiniband/hw/mlx5/main.c337
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h46
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c128
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c505
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c91
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c11
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c9
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c5
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c9
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c1
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c9
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c1
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c4
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c2
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c5
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c4
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c7
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c12
-rw-r--r--include/linux/mlx5/driver.h6
-rw-r--r--include/linux/mlx5/mlx5_ifc.h2
-rw-r--r--include/rdma/ib_umem_odp.h21
-rw-r--r--include/rdma/ib_verbs.h57
-rw-r--r--include/uapi/rdma/ib_user_verbs.h19
42 files changed, 1417 insertions, 270 deletions
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 4609b921f899..446b56a5260b 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -99,9 +99,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (dmasync)
dma_attrs |= DMA_ATTR_WRITE_BARRIER;
- if (!size)
- return ERR_PTR(-EINVAL);
-
/*
* If the combination of the addr and size requested for this memory
* region causes an integer overflow, return error.
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 6b079a31dced..f2fc0431512d 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -239,6 +239,71 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
.invalidate_range_end = ib_umem_notifier_invalidate_range_end,
};
+struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
+ unsigned long addr,
+ size_t size)
+{
+ struct ib_umem *umem;
+ struct ib_umem_odp *odp_data;
+ int pages = size >> PAGE_SHIFT;
+ int ret;
+
+ umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+ if (!umem)
+ return ERR_PTR(-ENOMEM);
+
+ umem->context = context;
+ umem->length = size;
+ umem->address = addr;
+ umem->page_size = PAGE_SIZE;
+ umem->writable = 1;
+
+ odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
+ if (!odp_data) {
+ ret = -ENOMEM;
+ goto out_umem;
+ }
+ odp_data->umem = umem;
+
+ mutex_init(&odp_data->umem_mutex);
+ init_completion(&odp_data->notifier_completion);
+
+ odp_data->page_list = vzalloc(pages * sizeof(*odp_data->page_list));
+ if (!odp_data->page_list) {
+ ret = -ENOMEM;
+ goto out_odp_data;
+ }
+
+ odp_data->dma_list = vzalloc(pages * sizeof(*odp_data->dma_list));
+ if (!odp_data->dma_list) {
+ ret = -ENOMEM;
+ goto out_page_list;
+ }
+
+ down_write(&context->umem_rwsem);
+ context->odp_mrs_count++;
+ rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree);
+ if (likely(!atomic_read(&context->notifier_count)))
+ odp_data->mn_counters_active = true;
+ else
+ list_add(&odp_data->no_private_counters,
+ &context->no_private_counters);
+ up_write(&context->umem_rwsem);
+
+ umem->odp_data = odp_data;
+
+ return umem;
+
+out_page_list:
+ vfree(odp_data->page_list);
+out_odp_data:
+ kfree(odp_data);
+out_umem:
+ kfree(umem);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_alloc_odp_umem);
+
int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
{
int ret_val;
@@ -270,18 +335,20 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
init_completion(&umem->odp_data->notifier_completion);
- umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
+ if (ib_umem_num_pages(umem)) {
+ umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
sizeof(*umem->odp_data->page_list));
- if (!umem->odp_data->page_list) {
- ret_val = -ENOMEM;
- goto out_odp_data;
- }
+ if (!umem->odp_data->page_list) {
+ ret_val = -ENOMEM;
+ goto out_odp_data;
+ }
- umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
+ umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
sizeof(*umem->odp_data->dma_list));
- if (!umem->odp_data->dma_list) {
- ret_val = -ENOMEM;
- goto out_page_list;
+ if (!umem->odp_data->dma_list) {
+ ret_val = -ENOMEM;
+ goto out_page_list;
+ }
}
/*
@@ -466,6 +533,7 @@ static int ib_umem_odp_map_dma_single_page(
}
umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
umem->odp_data->page_list[page_index] = page;
+ umem->npages++;
stored_page = 1;
} else if (umem->odp_data->page_list[page_index] == page) {
umem->odp_data->dma_list[page_index] |= access_mask;
@@ -505,7 +573,8 @@ out:
* for failure.
* An -EAGAIN error code is returned when a concurrent mmu notifier prevents
* the function from completing its task.
- *
+ * An -ENOENT error code indicates that userspace process is being terminated
+ * and mm was already destroyed.
* @umem: the umem to map and pin
* @user_virt: the address from which we need to map.
* @bcnt: the minimal number of bytes to pin and map. The mapping might be
@@ -553,7 +622,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
owning_mm = get_task_mm(owning_process);
if (owning_mm == NULL) {
- ret = -EINVAL;
+ ret = -ENOENT;
goto out_put_task;
}
@@ -665,6 +734,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
put_page(page);
umem->odp_data->page_list[idx] = NULL;
umem->odp_data->dma_list[idx] = 0;
+ umem->npages--;
}
}
mutex_unlock(&umem->odp_data->umem_mutex);
diff --git a/drivers/infiniband/core/umem_rbtree.c b/drivers/infiniband/core/umem_rbtree.c
index 727d788448f5..d176597b4d78 100644
--- a/drivers/infiniband/core/umem_rbtree.c
+++ b/drivers/infiniband/core/umem_rbtree.c
@@ -78,17 +78,32 @@ int rbt_ib_umem_for_each_in_range(struct rb_root *root,
void *cookie)
{
int ret_val = 0;
- struct umem_odp_node *node;
+ struct umem_odp_node *node, *next;
struct ib_umem_odp *umem;
if (unlikely(start == last))
return ret_val;
- for (node = rbt_ib_umem_iter_first(root, start, last - 1); node;
- node = rbt_ib_umem_iter_next(node, start, last - 1)) {
+ for (node = rbt_ib_umem_iter_first(root, start, last - 1);
+ node; node = next) {
+ next = rbt_ib_umem_iter_next(node, start, last - 1);
umem = container_of(node, struct ib_umem_odp, interval_tree);
ret_val = cb(umem->umem, start, last, cookie) || ret_val;
}
return ret_val;
}
+EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range);
+
+struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root,
+ u64 addr, u64 length)
+{
+ struct umem_odp_node *node;
+
+ node = rbt_ib_umem_iter_first(root, addr, addr + length - 1);
+ if (node)
+ return container_of(node, struct ib_umem_odp, interval_tree);
+ return NULL;
+
+}
+EXPORT_SYMBOL(rbt_ib_umem_lookup);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 455034ac994e..e1bedf0bac04 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -228,6 +228,7 @@ struct ib_uverbs_flow_spec {
struct ib_uverbs_flow_spec_ipv4 ipv4;
struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
struct ib_uverbs_flow_spec_ipv6 ipv6;
+ struct ib_uverbs_flow_spec_action_tag flow_tag;
};
};
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 700782203483..b4b395a054ac 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1891,7 +1891,8 @@ static int create_qp(struct ib_uverbs_file *file,
IB_QP_CREATE_CROSS_CHANNEL |
IB_QP_CREATE_MANAGED_SEND |
IB_QP_CREATE_MANAGED_RECV |
- IB_QP_CREATE_SCATTER_FCS)) {
+ IB_QP_CREATE_SCATTER_FCS |
+ IB_QP_CREATE_CVLAN_STRIPPING)) {
ret = -EINVAL;
goto err_put;
}
@@ -3143,6 +3144,25 @@ out_put:
return ret ? ret : in_len;
}
+static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ ib_spec->type = kern_spec->type;
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ACTION_TAG:
+ if (kern_spec->flow_tag.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_tag))
+ return -EINVAL;
+
+ ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag);
+ ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec)
{
/* Returns user space filter size, includes padding */
@@ -3167,8 +3187,8 @@ static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
return kern_filter_size;
}
-static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
- union ib_flow_spec *ib_spec)
+static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
{
ssize_t actual_filter_sz;
ssize_t kern_filter_sz;
@@ -3263,6 +3283,18 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
return 0;
}
+static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ if (kern_spec->reserved)
+ return -EINVAL;
+
+ if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
+ return kern_spec_to_ib_spec_action(kern_spec, ib_spec);
+ else
+ return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
+}
+
int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
struct ib_udata *ucore,
@@ -3325,6 +3357,9 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
wq_init_attr.wq_context = file;
wq_init_attr.wq_type = cmd.wq_type;
wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
+ if (ucore->inlen >= (offsetof(typeof(cmd), create_flags) +
+ sizeof(cmd.create_flags)))
+ wq_init_attr.create_flags = cmd.create_flags;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
@@ -3480,7 +3515,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
if (!cmd.attr_mask)
return -EINVAL;
- if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE))
+ if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
wq = idr_read_wq(cmd.wq_handle, file->ucontext);
@@ -3489,6 +3524,10 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
wq_attr.curr_wq_state = cmd.curr_wq_state;
wq_attr.wq_state = cmd.wq_state;
+ if (cmd.attr_mask & IB_WQ_FLAGS) {
+ wq_attr.flags = cmd.flags;
+ wq_attr.flags_mask = cmd.flags_mask;
+ }
ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
put_wq_read(wq);
return ret;
@@ -4323,6 +4362,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
resp.max_wq_type_rq = attr.max_wq_type_rq;
resp.response_length += sizeof(resp.max_wq_type_rq);
+
+ if (ucore->outlen < resp.response_length + sizeof(resp.raw_packet_caps))
+ goto end;
+
+ resp.raw_packet_caps = attr.raw_packet_caps;
+ resp.response_length += sizeof(resp.raw_packet_caps);
end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length);
return err;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 6262dc035f3c..48649f93258a 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1133,7 +1133,7 @@ static int iwch_query_port(struct ib_device *ibdev,
dev = to_iwch_dev(ibdev);
netdev = dev->rdev.port_info.lldevs[port-1];
- memset(props, 0, sizeof(struct ib_port_attr));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@@ -1329,13 +1329,14 @@ static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = iwch_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 3345e1c312f7..bdf7de571d83 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -370,8 +370,7 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
dev = to_c4iw_dev(ibdev);
netdev = dev->rdev.lldi.ports[port-1];
-
- memset(props, 0, sizeof(struct ib_port_attr));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@@ -508,13 +507,14 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = c4iw_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 5ba4c0dec348..33f00f0719c5 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1302,6 +1302,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
u16 lid = ppd->lid;
+ /* props being zeroed by the caller, avoid zeroing it here */
props->lid = lid ? lid : 0;
props->lmc = ppd->lmc;
/* OPA logical states match IB logical states */
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index cf14679664ca..6843409fba29 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -250,7 +250,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
assert(port_num > 0);
port = port_num - 1;
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = hr_dev->caps.max_mtu;
props->gid_tbl_len = hr_dev->caps.gid_table_len[port];
@@ -401,14 +401,15 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
struct ib_port_attr attr;
int ret;
- ret = hns_roce_query_port(ib_dev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+
+ ret = ib_query_port(ib_dev, port_num, &attr);
if (ret)
return ret;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 4c000d60d5c6..5f695bf232a8 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -97,8 +97,7 @@ static int i40iw_query_port(struct ib_device *ibdev,
struct i40iw_device *iwdev = to_iwdev(ibdev);
struct net_device *netdev = iwdev->netdev;
- memset(props, 0, sizeof(*props));
-
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@@ -2497,14 +2496,15 @@ static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = i40iw_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 06020c54db20..ea24230ea0d4 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -499,6 +499,7 @@ static int set_guid_rec(struct ib_device *ibdev,
struct list_head *head =
&dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
+ memset(&attr, 0, sizeof(attr));
err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
if (err) {
pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 7031a8dd4d14..211cbbe9ccd1 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -678,7 +678,7 @@ static u8 state_to_phys_state(enum ib_port_state state)
}
static int eth_link_query_port(struct ib_device *ibdev, u8 port,
- struct ib_port_attr *props, int netw_view)
+ struct ib_port_attr *props)
{
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
@@ -741,11 +741,11 @@ int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
{
int err;
- memset(props, 0, sizeof *props);
+ /* props being zeroed by the caller, avoid zeroing it here */
err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
ib_link_query_port(ibdev, port, props, netw_view) :
- eth_link_query_port(ibdev, port, props, netw_view);
+ eth_link_query_port(ibdev, port, props);
return err;
}
@@ -1014,7 +1014,7 @@ static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
mutex_lock(&mdev->cap_mask_mutex);
- err = mlx4_ib_query_port(ibdev, port, &attr);
+ err = ib_query_port(ibdev, port, &attr);
if (err)
goto out;
@@ -2537,24 +2537,27 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
int err;
- err = mlx4_ib_query_port(ibdev, port_num, &attr);
- if (err)
- return err;
-
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
- immutable->gid_tbl_len = attr.gid_tbl_len;
-
if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
} else {
if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ immutable->core_cap_flags |= RDMA_CORE_PORT_RAW_PACKET;
+ if (immutable->core_cap_flags & (RDMA_CORE_PORT_IBA_ROCE |
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP))
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
}
- immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
return 0;
}
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 7d76f769233c..c34eebc7db65 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -2420,11 +2420,31 @@ static u8 sl_to_vl(struct mlx4_ib_dev *dev, u8 sl, int port_num)
return vl;
}
+static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num,
+ int index, union ib_gid *gid,
+ enum ib_gid_type *gid_type)
+{
+ struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+ struct mlx4_port_gid_table *port_gid_table;
+ unsigned long flags;
+
+ port_gid_table = &iboe->gids[port_num - 1];
+ spin_lock_irqsave(&iboe->lock, flags);
+ memcpy(gid, &port_gid_table->gids[index].gid, sizeof(*gid));
+ *gid_type = port_gid_table->gids[index].gid_type;
+ spin_unlock_irqrestore(&iboe->lock, flags);
+ if (!memcmp(gid, &zgid, sizeof(*gid)))
+ return -ENOENT;
+
+ return 0;
+}
+
#define MLX4_ROCEV2_QP1_SPORT 0xC000
static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
struct ib_device *ib_dev = sqp->qp.ibqp.device;
+ struct mlx4_ib_dev *ibdev = to_mdev(ib_dev);
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_ctrl_seg *ctrl = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
@@ -2450,8 +2470,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
is_grh = mlx4_ib_ah_grh_present(ah);
if (is_eth) {
- struct ib_gid_attr gid_attr;
-
+ enum ib_gid_type gid_type;
if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
/* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so
@@ -2462,18 +2481,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
if (err)
return err;
} else {
- err = ib_get_cached_gid(ib_dev,
- be32_to_cpu(ah->av.ib.port_pd) >> 24,
- ah->av.ib.gid_index, &sgid,
- &gid_attr);
- if (!err) {
- if (gid_attr.ndev)
- dev_put(gid_attr.ndev);
- if (!memcmp(&sgid, &zgid, sizeof(sgid)))
- err = -ENOENT;
- }
+ err = fill_gid_by_hw_index(ibdev, sqp->qp.port,
+ ah->av.ib.gid_index,
+ &sgid, &gid_type);
if (!err) {
- is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+ is_udp = gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
if (is_udp) {
if (ipv6_addr_v4mapped((struct in6_addr *)&sgid))
ip_version = 4;
@@ -2951,21 +2963,17 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
if (sqp->roce_v2_gsi) {
struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
- struct ib_gid_attr gid_attr;
+ enum ib_gid_type gid_type;
union ib_gid gid;
- if (!ib_get_cached_gid(ibqp->device,
- be32_to_cpu(ah->av.ib.port_pd) >> 24,
- ah->av.ib.gid_index, &gid,
- &gid_attr)) {
- if (gid_attr.ndev)
- dev_put(gid_attr.ndev);
- qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
- to_mqp(sqp->roce_v2_gsi) : qp;
- } else {
+ if (!fill_gid_by_hw_index(mdev, sqp->qp.port,
+ ah->av.ib.gid_index,
+ &gid, &gid_type))
+ qp = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
+ to_mqp(sqp->roce_v2_gsi) : qp;
+ else
pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
ah->av.ib.gid_index);
- }
}
}
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index 69fb5ba94d0f..0ba5ba7540c8 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -226,6 +226,7 @@ static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
int ret = 0 ;
struct ib_port_attr attr;
+ memset(&attr, 0, sizeof(attr));
/* get the physical gid and pkey table sizes.*/
ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1);
if (ret)
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 7493a83acd28..90ad2adc752f 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,4 +1,4 @@
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
-mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o
+mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
new file mode 100644
index 000000000000..cdc2d3017da7
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "cmd.h"
+
+int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
+{
+ u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {};
+ int err;
+
+ MLX5_SET(query_special_contexts_in, in, opcode,
+ MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *null_mkey = MLX5_GET(query_special_contexts_out, out,
+ null_mkey);
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
new file mode 100644
index 000000000000..7ca8a7b6434d
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_IB_CMD_H
+#define MLX5_IB_CMD_H
+
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
+
+int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
+#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 39e58489dcc2..8dacb49eabd9 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -42,12 +42,24 @@ enum {
MLX5_IB_VENDOR_CLASS2 = 0xa
};
+static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num,
+ struct ib_mad *in_mad)
+{
+ if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+ in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ return true;
+ return dev->mdev->port_caps[port_num - 1].has_smi;
+}
+
int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad)
{
u8 op_modifier = 0;
+ if (!can_do_mad_ifc(dev, port, (struct ib_mad *)in_mad))
+ return -EPERM;
+
/* Key check traps can't be generated unless we have in_wc to
* tell us where to send the trap.
*/
@@ -515,7 +527,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
if (!in_mad || !out_mad)
goto out;
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 9d8535385bb8..6a8498c052a5 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -65,10 +65,6 @@ MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRIVER_VERSION);
-static int deprecated_prof_sel = 2;
-module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
-MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
-
static char mlx5_version[] =
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
@@ -175,7 +171,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
enum ib_mtu ndev_ib_mtu;
u16 qkey_viol_cntr;
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->port_cap_flags |= IB_PORT_CM_SUP;
props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;
@@ -326,6 +322,27 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
}
+int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
+ int index, enum ib_gid_type *gid_type)
+{
+ struct ib_gid_attr attr;
+ union ib_gid gid;
+ int ret;
+
+ ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr);
+ if (ret)
+ return ret;
+
+ if (!attr.ndev)
+ return -ENODEV;
+
+ dev_put(attr.ndev);
+
+ *gid_type = attr.gid_type;
+
+ return 0;
+}
+
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
@@ -565,8 +582,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) {
- if (MLX5_CAP_ETH(mdev, csum_cap))
+ if (MLX5_CAP_ETH(mdev, csum_cap)) {
+ /* Legacy bit to support old userspace libraries */
props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+ props->raw_packet_caps |= IB_RAW_PACKET_CAP_IP_CSUM;
+ }
+
+ if (MLX5_CAP_ETH(dev->mdev, vlan_cap))
+ props->raw_packet_caps |=
+ IB_RAW_PACKET_CAP_CVLAN_STRIPPING;
if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
@@ -605,8 +629,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
- MLX5_CAP_ETH(dev->mdev, scatter_fcs))
+ MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
+ /* Legacy bit to support old userspace libraries */
props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
+ props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
+ }
if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
@@ -831,7 +858,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
goto out;
}
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
if (err)
@@ -969,6 +996,31 @@ static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
return err;
}
+static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u8 port_num, u32 mask,
+ u32 value)
+{
+ struct mlx5_hca_vport_context ctx = {};
+ int err;
+
+ err = mlx5_query_hca_vport_context(dev->mdev, 0,
+ port_num, 0, &ctx);
+ if (err)
+ return err;
+
+ if (~ctx.cap_mask1_perm & mask) {
+ mlx5_ib_warn(dev, "trying to change bitmask 0x%X but change supported 0x%X\n",
+ mask, ctx.cap_mask1_perm);
+ return -EINVAL;
+ }
+
+ ctx.cap_mask1 = value;
+ ctx.cap_mask1_perm = mask;
+ err = mlx5_core_modify_hca_vport_context(dev->mdev, 0,
+ port_num, 0, &ctx);
+
+ return err;
+}
+
static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_modify *props)
{
@@ -976,10 +1028,20 @@ static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_attr attr;
u32 tmp;
int err;
+ u32 change_mask;
+ u32 value;
+ bool is_ib = (mlx5_ib_port_link_layer(ibdev, port) ==
+ IB_LINK_LAYER_INFINIBAND);
+
+ if (MLX5_CAP_GEN(dev->mdev, ib_virt) && is_ib) {
+ change_mask = props->clr_port_cap_mask | props->set_port_cap_mask;
+ value = ~props->clr_port_cap_mask | props->set_port_cap_mask;
+ return set_port_caps_atomic(dev, port, change_mask, value);
+ }
mutex_lock(&dev->cap_mask_mutex);
- err = mlx5_ib_query_port(ibdev, port, &attr);
+ err = ib_query_port(ibdev, port, &attr);
if (err)
goto out;
@@ -1661,6 +1723,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
#define LAST_IPV6_FIELD traffic_class
#define LAST_TCP_UDP_FIELD src_port
#define LAST_TUNNEL_FIELD tunnel_id
+#define LAST_FLOW_TAG_FIELD tag_id
/* Field is the last supported field */
#define FIELDS_NOT_SUPPORTED(filter, field)\
@@ -1671,7 +1734,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
sizeof(filter.field))
static int parse_flow_attr(u32 *match_c, u32 *match_v,
- const union ib_flow_spec *ib_spec)
+ const union ib_flow_spec *ib_spec, u32 *tag_id)
{
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
misc_parameters);
@@ -1695,7 +1758,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
case IB_FLOW_SPEC_ETH:
if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
dmac_47_16),
@@ -1743,7 +1806,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
break;
case IB_FLOW_SPEC_IPV4:
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, 0xffff);
@@ -1775,7 +1838,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
break;
case IB_FLOW_SPEC_IPV6:
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, 0xffff);
@@ -1816,7 +1879,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
case IB_FLOW_SPEC_TCP:
if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
LAST_TCP_UDP_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
0xff);
@@ -1836,7 +1899,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
case IB_FLOW_SPEC_UDP:
if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
LAST_TCP_UDP_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
0xff);
@@ -1856,13 +1919,22 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
case IB_FLOW_SPEC_VXLAN_TUNNEL:
if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
LAST_TUNNEL_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
ntohl(ib_spec->tunnel.mask.tunnel_id));
MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
ntohl(ib_spec->tunnel.val.tunnel_id));
break;
+ case IB_FLOW_SPEC_ACTION_TAG:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
+ LAST_FLOW_TAG_FIELD))
+ return -EOPNOTSUPP;
+ if (ib_spec->flow_tag.tag_id >= BIT(24))
+ return -EINVAL;
+
+ *tag_id = ib_spec->flow_tag.tag_id;
+ break;
default:
return -EINVAL;
}
@@ -2046,6 +2118,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_flow_spec *spec;
const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
unsigned int spec_index;
+ u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
int err = 0;
if (!is_valid_attr(flow_attr))
@@ -2062,7 +2135,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
err = parse_flow_attr(spec->match_criteria,
- spec->match_value, ib_flow);
+ spec->match_value, ib_flow, &flow_tag);
if (err < 0)
goto free;
@@ -2072,7 +2145,16 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
- flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+
+ if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
+ (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
+ mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
+ flow_tag, flow_attr->type);
+ err = -EINVAL;
+ goto free;
+ }
+ flow_act.flow_tag = flow_tag;
handler->rule = mlx5_add_flow_rules(ft, spec,
&flow_act,
dst, 1);
@@ -2542,6 +2624,35 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
ibdev->ib_active = false;
}
+static int set_has_smi_cap(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_hca_vport_context vport_ctx;
+ int err;
+ int port;
+
+ for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
+ dev->mdev->port_caps[port - 1].has_smi = false;
+ if (MLX5_CAP_GEN(dev->mdev, port_type) ==
+ MLX5_CAP_PORT_TYPE_IB) {
+ if (MLX5_CAP_GEN(dev->mdev, ib_virt)) {
+ err = mlx5_query_hca_vport_context(dev->mdev, 0,
+ port, 0,
+ &vport_ctx);
+ if (err) {
+ mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n",
+ port, err);
+ return err;
+ }
+ dev->mdev->port_caps[port - 1].has_smi =
+ vport_ctx.has_smi;
+ } else {
+ dev->mdev->port_caps[port - 1].has_smi = true;
+ }
+ }
+ }
+ return 0;
+}
+
static void get_ext_port_caps(struct mlx5_ib_dev *dev)
{
int port;
@@ -2566,6 +2677,10 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
if (!dprops)
goto out;
+ err = set_has_smi_cap(dev);
+ if (err)
+ goto out;
+
err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
if (err) {
mlx5_ib_warn(dev, "query_device failed %d\n", err);
@@ -2573,6 +2688,7 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
}
for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
+ memset(pprops, 0, sizeof(*pprops));
err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
if (err) {
mlx5_ib_warn(dev, "query_port %d failed %d\n",
@@ -2867,11 +2983,13 @@ static u32 get_core_cap_flags(struct ib_device *ibdev)
if (ll == IB_LINK_LAYER_INFINIBAND)
return RDMA_CORE_PORT_IBA_IB;
+ ret = RDMA_CORE_PORT_RAW_PACKET;
+
if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
- return 0;
+ return ret;
if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
- return 0;
+ return ret;
if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
ret |= RDMA_CORE_PORT_IBA_ROCE;
@@ -2890,7 +3008,9 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
int err;
- err = mlx5_ib_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = get_core_cap_flags(ibdev);
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
@@ -3011,13 +3131,102 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
+struct mlx5_ib_q_counter {
+ const char *name;
+ size_t offset;
+};
+
+#define INIT_Q_COUNTER(_name) \
+ { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
+
+static const struct mlx5_ib_q_counter basic_q_cnts[] = {
+ INIT_Q_COUNTER(rx_write_requests),
+ INIT_Q_COUNTER(rx_read_requests),
+ INIT_Q_COUNTER(rx_atomic_requests),
+ INIT_Q_COUNTER(out_of_buffer),
+};
+
+static const struct mlx5_ib_q_counter out_of_seq_q_cnts[] = {
+ INIT_Q_COUNTER(out_of_sequence),
+};
+
+static const struct mlx5_ib_q_counter retrans_q_cnts[] = {
+ INIT_Q_COUNTER(duplicate_request),
+ INIT_Q_COUNTER(rnr_nak_retry_err),
+ INIT_Q_COUNTER(packet_seq_err),
+ INIT_Q_COUNTER(implied_nak_seq_err),
+ INIT_Q_COUNTER(local_ack_timeout_err),
+};
+
static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
{
unsigned int i;
- for (i = 0; i < dev->num_ports; i++)
+ for (i = 0; i < dev->num_ports; i++) {
mlx5_core_dealloc_q_counter(dev->mdev,
- dev->port[i].q_cnt_id);
+ dev->port[i].q_cnts.set_id);
+ kfree(dev->port[i].q_cnts.names);
+ kfree(dev->port[i].q_cnts.offsets);
+ }
+}
+
+static int __mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev,
+ const char ***names,
+ size_t **offsets,
+ u32 *num)
+{
+ u32 num_counters;
+
+ num_counters = ARRAY_SIZE(basic_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
+ num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
+ num_counters += ARRAY_SIZE(retrans_q_cnts);
+
+ *names = kcalloc(num_counters, sizeof(**names), GFP_KERNEL);
+ if (!*names)
+ return -ENOMEM;
+
+ *offsets = kcalloc(num_counters, sizeof(**offsets), GFP_KERNEL);
+ if (!*offsets)
+ goto err_names;
+
+ *num = num_counters;
+
+ return 0;
+
+err_names:
+ kfree(*names);
+ return -ENOMEM;
+}
+
+static void mlx5_ib_fill_q_counters(struct mlx5_ib_dev *dev,
+ const char **names,
+ size_t *offsets)
+{
+ int i;
+ int j = 0;
+
+ for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
+ names[j] = basic_q_cnts[i].name;
+ offsets[j] = basic_q_cnts[i].offset;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
+ for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
+ names[j] = out_of_seq_q_cnts[i].name;
+ offsets[j] = out_of_seq_q_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
+ for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
+ names[j] = retrans_q_cnts[i].name;
+ offsets[j] = retrans_q_cnts[i].offset;
+ }
+ }
}
static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
@@ -3026,14 +3235,26 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
int ret;
for (i = 0; i < dev->num_ports; i++) {
+ struct mlx5_ib_port *port = &dev->port[i];
+
ret = mlx5_core_alloc_q_counter(dev->mdev,
- &dev->port[i].q_cnt_id);
+ &port->q_cnts.set_id);
if (ret) {
mlx5_ib_warn(dev,
"couldn't allocate queue counter for port %d, err %d\n",
i + 1, ret);
goto dealloc_counters;
}
+
+ ret = __mlx5_ib_alloc_q_counters(dev,
+ &port->q_cnts.names,
+ &port->q_cnts.offsets,
+ &port->q_cnts.num_counters);
+ if (ret)
+ goto dealloc_counters;
+
+ mlx5_ib_fill_q_counters(dev, port->q_cnts.names,
+ port->q_cnts.offsets);
}
return 0;
@@ -3041,62 +3262,39 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
dealloc_counters:
while (--i >= 0)
mlx5_core_dealloc_q_counter(dev->mdev,
- dev->port[i].q_cnt_id);
+ dev->port[i].q_cnts.set_id);
return ret;
}
-static const char * const names[] = {
- "rx_write_requests",
- "rx_read_requests",
- "rx_atomic_requests",
- "out_of_buffer",
- "out_of_sequence",
- "duplicate_request",
- "rnr_nak_retry_err",
- "packet_seq_err",
- "implied_nak_seq_err",
- "local_ack_timeout_err",
-};
-
-static const size_t stats_offsets[] = {
- MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests),
- MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests),
- MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests),
- MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer),
- MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence),
- MLX5_BYTE_OFF(query_q_counter_out, duplicate_request),
- MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err),
- MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err),
- MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err),
- MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err),
-};
-
static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
u8 port_num)
{
- BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets));
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_port *port = &dev->port[port_num - 1];
/* We support only per port stats */
if (port_num == 0)
return NULL;
- return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names),
+ return rdma_alloc_hw_stats_struct(port->q_cnts.names,
+ port->q_cnts.num_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
- u8 port, int index)
+ u8 port_num, int index)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_port *port = &dev->port[port_num - 1];
int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
void *out;
__be32 val;
int ret;
int i;
- if (!port || !stats)
+ if (!stats)
return -ENOSYS;
out = mlx5_vzalloc(outlen);
@@ -3104,18 +3302,19 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
return -ENOMEM;
ret = mlx5_core_query_q_counter(dev->mdev,
- dev->port[port - 1].q_cnt_id, 0,
+ port->q_cnts.set_id, 0,
out, outlen);
if (ret)
goto free;
- for (i = 0; i < ARRAY_SIZE(names); i++) {
- val = *(__be32 *)(out + stats_offsets[i]);
+ for (i = 0; i < port->q_cnts.num_counters; i++) {
+ val = *(__be32 *)(out + port->q_cnts.offsets[i]);
stats->value[i] = (u64)be32_to_cpu(val);
}
+
free:
kvfree(out);
- return ARRAY_SIZE(names);
+ return port->q_cnts.num_counters;
}
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
@@ -3267,8 +3466,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
}
- if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
- MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
+ if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
}
@@ -3322,9 +3520,11 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
if (err)
goto err_rsrc;
- err = mlx5_ib_alloc_q_counters(dev);
- if (err)
- goto err_odp;
+ if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
+ err = mlx5_ib_alloc_q_counters(dev);
+ if (err)
+ goto err_odp;
+ }
dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
if (!dev->mdev->priv.uar)
@@ -3373,7 +3573,8 @@ err_uar_page:
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
err_q_cnt:
- mlx5_ib_dealloc_q_counters(dev);
+ if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+ mlx5_ib_dealloc_q_counters(dev);
err_odp:
mlx5_ib_odp_remove_one(dev);
@@ -3406,7 +3607,8 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
- mlx5_ib_dealloc_q_counters(dev);
+ if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+ mlx5_ib_dealloc_q_counters(dev);
destroy_umrc_res(dev);
mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
@@ -3430,8 +3632,7 @@ static int __init mlx5_ib_init(void)
{
int err;
- if (deprecated_prof_sel != 2)
- pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
+ mlx5_ib_odp_init();
err = mlx5_register_interface(&mlx5_ib_interface);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index e1a4b93dce6b..3cd064b5f0bf 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -202,6 +202,7 @@ struct mlx5_ib_flow_db {
#define MLX5_IB_UPD_XLT_ADDR BIT(3)
#define MLX5_IB_UPD_XLT_PD BIT(4)
#define MLX5_IB_UPD_XLT_ACCESS BIT(5)
+#define MLX5_IB_UPD_XLT_INDIRECT BIT(6)
/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
*
@@ -220,6 +221,10 @@ struct wr_list {
u16 next;
};
+enum mlx5_ib_rq_flags {
+ MLX5_IB_RQ_CVLAN_STRIPPING = 1 << 0,
+};
+
struct mlx5_ib_wq {
u64 *wrid;
u32 *wr_data;
@@ -308,6 +313,7 @@ struct mlx5_ib_rq {
struct mlx5_db *doorbell;
u32 tirn;
u8 state;
+ u32 flags;
};
struct mlx5_ib_sq {
@@ -392,6 +398,7 @@ enum mlx5_ib_qp_flags {
MLX5_IB_QP_SQPN_QP1 = 1 << 6,
MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7,
MLX5_IB_QP_RSS = 1 << 8,
+ MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9,
};
struct mlx5_umr_wr {
@@ -497,6 +504,10 @@ struct mlx5_ib_mr {
int live;
void *descs_alloc;
int access_flags; /* Needed for rereg MR */
+
+ struct mlx5_ib_mr *parent;
+ atomic_t num_leaf_free;
+ wait_queue_head_t q_leaf_free;
};
struct mlx5_ib_mw {
@@ -535,6 +546,10 @@ struct mlx5_cache_ent {
struct dentry *dir;
char name[4];
u32 order;
+ u32 xlt;
+ u32 access_mode;
+ u32 page;
+
u32 size;
u32 cur;
u32 miss;
@@ -549,6 +564,7 @@ struct mlx5_cache_ent {
struct work_struct work;
struct delayed_work dwork;
int pending;
+ struct completion compl;
};
struct mlx5_mr_cache {
@@ -579,8 +595,15 @@ struct mlx5_ib_resources {
struct mutex mutex;
};
+struct mlx5_ib_q_counters {
+ const char **names;
+ size_t *offsets;
+ u32 num_counters;
+ u16 set_id;
+};
+
struct mlx5_ib_port {
- u16 q_cnt_id;
+ struct mlx5_ib_q_counters q_cnts;
};
struct mlx5_roce {
@@ -619,6 +642,7 @@ struct mlx5_ib_dev {
* being used by a page fault handler.
*/
struct srcu_struct mr_srcu;
+ u32 null_mkey;
#endif
struct mlx5_ib_flow_db flow_db;
/* protect resources needed as part of reset flow */
@@ -771,6 +795,9 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
int mlx5_ib_dealloc_mw(struct ib_mw *mw);
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags);
+struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+ int access_flags);
+void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
u64 length, u64 virt_addr, int access_flags,
struct ib_pd *pd, struct ib_udata *udata);
@@ -824,7 +851,9 @@ void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
-int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
+
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry);
+void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_mr_status *mr_status);
struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
@@ -848,6 +877,9 @@ int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
unsigned long end);
+void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
+void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
+ size_t nentries, struct mlx5_ib_mr *mr, int flags);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
@@ -855,9 +887,13 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
}
static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
-static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
+static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
static inline int mlx5_ib_odp_init(void) { return 0; }
-static inline void mlx5_ib_odp_cleanup(void) {}
+static inline void mlx5_ib_odp_cleanup(void) {}
+static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
+static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
+ size_t nentries, struct mlx5_ib_mr *mr,
+ int flags) {}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
@@ -872,6 +908,8 @@ int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
int index);
+int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
+ int index, enum ib_gid_type *gid_type);
/* GSI QP helper functions */
struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 8cf2a67f9fb0..3c1f483d003f 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -49,6 +49,7 @@ enum {
static int clean_mr(struct mlx5_ib_mr *mr);
static int use_umr(struct mlx5_ib_dev *dev, int order);
+static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
@@ -149,6 +150,9 @@ static void reg_mr_callback(int status, void *context)
if (err)
pr_err("Error inserting to mkey tree. 0x%x\n", -err);
write_unlock_irqrestore(&table->lock, flags);
+
+ if (!completion_done(&ent->compl))
+ complete(&ent->compl);
}
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
@@ -157,7 +161,6 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
struct mlx5_cache_ent *ent = &cache->ent[c];
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_ib_mr *mr;
- int npages = 1 << ent->order;
void *mkc;
u32 *in;
int err = 0;
@@ -185,11 +188,11 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
- MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+ MLX5_SET(mkc, mkc, access_mode, ent->access_mode);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
- MLX5_SET(mkc, mkc, log_page_size, 12);
+ MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
+ MLX5_SET(mkc, mkc, log_page_size, ent->page);
spin_lock_irq(&ent->lock);
ent->pending++;
@@ -447,6 +450,42 @@ static void cache_work_func(struct work_struct *work)
__cache_work_func(ent);
}
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
+{
+ struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_cache_ent *ent;
+ struct mlx5_ib_mr *mr;
+ int err;
+
+ if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
+ mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
+ return NULL;
+ }
+
+ ent = &cache->ent[entry];
+ while (1) {
+ spin_lock_irq(&ent->lock);
+ if (list_empty(&ent->head)) {
+ spin_unlock_irq(&ent->lock);
+
+ err = add_keys(dev, entry, 1);
+ if (err && err != -EAGAIN)
+ return ERR_PTR(err);
+
+ wait_for_completion(&ent->compl);
+ } else {
+ mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
+ list);
+ list_del(&mr->list);
+ ent->cur--;
+ spin_unlock_irq(&ent->lock);
+ if (ent->cur < ent->limit)
+ queue_work(cache->wq, &ent->work);
+ return mr;
+ }
+ }
+}
+
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
struct mlx5_mr_cache *cache = &dev->cache;
@@ -456,12 +495,12 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
int i;
c = order2idx(dev, order);
- if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
+ if (c < 0 || c > MAX_UMR_CACHE_ENTRY) {
mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
return NULL;
}
- for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
+ for (i = c; i < MAX_UMR_CACHE_ENTRY; i++) {
ent = &cache->ent[i];
mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
@@ -488,7 +527,7 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
return mr;
}
-static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
@@ -500,6 +539,10 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
return;
}
+
+ if (unreg_umr(dev, mr))
+ return;
+
ent = &cache->ent[c];
spin_lock_irq(&ent->lock);
list_add_tail(&mr->list, &ent->head);
@@ -602,7 +645,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
- int limit;
int err;
int i;
@@ -615,26 +657,35 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
- INIT_LIST_HEAD(&cache->ent[i].head);
- spin_lock_init(&cache->ent[i].lock);
-
ent = &cache->ent[i];
INIT_LIST_HEAD(&ent->head);
spin_lock_init(&ent->lock);
ent->order = i + 2;
ent->dev = dev;
+ ent->limit = 0;
- if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
- mlx5_core_is_pf(dev->mdev) &&
- use_umr(dev, ent->order))
- limit = dev->mdev->profile->mr_cache[i].limit;
- else
- limit = 0;
-
+ init_completion(&ent->compl);
INIT_WORK(&ent->work, cache_work_func);
INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
- ent->limit = limit;
queue_work(cache->wq, &ent->work);
+
+ if (i > MAX_UMR_CACHE_ENTRY) {
+ mlx5_odp_init_mr_cache_entry(ent);
+ continue;
+ }
+
+ if (!use_umr(dev, ent->order))
+ continue;
+
+ ent->page = PAGE_SHIFT;
+ ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
+ MLX5_IB_UMR_OCTOWORD;
+ ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
+ if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+ mlx5_core_is_pf(dev->mdev))
+ ent->limit = dev->mdev->profile->mr_cache[i].limit;
+ else
+ ent->limit = 0;
}
err = mlx5_mr_cache_debugfs_init(dev);
@@ -758,7 +809,7 @@ static int get_octo_len(u64 addr, u64 len, int page_size)
static int use_umr(struct mlx5_ib_dev *dev, int order)
{
if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
- return order < MAX_MR_CACHE_ENTRIES + 2;
+ return order <= MAX_UMR_CACHE_ENTRY + 2;
return order <= MLX5_MAX_UMR_SHIFT;
}
@@ -871,7 +922,7 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
MLX5_IB_UPD_XLT_ENABLE);
if (err) {
- free_cached_mr(dev, mr);
+ mlx5_mr_cache_free(dev, mr);
return ERR_PTR(err);
}
@@ -886,6 +937,10 @@ static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
{
struct mlx5_ib_dev *dev = mr->dev;
struct ib_umem *umem = mr->umem;
+ if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
+ mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
+ return npages;
+ }
npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);
@@ -919,7 +974,9 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
struct mlx5_umr_wr wr;
struct ib_sge sg;
int err = 0;
- int desc_size = sizeof(struct mlx5_mtt);
+ int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
+ ? sizeof(struct mlx5_klm)
+ : sizeof(struct mlx5_mtt);
const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
const int page_mask = page_align - 1;
size_t pages_mapped = 0;
@@ -1091,6 +1148,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
goto err_2;
}
mr->mmkey.type = MLX5_MKEY_MR;
+ mr->desc_size = sizeof(struct mlx5_mtt);
mr->umem = umem;
mr->dev = dev;
mr->live = 1;
@@ -1136,6 +1194,18 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags);
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (!start && length == U64_MAX) {
+ if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
+ !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+ return ERR_PTR(-EINVAL);
+
+ mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
+ return &mr->ibmr;
+ }
+#endif
+
err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
&page_shift, &ncont, &order);
@@ -1398,12 +1468,7 @@ static int clean_mr(struct mlx5_ib_mr *mr)
return err;
}
} else {
- err = unreg_umr(dev, mr);
- if (err) {
- mlx5_ib_warn(dev, "failed unregister\n");
- return err;
- }
- free_cached_mr(dev, mr);
+ mlx5_mr_cache_free(dev, mr);
}
if (!umred)
@@ -1426,8 +1491,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
/* Wait for all running page-fault handlers to finish. */
synchronize_srcu(&dev->mr_srcu);
/* Destroy all page mappings */
- mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
- ib_umem_end(umem));
+ if (umem->odp_data->page_list)
+ mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
+ ib_umem_end(umem));
+ else
+ mlx5_ib_free_implicit_mr(mr);
/*
* We kill the umem before the MR for ODP,
* so that there will not be any invalidations in
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index e5bc267aca73..d7b12f0750e2 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -34,6 +34,7 @@
#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
+#include "cmd.h"
#define MAX_PREFETCH_LEN (4*1024*1024U)
@@ -41,6 +42,140 @@
* a pagefault. */
#define MMU_NOTIFIER_TIMEOUT 1000
+#define MLX5_IMR_MTT_BITS (30 - PAGE_SHIFT)
+#define MLX5_IMR_MTT_SHIFT (MLX5_IMR_MTT_BITS + PAGE_SHIFT)
+#define MLX5_IMR_MTT_ENTRIES BIT_ULL(MLX5_IMR_MTT_BITS)
+#define MLX5_IMR_MTT_SIZE BIT_ULL(MLX5_IMR_MTT_SHIFT)
+#define MLX5_IMR_MTT_MASK (~(MLX5_IMR_MTT_SIZE - 1))
+
+#define MLX5_KSM_PAGE_SHIFT MLX5_IMR_MTT_SHIFT
+
+static u64 mlx5_imr_ksm_entries;
+
+static int check_parent(struct ib_umem_odp *odp,
+ struct mlx5_ib_mr *parent)
+{
+ struct mlx5_ib_mr *mr = odp->private;
+
+ return mr && mr->parent == parent;
+}
+
+static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
+{
+ struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent;
+ struct ib_ucontext *ctx = odp->umem->context;
+ struct rb_node *rb;
+
+ down_read(&ctx->umem_rwsem);
+ while (1) {
+ rb = rb_next(&odp->interval_tree.rb);
+ if (!rb)
+ goto not_found;
+ odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
+ if (check_parent(odp, parent))
+ goto end;
+ }
+not_found:
+ odp = NULL;
+end:
+ up_read(&ctx->umem_rwsem);
+ return odp;
+}
+
+static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx,
+ u64 start, u64 length,
+ struct mlx5_ib_mr *parent)
+{
+ struct ib_umem_odp *odp;
+ struct rb_node *rb;
+
+ down_read(&ctx->umem_rwsem);
+ odp = rbt_ib_umem_lookup(&ctx->umem_tree, start, length);
+ if (!odp)
+ goto end;
+
+ while (1) {
+ if (check_parent(odp, parent))
+ goto end;
+ rb = rb_next(&odp->interval_tree.rb);
+ if (!rb)
+ goto not_found;
+ odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
+ if (ib_umem_start(odp->umem) > start + length)
+ goto not_found;
+ }
+not_found:
+ odp = NULL;
+end:
+ up_read(&ctx->umem_rwsem);
+ return odp;
+}
+
+void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
+ size_t nentries, struct mlx5_ib_mr *mr, int flags)
+{
+ struct ib_pd *pd = mr->ibmr.pd;
+ struct ib_ucontext *ctx = pd->uobject->context;
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_umem_odp *odp;
+ unsigned long va;
+ int i;
+
+ if (flags & MLX5_IB_UPD_XLT_ZAP) {
+ for (i = 0; i < nentries; i++, pklm++) {
+ pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
+ pklm->key = cpu_to_be32(dev->null_mkey);
+ pklm->va = 0;
+ }
+ return;
+ }
+
+ odp = odp_lookup(ctx, offset * MLX5_IMR_MTT_SIZE,
+ nentries * MLX5_IMR_MTT_SIZE, mr);
+
+ for (i = 0; i < nentries; i++, pklm++) {
+ pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
+ va = (offset + i) * MLX5_IMR_MTT_SIZE;
+ if (odp && odp->umem->address == va) {
+ struct mlx5_ib_mr *mtt = odp->private;
+
+ pklm->key = cpu_to_be32(mtt->ibmr.lkey);
+ odp = odp_next(odp);
+ } else {
+ pklm->key = cpu_to_be32(dev->null_mkey);
+ }
+ mlx5_ib_dbg(dev, "[%d] va %lx key %x\n",
+ i, va, be32_to_cpu(pklm->key));
+ }
+}
+
+static void mr_leaf_free_action(struct work_struct *work)
+{
+ struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
+ int idx = ib_umem_start(odp->umem) >> MLX5_IMR_MTT_SHIFT;
+ struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
+
+ mr->parent = NULL;
+ synchronize_srcu(&mr->dev->mr_srcu);
+
+ if (!READ_ONCE(odp->dying)) {
+ mr->parent = imr;
+ if (atomic_dec_and_test(&imr->num_leaf_free))
+ wake_up(&imr->q_leaf_free);
+ return;
+ }
+
+ ib_umem_release(odp->umem);
+ if (imr->live)
+ mlx5_ib_update_xlt(imr, idx, 1, 0,
+ MLX5_IB_UPD_XLT_INDIRECT |
+ MLX5_IB_UPD_XLT_ATOMIC);
+ mlx5_mr_cache_free(mr->dev, mr);
+
+ if (atomic_dec_and_test(&imr->num_leaf_free))
+ wake_up(&imr->q_leaf_free);
+}
+
void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
unsigned long end)
{
@@ -111,6 +246,13 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
*/
ib_umem_odp_unmap_dma_pages(umem, start, end);
+
+ if (unlikely(!umem->npages && mr->parent &&
+ !umem->odp_data->dying)) {
+ WRITE_ONCE(umem->odp_data->dying, 1);
+ atomic_inc(&mr->parent->num_leaf_free);
+ schedule_work(&umem->odp_data->work);
+ }
}
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
@@ -147,6 +289,11 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.atomic))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
+ if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
+ MLX5_CAP_GEN(dev->mdev, null_mkey) &&
+ MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
+ caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT;
+
return;
}
@@ -184,6 +331,197 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
wq_num);
}
+static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
+ struct ib_umem *umem,
+ bool ksm, int access_flags)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_ib_mr *mr;
+ int err;
+
+ mr = mlx5_mr_cache_alloc(dev, ksm ? MLX5_IMR_KSM_CACHE_ENTRY :
+ MLX5_IMR_MTT_CACHE_ENTRY);
+
+ if (IS_ERR(mr))
+ return mr;
+
+ mr->ibmr.pd = pd;
+
+ mr->dev = dev;
+ mr->access_flags = access_flags;
+ mr->mmkey.iova = 0;
+ mr->umem = umem;
+
+ if (ksm) {
+ err = mlx5_ib_update_xlt(mr, 0,
+ mlx5_imr_ksm_entries,
+ MLX5_KSM_PAGE_SHIFT,
+ MLX5_IB_UPD_XLT_INDIRECT |
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ENABLE);
+
+ } else {
+ err = mlx5_ib_update_xlt(mr, 0,
+ MLX5_IMR_MTT_ENTRIES,
+ PAGE_SHIFT,
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ENABLE |
+ MLX5_IB_UPD_XLT_ATOMIC);
+ }
+
+ if (err)
+ goto fail;
+
+ mr->ibmr.lkey = mr->mmkey.key;
+ mr->ibmr.rkey = mr->mmkey.key;
+
+ mr->live = 1;
+
+ mlx5_ib_dbg(dev, "key %x dev %p mr %p\n",
+ mr->mmkey.key, dev->mdev, mr);
+
+ return mr;
+
+fail:
+ mlx5_ib_err(dev, "Failed to register MKEY %d\n", err);
+ mlx5_mr_cache_free(dev, mr);
+
+ return ERR_PTR(err);
+}
+
+static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
+ u64 io_virt, size_t bcnt)
+{
+ struct ib_ucontext *ctx = mr->ibmr.pd->uobject->context;
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device);
+ struct ib_umem_odp *odp, *result = NULL;
+ u64 addr = io_virt & MLX5_IMR_MTT_MASK;
+ int nentries = 0, start_idx = 0, ret;
+ struct mlx5_ib_mr *mtt;
+ struct ib_umem *umem;
+
+ mutex_lock(&mr->umem->odp_data->umem_mutex);
+ odp = odp_lookup(ctx, addr, 1, mr);
+
+ mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n",
+ io_virt, bcnt, addr, odp);
+
+next_mr:
+ if (likely(odp)) {
+ if (nentries)
+ nentries++;
+ } else {
+ umem = ib_alloc_odp_umem(ctx, addr, MLX5_IMR_MTT_SIZE);
+ if (IS_ERR(umem)) {
+ mutex_unlock(&mr->umem->odp_data->umem_mutex);
+ return ERR_CAST(umem);
+ }
+
+ mtt = implicit_mr_alloc(mr->ibmr.pd, umem, 0, mr->access_flags);
+ if (IS_ERR(mtt)) {
+ mutex_unlock(&mr->umem->odp_data->umem_mutex);
+ ib_umem_release(umem);
+ return ERR_CAST(mtt);
+ }
+
+ odp = umem->odp_data;
+ odp->private = mtt;
+ mtt->umem = umem;
+ mtt->mmkey.iova = addr;
+ mtt->parent = mr;
+ INIT_WORK(&odp->work, mr_leaf_free_action);
+
+ if (!nentries)
+ start_idx = addr >> MLX5_IMR_MTT_SHIFT;
+ nentries++;
+ }
+
+ odp->dying = 0;
+
+ /* Return first odp if region not covered by single one */
+ if (likely(!result))
+ result = odp;
+
+ addr += MLX5_IMR_MTT_SIZE;
+ if (unlikely(addr < io_virt + bcnt)) {
+ odp = odp_next(odp);
+ if (odp && odp->umem->address != addr)
+ odp = NULL;
+ goto next_mr;
+ }
+
+ if (unlikely(nentries)) {
+ ret = mlx5_ib_update_xlt(mr, start_idx, nentries, 0,
+ MLX5_IB_UPD_XLT_INDIRECT |
+ MLX5_IB_UPD_XLT_ATOMIC);
+ if (ret) {
+ mlx5_ib_err(dev, "Failed to update PAS\n");
+ result = ERR_PTR(ret);
+ }
+ }
+
+ mutex_unlock(&mr->umem->odp_data->umem_mutex);
+ return result;
+}
+
+struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+ int access_flags)
+{
+ struct ib_ucontext *ctx = pd->ibpd.uobject->context;
+ struct mlx5_ib_mr *imr;
+ struct ib_umem *umem;
+
+ umem = ib_umem_get(ctx, 0, 0, IB_ACCESS_ON_DEMAND, 0);
+ if (IS_ERR(umem))
+ return ERR_CAST(umem);
+
+ imr = implicit_mr_alloc(&pd->ibpd, umem, 1, access_flags);
+ if (IS_ERR(imr)) {
+ ib_umem_release(umem);
+ return ERR_CAST(imr);
+ }
+
+ imr->umem = umem;
+ init_waitqueue_head(&imr->q_leaf_free);
+ atomic_set(&imr->num_leaf_free, 0);
+
+ return imr;
+}
+
+static int mr_leaf_free(struct ib_umem *umem, u64 start,
+ u64 end, void *cookie)
+{
+ struct mlx5_ib_mr *mr = umem->odp_data->private, *imr = cookie;
+
+ if (mr->parent != imr)
+ return 0;
+
+ ib_umem_odp_unmap_dma_pages(umem,
+ ib_umem_start(umem),
+ ib_umem_end(umem));
+
+ if (umem->odp_data->dying)
+ return 0;
+
+ WRITE_ONCE(umem->odp_data->dying, 1);
+ atomic_inc(&imr->num_leaf_free);
+ schedule_work(&umem->odp_data->work);
+
+ return 0;
+}
+
+void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
+{
+ struct ib_ucontext *ctx = imr->ibmr.pd->uobject->context;
+
+ down_read(&ctx->umem_rwsem);
+ rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX,
+ mr_leaf_free, imr);
+ up_read(&ctx->umem_rwsem);
+
+ wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
+}
+
/*
* Handle a single data segment in a page-fault WQE or RDMA region.
*
@@ -195,47 +533,43 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
* -EFAULT when there's an error mapping the requested pages. The caller will
* abort the page fault handling.
*/
-static int pagefault_single_data_segment(struct mlx5_ib_dev *mib_dev,
+static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
u32 key, u64 io_virt, size_t bcnt,
u32 *bytes_committed,
u32 *bytes_mapped)
{
int srcu_key;
- unsigned int current_seq;
+ unsigned int current_seq = 0;
u64 start_idx;
int npages = 0, ret = 0;
struct mlx5_ib_mr *mr;
u64 access_mask = ODP_READ_ALLOWED_BIT;
+ struct ib_umem_odp *odp;
+ int implicit = 0;
+ size_t size;
- srcu_key = srcu_read_lock(&mib_dev->mr_srcu);
- mr = mlx5_ib_odp_find_mr_lkey(mib_dev, key);
+ srcu_key = srcu_read_lock(&dev->mr_srcu);
+ mr = mlx5_ib_odp_find_mr_lkey(dev, key);
/*
* If we didn't find the MR, it means the MR was closed while we were
* handling the ODP event. In this case we return -EFAULT so that the
* QP will be closed.
*/
if (!mr || !mr->ibmr.pd) {
- pr_err("Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
- key);
+ mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
+ key);
ret = -EFAULT;
goto srcu_unlock;
}
if (!mr->umem->odp_data) {
- pr_debug("skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
- key);
+ mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+ key);
if (bytes_mapped)
*bytes_mapped +=
(bcnt - *bytes_committed);
goto srcu_unlock;
}
- current_seq = ACCESS_ONCE(mr->umem->odp_data->notifiers_seq);
- /*
- * Ensure the sequence number is valid for some time before we call
- * gup.
- */
- smp_rmb();
-
/*
* Avoid branches - this code will perform correctly
* in all iterations (in iteration 2 and above,
@@ -244,63 +578,109 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *mib_dev,
io_virt += *bytes_committed;
bcnt -= *bytes_committed;
+ if (!mr->umem->odp_data->page_list) {
+ odp = implicit_mr_get_data(mr, io_virt, bcnt);
+
+ if (IS_ERR(odp)) {
+ ret = PTR_ERR(odp);
+ goto srcu_unlock;
+ }
+ mr = odp->private;
+ implicit = 1;
+
+ } else {
+ odp = mr->umem->odp_data;
+ }
+
+next_mr:
+ current_seq = READ_ONCE(odp->notifiers_seq);
+ /*
+ * Ensure the sequence number is valid for some time before we call
+ * gup.
+ */
+ smp_rmb();
+
+ size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT;
if (mr->umem->writable)
access_mask |= ODP_WRITE_ALLOWED_BIT;
- npages = ib_umem_odp_map_dma_pages(mr->umem, io_virt, bcnt,
- access_mask, current_seq);
- if (npages < 0) {
- ret = npages;
+
+ ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
+ access_mask, current_seq);
+
+ if (ret < 0)
goto srcu_unlock;
- }
- if (npages > 0) {
- mutex_lock(&mr->umem->odp_data->umem_mutex);
+ if (ret > 0) {
+ int np = ret;
+
+ mutex_lock(&odp->umem_mutex);
if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
/*
* No need to check whether the MTTs really belong to
* this MR, since ib_umem_odp_map_dma_pages already
* checks this.
*/
- ret = mlx5_ib_update_xlt(mr, start_idx, npages,
+ ret = mlx5_ib_update_xlt(mr, start_idx, np,
PAGE_SHIFT,
MLX5_IB_UPD_XLT_ATOMIC);
} else {
ret = -EAGAIN;
}
- mutex_unlock(&mr->umem->odp_data->umem_mutex);
+ mutex_unlock(&odp->umem_mutex);
if (ret < 0) {
if (ret != -EAGAIN)
- pr_err("Failed to update mkey page tables\n");
+ mlx5_ib_err(dev, "Failed to update mkey page tables\n");
goto srcu_unlock;
}
if (bytes_mapped) {
- u32 new_mappings = npages * PAGE_SIZE -
+ u32 new_mappings = np * PAGE_SIZE -
(io_virt - round_down(io_virt, PAGE_SIZE));
- *bytes_mapped += min_t(u32, new_mappings, bcnt);
+ *bytes_mapped += min_t(u32, new_mappings, size);
}
+
+ npages += np;
+ }
+
+ bcnt -= size;
+ if (unlikely(bcnt)) {
+ struct ib_umem_odp *next;
+
+ io_virt += size;
+ next = odp_next(odp);
+ if (unlikely(!next || next->umem->address != io_virt)) {
+ mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
+ io_virt, next);
+ ret = -EAGAIN;
+ goto srcu_unlock_no_wait;
+ }
+ odp = next;
+ mr = odp->private;
+ goto next_mr;
}
srcu_unlock:
if (ret == -EAGAIN) {
- if (!mr->umem->odp_data->dying) {
- struct ib_umem_odp *odp_data = mr->umem->odp_data;
+ if (implicit || !odp->dying) {
unsigned long timeout =
msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
if (!wait_for_completion_timeout(
- &odp_data->notifier_completion,
+ &odp->notifier_completion,
timeout)) {
- pr_warn("timeout waiting for mmu notifier completion\n");
+ mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d\n",
+ current_seq, odp->notifiers_seq);
}
} else {
/* The MR is being killed, kill the QP as well. */
ret = -EFAULT;
}
}
- srcu_read_unlock(&mib_dev->mr_srcu, srcu_key);
+
+srcu_unlock_no_wait:
+ srcu_read_unlock(&dev->mr_srcu, srcu_key);
*bytes_committed = 0;
return ret ? ret : npages;
}
@@ -618,8 +998,8 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
goto resolve_page_fault;
} else if (ret < 0 || total_wqe_bytes > bytes_mapped) {
if (ret != -ENOENT)
- mlx5_ib_err(dev, "Error getting user pages for page fault. Error: %d\n",
- ret);
+ mlx5_ib_err(dev, "PAGE FAULT error: %d. QP 0x%x. type: 0x%x\n",
+ ret, pfault->wqe.wq_num, pfault->type);
goto resolve_page_fault;
}
@@ -627,7 +1007,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
resolve_page_fault:
mlx5_ib_page_fault_resume(dev, pfault, resume_with_error);
mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n",
- pfault->token, resume_with_error,
+ pfault->wqe.wq_num, resume_with_error,
pfault->type);
free_page((unsigned long)buffer);
}
@@ -700,10 +1080,9 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
ret = pagefault_single_data_segment(dev, rkey, address,
prefetch_len,
&bytes_committed, NULL);
- if (ret < 0) {
+ if (ret < 0 && ret != -EAGAIN) {
mlx5_ib_warn(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
- ret, pfault->token, address,
- prefetch_len);
+ ret, pfault->token, address, prefetch_len);
}
}
}
@@ -728,19 +1107,61 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
}
}
-int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev)
+void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
+{
+ if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+ return;
+
+ switch (ent->order - 2) {
+ case MLX5_IMR_MTT_CACHE_ENTRY:
+ ent->page = PAGE_SHIFT;
+ ent->xlt = MLX5_IMR_MTT_ENTRIES *
+ sizeof(struct mlx5_mtt) /
+ MLX5_IB_UMR_OCTOWORD;
+ ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
+ ent->limit = 0;
+ break;
+
+ case MLX5_IMR_KSM_CACHE_ENTRY:
+ ent->page = MLX5_KSM_PAGE_SHIFT;
+ ent->xlt = mlx5_imr_ksm_entries *
+ sizeof(struct mlx5_klm) /
+ MLX5_IB_UMR_OCTOWORD;
+ ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
+ ent->limit = 0;
+ break;
+ }
+}
+
+int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
{
int ret;
- ret = init_srcu_struct(&ibdev->mr_srcu);
+ ret = init_srcu_struct(&dev->mr_srcu);
if (ret)
return ret;
+ if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
+ ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
+ if (ret) {
+ mlx5_ib_err(dev, "Error getting null_mkey %d\n", ret);
+ return ret;
+ }
+ }
+
return 0;
}
-void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)
+void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *dev)
+{
+ cleanup_srcu_struct(&dev->mr_srcu);
+}
+
+int mlx5_ib_odp_init(void)
{
- cleanup_srcu_struct(&ibdev->mr_srcu);
+ mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) -
+ MLX5_IMR_MTT_BITS);
+
+ return 0;
}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index e31bf11ae64f..ad8a2638e339 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -905,7 +905,10 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
else
qp->bf.bfreg = &dev->bfreg;
- qp->bf.buf_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
+ /* We need to divide by two since each register is comprised of
+ * two buffers of identical size, namely odd and even
+ */
+ qp->bf.buf_size = (1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size)) / 2;
uar_index = qp->bf.bfreg->index;
err = calc_sq_size(dev, init_attr, qp);
@@ -1141,7 +1144,8 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
return -ENOMEM;
rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
- MLX5_SET(rqc, rqc, vsd, 1);
+ if (!(rq->flags & MLX5_IB_RQ_CVLAN_STRIPPING))
+ MLX5_SET(rqc, rqc, vsd, 1);
MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
MLX5_SET(rqc, rqc, flush_in_error_en, 1);
@@ -1238,6 +1242,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (qp->rq.wqe_cnt) {
rq->base.container_mibqp = qp;
+ if (qp->flags & MLX5_IB_QP_CVLAN_STRIPPING)
+ rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
err = create_raw_packet_qp_rq(dev, rq, in);
if (err)
goto err_destroy_sq;
@@ -1559,6 +1565,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
+ if (init_attr->create_flags & IB_QP_CREATE_CVLAN_STRIPPING) {
+ if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+ MLX5_CAP_ETH(dev->mdev, vlan_cap)) ||
+ (init_attr->qp_type != IB_QPT_RAW_PACKET))
+ return -EOPNOTSUPP;
+ qp->flags |= MLX5_IB_QP_CVLAN_STRIPPING;
+ }
+
if (pd && pd->uobject) {
if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
mlx5_ib_dbg(dev, "copy failed\n");
@@ -2198,6 +2212,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
{
enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
int err;
+ enum ib_gid_type gid_type;
if (attr_mask & IB_QP_PKEY_INDEX)
path->pkey_index = cpu_to_be16(alt ? attr->alt_pkey_index :
@@ -2216,10 +2231,16 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (ll == IB_LINK_LAYER_ETHERNET) {
if (!(ah->ah_flags & IB_AH_GRH))
return -EINVAL;
+ err = mlx5_get_roce_gid_type(dev, port, ah->grh.sgid_index,
+ &gid_type);
+ if (err)
+ return err;
memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
ah->grh.sgid_index);
path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
+ if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ path->ecn_dscp = (ah->grh.traffic_class >> 2) & 0x3f;
} else {
path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
path->fl_free_ar |=
@@ -2422,7 +2443,7 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
if (raw_qp_param->set_mask & MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID) {
if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
MLX5_SET64(modify_rq_in, in, modify_bitmask,
- MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID);
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id);
} else
pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n",
@@ -2777,7 +2798,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
qp->port) - 1;
mibport = &dev->port[port_num];
context->qp_counter_set_usr_page |=
- cpu_to_be32((u32)(mibport->q_cnt_id) << 24);
+ cpu_to_be32((u32)(mibport->q_cnts.set_id) << 24);
}
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
@@ -2805,7 +2826,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
raw_qp_param.operation = op;
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
- raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id;
+ raw_qp_param.rq_q_ctr_id = mibport->q_cnts.set_id;
raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
}
@@ -3637,8 +3658,9 @@ static int set_psv_wr(struct ib_sig_domain *domain,
psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
break;
default:
- pr_err("Bad signature type given.\n");
- return 1;
+ pr_err("Bad signature type (%d) is given.\n",
+ domain->sig_type);
+ return -EINVAL;
}
*seg += sizeof(*psv_seg);
@@ -3978,6 +4000,12 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case IB_QPT_SMI:
+ if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) {
+ mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n");
+ err = -EPERM;
+ *bad_wr = wr;
+ goto out;
+ }
case MLX5_IB_QPT_HW_GSI:
set_datagram_seg(seg, wr);
seg += sizeof(struct mlx5_wqe_datagram_seg);
@@ -4579,6 +4607,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
struct ib_wq_init_attr *init_attr)
{
struct mlx5_ib_dev *dev;
+ int has_net_offloads;
__be64 *rq_pas0;
void *in;
void *rqc;
@@ -4610,9 +4639,28 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size);
MLX5_SET(wq, wq, wq_signature, rwq->wq_sig);
MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma);
+ has_net_offloads = MLX5_CAP_GEN(dev->mdev, eth_net_offloads);
+ if (init_attr->create_flags & IB_WQ_FLAGS_CVLAN_STRIPPING) {
+ if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
+ mlx5_ib_dbg(dev, "VLAN offloads are not supported\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ } else {
+ MLX5_SET(rqc, rqc, vsd, 1);
+ }
+ if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS) {
+ if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, scatter_fcs))) {
+ mlx5_ib_dbg(dev, "Scatter FCS is not supported\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ MLX5_SET(rqc, rqc, scatter_fcs, 1);
+ }
rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp);
+out:
kvfree(in);
return err;
}
@@ -4896,10 +4944,37 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
MLX5_SET(rqc, rqc, state, wq_state);
+ if (wq_attr_mask & IB_WQ_FLAGS) {
+ if (wq_attr->flags_mask & IB_WQ_FLAGS_CVLAN_STRIPPING) {
+ if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+ MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
+ mlx5_ib_dbg(dev, "VLAN offloads are not "
+ "supported\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ MLX5_SET64(modify_rq_in, in, modify_bitmask,
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
+ MLX5_SET(rqc, rqc, vsd,
+ (wq_attr->flags & IB_WQ_FLAGS_CVLAN_STRIPPING) ? 0 : 1);
+ }
+ }
+
+ if (curr_wq_state == IB_WQS_RESET && wq_state == IB_WQS_RDY) {
+ if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
+ MLX5_SET64(modify_rq_in, in, modify_bitmask,
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
+ MLX5_SET(rqc, rqc, counter_set_id, dev->port->q_cnts.set_id);
+ } else
+ pr_info_once("%s: Receive WQ counters are not supported on current FW\n",
+ dev->ib_dev.name);
+ }
+
err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen);
- kvfree(in);
if (!err)
rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state;
+out:
+ kvfree(in);
return err;
}
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 6f4397ee1ed6..7cb145f9a6db 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -165,8 +165,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
int err;
int i;
struct mlx5_wqe_srq_next_seg *next;
- int page_shift;
- int npages;
err = mlx5_db_alloc(dev->mdev, &srq->db);
if (err) {
@@ -179,7 +177,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
err = -ENOMEM;
goto err_db;
}
- page_shift = srq->buf.page_shift;
srq->head = 0;
srq->tail = srq->msrq.max - 1;
@@ -191,10 +188,8 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
cpu_to_be16((i + 1) & (srq->msrq.max - 1));
}
- npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
- mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
- buf_size, page_shift, srq->buf.npages, npages);
- in->pas = mlx5_vzalloc(sizeof(*in->pas) * npages);
+ mlx5_ib_dbg(dev, "srq->buf.page_shift = %d\n", srq->buf.page_shift);
+ in->pas = mlx5_vzalloc(sizeof(*in->pas) * srq->buf.npages);
if (!in->pas) {
err = -ENOMEM;
goto err_buf;
@@ -208,7 +203,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
}
srq->wq_sig = !!srq_signature;
- in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
in->type == IB_SRQT_XRC)
in->user_index = MLX5_IB_DEFAULT_UIDX;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index d31708742ba5..ce163184e742 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -146,7 +146,7 @@ static int mthca_query_port(struct ib_device *ibdev,
if (!in_mad || !out_mad)
goto out;
- memset(props, 0, sizeof *props);
+ /* props being zeroed by the caller, avoid zeroing it here */
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
@@ -212,7 +212,7 @@ static int mthca_modify_port(struct ib_device *ibdev,
if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
return -ERESTARTSYS;
- err = mthca_query_port(ibdev, port, &attr);
+ err = ib_query_port(ibdev, port, &attr);
if (err)
goto out;
@@ -1166,13 +1166,14 @@ static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = mthca_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 5a31f3c6a421..d3eae2f3e9f5 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -475,7 +475,7 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr
struct nes_vnic *nesvnic = to_nesvnic(ibdev);
struct net_device *netdev = nesvnic->netdev;
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@@ -3660,13 +3660,14 @@ static int nes_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
err = nes_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 896071502739..3e43bdc81e7a 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -93,15 +93,16 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
int err;
dev = get_ocrdma_dev(ibdev);
- err = ocrdma_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ if (ocrdma_is_udp_encap_supported(dev))
+ immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
- if (ocrdma_is_udp_encap_supported(dev))
- immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index e06ad7250963..bc9fb144e57b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -210,6 +210,7 @@ int ocrdma_query_port(struct ib_device *ibdev,
struct ocrdma_dev *dev;
struct net_device *netdev;
+ /* props being zeroed by the caller, avoid zeroing it here */
dev = get_ocrdma_dev(ibdev);
if (port > 1) {
pr_err("%s(%d) invalid_port=0x%x\n", __func__,
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 0c51657af151..6b3bb32803bd 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -238,8 +238,8 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
}
rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
- memset(attr, 0, sizeof(*attr));
+ /* *attr being zeroed by the caller, avoid zeroing it here */
if (rdma_port->port_state == QED_RDMA_PORT_UP) {
attr->state = IB_PORT_ACTIVE;
attr->phys_state = 5;
@@ -3494,14 +3494,15 @@ int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = qedr_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
- RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index b0b78e1cec92..6b56f1c01a07 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1220,6 +1220,7 @@ static int qib_query_port(struct rvt_dev_info *rdi, u8 port_num,
enum ib_mtu mtu;
u16 lid = ppd->lid;
+ /* props being zeroed by the caller, avoid zeroing it here */
props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
props->lmc = ppd->lmc;
props->state = dd->f_iblink_state(ppd->lastibcstat);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 0a89a955550b..4f5a45db08e1 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -321,7 +321,9 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = usnic_ib_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_USNIC;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 69df8e353123..3284730d3c09 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -330,7 +330,7 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
mutex_lock(&us_ibdev->usdev_lock);
__ethtool_get_link_ksettings(us_ibdev->netdev, &cmd);
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->lid = 0;
props->lmc = 1;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 60cdb7719565..e03d2f6c1f90 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -132,13 +132,14 @@ static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = pvrdma_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index c2aa52638dcb..fec17c49103b 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -135,7 +135,7 @@ int pvrdma_query_port(struct ib_device *ibdev, u8 port,
return err;
}
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->state = pvrdma_port_state_to_ib(resp->attrs.state);
props->max_mtu = pvrdma_mtu_to_ib(resp->attrs.max_mtu);
@@ -275,7 +275,7 @@ int pvrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
}
mutex_lock(&vdev->port_mutex);
- ret = pvrdma_query_port(ibdev, port, &attr);
+ ret = ib_query_port(ibdev, port, &attr);
if (ret)
goto out;
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index d430c2f7cec4..1165639a914b 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -165,7 +165,7 @@ static int rvt_query_port(struct ib_device *ibdev, u8 port_num,
return -EINVAL;
rvp = rdi->ports[port_index];
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->sm_lid = rvp->sm_lid;
props->sm_sl = rvp->sm_sl;
props->port_cap_flags = rvp->port_cap_flags;
@@ -326,13 +326,14 @@ static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num,
if (port_index < 0)
return -EINVAL;
- err = rvt_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = rdi->dparms.core_cap_flags;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = rdi->dparms.core_cap_flags;
immutable->max_mad_size = rdi->dparms.max_mad_size;
return 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index e4de37fb9aab..d2e2eff7a515 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -86,6 +86,7 @@ static int rxe_query_port(struct ib_device *dev,
port = &rxe->port;
+ /* *attr being zeroed by the caller, avoid zeroing it here */
*attr = port->attr;
mutex_lock(&rxe->usdev_lock);
@@ -261,13 +262,14 @@ static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
int err;
struct ib_port_attr attr;
- err = rxe_query_port(dev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+ err = ib_query_port(dev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index ce3d92106386..2478516a61e2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1232,10 +1232,18 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
fs_for_each_fte(fte, fg) {
nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
if (compare_match_value(&fg->mask, match_value, &fte->val) &&
- (flow_act->action & fte->action) &&
- flow_act->flow_tag == fte->flow_tag) {
+ (flow_act->action & fte->action)) {
int old_action = fte->action;
+ if (fte->flow_tag != flow_act->flow_tag) {
+ mlx5_core_warn(get_dev(&fte->node),
+ "FTE flow tag %u already exists with different flow tag %u\n",
+ fte->flow_tag,
+ flow_act->flow_tag);
+ handle = ERR_PTR(-EEXIST);
+ goto unlock_fte;
+ }
+
fte->action |= flow_act->action;
handle = add_rule_fte(fte, fg, dest, dest_num,
old_action != flow_act->action);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 1bc4641734da..2fcff6b4503f 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -295,6 +295,7 @@ struct mlx5_port_caps {
int gid_table_len;
int pkey_table_len;
u8 ext_port_cap;
+ bool has_smi;
};
struct mlx5_cmd_mailbox {
@@ -1061,7 +1062,10 @@ enum {
};
enum {
- MAX_MR_CACHE_ENTRIES = 21,
+ MAX_UMR_CACHE_ENTRY = 20,
+ MLX5_IMR_MTT_CACHE_ENTRY,
+ MLX5_IMR_KSM_CACHE_ENTRY,
+ MAX_MR_CACHE_ENTRIES
};
enum {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index afcd4736d8df..838242697541 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -5013,7 +5013,7 @@ struct mlx5_ifc_modify_rq_out_bits {
enum {
MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD = 1ULL << 1,
- MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID = 1ULL << 3,
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID = 1ULL << 3,
};
struct mlx5_ifc_modify_rq_in_bits {
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 3da0b167041b..542cd8b3414c 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -79,11 +79,15 @@ struct ib_umem_odp {
struct completion notifier_completion;
int dying;
+ struct work_struct work;
};
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem);
+struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
+ unsigned long addr,
+ size_t size);
void ib_umem_odp_release(struct ib_umem *umem);
@@ -117,10 +121,12 @@ typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end,
umem_call_back cb, void *cookie);
-struct umem_odp_node *rbt_ib_umem_iter_first(struct rb_root *root,
- u64 start, u64 last);
-struct umem_odp_node *rbt_ib_umem_iter_next(struct umem_odp_node *node,
- u64 start, u64 last);
+/*
+ * Find first region intersecting with address range.
+ * Return NULL if not found
+ */
+struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root,
+ u64 addr, u64 length);
static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
unsigned long mmu_seq)
@@ -153,6 +159,13 @@ static inline int ib_umem_odp_get(struct ib_ucontext *context,
return -EINVAL;
}
+static inline struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
+ unsigned long addr,
+ size_t size)
+{
+ return ERR_PTR(-EINVAL);
+}
+
static inline void ib_umem_odp_release(struct ib_umem *umem) {}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 8c61532cf521..89f5bd4e1d52 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -207,6 +207,7 @@ enum ib_device_cap_flags {
IB_DEVICE_MEM_WINDOW_TYPE_2A = (1 << 23),
IB_DEVICE_MEM_WINDOW_TYPE_2B = (1 << 24),
IB_DEVICE_RC_IP_CSUM = (1 << 25),
+ /* Deprecated. Please use IB_RAW_PACKET_CAP_IP_CSUM. */
IB_DEVICE_RAW_IP_CSUM = (1 << 26),
/*
* Devices should set IB_DEVICE_CROSS_CHANNEL if they
@@ -220,6 +221,7 @@ enum ib_device_cap_flags {
IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31),
IB_DEVICE_SG_GAPS_REG = (1ULL << 32),
IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33),
+ /* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */
IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34),
};
@@ -241,7 +243,8 @@ enum ib_atomic_cap {
};
enum ib_odp_general_cap_bits {
- IB_ODP_SUPPORT = 1 << 0,
+ IB_ODP_SUPPORT = 1 << 0,
+ IB_ODP_SUPPORT_IMPLICIT = 1 << 1,
};
enum ib_odp_transport_cap_bits {
@@ -330,6 +333,7 @@ struct ib_device_attr {
uint64_t hca_core_clock; /* in KHZ */
struct ib_rss_caps rss_caps;
u32 max_wq_type_rq;
+ u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */
};
enum ib_mtu {
@@ -499,6 +503,8 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
#define RDMA_CORE_CAP_PROT_ROCE 0x00200000
#define RDMA_CORE_CAP_PROT_IWARP 0x00400000
#define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
+#define RDMA_CORE_CAP_PROT_RAW_PACKET 0x01000000
+#define RDMA_CORE_CAP_PROT_USNIC 0x02000000
#define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \
| RDMA_CORE_CAP_IB_MAD \
@@ -522,6 +528,10 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
#define RDMA_CORE_PORT_INTEL_OPA (RDMA_CORE_PORT_IBA_IB \
| RDMA_CORE_CAP_OPA_MAD)
+#define RDMA_CORE_PORT_RAW_PACKET (RDMA_CORE_CAP_PROT_RAW_PACKET)
+
+#define RDMA_CORE_PORT_USNIC (RDMA_CORE_CAP_PROT_USNIC)
+
struct ib_port_attr {
u64 subnet_prefix;
enum ib_port_state state;
@@ -1019,6 +1029,7 @@ enum ib_qp_create_flags {
IB_QP_CREATE_SIGNATURE_EN = 1 << 6,
IB_QP_CREATE_USE_GFP_NOIO = 1 << 7,
IB_QP_CREATE_SCATTER_FCS = 1 << 8,
+ IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9,
/* reserve bits 26-31 for low level drivers' internal use */
IB_QP_CREATE_RESERVED_START = 1 << 26,
IB_QP_CREATE_RESERVED_END = 1 << 31,
@@ -1470,6 +1481,18 @@ struct ib_srq {
} ext;
};
+enum ib_raw_packet_caps {
+ /* Strip cvlan from incoming packet and report it in the matching work
+ * completion is supported.
+ */
+ IB_RAW_PACKET_CAP_CVLAN_STRIPPING = (1 << 0),
+ /* Scatter FCS field of an incoming packet to host memory is supported.
+ */
+ IB_RAW_PACKET_CAP_SCATTER_FCS = (1 << 1),
+ /* Checksum offloads are supported (for both send and receive). */
+ IB_RAW_PACKET_CAP_IP_CSUM = (1 << 2),
+};
+
enum ib_wq_type {
IB_WQT_RQ
};
@@ -1493,6 +1516,11 @@ struct ib_wq {
atomic_t usecnt;
};
+enum ib_wq_flags {
+ IB_WQ_FLAGS_CVLAN_STRIPPING = 1 << 0,
+ IB_WQ_FLAGS_SCATTER_FCS = 1 << 1,
+};
+
struct ib_wq_init_attr {
void *wq_context;
enum ib_wq_type wq_type;
@@ -1500,16 +1528,20 @@ struct ib_wq_init_attr {
u32 max_sge;
struct ib_cq *cq;
void (*event_handler)(struct ib_event *, void *);
+ u32 create_flags; /* Use enum ib_wq_flags */
};
enum ib_wq_attr_mask {
- IB_WQ_STATE = 1 << 0,
- IB_WQ_CUR_STATE = 1 << 1,
+ IB_WQ_STATE = 1 << 0,
+ IB_WQ_CUR_STATE = 1 << 1,
+ IB_WQ_FLAGS = 1 << 2,
};
struct ib_wq_attr {
enum ib_wq_state wq_state;
enum ib_wq_state curr_wq_state;
+ u32 flags; /* Use enum ib_wq_flags */
+ u32 flags_mask; /* Use enum ib_wq_flags */
};
struct ib_rwq_ind_table {
@@ -1618,6 +1650,8 @@ enum ib_flow_spec_type {
IB_FLOW_SPEC_UDP = 0x41,
IB_FLOW_SPEC_VXLAN_TUNNEL = 0x50,
IB_FLOW_SPEC_INNER = 0x100,
+ /* Actions */
+ IB_FLOW_SPEC_ACTION_TAG = 0x1000,
};
#define IB_FLOW_SPEC_LAYER_MASK 0xF0
#define IB_FLOW_SPEC_SUPPORT_LAYERS 8
@@ -1740,6 +1774,12 @@ struct ib_flow_spec_tunnel {
struct ib_flow_tunnel_filter mask;
};
+struct ib_flow_spec_action_tag {
+ enum ib_flow_spec_type type;
+ u16 size;
+ u32 tag_id;
+};
+
union ib_flow_spec {
struct {
u32 type;
@@ -1751,6 +1791,7 @@ union ib_flow_spec {
struct ib_flow_spec_tcp_udp tcp_udp;
struct ib_flow_spec_ipv6 ipv6;
struct ib_flow_spec_tunnel tunnel;
+ struct ib_flow_spec_action_tag flow_tag;
};
struct ib_flow_attr {
@@ -2333,6 +2374,16 @@ static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
rdma_protocol_roce(device, port_num);
}
+static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_RAW_PACKET;
+}
+
+static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_USNIC;
+}
+
/**
* rdma_cap_ib_mad - Check if the port of a device supports Infiniband
* Management Datagrams.
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index f4f87cff6dc6..997f904c7692 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -246,7 +246,7 @@ struct ib_uverbs_ex_query_device_resp {
__u64 device_cap_flags_ex;
struct ib_uverbs_rss_caps rss_caps;
__u32 max_wq_type_rq;
- __u32 reserved;
+ __u32 raw_packet_caps;
};
struct ib_uverbs_query_port {
@@ -934,6 +934,19 @@ struct ib_uverbs_flow_spec_ipv6 {
struct ib_uverbs_flow_ipv6_filter mask;
};
+struct ib_uverbs_flow_spec_action_tag {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ __u32 tag_id;
+ __u32 reserved1;
+};
+
struct ib_uverbs_flow_tunnel_filter {
__be32 tunnel_id;
};
@@ -1053,6 +1066,8 @@ struct ib_uverbs_ex_create_wq {
__u32 cq_handle;
__u32 max_wr;
__u32 max_sge;
+ __u32 create_flags; /* Use enum ib_wq_flags */
+ __u32 reserved;
};
struct ib_uverbs_ex_create_wq_resp {
@@ -1081,6 +1096,8 @@ struct ib_uverbs_ex_modify_wq {
__u32 wq_handle;
__u32 wq_state;
__u32 curr_wq_state;
+ __u32 flags; /* Use enum ib_wq_flags */
+ __u32 flags_mask; /* Use enum ib_wq_flags */
};
/* Prevent memory allocation rather than max expected size */