diff options
Diffstat (limited to 'drivers/infiniband')
30 files changed, 278 insertions, 262 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 1dd467bed8fc..6d7ec371e7b2 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -352,7 +352,7 @@ static bool has_gateway(const struct dst_entry *dst, sa_family_t family) if (family == AF_INET) { rt = container_of(dst, struct rtable, dst); - return rt->rt_gw_family == AF_INET; + return rt->rt_uses_gateway; } rt6 = container_of(dst, struct rt6_info, dst); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index da10e6ccb43c..5920c0085d35 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -4399,6 +4399,7 @@ error2: error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; + kfree(port); while (--i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; @@ -4407,6 +4408,7 @@ error1: ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); + kfree(port); } free: kfree(cm_dev); @@ -4460,6 +4462,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) spin_unlock_irq(&cm.state_lock); ib_unregister_mad_agent(cur_mad_agent); cm_remove_port_fs(port); + kfree(port); } kfree(cm_dev); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 0e3cf3461999..d78f67623f24 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2396,9 +2396,10 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, conn_id->cm_id.iw = NULL; cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); + mutex_unlock(&listen_id->handler_mutex); cma_deref_id(conn_id); rdma_destroy_id(&conn_id->id); - goto out; + return ret; } mutex_unlock(&conn_id->handler_mutex); diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 3a8b0911c3bc..9d07378b5b42 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -199,6 +199,7 @@ void ib_mad_cleanup(void); int ib_sa_init(void); void ib_sa_cleanup(void); +void rdma_nl_init(void); void rdma_nl_exit(void); int ib_nl_handle_resolve_resp(struct sk_buff *skb, diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 99c4a55545cf..50a92442c4f7 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1987,8 +1987,6 @@ static int iw_query_port(struct ib_device *device, if (!netdev) return -ENODEV; - dev_put(netdev); - port_attr->max_mtu = IB_MTU_4096; port_attr->active_mtu = ib_mtu_int_to_enum(netdev->mtu); @@ -1996,19 +1994,22 @@ static int iw_query_port(struct ib_device *device, port_attr->state = IB_PORT_DOWN; port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; } else { - inetdev = in_dev_get(netdev); + rcu_read_lock(); + inetdev = __in_dev_get_rcu(netdev); if (inetdev && inetdev->ifa_list) { port_attr->state = IB_PORT_ACTIVE; port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; - in_dev_put(inetdev); } else { port_attr->state = IB_PORT_INIT; port_attr->phys_state = IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING; } + + rcu_read_unlock(); } + dev_put(netdev); err = device->ops.query_port(device, port_num, port_attr); if (err) return err; @@ -2715,6 +2716,8 @@ static int __init ib_core_init(void) goto err_comp_unbound; } + rdma_nl_init(); + ret = addr_init(); if (ret) { pr_warn("Could't init IB address resolution\n"); diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 72141c5b7c95..ade71823370f 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -372,6 +372,7 @@ EXPORT_SYMBOL(iw_cm_disconnect); static void destroy_cm_id(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; + struct ib_qp *qp; unsigned long flags; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); @@ -389,6 +390,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags); spin_lock_irqsave(&cm_id_priv->lock, flags); + qp = cm_id_priv->qp; + cm_id_priv->qp = NULL; + switch (cm_id_priv->state) { case IW_CM_STATE_LISTEN: cm_id_priv->state = IW_CM_STATE_DESTROYING; @@ -401,7 +405,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); /* Abrupt close of the connection */ - (void)iwcm_modify_qp_err(cm_id_priv->qp); + (void)iwcm_modify_qp_err(qp); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_IDLE: @@ -426,11 +430,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) BUG(); break; } - if (cm_id_priv->qp) { - cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp); - cm_id_priv->qp = NULL; - } spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id_priv->id.device->ops.iw_rem_ref(qp); if (cm_id->mapped) { iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr); @@ -671,11 +673,11 @@ int iw_cm_accept(struct iw_cm_id *cm_id, BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); cm_id_priv->state = IW_CM_STATE_IDLE; spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id_priv->qp) { - cm_id->device->ops.iw_rem_ref(qp); - cm_id_priv->qp = NULL; - } + qp = cm_id_priv->qp; + cm_id_priv->qp = NULL; spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id->device->ops.iw_rem_ref(qp); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); } @@ -696,7 +698,7 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) struct iwcm_id_private *cm_id_priv; int ret; unsigned long flags; - struct ib_qp *qp; + struct ib_qp *qp = NULL; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); @@ -730,13 +732,13 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) return 0; /* success */ spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id_priv->qp) { - cm_id->device->ops.iw_rem_ref(qp); - cm_id_priv->qp = NULL; - } + qp = cm_id_priv->qp; + cm_id_priv->qp = NULL; cm_id_priv->state = IW_CM_STATE_IDLE; err: spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id->device->ops.iw_rem_ref(qp); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return ret; @@ -878,6 +880,7 @@ static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { + struct ib_qp *qp = NULL; unsigned long flags; int ret; @@ -896,11 +899,13 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, cm_id_priv->state = IW_CM_STATE_ESTABLISHED; } else { /* REJECTED or RESET */ - cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp); + qp = cm_id_priv->qp; cm_id_priv->qp = NULL; cm_id_priv->state = IW_CM_STATE_IDLE; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id_priv->id.device->ops.iw_rem_ref(qp); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); if (iw_event->private_data_len) @@ -942,21 +947,18 @@ static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, static int cm_close_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { + struct ib_qp *qp; unsigned long flags; - int ret = 0; + int ret = 0, notify_event = 0; spin_lock_irqsave(&cm_id_priv->lock, flags); + qp = cm_id_priv->qp; + cm_id_priv->qp = NULL; - if (cm_id_priv->qp) { - cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp); - cm_id_priv->qp = NULL; - } switch (cm_id_priv->state) { case IW_CM_STATE_ESTABLISHED: case IW_CM_STATE_CLOSING: cm_id_priv->state = IW_CM_STATE_IDLE; - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); - spin_lock_irqsave(&cm_id_priv->lock, flags); + notify_event = 1; break; case IW_CM_STATE_DESTROYING: break; @@ -965,6 +967,10 @@ static int cm_close_handler(struct iwcm_id_private *cm_id_priv, } spin_unlock_irqrestore(&cm_id_priv->lock, flags); + if (qp) + cm_id_priv->id.device->ops.iw_rem_ref(qp); + if (notify_event) + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); return ret; } diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 81dbd5f41bed..8cd31ef25eff 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -42,9 +42,12 @@ #include <linux/module.h> #include "core_priv.h" -static DEFINE_MUTEX(rdma_nl_mutex); static struct { - const struct rdma_nl_cbs *cb_table; + const struct rdma_nl_cbs *cb_table; + /* Synchronizes between ongoing netlink commands and netlink client + * unregistration. + */ + struct rw_semaphore sem; } rdma_nl_types[RDMA_NL_NUM_CLIENTS]; bool rdma_nl_chk_listeners(unsigned int group) @@ -75,70 +78,53 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op) return (op < max_num_ops[type]) ? true : false; } -static bool -is_nl_valid(const struct sk_buff *skb, unsigned int type, unsigned int op) +static const struct rdma_nl_cbs * +get_cb_table(const struct sk_buff *skb, unsigned int type, unsigned int op) { const struct rdma_nl_cbs *cb_table; - if (!is_nl_msg_valid(type, op)) - return false; - /* * Currently only NLDEV client is supporting netlink commands in * non init_net net namespace. */ if (sock_net(skb->sk) != &init_net && type != RDMA_NL_NLDEV) - return false; + return NULL; - if (!rdma_nl_types[type].cb_table) { - mutex_unlock(&rdma_nl_mutex); - request_module("rdma-netlink-subsys-%d", type); - mutex_lock(&rdma_nl_mutex); - } + cb_table = READ_ONCE(rdma_nl_types[type].cb_table); + if (!cb_table) { + /* + * Didn't get valid reference of the table, attempt module + * load once. + */ + up_read(&rdma_nl_types[type].sem); - cb_table = rdma_nl_types[type].cb_table; + request_module("rdma-netlink-subsys-%d", type); + down_read(&rdma_nl_types[type].sem); + cb_table = READ_ONCE(rdma_nl_types[type].cb_table); + } if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit)) - return false; - return true; + return NULL; + return cb_table; } void rdma_nl_register(unsigned int index, const struct rdma_nl_cbs cb_table[]) { - mutex_lock(&rdma_nl_mutex); - if (!is_nl_msg_valid(index, 0)) { - /* - * All clients are not interesting in success/failure of - * this call. They want to see the print to error log and - * continue their initialization. Print warning for them, - * because it is programmer's error to be here. - */ - mutex_unlock(&rdma_nl_mutex); - WARN(true, - "The not-valid %u index was supplied to RDMA netlink\n", - index); + if (WARN_ON(!is_nl_msg_valid(index, 0)) || + WARN_ON(READ_ONCE(rdma_nl_types[index].cb_table))) return; - } - - if (rdma_nl_types[index].cb_table) { - mutex_unlock(&rdma_nl_mutex); - WARN(true, - "The %u index is already registered in RDMA netlink\n", - index); - return; - } - rdma_nl_types[index].cb_table = cb_table; - mutex_unlock(&rdma_nl_mutex); + /* Pairs with the READ_ONCE in is_nl_valid() */ + smp_store_release(&rdma_nl_types[index].cb_table, cb_table); } EXPORT_SYMBOL(rdma_nl_register); void rdma_nl_unregister(unsigned int index) { - mutex_lock(&rdma_nl_mutex); + down_write(&rdma_nl_types[index].sem); rdma_nl_types[index].cb_table = NULL; - mutex_unlock(&rdma_nl_mutex); + up_write(&rdma_nl_types[index].sem); } EXPORT_SYMBOL(rdma_nl_unregister); @@ -170,15 +156,21 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, unsigned int index = RDMA_NL_GET_CLIENT(type); unsigned int op = RDMA_NL_GET_OP(type); const struct rdma_nl_cbs *cb_table; + int err = -EINVAL; - if (!is_nl_valid(skb, index, op)) + if (!is_nl_msg_valid(index, op)) return -EINVAL; - cb_table = rdma_nl_types[index].cb_table; + down_read(&rdma_nl_types[index].sem); + cb_table = get_cb_table(skb, index, op); + if (!cb_table) + goto done; if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) && - !netlink_capable(skb, CAP_NET_ADMIN)) - return -EPERM; + !netlink_capable(skb, CAP_NET_ADMIN)) { + err = -EPERM; + goto done; + } /* * LS responses overload the 0x100 (NLM_F_ROOT) flag. Don't @@ -186,8 +178,8 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, */ if (index == RDMA_NL_LS) { if (cb_table[op].doit) - return cb_table[op].doit(skb, nlh, extack); - return -EINVAL; + err = cb_table[op].doit(skb, nlh, extack); + goto done; } /* FIXME: Convert IWCM to properly handle doit callbacks */ if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) { @@ -195,14 +187,15 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, .dump = cb_table[op].dump, }; if (c.dump) - return netlink_dump_start(skb->sk, skb, nlh, &c); - return -EINVAL; + err = netlink_dump_start(skb->sk, skb, nlh, &c); + goto done; } if (cb_table[op].doit) - return cb_table[op].doit(skb, nlh, extack); - - return 0; + err = cb_table[op].doit(skb, nlh, extack); +done: + up_read(&rdma_nl_types[index].sem); + return err; } /* @@ -263,9 +256,7 @@ skip: static void rdma_nl_rcv(struct sk_buff *skb) { - mutex_lock(&rdma_nl_mutex); rdma_nl_rcv_skb(skb, &rdma_nl_rcv_msg); - mutex_unlock(&rdma_nl_mutex); } int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid) @@ -297,6 +288,14 @@ int rdma_nl_multicast(struct net *net, struct sk_buff *skb, } EXPORT_SYMBOL(rdma_nl_multicast); +void rdma_nl_init(void) +{ + int idx; + + for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++) + init_rwsem(&rdma_nl_types[idx].sem); +} + void rdma_nl_exit(void) { int idx; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 7a7474000100..c03af08b80e7 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -778,7 +778,7 @@ static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin, container_of(res, struct rdma_counter, res); if (port && port != counter->port) - return 0; + return -EAGAIN; /* Dump it even query failed */ rdma_counter_query_stats(counter); @@ -1230,7 +1230,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto err; + goto err_get; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, @@ -1787,10 +1787,6 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); - ret = rdma_counter_unbind_qpn(device, port, qpn, cntn); - if (ret) - goto err_unbind; - if (fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || @@ -1799,13 +1795,15 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err_fill; } + ret = rdma_counter_unbind_qpn(device, port, qpn, cntn); + if (ret) + goto err_fill; + nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_fill: - rdma_counter_bind_qpn(device, port, qpn, cntn); -err_unbind: nlmsg_free(msg); err: ib_device_put(device); diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 1ab423b19f77..6eb6d2717ca5 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -426,7 +426,7 @@ int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev) int ret; rdma_for_each_port (dev, i) { - is_ib = rdma_protocol_ib(dev, i++); + is_ib = rdma_protocol_ib(dev, i); if (is_ib) break; } diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index f67a30fda1ed..163ff7ba92b7 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -451,8 +451,10 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp) * that the hardware will not attempt to access the MR any more. */ if (!umem_odp->is_implicit_odp) { + mutex_lock(&umem_odp->umem_mutex); ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp)); + mutex_unlock(&umem_odp->umem_mutex); kvfree(umem_odp->dma_list); kvfree(umem_odp->page_list); } @@ -719,6 +721,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, u64 addr; struct ib_device *dev = umem_odp->umem.ibdev; + lockdep_assert_held(&umem_odp->umem_mutex); + virt = max_t(u64, virt, ib_umem_start(umem_odp)); bound = min_t(u64, bound, ib_umem_end(umem_odp)); /* Note that during the run of this function, the @@ -726,7 +730,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, * faults from completion. We might be racing with other * invalidations, so we must make sure we free each page only * once. */ - mutex_lock(&umem_odp->umem_mutex); for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) { idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift; if (umem_odp->page_list[idx]) { @@ -757,7 +760,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, umem_odp->npages--; } } - mutex_unlock(&umem_odp->umem_mutex); } EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 1e5aeb39f774..63f7f7db5902 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -98,7 +98,7 @@ ib_uverbs_init_udata_buf_or_null(struct ib_udata *udata, struct ib_uverbs_device { atomic_t refcount; - int num_comp_vectors; + u32 num_comp_vectors; struct completion comp; struct device dev; /* First group for device attributes, NULL terminated array */ diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index f974b6854224..35c2841a569e 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -662,16 +662,17 @@ static bool find_gid_index(const union ib_gid *gid, void *context) { struct find_gid_index_context *ctx = context; + u16 vlan_id = 0xffff; + int ret; if (ctx->gid_type != gid_attr->gid_type) return false; - if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) || - (is_vlan_dev(gid_attr->ndev) && - vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id)) + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); + if (ret) return false; - return true; + return ctx->vlan_id == vlan_id; } static const struct ib_gid_attr * diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index e87fc0408470..347dc242fb88 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -495,7 +495,6 @@ static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); release_ep_resources(ep); - kfree_skb(skb); return 0; } @@ -506,7 +505,6 @@ static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); c4iw_put_ep(&ep->parent_ep->com); release_ep_resources(ep); - kfree_skb(skb); return 0; } @@ -2424,20 +2422,6 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; pr_debug("ep %p tid %u\n", ep, ep->hwtid); - - skb_get(skb); - rpl = cplhdr(skb); - if (!is_t4(adapter_type)) { - skb_trim(skb, roundup(sizeof(*rpl5), 16)); - rpl5 = (void *)rpl; - INIT_TP_WR(rpl5, ep->hwtid); - } else { - skb_trim(skb, sizeof(*rpl)); - INIT_TP_WR(rpl, ep->hwtid); - } - OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, - ep->hwtid)); - cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, enable_tcp_timestamps && req->tcpopt.tstamp, (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); @@ -2483,6 +2467,20 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, if (tcph->ece && tcph->cwr) opt2 |= CCTRL_ECN_V(1); } + + skb_get(skb); + rpl = cplhdr(skb); + if (!is_t4(adapter_type)) { + skb_trim(skb, roundup(sizeof(*rpl5), 16)); + rpl5 = (void *)rpl; + INIT_TP_WR(rpl5, ep->hwtid); + } else { + skb_trim(skb, sizeof(*rpl)); + INIT_TP_WR(rpl, ep->hwtid); + } + OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, + ep->hwtid)); + if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) { u32 isn = (prandom_u32() & ~7UL) - 1; opt2 |= T5_OPT_2_VALID_F; diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index a8b9548bd1a2..599340c1f0b8 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -242,10 +242,13 @@ static void set_ep_sin6_addrs(struct c4iw_ep *ep, } } -static int dump_qp(struct c4iw_qp *qp, struct c4iw_debugfs_data *qpd) +static int dump_qp(unsigned long id, struct c4iw_qp *qp, + struct c4iw_debugfs_data *qpd) { int space; int cc; + if (id != qp->wq.sq.qid) + return 0; space = qpd->bufsize - qpd->pos - 1; if (space == 0) @@ -350,7 +353,7 @@ static int qp_open(struct inode *inode, struct file *file) xa_lock_irq(&qpd->devp->qps); xa_for_each(&qpd->devp->qps, index, qp) - dump_qp(qp, qpd); + dump_qp(index, qp, qpd); xa_unlock_irq(&qpd->devp->qps); qpd->buf[qpd->pos++] = 0; diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index aa772ee0706f..35c284af574d 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -275,13 +275,17 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, struct sk_buff *skb, struct c4iw_wr_wait *wr_waitp) { int err; - struct fw_ri_tpte tpt; + struct fw_ri_tpte *tpt; u32 stag_idx; static atomic_t key; if (c4iw_fatal_error(rdev)) return -EIO; + tpt = kmalloc(sizeof(*tpt), GFP_KERNEL); + if (!tpt) + return -ENOMEM; + stag_state = stag_state > 0; stag_idx = (*stag) >> 8; @@ -291,6 +295,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, mutex_lock(&rdev->stats.lock); rdev->stats.stag.fail++; mutex_unlock(&rdev->stats.lock); + kfree(tpt); return -ENOMEM; } mutex_lock(&rdev->stats.lock); @@ -305,28 +310,28 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, /* write TPT entry */ if (reset_tpt_entry) - memset(&tpt, 0, sizeof(tpt)); + memset(tpt, 0, sizeof(*tpt)); else { - tpt.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | + tpt->valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | FW_RI_TPTE_STAGKEY_V((*stag & FW_RI_TPTE_STAGKEY_M)) | FW_RI_TPTE_STAGSTATE_V(stag_state) | FW_RI_TPTE_STAGTYPE_V(type) | FW_RI_TPTE_PDID_V(pdid)); - tpt.locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) | + tpt->locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) | (bind_enabled ? FW_RI_TPTE_MWBINDEN_F : 0) | FW_RI_TPTE_ADDRTYPE_V((zbva ? FW_RI_ZERO_BASED_TO : FW_RI_VA_BASED_TO))| FW_RI_TPTE_PS_V(page_size)); - tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32( + tpt->nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32( FW_RI_TPTE_PBLADDR_V(PBL_OFF(rdev, pbl_addr)>>3)); - tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL)); - tpt.va_hi = cpu_to_be32((u32)(to >> 32)); - tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL)); - tpt.dca_mwbcnt_pstag = cpu_to_be32(0); - tpt.len_hi = cpu_to_be32((u32)(len >> 32)); + tpt->len_lo = cpu_to_be32((u32)(len & 0xffffffffUL)); + tpt->va_hi = cpu_to_be32((u32)(to >> 32)); + tpt->va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL)); + tpt->dca_mwbcnt_pstag = cpu_to_be32(0); + tpt->len_hi = cpu_to_be32((u32)(len >> 32)); } err = write_adapter_mem(rdev, stag_idx + (rdev->lldi.vr->stag.start >> 5), - sizeof(tpt), &tpt, skb, wr_waitp); + sizeof(*tpt), tpt, skb, wr_waitp); if (reset_tpt_entry) { c4iw_put_resource(&rdev->resource.tpt_table, stag_idx); @@ -334,6 +339,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, rdev->stats.stag.cur -= 32; mutex_unlock(&rdev->stats.lock); } + kfree(tpt); return err; } diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index eb9368be28c1..bbcac539777a 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2737,15 +2737,11 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs, if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6) srq->flags = T4_SRQ_LIMIT_SUPPORT; - ret = xa_insert_irq(&rhp->qps, srq->wq.qid, srq, GFP_KERNEL); - if (ret) - goto err_free_queue; - if (udata) { srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL); if (!srq_key_mm) { ret = -ENOMEM; - goto err_remove_handle; + goto err_free_queue; } srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL); if (!srq_db_key_mm) { @@ -2789,8 +2785,6 @@ err_free_srq_db_key_mm: kfree(srq_db_key_mm); err_free_srq_key_mm: kfree(srq_key_mm); -err_remove_handle: - xa_erase_irq(&rhp->qps, srq->wq.qid); err_free_queue: free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, srq->wr_waitp); @@ -2813,8 +2807,6 @@ void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) rhp = srq->rhp; pr_debug("%s id %d\n", __func__, srq->wq.qid); - - xa_erase_irq(&rhp->qps, srq->wq.qid); ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, ibucontext); free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 2395fd4233a7..c61b6022575e 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -65,6 +65,7 @@ #define SDMA_DESCQ_CNT 2048 #define SDMA_DESC_INTR 64 #define INVALID_TAIL 0xffff +#define SDMA_PAD max_t(size_t, MAX_16B_PADDING, sizeof(u32)) static uint sdma_descq_cnt = SDMA_DESCQ_CNT; module_param(sdma_descq_cnt, uint, S_IRUGO); @@ -1296,7 +1297,7 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) struct sdma_engine *sde; if (dd->sdma_pad_dma) { - dma_free_coherent(&dd->pcidev->dev, 4, + dma_free_coherent(&dd->pcidev->dev, SDMA_PAD, (void *)dd->sdma_pad_dma, dd->sdma_pad_phys); dd->sdma_pad_dma = NULL; @@ -1491,7 +1492,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) } /* Allocate memory for pad */ - dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, sizeof(u32), + dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, SDMA_PAD, &dd->sdma_pad_phys, GFP_KERNEL); if (!dd->sdma_pad_dma) { dd_dev_err(dd, "failed to allocate SendDMA pad memory\n"); @@ -1526,8 +1527,11 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) } ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params); - if (ret < 0) + if (ret < 0) { + kfree(tmp_sdma_rht); goto bail; + } + dd->sdma_rht = tmp_sdma_rht; dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma); diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index b4dcc4d29f84..f21fca3617d5 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -2736,11 +2736,6 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, diff = cmp_psn(psn, flow->flow_state.r_next_psn); if (diff > 0) { - if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) - restart_tid_rdma_read_req(rcd, - qp, - wqe); - /* Drop the packet.*/ goto s_unlock; } else if (diff < 0) { diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 7bff0a1e713d..089e201d7550 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -147,9 +147,6 @@ static int pio_wait(struct rvt_qp *qp, /* Length of buffer to create verbs txreq cache name */ #define TXREQ_NAME_LEN 24 -/* 16B trailing buffer */ -static const u8 trail_buf[MAX_16B_PADDING]; - static uint wss_threshold = 80; module_param(wss_threshold, uint, S_IRUGO); MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy"); @@ -820,8 +817,8 @@ static int build_verbs_tx_desc( /* add icrc, lt byte, and padding to flit */ if (extra_bytes) - ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq, - (void *)trail_buf, extra_bytes); + ret = sdma_txadd_daddr(sde->dd, &tx->txreq, + sde->dd->sdma_pad_phys, extra_bytes); bail_txadd: return ret; @@ -1089,7 +1086,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, } /* add icrc, lt byte, and padding to flit */ if (extra_bytes) - seg_pio_copy_mid(pbuf, trail_buf, extra_bytes); + seg_pio_copy_mid(pbuf, ppd->dd->sdma_pad_dma, + extra_bytes); seg_pio_copy_end(pbuf); } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7a89d669f8bf..e82567fcdeb7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5389,9 +5389,9 @@ static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev, return; } - if (eq->buf_list) - dma_free_coherent(hr_dev->dev, buf_chk_sz, - eq->buf_list->buf, eq->buf_list->map); + dma_free_coherent(hr_dev->dev, buf_chk_sz, eq->buf_list->buf, + eq->buf_list->map); + kfree(eq->buf_list); } static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 8056930bbe2c..cd9ee1664a69 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2773,6 +2773,10 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev) return -ENOMEM; iwibdev = iwdev->iwibdev; rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group); + ret = ib_device_set_netdev(&iwibdev->ibdev, iwdev->netdev, 1); + if (ret) + goto error; + ret = ib_register_device(&iwibdev->ibdev, "i40iw%d"); if (ret) goto error; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 59022b744144..d609f4659afb 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1298,29 +1298,6 @@ static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, return 0; } -static void devx_free_indirect_mkey(struct rcu_head *rcu) -{ - kfree(container_of(rcu, struct devx_obj, devx_mr.rcu)); -} - -/* This function to delete from the radix tree needs to be called before - * destroying the underlying mkey. Otherwise a race might occur in case that - * other thread will get the same mkey before this one will be deleted, - * in that case it will fail via inserting to the tree its own data. - * - * Note: - * An error in the destroy is not expected unless there is some other indirect - * mkey which points to this one. In a kernel cleanup flow it will be just - * destroyed in the iterative destruction call. In a user flow, in case - * the application didn't close in the expected order it's its own problem, - * the mkey won't be part of the tree, in both cases the kernel is safe. - */ -static void devx_cleanup_mkey(struct devx_obj *obj) -{ - xa_erase(&obj->ib_dev->mdev->priv.mkey_table, - mlx5_base_mkey(obj->devx_mr.mmkey.key)); -} - static void devx_cleanup_subscription(struct mlx5_ib_dev *dev, struct devx_event_subscription *sub) { @@ -1362,8 +1339,16 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, int ret; dev = mlx5_udata_to_mdev(&attrs->driver_udata); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) - devx_cleanup_mkey(obj); + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + /* + * The pagefault_single_data_segment() does commands against + * the mmkey, we must wait for that to stop before freeing the + * mkey, as another allocation could get the same mkey #. + */ + xa_erase(&obj->ib_dev->mdev->priv.mkey_table, + mlx5_base_mkey(obj->devx_mr.mmkey.key)); + synchronize_srcu(&dev->mr_srcu); + } if (obj->flags & DEVX_OBJ_FLAGS_DCT) ret = mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct); @@ -1382,12 +1367,6 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, devx_cleanup_subscription(dev, sub_entry); mutex_unlock(&devx_event_table->event_xa_lock); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { - call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu, - devx_free_indirect_mkey); - return ret; - } - kfree(obj); return ret; } @@ -1491,26 +1470,21 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( &obj_id); WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { - err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out); - if (err) - goto obj_destroy; - } - err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); if (err) - goto err_copy; + goto obj_destroy; if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJECT) obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type); - obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id); + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out); + if (err) + goto obj_destroy; + } return 0; -err_copy: - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) - devx_cleanup_mkey(obj); obj_destroy: if (obj->flags & DEVX_OBJ_FLAGS_DCT) mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 2ceaef3ea3fb..1a98ee2e01c4 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -606,7 +606,7 @@ struct mlx5_ib_mr { struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; - int live; + unsigned int live; void *descs_alloc; int access_flags; /* Needed for rereg MR */ @@ -639,7 +639,6 @@ struct mlx5_ib_mw { struct mlx5_ib_devx_mr { struct mlx5_core_mkey mmkey; int ndescs; - struct rcu_head rcu; }; struct mlx5_ib_umr_context { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1eff031ef048..7019c12005f4 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -84,32 +84,6 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); } -static void update_odp_mr(struct mlx5_ib_mr *mr) -{ - if (is_odp_mr(mr)) { - /* - * This barrier prevents the compiler from moving the - * setting of umem->odp_data->private to point to our - * MR, before reg_umr finished, to ensure that the MR - * initialization have finished before starting to - * handle invalidations. - */ - smp_wmb(); - to_ib_umem_odp(mr->umem)->private = mr; - /* - * Make sure we will see the new - * umem->odp_data->private value in the invalidation - * routines, before we can get page faults on the - * MR. Page faults can happen once we put the MR in - * the tree, below this line. Without the barrier, - * there can be a fault handling and an invalidation - * before umem->odp_data->private == mr is visible to - * the invalidation handler. - */ - smp_wmb(); - } -} - static void reg_mr_callback(int status, struct mlx5_async_work *context) { struct mlx5_ib_mr *mr = @@ -1346,8 +1320,6 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->umem = umem; set_mr_fields(dev, mr, npages, length, access_flags); - update_odp_mr(mr); - if (use_umr) { int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; @@ -1363,10 +1335,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } } - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - mr->live = 1; + if (is_odp_mr(mr)) { + to_ib_umem_odp(mr->umem)->private = mr; atomic_set(&mr->num_pending_prefetch, 0); } + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + smp_store_release(&mr->live, 1); return &mr->ibmr; error: @@ -1441,6 +1415,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (!mr->umem) return -EINVAL; + if (is_odp_mr(mr)) + return -EOPNOTSUPP; + if (flags & IB_MR_REREG_TRANS) { addr = virt_addr; len = length; @@ -1486,8 +1463,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, } mr->allocated_from_cache = 0; - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - mr->live = 1; } else { /* * Send a UMR WQE @@ -1516,7 +1491,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, set_mr_fields(dev, mr, npages, len, access_flags); - update_odp_mr(mr); return 0; err: @@ -1607,15 +1581,16 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) /* Prevent new page faults and * prefetch requests from succeeding */ - mr->live = 0; + WRITE_ONCE(mr->live, 0); + + /* Wait for all running page-fault handlers to finish. */ + synchronize_srcu(&dev->mr_srcu); /* dequeue pending prefetch requests for the mr */ if (atomic_read(&mr->num_pending_prefetch)) flush_workqueue(system_unbound_wq); WARN_ON(atomic_read(&mr->num_pending_prefetch)); - /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&dev->mr_srcu); /* Destroy all page mappings */ if (!umem_odp->is_implicit_odp) mlx5_ib_invalidate_range(umem_odp, @@ -1987,14 +1962,25 @@ free: int mlx5_ib_dealloc_mw(struct ib_mw *mw) { + struct mlx5_ib_dev *dev = to_mdev(mw->device); struct mlx5_ib_mw *mmw = to_mmw(mw); int err; - err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev, - &mmw->mmkey); - if (!err) - kfree(mmw); - return err; + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + xa_erase_irq(&dev->mdev->priv.mkey_table, + mlx5_base_mkey(mmw->mmkey.key)); + /* + * pagefault_single_data_segment() may be accessing mmw under + * SRCU if the user bound an ODP MR to this MW. + */ + synchronize_srcu(&dev->mr_srcu); + } + + err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); + if (err) + return err; + kfree(mmw); + return 0; } int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 2e9b43061797..3f9478d19376 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -178,6 +178,29 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, return; } + /* + * The locking here is pretty subtle. Ideally the implicit children + * list would be protected by the umem_mutex, however that is not + * possible. Instead this uses a weaker update-then-lock pattern: + * + * srcu_read_lock() + * <change children list> + * mutex_lock(umem_mutex) + * mlx5_ib_update_xlt() + * mutex_unlock(umem_mutex) + * destroy lkey + * + * ie any change the children list must be followed by the locked + * update_xlt before destroying. + * + * The umem_mutex provides the acquire/release semantic needed to make + * the children list visible to a racing thread. While SRCU is not + * technically required, using it gives consistent use of the SRCU + * locking around the children list. + */ + lockdep_assert_held(&to_ib_umem_odp(mr->umem)->umem_mutex); + lockdep_assert_held(&mr->dev->mr_srcu); + odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE, nentries * MLX5_IMR_MTT_SIZE, mr); @@ -202,15 +225,22 @@ static void mr_leaf_free_action(struct work_struct *work) struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work); int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent; + struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + int srcu_key; mr->parent = NULL; synchronize_srcu(&mr->dev->mr_srcu); - ib_umem_odp_release(odp); - if (imr->live) + if (smp_load_acquire(&imr->live)) { + srcu_key = srcu_read_lock(&mr->dev->mr_srcu); + mutex_lock(&odp_imr->umem_mutex); mlx5_ib_update_xlt(imr, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); + mutex_unlock(&odp_imr->umem_mutex); + srcu_read_unlock(&mr->dev->mr_srcu, srcu_key); + } + ib_umem_odp_release(odp); mlx5_mr_cache_free(mr->dev, mr); if (atomic_dec_and_test(&imr->num_leaf_free)) @@ -278,7 +308,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, idx - blk_start_idx + 1, 0, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); - mutex_unlock(&umem_odp->umem_mutex); /* * We are now sure that the device will not access the * memory. We can safely unmap it, and mark it as dirty if @@ -289,10 +318,12 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, if (unlikely(!umem_odp->npages && mr->parent && !umem_odp->dying)) { - WRITE_ONCE(umem_odp->dying, 1); + WRITE_ONCE(mr->live, 0); + umem_odp->dying = 1; atomic_inc(&mr->parent->num_leaf_free); schedule_work(&umem_odp->work); } + mutex_unlock(&umem_odp->umem_mutex); } void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) @@ -429,8 +460,6 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; - mr->live = 1; - mlx5_ib_dbg(dev, "key %x dev %p mr %p\n", mr->mmkey.key, dev->mdev, mr); @@ -484,6 +513,8 @@ next_mr: mtt->parent = mr; INIT_WORK(&odp->work, mr_leaf_free_action); + smp_store_release(&mtt->live, 1); + if (!nentries) start_idx = addr >> MLX5_IMR_MTT_SHIFT; nentries++; @@ -536,6 +567,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, init_waitqueue_head(&imr->q_leaf_free); atomic_set(&imr->num_leaf_free, 0); atomic_set(&imr->num_pending_prefetch, 0); + smp_store_release(&imr->live, 1); return imr; } @@ -555,15 +587,19 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) if (mr->parent != imr) continue; + mutex_lock(&umem_odp->umem_mutex); ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp)); - if (umem_odp->dying) + if (umem_odp->dying) { + mutex_unlock(&umem_odp->umem_mutex); continue; + } - WRITE_ONCE(umem_odp->dying, 1); + umem_odp->dying = 1; atomic_inc(&imr->num_leaf_free); schedule_work(&umem_odp->work); + mutex_unlock(&umem_odp->umem_mutex); } up_read(&per_mm->umem_rwsem); @@ -773,7 +809,7 @@ next_mr: switch (mmkey->type) { case MLX5_MKEY_MR: mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (!mr->live || !mr->ibmr.pd) { + if (!smp_load_acquire(&mr->live) || !mr->ibmr.pd) { mlx5_ib_dbg(dev, "got dead MR\n"); ret = -EFAULT; goto srcu_unlock; @@ -1641,12 +1677,12 @@ static bool num_pending_prefetch_inc(struct ib_pd *pd, mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (mr->ibmr.pd != pd) { + if (!smp_load_acquire(&mr->live)) { ret = false; break; } - if (!mr->live) { + if (mr->ibmr.pd != pd) { ret = false; break; } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8937d72ddcf6..5fd071c05944 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3249,10 +3249,12 @@ static int modify_raw_packet_qp_sq( } /* Only remove the old rate after new rate was set */ - if ((old_rl.rate && - !mlx5_rl_are_equal(&old_rl, &new_rl)) || - (new_state != MLX5_SQC_STATE_RDY)) + if ((old_rl.rate && !mlx5_rl_are_equal(&old_rl, &new_rl)) || + (new_state != MLX5_SQC_STATE_RDY)) { mlx5_rl_remove_rate(dev, &old_rl); + if (new_state != MLX5_SQC_STATE_RDY) + memset(&new_rl, 0, sizeof(new_rl)); + } ibqp->rl = new_rl; sq->state = new_state; diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 5136b835e1ba..dc71b6e16a07 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -76,7 +76,7 @@ static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str) struct qedr_dev *qedr = get_qedr_dev(ibdev); u32 fw_ver = (u32)qedr->attr.fw_ver; - snprintf(str, IB_FW_VERSION_NAME_MAX, "%d. %d. %d. %d", + snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d.%d", (fw_ver >> 24) & 0xFF, (fw_ver >> 16) & 0xFF, (fw_ver >> 8) & 0xFF, fw_ver & 0xFF); } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 6cac0c88cf39..36cdfbdbd325 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -230,8 +230,6 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) pvrdma_page_dir_cleanup(dev, &srq->pdir); - kfree(srq); - atomic_dec(&dev->num_srqs); } diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 430314c8abd9..b4317480cee7 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -182,12 +182,19 @@ void siw_qp_llp_close(struct siw_qp *qp) */ void siw_qp_llp_write_space(struct sock *sk) { - struct siw_cep *cep = sk_to_cep(sk); + struct siw_cep *cep; - cep->sk_write_space(sk); + read_lock(&sk->sk_callback_lock); + + cep = sk_to_cep(sk); + if (cep) { + cep->sk_write_space(sk); - if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) - (void)siw_sq_start(cep->qp); + if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) + (void)siw_sq_start(cep->qp); + } + + read_unlock(&sk->sk_callback_lock); } static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) @@ -1305,6 +1312,7 @@ int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp) void siw_free_qp(struct kref *ref) { struct siw_qp *found, *qp = container_of(ref, struct siw_qp, ref); + struct siw_base_qp *siw_base_qp = to_siw_base_qp(qp->ib_qp); struct siw_device *sdev = qp->sdev; unsigned long flags; @@ -1327,4 +1335,5 @@ void siw_free_qp(struct kref *ref) atomic_dec(&sdev->num_qp); siw_dbg_qp(qp, "free QP\n"); kfree_rcu(qp, rcu); + kfree(siw_base_qp); } diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 869e02b69a01..b18a677832e1 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -604,7 +604,6 @@ out: int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) { struct siw_qp *qp = to_siw_qp(base_qp); - struct siw_base_qp *siw_base_qp = to_siw_base_qp(base_qp); struct siw_ucontext *uctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); @@ -641,7 +640,6 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) qp->scq = qp->rcq = NULL; siw_qp_put(qp); - kfree(siw_base_qp); return 0; } |