summaryrefslogtreecommitdiff
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/core/addr.c11
-rw-r--r--drivers/infiniband/core/cm.c126
-rw-r--r--drivers/infiniband/core/cma.c77
-rw-r--r--drivers/infiniband/core/core_priv.h9
-rw-r--r--drivers/infiniband/core/roce_gid_mgmt.c38
-rw-r--r--drivers/infiniband/core/umem.c2
-rw-r--r--drivers/infiniband/core/umem_odp.c2
-rw-r--r--drivers/infiniband/core/uverbs_main.c7
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c17
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c1
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h2
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c12
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c20
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c72
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.h4
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c27
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h3
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c37
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c19
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h89
-rw-r--r--drivers/infiniband/hw/hfi1/init.c104
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c3
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c13
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c2
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c19
-rw-r--r--drivers/infiniband/hw/hfi1/sysfs.c25
-rw-r--r--drivers/infiniband/hw/hfi1/trace_rx.h60
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c2
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c5
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c5
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c3
-rw-r--r--drivers/infiniband/hw/mlx5/main.c25
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h2
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c6
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c13
-rw-r--r--drivers/infiniband/hw/nes/nes.c1
-rw-r--r--drivers/infiniband/hw/nes/nes.h4
-rw-r--r--drivers/infiniband/hw/nes/nes_nic.c10
-rw-r--r--drivers/infiniband/hw/qedr/Kconfig1
-rw-r--r--drivers/infiniband/sw/rdmavt/dma.c3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c8
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_queue.c9
-rw-r--r--drivers/infiniband/sw/rxe/rxe_queue.h2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c22
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h20
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c15
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c12
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c92
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c6
50 files changed, 547 insertions, 522 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index b136d3acc5bd..0f58f46dbad7 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -699,13 +699,16 @@ EXPORT_SYMBOL(rdma_addr_cancel);
struct resolve_cb_context {
struct rdma_dev_addr *addr;
struct completion comp;
+ int status;
};
static void resolve_cb(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context)
{
- memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
- rdma_dev_addr));
+ if (!status)
+ memcpy(((struct resolve_cb_context *)context)->addr,
+ addr, sizeof(struct rdma_dev_addr));
+ ((struct resolve_cb_context *)context)->status = status;
complete(&((struct resolve_cb_context *)context)->comp);
}
@@ -743,6 +746,10 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
wait_for_completion(&ctx.comp);
+ ret = ctx.status;
+ if (ret)
+ return ret;
+
memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
if (!dev)
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 0d4b114b4011..cf1edfa1cbac 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -128,6 +128,8 @@ static struct ib_cm {
__be32 random_id_operand;
struct list_head timewait_list;
struct workqueue_struct *wq;
+ /* Sync on cm change port state */
+ spinlock_t state_lock;
} cm;
/* Counter indexes ordered by attribute ID */
@@ -209,6 +211,8 @@ struct cm_port {
struct ib_mad_agent *mad_agent;
struct kobject port_obj;
u8 port_num;
+ struct list_head cm_priv_prim_list;
+ struct list_head cm_priv_altr_list;
struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
};
@@ -289,6 +293,12 @@ struct cm_id_private {
u8 service_timeout;
u8 target_ack_delay;
+ struct list_head prim_list;
+ struct list_head altr_list;
+ /* Indicates that the send port mad is registered and av is set */
+ int prim_send_port_not_ready;
+ int altr_send_port_not_ready;
+
struct list_head work_list;
atomic_t work_count;
};
@@ -307,20 +317,47 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
struct ib_mad_agent *mad_agent;
struct ib_mad_send_buf *m;
struct ib_ah *ah;
+ struct cm_av *av;
+ unsigned long flags, flags2;
+ int ret = 0;
+ /* don't let the port to be released till the agent is down */
+ spin_lock_irqsave(&cm.state_lock, flags2);
+ spin_lock_irqsave(&cm.lock, flags);
+ if (!cm_id_priv->prim_send_port_not_ready)
+ av = &cm_id_priv->av;
+ else if (!cm_id_priv->altr_send_port_not_ready &&
+ (cm_id_priv->alt_av.port))
+ av = &cm_id_priv->alt_av;
+ else {
+ pr_info("%s: not valid CM id\n", __func__);
+ ret = -ENODEV;
+ spin_unlock_irqrestore(&cm.lock, flags);
+ goto out;
+ }
+ spin_unlock_irqrestore(&cm.lock, flags);
+ /* Make sure the port haven't released the mad yet */
mad_agent = cm_id_priv->av.port->mad_agent;
- ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr);
- if (IS_ERR(ah))
- return PTR_ERR(ah);
+ if (!mad_agent) {
+ pr_info("%s: not a valid MAD agent\n", __func__);
+ ret = -ENODEV;
+ goto out;
+ }
+ ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr);
+ if (IS_ERR(ah)) {
+ ret = PTR_ERR(ah);
+ goto out;
+ }
m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
- cm_id_priv->av.pkey_index,
+ av->pkey_index,
0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_ATOMIC,
IB_MGMT_BASE_VERSION);
if (IS_ERR(m)) {
ib_destroy_ah(ah);
- return PTR_ERR(m);
+ ret = PTR_ERR(m);
+ goto out;
}
/* Timeout set by caller if response is expected. */
@@ -330,7 +367,10 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
atomic_inc(&cm_id_priv->refcount);
m->context[0] = cm_id_priv;
*msg = m;
- return 0;
+
+out:
+ spin_unlock_irqrestore(&cm.state_lock, flags2);
+ return ret;
}
static int cm_alloc_response_msg(struct cm_port *port,
@@ -400,7 +440,8 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
grh, &av->ah_attr);
}
-static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
+static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
+ struct cm_id_private *cm_id_priv)
{
struct cm_device *cm_dev;
struct cm_port *port = NULL;
@@ -435,7 +476,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
&av->ah_attr);
av->timeout = path->packet_life_time + 1;
- return 0;
+ spin_lock_irqsave(&cm.lock, flags);
+ if (&cm_id_priv->av == av)
+ list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
+ else if (&cm_id_priv->alt_av == av)
+ list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
+ else
+ ret = -EINVAL;
+
+ spin_unlock_irqrestore(&cm.lock, flags);
+
+ return ret;
}
static int cm_alloc_id(struct cm_id_private *cm_id_priv)
@@ -725,6 +776,8 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
spin_lock_init(&cm_id_priv->lock);
init_completion(&cm_id_priv->comp);
INIT_LIST_HEAD(&cm_id_priv->work_list);
+ INIT_LIST_HEAD(&cm_id_priv->prim_list);
+ INIT_LIST_HEAD(&cm_id_priv->altr_list);
atomic_set(&cm_id_priv->work_count, -1);
atomic_set(&cm_id_priv->refcount, 1);
return &cm_id_priv->id;
@@ -940,6 +993,15 @@ retest:
break;
}
+ spin_lock_irq(&cm.lock);
+ if (!list_empty(&cm_id_priv->altr_list) &&
+ (!cm_id_priv->altr_send_port_not_ready))
+ list_del(&cm_id_priv->altr_list);
+ if (!list_empty(&cm_id_priv->prim_list) &&
+ (!cm_id_priv->prim_send_port_not_ready))
+ list_del(&cm_id_priv->prim_list);
+ spin_unlock_irq(&cm.lock);
+
cm_free_id(cm_id->local_id);
cm_deref_id(cm_id_priv);
wait_for_completion(&cm_id_priv->comp);
@@ -1240,12 +1302,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
goto out;
}
- ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
+ ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av,
+ cm_id_priv);
if (ret)
goto error1;
if (param->alternate_path) {
ret = cm_init_av_by_path(param->alternate_path,
- &cm_id_priv->alt_av);
+ &cm_id_priv->alt_av, cm_id_priv);
if (ret)
goto error1;
}
@@ -1710,7 +1773,8 @@ static int cm_req_handler(struct cm_work *work)
dev_put(gid_attr.ndev);
}
work->path[0].gid_type = gid_attr.gid_type;
- ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+ ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
+ cm_id_priv);
}
if (ret) {
int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
@@ -1729,7 +1793,8 @@ static int cm_req_handler(struct cm_work *work)
goto rejected;
}
if (req_msg->alt_local_lid) {
- ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
+ ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av,
+ cm_id_priv);
if (ret) {
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
&work->path[0].sgid,
@@ -2797,7 +2862,8 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
goto out;
}
- ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
+ ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av,
+ cm_id_priv);
if (ret)
goto out;
cm_id_priv->alt_av.timeout =
@@ -2909,7 +2975,8 @@ static int cm_lap_handler(struct cm_work *work)
cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av);
- cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av);
+ cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
+ cm_id_priv);
ret = atomic_inc_and_test(&cm_id_priv->work_count);
if (!ret)
list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -3101,7 +3168,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
return -EINVAL;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
+ ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv);
if (ret)
goto out;
@@ -3538,7 +3605,9 @@ out:
static int cm_migrate(struct ib_cm_id *cm_id)
{
struct cm_id_private *cm_id_priv;
+ struct cm_av tmp_av;
unsigned long flags;
+ int tmp_send_port_not_ready;
int ret = 0;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
@@ -3547,7 +3616,14 @@ static int cm_migrate(struct ib_cm_id *cm_id)
(cm_id->lap_state == IB_CM_LAP_UNINIT ||
cm_id->lap_state == IB_CM_LAP_IDLE)) {
cm_id->lap_state = IB_CM_LAP_IDLE;
+ /* Swap address vector */
+ tmp_av = cm_id_priv->av;
cm_id_priv->av = cm_id_priv->alt_av;
+ cm_id_priv->alt_av = tmp_av;
+ /* Swap port send ready state */
+ tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
+ cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
+ cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
} else
ret = -EINVAL;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -3958,6 +4034,9 @@ static void cm_add_one(struct ib_device *ib_device)
port->cm_dev = cm_dev;
port->port_num = i;
+ INIT_LIST_HEAD(&port->cm_priv_prim_list);
+ INIT_LIST_HEAD(&port->cm_priv_altr_list);
+
ret = cm_create_port_fs(port);
if (ret)
goto error1;
@@ -4015,6 +4094,8 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
{
struct cm_device *cm_dev = client_data;
struct cm_port *port;
+ struct cm_id_private *cm_id_priv;
+ struct ib_mad_agent *cur_mad_agent;
struct ib_port_modify port_modify = {
.clr_port_cap_mask = IB_PORT_CM_SUP
};
@@ -4038,15 +4119,27 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
port = cm_dev->port[i-1];
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
+ /* Mark all the cm_id's as not valid */
+ spin_lock_irq(&cm.lock);
+ list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
+ cm_id_priv->altr_send_port_not_ready = 1;
+ list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
+ cm_id_priv->prim_send_port_not_ready = 1;
+ spin_unlock_irq(&cm.lock);
/*
* We flush the queue here after the going_down set, this
* verify that no new works will be queued in the recv handler,
* after that we can call the unregister_mad_agent
*/
flush_workqueue(cm.wq);
- ib_unregister_mad_agent(port->mad_agent);
+ spin_lock_irq(&cm.state_lock);
+ cur_mad_agent = port->mad_agent;
+ port->mad_agent = NULL;
+ spin_unlock_irq(&cm.state_lock);
+ ib_unregister_mad_agent(cur_mad_agent);
cm_remove_port_fs(port);
}
+
device_unregister(cm_dev->device);
kfree(cm_dev);
}
@@ -4059,6 +4152,7 @@ static int __init ib_cm_init(void)
INIT_LIST_HEAD(&cm.device_list);
rwlock_init(&cm.device_lock);
spin_lock_init(&cm.lock);
+ spin_lock_init(&cm.state_lock);
cm.listen_service_table = RB_ROOT;
cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
cm.remote_id_table = RB_ROOT;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 8c30e3dedebe..e7dcfac877ca 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -159,7 +159,7 @@ static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct workqueue_struct *cma_wq;
-static int cma_pernet_id;
+static unsigned int cma_pernet_id;
struct cma_pernet {
struct idr tcp_ps;
@@ -1137,47 +1137,47 @@ static void cma_save_ib_info(struct sockaddr *src_addr,
}
}
-static void cma_save_ip4_info(struct sockaddr *src_addr,
- struct sockaddr *dst_addr,
+static void cma_save_ip4_info(struct sockaddr_in *src_addr,
+ struct sockaddr_in *dst_addr,
struct cma_hdr *hdr,
__be16 local_port)
{
- struct sockaddr_in *ip4;
-
if (src_addr) {
- ip4 = (struct sockaddr_in *)src_addr;
- ip4->sin_family = AF_INET;
- ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr;
- ip4->sin_port = local_port;
+ *src_addr = (struct sockaddr_in) {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = hdr->dst_addr.ip4.addr,
+ .sin_port = local_port,
+ };
}
if (dst_addr) {
- ip4 = (struct sockaddr_in *)dst_addr;
- ip4->sin_family = AF_INET;
- ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr;
- ip4->sin_port = hdr->port;
+ *dst_addr = (struct sockaddr_in) {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = hdr->src_addr.ip4.addr,
+ .sin_port = hdr->port,
+ };
}
}
-static void cma_save_ip6_info(struct sockaddr *src_addr,
- struct sockaddr *dst_addr,
+static void cma_save_ip6_info(struct sockaddr_in6 *src_addr,
+ struct sockaddr_in6 *dst_addr,
struct cma_hdr *hdr,
__be16 local_port)
{
- struct sockaddr_in6 *ip6;
-
if (src_addr) {
- ip6 = (struct sockaddr_in6 *)src_addr;
- ip6->sin6_family = AF_INET6;
- ip6->sin6_addr = hdr->dst_addr.ip6;
- ip6->sin6_port = local_port;
+ *src_addr = (struct sockaddr_in6) {
+ .sin6_family = AF_INET6,
+ .sin6_addr = hdr->dst_addr.ip6,
+ .sin6_port = local_port,
+ };
}
if (dst_addr) {
- ip6 = (struct sockaddr_in6 *)dst_addr;
- ip6->sin6_family = AF_INET6;
- ip6->sin6_addr = hdr->src_addr.ip6;
- ip6->sin6_port = hdr->port;
+ *dst_addr = (struct sockaddr_in6) {
+ .sin6_family = AF_INET6,
+ .sin6_addr = hdr->src_addr.ip6,
+ .sin6_port = hdr->port,
+ };
}
}
@@ -1202,10 +1202,12 @@ static int cma_save_ip_info(struct sockaddr *src_addr,
switch (cma_get_ip_ver(hdr)) {
case 4:
- cma_save_ip4_info(src_addr, dst_addr, hdr, port);
+ cma_save_ip4_info((struct sockaddr_in *)src_addr,
+ (struct sockaddr_in *)dst_addr, hdr, port);
break;
case 6:
- cma_save_ip6_info(src_addr, dst_addr, hdr, port);
+ cma_save_ip6_info((struct sockaddr_in6 *)src_addr,
+ (struct sockaddr_in6 *)dst_addr, hdr, port);
break;
default:
return -EAFNOSUPPORT;
@@ -2479,6 +2481,18 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
return 0;
}
+static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type,
+ unsigned long supported_gids,
+ enum ib_gid_type default_gid)
+{
+ if ((network_type == RDMA_NETWORK_IPV4 ||
+ network_type == RDMA_NETWORK_IPV6) &&
+ test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids))
+ return IB_GID_TYPE_ROCE_UDP_ENCAP;
+
+ return default_gid;
+}
+
static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
{
struct rdma_route *route = &id_priv->id.route;
@@ -2504,6 +2518,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->num_paths = 1;
if (addr->dev_addr.bound_dev_if) {
+ unsigned long supported_gids;
+
ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
if (!ndev) {
ret = -ENODEV;
@@ -2527,7 +2543,12 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->path_rec->net = &init_net;
route->path_rec->ifindex = ndev->ifindex;
- route->path_rec->gid_type = id_priv->gid_type;
+ supported_gids = roce_gid_type_mask_support(id_priv->id.device,
+ id_priv->id.port_num);
+ route->path_rec->gid_type =
+ cma_route_gid_type(addr->dev_addr.network,
+ supported_gids,
+ id_priv->gid_type);
}
if (!ndev) {
ret = -ENODEV;
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 1acc95b3aaa3..d29372624f3a 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -124,14 +124,7 @@ void ib_cache_release_one(struct ib_device *device);
static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
struct net_device *upper)
{
- struct net_device *_upper = NULL;
- struct list_head *iter;
-
- netdev_for_each_all_upper_dev_rcu(dev, _upper, iter)
- if (_upper == upper)
- break;
-
- return _upper == upper;
+ return netdev_has_upper_dev_all_rcu(dev, upper);
}
int addr_init(void);
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index c86ddcea7675..0621f4455732 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -434,6 +434,26 @@ static void callback_for_addr_gid_device_scan(struct ib_device *device,
&parsed->gid_attr);
}
+struct upper_list {
+ struct list_head list;
+ struct net_device *upper;
+};
+
+static int netdev_upper_walk(struct net_device *upper, void *data)
+{
+ struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+ struct list_head *upper_list = data;
+
+ if (!entry)
+ return 0;
+
+ list_add_tail(&entry->list, upper_list);
+ dev_hold(upper);
+ entry->upper = upper;
+
+ return 0;
+}
+
static void handle_netdev_upper(struct ib_device *ib_dev, u8 port,
void *cookie,
void (*handle_netdev)(struct ib_device *ib_dev,
@@ -441,28 +461,12 @@ static void handle_netdev_upper(struct ib_device *ib_dev, u8 port,
struct net_device *ndev))
{
struct net_device *ndev = (struct net_device *)cookie;
- struct upper_list {
- struct list_head list;
- struct net_device *upper;
- };
- struct net_device *upper;
- struct list_head *iter;
struct upper_list *upper_iter;
struct upper_list *upper_temp;
LIST_HEAD(upper_list);
rcu_read_lock();
- netdev_for_each_all_upper_dev_rcu(ndev, upper, iter) {
- struct upper_list *entry = kmalloc(sizeof(*entry),
- GFP_ATOMIC);
-
- if (!entry)
- continue;
-
- list_add_tail(&entry->list, &upper_list);
- dev_hold(upper);
- entry->upper = upper;
- }
+ netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &upper_list);
rcu_read_unlock();
handle_netdev(ib_dev, port, ndev);
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 0120e7ff449f..1e62a5f0cb28 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -175,7 +175,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
cur_base = addr & PAGE_MASK;
- if (npages == 0) {
+ if (npages == 0 || npages > UINT_MAX) {
ret = -EINVAL;
goto out;
}
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 1f0fe3217f23..6b079a31dced 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -578,7 +578,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
*/
npages = get_user_pages_remote(owning_process, owning_mm,
user_virt, gup_num_pages,
- flags, local_page_list, NULL);
+ flags, local_page_list, NULL, NULL);
up_read(&owning_mm->mmap_sem);
if (npages < 0)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 257d0799b526..813593550c4b 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -263,12 +263,9 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
container_of(uobj, struct ib_uqp_object, uevent.uobject);
idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
- if (qp != qp->real_qp) {
- ib_close_qp(qp);
- } else {
+ if (qp == qp->real_qp)
ib_uverbs_detach_umcast(qp, uqp);
- ib_destroy_qp(qp);
- }
+ ib_destroy_qp(qp);
ib_uverbs_release_uevent(file, &uqp->uevent);
kfree(uqp);
}
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 867b8cf82be8..19c6477af19f 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -666,18 +666,6 @@ skip_cqe:
return ret;
}
-static void invalidate_mr(struct c4iw_dev *rhp, u32 rkey)
-{
- struct c4iw_mr *mhp;
- unsigned long flags;
-
- spin_lock_irqsave(&rhp->lock, flags);
- mhp = get_mhp(rhp, rkey >> 8);
- if (mhp)
- mhp->attr.state = 0;
- spin_unlock_irqrestore(&rhp->lock, flags);
-}
-
/*
* Get one cq entry from c4iw and map it to openib.
*
@@ -733,7 +721,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
wc->wc_flags |= IB_WC_WITH_INVALIDATE;
- invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
+ c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
}
} else {
switch (CQE_OPCODE(&cqe)) {
@@ -762,7 +750,8 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
/* Invalidate the MR if the fastreg failed */
if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS)
- invalidate_mr(qhp->rhp, CQE_WRID_FR_STAG(&cqe));
+ c4iw_invalidate_mr(qhp->rhp,
+ CQE_WRID_FR_STAG(&cqe));
break;
default:
printk(KERN_ERR MOD "Unexpected opcode %d "
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index cc7cf18411b8..516b0ae6dc3f 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -1479,6 +1479,7 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
static struct cxgb4_uld_info c4iw_uld_info = {
.name = DRV_NAME,
.nrxq = MAX_ULD_QSETS,
+ .ntxq = MAX_ULD_QSETS,
.rxq_size = 511,
.ciq = true,
.lro = false,
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 7e7f79e55006..4788e1a46fde 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -999,6 +999,6 @@ extern int db_coalescing_threshold;
extern int use_dsgl;
void c4iw_drain_rq(struct ib_qp *qp);
void c4iw_drain_sq(struct ib_qp *qp);
-
+void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey);
#endif
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 80e27749420a..410408f886c1 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -770,3 +770,15 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
kfree(mhp);
return 0;
}
+
+void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey)
+{
+ struct c4iw_mr *mhp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rhp->lock, flags);
+ mhp = get_mhp(rhp, rkey >> 8);
+ if (mhp)
+ mhp->attr.state = 0;
+ spin_unlock_irqrestore(&rhp->lock, flags);
+}
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index f57deba6717c..b7ac97b27c88 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -706,12 +706,8 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
return 0;
}
-static int build_inv_stag(struct c4iw_dev *dev, union t4_wr *wqe,
- struct ib_send_wr *wr, u8 *len16)
+static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
{
- struct c4iw_mr *mhp = get_mhp(dev, wr->ex.invalidate_rkey >> 8);
-
- mhp->attr.state = 0;
wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
wqe->inv.r2 = 0;
*len16 = DIV_ROUND_UP(sizeof wqe->inv, 16);
@@ -797,11 +793,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
spin_lock_irqsave(&qhp->lock, flag);
if (t4_wq_in_error(&qhp->wq)) {
spin_unlock_irqrestore(&qhp->lock, flag);
+ *bad_wr = wr;
return -EINVAL;
}
num_wrs = t4_sq_avail(&qhp->wq);
if (num_wrs == 0) {
spin_unlock_irqrestore(&qhp->lock, flag);
+ *bad_wr = wr;
return -ENOMEM;
}
while (wr) {
@@ -840,10 +838,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_RDMA_READ_WITH_INV:
fw_opcode = FW_RI_RDMA_READ_WR;
swsqe->opcode = FW_RI_READ_REQ;
- if (wr->opcode == IB_WR_RDMA_READ_WITH_INV)
+ if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) {
+ c4iw_invalidate_mr(qhp->rhp,
+ wr->sg_list[0].lkey);
fw_flags = FW_RI_RDMA_READ_INVALIDATE;
- else
+ } else {
fw_flags = 0;
+ }
err = build_rdma_read(wqe, wr, &len16);
if (err)
break;
@@ -876,7 +877,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
fw_flags |= FW_RI_LOCAL_FENCE_FLAG;
fw_opcode = FW_RI_INV_LSTAG_WR;
swsqe->opcode = FW_RI_LOCAL_INV;
- err = build_inv_stag(qhp->rhp, wqe, wr, &len16);
+ err = build_inv_stag(wqe, wr, &len16);
+ c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey);
break;
default:
PDBG("%s post of type=%d TBD!\n", __func__,
@@ -934,11 +936,13 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
spin_lock_irqsave(&qhp->lock, flag);
if (t4_wq_in_error(&qhp->wq)) {
spin_unlock_irqrestore(&qhp->lock, flag);
+ *bad_wr = wr;
return -EINVAL;
}
num_wrs = t4_rq_avail(&qhp->wq);
if (num_wrs == 0) {
spin_unlock_irqrestore(&qhp->lock, flag);
+ *bad_wr = wr;
return -ENOMEM;
}
while (wr) {
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 4962b6ef1f34..7a3d906b3671 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -776,75 +776,3 @@ void hfi1_put_proc_affinity(int cpu)
}
mutex_unlock(&affinity->lock);
}
-
-int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
- size_t count)
-{
- struct hfi1_affinity_node *entry;
- cpumask_var_t mask;
- int ret, i;
-
- mutex_lock(&node_affinity.lock);
- entry = node_affinity_lookup(dd->node);
-
- if (!entry) {
- ret = -EINVAL;
- goto unlock;
- }
-
- ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
- if (!ret) {
- ret = -ENOMEM;
- goto unlock;
- }
-
- ret = cpulist_parse(buf, mask);
- if (ret)
- goto out;
-
- if (!cpumask_subset(mask, cpu_online_mask) || cpumask_empty(mask)) {
- dd_dev_warn(dd, "Invalid CPU mask\n");
- ret = -EINVAL;
- goto out;
- }
-
- /* reset the SDMA interrupt affinity details */
- init_cpu_mask_set(&entry->def_intr);
- cpumask_copy(&entry->def_intr.mask, mask);
-
- /* Reassign the affinity for each SDMA interrupt. */
- for (i = 0; i < dd->num_msix_entries; i++) {
- struct hfi1_msix_entry *msix;
-
- msix = &dd->msix_entries[i];
- if (msix->type != IRQ_SDMA)
- continue;
-
- ret = get_irq_affinity(dd, msix);
-
- if (ret)
- break;
- }
-out:
- free_cpumask_var(mask);
-unlock:
- mutex_unlock(&node_affinity.lock);
- return ret ? ret : strnlen(buf, PAGE_SIZE);
-}
-
-int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf)
-{
- struct hfi1_affinity_node *entry;
-
- mutex_lock(&node_affinity.lock);
- entry = node_affinity_lookup(dd->node);
-
- if (!entry) {
- mutex_unlock(&node_affinity.lock);
- return -EINVAL;
- }
-
- cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask);
- mutex_unlock(&node_affinity.lock);
- return strnlen(buf, PAGE_SIZE);
-}
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index c9453b3d47b4..e78c7aa094e0 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -94,10 +94,6 @@ int hfi1_get_proc_affinity(int);
/* Release a CPU used by a user process. */
void hfi1_put_proc_affinity(int);
-int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf);
-int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
- size_t count);
-
struct hfi1_affinity_node {
int node;
struct cpu_mask_set def_intr;
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 37d8af50cc13..ef72bc2a9e1d 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -6301,19 +6301,8 @@ void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf)
/* leave shared count at zero for both global and VL15 */
write_global_credit(dd, vau, vl15buf, 0);
- /* We may need some credits for another VL when sending packets
- * with the snoop interface. Dividing it down the middle for VL15
- * and VL0 should suffice.
- */
- if (unlikely(dd->hfi1_snoop.mode_flag == HFI1_PORT_SNOOP_MODE)) {
- write_csr(dd, SEND_CM_CREDIT_VL15, (u64)(vl15buf >> 1)
- << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
- write_csr(dd, SEND_CM_CREDIT_VL, (u64)(vl15buf >> 1)
- << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT);
- } else {
- write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
- << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
- }
+ write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
+ << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
}
/*
@@ -9918,9 +9907,6 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
u32 mask = ~((1U << ppd->lmc) - 1);
u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1);
- if (dd->hfi1_snoop.mode_flag)
- dd_dev_info(dd, "Set lid/lmc while snooping");
-
c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
| DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
@@ -12115,7 +12101,7 @@ static void update_synth_timer(unsigned long opaque)
mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
}
-#define C_MAX_NAME 13 /* 12 chars + one for /0 */
+#define C_MAX_NAME 16 /* 15 chars + one for /0 */
static int init_cntrs(struct hfi1_devdata *dd)
{
int i, rcv_ctxts, j;
@@ -14466,7 +14452,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
* Any error printing is already done by the init code.
* On return, we have the chip mapped.
*/
- ret = hfi1_pcie_ddinit(dd, pdev, ent);
+ ret = hfi1_pcie_ddinit(dd, pdev);
if (ret < 0)
goto bail_free;
@@ -14694,6 +14680,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_free_cntrs;
+ init_completion(&dd->user_comp);
+
+ /* The user refcount starts with one to inidicate an active device */
+ atomic_set(&dd->user_refcount, 1);
+
goto bail;
bail_free_rcverr:
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 92345259a8f4..043fd21dc5f3 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -320,6 +320,9 @@
/* DC_DC8051_CFG_MODE.GENERAL bits */
#define DISABLE_SELF_GUID_CHECK 0x2
+/* Bad L2 frame error code */
+#define BAD_L2_ERR 0x6
+
/*
* Eager buffer minimum and maximum sizes supported by the hardware.
* All power-of-two sizes in between are supported as well.
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index d4261163bd25..4fbaee68012b 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -599,7 +599,6 @@ static void __prescan_rxq(struct hfi1_packet *packet)
dd->rhf_offset;
struct rvt_qp *qp;
struct ib_header *hdr;
- struct ib_other_headers *ohdr;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
u64 rhf = rhf_to_cpu(rhf_addr);
u32 etype = rhf_rcv_type(rhf), qpn, bth1;
@@ -615,18 +614,21 @@ static void __prescan_rxq(struct hfi1_packet *packet)
if (etype != RHF_RCV_TYPE_IB)
goto next;
- hdr = hfi1_get_msgheader(dd, rhf_addr);
+ packet->hdr = hfi1_get_msgheader(dd, rhf_addr);
+ hdr = packet->hdr;
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
- if (lnh == HFI1_LRH_BTH)
- ohdr = &hdr->u.oth;
- else if (lnh == HFI1_LRH_GRH)
- ohdr = &hdr->u.l.oth;
- else
+ if (lnh == HFI1_LRH_BTH) {
+ packet->ohdr = &hdr->u.oth;
+ } else if (lnh == HFI1_LRH_GRH) {
+ packet->ohdr = &hdr->u.l.oth;
+ packet->rcv_flags |= HFI1_HAS_GRH;
+ } else {
goto next; /* just in case */
+ }
- bth1 = be32_to_cpu(ohdr->bth[1]);
+ bth1 = be32_to_cpu(packet->ohdr->bth[1]);
is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK));
if (!is_ecn)
@@ -646,7 +648,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
/* turn off BECN, FECN */
bth1 &= ~(HFI1_FECN_SMASK | HFI1_BECN_SMASK);
- ohdr->bth[1] = cpu_to_be32(bth1);
+ packet->ohdr->bth[1] = cpu_to_be32(bth1);
next:
update_ps_mdata(&mdata, rcd);
}
@@ -1359,12 +1361,25 @@ int process_receive_ib(struct hfi1_packet *packet)
int process_receive_bypass(struct hfi1_packet *packet)
{
+ struct hfi1_devdata *dd = packet->rcd->dd;
+
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
- dd_dev_err(packet->rcd->dd,
+ dd_dev_err(dd,
"Bypass packets are not supported in normal operation. Dropping\n");
- incr_cntr64(&packet->rcd->dd->sw_rcv_bypass_packet_errors);
+ incr_cntr64(&dd->sw_rcv_bypass_packet_errors);
+ if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) {
+ u64 *flits = packet->ebuf;
+
+ if (flits && !(packet->rhf & RHF_LEN_ERR)) {
+ dd->err_info_rcvport.packet_flit1 = flits[0];
+ dd->err_info_rcvport.packet_flit2 =
+ packet->tlen > sizeof(flits[0]) ? flits[1] : 0;
+ }
+ dd->err_info_rcvport.status_and_code |=
+ (OPA_EI_STATUS_SMASK | BAD_L2_ERR);
+ }
return RHF_RCV_CONTINUE;
}
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 677efa0e8cd6..bd786b7bd30b 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -172,6 +172,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
struct hfi1_devdata,
user_cdev);
+ if (!atomic_inc_not_zero(&dd->user_refcount))
+ return -ENXIO;
+
/* Just take a ref now. Not all opens result in a context assign */
kobject_get(&dd->kobj);
@@ -183,11 +186,17 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
fd->rec_cpu_num = -1; /* no cpu affinity by default */
fd->mm = current->mm;
atomic_inc(&fd->mm->mm_count);
- }
+ fp->private_data = fd;
+ } else {
+ fp->private_data = NULL;
+
+ if (atomic_dec_and_test(&dd->user_refcount))
+ complete(&dd->user_comp);
- fp->private_data = fd;
+ return -ENOMEM;
+ }
- return fd ? 0 : -ENOMEM;
+ return 0;
}
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
@@ -798,6 +807,10 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
done:
mmdrop(fdata->mm);
kobject_put(&dd->kobj);
+
+ if (atomic_dec_and_test(&dd->user_refcount))
+ complete(&dd->user_comp);
+
kfree(fdata);
return 0;
}
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 4163596ce4c9..751a0fb29fa5 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -367,26 +367,6 @@ struct hfi1_packet {
u8 etype;
};
-/*
- * Private data for snoop/capture support.
- */
-struct hfi1_snoop_data {
- int mode_flag;
- struct cdev cdev;
- struct device *class_dev;
- /* protect snoop data */
- spinlock_t snoop_lock;
- struct list_head queue;
- wait_queue_head_t waitq;
- void *filter_value;
- int (*filter_callback)(void *hdr, void *data, void *value);
- u64 dcc_cfg; /* saved value of DCC Cfg register */
-};
-
-/* snoop mode_flag values */
-#define HFI1_PORT_SNOOP_MODE 1U
-#define HFI1_PORT_CAPTURE_MODE 2U
-
struct rvt_sge_state;
/*
@@ -625,8 +605,6 @@ struct hfi1_pportdata {
struct mutex hls_lock;
u32 host_link_state;
- spinlock_t sdma_alllock ____cacheline_aligned_in_smp;
-
u32 lstate; /* logical link state */
/* these are the "32 bit" regs */
@@ -1094,8 +1072,6 @@ struct hfi1_devdata {
char *portcntrnames;
size_t portcntrnameslen;
- struct hfi1_snoop_data hfi1_snoop;
-
struct err_info_rcvport err_info_rcvport;
struct err_info_constraint err_info_rcv_constraint;
struct err_info_constraint err_info_xmit_constraint;
@@ -1133,8 +1109,8 @@ struct hfi1_devdata {
u64 lcb_err_en;
/*
- * Handlers for outgoing data so that snoop/capture does not
- * have to have its hooks in the send path
+ * Capability to have different send engines simply by changing a
+ * pointer value.
*/
send_routine process_pio_send ____cacheline_aligned_in_smp;
send_routine process_dma_send;
@@ -1184,6 +1160,10 @@ struct hfi1_devdata {
spinlock_t aspm_lock;
/* Number of verbs contexts which have disabled ASPM */
atomic_t aspm_disabled_cnt;
+ /* Keeps track of user space clients */
+ atomic_t user_refcount;
+ /* Used to wait for outstanding user space clients before dev removal */
+ struct completion user_comp;
bool eprom_available; /* true if EPROM is available for this device */
bool aspm_supported; /* Does HW support ASPM */
@@ -1234,8 +1214,6 @@ struct hfi1_devdata *hfi1_lookup(int unit);
extern u32 hfi1_cpulist_count;
extern unsigned long *hfi1_cpulist;
-extern unsigned int snoop_drop_send;
-extern unsigned int snoop_force_capture;
int hfi1_init(struct hfi1_devdata *, int);
int hfi1_count_units(int *npresentp, int *nupp);
int hfi1_count_active_units(void);
@@ -1570,13 +1548,6 @@ void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf);
void reset_link_credits(struct hfi1_devdata *dd);
void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu);
-int snoop_recv_handler(struct hfi1_packet *packet);
-int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
- u64 pbc);
-int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
- u64 pbc);
-void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf,
- u64 pbc, const void *from, size_t count);
int set_buffer_control(struct hfi1_pportdata *ppd, struct buffer_control *bc);
static inline struct hfi1_devdata *dd_from_ppd(struct hfi1_pportdata *ppd)
@@ -1787,8 +1758,7 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);
int hfi1_pcie_init(struct pci_dev *, const struct pci_device_id *);
void hfi1_pcie_cleanup(struct pci_dev *);
-int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *,
- const struct pci_device_id *);
+int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *);
void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
void hfi1_pcie_flr(struct hfi1_devdata *);
int pcie_speeds(struct hfi1_devdata *);
@@ -1823,8 +1793,6 @@ int kdeth_process_expected(struct hfi1_packet *packet);
int kdeth_process_eager(struct hfi1_packet *packet);
int process_receive_invalid(struct hfi1_packet *packet);
-extern rhf_rcv_function_ptr snoop_rhf_rcv_functions[8];
-
void update_sge(struct rvt_sge_state *ss, u32 length);
/* global module parameter variables */
@@ -1851,9 +1819,6 @@ extern struct mutex hfi1_mutex;
#define DRIVER_NAME "hfi1"
#define HFI1_USER_MINOR_BASE 0
#define HFI1_TRACE_MINOR 127
-#define HFI1_DIAGPKT_MINOR 128
-#define HFI1_DIAG_MINOR_BASE 129
-#define HFI1_SNOOP_CAPTURE_BASE 200
#define HFI1_NMINORS 255
#define PCI_VENDOR_ID_INTEL 0x8086
@@ -1872,7 +1837,13 @@ extern struct mutex hfi1_mutex;
static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
u16 ctxt_type)
{
- u64 base_sc_integrity =
+ u64 base_sc_integrity;
+
+ /* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */
+ if (HFI1_CAP_IS_KSET(NO_INTEGRITY))
+ return 0;
+
+ base_sc_integrity =
SEND_CTXT_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK
| SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK
| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK
@@ -1887,7 +1858,6 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
| SEND_CTXT_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK
| SEND_CTXT_CHECK_ENABLE_CHECK_OPCODE_SMASK
| SEND_CTXT_CHECK_ENABLE_CHECK_SLID_SMASK
- | SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK
| SEND_CTXT_CHECK_ENABLE_CHECK_VL_SMASK
| SEND_CTXT_CHECK_ENABLE_CHECK_ENABLE_SMASK;
@@ -1896,18 +1866,23 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
else
base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY;
- if (is_ax(dd))
- /* turn off send-side job key checks - A0 */
- return base_sc_integrity &
- ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+ /* turn on send-side job key checks if !A0 */
+ if (!is_ax(dd))
+ base_sc_integrity |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+
return base_sc_integrity;
}
static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
{
- u64 base_sdma_integrity =
+ u64 base_sdma_integrity;
+
+ /* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */
+ if (HFI1_CAP_IS_KSET(NO_INTEGRITY))
+ return 0;
+
+ base_sdma_integrity =
SEND_DMA_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK
- | SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK
| SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK
| SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK
| SEND_DMA_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK
@@ -1919,14 +1894,18 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
| SEND_DMA_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK
| SEND_DMA_CHECK_ENABLE_CHECK_OPCODE_SMASK
| SEND_DMA_CHECK_ENABLE_CHECK_SLID_SMASK
- | SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK
| SEND_DMA_CHECK_ENABLE_CHECK_VL_SMASK
| SEND_DMA_CHECK_ENABLE_CHECK_ENABLE_SMASK;
- if (is_ax(dd))
- /* turn off send-side job key checks - A0 */
- return base_sdma_integrity &
- ~SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+ if (!HFI1_CAP_IS_KSET(STATIC_RATE_CTRL))
+ base_sdma_integrity |=
+ SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK;
+
+ /* turn on send-side job key checks if !A0 */
+ if (!is_ax(dd))
+ base_sdma_integrity |=
+ SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+
return base_sdma_integrity;
}
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 60db61536fed..e3b5bc93bc70 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -144,6 +144,8 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
struct hfi1_ctxtdata *rcd;
ppd = dd->pport + (i % dd->num_pports);
+
+ /* dd->rcd[i] gets assigned inside the callee */
rcd = hfi1_create_ctxtdata(ppd, i, dd->node);
if (!rcd) {
dd_dev_err(dd,
@@ -169,8 +171,6 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
if (!rcd->sc) {
dd_dev_err(dd,
"Unable to allocate kernel send context, failing\n");
- dd->rcd[rcd->ctxt] = NULL;
- hfi1_free_ctxtdata(dd, rcd);
goto nomem;
}
@@ -178,9 +178,6 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
if (ret < 0) {
dd_dev_err(dd,
"Failed to setup kernel receive context, failing\n");
- sc_free(rcd->sc);
- dd->rcd[rcd->ctxt] = NULL;
- hfi1_free_ctxtdata(dd, rcd);
ret = -EFAULT;
goto bail;
}
@@ -196,6 +193,10 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
nomem:
ret = -ENOMEM;
bail:
+ if (dd->rcd) {
+ for (i = 0; i < dd->num_rcv_contexts; ++i)
+ hfi1_free_ctxtdata(dd, dd->rcd[i]);
+ }
kfree(dd->rcd);
dd->rcd = NULL;
return ret;
@@ -216,7 +217,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
dd->num_rcv_contexts - dd->first_user_ctxt)
kctxt_ngroups = (dd->rcv_entries.nctxt_extra -
(dd->num_rcv_contexts - dd->first_user_ctxt));
- rcd = kzalloc(sizeof(*rcd), GFP_KERNEL);
+ rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa);
if (rcd) {
u32 rcvtids, max_entries;
@@ -261,13 +262,6 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
}
rcd->eager_base = base * dd->rcv_entries.group_size;
- /* Validate and initialize Rcv Hdr Q variables */
- if (rcvhdrcnt % HDRQ_INCREMENT) {
- dd_dev_err(dd,
- "ctxt%u: header queue count %d must be divisible by %lu\n",
- rcd->ctxt, rcvhdrcnt, HDRQ_INCREMENT);
- goto bail;
- }
rcd->rcvhdrq_cnt = rcvhdrcnt;
rcd->rcvhdrqentsize = hfi1_hdrq_entsize;
/*
@@ -506,7 +500,6 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
mutex_init(&ppd->hls_lock);
- spin_lock_init(&ppd->sdma_alllock);
spin_lock_init(&ppd->qsfp_info.qsfp_lock);
ppd->qsfp_info.ppd = ppd;
@@ -1399,28 +1392,43 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
hfi1_free_devdata(dd);
}
+static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt)
+{
+ if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
+ hfi1_early_err(dev, "Receive header queue count too small\n");
+ return -EINVAL;
+ }
+
+ if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
+ hfi1_early_err(dev,
+ "Receive header queue count cannot be greater than %u\n",
+ HFI1_MAX_HDRQ_EGRBUF_CNT);
+ return -EINVAL;
+ }
+
+ if (thecnt % HDRQ_INCREMENT) {
+ hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n",
+ thecnt, HDRQ_INCREMENT);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int ret = 0, j, pidx, initfail;
- struct hfi1_devdata *dd = ERR_PTR(-EINVAL);
+ struct hfi1_devdata *dd;
struct hfi1_pportdata *ppd;
/* First, lock the non-writable module parameters */
HFI1_CAP_LOCK();
/* Validate some global module parameters */
- if (rcvhdrcnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
- hfi1_early_err(&pdev->dev, "Header queue count too small\n");
- ret = -EINVAL;
- goto bail;
- }
- if (rcvhdrcnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
- hfi1_early_err(&pdev->dev,
- "Receive header queue count cannot be greater than %u\n",
- HFI1_MAX_HDRQ_EGRBUF_CNT);
- ret = -EINVAL;
+ ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
+ if (ret)
goto bail;
- }
+
/* use the encoding function as a sanitization check */
if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n",
@@ -1461,26 +1469,25 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (ret)
goto bail;
- /*
- * Do device-specific initialization, function table setup, dd
- * allocation, etc.
- */
- switch (ent->device) {
- case PCI_DEVICE_ID_INTEL0:
- case PCI_DEVICE_ID_INTEL1:
- dd = hfi1_init_dd(pdev, ent);
- break;
- default:
+ if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
+ ent->device == PCI_DEVICE_ID_INTEL1)) {
hfi1_early_err(&pdev->dev,
"Failing on unknown Intel deviceid 0x%x\n",
ent->device);
ret = -ENODEV;
+ goto clean_bail;
}
- if (IS_ERR(dd))
+ /*
+ * Do device-specific initialization, function table setup, dd
+ * allocation, etc.
+ */
+ dd = hfi1_init_dd(pdev, ent);
+
+ if (IS_ERR(dd)) {
ret = PTR_ERR(dd);
- if (ret)
goto clean_bail; /* error already printed */
+ }
ret = create_workqueues(dd);
if (ret)
@@ -1538,12 +1545,31 @@ bail:
return ret;
}
+static void wait_for_clients(struct hfi1_devdata *dd)
+{
+ /*
+ * Remove the device init value and complete the device if there is
+ * no clients or wait for active clients to finish.
+ */
+ if (atomic_dec_and_test(&dd->user_refcount))
+ complete(&dd->user_comp);
+
+ wait_for_completion(&dd->user_comp);
+}
+
static void remove_one(struct pci_dev *pdev)
{
struct hfi1_devdata *dd = pci_get_drvdata(pdev);
/* close debugfs files before ib unregister */
hfi1_dbg_ibdev_exit(&dd->verbs_dev);
+
+ /* remove the /dev hfi1 interface */
+ hfi1_device_remove(dd);
+
+ /* wait for existing user space clients to finish */
+ wait_for_clients(dd);
+
/* unregister from IB core */
hfi1_unregister_ib_device(dd);
@@ -1558,8 +1584,6 @@ static void remove_one(struct pci_dev *pdev)
/* wait until all of our (qsfp) queue_work() calls complete */
flush_workqueue(ib_wq);
- hfi1_device_remove(dd);
-
postinit_cleanup(dd);
}
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 89c68da1c273..4ac8f330c5cb 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -157,8 +157,7 @@ void hfi1_pcie_cleanup(struct pci_dev *pdev)
* fields required to re-initialize after a chip reset, or for
* various other purposes
*/
-int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev,
- const struct pci_device_id *ent)
+int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
{
unsigned long len;
resource_size_t addr;
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 64c9eeb52d86..615be68e40b3 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -668,19 +668,12 @@ void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold)
void set_pio_integrity(struct send_context *sc)
{
struct hfi1_devdata *dd = sc->dd;
- u64 reg = 0;
u32 hw_context = sc->hw_context;
int type = sc->type;
- /*
- * No integrity checks if HFI1_CAP_NO_INTEGRITY is set, or if
- * we're snooping.
- */
- if (likely(!HFI1_CAP_IS_KSET(NO_INTEGRITY)) &&
- dd->hfi1_snoop.mode_flag != HFI1_PORT_SNOOP_MODE)
- reg = hfi1_pkt_default_send_ctxt_mask(dd, type);
-
- write_kctxt_csr(dd, hw_context, SC(CHECK_ENABLE), reg);
+ write_kctxt_csr(dd, hw_context,
+ SC(CHECK_ENABLE),
+ hfi1_pkt_default_send_ctxt_mask(dd, type));
}
static u32 get_buffers_allocated(struct send_context *sc)
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 9db260fe782a..809b26eb6d3c 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -89,7 +89,7 @@ void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to)
lockdep_assert_held(&qp->s_lock);
qp->s_flags |= RVT_S_WAIT_RNR;
- qp->s_timer.expires = jiffies + usecs_to_jiffies(to);
+ priv->s_rnr_timer.expires = jiffies + usecs_to_jiffies(to);
add_timer(&priv->s_rnr_timer);
}
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 7102a076146d..1d81cac1fa6c 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -2009,11 +2009,6 @@ static void sdma_hw_start_up(struct sdma_engine *sde)
write_sde_csr(sde, SD(ENG_ERR_CLEAR), reg);
}
-#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
-(r &= ~SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
-
-#define SET_STATIC_RATE_CONTROL_SMASK(r) \
-(r |= SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
/*
* set_sdma_integrity
*
@@ -2022,19 +2017,9 @@ static void sdma_hw_start_up(struct sdma_engine *sde)
static void set_sdma_integrity(struct sdma_engine *sde)
{
struct hfi1_devdata *dd = sde->dd;
- u64 reg;
-
- if (unlikely(HFI1_CAP_IS_KSET(NO_INTEGRITY)))
- return;
-
- reg = hfi1_pkt_base_sdma_integrity(dd);
-
- if (HFI1_CAP_IS_KSET(STATIC_RATE_CTRL))
- CLEAR_STATIC_RATE_CONTROL_SMASK(reg);
- else
- SET_STATIC_RATE_CONTROL_SMASK(reg);
- write_sde_csr(sde, SD(CHECK_ENABLE), reg);
+ write_sde_csr(sde, SD(CHECK_ENABLE),
+ hfi1_pkt_base_sdma_integrity(dd));
}
static void init_sdma_regs(
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index edba22461a9c..919a5474e651 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -49,7 +49,6 @@
#include "hfi.h"
#include "mad.h"
#include "trace.h"
-#include "affinity.h"
/*
* Start of per-port congestion control structures and support code
@@ -623,27 +622,6 @@ static ssize_t show_tempsense(struct device *device,
return ret;
}
-static ssize_t show_sdma_affinity(struct device *device,
- struct device_attribute *attr, char *buf)
-{
- struct hfi1_ibdev *dev =
- container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
- struct hfi1_devdata *dd = dd_from_dev(dev);
-
- return hfi1_get_sdma_affinity(dd, buf);
-}
-
-static ssize_t store_sdma_affinity(struct device *device,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct hfi1_ibdev *dev =
- container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
- struct hfi1_devdata *dd = dd_from_dev(dev);
-
- return hfi1_set_sdma_affinity(dd, buf, count);
-}
-
/*
* end of per-unit (or driver, in some cases, but replicated
* per unit) functions
@@ -658,8 +636,6 @@ static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
-static DEVICE_ATTR(sdma_affinity, S_IWUSR | S_IRUGO, show_sdma_affinity,
- store_sdma_affinity);
static struct device_attribute *hfi1_attributes[] = {
&dev_attr_hw_rev,
@@ -670,7 +646,6 @@ static struct device_attribute *hfi1_attributes[] = {
&dev_attr_boardversion,
&dev_attr_tempsense,
&dev_attr_chip_reset,
- &dev_attr_sdma_affinity,
};
int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
index 11e02b228922..f77e59fb43fe 100644
--- a/drivers/infiniband/hw/hfi1/trace_rx.h
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -253,66 +253,6 @@ TRACE_EVENT(hfi1_mmu_invalidate,
)
);
-#define SNOOP_PRN \
- "slid %.4x dlid %.4x qpn 0x%.6x opcode 0x%.2x,%s " \
- "svc lvl %d pkey 0x%.4x [header = %d bytes] [data = %d bytes]"
-
-TRACE_EVENT(snoop_capture,
- TP_PROTO(struct hfi1_devdata *dd,
- int hdr_len,
- struct ib_header *hdr,
- int data_len,
- void *data),
- TP_ARGS(dd, hdr_len, hdr, data_len, data),
- TP_STRUCT__entry(
- DD_DEV_ENTRY(dd)
- __field(u16, slid)
- __field(u16, dlid)
- __field(u32, qpn)
- __field(u8, opcode)
- __field(u8, sl)
- __field(u16, pkey)
- __field(u32, hdr_len)
- __field(u32, data_len)
- __field(u8, lnh)
- __dynamic_array(u8, raw_hdr, hdr_len)
- __dynamic_array(u8, raw_pkt, data_len)
- ),
- TP_fast_assign(
- struct ib_other_headers *ohdr;
-
- __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
- if (__entry->lnh == HFI1_LRH_BTH)
- ohdr = &hdr->u.oth;
- else
- ohdr = &hdr->u.l.oth;
- DD_DEV_ASSIGN(dd);
- __entry->slid = be16_to_cpu(hdr->lrh[3]);
- __entry->dlid = be16_to_cpu(hdr->lrh[1]);
- __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
- __entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
- __entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
- __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff;
- __entry->hdr_len = hdr_len;
- __entry->data_len = data_len;
- memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len);
- memcpy(__get_dynamic_array(raw_pkt), data, data_len);
- ),
- TP_printk(
- "[%s] " SNOOP_PRN,
- __get_str(dev),
- __entry->slid,
- __entry->dlid,
- __entry->qpn,
- __entry->opcode,
- show_ib_opcode(__entry->opcode),
- __entry->sl,
- __entry->pkey,
- __entry->hdr_len,
- __entry->data_len
- )
-);
-
#endif /* __HFI1_TRACE_RX_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 663980ef01a8..7d22f8ee98ef 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1166,7 +1166,7 @@ static int pin_vector_pages(struct user_sdma_request *req,
rb_node = hfi1_mmu_rb_extract(pq->handler,
(unsigned long)iovec->iov.iov_base,
iovec->iov.iov_len);
- if (rb_node && !IS_ERR(rb_node))
+ if (rb_node)
node = container_of(rb_node, struct sdma_mmu_node, rb);
else
rb_node = NULL;
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 20c6d17ac8b8..077c33d2dc75 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -102,7 +102,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
if (vlan_tag < 0x1000)
vlan_tag |= (ah_attr->sl & 7) << 13;
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
- ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
+ ret = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ ah->av.eth.gid_index = ret;
ah->av.eth.vlan = cpu_to_be16(vlan_tag);
ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
if (ah_attr->static_rate) {
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 1ea686b9e0f9..6a0fec357dae 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -253,11 +253,14 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
if (context)
if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {
err = -EFAULT;
- goto err_dbmap;
+ goto err_cq_free;
}
return &cq->ibcq;
+err_cq_free:
+ mlx4_cq_free(dev->dev, &cq->mcq);
+
err_dbmap:
if (context)
mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db);
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index d72a4367c891..b3ef47c3ab73 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -960,8 +960,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
if (err)
goto err_create;
} else {
- /* for now choose 64 bytes till we have a proper interface */
- cqe_size = 64;
+ cqe_size = cache_line_size() == 128 ? 128 : 64;
err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
&index, &inlen);
if (err)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index b81736d625fc..d566f6738833 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1056,7 +1056,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
- resp.cache_line_size = L1_CACHE_BYTES;
+ resp.cache_line_size = cache_line_size();
resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
@@ -1931,7 +1931,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
ft = mlx5_create_auto_grouped_flow_table(ns, priority,
num_entries,
num_groups,
- 0);
+ 0, 0);
if (!IS_ERR(ft)) {
prio->refcount = 0;
@@ -1951,10 +1951,10 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
{
struct mlx5_flow_table *ft = ft_prio->flow_table;
struct mlx5_ib_flow_handler *handler;
+ struct mlx5_flow_act flow_act = {0};
struct mlx5_flow_spec *spec;
const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
unsigned int spec_index;
- u32 action;
int err = 0;
if (!is_valid_attr(flow_attr))
@@ -1979,12 +1979,12 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
}
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
- action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+ flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
handler->rule = mlx5_add_flow_rules(ft, spec,
- action,
- MLX5_FS_DEFAULT_FLOW_TAG,
- dst, 1);
+ &flow_act,
+ dst, 1);
if (IS_ERR(handler->rule)) {
err = PTR_ERR(handler->rule);
@@ -2385,14 +2385,14 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
{
struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
struct ib_event ibev;
-
+ bool fatal = false;
u8 port = 0;
switch (event) {
case MLX5_DEV_EVENT_SYS_ERROR:
- ibdev->ib_active = false;
ibev.event = IB_EVENT_DEVICE_FATAL;
mlx5_ib_handle_internal_error(ibdev);
+ fatal = true;
break;
case MLX5_DEV_EVENT_PORT_UP:
@@ -2446,6 +2446,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
if (ibdev->ib_active)
ib_dispatch_event(&ibev);
+
+ if (fatal)
+ ibdev->ib_active = false;
}
static void get_ext_port_caps(struct mlx5_ib_dev *dev)
@@ -3209,7 +3212,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
}
err = init_node_data(dev);
if (err)
- goto err_dealloc;
+ goto err_free_port;
mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
@@ -3219,7 +3222,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
if (ll == IB_LINK_LAYER_ETHERNET) {
err = mlx5_enable_eth(dev);
if (err)
- goto err_dealloc;
+ goto err_free_port;
}
err = create_dev_resources(&dev->devr);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index ab8961cc8bca..6c6057eb60ea 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -629,6 +629,8 @@ struct mlx5_ib_dev {
struct mlx5_ib_resources devr;
struct mlx5_mr_cache cache;
struct timer_list delay_timer;
+ /* Prevents soft lock on massive reg MRs */
+ struct mutex slow_path_mutex;
int fill_delay;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct ib_odp_caps odp_caps;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 67985c69f9b9..8f608debe141 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -610,6 +610,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
int err;
int i;
+ mutex_init(&dev->slow_path_mutex);
cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
if (!cache->wq) {
mlx5_ib_warn(dev, "failed to create work queue\n");
@@ -1216,9 +1217,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto error;
}
- if (!mr)
+ if (!mr) {
+ mutex_lock(&dev->slow_path_mutex);
mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
page_shift, access_flags);
+ mutex_unlock(&dev->slow_path_mutex);
+ }
if (IS_ERR(mr)) {
err = PTR_ERR(mr);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index cc24f2d429b9..a1b3125f0a6e 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -52,7 +52,6 @@ enum {
enum {
MLX5_IB_SQ_STRIDE = 6,
- MLX5_IB_CACHE_LINE_SIZE = 64,
};
static const u32 mlx5_ib_opcode[] = {
@@ -2082,8 +2081,8 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
- to_mcq(init_attr->recv_cq)->mcq.cqn,
- to_mcq(init_attr->send_cq)->mcq.cqn);
+ init_attr->recv_cq ? to_mcq(init_attr->recv_cq)->mcq.cqn : -1,
+ init_attr->send_cq ? to_mcq(init_attr->send_cq)->mcq.cqn : -1);
qp->trans_qp.xrcdn = xrcdn;
@@ -4896,6 +4895,14 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
udata->inlen))
return ERR_PTR(-EOPNOTSUPP);
+ if (init_attr->log_ind_tbl_size >
+ MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) {
+ mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n",
+ init_attr->log_ind_tbl_size,
+ MLX5_CAP_GEN(dev->mdev, log_max_rqt_size));
+ return ERR_PTR(-EINVAL);
+ }
+
min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
if (udata->outlen && udata->outlen < min_resp_len)
return ERR_PTR(-EINVAL);
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 9badd0224ec5..5b9601014f0c 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -65,7 +65,6 @@ MODULE_DESCRIPTION("NetEffect RNIC Low-level iWARP Driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
-int max_mtu = 9000;
int interrupt_mod_interval = 0;
/* Interoperability */
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index e7430c9254d3..85acd0843b50 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -83,6 +83,8 @@
#define NES_FIRST_QPN 64
#define NES_SW_CONTEXT_ALIGN 1024
+#define NES_MAX_MTU 9000
+
#define NES_NIC_MAX_NICS 16
#define NES_MAX_ARP_TABLE_SIZE 4096
@@ -169,8 +171,6 @@ do { \
#include "nes_cm.h"
#include "nes_mgt.h"
-extern int max_mtu;
-#define max_frame_len (max_mtu+ETH_HLEN)
extern int interrupt_mod_interval;
extern int nes_if_count;
extern int mpa_version;
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 4dcfe669ebad..5921ea3d50ae 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -985,20 +985,16 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
{
struct nes_vnic *nesvnic = netdev_priv(netdev);
struct nes_device *nesdev = nesvnic->nesdev;
- int ret = 0;
u8 jumbomode = 0;
u32 nic_active;
u32 nic_active_bit;
u32 uc_all_active;
u32 mc_all_active;
- if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu))
- return -EINVAL;
-
netdev->mtu = new_mtu;
nesvnic->max_frame_size = new_mtu + VLAN_ETH_HLEN;
- if (netdev->mtu > 1500) {
+ if (netdev->mtu > ETH_DATA_LEN) {
jumbomode=1;
}
nes_nic_init_timer_defaults(nesdev, jumbomode);
@@ -1024,7 +1020,7 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
}
- return ret;
+ return 0;
}
@@ -1670,7 +1666,7 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
netdev->watchdog_timeo = NES_TX_TIMEOUT;
netdev->irq = nesdev->pcidev->irq;
- netdev->mtu = ETH_DATA_LEN;
+ netdev->max_mtu = NES_MAX_MTU;
netdev->hard_header_len = ETH_HLEN;
netdev->addr_len = ETH_ALEN;
netdev->type = ARPHRD_ETHER;
diff --git a/drivers/infiniband/hw/qedr/Kconfig b/drivers/infiniband/hw/qedr/Kconfig
index 7c06d85568d4..6c9f3923e838 100644
--- a/drivers/infiniband/hw/qedr/Kconfig
+++ b/drivers/infiniband/hw/qedr/Kconfig
@@ -2,6 +2,7 @@ config INFINIBAND_QEDR
tristate "QLogic RoCE driver"
depends on 64BIT && QEDE
select QED_LL2
+ select QED_RDMA
---help---
This driver provides low-level InfiniBand over Ethernet
support for QLogic QED host channel adapters (HCAs).
diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c
index 01f71caa3ac4..f2cefb0d9180 100644
--- a/drivers/infiniband/sw/rdmavt/dma.c
+++ b/drivers/infiniband/sw/rdmavt/dma.c
@@ -90,9 +90,6 @@ static u64 rvt_dma_map_page(struct ib_device *dev, struct page *page,
if (WARN_ON(!valid_dma_direction(direction)))
return BAD_DMA_ADDRESS;
- if (offset + size > PAGE_SIZE)
- return BAD_DMA_ADDRESS;
-
addr = (u64)page_address(page);
if (addr)
addr += offset;
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index a576603304f7..16967cdb45df 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -243,10 +243,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
{
int err;
struct socket *sock;
- struct udp_port_cfg udp_cfg;
- struct udp_tunnel_sock_cfg tnl_cfg;
-
- memset(&udp_cfg, 0, sizeof(udp_cfg));
+ struct udp_port_cfg udp_cfg = {0};
+ struct udp_tunnel_sock_cfg tnl_cfg = {0};
if (ipv6) {
udp_cfg.family = AF_INET6;
@@ -264,10 +262,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
return ERR_PTR(err);
}
- tnl_cfg.sk_user_data = NULL;
tnl_cfg.encap_type = 1;
tnl_cfg.encap_rcv = rxe_udp_encap_recv;
- tnl_cfg.encap_destroy = NULL;
/* Setup UDP tunnel */
setup_udp_tunnel_sock(net, sock, &tnl_cfg);
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index b8036cfbce04..c3e60e4bde6e 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -522,6 +522,7 @@ static void rxe_qp_reset(struct rxe_qp *qp)
if (qp->sq.queue) {
__rxe_do_task(&qp->comp.task);
__rxe_do_task(&qp->req.task);
+ rxe_queue_reset(qp->sq.queue);
}
/* cleanup attributes */
@@ -573,6 +574,7 @@ void rxe_qp_error(struct rxe_qp *qp)
{
qp->req.state = QP_STATE_ERROR;
qp->resp.state = QP_STATE_ERROR;
+ qp->attr.qp_state = IB_QPS_ERR;
/* drain work and packet queues */
rxe_run_task(&qp->resp.task, 1);
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c
index 08274254eb88..d14bf496d62d 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.c
+++ b/drivers/infiniband/sw/rxe/rxe_queue.c
@@ -84,6 +84,15 @@ err1:
return -EINVAL;
}
+inline void rxe_queue_reset(struct rxe_queue *q)
+{
+ /* queue is comprised from header and the memory
+ * of the actual queue. See "struct rxe_queue_buf" in rxe_queue.h
+ * reset only the queue itself and not the management header
+ */
+ memset(q->buf->data, 0, q->buf_size - sizeof(struct rxe_queue_buf));
+}
+
struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
int *num_elem,
unsigned int elem_size)
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h
index 239fd609c31e..8c8641c87817 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.h
+++ b/drivers/infiniband/sw/rxe/rxe_queue.h
@@ -84,6 +84,8 @@ int do_mmap_info(struct rxe_dev *rxe,
size_t buf_size,
struct rxe_mmap_info **ip_p);
+void rxe_queue_reset(struct rxe_queue *q);
+
struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
int *num_elem,
unsigned int elem_size);
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index b246653cf713..73d4a97603a1 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -698,7 +698,9 @@ next_wqe:
qp->req.wqe_index);
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
- goto complete;
+ __rxe_do_task(&qp->comp.task);
+ rxe_drop_ref(qp);
+ return 0;
}
payload = mtu;
}
@@ -747,14 +749,16 @@ err:
wqe->status = IB_WC_LOC_PROT_ERR;
wqe->state = wqe_state_error;
-complete:
- if (qp_type(qp) != IB_QPT_RC) {
- while (rxe_completer(qp) == 0)
- ;
- }
- rxe_drop_ref(qp);
- return 0;
-
+ /*
+ * IBA Spec. Section 10.7.3.1 SIGNALED COMPLETIONS
+ * ---------8<---------8<-------------
+ * ...Note that if a completion error occurs, a Work Completion
+ * will always be generated, even if the signaling
+ * indicator requests an Unsignaled Completion.
+ * ---------8<---------8<-------------
+ */
+ wqe->wr.send_flags |= IB_SEND_SIGNALED;
+ __rxe_do_task(&qp->comp.task);
exit:
rxe_drop_ref(qp);
return -EAGAIN;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 7b8d2d9e2263..da12717a3eb7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -63,6 +63,8 @@ enum ipoib_flush_level {
enum {
IPOIB_ENCAP_LEN = 4,
+ IPOIB_PSEUDO_LEN = 20,
+ IPOIB_HARD_LEN = IPOIB_ENCAP_LEN + IPOIB_PSEUDO_LEN,
IPOIB_UD_HEAD_SIZE = IB_GRH_BYTES + IPOIB_ENCAP_LEN,
IPOIB_UD_RX_SG = 2, /* max buffer needed for 4K mtu */
@@ -134,15 +136,21 @@ struct ipoib_header {
u16 reserved;
};
-struct ipoib_cb {
- struct qdisc_skb_cb qdisc_cb;
- u8 hwaddr[INFINIBAND_ALEN];
+struct ipoib_pseudo_header {
+ u8 hwaddr[INFINIBAND_ALEN];
};
-static inline struct ipoib_cb *ipoib_skb_cb(const struct sk_buff *skb)
+static inline void skb_add_pseudo_hdr(struct sk_buff *skb)
{
- BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct ipoib_cb));
- return (struct ipoib_cb *)skb->cb;
+ char *data = skb_push(skb, IPOIB_PSEUDO_LEN);
+
+ /*
+ * only the ipoib header is present now, make room for a dummy
+ * pseudo header and set skb field accordingly
+ */
+ memset(data, 0, IPOIB_PSEUDO_LEN);
+ skb_reset_mac_header(skb);
+ skb_pull(skb, IPOIB_HARD_LEN);
}
/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 46234f52ee29..096c4f6fbd65 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -63,6 +63,8 @@ MODULE_PARM_DESC(cm_data_debug_level,
#define IPOIB_CM_RX_DELAY (3 * 256 * HZ)
#define IPOIB_CM_RX_UPDATE_MASK (0x3)
+#define IPOIB_CM_RX_RESERVE (ALIGN(IPOIB_HARD_LEN, 16) - IPOIB_ENCAP_LEN)
+
static struct ib_qp_attr ipoib_cm_err_attr = {
.qp_state = IB_QPS_ERR
};
@@ -146,15 +148,15 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
struct sk_buff *skb;
int i;
- skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12);
+ skb = dev_alloc_skb(ALIGN(IPOIB_CM_HEAD_SIZE + IPOIB_PSEUDO_LEN, 16));
if (unlikely(!skb))
return NULL;
/*
- * IPoIB adds a 4 byte header. So we need 12 more bytes to align the
+ * IPoIB adds a IPOIB_ENCAP_LEN byte header, this will align the
* IP header to a multiple of 16.
*/
- skb_reserve(skb, 12);
+ skb_reserve(skb, IPOIB_CM_RX_RESERVE);
mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE,
DMA_FROM_DEVICE);
@@ -621,9 +623,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
if (wc->byte_len < IPOIB_CM_COPYBREAK) {
int dlen = wc->byte_len;
- small_skb = dev_alloc_skb(dlen + 12);
+ small_skb = dev_alloc_skb(dlen + IPOIB_CM_RX_RESERVE);
if (small_skb) {
- skb_reserve(small_skb, 12);
+ skb_reserve(small_skb, IPOIB_CM_RX_RESERVE);
ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0],
dlen, DMA_FROM_DEVICE);
skb_copy_from_linear_data(skb, small_skb->data, dlen);
@@ -660,8 +662,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
copied:
skb->protocol = ((struct ipoib_header *) skb->data)->proto;
- skb_reset_mac_header(skb);
- skb_pull(skb, IPOIB_ENCAP_LEN);
+ skb_add_pseudo_hdr(skb);
++dev->stats.rx_packets;
dev->stats.rx_bytes += skb->len;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 43cf8b8a8d2e..5038f9d2d753 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -128,16 +128,15 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
- skb = dev_alloc_skb(buf_size + IPOIB_ENCAP_LEN);
+ skb = dev_alloc_skb(buf_size + IPOIB_HARD_LEN);
if (unlikely(!skb))
return NULL;
/*
- * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte
- * header. So we need 4 more bytes to get to 48 and align the
- * IP header to a multiple of 16.
+ * the IP header will be at IPOIP_HARD_LEN + IB_GRH_BYTES, that is
+ * 64 bytes aligned
*/
- skb_reserve(skb, 4);
+ skb_reserve(skb, sizeof(struct ipoib_pseudo_header));
mapping = priv->rx_ring[id].mapping;
mapping[0] = ib_dma_map_single(priv->ca, skb->data, buf_size,
@@ -253,8 +252,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
skb_pull(skb, IB_GRH_BYTES);
skb->protocol = ((struct ipoib_header *) skb->data)->proto;
- skb_reset_mac_header(skb);
- skb_pull(skb, IPOIB_ENCAP_LEN);
+ skb_add_pseudo_hdr(skb);
++dev->stats.rx_packets;
dev->stats.rx_bytes += skb->len;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 423b30dfe2d8..3ce0765a05ab 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -292,6 +292,25 @@ static struct net_device *ipoib_get_master_net_dev(struct net_device *dev)
return dev;
}
+struct ipoib_walk_data {
+ const struct sockaddr *addr;
+ struct net_device *result;
+};
+
+static int ipoib_upper_walk(struct net_device *upper, void *_data)
+{
+ struct ipoib_walk_data *data = _data;
+ int ret = 0;
+
+ if (ipoib_is_dev_match_addr_rcu(data->addr, upper)) {
+ dev_hold(upper);
+ data->result = upper;
+ ret = 1;
+ }
+
+ return ret;
+}
+
/**
* Find a net_device matching the given address, which is an upper device of
* the given net_device.
@@ -304,27 +323,21 @@ static struct net_device *ipoib_get_master_net_dev(struct net_device *dev)
static struct net_device *ipoib_get_net_dev_match_addr(
const struct sockaddr *addr, struct net_device *dev)
{
- struct net_device *upper,
- *result = NULL;
- struct list_head *iter;
+ struct ipoib_walk_data data = {
+ .addr = addr,
+ };
rcu_read_lock();
if (ipoib_is_dev_match_addr_rcu(addr, dev)) {
dev_hold(dev);
- result = dev;
+ data.result = dev;
goto out;
}
- netdev_for_each_all_upper_dev_rcu(dev, upper, iter) {
- if (ipoib_is_dev_match_addr_rcu(addr, upper)) {
- dev_hold(upper);
- result = upper;
- break;
- }
- }
+ netdev_walk_all_upper_dev_rcu(dev, ipoib_upper_walk, &data);
out:
rcu_read_unlock();
- return result;
+ return data.result;
}
/* returns the number of IPoIB netdevs on top a given ipoib device matching a
@@ -925,9 +938,12 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
ipoib_neigh_free(neigh);
goto err_drop;
}
- if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
+ if (skb_queue_len(&neigh->queue) <
+ IPOIB_MAX_PATH_REC_QUEUE) {
+ /* put pseudoheader back on for next time */
+ skb_push(skb, IPOIB_PSEUDO_LEN);
__skb_queue_tail(&neigh->queue, skb);
- else {
+ } else {
ipoib_warn(priv, "queue length limit %d. Packet drop.\n",
skb_queue_len(&neigh->queue));
goto err_drop;
@@ -964,7 +980,7 @@ err_drop:
}
static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
- struct ipoib_cb *cb)
+ struct ipoib_pseudo_header *phdr)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_path *path;
@@ -972,16 +988,18 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
spin_lock_irqsave(&priv->lock, flags);
- path = __path_find(dev, cb->hwaddr + 4);
+ path = __path_find(dev, phdr->hwaddr + 4);
if (!path || !path->valid) {
int new_path = 0;
if (!path) {
- path = path_rec_create(dev, cb->hwaddr + 4);
+ path = path_rec_create(dev, phdr->hwaddr + 4);
new_path = 1;
}
if (path) {
if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+ /* put pseudoheader back on for next time */
+ skb_push(skb, IPOIB_PSEUDO_LEN);
__skb_queue_tail(&path->queue, skb);
} else {
++dev->stats.tx_dropped;
@@ -1009,10 +1027,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
be16_to_cpu(path->pathrec.dlid));
spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_send(dev, skb, path->ah, IPOIB_QPN(cb->hwaddr));
+ ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
return;
} else if ((path->query || !path_rec_start(dev, path)) &&
skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+ /* put pseudoheader back on for next time */
+ skb_push(skb, IPOIB_PSEUDO_LEN);
__skb_queue_tail(&path->queue, skb);
} else {
++dev->stats.tx_dropped;
@@ -1026,13 +1046,15 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_neigh *neigh;
- struct ipoib_cb *cb = ipoib_skb_cb(skb);
+ struct ipoib_pseudo_header *phdr;
struct ipoib_header *header;
unsigned long flags;
+ phdr = (struct ipoib_pseudo_header *) skb->data;
+ skb_pull(skb, sizeof(*phdr));
header = (struct ipoib_header *) skb->data;
- if (unlikely(cb->hwaddr[4] == 0xff)) {
+ if (unlikely(phdr->hwaddr[4] == 0xff)) {
/* multicast, arrange "if" according to probability */
if ((header->proto != htons(ETH_P_IP)) &&
(header->proto != htons(ETH_P_IPV6)) &&
@@ -1045,13 +1067,13 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
/* Add in the P_Key for multicast*/
- cb->hwaddr[8] = (priv->pkey >> 8) & 0xff;
- cb->hwaddr[9] = priv->pkey & 0xff;
+ phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
+ phdr->hwaddr[9] = priv->pkey & 0xff;
- neigh = ipoib_neigh_get(dev, cb->hwaddr);
+ neigh = ipoib_neigh_get(dev, phdr->hwaddr);
if (likely(neigh))
goto send_using_neigh;
- ipoib_mcast_send(dev, cb->hwaddr, skb);
+ ipoib_mcast_send(dev, phdr->hwaddr, skb);
return NETDEV_TX_OK;
}
@@ -1060,16 +1082,16 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
case htons(ETH_P_IP):
case htons(ETH_P_IPV6):
case htons(ETH_P_TIPC):
- neigh = ipoib_neigh_get(dev, cb->hwaddr);
+ neigh = ipoib_neigh_get(dev, phdr->hwaddr);
if (unlikely(!neigh)) {
- neigh_add_path(skb, cb->hwaddr, dev);
+ neigh_add_path(skb, phdr->hwaddr, dev);
return NETDEV_TX_OK;
}
break;
case htons(ETH_P_ARP):
case htons(ETH_P_RARP):
/* for unicast ARP and RARP should always perform path find */
- unicast_arp_send(skb, dev, cb);
+ unicast_arp_send(skb, dev, phdr);
return NETDEV_TX_OK;
default:
/* ethertype not supported by IPoIB */
@@ -1086,11 +1108,13 @@ send_using_neigh:
goto unref;
}
} else if (neigh->ah) {
- ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(cb->hwaddr));
+ ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(phdr->hwaddr));
goto unref;
}
if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+ /* put pseudoheader back on for next time */
+ skb_push(skb, sizeof(*phdr));
spin_lock_irqsave(&priv->lock, flags);
__skb_queue_tail(&neigh->queue, skb);
spin_unlock_irqrestore(&priv->lock, flags);
@@ -1122,8 +1146,8 @@ static int ipoib_hard_header(struct sk_buff *skb,
unsigned short type,
const void *daddr, const void *saddr, unsigned len)
{
+ struct ipoib_pseudo_header *phdr;
struct ipoib_header *header;
- struct ipoib_cb *cb = ipoib_skb_cb(skb);
header = (struct ipoib_header *) skb_push(skb, sizeof *header);
@@ -1132,12 +1156,13 @@ static int ipoib_hard_header(struct sk_buff *skb,
/*
* we don't rely on dst_entry structure, always stuff the
- * destination address into skb->cb so we can figure out where
+ * destination address into skb hard header so we can figure out where
* to send the packet later.
*/
- memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN);
+ phdr = (struct ipoib_pseudo_header *) skb_push(skb, sizeof(*phdr));
+ memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
- return sizeof *header;
+ return IPOIB_HARD_LEN;
}
static void ipoib_set_mcast_list(struct net_device *dev)
@@ -1756,7 +1781,7 @@ void ipoib_setup(struct net_device *dev)
dev->flags |= IFF_BROADCAST | IFF_MULTICAST;
- dev->hard_header_len = IPOIB_ENCAP_LEN;
+ dev->hard_header_len = IPOIB_HARD_LEN;
dev->addr_len = INFINIBAND_ALEN;
dev->type = ARPHRD_INFINIBAND;
dev->tx_queue_len = ipoib_sendq_size * 2;
@@ -2001,6 +2026,7 @@ static struct net_device *ipoib_add_port(const char *format,
/* MTU will be reset when mcast join happens */
priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu;
+ priv->dev->max_mtu = IPOIB_CM_MTU;
priv->dev->neigh_priv_len = sizeof(struct ipoib_neigh);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 2a3980c2c670..fddff403d5d2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -799,9 +799,11 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
__ipoib_mcast_add(dev, mcast);
list_add_tail(&mcast->list, &priv->multicast_list);
}
- if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
+ if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) {
+ /* put pseudoheader back on for next time */
+ skb_push(skb, sizeof(struct ipoib_pseudo_header));
skb_queue_tail(&mcast->pkt_queue, skb);
- else {
+ } else {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
}